added make_archive (and secondary APIs) to shutil

This commit is contained in:
Tarek Ziadé 2010-02-23 05:16:41 +00:00
parent b0aad6cd09
commit 48cc8dc958
3 changed files with 694 additions and 5 deletions

View File

@ -27,6 +27,8 @@ copying and removal. For operations on individual files, see also the
not be correct. On Windows, file owners, ACLs and alternate data streams
are not copied.
Directory and files operations
------------------------------
.. function:: copyfileobj(fsrc, fdst[, length])
@ -169,11 +171,10 @@ copying and removal. For operations on individual files, see also the
.. versionadded:: 2.3
.. _shutil-example:
Example
-------
copytree example
::::::::::::::::
This example is the implementation of the :func:`copytree` function, described
above, with the docstring omitted. It demonstrates many of the other functions
@ -238,3 +239,97 @@ Another example that uses the *ignore* argument to add a logging call::
copytree(source, destination, ignore=_logpath)
Archives operations
-------------------
.. function:: make_archive(base_name, format, [root_dir, [base_dir, [verbose, [dry_run, [owner, [group, [logger]]]]]]])
Create an archive file (eg. zip or tar) and returns its name.
*base_name* is the name of the file to create, including the path, minus
any format-specific extension. *format* is the archive format: one of
"zip", "tar", "ztar", or "gztar".
*root_dir* is a directory that will be the root directory of the
archive; ie. we typically chdir into *root_dir* before creating the
archive.
*base_dir* is the directory where we start archiving from;
ie. *base_dir* will be the common prefix of all files and
directories in the archive.
*root_dir* and *base_dir* both default to the current directory.
*owner* and *group* are used when creating a tar archive. By default,
uses the current owner and group.
.. versionadded:: 2.7
.. function:: get_archive_formats()
Returns a list of supported formats for archiving.
Each element of the returned sequence is a tuple ``(name, description)``
By default :mod:`shutil` provides these formats:
- *gztar*: gzip'ed tar-file
- *bztar*: bzip2'ed tar-file
- *ztar*: compressed tar file
- *tar*: uncompressed tar file
- *zip*: ZIP file
You can register new formats or provide your own archiver for any existing
formats, by using :func:`register_archive_format`.
.. versionadded:: 2.7
.. function:: register_archive_format(name, function, [extra_args, [description]])
Registers an archiver for the format *name*. *function* is a callable that
will be used to invoke the archiver.
If given, *extra_args* is a sequence of ``(name, value)`` that will be
used as extra keywords arguments when the archiver callable is used.
*description* is used by :func:`get_archive_formats` which returns the
list of archivers. Defaults to an empty list.
.. versionadded:: 2.7
.. function:: unregister_archive_format(name)
Remove the archive format *name* from the list of supported formats.
.. versionadded:: 2.7
Archiving example
:::::::::::::::::
In this example, we create a gzip'ed tar-file archive containing all files
found in the :file:`.ssh` directory of the user::
>>> from shutil import make_archive
>>> import os
>>> archive_name = os.path.expanduser(os.path.join('~', 'myarchive'))
>>> root_dir = os.path.expanduser(os.path.join('~', '.ssh'))
>>> make_archive(archive_name, 'gztar', root_dir)
'/Users/tarek/myarchive.tar.gz'
The resulting archive contains::
$ tar -tzvf /Users/tarek/myarchive.tar.gz
drwx------ tarek/staff 0 2010-02-01 16:23:40 ./
-rw-r--r-- tarek/staff 609 2008-06-09 13:26:54 ./authorized_keys
-rwxr-xr-x tarek/staff 65 2008-06-09 13:26:54 ./config
-rwx------ tarek/staff 668 2008-06-09 13:26:54 ./id_dsa
-rwxr-xr-x tarek/staff 609 2008-06-09 13:26:54 ./id_dsa.pub
-rw------- tarek/staff 1675 2008-06-09 13:26:54 ./id_rsa
-rw-r--r-- tarek/staff 397 2008-06-09 13:26:54 ./id_rsa.pub
-rw-r--r-- tarek/staff 37192 2010-02-06 18:23:10 ./known_hosts

View File

@ -9,9 +9,21 @@ import sys
import stat
from os.path import abspath
import fnmatch
from warnings import warn
try:
from pwd import getpwnam
except ImportError:
getpwnam = None
try:
from grp import getgrnam
except ImportError:
getgrnam = None
__all__ = ["copyfileobj","copyfile","copymode","copystat","copy","copy2",
"copytree","move","rmtree","Error", "SpecialFileError"]
"copytree","move","rmtree","Error", "SpecialFileError",
"ExecError","make_archive"]
class Error(EnvironmentError):
pass
@ -20,6 +32,9 @@ class SpecialFileError(EnvironmentError):
"""Raised when trying to do a kind of operation (e.g. copying) which is
not supported on a special file (e.g. a named pipe)"""
class ExecError(EnvironmentError):
"""Raised when a command could not be executed"""
try:
WindowsError
except NameError:
@ -286,3 +301,267 @@ def _destinsrc(src, dst):
if not dst.endswith(os.path.sep):
dst += os.path.sep
return dst.startswith(src)
def _get_gid(name):
"""Returns a gid, given a group name."""
if getgrnam is None or name is None:
return None
try:
result = getgrnam(name)
except KeyError:
result = None
if result is not None:
return result[2]
return None
def _get_uid(name):
"""Returns an uid, given a user name."""
if getpwnam is None or name is None:
return None
try:
result = getpwnam(name)
except KeyError:
result = None
if result is not None:
return result[2]
return None
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
owner=None, group=None, logger=None):
"""Create a (possibly compressed) tar file from all the files under
'base_dir'.
'compress' must be "gzip" (the default), "compress", "bzip2", or None.
(compress will be deprecated in Python 3.2)
'owner' and 'group' can be used to define an owner and a group for the
archive that is being built. If not provided, the current owner and group
will be used.
The output tar file will be named 'base_dir' + ".tar", possibly plus
the appropriate compression extension (".gz", ".bz2" or ".Z").
Returns the output filename.
"""
tar_compression = {'gzip': 'gz', 'bzip2': 'bz2', None: '', 'compress': ''}
compress_ext = {'gzip': '.gz', 'bzip2': '.bz2', 'compress': '.Z'}
# flags for compression program, each element of list will be an argument
if compress is not None and compress not in compress_ext.keys():
raise ValueError, \
("bad value for 'compress': must be None, 'gzip', 'bzip2' "
"or 'compress'")
archive_name = base_name + '.tar'
if compress != 'compress':
archive_name += compress_ext.get(compress, '')
archive_dir = os.path.dirname(archive_name)
if not os.path.exists(archive_dir):
logger.info("creating %s" % archive_dir)
if not dry_run:
os.makedirs(archive_dir)
# creating the tarball
import tarfile # late import so Python build itself doesn't break
if logger is not None:
logger.info('Creating tar archive')
uid = _get_uid(owner)
gid = _get_gid(group)
def _set_uid_gid(tarinfo):
if gid is not None:
tarinfo.gid = gid
tarinfo.gname = group
if uid is not None:
tarinfo.uid = uid
tarinfo.uname = owner
return tarinfo
if not dry_run:
tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
try:
tar.add(base_dir, filter=_set_uid_gid)
finally:
tar.close()
# compression using `compress`
# XXX this block will be removed in Python 3.2
if compress == 'compress':
warn("'compress' will be deprecated.", PendingDeprecationWarning)
# the option varies depending on the platform
compressed_name = archive_name + compress_ext[compress]
if sys.platform == 'win32':
cmd = [compress, archive_name, compressed_name]
else:
cmd = [compress, '-f', archive_name]
from distutils.spawn import spawn
spawn(cmd, dry_run=dry_run)
return compressed_name
return archive_name
def _call_external_zip(directory, verbose=False):
# XXX see if we want to keep an external call here
if verbose:
zipoptions = "-r"
else:
zipoptions = "-rq"
from distutils.errors import DistutilsExecError
from distutils.spawn import spawn
try:
spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
except DistutilsExecError:
# XXX really should distinguish between "couldn't find
# external 'zip' command" and "zip failed".
raise ExecError, \
("unable to create zip file '%s': "
"could neither import the 'zipfile' module nor "
"find a standalone zip utility") % zip_filename
def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
"""Create a zip file from all the files under 'base_dir'.
The output zip file will be named 'base_dir' + ".zip". Uses either the
"zipfile" Python module (if available) or the InfoZIP "zip" utility
(if installed and found on the default search path). If neither tool is
available, raises ExecError. Returns the name of the output zip
file.
"""
zip_filename = base_name + ".zip"
archive_dir = os.path.dirname(base_name)
if not os.path.exists(archive_dir):
if logger is not None:
logger.info("creating %s", archive_dir)
if not dry_run:
os.makedirs(archive_dir)
# If zipfile module is not available, try spawning an external 'zip'
# command.
try:
import zipfile
except ImportError:
zipfile = None
if zipfile is None:
_call_external_zip(base_dir, verbose)
else:
if logger is not None:
logger.info("creating '%s' and adding '%s' to it",
zip_filename, base_dir)
if not dry_run:
zip = zipfile.ZipFile(zip_filename, "w",
compression=zipfile.ZIP_DEFLATED)
for dirpath, dirnames, filenames in os.walk(base_dir):
for name in filenames:
path = os.path.normpath(os.path.join(dirpath, name))
if os.path.isfile(path):
zip.write(path, path)
if logger is not None:
logger.info("adding '%s'", path)
zip.close()
return zip_filename
_ARCHIVE_FORMATS = {
'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
'ztar': (_make_tarball, [('compress', 'compress')],
"compressed tar file"),
'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"),
'zip': (_make_zipfile, [],"ZIP file")
}
def get_archive_formats():
"""Returns a list of supported formats for archiving and unarchiving.
Each element of the returned sequence is a tuple (name, description)
"""
formats = [(name, registry[2]) for name, registry in
_ARCHIVE_FORMATS.items()]
formats.sort()
return formats
def register_archive_format(name, function, extra_args=None, description=''):
"""Registers an archive format.
name is the name of the format. function is the callable that will be
used to create archives. If provided, extra_args is a sequence of
(name, value) tuples that will be passed as arguments to the callable.
description can be provided to describe the format, and will be returned
by the get_archive_formats() function.
"""
if extra_args is None:
extra_args = []
if not callable(function):
raise TypeError('The %s object is not callable' % function)
if not isinstance(extra_args, (tuple, list)):
raise TypeError('extra_args needs to be a sequence')
for element in extra_args:
if not isinstance(element, (tuple, list)) or len(element) !=2 :
raise TypeError('extra_args elements are : (arg_name, value)')
_ARCHIVE_FORMATS[name] = (function, extra_args, description)
def unregister_archive_format(name):
del _ARCHIVE_FORMATS[name]
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
dry_run=0, owner=None, group=None, logger=None):
"""Create an archive file (eg. zip or tar).
'base_name' is the name of the file to create, minus any format-specific
extension; 'format' is the archive format: one of "zip", "tar", "ztar",
or "gztar".
'root_dir' is a directory that will be the root directory of the
archive; ie. we typically chdir into 'root_dir' before creating the
archive. 'base_dir' is the directory where we start archiving from;
ie. 'base_dir' will be the common prefix of all files and
directories in the archive. 'root_dir' and 'base_dir' both default
to the current directory. Returns the name of the archive file.
'owner' and 'group' are used when creating a tar archive. By default,
uses the current owner and group.
"""
save_cwd = os.getcwd()
if root_dir is not None:
if logger is not None:
logger.debug("changing into '%s'", root_dir)
base_name = os.path.abspath(base_name)
if not dry_run:
os.chdir(root_dir)
if base_dir is None:
base_dir = os.curdir
kwargs = {'dry_run': dry_run, 'logger': logger}
try:
format_info = _ARCHIVE_FORMATS[format]
except KeyError:
raise ValueError, "unknown archive format '%s'" % format
func = format_info[0]
for arg, val in format_info[1]:
kwargs[arg] = val
if format != 'zip':
kwargs['owner'] = owner
kwargs['group'] = group
try:
filename = func(base_name, base_dir, **kwargs)
finally:
if root_dir is not None:
if logger is not None:
logger.debug("changing back to '%s'", save_cwd)
os.chdir(save_cwd)
return filename

View File

@ -7,11 +7,71 @@ import sys
import stat
import os
import os.path
from os.path import splitdrive
from distutils.spawn import find_executable, spawn
from shutil import (_make_tarball, _make_zipfile, make_archive,
register_archive_format, unregister_archive_format,
get_archive_formats)
import tarfile
import warnings
from test import test_support
from test.test_support import TESTFN
from test.test_support import TESTFN, check_warnings, captured_stdout
TESTFN2 = TESTFN + "2"
try:
import grp
import pwd
UID_GID_SUPPORT = True
except ImportError:
UID_GID_SUPPORT = False
try:
import zlib
except ImportError:
zlib = None
try:
import zipfile
ZIP_SUPPORT = True
except ImportError:
ZIP_SUPPORT = find_executable('zip')
class TestShutil(unittest.TestCase):
def setUp(self):
super(TestShutil, self).setUp()
self.tempdirs = []
def tearDown(self):
super(TestShutil, self).tearDown()
while self.tempdirs:
d = self.tempdirs.pop()
shutil.rmtree(d, os.name in ('nt', 'cygwin'))
def write_file(self, path, content='xxx'):
"""Writes a file in the given path.
path can be a string or a sequence.
"""
if isinstance(path, (list, tuple)):
path = os.path.join(*path)
f = open(path, 'w')
try:
f.write(content)
finally:
f.close()
def mkdtemp(self):
"""Create a temporary directory that will be cleaned up.
Returns the path of the directory.
"""
d = tempfile.mkdtemp()
self.tempdirs.append(d)
return d
def test_rmtree_errors(self):
# filename is guaranteed not to exist
filename = tempfile.mktemp()
@ -277,6 +337,261 @@ class TestShutil(unittest.TestCase):
shutil.rmtree(TESTFN, ignore_errors=True)
shutil.rmtree(TESTFN2, ignore_errors=True)
@unittest.skipUnless(zlib, "requires zlib")
def test_make_tarball(self):
# creating something to tar
tmpdir = self.mkdtemp()
self.write_file([tmpdir, 'file1'], 'xxx')
self.write_file([tmpdir, 'file2'], 'xxx')
os.mkdir(os.path.join(tmpdir, 'sub'))
self.write_file([tmpdir, 'sub', 'file3'], 'xxx')
tmpdir2 = self.mkdtemp()
unittest.skipUnless(splitdrive(tmpdir)[0] == splitdrive(tmpdir2)[0],
"source and target should be on same drive")
base_name = os.path.join(tmpdir2, 'archive')
# working with relative paths to avoid tar warnings
old_dir = os.getcwd()
os.chdir(tmpdir)
try:
_make_tarball(splitdrive(base_name)[1], '.')
finally:
os.chdir(old_dir)
# check if the compressed tarball was created
tarball = base_name + '.tar.gz'
self.assertTrue(os.path.exists(tarball))
# trying an uncompressed one
base_name = os.path.join(tmpdir2, 'archive')
old_dir = os.getcwd()
os.chdir(tmpdir)
try:
_make_tarball(splitdrive(base_name)[1], '.', compress=None)
finally:
os.chdir(old_dir)
tarball = base_name + '.tar'
self.assertTrue(os.path.exists(tarball))
def _tarinfo(self, path):
tar = tarfile.open(path)
try:
names = tar.getnames()
names.sort()
return tuple(names)
finally:
tar.close()
def _create_files(self):
# creating something to tar
tmpdir = self.mkdtemp()
dist = os.path.join(tmpdir, 'dist')
os.mkdir(dist)
self.write_file([dist, 'file1'], 'xxx')
self.write_file([dist, 'file2'], 'xxx')
os.mkdir(os.path.join(dist, 'sub'))
self.write_file([dist, 'sub', 'file3'], 'xxx')
os.mkdir(os.path.join(dist, 'sub2'))
tmpdir2 = self.mkdtemp()
base_name = os.path.join(tmpdir2, 'archive')
return tmpdir, tmpdir2, base_name
@unittest.skipUnless(zlib, "Requires zlib")
@unittest.skipUnless(find_executable('tar') and find_executable('gzip'),
'Need the tar command to run')
def test_tarfile_vs_tar(self):
tmpdir, tmpdir2, base_name = self._create_files()
old_dir = os.getcwd()
os.chdir(tmpdir)
try:
_make_tarball(base_name, 'dist')
finally:
os.chdir(old_dir)
# check if the compressed tarball was created
tarball = base_name + '.tar.gz'
self.assertTrue(os.path.exists(tarball))
# now create another tarball using `tar`
tarball2 = os.path.join(tmpdir, 'archive2.tar.gz')
tar_cmd = ['tar', '-cf', 'archive2.tar', 'dist']
gzip_cmd = ['gzip', '-f9', 'archive2.tar']
old_dir = os.getcwd()
os.chdir(tmpdir)
try:
with captured_stdout() as s:
spawn(tar_cmd)
spawn(gzip_cmd)
finally:
os.chdir(old_dir)
self.assertTrue(os.path.exists(tarball2))
# let's compare both tarballs
self.assertEquals(self._tarinfo(tarball), self._tarinfo(tarball2))
# trying an uncompressed one
base_name = os.path.join(tmpdir2, 'archive')
old_dir = os.getcwd()
os.chdir(tmpdir)
try:
_make_tarball(base_name, 'dist', compress=None)
finally:
os.chdir(old_dir)
tarball = base_name + '.tar'
self.assertTrue(os.path.exists(tarball))
# now for a dry_run
base_name = os.path.join(tmpdir2, 'archive')
old_dir = os.getcwd()
os.chdir(tmpdir)
try:
_make_tarball(base_name, 'dist', compress=None, dry_run=True)
finally:
os.chdir(old_dir)
tarball = base_name + '.tar'
self.assertTrue(os.path.exists(tarball))
@unittest.skipUnless(find_executable('compress'),
'The compress program is required')
def test_compress_deprecated(self):
tmpdir, tmpdir2, base_name = self._create_files()
# using compress and testing the PendingDeprecationWarning
old_dir = os.getcwd()
os.chdir(tmpdir)
try:
with captured_stdout() as s:
with check_warnings() as w:
warnings.simplefilter("always")
_make_tarball(base_name, 'dist', compress='compress')
finally:
os.chdir(old_dir)
tarball = base_name + '.tar.Z'
self.assertTrue(os.path.exists(tarball))
self.assertEquals(len(w.warnings), 1)
# same test with dry_run
os.remove(tarball)
old_dir = os.getcwd()
os.chdir(tmpdir)
try:
with captured_stdout() as s:
with check_warnings() as w:
warnings.simplefilter("always")
_make_tarball(base_name, 'dist', compress='compress',
dry_run=True)
finally:
os.chdir(old_dir)
self.assertTrue(not os.path.exists(tarball))
self.assertEquals(len(w.warnings), 1)
@unittest.skipUnless(zlib, "Requires zlib")
@unittest.skipUnless(ZIP_SUPPORT, 'Need zip support to run')
def test_make_zipfile(self):
# creating something to tar
tmpdir = self.mkdtemp()
self.write_file([tmpdir, 'file1'], 'xxx')
self.write_file([tmpdir, 'file2'], 'xxx')
tmpdir2 = self.mkdtemp()
base_name = os.path.join(tmpdir2, 'archive')
_make_zipfile(base_name, tmpdir)
# check if the compressed tarball was created
tarball = base_name + '.zip'
def test_make_archive(self):
tmpdir = self.mkdtemp()
base_name = os.path.join(tmpdir, 'archive')
self.assertRaises(ValueError, make_archive, base_name, 'xxx')
@unittest.skipUnless(zlib, "Requires zlib")
def test_make_archive_owner_group(self):
# testing make_archive with owner and group, with various combinations
# this works even if there's not gid/uid support
if UID_GID_SUPPORT:
group = grp.getgrgid(0)[0]
owner = pwd.getpwuid(0)[0]
else:
group = owner = 'root'
base_dir, root_dir, base_name = self._create_files()
base_name = os.path.join(self.mkdtemp() , 'archive')
res = make_archive(base_name, 'zip', root_dir, base_dir, owner=owner,
group=group)
self.assertTrue(os.path.exists(res))
res = make_archive(base_name, 'zip', root_dir, base_dir)
self.assertTrue(os.path.exists(res))
res = make_archive(base_name, 'tar', root_dir, base_dir,
owner=owner, group=group)
self.assertTrue(os.path.exists(res))
res = make_archive(base_name, 'tar', root_dir, base_dir,
owner='kjhkjhkjg', group='oihohoh')
self.assertTrue(os.path.exists(res))
@unittest.skipUnless(zlib, "Requires zlib")
@unittest.skipUnless(UID_GID_SUPPORT, "Requires grp and pwd support")
def test_tarfile_root_owner(self):
tmpdir, tmpdir2, base_name = self._create_files()
old_dir = os.getcwd()
os.chdir(tmpdir)
group = grp.getgrgid(0)[0]
owner = pwd.getpwuid(0)[0]
try:
archive_name = _make_tarball(base_name, 'dist', compress=None,
owner=owner, group=group)
finally:
os.chdir(old_dir)
# check if the compressed tarball was created
self.assertTrue(os.path.exists(archive_name))
# now checks the rights
archive = tarfile.open(archive_name)
try:
for member in archive.getmembers():
self.assertEquals(member.uid, 0)
self.assertEquals(member.gid, 0)
finally:
archive.close()
def test_make_archive_cwd(self):
current_dir = os.getcwd()
def _breaks(*args, **kw):
raise RuntimeError()
register_archive_format('xxx', _breaks, [], 'xxx file')
try:
try:
make_archive('xxx', 'xxx', root_dir=self.mkdtemp())
except Exception:
pass
self.assertEquals(os.getcwd(), current_dir)
finally:
unregister_archive_format('xxx')
def test_register_archive_format(self):
self.assertRaises(TypeError, register_archive_format, 'xxx', 1)
self.assertRaises(TypeError, register_archive_format, 'xxx', lambda: x,
1)
self.assertRaises(TypeError, register_archive_format, 'xxx', lambda: x,
[(1, 2), (1, 2, 3)])
register_archive_format('xxx', lambda: x, [(1, 2)], 'xxx file')
formats = [name for name, params in get_archive_formats()]
self.assertIn('xxx', formats)
unregister_archive_format('xxx')
formats = [name for name, params in get_archive_formats()]
self.assertNotIn('xxx', formats)
class TestMove(unittest.TestCase):