diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index ad3ab57882a..25c911ff742 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -27,6 +27,8 @@ copying and removal. For operations on individual files, see also the not be correct. On Windows, file owners, ACLs and alternate data streams are not copied. +Directory and files operations +------------------------------ .. function:: copyfileobj(fsrc, fdst[, length]) @@ -169,11 +171,10 @@ copying and removal. For operations on individual files, see also the .. versionadded:: 2.3 - .. _shutil-example: -Example -------- +copytree example +:::::::::::::::: This example is the implementation of the :func:`copytree` function, described above, with the docstring omitted. It demonstrates many of the other functions @@ -238,3 +239,97 @@ Another example that uses the *ignore* argument to add a logging call:: copytree(source, destination, ignore=_logpath) + +Archives operations +------------------- + +.. function:: make_archive(base_name, format, [root_dir, [base_dir, [verbose, [dry_run, [owner, [group, [logger]]]]]]]) + + Create an archive file (eg. zip or tar) and returns its name. + + *base_name* is the name of the file to create, including the path, minus + any format-specific extension. *format* is the archive format: one of + "zip", "tar", "ztar", or "gztar". + + *root_dir* is a directory that will be the root directory of the + archive; ie. we typically chdir into *root_dir* before creating the + archive. + + *base_dir* is the directory where we start archiving from; + ie. *base_dir* will be the common prefix of all files and + directories in the archive. + + *root_dir* and *base_dir* both default to the current directory. + + *owner* and *group* are used when creating a tar archive. By default, + uses the current owner and group. + + .. versionadded:: 2.7 + + +.. function:: get_archive_formats() + + Returns a list of supported formats for archiving. + Each element of the returned sequence is a tuple ``(name, description)`` + + By default :mod:`shutil` provides these formats: + + - *gztar*: gzip'ed tar-file + - *bztar*: bzip2'ed tar-file + - *ztar*: compressed tar file + - *tar*: uncompressed tar file + - *zip*: ZIP file + + You can register new formats or provide your own archiver for any existing + formats, by using :func:`register_archive_format`. + + .. versionadded:: 2.7 + + +.. function:: register_archive_format(name, function, [extra_args, [description]]) + + Registers an archiver for the format *name*. *function* is a callable that + will be used to invoke the archiver. + + If given, *extra_args* is a sequence of ``(name, value)`` that will be + used as extra keywords arguments when the archiver callable is used. + + *description* is used by :func:`get_archive_formats` which returns the + list of archivers. Defaults to an empty list. + + .. versionadded:: 2.7 + + +.. function:: unregister_archive_format(name) + + Remove the archive format *name* from the list of supported formats. + + .. versionadded:: 2.7 + + +Archiving example +::::::::::::::::: + +In this example, we create a gzip'ed tar-file archive containing all files +found in the :file:`.ssh` directory of the user:: + + >>> from shutil import make_archive + >>> import os + >>> archive_name = os.path.expanduser(os.path.join('~', 'myarchive')) + >>> root_dir = os.path.expanduser(os.path.join('~', '.ssh')) + >>> make_archive(archive_name, 'gztar', root_dir) + '/Users/tarek/myarchive.tar.gz' + +The resulting archive contains:: + + $ tar -tzvf /Users/tarek/myarchive.tar.gz + drwx------ tarek/staff 0 2010-02-01 16:23:40 ./ + -rw-r--r-- tarek/staff 609 2008-06-09 13:26:54 ./authorized_keys + -rwxr-xr-x tarek/staff 65 2008-06-09 13:26:54 ./config + -rwx------ tarek/staff 668 2008-06-09 13:26:54 ./id_dsa + -rwxr-xr-x tarek/staff 609 2008-06-09 13:26:54 ./id_dsa.pub + -rw------- tarek/staff 1675 2008-06-09 13:26:54 ./id_rsa + -rw-r--r-- tarek/staff 397 2008-06-09 13:26:54 ./id_rsa.pub + -rw-r--r-- tarek/staff 37192 2010-02-06 18:23:10 ./known_hosts + + diff --git a/Lib/shutil.py b/Lib/shutil.py index a689256f15d..ebed14003b9 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -9,9 +9,21 @@ import sys import stat from os.path import abspath import fnmatch +from warnings import warn + +try: + from pwd import getpwnam +except ImportError: + getpwnam = None + +try: + from grp import getgrnam +except ImportError: + getgrnam = None __all__ = ["copyfileobj","copyfile","copymode","copystat","copy","copy2", - "copytree","move","rmtree","Error", "SpecialFileError"] + "copytree","move","rmtree","Error", "SpecialFileError", + "ExecError","make_archive"] class Error(EnvironmentError): pass @@ -20,6 +32,9 @@ class SpecialFileError(EnvironmentError): """Raised when trying to do a kind of operation (e.g. copying) which is not supported on a special file (e.g. a named pipe)""" +class ExecError(EnvironmentError): + """Raised when a command could not be executed""" + try: WindowsError except NameError: @@ -286,3 +301,267 @@ def _destinsrc(src, dst): if not dst.endswith(os.path.sep): dst += os.path.sep return dst.startswith(src) + +def _get_gid(name): + """Returns a gid, given a group name.""" + if getgrnam is None or name is None: + return None + try: + result = getgrnam(name) + except KeyError: + result = None + if result is not None: + return result[2] + return None + +def _get_uid(name): + """Returns an uid, given a user name.""" + if getpwnam is None or name is None: + return None + try: + result = getpwnam(name) + except KeyError: + result = None + if result is not None: + return result[2] + return None + +def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0, + owner=None, group=None, logger=None): + """Create a (possibly compressed) tar file from all the files under + 'base_dir'. + + 'compress' must be "gzip" (the default), "compress", "bzip2", or None. + (compress will be deprecated in Python 3.2) + + 'owner' and 'group' can be used to define an owner and a group for the + archive that is being built. If not provided, the current owner and group + will be used. + + The output tar file will be named 'base_dir' + ".tar", possibly plus + the appropriate compression extension (".gz", ".bz2" or ".Z"). + + Returns the output filename. + """ + tar_compression = {'gzip': 'gz', 'bzip2': 'bz2', None: '', 'compress': ''} + compress_ext = {'gzip': '.gz', 'bzip2': '.bz2', 'compress': '.Z'} + + # flags for compression program, each element of list will be an argument + if compress is not None and compress not in compress_ext.keys(): + raise ValueError, \ + ("bad value for 'compress': must be None, 'gzip', 'bzip2' " + "or 'compress'") + + archive_name = base_name + '.tar' + if compress != 'compress': + archive_name += compress_ext.get(compress, '') + + archive_dir = os.path.dirname(archive_name) + if not os.path.exists(archive_dir): + logger.info("creating %s" % archive_dir) + if not dry_run: + os.makedirs(archive_dir) + + + # creating the tarball + import tarfile # late import so Python build itself doesn't break + + if logger is not None: + logger.info('Creating tar archive') + + uid = _get_uid(owner) + gid = _get_gid(group) + + def _set_uid_gid(tarinfo): + if gid is not None: + tarinfo.gid = gid + tarinfo.gname = group + if uid is not None: + tarinfo.uid = uid + tarinfo.uname = owner + return tarinfo + + if not dry_run: + tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress]) + try: + tar.add(base_dir, filter=_set_uid_gid) + finally: + tar.close() + + # compression using `compress` + # XXX this block will be removed in Python 3.2 + if compress == 'compress': + warn("'compress' will be deprecated.", PendingDeprecationWarning) + # the option varies depending on the platform + compressed_name = archive_name + compress_ext[compress] + if sys.platform == 'win32': + cmd = [compress, archive_name, compressed_name] + else: + cmd = [compress, '-f', archive_name] + from distutils.spawn import spawn + spawn(cmd, dry_run=dry_run) + return compressed_name + + return archive_name + +def _call_external_zip(directory, verbose=False): + # XXX see if we want to keep an external call here + if verbose: + zipoptions = "-r" + else: + zipoptions = "-rq" + from distutils.errors import DistutilsExecError + from distutils.spawn import spawn + try: + spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run) + except DistutilsExecError: + # XXX really should distinguish between "couldn't find + # external 'zip' command" and "zip failed". + raise ExecError, \ + ("unable to create zip file '%s': " + "could neither import the 'zipfile' module nor " + "find a standalone zip utility") % zip_filename + +def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None): + """Create a zip file from all the files under 'base_dir'. + + The output zip file will be named 'base_dir' + ".zip". Uses either the + "zipfile" Python module (if available) or the InfoZIP "zip" utility + (if installed and found on the default search path). If neither tool is + available, raises ExecError. Returns the name of the output zip + file. + """ + zip_filename = base_name + ".zip" + archive_dir = os.path.dirname(base_name) + + if not os.path.exists(archive_dir): + if logger is not None: + logger.info("creating %s", archive_dir) + if not dry_run: + os.makedirs(archive_dir) + + # If zipfile module is not available, try spawning an external 'zip' + # command. + try: + import zipfile + except ImportError: + zipfile = None + + if zipfile is None: + _call_external_zip(base_dir, verbose) + else: + if logger is not None: + logger.info("creating '%s' and adding '%s' to it", + zip_filename, base_dir) + + if not dry_run: + zip = zipfile.ZipFile(zip_filename, "w", + compression=zipfile.ZIP_DEFLATED) + + for dirpath, dirnames, filenames in os.walk(base_dir): + for name in filenames: + path = os.path.normpath(os.path.join(dirpath, name)) + if os.path.isfile(path): + zip.write(path, path) + if logger is not None: + logger.info("adding '%s'", path) + zip.close() + + return zip_filename + +_ARCHIVE_FORMATS = { + 'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"), + 'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"), + 'ztar': (_make_tarball, [('compress', 'compress')], + "compressed tar file"), + 'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"), + 'zip': (_make_zipfile, [],"ZIP file") + } + +def get_archive_formats(): + """Returns a list of supported formats for archiving and unarchiving. + + Each element of the returned sequence is a tuple (name, description) + """ + formats = [(name, registry[2]) for name, registry in + _ARCHIVE_FORMATS.items()] + formats.sort() + return formats + +def register_archive_format(name, function, extra_args=None, description=''): + """Registers an archive format. + + name is the name of the format. function is the callable that will be + used to create archives. If provided, extra_args is a sequence of + (name, value) tuples that will be passed as arguments to the callable. + description can be provided to describe the format, and will be returned + by the get_archive_formats() function. + """ + if extra_args is None: + extra_args = [] + if not callable(function): + raise TypeError('The %s object is not callable' % function) + if not isinstance(extra_args, (tuple, list)): + raise TypeError('extra_args needs to be a sequence') + for element in extra_args: + if not isinstance(element, (tuple, list)) or len(element) !=2 : + raise TypeError('extra_args elements are : (arg_name, value)') + + _ARCHIVE_FORMATS[name] = (function, extra_args, description) + +def unregister_archive_format(name): + del _ARCHIVE_FORMATS[name] + +def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0, + dry_run=0, owner=None, group=None, logger=None): + """Create an archive file (eg. zip or tar). + + 'base_name' is the name of the file to create, minus any format-specific + extension; 'format' is the archive format: one of "zip", "tar", "ztar", + or "gztar". + + 'root_dir' is a directory that will be the root directory of the + archive; ie. we typically chdir into 'root_dir' before creating the + archive. 'base_dir' is the directory where we start archiving from; + ie. 'base_dir' will be the common prefix of all files and + directories in the archive. 'root_dir' and 'base_dir' both default + to the current directory. Returns the name of the archive file. + + 'owner' and 'group' are used when creating a tar archive. By default, + uses the current owner and group. + """ + save_cwd = os.getcwd() + if root_dir is not None: + if logger is not None: + logger.debug("changing into '%s'", root_dir) + base_name = os.path.abspath(base_name) + if not dry_run: + os.chdir(root_dir) + + if base_dir is None: + base_dir = os.curdir + + kwargs = {'dry_run': dry_run, 'logger': logger} + + try: + format_info = _ARCHIVE_FORMATS[format] + except KeyError: + raise ValueError, "unknown archive format '%s'" % format + + func = format_info[0] + for arg, val in format_info[1]: + kwargs[arg] = val + + if format != 'zip': + kwargs['owner'] = owner + kwargs['group'] = group + + try: + filename = func(base_name, base_dir, **kwargs) + finally: + if root_dir is not None: + if logger is not None: + logger.debug("changing back to '%s'", save_cwd) + os.chdir(save_cwd) + + return filename diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 151577eb5da..871541f20d5 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -7,11 +7,71 @@ import sys import stat import os import os.path +from os.path import splitdrive +from distutils.spawn import find_executable, spawn +from shutil import (_make_tarball, _make_zipfile, make_archive, + register_archive_format, unregister_archive_format, + get_archive_formats) +import tarfile +import warnings + from test import test_support -from test.test_support import TESTFN +from test.test_support import TESTFN, check_warnings, captured_stdout + TESTFN2 = TESTFN + "2" +try: + import grp + import pwd + UID_GID_SUPPORT = True +except ImportError: + UID_GID_SUPPORT = False + +try: + import zlib +except ImportError: + zlib = None + +try: + import zipfile + ZIP_SUPPORT = True +except ImportError: + ZIP_SUPPORT = find_executable('zip') + class TestShutil(unittest.TestCase): + + def setUp(self): + super(TestShutil, self).setUp() + self.tempdirs = [] + + def tearDown(self): + super(TestShutil, self).tearDown() + while self.tempdirs: + d = self.tempdirs.pop() + shutil.rmtree(d, os.name in ('nt', 'cygwin')) + + def write_file(self, path, content='xxx'): + """Writes a file in the given path. + + + path can be a string or a sequence. + """ + if isinstance(path, (list, tuple)): + path = os.path.join(*path) + f = open(path, 'w') + try: + f.write(content) + finally: + f.close() + + def mkdtemp(self): + """Create a temporary directory that will be cleaned up. + + Returns the path of the directory. + """ + d = tempfile.mkdtemp() + self.tempdirs.append(d) + return d def test_rmtree_errors(self): # filename is guaranteed not to exist filename = tempfile.mktemp() @@ -277,6 +337,261 @@ class TestShutil(unittest.TestCase): shutil.rmtree(TESTFN, ignore_errors=True) shutil.rmtree(TESTFN2, ignore_errors=True) + @unittest.skipUnless(zlib, "requires zlib") + def test_make_tarball(self): + # creating something to tar + tmpdir = self.mkdtemp() + self.write_file([tmpdir, 'file1'], 'xxx') + self.write_file([tmpdir, 'file2'], 'xxx') + os.mkdir(os.path.join(tmpdir, 'sub')) + self.write_file([tmpdir, 'sub', 'file3'], 'xxx') + + tmpdir2 = self.mkdtemp() + unittest.skipUnless(splitdrive(tmpdir)[0] == splitdrive(tmpdir2)[0], + "source and target should be on same drive") + + base_name = os.path.join(tmpdir2, 'archive') + + # working with relative paths to avoid tar warnings + old_dir = os.getcwd() + os.chdir(tmpdir) + try: + _make_tarball(splitdrive(base_name)[1], '.') + finally: + os.chdir(old_dir) + + # check if the compressed tarball was created + tarball = base_name + '.tar.gz' + self.assertTrue(os.path.exists(tarball)) + + # trying an uncompressed one + base_name = os.path.join(tmpdir2, 'archive') + old_dir = os.getcwd() + os.chdir(tmpdir) + try: + _make_tarball(splitdrive(base_name)[1], '.', compress=None) + finally: + os.chdir(old_dir) + tarball = base_name + '.tar' + self.assertTrue(os.path.exists(tarball)) + + def _tarinfo(self, path): + tar = tarfile.open(path) + try: + names = tar.getnames() + names.sort() + return tuple(names) + finally: + tar.close() + + def _create_files(self): + # creating something to tar + tmpdir = self.mkdtemp() + dist = os.path.join(tmpdir, 'dist') + os.mkdir(dist) + self.write_file([dist, 'file1'], 'xxx') + self.write_file([dist, 'file2'], 'xxx') + os.mkdir(os.path.join(dist, 'sub')) + self.write_file([dist, 'sub', 'file3'], 'xxx') + os.mkdir(os.path.join(dist, 'sub2')) + tmpdir2 = self.mkdtemp() + base_name = os.path.join(tmpdir2, 'archive') + return tmpdir, tmpdir2, base_name + + @unittest.skipUnless(zlib, "Requires zlib") + @unittest.skipUnless(find_executable('tar') and find_executable('gzip'), + 'Need the tar command to run') + def test_tarfile_vs_tar(self): + tmpdir, tmpdir2, base_name = self._create_files() + old_dir = os.getcwd() + os.chdir(tmpdir) + try: + _make_tarball(base_name, 'dist') + finally: + os.chdir(old_dir) + + # check if the compressed tarball was created + tarball = base_name + '.tar.gz' + self.assertTrue(os.path.exists(tarball)) + + # now create another tarball using `tar` + tarball2 = os.path.join(tmpdir, 'archive2.tar.gz') + tar_cmd = ['tar', '-cf', 'archive2.tar', 'dist'] + gzip_cmd = ['gzip', '-f9', 'archive2.tar'] + old_dir = os.getcwd() + os.chdir(tmpdir) + try: + with captured_stdout() as s: + spawn(tar_cmd) + spawn(gzip_cmd) + finally: + os.chdir(old_dir) + + self.assertTrue(os.path.exists(tarball2)) + # let's compare both tarballs + self.assertEquals(self._tarinfo(tarball), self._tarinfo(tarball2)) + + # trying an uncompressed one + base_name = os.path.join(tmpdir2, 'archive') + old_dir = os.getcwd() + os.chdir(tmpdir) + try: + _make_tarball(base_name, 'dist', compress=None) + finally: + os.chdir(old_dir) + tarball = base_name + '.tar' + self.assertTrue(os.path.exists(tarball)) + + # now for a dry_run + base_name = os.path.join(tmpdir2, 'archive') + old_dir = os.getcwd() + os.chdir(tmpdir) + try: + _make_tarball(base_name, 'dist', compress=None, dry_run=True) + finally: + os.chdir(old_dir) + tarball = base_name + '.tar' + self.assertTrue(os.path.exists(tarball)) + + @unittest.skipUnless(find_executable('compress'), + 'The compress program is required') + def test_compress_deprecated(self): + tmpdir, tmpdir2, base_name = self._create_files() + + # using compress and testing the PendingDeprecationWarning + old_dir = os.getcwd() + os.chdir(tmpdir) + try: + with captured_stdout() as s: + with check_warnings() as w: + warnings.simplefilter("always") + _make_tarball(base_name, 'dist', compress='compress') + finally: + os.chdir(old_dir) + tarball = base_name + '.tar.Z' + self.assertTrue(os.path.exists(tarball)) + self.assertEquals(len(w.warnings), 1) + + # same test with dry_run + os.remove(tarball) + old_dir = os.getcwd() + os.chdir(tmpdir) + try: + with captured_stdout() as s: + with check_warnings() as w: + warnings.simplefilter("always") + _make_tarball(base_name, 'dist', compress='compress', + dry_run=True) + finally: + os.chdir(old_dir) + self.assertTrue(not os.path.exists(tarball)) + self.assertEquals(len(w.warnings), 1) + + @unittest.skipUnless(zlib, "Requires zlib") + @unittest.skipUnless(ZIP_SUPPORT, 'Need zip support to run') + def test_make_zipfile(self): + # creating something to tar + tmpdir = self.mkdtemp() + self.write_file([tmpdir, 'file1'], 'xxx') + self.write_file([tmpdir, 'file2'], 'xxx') + + tmpdir2 = self.mkdtemp() + base_name = os.path.join(tmpdir2, 'archive') + _make_zipfile(base_name, tmpdir) + + # check if the compressed tarball was created + tarball = base_name + '.zip' + + + def test_make_archive(self): + tmpdir = self.mkdtemp() + base_name = os.path.join(tmpdir, 'archive') + self.assertRaises(ValueError, make_archive, base_name, 'xxx') + + @unittest.skipUnless(zlib, "Requires zlib") + def test_make_archive_owner_group(self): + # testing make_archive with owner and group, with various combinations + # this works even if there's not gid/uid support + if UID_GID_SUPPORT: + group = grp.getgrgid(0)[0] + owner = pwd.getpwuid(0)[0] + else: + group = owner = 'root' + + base_dir, root_dir, base_name = self._create_files() + base_name = os.path.join(self.mkdtemp() , 'archive') + res = make_archive(base_name, 'zip', root_dir, base_dir, owner=owner, + group=group) + self.assertTrue(os.path.exists(res)) + + res = make_archive(base_name, 'zip', root_dir, base_dir) + self.assertTrue(os.path.exists(res)) + + res = make_archive(base_name, 'tar', root_dir, base_dir, + owner=owner, group=group) + self.assertTrue(os.path.exists(res)) + + res = make_archive(base_name, 'tar', root_dir, base_dir, + owner='kjhkjhkjg', group='oihohoh') + self.assertTrue(os.path.exists(res)) + + @unittest.skipUnless(zlib, "Requires zlib") + @unittest.skipUnless(UID_GID_SUPPORT, "Requires grp and pwd support") + def test_tarfile_root_owner(self): + tmpdir, tmpdir2, base_name = self._create_files() + old_dir = os.getcwd() + os.chdir(tmpdir) + group = grp.getgrgid(0)[0] + owner = pwd.getpwuid(0)[0] + try: + archive_name = _make_tarball(base_name, 'dist', compress=None, + owner=owner, group=group) + finally: + os.chdir(old_dir) + + # check if the compressed tarball was created + self.assertTrue(os.path.exists(archive_name)) + + # now checks the rights + archive = tarfile.open(archive_name) + try: + for member in archive.getmembers(): + self.assertEquals(member.uid, 0) + self.assertEquals(member.gid, 0) + finally: + archive.close() + + def test_make_archive_cwd(self): + current_dir = os.getcwd() + def _breaks(*args, **kw): + raise RuntimeError() + + register_archive_format('xxx', _breaks, [], 'xxx file') + try: + try: + make_archive('xxx', 'xxx', root_dir=self.mkdtemp()) + except Exception: + pass + self.assertEquals(os.getcwd(), current_dir) + finally: + unregister_archive_format('xxx') + + def test_register_archive_format(self): + + self.assertRaises(TypeError, register_archive_format, 'xxx', 1) + self.assertRaises(TypeError, register_archive_format, 'xxx', lambda: x, + 1) + self.assertRaises(TypeError, register_archive_format, 'xxx', lambda: x, + [(1, 2), (1, 2, 3)]) + + register_archive_format('xxx', lambda: x, [(1, 2)], 'xxx file') + formats = [name for name, params in get_archive_formats()] + self.assertIn('xxx', formats) + + unregister_archive_format('xxx') + formats = [name for name, params in get_archive_formats()] + self.assertNotIn('xxx', formats) + class TestMove(unittest.TestCase):