Issue #14061: Misc fixes and cleanups in archiving code in shutil.

Improved the documentation and tests for make_archive() and unpack_archive().
Improved error handling when the corresponding compression module is not available.
Broke the circular dependency between the shutil and tarfile modules.
This commit is contained in:
Serhiy Storchaka 2016-12-16 18:58:33 +02:00
parent 610f5d739d
commit 20cdffd830
3 changed files with 110 additions and 107 deletions

View File

@ -458,6 +458,10 @@ Archiving operations
.. versionadded:: 3.2
.. versionchanged:: 3.5
Added support for the *xztar* format.
High-level utilities to create and read compressed and archived files are also
provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules.
@ -467,8 +471,9 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules.
*base_name* is the name of the file to create, including the path, minus
any format-specific extension. *format* is the archive format: one of
"zip", "tar", "bztar" (if the :mod:`bz2` module is available), "xztar"
(if the :mod:`lzma` module is available) or "gztar".
"zip" (if the :mod:`zlib` module is available), "tar", "gztar" (if the
:mod:`zlib` module is available), "bztar" (if the :mod:`bz2` module is
available), or "xztar" (if the :mod:`lzma` module is available).
*root_dir* is a directory that will be the root directory of the
archive; for example, we typically chdir into *root_dir* before creating the
@ -491,9 +496,6 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules.
The *verbose* argument is unused and deprecated.
.. versionchanged:: 3.5
Added support for the *xztar* format.
.. function:: get_archive_formats()
@ -502,11 +504,11 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules.
By default :mod:`shutil` provides these formats:
- *gztar*: gzip'ed tar-file
- *bztar*: bzip2'ed tar-file (if the :mod:`bz2` module is available.)
- *xztar*: xz'ed tar-file (if the :mod:`lzma` module is available.)
- *tar*: uncompressed tar file
- *zip*: ZIP file
- *zip*: ZIP file (if the :mod:`zlib` module is available).
- *tar*: uncompressed tar file.
- *gztar*: gzip'ed tar-file (if the :mod:`zlib` module is available).
- *bztar*: bzip2'ed tar-file (if the :mod:`bz2` module is available).
- *xztar*: xz'ed tar-file (if the :mod:`lzma` module is available).
You can register new formats or provide your own archiver for any existing
formats, by using :func:`register_archive_format`.
@ -541,11 +543,12 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules.
*extract_dir* is the name of the target directory where the archive is
unpacked. If not provided, the current working directory is used.
*format* is the archive format: one of "zip", "tar", or "gztar". Or any
other format registered with :func:`register_unpack_format`. If not
provided, :func:`unpack_archive` will use the archive file name extension
and see if an unpacker was registered for that extension. In case none is
found, a :exc:`ValueError` is raised.
*format* is the archive format: one of "zip", "tar", "gztar", "bztar", or
"xztar", or any other format registered with
:func:`register_unpack_format`. If not provided, :func:`unpack_archive`
will use the archive file name extension and see if an unpacker was
registered for that extension. In case none is found,
a :exc:`ValueError` is raised.
.. function:: register_unpack_format(name, extensions, function[, extra_args[, description]])
@ -578,11 +581,12 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules.
By default :mod:`shutil` provides these formats:
- *gztar*: gzip'ed tar-file
- *bztar*: bzip2'ed tar-file (if the :mod:`bz2` module is available.)
- *xztar*: xz'ed tar-file (if the :mod:`lzma` module is available.)
- *tar*: uncompressed tar file
- *zip*: ZIP file
- *zip*: ZIP file (unpacking compressed files works only if the corresponding
module is available).
- *tar*: uncompressed tar file.
- *gztar*: gzip'ed tar-file (if the :mod:`zlib` module is available).
- *bztar*: bzip2'ed tar-file (if the :mod:`bz2` module is available).
- *xztar*: xz'ed tar-file (if the :mod:`lzma` module is available).
You can register new formats or provide your own unpacker for any existing
formats, by using :func:`register_unpack_format`.

View File

@ -10,7 +10,13 @@ import stat
import fnmatch
import collections
import errno
import tarfile
try:
import zlib
del zlib
_ZLIB_SUPPORTED = True
except ImportError:
_ZLIB_SUPPORTED = False
try:
import bz2
@ -602,23 +608,22 @@ def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
Returns the output filename.
"""
tar_compression = {'gzip': 'gz', None: ''}
compress_ext = {'gzip': '.gz'}
if _BZ2_SUPPORTED:
tar_compression['bzip2'] = 'bz2'
compress_ext['bzip2'] = '.bz2'
if _LZMA_SUPPORTED:
tar_compression['xz'] = 'xz'
compress_ext['xz'] = '.xz'
# flags for compression program, each element of list will be an argument
if compress is not None and compress not in compress_ext:
if compress is None:
tar_compression = ''
elif _ZLIB_SUPPORTED and compress == 'gzip':
tar_compression = 'gz'
elif _BZ2_SUPPORTED and compress == 'bzip2':
tar_compression = 'bz2'
elif _LZMA_SUPPORTED and compress == 'xz':
tar_compression = 'xz'
else:
raise ValueError("bad value for 'compress', or compression format not "
"supported : {0}".format(compress))
archive_name = base_name + '.tar' + compress_ext.get(compress, '')
import tarfile # late import for breaking circular dependency
compress_ext = '.' + tar_compression if compress else ''
archive_name = base_name + '.tar' + compress_ext
archive_dir = os.path.dirname(archive_name)
if archive_dir and not os.path.exists(archive_dir):
@ -644,7 +649,7 @@ def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
return tarinfo
if not dry_run:
tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
tar = tarfile.open(archive_name, 'w|%s' % tar_compression)
try:
tar.add(base_dir, filter=_set_uid_gid)
finally:
@ -655,13 +660,10 @@ def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
"""Create a zip file from all the files under 'base_dir'.
The output zip file will be named 'base_name' + ".zip". Uses either the
"zipfile" Python module (if available) or the InfoZIP "zip" utility
(if installed and found on the default search path). If neither tool is
available, raises ExecError. Returns the name of the output zip
file.
The output zip file will be named 'base_name' + ".zip". Returns the
name of the output zip file.
"""
import zipfile
import zipfile # late import for breaking circular dependency
zip_filename = base_name + ".zip"
archive_dir = os.path.dirname(base_name)
@ -700,10 +702,13 @@ def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
return zip_filename
_ARCHIVE_FORMATS = {
'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"),
'zip': (_make_zipfile, [], "ZIP file")
}
}
if _ZLIB_SUPPORTED:
_ARCHIVE_FORMATS['gztar'] = (_make_tarball, [('compress', 'gzip')],
"gzip'ed tar-file")
_ARCHIVE_FORMATS['zip'] = (_make_zipfile, [], "ZIP file")
if _BZ2_SUPPORTED:
_ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')],
@ -752,8 +757,8 @@ def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
"""Create an archive file (eg. zip or tar).
'base_name' is the name of the file to create, minus any format-specific
extension; 'format' is the archive format: one of "zip", "tar", "bztar"
or "gztar".
extension; 'format' is the archive format: one of "zip", "tar", "gztar",
"bztar", or "xztar". Or any other registered format.
'root_dir' is a directory that will be the root directory of the
archive; ie. we typically chdir into 'root_dir' before creating the
@ -866,10 +871,7 @@ def _ensure_directory(path):
def _unpack_zipfile(filename, extract_dir):
"""Unpack zip `filename` to `extract_dir`
"""
try:
import zipfile
except ImportError:
raise ReadError('zlib not supported, cannot unpack this archive.')
import zipfile # late import for breaking circular dependency
if not zipfile.is_zipfile(filename):
raise ReadError("%s is not a zip file" % filename)
@ -903,6 +905,7 @@ def _unpack_zipfile(filename, extract_dir):
def _unpack_tarfile(filename, extract_dir):
"""Unpack tar/tar.gz/tar.bz2/tar.xz `filename` to `extract_dir`
"""
import tarfile # late import for breaking circular dependency
try:
tarobj = tarfile.open(filename)
except tarfile.TarError:
@ -914,10 +917,13 @@ def _unpack_tarfile(filename, extract_dir):
tarobj.close()
_UNPACK_FORMATS = {
'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
'zip': (['.zip'], _unpack_zipfile, [], "ZIP file")
}
'zip': (['.zip'], _unpack_zipfile, [], "ZIP file"),
}
if _ZLIB_SUPPORTED:
_UNPACK_FORMATS['gztar'] = (['.tar.gz', '.tgz'], _unpack_tarfile, [],
"gzip'ed tar-file")
if _BZ2_SUPPORTED:
_UNPACK_FORMATS['bztar'] = (['.tar.bz2', '.tbz2'], _unpack_tarfile, [],
@ -942,10 +948,10 @@ def unpack_archive(filename, extract_dir=None, format=None):
`extract_dir` is the name of the target directory, where the archive
is unpacked. If not provided, the current working directory is used.
`format` is the archive format: one of "zip", "tar", or "gztar". Or any
other registered format. If not provided, unpack_archive will use the
filename extension and see if an unpacker was registered for that
extension.
`format` is the archive format: one of "zip", "tar", "gztar", "bztar",
or "xztar". Or any other registered format. If not provided,
unpack_archive will use the filename extension and see if an unpacker
was registered for that extension.
In case none is found, a ValueError is raised.
"""

View File

@ -19,22 +19,11 @@ from shutil import (make_archive,
unregister_unpack_format, get_unpack_formats,
SameFileError)
import tarfile
import zipfile
import warnings
from test import support
from test.support import TESTFN, check_warnings, captured_stdout, requires_zlib
try:
import bz2
BZ2_SUPPORTED = True
except ImportError:
BZ2_SUPPORTED = False
try:
import lzma
LZMA_SUPPORTED = True
except ImportError:
LZMA_SUPPORTED = False
from test.support import TESTFN, check_warnings, captured_stdout
TESTFN2 = TESTFN + "2"
@ -45,12 +34,6 @@ try:
except ImportError:
UID_GID_SUPPORT = False
try:
import zipfile
ZIP_SUPPORT = True
except ImportError:
ZIP_SUPPORT = shutil.which('zip')
def _fake_rename(*args, **kwargs):
# Pretend the destination path is on a different filesystem.
raise OSError(getattr(errno, 'EXDEV', 18), "Invalid cross-device link")
@ -964,7 +947,7 @@ class TestShutil(unittest.TestCase):
self.assertEqual(getattr(file1_stat, 'st_flags'),
getattr(file2_stat, 'st_flags'))
@requires_zlib
@support.requires_zlib
def test_make_tarball(self):
# creating something to tar
root_dir, base_dir = self._create_files('')
@ -1020,7 +1003,7 @@ class TestShutil(unittest.TestCase):
write_file((root_dir, 'outer'), 'xxx')
return root_dir, base_dir
@requires_zlib
@support.requires_zlib
@unittest.skipUnless(shutil.which('tar'),
'Need the tar command to run')
def test_tarfile_vs_tar(self):
@ -1053,8 +1036,7 @@ class TestShutil(unittest.TestCase):
self.assertEqual(tarball, base_name + '.tar')
self.assertTrue(os.path.isfile(tarball))
@requires_zlib
@unittest.skipUnless(ZIP_SUPPORT, 'Need zip support to run')
@support.requires_zlib
def test_make_zipfile(self):
# creating something to zip
root_dir, base_dir = self._create_files()
@ -1091,8 +1073,7 @@ class TestShutil(unittest.TestCase):
['dist/', 'dist/sub/', 'dist/sub2/',
'dist/file1', 'dist/file2', 'dist/sub/file3'])
@requires_zlib
@unittest.skipUnless(ZIP_SUPPORT, 'Need zip support to run')
@support.requires_zlib
@unittest.skipUnless(shutil.which('zip'),
'Need the zip command to run')
def test_zipfile_vs_zip(self):
@ -1118,8 +1099,7 @@ class TestShutil(unittest.TestCase):
names2 = zf.namelist()
self.assertEqual(sorted(names), sorted(names2))
@requires_zlib
@unittest.skipUnless(ZIP_SUPPORT, 'Need zip support to run')
@support.requires_zlib
@unittest.skipUnless(shutil.which('unzip'),
'Need the unzip command to run')
def test_unzip_zipfile(self):
@ -1146,7 +1126,7 @@ class TestShutil(unittest.TestCase):
base_name = os.path.join(tmpdir, 'archive')
self.assertRaises(ValueError, make_archive, base_name, 'xxx')
@requires_zlib
@support.requires_zlib
def test_make_archive_owner_group(self):
# testing make_archive with owner and group, with various combinations
# this works even if there's not gid/uid support
@ -1174,7 +1154,7 @@ class TestShutil(unittest.TestCase):
self.assertTrue(os.path.isfile(res))
@requires_zlib
@support.requires_zlib
@unittest.skipUnless(UID_GID_SUPPORT, "Requires grp and pwd support")
def test_tarfile_root_owner(self):
root_dir, base_dir = self._create_files()
@ -1219,7 +1199,7 @@ class TestShutil(unittest.TestCase):
self.assertEqual(make_archive('test', 'tar'), 'test.tar')
self.assertTrue(os.path.isfile('test.tar'))
@requires_zlib
@support.requires_zlib
def test_make_zipfile_in_curdir(self):
# Issue #21280
root_dir = self.mkdtemp()
@ -1243,33 +1223,46 @@ class TestShutil(unittest.TestCase):
formats = [name for name, params in get_archive_formats()]
self.assertNotIn('xxx', formats)
@requires_zlib
def test_unpack_archive(self):
formats = ['tar', 'gztar', 'zip']
if BZ2_SUPPORTED:
formats.append('bztar')
if LZMA_SUPPORTED:
formats.append('xztar')
def check_unpack_archive(self, format):
root_dir, base_dir = self._create_files()
expected = rlistdir(root_dir)
expected.remove('outer')
for format in formats:
base_name = os.path.join(self.mkdtemp(), 'archive')
filename = make_archive(base_name, format, root_dir, base_dir)
# let's try to unpack it now
tmpdir2 = self.mkdtemp()
unpack_archive(filename, tmpdir2)
self.assertEqual(rlistdir(tmpdir2), expected)
base_name = os.path.join(self.mkdtemp(), 'archive')
filename = make_archive(base_name, format, root_dir, base_dir)
# let's try to unpack it now
tmpdir2 = self.mkdtemp()
unpack_archive(filename, tmpdir2)
self.assertEqual(rlistdir(tmpdir2), expected)
# and again, this time with the format specified
tmpdir3 = self.mkdtemp()
unpack_archive(filename, tmpdir3, format=format)
self.assertEqual(rlistdir(tmpdir3), expected)
# and again, this time with the format specified
tmpdir3 = self.mkdtemp()
unpack_archive(filename, tmpdir3, format=format)
self.assertEqual(rlistdir(tmpdir3), expected)
self.assertRaises(shutil.ReadError, unpack_archive, TESTFN)
self.assertRaises(ValueError, unpack_archive, TESTFN, format='xxx')
def test_unpack_archive_tar(self):
self.check_unpack_archive('tar')
@support.requires_zlib
def test_unpack_archive_gztar(self):
self.check_unpack_archive('gztar')
@support.requires_bz2
def test_unpack_archive_bztar(self):
self.check_unpack_archive('bztar')
@support.requires_lzma
def test_unpack_archive_xztar(self):
self.check_unpack_archive('xztar')
@support.requires_zlib
def test_unpack_archive_zip(self):
self.check_unpack_archive('zip')
def test_unpack_registry(self):
formats = get_unpack_formats()