mirror of https://github.com/python/cpython
bpo-21417: Add compresslevel= to the zipfile module (GH-5385)
This allows the compression level to be specified when writing zipfiles (for the entire file *and* overridden on a per-file basis). Contributed by Bo Bayles
This commit is contained in:
parent
f4d644f36f
commit
ce237c7d58
|
@ -130,10 +130,12 @@ ZipFile Objects
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
|
|
||||||
.. class:: ZipFile(file, mode='r', compression=ZIP_STORED, allowZip64=True)
|
.. class:: ZipFile(file, mode='r', compression=ZIP_STORED, allowZip64=True, \
|
||||||
|
compresslevel=None)
|
||||||
|
|
||||||
Open a ZIP file, where *file* can be a path to a file (a string), a
|
Open a ZIP file, where *file* can be a path to a file (a string), a
|
||||||
file-like object or a :term:`path-like object`.
|
file-like object or a :term:`path-like object`.
|
||||||
|
|
||||||
The *mode* parameter should be ``'r'`` to read an existing
|
The *mode* parameter should be ``'r'`` to read an existing
|
||||||
file, ``'w'`` to truncate and write a new file, ``'a'`` to append to an
|
file, ``'w'`` to truncate and write a new file, ``'a'`` to append to an
|
||||||
existing file, or ``'x'`` to exclusively create and write a new file.
|
existing file, or ``'x'`` to exclusively create and write a new file.
|
||||||
|
@ -145,16 +147,27 @@ ZipFile Objects
|
||||||
adding a ZIP archive to another file (such as :file:`python.exe`). If
|
adding a ZIP archive to another file (such as :file:`python.exe`). If
|
||||||
*mode* is ``'a'`` and the file does not exist at all, it is created.
|
*mode* is ``'a'`` and the file does not exist at all, it is created.
|
||||||
If *mode* is ``'r'`` or ``'a'``, the file should be seekable.
|
If *mode* is ``'r'`` or ``'a'``, the file should be seekable.
|
||||||
|
|
||||||
*compression* is the ZIP compression method to use when writing the archive,
|
*compression* is the ZIP compression method to use when writing the archive,
|
||||||
and should be :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`,
|
and should be :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`,
|
||||||
:const:`ZIP_BZIP2` or :const:`ZIP_LZMA`; unrecognized
|
:const:`ZIP_BZIP2` or :const:`ZIP_LZMA`; unrecognized
|
||||||
values will cause :exc:`NotImplementedError` to be raised. If :const:`ZIP_DEFLATED`,
|
values will cause :exc:`NotImplementedError` to be raised. If
|
||||||
:const:`ZIP_BZIP2` or :const:`ZIP_LZMA` is specified but the corresponding module
|
:const:`ZIP_DEFLATED`, :const:`ZIP_BZIP2` or :const:`ZIP_LZMA` is specified
|
||||||
(:mod:`zlib`, :mod:`bz2` or :mod:`lzma`) is not available, :exc:`RuntimeError`
|
but the corresponding module (:mod:`zlib`, :mod:`bz2` or :mod:`lzma`) is not
|
||||||
is raised. The default is :const:`ZIP_STORED`. If *allowZip64* is
|
available, :exc:`RuntimeError` is raised. The default is :const:`ZIP_STORED`.
|
||||||
``True`` (the default) zipfile will create ZIP files that use the ZIP64
|
|
||||||
extensions when the zipfile is larger than 4 GiB. If it is false :mod:`zipfile`
|
If *allowZip64* is ``True`` (the default) zipfile will create ZIP files that
|
||||||
will raise an exception when the ZIP file would require ZIP64 extensions.
|
use the ZIP64 extensions when the zipfile is larger than 4 GiB. If it is
|
||||||
|
false, :mod:`zipfile` will raise an exception when the ZIP file would
|
||||||
|
require ZIP64 extensions.
|
||||||
|
|
||||||
|
The *compresslevel* parameter controls the compression level to use when
|
||||||
|
writing files to the archive.
|
||||||
|
When using :const:`ZIP_STORED` or :const:`ZIP_LZMA` it has no effect.
|
||||||
|
When using :const:`ZIP_DEFLATED` integers ``0`` through ``9`` are accepted
|
||||||
|
(see :func:`zlib.compressobj` for more information).
|
||||||
|
When using :const:`ZIP_BZIP2` integers ``1`` through ``9`` are accepted
|
||||||
|
(see :class:`bz2.BZ2Compressor` for more information).
|
||||||
|
|
||||||
If the file is created with mode ``'w'``, ``'x'`` or ``'a'`` and then
|
If the file is created with mode ``'w'``, ``'x'`` or ``'a'`` and then
|
||||||
:meth:`closed <close>` without adding any files to the archive, the appropriate
|
:meth:`closed <close>` without adding any files to the archive, the appropriate
|
||||||
|
@ -187,6 +200,9 @@ ZipFile Objects
|
||||||
.. versionchanged:: 3.6.2
|
.. versionchanged:: 3.6.2
|
||||||
The *file* parameter accepts a :term:`path-like object`.
|
The *file* parameter accepts a :term:`path-like object`.
|
||||||
|
|
||||||
|
.. versionchanged:: 3.7
|
||||||
|
Add the *compresslevel* parameter.
|
||||||
|
|
||||||
|
|
||||||
.. method:: ZipFile.close()
|
.. method:: ZipFile.close()
|
||||||
|
|
||||||
|
@ -351,13 +367,15 @@ ZipFile Objects
|
||||||
:exc:`ValueError`. Previously, a :exc:`RuntimeError` was raised.
|
:exc:`ValueError`. Previously, a :exc:`RuntimeError` was raised.
|
||||||
|
|
||||||
|
|
||||||
.. method:: ZipFile.write(filename, arcname=None, compress_type=None)
|
.. method:: ZipFile.write(filename, arcname=None, compress_type=None, \
|
||||||
|
compresslevel=None)
|
||||||
|
|
||||||
Write the file named *filename* to the archive, giving it the archive name
|
Write the file named *filename* to the archive, giving it the archive name
|
||||||
*arcname* (by default, this will be the same as *filename*, but without a drive
|
*arcname* (by default, this will be the same as *filename*, but without a drive
|
||||||
letter and with leading path separators removed). If given, *compress_type*
|
letter and with leading path separators removed). If given, *compress_type*
|
||||||
overrides the value given for the *compression* parameter to the constructor for
|
overrides the value given for the *compression* parameter to the constructor for
|
||||||
the new entry.
|
the new entry. Similarly, *compresslevel* overrides the constructor's value if
|
||||||
|
given.
|
||||||
The archive must be open with mode ``'w'``, ``'x'`` or ``'a'``.
|
The archive must be open with mode ``'w'``, ``'x'`` or ``'a'``.
|
||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
|
@ -383,7 +401,8 @@ ZipFile Objects
|
||||||
a :exc:`RuntimeError` was raised.
|
a :exc:`RuntimeError` was raised.
|
||||||
|
|
||||||
|
|
||||||
.. method:: ZipFile.writestr(zinfo_or_arcname, data[, compress_type])
|
.. method:: ZipFile.writestr(zinfo_or_arcname, data, compress_type=None, \
|
||||||
|
compresslevel=None)
|
||||||
|
|
||||||
Write the string *data* to the archive; *zinfo_or_arcname* is either the file
|
Write the string *data* to the archive; *zinfo_or_arcname* is either the file
|
||||||
name it will be given in the archive, or a :class:`ZipInfo` instance. If it's
|
name it will be given in the archive, or a :class:`ZipInfo` instance. If it's
|
||||||
|
@ -393,7 +412,8 @@ ZipFile Objects
|
||||||
|
|
||||||
If given, *compress_type* overrides the value given for the *compression*
|
If given, *compress_type* overrides the value given for the *compression*
|
||||||
parameter to the constructor for the new entry, or in the *zinfo_or_arcname*
|
parameter to the constructor for the new entry, or in the *zinfo_or_arcname*
|
||||||
(if that is a :class:`ZipInfo` instance).
|
(if that is a :class:`ZipInfo` instance). Similarly, *compresslevel* will
|
||||||
|
override the constructor's *compresslevel* value if given.
|
||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
|
|
||||||
|
|
|
@ -53,9 +53,10 @@ class AbstractTestsWithSourceFile:
|
||||||
with open(TESTFN, "wb") as fp:
|
with open(TESTFN, "wb") as fp:
|
||||||
fp.write(self.data)
|
fp.write(self.data)
|
||||||
|
|
||||||
def make_test_archive(self, f, compression):
|
def make_test_archive(self, f, compression, compresslevel=None):
|
||||||
|
kwargs = {'compression': compression, 'compresslevel': compresslevel}
|
||||||
# Create the ZIP archive
|
# Create the ZIP archive
|
||||||
with zipfile.ZipFile(f, "w", compression) as zipfp:
|
with zipfile.ZipFile(f, "w", **kwargs) as zipfp:
|
||||||
zipfp.write(TESTFN, "another.name")
|
zipfp.write(TESTFN, "another.name")
|
||||||
zipfp.write(TESTFN, TESTFN)
|
zipfp.write(TESTFN, TESTFN)
|
||||||
zipfp.writestr("strfile", self.data)
|
zipfp.writestr("strfile", self.data)
|
||||||
|
@ -63,8 +64,8 @@ class AbstractTestsWithSourceFile:
|
||||||
for line in self.line_gen:
|
for line in self.line_gen:
|
||||||
f.write(line)
|
f.write(line)
|
||||||
|
|
||||||
def zip_test(self, f, compression):
|
def zip_test(self, f, compression, compresslevel=None):
|
||||||
self.make_test_archive(f, compression)
|
self.make_test_archive(f, compression, compresslevel)
|
||||||
|
|
||||||
# Read the ZIP archive
|
# Read the ZIP archive
|
||||||
with zipfile.ZipFile(f, "r", compression) as zipfp:
|
with zipfile.ZipFile(f, "r", compression) as zipfp:
|
||||||
|
@ -297,6 +298,22 @@ class AbstractTestsWithSourceFile:
|
||||||
info = zipfp.getinfo('b.txt')
|
info = zipfp.getinfo('b.txt')
|
||||||
self.assertEqual(info.compress_type, self.compression)
|
self.assertEqual(info.compress_type, self.compression)
|
||||||
|
|
||||||
|
def test_writestr_compresslevel(self):
|
||||||
|
zipfp = zipfile.ZipFile(TESTFN2, "w", compresslevel=1)
|
||||||
|
zipfp.writestr("a.txt", "hello world", compress_type=self.compression)
|
||||||
|
zipfp.writestr("b.txt", "hello world", compress_type=self.compression,
|
||||||
|
compresslevel=2)
|
||||||
|
|
||||||
|
# Compression level follows the constructor.
|
||||||
|
a_info = zipfp.getinfo('a.txt')
|
||||||
|
self.assertEqual(a_info.compress_type, self.compression)
|
||||||
|
self.assertEqual(a_info._compresslevel, 1)
|
||||||
|
|
||||||
|
# Compression level is overridden.
|
||||||
|
b_info = zipfp.getinfo('b.txt')
|
||||||
|
self.assertEqual(b_info.compress_type, self.compression)
|
||||||
|
self.assertEqual(b_info._compresslevel, 2)
|
||||||
|
|
||||||
def test_read_return_size(self):
|
def test_read_return_size(self):
|
||||||
# Issue #9837: ZipExtFile.read() shouldn't return more bytes
|
# Issue #9837: ZipExtFile.read() shouldn't return more bytes
|
||||||
# than requested.
|
# than requested.
|
||||||
|
@ -370,6 +387,21 @@ class AbstractTestsWithSourceFile:
|
||||||
self.assertIn('[closed]', repr(zipopen))
|
self.assertIn('[closed]', repr(zipopen))
|
||||||
self.assertIn('[closed]', repr(zipfp))
|
self.assertIn('[closed]', repr(zipfp))
|
||||||
|
|
||||||
|
def test_compresslevel_basic(self):
|
||||||
|
for f in get_files(self):
|
||||||
|
self.zip_test(f, self.compression, compresslevel=9)
|
||||||
|
|
||||||
|
def test_per_file_compresslevel(self):
|
||||||
|
"""Check that files within a Zip archive can have different
|
||||||
|
compression levels."""
|
||||||
|
with zipfile.ZipFile(TESTFN2, "w", compresslevel=1) as zipfp:
|
||||||
|
zipfp.write(TESTFN, 'compress_1')
|
||||||
|
zipfp.write(TESTFN, 'compress_9', compresslevel=9)
|
||||||
|
one_info = zipfp.getinfo('compress_1')
|
||||||
|
nine_info = zipfp.getinfo('compress_9')
|
||||||
|
self.assertEqual(one_info._compresslevel, 1)
|
||||||
|
self.assertEqual(nine_info._compresslevel, 9)
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
unlink(TESTFN)
|
unlink(TESTFN)
|
||||||
unlink(TESTFN2)
|
unlink(TESTFN2)
|
||||||
|
|
|
@ -295,6 +295,7 @@ class ZipInfo (object):
|
||||||
'filename',
|
'filename',
|
||||||
'date_time',
|
'date_time',
|
||||||
'compress_type',
|
'compress_type',
|
||||||
|
'_compresslevel',
|
||||||
'comment',
|
'comment',
|
||||||
'extra',
|
'extra',
|
||||||
'create_system',
|
'create_system',
|
||||||
|
@ -334,6 +335,7 @@ class ZipInfo (object):
|
||||||
|
|
||||||
# Standard values:
|
# Standard values:
|
||||||
self.compress_type = ZIP_STORED # Type of compression for the file
|
self.compress_type = ZIP_STORED # Type of compression for the file
|
||||||
|
self._compresslevel = None # Level for the compressor
|
||||||
self.comment = b"" # Comment for each file
|
self.comment = b"" # Comment for each file
|
||||||
self.extra = b"" # ZIP extra data
|
self.extra = b"" # ZIP extra data
|
||||||
if sys.platform == 'win32':
|
if sys.platform == 'win32':
|
||||||
|
@ -654,12 +656,16 @@ def _check_compression(compression):
|
||||||
raise NotImplementedError("That compression method is not supported")
|
raise NotImplementedError("That compression method is not supported")
|
||||||
|
|
||||||
|
|
||||||
def _get_compressor(compress_type):
|
def _get_compressor(compress_type, compresslevel=None):
|
||||||
if compress_type == ZIP_DEFLATED:
|
if compress_type == ZIP_DEFLATED:
|
||||||
return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
|
if compresslevel is not None:
|
||||||
zlib.DEFLATED, -15)
|
return zlib.compressobj(compresslevel, zlib.DEFLATED, -15)
|
||||||
|
return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, zlib.DEFLATED, -15)
|
||||||
elif compress_type == ZIP_BZIP2:
|
elif compress_type == ZIP_BZIP2:
|
||||||
|
if compresslevel is not None:
|
||||||
|
return bz2.BZ2Compressor(compresslevel)
|
||||||
return bz2.BZ2Compressor()
|
return bz2.BZ2Compressor()
|
||||||
|
# compresslevel is ignored for ZIP_LZMA
|
||||||
elif compress_type == ZIP_LZMA:
|
elif compress_type == ZIP_LZMA:
|
||||||
return LZMACompressor()
|
return LZMACompressor()
|
||||||
else:
|
else:
|
||||||
|
@ -963,7 +969,8 @@ class _ZipWriteFile(io.BufferedIOBase):
|
||||||
self._zinfo = zinfo
|
self._zinfo = zinfo
|
||||||
self._zip64 = zip64
|
self._zip64 = zip64
|
||||||
self._zipfile = zf
|
self._zipfile = zf
|
||||||
self._compressor = _get_compressor(zinfo.compress_type)
|
self._compressor = _get_compressor(zinfo.compress_type,
|
||||||
|
zinfo._compresslevel)
|
||||||
self._file_size = 0
|
self._file_size = 0
|
||||||
self._compress_size = 0
|
self._compress_size = 0
|
||||||
self._crc = 0
|
self._crc = 0
|
||||||
|
@ -1035,7 +1042,8 @@ class _ZipWriteFile(io.BufferedIOBase):
|
||||||
class ZipFile:
|
class ZipFile:
|
||||||
""" Class with methods to open, read, write, close, list zip files.
|
""" Class with methods to open, read, write, close, list zip files.
|
||||||
|
|
||||||
z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
|
z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True,
|
||||||
|
compresslevel=None)
|
||||||
|
|
||||||
file: Either the path to the file, or a file-like object.
|
file: Either the path to the file, or a file-like object.
|
||||||
If it is a path, the file will be opened and closed by ZipFile.
|
If it is a path, the file will be opened and closed by ZipFile.
|
||||||
|
@ -1046,13 +1054,19 @@ class ZipFile:
|
||||||
allowZip64: if True ZipFile will create files with ZIP64 extensions when
|
allowZip64: if True ZipFile will create files with ZIP64 extensions when
|
||||||
needed, otherwise it will raise an exception when this would
|
needed, otherwise it will raise an exception when this would
|
||||||
be necessary.
|
be necessary.
|
||||||
|
compresslevel: None (default for the given compression type) or an integer
|
||||||
|
specifying the level to pass to the compressor.
|
||||||
|
When using ZIP_STORED or ZIP_LZMA this keyword has no effect.
|
||||||
|
When using ZIP_DEFLATED integers 0 through 9 are accepted.
|
||||||
|
When using ZIP_BZIP2 integers 1 through 9 are accepted.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
fp = None # Set here since __del__ checks it
|
fp = None # Set here since __del__ checks it
|
||||||
_windows_illegal_name_trans_table = None
|
_windows_illegal_name_trans_table = None
|
||||||
|
|
||||||
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
|
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
|
||||||
|
compresslevel=None):
|
||||||
"""Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
|
"""Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
|
||||||
or append 'a'."""
|
or append 'a'."""
|
||||||
if mode not in ('r', 'w', 'x', 'a'):
|
if mode not in ('r', 'w', 'x', 'a'):
|
||||||
|
@ -1066,6 +1080,7 @@ class ZipFile:
|
||||||
self.NameToInfo = {} # Find file info given name
|
self.NameToInfo = {} # Find file info given name
|
||||||
self.filelist = [] # List of ZipInfo instances for archive
|
self.filelist = [] # List of ZipInfo instances for archive
|
||||||
self.compression = compression # Method of compression
|
self.compression = compression # Method of compression
|
||||||
|
self.compresslevel = compresslevel
|
||||||
self.mode = mode
|
self.mode = mode
|
||||||
self.pwd = None
|
self.pwd = None
|
||||||
self._comment = b''
|
self._comment = b''
|
||||||
|
@ -1342,6 +1357,7 @@ class ZipFile:
|
||||||
elif mode == 'w':
|
elif mode == 'w':
|
||||||
zinfo = ZipInfo(name)
|
zinfo = ZipInfo(name)
|
||||||
zinfo.compress_type = self.compression
|
zinfo.compress_type = self.compression
|
||||||
|
zinfo._compresslevel = self.compresslevel
|
||||||
else:
|
else:
|
||||||
# Get info object for name
|
# Get info object for name
|
||||||
zinfo = self.getinfo(name)
|
zinfo = self.getinfo(name)
|
||||||
|
@ -1575,7 +1591,8 @@ class ZipFile:
|
||||||
raise LargeZipFile(requires_zip64 +
|
raise LargeZipFile(requires_zip64 +
|
||||||
" would require ZIP64 extensions")
|
" would require ZIP64 extensions")
|
||||||
|
|
||||||
def write(self, filename, arcname=None, compress_type=None):
|
def write(self, filename, arcname=None,
|
||||||
|
compress_type=None, compresslevel=None):
|
||||||
"""Put the bytes from filename into the archive under the name
|
"""Put the bytes from filename into the archive under the name
|
||||||
arcname."""
|
arcname."""
|
||||||
if not self.fp:
|
if not self.fp:
|
||||||
|
@ -1597,6 +1614,11 @@ class ZipFile:
|
||||||
else:
|
else:
|
||||||
zinfo.compress_type = self.compression
|
zinfo.compress_type = self.compression
|
||||||
|
|
||||||
|
if compresslevel is not None:
|
||||||
|
zinfo._compresslevel = compresslevel
|
||||||
|
else:
|
||||||
|
zinfo._compresslevel = self.compresslevel
|
||||||
|
|
||||||
if zinfo.is_dir():
|
if zinfo.is_dir():
|
||||||
with self._lock:
|
with self._lock:
|
||||||
if self._seekable:
|
if self._seekable:
|
||||||
|
@ -1617,7 +1639,8 @@ class ZipFile:
|
||||||
with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
|
with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
|
||||||
shutil.copyfileobj(src, dest, 1024*8)
|
shutil.copyfileobj(src, dest, 1024*8)
|
||||||
|
|
||||||
def writestr(self, zinfo_or_arcname, data, compress_type=None):
|
def writestr(self, zinfo_or_arcname, data,
|
||||||
|
compress_type=None, compresslevel=None):
|
||||||
"""Write a file into the archive. The contents is 'data', which
|
"""Write a file into the archive. The contents is 'data', which
|
||||||
may be either a 'str' or a 'bytes' instance; if it is a 'str',
|
may be either a 'str' or a 'bytes' instance; if it is a 'str',
|
||||||
it is encoded as UTF-8 first.
|
it is encoded as UTF-8 first.
|
||||||
|
@ -1629,6 +1652,7 @@ class ZipFile:
|
||||||
zinfo = ZipInfo(filename=zinfo_or_arcname,
|
zinfo = ZipInfo(filename=zinfo_or_arcname,
|
||||||
date_time=time.localtime(time.time())[:6])
|
date_time=time.localtime(time.time())[:6])
|
||||||
zinfo.compress_type = self.compression
|
zinfo.compress_type = self.compression
|
||||||
|
zinfo._compresslevel = self.compresslevel
|
||||||
if zinfo.filename[-1] == '/':
|
if zinfo.filename[-1] == '/':
|
||||||
zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x
|
zinfo.external_attr = 0o40775 << 16 # drwxrwxr-x
|
||||||
zinfo.external_attr |= 0x10 # MS-DOS directory flag
|
zinfo.external_attr |= 0x10 # MS-DOS directory flag
|
||||||
|
@ -1648,6 +1672,9 @@ class ZipFile:
|
||||||
if compress_type is not None:
|
if compress_type is not None:
|
||||||
zinfo.compress_type = compress_type
|
zinfo.compress_type = compress_type
|
||||||
|
|
||||||
|
if compresslevel is not None:
|
||||||
|
zinfo._compresslevel = compresslevel
|
||||||
|
|
||||||
zinfo.file_size = len(data) # Uncompressed size
|
zinfo.file_size = len(data) # Uncompressed size
|
||||||
with self._lock:
|
with self._lock:
|
||||||
with self.open(zinfo, mode='w') as dest:
|
with self.open(zinfo, mode='w') as dest:
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Added support for setting the compression level for zipfile.ZipFile.
|
Loading…
Reference in New Issue