Issue #14371: Support bzip2 in zipfile module.

Patch by Serhiy Storchaka.
This commit is contained in:
Martin v. Löwis 2012-05-01 07:58:44 +02:00
parent 9acbb6074f
commit f6b16a4b50
5 changed files with 334 additions and 98 deletions

View File

@ -87,7 +87,22 @@ The module defines the following items:
.. data:: ZIP_DEFLATED .. data:: ZIP_DEFLATED
The numeric constant for the usual ZIP compression method. This requires the The numeric constant for the usual ZIP compression method. This requires the
zlib module. No other compression methods are currently supported. zlib module.
.. data:: ZIP_BZIP2
The numeric constant for the BZIP2 compression method. This requires the
bz2 module.
.. versionadded:: 3.3
.. note::
The ZIP file format specification has included support for bzip2 compression
since 2001. However, some tools (including older Python releases) do not
support it, and may either refuse to process the ZIP file altogether, or
fail to extract individual files.
.. seealso:: .. seealso::
@ -118,9 +133,11 @@ ZipFile Objects
adding a ZIP archive to another file (such as :file:`python.exe`). If adding a ZIP archive to another file (such as :file:`python.exe`). If
*mode* is ``a`` and the file does not exist at all, it is created. *mode* is ``a`` and the file does not exist at all, it is created.
*compression* is the ZIP compression method to use when writing the archive, *compression* is the ZIP compression method to use when writing the archive,
and should be :const:`ZIP_STORED` or :const:`ZIP_DEFLATED`; unrecognized and should be :const:`ZIP_STORED`, :const:`ZIP_DEFLATED` or
values will cause :exc:`RuntimeError` to be raised. If :const:`ZIP_DEFLATED` :const:`ZIP_BZIP2`; unrecognized
is specified but the :mod:`zlib` module is not available, :exc:`RuntimeError` values will cause :exc:`RuntimeError` to be raised. If :const:`ZIP_DEFLATED` or
:const:`ZIP_BZIP2` is specified but the corresponding module
(:mod:`zlib` or :mod:`bz2`) is not available, :exc:`RuntimeError`
is also raised. The default is :const:`ZIP_STORED`. If *allowZip64* is is also raised. The default is :const:`ZIP_STORED`. If *allowZip64* is
``True`` zipfile will create ZIP files that use the ZIP64 extensions when ``True`` zipfile will create ZIP files that use the ZIP64 extensions when
the zipfile is larger than 2 GB. If it is false (the default) :mod:`zipfile` the zipfile is larger than 2 GB. If it is false (the default) :mod:`zipfile`
@ -143,6 +160,9 @@ ZipFile Objects
.. versionadded:: 3.2 .. versionadded:: 3.2
Added the ability to use :class:`ZipFile` as a context manager. Added the ability to use :class:`ZipFile` as a context manager.
.. versionchanged:: 3.3
Added support for :mod:`bzip2 <bz2>` compression.
.. method:: ZipFile.close() .. method:: ZipFile.close()

View File

@ -40,6 +40,11 @@ try:
except ImportError: except ImportError:
zlib = None zlib = None
try:
import bz2
except ImportError:
bz2 = None
__all__ = [ __all__ = [
"Error", "TestFailed", "ResourceDenied", "import_module", "Error", "TestFailed", "ResourceDenied", "import_module",
"verbose", "use_resources", "max_memuse", "record_original_stdout", "verbose", "use_resources", "max_memuse", "record_original_stdout",
@ -57,7 +62,7 @@ __all__ = [
"get_attribute", "swap_item", "swap_attr", "requires_IEEE_754", "get_attribute", "swap_item", "swap_attr", "requires_IEEE_754",
"TestHandler", "Matcher", "can_symlink", "skip_unless_symlink", "TestHandler", "Matcher", "can_symlink", "skip_unless_symlink",
"import_fresh_module", "requires_zlib", "PIPE_MAX_SIZE", "failfast", "import_fresh_module", "requires_zlib", "PIPE_MAX_SIZE", "failfast",
"anticipate_failure", "run_with_tz" "anticipate_failure", "run_with_tz", "requires_bz2"
] ]
class Error(Exception): class Error(Exception):
@ -506,6 +511,8 @@ requires_IEEE_754 = unittest.skipUnless(
requires_zlib = unittest.skipUnless(zlib, 'requires zlib') requires_zlib = unittest.skipUnless(zlib, 'requires zlib')
requires_bz2 = unittest.skipUnless(bz2, 'requires bz2')
is_jython = sys.platform.startswith('java') is_jython = sys.platform.startswith('java')
# Filename used for testing # Filename used for testing

View File

@ -13,7 +13,7 @@ from tempfile import TemporaryFile
from random import randint, random from random import randint, random
from unittest import skipUnless from unittest import skipUnless
from test.support import TESTFN, run_unittest, findfile, unlink, requires_zlib from test.support import TESTFN, run_unittest, findfile, unlink, requires_zlib, requires_bz2
TESTFN2 = TESTFN + "2" TESTFN2 = TESTFN + "2"
TESTFNDIR = TESTFN + "d" TESTFNDIR = TESTFN + "d"
@ -313,6 +313,54 @@ class TestsWithSourceFile(unittest.TestCase):
self.assertEqual(openobj.read(1), b'1') self.assertEqual(openobj.read(1), b'1')
self.assertEqual(openobj.read(1), b'2') self.assertEqual(openobj.read(1), b'2')
@requires_bz2
def test_bzip2(self):
for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.zip_test(f, zipfile.ZIP_BZIP2)
@requires_bz2
def test_open_bzip2(self):
for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.zip_open_test(f, zipfile.ZIP_BZIP2)
@requires_bz2
def test_random_open_bzip2(self):
for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.zip_random_open_test(f, zipfile.ZIP_BZIP2)
@requires_bz2
def test_readline_read_bzip2(self):
for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.zip_readline_read_test(f, zipfile.ZIP_BZIP2)
@requires_bz2
def test_readline_bzip2(self):
for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.zip_readline_test(f, zipfile.ZIP_BZIP2)
@requires_bz2
def test_readlines_bzip2(self):
for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.zip_readlines_test(f, zipfile.ZIP_BZIP2)
@requires_bz2
def test_iterlines_bzip2(self):
for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.zip_iterlines_test(f, zipfile.ZIP_BZIP2)
@requires_bz2
def test_low_compression_bzip2(self):
"""Check for cases where compressed data is larger than original."""
# Create the ZIP archive
with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_BZIP2) as zipfp:
zipfp.writestr("strfile", '12')
# Get an open object for strfile
with zipfile.ZipFile(TESTFN2, "r", zipfile.ZIP_BZIP2) as zipfp:
with zipfp.open("strfile") as openobj:
self.assertEqual(openobj.read(1), b'1')
self.assertEqual(openobj.read(1), b'2')
def test_absolute_arcnames(self): def test_absolute_arcnames(self):
with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED) as zipfp: with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED) as zipfp:
zipfp.write(TESTFN, "/absolute") zipfp.write(TESTFN, "/absolute")
@ -453,6 +501,13 @@ class TestsWithSourceFile(unittest.TestCase):
info = zipfp.getinfo('b.txt') info = zipfp.getinfo('b.txt')
self.assertEqual(info.compress_type, zipfile.ZIP_DEFLATED) self.assertEqual(info.compress_type, zipfile.ZIP_DEFLATED)
@requires_bz2
def test_writestr_compression_bzip2(self):
    """writestr() must honour a per-member compress_type of ZIP_BZIP2.

    The archive itself is opened with the default compression, so the
    recorded compress_type can only come from the writestr() argument.
    """
    # Use the context manager so the archive is finalised and the file
    # handle released even if the assertion fails; an open handle would
    # otherwise leak out of the test.
    with zipfile.ZipFile(TESTFN2, "w") as zipfp:
        zipfp.writestr("b.txt", "hello world",
                       compress_type=zipfile.ZIP_BZIP2)
        info = zipfp.getinfo('b.txt')
        self.assertEqual(info.compress_type, zipfile.ZIP_BZIP2)
def zip_test_writestr_permissions(self, f, compression): def zip_test_writestr_permissions(self, f, compression):
# Make sure that writestr creates files with mode 0600, # Make sure that writestr creates files with mode 0600,
# when it is passed a name rather than a ZipInfo instance. # when it is passed a name rather than a ZipInfo instance.
@ -626,6 +681,11 @@ class TestZip64InSmallFiles(unittest.TestCase):
for f in (TESTFN2, TemporaryFile(), io.BytesIO()): for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.zip_test(f, zipfile.ZIP_DEFLATED) self.zip_test(f, zipfile.ZIP_DEFLATED)
@requires_bz2
def test_bzip2(self):
for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.zip_test(f, zipfile.ZIP_BZIP2)
def test_absolute_arcnames(self): def test_absolute_arcnames(self):
with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED, with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED,
allowZip64=True) as zipfp: allowZip64=True) as zipfp:
@ -754,6 +814,18 @@ class OtherTests(unittest.TestCase):
b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x01\x00\x00\x00' b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x01\x00\x00\x00'
b'\x00afilePK\x05\x06\x00\x00\x00\x00\x01\x00' b'\x00afilePK\x05\x06\x00\x00\x00\x00\x01\x00'
b'\x01\x003\x00\x00\x003\x00\x00\x00\x00\x00'), b'\x01\x003\x00\x00\x003\x00\x00\x00\x00\x00'),
zipfile.ZIP_BZIP2: (
b'PK\x03\x04\x14\x03\x00\x00\x0c\x00nu\x0c=FA'
b'KE8\x00\x00\x00n\x00\x00\x00\x05\x00\x00\x00af'
b'ileBZh91AY&SY\xd4\xa8\xca'
b'\x7f\x00\x00\x0f\x11\x80@\x00\x06D\x90\x80 \x00 \xa5'
b'P\xd9!\x03\x03\x13\x13\x13\x89\xa9\xa9\xc2u5:\x9f'
b'\x8b\xb9"\x9c(HjTe?\x80PK\x01\x02\x14'
b'\x03\x14\x03\x00\x00\x0c\x00nu\x0c=FAKE8'
b'\x00\x00\x00n\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00'
b'\x00 \x80\x80\x81\x00\x00\x00\x00afilePK'
b'\x05\x06\x00\x00\x00\x00\x01\x00\x01\x003\x00\x00\x00[\x00'
b'\x00\x00\x00\x00'),
} }
def test_unicode_filenames(self): def test_unicode_filenames(self):
@ -1007,6 +1079,10 @@ class OtherTests(unittest.TestCase):
def test_testzip_with_bad_crc_deflated(self): def test_testzip_with_bad_crc_deflated(self):
self.check_testzip_with_bad_crc(zipfile.ZIP_DEFLATED) self.check_testzip_with_bad_crc(zipfile.ZIP_DEFLATED)
@requires_bz2
def test_testzip_with_bad_crc_bzip2(self):
self.check_testzip_with_bad_crc(zipfile.ZIP_BZIP2)
def check_read_with_bad_crc(self, compression): def check_read_with_bad_crc(self, compression):
"""Tests that files with bad CRCs raise a BadZipFile exception when read.""" """Tests that files with bad CRCs raise a BadZipFile exception when read."""
zipdata = self.zips_with_bad_crc[compression] zipdata = self.zips_with_bad_crc[compression]
@ -1035,6 +1111,10 @@ class OtherTests(unittest.TestCase):
def test_read_with_bad_crc_deflated(self): def test_read_with_bad_crc_deflated(self):
self.check_read_with_bad_crc(zipfile.ZIP_DEFLATED) self.check_read_with_bad_crc(zipfile.ZIP_DEFLATED)
@requires_bz2
def test_read_with_bad_crc_bzip2(self):
self.check_read_with_bad_crc(zipfile.ZIP_BZIP2)
def check_read_return_size(self, compression): def check_read_return_size(self, compression):
# Issue #9837: ZipExtFile.read() shouldn't return more bytes # Issue #9837: ZipExtFile.read() shouldn't return more bytes
# than requested. # than requested.
@ -1055,6 +1135,10 @@ class OtherTests(unittest.TestCase):
def test_read_return_size_deflated(self): def test_read_return_size_deflated(self):
self.check_read_return_size(zipfile.ZIP_DEFLATED) self.check_read_return_size(zipfile.ZIP_DEFLATED)
@requires_bz2
def test_read_return_size_bzip2(self):
self.check_read_return_size(zipfile.ZIP_BZIP2)
def test_empty_zipfile(self): def test_empty_zipfile(self):
# Check that creating a file in 'w' or 'a' mode and closing without # Check that creating a file in 'w' or 'a' mode and closing without
# adding any files to the archives creates a valid empty ZIP file # adding any files to the archives creates a valid empty ZIP file
@ -1196,6 +1280,11 @@ class TestsWithRandomBinaryFiles(unittest.TestCase):
for f in (TESTFN2, TemporaryFile(), io.BytesIO()): for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.zip_test(f, zipfile.ZIP_DEFLATED) self.zip_test(f, zipfile.ZIP_DEFLATED)
@requires_bz2
def test_bzip2(self):
for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.zip_test(f, zipfile.ZIP_BZIP2)
def zip_open_test(self, f, compression): def zip_open_test(self, f, compression):
self.make_test_archive(f, compression) self.make_test_archive(f, compression)
@ -1236,6 +1325,11 @@ class TestsWithRandomBinaryFiles(unittest.TestCase):
for f in (TESTFN2, TemporaryFile(), io.BytesIO()): for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.zip_open_test(f, zipfile.ZIP_DEFLATED) self.zip_open_test(f, zipfile.ZIP_DEFLATED)
@requires_bz2
def test_open_bzip2(self):
for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.zip_open_test(f, zipfile.ZIP_BZIP2)
def zip_random_open_test(self, f, compression): def zip_random_open_test(self, f, compression):
self.make_test_archive(f, compression) self.make_test_archive(f, compression)
@ -1264,6 +1358,11 @@ class TestsWithRandomBinaryFiles(unittest.TestCase):
for f in (TESTFN2, TemporaryFile(), io.BytesIO()): for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.zip_random_open_test(f, zipfile.ZIP_DEFLATED) self.zip_random_open_test(f, zipfile.ZIP_DEFLATED)
@requires_bz2
def test_random_open_bzip2(self):
for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.zip_random_open_test(f, zipfile.ZIP_BZIP2)
@requires_zlib @requires_zlib
class TestsWithMultipleOpens(unittest.TestCase): class TestsWithMultipleOpens(unittest.TestCase):
@ -1483,6 +1582,31 @@ class UniversalNewlineTests(unittest.TestCase):
for f in (TESTFN2, TemporaryFile(), io.BytesIO()): for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.iterlines_test(f, zipfile.ZIP_DEFLATED) self.iterlines_test(f, zipfile.ZIP_DEFLATED)
@requires_bz2
def test_read_bzip2(self):
for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.read_test(f, zipfile.ZIP_BZIP2)
@requires_bz2
def test_readline_read_bzip2(self):
for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.readline_read_test(f, zipfile.ZIP_BZIP2)
@requires_bz2
def test_readline_bzip2(self):
for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.readline_test(f, zipfile.ZIP_BZIP2)
@requires_bz2
def test_readlines_bzip2(self):
for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.readlines_test(f, zipfile.ZIP_BZIP2)
@requires_bz2
def test_iterlines_bzip2(self):
for f in (TESTFN2, TemporaryFile(), io.BytesIO()):
self.iterlines_test(f, zipfile.ZIP_BZIP2)
def tearDown(self): def tearDown(self):
for sep, fn in self.arcfiles.items(): for sep, fn in self.arcfiles.items():
os.remove(fn) os.remove(fn)

View File

@ -22,7 +22,13 @@ except ImportError:
zlib = None zlib = None
crc32 = binascii.crc32 crc32 = binascii.crc32
__all__ = ["BadZipFile", "BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", try:
import bz2 # We may need its compression method
except ImportError:
bz2 = None
# Public names exported by "from zipfile import *".
# Every entry must be followed by a comma: adjacent string literals are
# concatenated implicitly, so a missing comma after "ZIP_BZIP2" would fuse
# it with "is_zipfile" into one bogus name and drop both from the export
# list.
__all__ = ["BadZipFile", "BadZipfile", "error",
           "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
class BadZipFile(Exception): class BadZipFile(Exception):
@ -45,8 +51,13 @@ ZIP_MAX_COMMENT = (1 << 16) - 1
# constants for Zip file compression methods # constants for Zip file compression methods
ZIP_STORED = 0 ZIP_STORED = 0
ZIP_DEFLATED = 8 ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
# Other ZIP compression methods not supported # Other ZIP compression methods not supported
DEFAULT_VERSION = 20
ZIP64_VERSION = 45
BZIP2_VERSION = 46
# Below are some formats and associated data for reading/writing headers using # Below are some formats and associated data for reading/writing headers using
# the struct module. The names and structures of headers/records are those used # the struct module. The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format: # in the PKWARE description of the ZIP file format:
@ -313,8 +324,8 @@ class ZipInfo (object):
else: else:
# Assume everything else is unix-y # Assume everything else is unix-y
self.create_system = 3 # System which created ZIP archive self.create_system = 3 # System which created ZIP archive
self.create_version = 20 # Version which created ZIP archive self.create_version = DEFAULT_VERSION # Version which created ZIP archive
self.extract_version = 20 # Version needed to extract archive self.extract_version = DEFAULT_VERSION # Version needed to extract archive
self.reserved = 0 # Must be zero self.reserved = 0 # Must be zero
self.flag_bits = 0 # ZIP flag bits self.flag_bits = 0 # ZIP flag bits
self.volume = 0 # Volume number of file header self.volume = 0 # Volume number of file header
@ -341,6 +352,7 @@ class ZipInfo (object):
extra = self.extra extra = self.extra
min_version = 0
if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT: if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
# File is larger than what fits into a 4 byte integer, # File is larger than what fits into a 4 byte integer,
# fall back to the ZIP64 extension # fall back to the ZIP64 extension
@ -349,9 +361,13 @@ class ZipInfo (object):
1, struct.calcsize(fmt)-4, file_size, compress_size) 1, struct.calcsize(fmt)-4, file_size, compress_size)
file_size = 0xffffffff file_size = 0xffffffff
compress_size = 0xffffffff compress_size = 0xffffffff
self.extract_version = max(45, self.extract_version) min_version = ZIP64_VERSION
self.create_version = max(45, self.extract_version)
if self.compress_type == ZIP_BZIP2:
min_version = max(BZIP2_VERSION, min_version)
self.extract_version = max(min_version, self.extract_version)
self.create_version = max(min_version, self.create_version)
filename, flag_bits = self._encodeFilenameFlags() filename, flag_bits = self._encodeFilenameFlags()
header = struct.pack(structFileHeader, stringFileHeader, header = struct.pack(structFileHeader, stringFileHeader,
self.extract_version, self.reserved, flag_bits, self.extract_version, self.reserved, flag_bits,
@ -461,6 +477,41 @@ class _ZipDecrypter:
self._UpdateKeys(c) self._UpdateKeys(c)
return c return c
def _check_compression(compression):
    """Validate a ZIP compression method constant.

    Returns None when *compression* is usable.  Raises RuntimeError when
    the method is ZIP_DEFLATED or ZIP_BZIP2 but the module that implements
    it (zlib or bz2 respectively) is bound to a false value, or when the
    method is none of ZIP_STORED, ZIP_DEFLATED and ZIP_BZIP2.
    """
    if compression == ZIP_STORED:
        # Stored members need no helper module.
        return
    if compression == ZIP_DEFLATED:
        if zlib:
            return
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if compression == ZIP_BZIP2:
        if bz2:
            return
        raise RuntimeError(
            "Compression requires the (missing) bz2 module")
    raise RuntimeError("That compression method is not supported")
def _get_compressor(compress_type):
    """Return a fresh compressor object for *compress_type*.

    ZIP_DEFLATED yields a zlib compress object and ZIP_BZIP2 a
    bz2.BZ2Compressor; any other method yields None.
    """
    compressor = None
    if compress_type == ZIP_DEFLATED:
        # wbits of -15 asks zlib for a raw deflate stream (see zlib docs).
        compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                      zlib.DEFLATED, -15)
    elif compress_type == ZIP_BZIP2:
        compressor = bz2.BZ2Compressor()
    return compressor
def _get_decompressor(compress_type):
    """Return a fresh decompressor object for *compress_type*.

    ZIP_DEFLATED yields a zlib decompress object and ZIP_BZIP2 a
    bz2.BZ2Decompressor; any other method yields None.
    """
    if compress_type == ZIP_DEFLATED:
        # Same -15 wbits value that the compressor side uses.
        return zlib.decompressobj(-15)
    if compress_type == ZIP_BZIP2:
        return bz2.BZ2Decompressor()
    return None
class ZipExtFile(io.BufferedIOBase): class ZipExtFile(io.BufferedIOBase):
"""File-like object for reading an archive member. """File-like object for reading an archive member.
Is returned by ZipFile.open(). Is returned by ZipFile.open().
@ -482,13 +533,12 @@ class ZipExtFile(io.BufferedIOBase):
self._close_fileobj = close_fileobj self._close_fileobj = close_fileobj
self._compress_type = zipinfo.compress_type self._compress_type = zipinfo.compress_type
self._compress_size = zipinfo.compress_size
self._compress_left = zipinfo.compress_size self._compress_left = zipinfo.compress_size
self._left = zipinfo.file_size
if self._compress_type == ZIP_DEFLATED: self._decompressor = _get_decompressor(self._compress_type)
self._decompressor = zlib.decompressobj(-15)
self._unconsumed = b''
self._eof = False
self._readbuffer = b'' self._readbuffer = b''
self._offset = 0 self._offset = 0
@ -563,7 +613,11 @@ class ZipExtFile(io.BufferedIOBase):
"""Returns buffered bytes without advancing the position.""" """Returns buffered bytes without advancing the position."""
if n > len(self._readbuffer) - self._offset: if n > len(self._readbuffer) - self._offset:
chunk = self.read(n) chunk = self.read(n)
self._offset -= len(chunk) if len(chunk) > self._offset:
self._readbuffer = chunk + self._readbuffer[self._offset:]
self._offset = 0
else:
self._offset -= len(chunk)
# Return up to 512 bytes to reduce allocation overhead for tight loops. # Return up to 512 bytes to reduce allocation overhead for tight loops.
return self._readbuffer[self._offset: self._offset + 512] return self._readbuffer[self._offset: self._offset + 512]
@ -575,80 +629,121 @@ class ZipExtFile(io.BufferedIOBase):
"""Read and return up to n bytes. """Read and return up to n bytes.
If the argument is omitted, None, or negative, data is read and returned until EOF is reached.. If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
""" """
buf = b'' if n is None or n < 0:
if n is None: buf = self._readbuffer[self._offset:]
n = -1 self._readbuffer = b''
while True: self._offset = 0
if n < 0: while not self._eof:
data = self.read1(n) buf += self._read1(self.MAX_N)
elif n > len(buf): return buf
data = self.read1(n - len(buf))
else:
return buf
if len(data) == 0:
return buf
buf += data
def _update_crc(self, newdata, eof): n -= len(self._readbuffer) - self._offset
if n < 0:
buf = self._readbuffer[self._offset:n]
self._offset += len(buf)
return buf
buf = self._readbuffer[self._offset:]
self._readbuffer = b''
self._offset = 0
while n > 0 and not self._eof:
data = self._read1(n)
if n < len(data):
self._readbuffer = data
self._offset = n
buf += data[:n]
break
buf += data
n -= len(data)
return buf
def _update_crc(self, newdata):
# Update the CRC using the given data. # Update the CRC using the given data.
if self._expected_crc is None: if self._expected_crc is None:
# No need to compute the CRC if we don't have a reference value # No need to compute the CRC if we don't have a reference value
return return
self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
# Check the CRC if we're at the end of the file # Check the CRC if we're at the end of the file
if eof and self._running_crc != self._expected_crc: if self._eof and self._running_crc != self._expected_crc:
raise BadZipFile("Bad CRC-32 for file %r" % self.name) raise BadZipFile("Bad CRC-32 for file %r" % self.name)
def read1(self, n): def read1(self, n):
"""Read up to n bytes with at most one read() system call.""" """Read up to n bytes with at most one read() system call."""
# Simplify algorithm (branching) by transforming negative n to large n. if n is None or n < 0:
if n < 0 or n is None: buf = self._readbuffer[self._offset:]
n = self.MAX_N self._readbuffer = b''
self._offset = 0
data = self._read1(self.MAX_N)
buf += data
return buf
# Bytes available in read buffer. n -= len(self._readbuffer) - self._offset
len_readbuffer = len(self._readbuffer) - self._offset if n < 0:
buf = self._readbuffer[self._offset:n]
self._offset += len(buf)
return buf
buf = self._readbuffer[self._offset:]
self._readbuffer = b''
self._offset = 0
if n > 0:
data = self._read1(n)
if n < len(data):
self._readbuffer = data
self._offset = n
data = data[:n]
buf += data
return buf
def _read1(self, n):
# Read up to n compressed bytes with at most one read() system call,
# decrypt and decompress them.
if self._eof or n <= 0:
return b''
# Read from file. # Read from file.
if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed): if self._compress_type == ZIP_DEFLATED:
nbytes = n - len_readbuffer - len(self._unconsumed) ## Handle unconsumed data.
nbytes = max(nbytes, self.MIN_READ_SIZE) data = self._decompressor.unconsumed_tail
nbytes = min(nbytes, self._compress_left) if n > len(data):
data += self._read2(n - len(data))
else:
data = self._read2(n)
data = self._fileobj.read(nbytes) if self._compress_type == ZIP_STORED:
self._compress_left -= len(data) self._eof = self._compress_left <= 0
elif self._compress_type == ZIP_DEFLATED:
if data and self._decrypter is not None: n = max(n, self.MIN_READ_SIZE)
data = bytes(map(self._decrypter, data)) data = self._decompressor.decompress(data, n)
self._eof = (self._decompressor.eof or
if self._compress_type == ZIP_STORED: self._compress_left <= 0 and
self._update_crc(data, eof=(self._compress_left==0)) not self._decompressor.unconsumed_tail)
self._readbuffer = self._readbuffer[self._offset:] + data if self._eof:
self._offset = 0
else:
# Prepare deflated bytes for decompression.
self._unconsumed += data
# Handle unconsumed data.
if (len(self._unconsumed) > 0 and n > len_readbuffer and
self._compress_type == ZIP_DEFLATED):
data = self._decompressor.decompress(
self._unconsumed,
max(n - len_readbuffer, self.MIN_READ_SIZE)
)
self._unconsumed = self._decompressor.unconsumed_tail
eof = len(self._unconsumed) == 0 and self._compress_left == 0
if eof:
data += self._decompressor.flush() data += self._decompressor.flush()
else:
data = self._decompressor.decompress(data)
self._eof = self._decompressor.eof or self._compress_left <= 0
self._update_crc(data, eof=eof) data = data[:self._left]
self._readbuffer = self._readbuffer[self._offset:] + data self._left -= len(data)
self._offset = 0 if self._left <= 0:
self._eof = True
self._update_crc(data)
return data
# Read from buffer. def _read2(self, n):
data = self._readbuffer[self._offset: self._offset + n] if self._compress_left <= 0:
self._offset += len(data) return b''
n = max(n, self.MIN_READ_SIZE)
n = min(n, self._compress_left)
data = self._fileobj.read(n)
self._compress_left -= len(data)
if self._decrypter is not None:
data = bytes(map(self._decrypter, data))
return data return data
def close(self): def close(self):
@ -667,7 +762,8 @@ class ZipFile:
file: Either the path to the file, or a file-like object. file: Either the path to the file, or a file-like object.
If it is a path, the file will be opened and closed by ZipFile. If it is a path, the file will be opened and closed by ZipFile.
mode: The mode can be either read "r", write "w" or append "a". mode: The mode can be either read "r", write "w" or append "a".
compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib). compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib) or
ZIP_BZIP2 (requires bz2).
allowZip64: if True ZipFile will create files with ZIP64 extensions when allowZip64: if True ZipFile will create files with ZIP64 extensions when
needed, otherwise it will raise an exception when this would needed, otherwise it will raise an exception when this would
be necessary. be necessary.
@ -681,14 +777,7 @@ class ZipFile:
if mode not in ("r", "w", "a"): if mode not in ("r", "w", "a"):
raise RuntimeError('ZipFile() requires mode "r", "w", or "a"') raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
if compression == ZIP_STORED: _check_compression(compression)
pass
elif compression == ZIP_DEFLATED:
if not zlib:
raise RuntimeError(
"Compression requires the (missing) zlib module")
else:
raise RuntimeError("That compression method is not supported")
self._allowZip64 = allowZip64 self._allowZip64 = allowZip64
self._didModify = False self._didModify = False
@ -1067,11 +1156,7 @@ class ZipFile:
if not self.fp: if not self.fp:
raise RuntimeError( raise RuntimeError(
"Attempt to write ZIP archive that was already closed") "Attempt to write ZIP archive that was already closed")
if zinfo.compress_type == ZIP_DEFLATED and not zlib: _check_compression(zinfo.compress_type)
raise RuntimeError(
"Compression requires the (missing) zlib module")
if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
raise RuntimeError("That compression method is not supported")
if zinfo.file_size > ZIP64_LIMIT: if zinfo.file_size > ZIP64_LIMIT:
if not self._allowZip64: if not self._allowZip64:
raise LargeZipFile("Filesize would require ZIP64 extensions") raise LargeZipFile("Filesize would require ZIP64 extensions")
@ -1122,17 +1207,13 @@ class ZipFile:
self.fp.write(zinfo.FileHeader()) self.fp.write(zinfo.FileHeader())
return return
cmpr = _get_compressor(zinfo.compress_type)
with open(filename, "rb") as fp: with open(filename, "rb") as fp:
# Must overwrite CRC and sizes with correct data later # Must overwrite CRC and sizes with correct data later
zinfo.CRC = CRC = 0 zinfo.CRC = CRC = 0
zinfo.compress_size = compress_size = 0 zinfo.compress_size = compress_size = 0
zinfo.file_size = file_size = 0 zinfo.file_size = file_size = 0
self.fp.write(zinfo.FileHeader()) self.fp.write(zinfo.FileHeader())
if zinfo.compress_type == ZIP_DEFLATED:
cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
zlib.DEFLATED, -15)
else:
cmpr = None
while 1: while 1:
buf = fp.read(1024 * 8) buf = fp.read(1024 * 8)
if not buf: if not buf:
@ -1189,9 +1270,8 @@ class ZipFile:
self._writecheck(zinfo) self._writecheck(zinfo)
self._didModify = True self._didModify = True
zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum
if zinfo.compress_type == ZIP_DEFLATED: co = _get_compressor(zinfo.compress_type)
co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, if co:
zlib.DEFLATED, -15)
data = co.compress(data) + co.flush() data = co.compress(data) + co.flush()
zinfo.compress_size = len(data) # Compressed size zinfo.compress_size = len(data) # Compressed size
else: else:
@ -1243,18 +1323,20 @@ class ZipFile:
header_offset = zinfo.header_offset header_offset = zinfo.header_offset
extra_data = zinfo.extra extra_data = zinfo.extra
min_version = 0
if extra: if extra:
# Append a ZIP64 field to the extra's # Append a ZIP64 field to the extra's
extra_data = struct.pack( extra_data = struct.pack(
'<HH' + 'Q'*len(extra), '<HH' + 'Q'*len(extra),
1, 8*len(extra), *extra) + extra_data 1, 8*len(extra), *extra) + extra_data
extract_version = max(45, zinfo.extract_version) min_version = ZIP64_VERSION
create_version = max(45, zinfo.create_version)
else:
extract_version = zinfo.extract_version
create_version = zinfo.create_version
if zinfo.compress_type == ZIP_BZIP2:
min_version = max(BZIP2_VERSION, min_version)
extract_version = max(min_version, zinfo.extract_version)
create_version = max(min_version, zinfo.create_version)
try: try:
filename, flag_bits = zinfo._encodeFilenameFlags() filename, flag_bits = zinfo._encodeFilenameFlags()
centdir = struct.pack(structCentralDir, centdir = struct.pack(structCentralDir,

View File

@ -87,6 +87,9 @@ Core and Builtins
Library Library
------- -------
- Issue #14371: Support bzip2 in zipfile module.
Patch by Serhiy Storchaka.
- Issue #13183: Fix pdb skipping frames after hitting a breakpoint and running - Issue #13183: Fix pdb skipping frames after hitting a breakpoint and running
step. Patch by Xavier de Gaye. step. Patch by Xavier de Gaye.