Issue #9720: zipfile now writes correct local headers for files larger than 4 GiB.

This commit is contained in:
Serhiy Storchaka 2013-01-15 00:29:51 +02:00
parent 3d4a02ab8c
commit 80a9fd77a0
2 changed files with 35 additions and 15 deletions

View File

@ -316,7 +316,7 @@ class ZipInfo (object):
# compress_size Size of the compressed file # compress_size Size of the compressed file
# file_size Size of the uncompressed file # file_size Size of the uncompressed file
def FileHeader(self): def FileHeader(self, zip64=None):
"""Return the per-file header as a string.""" """Return the per-file header as a string."""
dt = self.date_time dt = self.date_time
dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
@ -331,12 +331,17 @@ class ZipInfo (object):
extra = self.extra extra = self.extra
if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT: if zip64 is None:
# File is larger than what fits into a 4 byte integer, zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
# fall back to the ZIP64 extension if zip64:
fmt = '<HHQQ' fmt = '<HHQQ'
extra = extra + struct.pack(fmt, extra = extra + struct.pack(fmt,
1, struct.calcsize(fmt)-4, file_size, compress_size) 1, struct.calcsize(fmt)-4, file_size, compress_size)
if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
if not zip64:
raise LargeZipFile("Filesize would require ZIP64 extensions")
# File is larger than what fits into a 4 byte integer,
# fall back to the ZIP64 extension
file_size = 0xffffffff file_size = 0xffffffff
compress_size = 0xffffffff compress_size = 0xffffffff
self.extract_version = max(45, self.extract_version) self.extract_version = max(45, self.extract_version)
@ -1113,20 +1118,23 @@ class ZipFile(object):
zinfo.CRC = 0 zinfo.CRC = 0
self.filelist.append(zinfo) self.filelist.append(zinfo)
self.NameToInfo[zinfo.filename] = zinfo self.NameToInfo[zinfo.filename] = zinfo
self.fp.write(zinfo.FileHeader()) self.fp.write(zinfo.FileHeader(False))
return return
with open(filename, "rb") as fp: with open(filename, "rb") as fp:
# Must overwrite CRC and sizes with correct data later # Must overwrite CRC and sizes with correct data later
zinfo.CRC = CRC = 0 zinfo.CRC = CRC = 0
zinfo.compress_size = compress_size = 0 zinfo.compress_size = compress_size = 0
zinfo.file_size = file_size = 0 # Compressed size can be larger than uncompressed size
self.fp.write(zinfo.FileHeader()) zip64 = self._allowZip64 and \
zinfo.file_size * 1.05 > ZIP64_LIMIT
self.fp.write(zinfo.FileHeader(zip64))
if zinfo.compress_type == ZIP_DEFLATED: if zinfo.compress_type == ZIP_DEFLATED:
cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
zlib.DEFLATED, -15) zlib.DEFLATED, -15)
else: else:
cmpr = None cmpr = None
file_size = 0
while 1: while 1:
buf = fp.read(1024 * 8) buf = fp.read(1024 * 8)
if not buf: if not buf:
@ -1146,11 +1154,16 @@ class ZipFile(object):
zinfo.compress_size = file_size zinfo.compress_size = file_size
zinfo.CRC = CRC zinfo.CRC = CRC
zinfo.file_size = file_size zinfo.file_size = file_size
# Seek backwards and write CRC and file sizes if not zip64 and self._allowZip64:
if file_size > ZIP64_LIMIT:
raise RuntimeError('File size has increased during compressing')
if compress_size > ZIP64_LIMIT:
raise RuntimeError('Compressed size larger than uncompressed size')
# Seek backwards and write file header (which will now include
# correct CRC and file sizes)
position = self.fp.tell() # Preserve current position in file position = self.fp.tell() # Preserve current position in file
self.fp.seek(zinfo.header_offset + 14, 0) self.fp.seek(zinfo.header_offset, 0)
self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size, self.fp.write(zinfo.FileHeader(zip64))
zinfo.file_size))
self.fp.seek(position, 0) self.fp.seek(position, 0)
self.filelist.append(zinfo) self.filelist.append(zinfo)
self.NameToInfo[zinfo.filename] = zinfo self.NameToInfo[zinfo.filename] = zinfo
@ -1187,14 +1200,18 @@ class ZipFile(object):
zinfo.compress_size = len(bytes) # Compressed size zinfo.compress_size = len(bytes) # Compressed size
else: else:
zinfo.compress_size = zinfo.file_size zinfo.compress_size = zinfo.file_size
zinfo.header_offset = self.fp.tell() # Start of header bytes zip64 = zinfo.file_size > ZIP64_LIMIT or \
self.fp.write(zinfo.FileHeader()) zinfo.compress_size > ZIP64_LIMIT
if zip64 and not self._allowZip64:
raise LargeZipFile("Filesize would require ZIP64 extensions")
self.fp.write(zinfo.FileHeader(zip64))
self.fp.write(bytes) self.fp.write(bytes)
self.fp.flush()
if zinfo.flag_bits & 0x08: if zinfo.flag_bits & 0x08:
# Write CRC and file sizes after the file data # Write CRC and file sizes after the file data
self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size, fmt = '<LQQ' if zip64 else '<LLL'
self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
zinfo.file_size)) zinfo.file_size))
self.fp.flush()
self.filelist.append(zinfo) self.filelist.append(zinfo)
self.NameToInfo[zinfo.filename] = zinfo self.NameToInfo[zinfo.filename] = zinfo

View File

@ -186,6 +186,9 @@ Core and Builtins
Library Library
------- -------
- Issue #9720: zipfile now writes correct local headers for files larger than
4 GiB.
- Issue #16829: IDLE printing no longer fails if there are spaces or other - Issue #16829: IDLE printing no longer fails if there are spaces or other
special characters in the file path. special characters in the file path.