gh-103861: Fix Zip64 extensions not being properly applied in some cases (#103863)

Fix Zip64 extensions not being properly applied in some cases:

Fixes an issue where adding a small file to a `ZipFile`
object while forcing zip64 extensions causes an extra Zip64 record to be
added to the zip, but doesn't update the `min_version` or file sizes in
the primary central directory header.

Also fixed an edge case in checking if zip64 extensions are required:

This fixes an issue where if data requiring zip64 extensions was added
to an unseekable stream without specifying `force_zip64=True`, zip64
extensions would not be used and a RuntimeError would not be raised when
closing the file (even though the size would be known at that point).
This would result in successfully writing corrupt zip files.

Deciding if zip64 extensions are required outside of the `FileHeader`
function means that both `FileHeader` and `_ZipWriteFile` will always be
in sync. Previously, the `FileHeader` function could enable zip64
extensions without propagating that decision to the `_ZipWriteFile`
class, which would then not correctly write the data descriptor record
or check for errors on close.

If anyone is actually using `ZipInfo.FileHeader` as a public API without
explicitly passing True or False in for zip64, their own code may still be
susceptible to that kind of bug unless they make a similar change to
where the zip64 decision happens.

Fixes #103861

---------

Co-authored-by: Gregory P. Smith <greg@krypto.org>
This commit is contained in:
Carey Metcalfe 2023-05-16 01:43:44 -06:00 committed by GitHub
parent 85ec192ac4
commit 798bcaa1eb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 172 additions and 15 deletions

View File

@ -1080,6 +1080,159 @@ class StoredTestZip64InSmallFiles(AbstractTestZip64InSmallFiles,
self.assertEqual(zinfo.header_offset, expected_header_offset)
self.assertEqual(zf.read(zinfo), expected_content)
def test_force_zip64(self):
"""Test that forcing zip64 extensions correctly notes this in the zip file"""
# GH-103861 describes an issue where forcing a small file to use zip64
# extensions would add a zip64 extra record, but not change the data
# sizes to 0xFFFFFFFF to indicate to the extractor that the zip64
# record should be read. Additionally, it would not set the required
# version to indicate that zip64 extensions are required to extract it.
# This test replicates the situation and reads the raw data to specifically ensure:
# - The required extract version is always >= ZIP64_VERSION
# - The compressed and uncompressed size in the file headers are both
# 0xFFFFFFFF (ie. point to zip64 record)
# - The zip64 record is provided and has the correct sizes in it
# Other aspects of the zip are checked as well, but verifying the above is the main goal.
# Because this is hard to verify by parsing the data as a zip, the raw
# bytes are checked to ensure that they line up with the zip spec.
# The spec for this can be found at: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
# The relevent sections for this test are:
# - 4.3.7 for local file header
# - 4.5.3 for zip64 extra field
data = io.BytesIO()
with zipfile.ZipFile(data, mode="w", allowZip64=True) as zf:
with zf.open("text.txt", mode="w", force_zip64=True) as zi:
zi.write(b"_")
zipdata = data.getvalue()
# pull out and check zip information
(
header, vers, os, flags, comp, csize, usize, fn_len,
ex_total_len, filename, ex_id, ex_len, ex_usize, ex_csize, cd_sig
) = struct.unpack("<4sBBHH8xIIHH8shhQQx4s", zipdata[:63])
self.assertEqual(header, b"PK\x03\x04") # local file header
self.assertGreaterEqual(vers, zipfile.ZIP64_VERSION) # requires zip64 to extract
self.assertEqual(os, 0) # compatible with MS-DOS
self.assertEqual(flags, 0) # no flags
self.assertEqual(comp, 0) # compression method = stored
self.assertEqual(csize, 0xFFFFFFFF) # sizes are in zip64 extra
self.assertEqual(usize, 0xFFFFFFFF)
self.assertEqual(fn_len, 8) # filename len
self.assertEqual(ex_total_len, 20) # size of extra records
self.assertEqual(ex_id, 1) # Zip64 extra record
self.assertEqual(ex_len, 16) # 16 bytes of data
self.assertEqual(ex_usize, 1) # uncompressed size
self.assertEqual(ex_csize, 1) # compressed size
self.assertEqual(cd_sig, b"PK\x01\x02") # ensure the central directory header is next
z = zipfile.ZipFile(io.BytesIO(zipdata))
zinfos = z.infolist()
self.assertEqual(len(zinfos), 1)
self.assertGreaterEqual(zinfos[0].extract_version, zipfile.ZIP64_VERSION) # requires zip64 to extract
def test_unseekable_zip_unknown_filesize(self):
"""Test that creating a zip with/without seeking will raise a RuntimeError if zip64 was required but not used"""
def make_zip(fp):
with zipfile.ZipFile(fp, mode="w", allowZip64=True) as zf:
with zf.open("text.txt", mode="w", force_zip64=False) as zi:
zi.write(b"_" * (zipfile.ZIP64_LIMIT + 1))
self.assertRaises(RuntimeError, make_zip, io.BytesIO())
self.assertRaises(RuntimeError, make_zip, Unseekable(io.BytesIO()))
def test_zip64_required_not_allowed_fail(self):
"""Test that trying to add a large file to a zip that doesn't allow zip64 extensions fails on add"""
def make_zip(fp):
with zipfile.ZipFile(fp, mode="w", allowZip64=False) as zf:
# pretend zipfile.ZipInfo.from_file was used to get the name and filesize
info = zipfile.ZipInfo("text.txt")
info.file_size = zipfile.ZIP64_LIMIT + 1
zf.open(info, mode="w")
self.assertRaises(zipfile.LargeZipFile, make_zip, io.BytesIO())
self.assertRaises(zipfile.LargeZipFile, make_zip, Unseekable(io.BytesIO()))
def test_unseekable_zip_known_filesize(self):
"""Test that creating a zip without seeking will use zip64 extensions if the file size is provided up-front"""
# This test ensures that the zip will use a zip64 data descriptor (same
# as a regular data descriptor except the sizes are 8 bytes instead of
# 4) record to communicate the size of a file if the zip is being
# written to an unseekable stream.
# Because this sort of thing is hard to verify by parsing the data back
# in as a zip, this test looks at the raw bytes created to ensure that
# the correct data has been generated.
# The spec for this can be found at: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
# The relevent sections for this test are:
# - 4.3.7 for local file header
# - 4.3.9 for the data descriptor
# - 4.5.3 for zip64 extra field
file_size = zipfile.ZIP64_LIMIT + 1
def make_zip(fp):
with zipfile.ZipFile(fp, mode="w", allowZip64=True) as zf:
# pretend zipfile.ZipInfo.from_file was used to get the name and filesize
info = zipfile.ZipInfo("text.txt")
info.file_size = file_size
with zf.open(info, mode="w", force_zip64=False) as zi:
zi.write(b"_" * file_size)
return fp
# check seekable file information
seekable_data = make_zip(io.BytesIO()).getvalue()
(
header, vers, os, flags, comp, csize, usize, fn_len,
ex_total_len, filename, ex_id, ex_len, ex_usize, ex_csize,
cd_sig
) = struct.unpack("<4sBBHH8xIIHH8shhQQ{}x4s".format(file_size), seekable_data[:62 + file_size])
self.assertEqual(header, b"PK\x03\x04") # local file header
self.assertGreaterEqual(vers, zipfile.ZIP64_VERSION) # requires zip64 to extract
self.assertEqual(os, 0) # compatible with MS-DOS
self.assertEqual(flags, 0) # no flags set
self.assertEqual(comp, 0) # compression method = stored
self.assertEqual(csize, 0xFFFFFFFF) # sizes are in zip64 extra
self.assertEqual(usize, 0xFFFFFFFF)
self.assertEqual(fn_len, 8) # filename len
self.assertEqual(ex_total_len, 20) # size of extra records
self.assertEqual(ex_id, 1) # Zip64 extra record
self.assertEqual(ex_len, 16) # 16 bytes of data
self.assertEqual(ex_usize, file_size) # uncompressed size
self.assertEqual(ex_csize, file_size) # compressed size
self.assertEqual(cd_sig, b"PK\x01\x02") # ensure the central directory header is next
# check unseekable file information
unseekable_data = make_zip(Unseekable(io.BytesIO())).fp.getvalue()
(
header, vers, os, flags, comp, csize, usize, fn_len,
ex_total_len, filename, ex_id, ex_len, ex_usize, ex_csize,
dd_header, dd_usize, dd_csize, cd_sig
) = struct.unpack("<4sBBHH8xIIHH8shhQQ{}x4s4xQQ4s".format(file_size), unseekable_data[:86 + file_size])
self.assertEqual(header, b"PK\x03\x04") # local file header
self.assertGreaterEqual(vers, zipfile.ZIP64_VERSION) # requires zip64 to extract
self.assertEqual(os, 0) # compatible with MS-DOS
self.assertEqual("{:b}".format(flags), "1000") # streaming flag set
self.assertEqual(comp, 0) # compression method = stored
self.assertEqual(csize, 0xFFFFFFFF) # sizes are in zip64 extra
self.assertEqual(usize, 0xFFFFFFFF)
self.assertEqual(fn_len, 8) # filename len
self.assertEqual(ex_total_len, 20) # size of extra records
self.assertEqual(ex_id, 1) # Zip64 extra record
self.assertEqual(ex_len, 16) # 16 bytes of data
self.assertEqual(ex_usize, 0) # uncompressed size - 0 to defer to data descriptor
self.assertEqual(ex_csize, 0) # compressed size - 0 to defer to data descriptor
self.assertEqual(dd_header, b"PK\07\x08") # data descriptor
self.assertEqual(dd_usize, file_size) # file size (8 bytes because zip64)
self.assertEqual(dd_csize, file_size) # compressed size (8 bytes because zip64)
self.assertEqual(cd_sig, b"PK\x01\x02") # ensure the central directory header is next
@requires_zlib()
class DeflateTestZip64InSmallFiles(AbstractTestZip64InSmallFiles,

View File

@ -442,7 +442,12 @@ class ZipInfo (object):
return ''.join(result)
def FileHeader(self, zip64=None):
"""Return the per-file header as a bytes object."""
"""Return the per-file header as a bytes object.
When the optional zip64 arg is None rather than a bool, we will
decide based upon the file_size and compress_size, if known,
False otherwise.
"""
dt = self.date_time
dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
@ -458,16 +463,13 @@ class ZipInfo (object):
min_version = 0
if zip64 is None:
# We always explicitly pass zip64 within this module.... This
# remains for anyone using ZipInfo.FileHeader as a public API.
zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
if zip64:
fmt = '<HHQQ'
extra = extra + struct.pack(fmt,
1, struct.calcsize(fmt)-4, file_size, compress_size)
if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
if not zip64:
raise LargeZipFile("Filesize would require ZIP64 extensions")
# File is larger than what fits into a 4 byte integer,
# fall back to the ZIP64 extension
file_size = 0xffffffff
compress_size = 0xffffffff
min_version = ZIP64_VERSION
@ -1219,6 +1221,12 @@ class _ZipWriteFile(io.BufferedIOBase):
self._zinfo.CRC = self._crc
self._zinfo.file_size = self._file_size
if not self._zip64:
if self._file_size > ZIP64_LIMIT:
raise RuntimeError("File size too large, try using force_zip64")
if self._compress_size > ZIP64_LIMIT:
raise RuntimeError("Compressed size too large, try using force_zip64")
# Write updated header info
if self._zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
# Write CRC and file sizes after the file data
@ -1227,13 +1235,6 @@ class _ZipWriteFile(io.BufferedIOBase):
self._zinfo.compress_size, self._zinfo.file_size))
self._zipfile.start_dir = self._fileobj.tell()
else:
if not self._zip64:
if self._file_size > ZIP64_LIMIT:
raise RuntimeError(
'File size too large, try using force_zip64')
if self._compress_size > ZIP64_LIMIT:
raise RuntimeError(
'Compressed size too large, try using force_zip64')
# Seek backwards and write file header (which will now include
# correct CRC and file sizes)
@ -1672,8 +1673,9 @@ class ZipFile:
zinfo.external_attr = 0o600 << 16 # permissions: ?rw-------
# Compressed size can be larger than uncompressed size
zip64 = self._allowZip64 and \
(force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
zip64 = force_zip64 or (zinfo.file_size * 1.05 > ZIP64_LIMIT)
if not self._allowZip64 and zip64:
raise LargeZipFile("Filesize would require ZIP64 extensions")
if self._seekable:
self.fp.seek(self.start_dir)

View File

@ -0,0 +1,2 @@
Fix ``zipfile.Zipfile`` creating invalid zip files when ``force_zip64`` was
used to add files to them. Patch by Carey Metcalfe.