bpo-36993: Improve error reporting for zipfiles with bad zip64 extra data. (GH-14656)

This commit is contained in:
Daniel Hillier 2019-10-29 18:24:18 +11:00 committed by Serhiy Storchaka
parent 4c155f738d
commit da6ce58dd5
3 changed files with 236 additions and 0 deletions

View File

@ -1,6 +1,7 @@
import contextlib
import importlib.util
import io
import itertools
import os
import pathlib
import posixpath
@ -824,6 +825,227 @@ class StoredTestZip64InSmallFiles(AbstractTestZip64InSmallFiles,
zinfo = zipfp.getinfo("strfile")
self.assertEqual(zinfo.extra, extra)
def make_zip64_file(
self, file_size_64_set=False, file_size_extra=False,
compress_size_64_set=False, compress_size_extra=False,
header_offset_64_set=False, header_offset_extra=False,
):
"""Generate bytes sequence for a zip with (incomplete) zip64 data.
The actual values (not the zip 64 0xffffffff values) stored in the file
are:
file_size: 8
compress_size: 8
header_offset: 0
"""
actual_size = 8
actual_header_offset = 0
local_zip64_fields = []
central_zip64_fields = []
file_size = actual_size
if file_size_64_set:
file_size = 0xffffffff
if file_size_extra:
local_zip64_fields.append(actual_size)
central_zip64_fields.append(actual_size)
file_size = struct.pack("<L", file_size)
compress_size = actual_size
if compress_size_64_set:
compress_size = 0xffffffff
if compress_size_extra:
local_zip64_fields.append(actual_size)
central_zip64_fields.append(actual_size)
compress_size = struct.pack("<L", compress_size)
header_offset = actual_header_offset
if header_offset_64_set:
header_offset = 0xffffffff
if header_offset_extra:
central_zip64_fields.append(actual_header_offset)
header_offset = struct.pack("<L", header_offset)
local_extra = struct.pack(
'<HH' + 'Q'*len(local_zip64_fields),
0x0001,
8*len(local_zip64_fields),
*local_zip64_fields
)
central_extra = struct.pack(
'<HH' + 'Q'*len(central_zip64_fields),
0x0001,
8*len(central_zip64_fields),
*central_zip64_fields
)
central_dir_size = struct.pack('<Q', 58 + 8 * len(central_zip64_fields))
offset_to_central_dir = struct.pack('<Q', 50 + 8 * len(local_zip64_fields))
local_extra_length = struct.pack("<H", 4 + 8 * len(local_zip64_fields))
central_extra_length = struct.pack("<H", 4 + 8 * len(central_zip64_fields))
filename = b"test.txt"
content = b"test1234"
filename_length = struct.pack("<H", len(filename))
zip64_contents = (
# Local file header
b"PK\x03\x04\x14\x00\x00\x00\x00\x00\x00\x00!\x00\x9e%\xf5\xaf"
+ compress_size
+ file_size
+ filename_length
+ local_extra_length
+ filename
+ local_extra
+ content
# Central directory:
+ b"PK\x01\x02-\x03-\x00\x00\x00\x00\x00\x00\x00!\x00\x9e%\xf5\xaf"
+ compress_size
+ file_size
+ filename_length
+ central_extra_length
+ b"\x00\x00\x00\x00\x00\x00\x00\x00\x80\x01"
+ header_offset
+ filename
+ central_extra
# Zip64 end of central directory
+ b"PK\x06\x06,\x00\x00\x00\x00\x00\x00\x00-\x00-"
+ b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00"
+ b"\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00"
+ central_dir_size
+ offset_to_central_dir
# Zip64 end of central directory locator
+ b"PK\x06\x07\x00\x00\x00\x00l\x00\x00\x00\x00\x00\x00\x00\x01"
+ b"\x00\x00\x00"
# end of central directory
+ b"PK\x05\x06\x00\x00\x00\x00\x01\x00\x01\x00:\x00\x00\x002\x00"
+ b"\x00\x00\x00\x00"
)
return zip64_contents
def test_bad_zip64_extra(self):
"""Missing zip64 extra records raises an exception.
There are 4 fields that the zip64 format handles (the disk number is
not used in this module and so is ignored here). According to the zip
spec:
The order of the fields in the zip64 extended
information record is fixed, but the fields MUST
only appear if the corresponding Local or Central
directory record field is set to 0xFFFF or 0xFFFFFFFF.
If the zip64 extra content doesn't contain enough entries for the
number of fields marked with 0xFFFF or 0xFFFFFFFF, we raise an error.
This test mismatches the length of the zip64 extra field and the number
of fields set to indicate the presence of zip64 data.
"""
# zip64 file size present, no fields in extra, expecting one, equals
# missing file size.
missing_file_size_extra = self.make_zip64_file(
file_size_64_set=True,
)
with self.assertRaises(zipfile.BadZipFile) as e:
zipfile.ZipFile(io.BytesIO(missing_file_size_extra))
self.assertIn('file size', str(e.exception).lower())
# zip64 file size present, zip64 compress size present, one field in
# extra, expecting two, equals missing compress size.
missing_compress_size_extra = self.make_zip64_file(
file_size_64_set=True,
file_size_extra=True,
compress_size_64_set=True,
)
with self.assertRaises(zipfile.BadZipFile) as e:
zipfile.ZipFile(io.BytesIO(missing_compress_size_extra))
self.assertIn('compress size', str(e.exception).lower())
# zip64 compress size present, no fields in extra, expecting one,
# equals missing compress size.
missing_compress_size_extra = self.make_zip64_file(
compress_size_64_set=True,
)
with self.assertRaises(zipfile.BadZipFile) as e:
zipfile.ZipFile(io.BytesIO(missing_compress_size_extra))
self.assertIn('compress size', str(e.exception).lower())
# zip64 file size present, zip64 compress size present, zip64 header
# offset present, two fields in extra, expecting three, equals missing
# header offset
missing_header_offset_extra = self.make_zip64_file(
file_size_64_set=True,
file_size_extra=True,
compress_size_64_set=True,
compress_size_extra=True,
header_offset_64_set=True,
)
with self.assertRaises(zipfile.BadZipFile) as e:
zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
self.assertIn('header offset', str(e.exception).lower())
# zip64 compress size present, zip64 header offset present, one field
# in extra, expecting two, equals missing header offset
missing_header_offset_extra = self.make_zip64_file(
file_size_64_set=False,
compress_size_64_set=True,
compress_size_extra=True,
header_offset_64_set=True,
)
with self.assertRaises(zipfile.BadZipFile) as e:
zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
self.assertIn('header offset', str(e.exception).lower())
# zip64 file size present, zip64 header offset present, one field in
# extra, expecting two, equals missing header offset
missing_header_offset_extra = self.make_zip64_file(
file_size_64_set=True,
file_size_extra=True,
compress_size_64_set=False,
header_offset_64_set=True,
)
with self.assertRaises(zipfile.BadZipFile) as e:
zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
self.assertIn('header offset', str(e.exception).lower())
# zip64 header offset present, no fields in extra, expecting one,
# equals missing header offset
missing_header_offset_extra = self.make_zip64_file(
file_size_64_set=False,
compress_size_64_set=False,
header_offset_64_set=True,
)
with self.assertRaises(zipfile.BadZipFile) as e:
zipfile.ZipFile(io.BytesIO(missing_header_offset_extra))
self.assertIn('header offset', str(e.exception).lower())
def test_generated_valid_zip64_extra(self):
# These values are what is set in the make_zip64_file method.
expected_file_size = 8
expected_compress_size = 8
expected_header_offset = 0
expected_content = b"test1234"
# Loop through the various valid combinations of zip64 masks
# present and extra fields present.
params = (
{"file_size_64_set": True, "file_size_extra": True},
{"compress_size_64_set": True, "compress_size_extra": True},
{"header_offset_64_set": True, "header_offset_extra": True},
)
for r in range(1, len(params) + 1):
for combo in itertools.combinations(params, r):
kwargs = {}
for c in combo:
kwargs.update(c)
with zipfile.ZipFile(io.BytesIO(self.make_zip64_file(**kwargs))) as zf:
zinfo = zf.infolist()[0]
self.assertEqual(zinfo.file_size, expected_file_size)
self.assertEqual(zinfo.compress_size, expected_compress_size)
self.assertEqual(zinfo.header_offset, expected_header_offset)
self.assertEqual(zf.read(zinfo), expected_content)
@requires_zlib
class DeflateTestZip64InSmallFiles(AbstractTestZip64InSmallFiles,
unittest.TestCase):

View File

@ -480,14 +480,26 @@ class ZipInfo (object):
# ZIP64 extension (large files and/or large archives)
if self.file_size in (0xffffffffffffffff, 0xffffffff):
if len(counts) <= idx:
raise BadZipFile(
"Corrupt zip64 extra field. File size not found."
)
self.file_size = counts[idx]
idx += 1
if self.compress_size == 0xFFFFFFFF:
if len(counts) <= idx:
raise BadZipFile(
"Corrupt zip64 extra field. Compress size not found."
)
self.compress_size = counts[idx]
idx += 1
if self.header_offset == 0xffffffff:
if len(counts) <= idx:
raise BadZipFile(
"Corrupt zip64 extra field. Header offset not found."
)
old = self.header_offset
self.header_offset = counts[idx]
idx+=1

View File

@ -0,0 +1,2 @@
Improve error reporting for corrupt zip files with bad zip64 extra data. Patch
by Daniel Hillier.