bpo-28494: Improve zipfile.is_zipfile reliability
Previously, zipfile.is_zipfile only searched for the End of Central Directory record signature, so non-zipfiles that happened to contain this signature were incorrectly reported as zipfiles. Now zipfile.is_zipfile also verifies the first central directory entry. Changes: * Extended zipfile.is_zipfile to verify the zipfile central directory * Added tests to validate failure of binary non-zipfiles
This commit is contained in:
parent
e307e5cd06
commit
2da9363f25
|
@ -1411,6 +1411,25 @@ class OtherTests(unittest.TestCase):
|
|||
self.assertFalse(zipfile.is_zipfile(fp))
|
||||
fp.seek(0, 0)
|
||||
self.assertFalse(zipfile.is_zipfile(fp))
|
||||
# - passing non-zipfile with ZIP header elements
|
||||
# data created using pyPNG like so:
|
||||
# d = [(ord('P'), ord('K'), 5, 6), (ord('P'), ord('K'), 6, 6)]
|
||||
# w = png.Writer(1,2,alpha=True,compression=0)
|
||||
# f = open('onepix.png', 'wb')
|
||||
# w.write(f, d)
|
||||
# f.close()
|
||||
data = (b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00"
|
||||
b"\x00\x02\x08\x06\x00\x00\x00\x99\x81\xb6'\x00\x00\x00\x15I"
|
||||
b"DATx\x01\x01\n\x00\xf5\xff\x00PK\x05\x06\x00PK\x06\x06\x07"
|
||||
b"\xac\x01N\xc6|a\r\x00\x00\x00\x00IEND\xaeB`\x82")
|
||||
# - passing a filename
|
||||
with open(TESTFN, "wb") as fp:
|
||||
fp.write(data)
|
||||
self.assertFalse(zipfile.is_zipfile(TESTFN))
|
||||
# - passing a file-like object
|
||||
fp = io.BytesIO()
|
||||
fp.write(data)
|
||||
self.assertFalse(zipfile.is_zipfile(fp))
|
||||
|
||||
def test_damaged_zipfile(self):
|
||||
"""Check that zipfiles with missing bytes at the end raise BadZipFile."""
|
||||
|
|
|
@ -186,8 +186,18 @@ def _strip_extra(extra, xids):
|
|||
|
||||
def _check_zipfile(fp):
    """Return True if the file object *fp* looks like a ZIP archive.

    Finding an end-of-central-directory record is not sufficient on its
    own: arbitrary binary files (PNG, PDF, ...) may contain its signature
    by accident.  The record is therefore cross-checked against the first
    central directory entry it points to.

    *fp* must support seek(), tell() and read().  An OSError raised while
    probing the file is treated as "not a zipfile".
    """
    try:
        endrec = _EndRecData(fp)
        if not endrec:
            return False        # No end-of-central-directory record found

        if (endrec[_ECD_ENTRIES_TOTAL] == 0 and endrec[_ECD_SIZE] == 0
                and endrec[_ECD_OFFSET] == 0):
            return True         # Empty zipfiles are still zipfiles

        if endrec[_ECD_DISK_NUMBER] != endrec[_ECD_DISK_START]:
            # Central directory is not on the same disk, so it cannot be
            # verified from this file.
            return False

        # NOTE(review): the recorded offset is used as an absolute file
        # position; archives with data prepended (e.g. self-extracting
        # executables) may need an adjustment here -- confirm.
        fp.seek(endrec[_ECD_OFFSET])
        if (fp.tell() != endrec[_ECD_OFFSET]
                or endrec[_ECD_SIZE] < sizeCentralDir):
            return False        # CD is not where we expect it to be

        data = fp.read(sizeCentralDir)
        if len(data) != sizeCentralDir:
            return False        # CD is truncated

        # First central directory entry must carry the correct magic number
        centdir = struct.unpack(structCentralDir, data)
        return centdir[_CD_SIGNATURE] == stringCentralDir
    except OSError:
        return False
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
Improve zipfile validation in `zipfile.is_zipfile`.
|
||||
|
||||
Before this change `zipfile.is_zipfile()` only checked the End of Central Directory
|
||||
signature. If the signature could be found in the last 64k of the file,
|
||||
success! This produced false positives on any file with `'PK\x05\x06'` in the
|
||||
last 64k of the file - including PDFs and PNGs.
|
||||
|
||||
This is now corrected by actually validating the Central Directory location
|
||||
and size based on the information provided by the End of Central Directory
|
||||
along with verifying the Central Directory signature of the first entry.
|
||||
|
||||
This should be sufficient for the vast majority of zipfiles, with fewer
|
||||
false positives.
|
Loading…
Reference in New Issue