bpo-28494: Improve zipfile.is_zipfile reliability

Previously, zipfile.is_zipfile only searched for the End of Central
Directory record signature. As a result, non-zipfiles that happened to
contain this signature were incorrectly reported as zipfiles. Now
zipfile.is_zipfile also verifies the first central directory entry.

Changes:
* Extended zipfile.is_zipfile to verify the zipfile's central directory
* Added tests verifying that binary non-zipfiles are rejected
This commit is contained in:
John Jolly 2017-12-30 11:07:51 -07:00 committed by John L. Jolly
parent e307e5cd06
commit 2da9363f25
3 changed files with 44 additions and 2 deletions

View File

@ -1411,6 +1411,25 @@ class OtherTests(unittest.TestCase):
self.assertFalse(zipfile.is_zipfile(fp))
fp.seek(0, 0)
self.assertFalse(zipfile.is_zipfile(fp))
# - passing non-zipfile with ZIP header elements
# data created using pyPNG like so:
# d = [(ord('P'), ord('K'), 5, 6), (ord('P'), ord('K'), 6, 6)]
# w = png.Writer(1,2,alpha=True,compression=0)
# f = open('onepix.png', 'wb')
# w.write(f, d)
# w.close()
data = (b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00"
b"\x00\x02\x08\x06\x00\x00\x00\x99\x81\xb6'\x00\x00\x00\x15I"
b"DATx\x01\x01\n\x00\xf5\xff\x00PK\x05\x06\x00PK\x06\x06\x07"
b"\xac\x01N\xc6|a\r\x00\x00\x00\x00IEND\xaeB`\x82")
# - passing a filename
with open(TESTFN, "wb") as fp:
fp.write(data)
self.assertFalse(zipfile.is_zipfile(TESTFN))
# - passing a file-like object
fp = io.BytesIO()
fp.write(data)
self.assertFalse(zipfile.is_zipfile(fp))
def test_damaged_zipfile(self):
"""Check that zipfiles with missing bytes at the end raise BadZipFile."""

View File

@ -186,8 +186,18 @@ def _strip_extra(extra, xids):
def _check_zipfile(fp):
    """Return True if the seekable file object *fp* looks like a ZIP archive.

    Finding an end-of-central-directory record is not sufficient on its
    own: other binary files (PNGs, PDFs, ...) can contain the same
    signature bytes by chance.  Therefore the first central directory
    entry is also read from the offset advertised by that record and its
    signature is verified.

    Any OSError raised while seeking or reading is treated as "not a
    zipfile" and results in False.
    """
    try:
        endrec = _EndRecData(fp)
        if not endrec:
            return False    # No end-of-central-directory record found

        if (endrec[_ECD_ENTRIES_TOTAL] == 0 and endrec[_ECD_SIZE] == 0
                and endrec[_ECD_OFFSET] == 0):
            return True     # Empty zipfiles are still zipfiles

        if endrec[_ECD_DISK_NUMBER] != endrec[_ECD_DISK_START]:
            # The central directory does not start on this disk, so it
            # cannot be verified here.
            return False

        # NOTE(review): the recorded offset is used as-is; archives with
        # data prepended in front of the ZIP content may need an adjusted
        # offset -- confirm whether that case must be accepted here.
        cd_offset = endrec[_ECD_OFFSET]
        fp.seek(cd_offset)
        if fp.tell() != cd_offset or endrec[_ECD_SIZE] < sizeCentralDir:
            return False    # CD is not where we expect it, or is too small

        data = fp.read(sizeCentralDir)
        if len(data) != sizeCentralDir:
            return False    # File ends before a full CD entry could be read

        centdir = struct.unpack(structCentralDir, data)
        # The first central directory entry must carry the correct magic
        # number for the file to count as a zipfile.
        return centdir[_CD_SIGNATURE] == stringCentralDir
    except OSError:
        return False

View File

@ -0,0 +1,13 @@
Improve zipfile validation in `zipfile.is_zipfile`.
Before this change `zipfile.is_zipfile()` only checked the End Central Directory
signature. If the signature could be found in the last 64k of the file, the
file was reported as a zipfile. This produced false positives on any file with
`'PK\x05\x06'` in the last 64k of the file - including PDFs and PNGs.
This is now corrected by actually validating the Central Directory location
and size based on the information provided by the End Central Directory
along with verifying the Central Directory signature of the first entry.
This should be sufficient for the vast majority of zipfiles while producing
fewer false positives.