Patch #611760: read archives with comments.

This commit is contained in:
Martin v. Löwis 2002-10-13 13:54:50 +00:00
parent b24e3477dc
commit 6f6873baa1
1 changed files with 48 additions and 23 deletions

View File

@ -66,21 +66,51 @@ _FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11 _FH_EXTRA_FIELD_LENGTH = 11
def is_zipfile(filename): def is_zipfile(filename):
"""Quickly see if file is a ZIP file by checking the magic number. """Quickly see if file is a ZIP file by checking the magic number."""
Will not accept a ZIP archive with an ending comment.
"""
try: try:
fpin = open(filename, "rb") fpin = open(filename, "rb")
fpin.seek(-22, 2) # Seek to end-of-file record endrec = _EndRecData(fpin)
endrec = fpin.read()
fpin.close() fpin.close()
if endrec[0:4] == "PK\005\006" and endrec[-2:] == "\000\000": if endrec:
return True # file has correct magic number return True # file has correct magic number
except IOError: except IOError:
pass pass
return False return False
def _EndRecData(fpin):
"""Return data from the "End of Central Directory" record, or None.
The data is a list of the nine items in the ZIP "End of central dir"
record followed by a tenth item, the file seek offset of this record."""
fpin.seek(-22, 2) # Assume no archive comment.
filesize = fpin.tell() + 22 # Get file size
data = fpin.read()
if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
endrec = struct.unpack(structEndArchive, data)
endrec = list(endrec)
endrec.append("") # Append the archive comment
endrec.append(filesize - 22) # Append the record start offset
return endrec
# Search the last END_BLOCK bytes of the file for the record signature.
# The comment is appended to the ZIP file and has a 16 bit length.
# So the comment may be up to 64K long. We limit the search for the
# signature to a few Kbytes at the end of the file for efficiency.
# also, the signature must not appear in the comment.
END_BLOCK = min(filesize, 1024 * 4)
fpin.seek(filesize - END_BLOCK, 0)
data = fpin.read()
start = data.rfind(stringEndArchive)
if start >= 0: # Correct signature string was found
endrec = struct.unpack(structEndArchive, data[start:start+22])
endrec = list(endrec)
comment = data[start+22:]
if endrec[7] == len(comment): # Comment length checks out
# Append the archive comment and start offset
endrec.append(comment)
endrec.append(filesize - END_BLOCK + start)
return endrec
return # Error, return None
class ZipInfo: class ZipInfo:
"""Class with attributes describing each file in the ZIP archive.""" """Class with attributes describing each file in the ZIP archive."""
@ -183,16 +213,12 @@ class ZipFile:
elif key == 'w': elif key == 'w':
pass pass
elif key == 'a': elif key == 'a':
fp = self.fp try: # See if file is a zip file
fp.seek(-22, 2) # Seek to end-of-file record self._RealGetContents()
endrec = fp.read()
if endrec[0:4] == stringEndArchive and \
endrec[-2:] == "\000\000":
self._GetContents() # file is a zip file
# seek to start of directory and overwrite # seek to start of directory and overwrite
fp.seek(self.start_dir, 0) self.fp.seek(self.start_dir, 0)
else: # file is not a zip file, just append except BadZipfile: # file is not a zip file, just append
fp.seek(0, 2) self.fp.seek(0, 2)
else: else:
if not self._filePassed: if not self._filePassed:
self.fp.close() self.fp.close()
@ -213,17 +239,16 @@ class ZipFile:
def _RealGetContents(self): def _RealGetContents(self):
"""Read in the table of contents for the ZIP file.""" """Read in the table of contents for the ZIP file."""
fp = self.fp fp = self.fp
fp.seek(-22, 2) # Start of end-of-archive record endrec = _EndRecData(fp)
filesize = fp.tell() + 22 # Get file size if not endrec:
endrec = fp.read(22) # Archive must not end with a comment! raise BadZipfile, "File is not a zip file"
if endrec[0:4] != stringEndArchive or endrec[-2:] != "\000\000":
raise BadZipfile, "File is not a zip file, or ends with a comment"
endrec = struct.unpack(structEndArchive, endrec)
if self.debug > 1: if self.debug > 1:
print endrec print endrec
size_cd = endrec[5] # bytes in central directory size_cd = endrec[5] # bytes in central directory
offset_cd = endrec[6] # offset of central directory offset_cd = endrec[6] # offset of central directory
x = filesize - 22 - size_cd self.comment = endrec[8] # archive comment
# endrec[9] is the offset of the "End of Central Dir" record
x = endrec[9] - size_cd
# "concat" is zero, unless zip was concatenated to another file # "concat" is zero, unless zip was concatenated to another file
concat = x - offset_cd concat = x - offset_cd
if self.debug > 2: if self.debug > 2: