Patch #611760: read archives with comments.

This commit is contained in:
Martin v. Löwis 2002-10-13 13:54:50 +00:00
parent b24e3477dc
commit 6f6873baa1
1 changed files with 48 additions and 23 deletions

View File

@ -66,21 +66,51 @@ _FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
def is_zipfile(filename):
"""Quickly see if file is a ZIP file by checking the magic number.
Will not accept a ZIP archive with an ending comment.
"""
"""Quickly see if file is a ZIP file by checking the magic number."""
try:
fpin = open(filename, "rb")
fpin.seek(-22, 2) # Seek to end-of-file record
endrec = fpin.read()
endrec = _EndRecData(fpin)
fpin.close()
if endrec[0:4] == "PK\005\006" and endrec[-2:] == "\000\000":
if endrec:
return True # file has correct magic number
except IOError:
pass
return False
def _EndRecData(fpin):
"""Return data from the "End of Central Directory" record, or None.
The data is a list of the nine items in the ZIP "End of central dir"
record followed by a tenth item, the file seek offset of this record."""
fpin.seek(-22, 2) # Assume no archive comment.
filesize = fpin.tell() + 22 # Get file size
data = fpin.read()
if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
endrec = struct.unpack(structEndArchive, data)
endrec = list(endrec)
endrec.append("") # Append the archive comment
endrec.append(filesize - 22) # Append the record start offset
return endrec
# Search the last END_BLOCK bytes of the file for the record signature.
# The comment is appended to the ZIP file and has a 16 bit length.
# So the comment may be up to 64K long. We limit the search for the
# signature to a few Kbytes at the end of the file for efficiency.
# also, the signature must not appear in the comment.
END_BLOCK = min(filesize, 1024 * 4)
fpin.seek(filesize - END_BLOCK, 0)
data = fpin.read()
start = data.rfind(stringEndArchive)
if start >= 0: # Correct signature string was found
endrec = struct.unpack(structEndArchive, data[start:start+22])
endrec = list(endrec)
comment = data[start+22:]
if endrec[7] == len(comment): # Comment length checks out
# Append the archive comment and start offset
endrec.append(comment)
endrec.append(filesize - END_BLOCK + start)
return endrec
return # Error, return None
class ZipInfo:
"""Class with attributes describing each file in the ZIP archive."""
@ -183,16 +213,12 @@ class ZipFile:
elif key == 'w':
pass
elif key == 'a':
fp = self.fp
fp.seek(-22, 2) # Seek to end-of-file record
endrec = fp.read()
if endrec[0:4] == stringEndArchive and \
endrec[-2:] == "\000\000":
self._GetContents() # file is a zip file
try: # See if file is a zip file
self._RealGetContents()
# seek to start of directory and overwrite
fp.seek(self.start_dir, 0)
else: # file is not a zip file, just append
fp.seek(0, 2)
self.fp.seek(self.start_dir, 0)
except BadZipfile: # file is not a zip file, just append
self.fp.seek(0, 2)
else:
if not self._filePassed:
self.fp.close()
@ -213,17 +239,16 @@ class ZipFile:
def _RealGetContents(self):
"""Read in the table of contents for the ZIP file."""
fp = self.fp
fp.seek(-22, 2) # Start of end-of-archive record
filesize = fp.tell() + 22 # Get file size
endrec = fp.read(22) # Archive must not end with a comment!
if endrec[0:4] != stringEndArchive or endrec[-2:] != "\000\000":
raise BadZipfile, "File is not a zip file, or ends with a comment"
endrec = struct.unpack(structEndArchive, endrec)
endrec = _EndRecData(fp)
if not endrec:
raise BadZipfile, "File is not a zip file"
if self.debug > 1:
print endrec
size_cd = endrec[5] # bytes in central directory
offset_cd = endrec[6] # offset of central directory
x = filesize - 22 - size_cd
self.comment = endrec[8] # archive comment
# endrec[9] is the offset of the "End of Central Dir" record
x = endrec[9] - size_cd
# "concat" is zero, unless zip was concatenated to another file
concat = x - offset_cd
if self.debug > 2: