Patch #1622: Correct interpretation of various ZIP header fields.
Also fixes - Issue #1526: Allow more than 64k files to be added to Zip64 file. - Issue #1746: Correct handling of zipfile archive comments (previously archives with comments over 4k were flagged as invalid). Allow writing Zip files with archive comments by setting the 'comment' attribute of a ZipFile.
This commit is contained in:
parent
f5d2b4ce93
commit
8c43641271
|
@ -285,7 +285,7 @@ ZipFile Objects
|
||||||
member of the given :class:`ZipInfo` instance. By default, the
|
member of the given :class:`ZipInfo` instance. By default, the
|
||||||
:class:`ZipInfo` constructor sets this member to :const:`ZIP_STORED`.
|
:class:`ZipInfo` constructor sets this member to :const:`ZIP_STORED`.
|
||||||
|
|
||||||
The following data attribute is also available:
|
The following data attributes are also available:
|
||||||
|
|
||||||
|
|
||||||
.. attribute:: ZipFile.debug
|
.. attribute:: ZipFile.debug
|
||||||
|
@ -294,6 +294,12 @@ The following data attribute is also available:
|
||||||
output) to ``3`` (the most output). Debugging information is written to
|
output) to ``3`` (the most output). Debugging information is written to
|
||||||
``sys.stdout``.
|
``sys.stdout``.
|
||||||
|
|
||||||
|
.. attribute:: ZipFile.comment
|
||||||
|
|
||||||
|
The comment text associated with the ZIP file. If assigning a comment to a
|
||||||
|
:class:`ZipFile` instance created with mode 'a' or 'w', this should be a
|
||||||
|
string no longer than 65535 bytes. Comments longer than this will be
|
||||||
|
truncated in the written archive when :meth:`ZipFile.close` is called.
|
||||||
|
|
||||||
.. _pyzipfile-objects:
|
.. _pyzipfile-objects:
|
||||||
|
|
||||||
|
|
|
@ -712,6 +712,54 @@ class OtherTests(unittest.TestCase):
|
||||||
zipf.writestr("foo.txt\x00qqq", "O, for a Muse of Fire!")
|
zipf.writestr("foo.txt\x00qqq", "O, for a Muse of Fire!")
|
||||||
self.assertEqual(zipf.namelist(), ['foo.txt'])
|
self.assertEqual(zipf.namelist(), ['foo.txt'])
|
||||||
|
|
||||||
|
def test_StructSizes(self):
|
||||||
|
# check that ZIP internal structure sizes are calculated correctly
|
||||||
|
self.assertEqual(zipfile.sizeEndCentDir, 22)
|
||||||
|
self.assertEqual(zipfile.sizeCentralDir, 46)
|
||||||
|
self.assertEqual(zipfile.sizeEndCentDir64, 56)
|
||||||
|
self.assertEqual(zipfile.sizeEndCentDir64Locator, 20)
|
||||||
|
|
||||||
|
def testComments(self):
|
||||||
|
# This test checks that comments on the archive are handled properly
|
||||||
|
|
||||||
|
# check default comment is empty
|
||||||
|
zipf = zipfile.ZipFile(TESTFN, mode="w")
|
||||||
|
self.assertEqual(zipf.comment, '')
|
||||||
|
zipf.writestr("foo.txt", "O, for a Muse of Fire!")
|
||||||
|
zipf.close()
|
||||||
|
zipfr = zipfile.ZipFile(TESTFN, mode="r")
|
||||||
|
self.assertEqual(zipfr.comment, '')
|
||||||
|
zipfr.close()
|
||||||
|
|
||||||
|
# check a simple short comment
|
||||||
|
comment = 'Bravely taking to his feet, he beat a very brave retreat.'
|
||||||
|
zipf = zipfile.ZipFile(TESTFN, mode="w")
|
||||||
|
zipf.comment = comment
|
||||||
|
zipf.writestr("foo.txt", "O, for a Muse of Fire!")
|
||||||
|
zipf.close()
|
||||||
|
zipfr = zipfile.ZipFile(TESTFN, mode="r")
|
||||||
|
self.assertEqual(zipfr.comment, comment)
|
||||||
|
zipfr.close()
|
||||||
|
|
||||||
|
# check a comment of max length
|
||||||
|
comment2 = ''.join(['%d' % (i**3 % 10) for i in xrange((1 << 16)-1)])
|
||||||
|
zipf = zipfile.ZipFile(TESTFN, mode="w")
|
||||||
|
zipf.comment = comment2
|
||||||
|
zipf.writestr("foo.txt", "O, for a Muse of Fire!")
|
||||||
|
zipf.close()
|
||||||
|
zipfr = zipfile.ZipFile(TESTFN, mode="r")
|
||||||
|
self.assertEqual(zipfr.comment, comment2)
|
||||||
|
zipfr.close()
|
||||||
|
|
||||||
|
# check a comment that is too long is truncated
|
||||||
|
zipf = zipfile.ZipFile(TESTFN, mode="w")
|
||||||
|
zipf.comment = comment2 + 'oops'
|
||||||
|
zipf.writestr("foo.txt", "O, for a Muse of Fire!")
|
||||||
|
zipf.close()
|
||||||
|
zipfr = zipfile.ZipFile(TESTFN, mode="r")
|
||||||
|
self.assertEqual(zipfr.comment, comment2)
|
||||||
|
zipfr.close()
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
support.unlink(TESTFN)
|
support.unlink(TESTFN)
|
||||||
support.unlink(TESTFN2)
|
support.unlink(TESTFN2)
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
# The test_support.requires call is the only reason for keeping this separate
|
# The test_support.requires call is the only reason for keeping this separate
|
||||||
# from test_zipfile
|
# from test_zipfile
|
||||||
from test import test_support
|
from test import test_support
|
||||||
|
|
||||||
# XXX(nnorwitz): disable this test by looking for extra largefile resource
|
# XXX(nnorwitz): disable this test by looking for extra largefile resource
|
||||||
# which doesn't exist. This test takes over 30 minutes to run in general
|
# which doesn't exist. This test takes over 30 minutes to run in general
|
||||||
# and requires more disk space than most of the buildbots.
|
# and requires more disk space than most of the buildbots.
|
||||||
|
@ -93,8 +94,31 @@ class TestsWithSourceFile(unittest.TestCase):
|
||||||
if os.path.exists(fname):
|
if os.path.exists(fname):
|
||||||
os.remove(fname)
|
os.remove(fname)
|
||||||
|
|
||||||
|
|
||||||
|
class OtherTests(unittest.TestCase):
|
||||||
|
def testMoreThan64kFiles(self):
|
||||||
|
# This test checks that more than 64k files can be added to an archive,
|
||||||
|
# and that the resulting archive can be read properly by ZipFile
|
||||||
|
zipf = zipfile.ZipFile(TESTFN, mode="w")
|
||||||
|
zipf.debug = 100
|
||||||
|
numfiles = (1 << 16) * 3/2
|
||||||
|
for i in xrange(numfiles):
|
||||||
|
zipf.writestr("foo%08d" % i, "%d" % (i**3 % 57))
|
||||||
|
self.assertEqual(len(zipf.namelist()), numfiles)
|
||||||
|
zipf.close()
|
||||||
|
|
||||||
|
zipf2 = zipfile.ZipFile(TESTFN, mode="r")
|
||||||
|
self.assertEqual(len(zipf2.namelist()), numfiles)
|
||||||
|
for i in xrange(numfiles):
|
||||||
|
self.assertEqual(zipf2.read("foo%08d" % i), "%d" % (i**3 % 57))
|
||||||
|
zipf.close()
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
test_support.unlink(TESTFN)
|
||||||
|
test_support.unlink(TESTFN2)
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
run_unittest(TestsWithSourceFile)
|
run_unittest(TestsWithSourceFile, OtherTests)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
test_main()
|
test_main()
|
||||||
|
|
295
Lib/zipfile.py
295
Lib/zipfile.py
|
@ -27,31 +27,79 @@ class LargeZipFile(Exception):
|
||||||
error = BadZipfile # The exception raised by this module
|
error = BadZipfile # The exception raised by this module
|
||||||
|
|
||||||
ZIP64_LIMIT= (1 << 31) - 1
|
ZIP64_LIMIT= (1 << 31) - 1
|
||||||
|
ZIP_FILECOUNT_LIMIT = 1 << 16
|
||||||
|
ZIP_MAX_COMMENT = (1 << 16) - 1
|
||||||
|
|
||||||
# constants for Zip file compression methods
|
# constants for Zip file compression methods
|
||||||
ZIP_STORED = 0
|
ZIP_STORED = 0
|
||||||
ZIP_DEFLATED = 8
|
ZIP_DEFLATED = 8
|
||||||
# Other ZIP compression methods not supported
|
# Other ZIP compression methods not supported
|
||||||
|
|
||||||
# Here are some struct module formats for reading headers
|
# Below are some formats and associated data for reading/writing headers using
|
||||||
structEndArchive = "<4s4H2LH" # 9 items, end of archive, 22 bytes
|
# the struct module. The names and structures of headers/records are those used
|
||||||
stringEndArchive = "PK\005\006" # magic number for end of archive record
|
# in the PKWARE description of the ZIP file format:
|
||||||
structCentralDir = "<4s4B4HLLL5HLL"# 19 items, central directory, 46 bytes
|
# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
|
||||||
stringCentralDir = "PK\001\002" # magic number for central directory
|
# (URL valid as of January 2008)
|
||||||
structFileHeader = "<4s2B4HLLL2H" # 12 items, file header record, 30 bytes
|
|
||||||
stringFileHeader = "PK\003\004" # magic number for file header
|
|
||||||
structEndArchive64Locator = "<4sLQL" # 4 items, locate Zip64 header, 20 bytes
|
|
||||||
stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header
|
|
||||||
structEndArchive64 = "<4sQHHLLQQQQ" # 10 items, end of archive (Zip64), 56 bytes
|
|
||||||
stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header
|
|
||||||
|
|
||||||
|
# The "end of central directory" structure, magic number, size, and indices
|
||||||
|
# (section V.I in the format document)
|
||||||
|
structEndCentDir = "<4s4H2LH"
|
||||||
|
magicEndCentDir = "PK\005\006"
|
||||||
|
sizeEndCentDir = struct.calcsize(structEndCentDir)
|
||||||
|
|
||||||
|
_ECD_SIGNATURE = 0
|
||||||
|
_ECD_DISK_NUMBER = 1
|
||||||
|
_ECD_DISK_START = 2
|
||||||
|
_ECD_ENTRIES_THIS_DISK = 3
|
||||||
|
_ECD_ENTRIES_TOTAL = 4
|
||||||
|
_ECD_SIZE = 5
|
||||||
|
_ECD_OFFSET = 6
|
||||||
|
_ECD_COMMENT_SIZE = 7
|
||||||
|
# These last two indices are not part of the structure as defined in the
|
||||||
|
# spec, but they are used internally by this module as a convenience
|
||||||
|
_ECD_COMMENT = 8
|
||||||
|
_ECD_LOCATION = 9
|
||||||
|
|
||||||
|
# The "central directory" structure, magic number, size, and indices
|
||||||
|
# of entries in the structure (section V.F in the format document)
|
||||||
|
structCentralDir = "<4s4B4HL2L5H2L"
|
||||||
|
magicCentralDir = "PK\001\002"
|
||||||
|
sizeCentralDir = struct.calcsize(structCentralDir)
|
||||||
|
|
||||||
|
# The "local file header" structure, magic number, size, and indices
|
||||||
|
# (section V.A in the format document)
|
||||||
|
structFileHeader = "<4s2B4HL2L2H"
|
||||||
|
magicFileHeader = "PK\003\004"
|
||||||
|
sizeFileHeader = struct.calcsize(structFileHeader)
|
||||||
|
|
||||||
|
# The "Zip64 end of central directory locator" structure, magic number, and size
|
||||||
|
structEndCentDir64Locator = "<4sLQL"
|
||||||
|
magicEndCentDir64Locator = "PK\x06\x07"
|
||||||
|
sizeEndCentDir64Locator = struct.calcsize(structEndCentDir64Locator)
|
||||||
|
|
||||||
|
# The "Zip64 end of central directory" record, magic number, size, and indices
|
||||||
|
# (section V.G in the format document)
|
||||||
|
structEndCentDir64 = "<4sQ2H2L4Q"
|
||||||
|
magicEndCentDir64 = "PK\x06\x06"
|
||||||
|
sizeEndCentDir64 = struct.calcsize(structEndCentDir64)
|
||||||
|
|
||||||
|
_CD64_SIGNATURE = 0
|
||||||
|
_CD64_DIRECTORY_RECSIZE = 1
|
||||||
|
_CD64_CREATE_VERSION = 2
|
||||||
|
_CD64_EXTRACT_VERSION = 3
|
||||||
|
_CD64_DISK_NUMBER = 4
|
||||||
|
_CD64_DISK_NUMBER_START = 5
|
||||||
|
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
|
||||||
|
_CD64_NUMBER_ENTRIES_TOTAL = 7
|
||||||
|
_CD64_DIRECTORY_SIZE = 8
|
||||||
|
_CD64_OFFSET_START_CENTDIR = 9
|
||||||
|
|
||||||
# indexes of entries in the central directory structure
|
# indexes of entries in the central directory structure
|
||||||
_CD_SIGNATURE = 0
|
_CD_SIGNATURE = 0
|
||||||
_CD_CREATE_VERSION = 1
|
_CD_CREATE_VERSION = 1
|
||||||
_CD_CREATE_SYSTEM = 2
|
_CD_CREATE_SYSTEM = 2
|
||||||
_CD_EXTRACT_VERSION = 3
|
_CD_EXTRACT_VERSION = 3
|
||||||
_CD_EXTRACT_SYSTEM = 4 # is this meaningful?
|
_CD_EXTRACT_SYSTEM = 4
|
||||||
_CD_FLAG_BITS = 5
|
_CD_FLAG_BITS = 5
|
||||||
_CD_COMPRESS_TYPE = 6
|
_CD_COMPRESS_TYPE = 6
|
||||||
_CD_TIME = 7
|
_CD_TIME = 7
|
||||||
|
@ -67,10 +115,15 @@ _CD_INTERNAL_FILE_ATTRIBUTES = 16
|
||||||
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
|
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
|
||||||
_CD_LOCAL_HEADER_OFFSET = 18
|
_CD_LOCAL_HEADER_OFFSET = 18
|
||||||
|
|
||||||
# indexes of entries in the local file header structure
|
# The "local file header" structure, magic number, size, and indices
|
||||||
|
# (section V.A in the format document)
|
||||||
|
structFileHeader = "<4s2B4HL2L2H"
|
||||||
|
magicFileHeader = "PK\003\004"
|
||||||
|
sizeFileHeader = struct.calcsize(structFileHeader)
|
||||||
|
|
||||||
_FH_SIGNATURE = 0
|
_FH_SIGNATURE = 0
|
||||||
_FH_EXTRACT_VERSION = 1
|
_FH_EXTRACT_VERSION = 1
|
||||||
_FH_EXTRACT_SYSTEM = 2 # is this meaningful?
|
_FH_EXTRACT_SYSTEM = 2
|
||||||
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
|
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
|
||||||
_FH_COMPRESSION_METHOD = 4
|
_FH_COMPRESSION_METHOD = 4
|
||||||
_FH_LAST_MOD_TIME = 5
|
_FH_LAST_MOD_TIME = 5
|
||||||
|
@ -81,6 +134,28 @@ _FH_UNCOMPRESSED_SIZE = 9
|
||||||
_FH_FILENAME_LENGTH = 10
|
_FH_FILENAME_LENGTH = 10
|
||||||
_FH_EXTRA_FIELD_LENGTH = 11
|
_FH_EXTRA_FIELD_LENGTH = 11
|
||||||
|
|
||||||
|
# The "Zip64 end of central directory locator" structure, magic number, and size
|
||||||
|
structEndCentDir64Locator = "<4sLQL"
|
||||||
|
magicEndCentDir64Locator = "PK\x06\x07"
|
||||||
|
sizeEndCentDir64Locator = struct.calcsize(structEndCentDir64Locator)
|
||||||
|
|
||||||
|
# The "Zip64 end of central directory" record, magic number, size, and indices
|
||||||
|
# (section V.G in the format document)
|
||||||
|
structEndCentDir64 = "<4sQ2H2L4Q"
|
||||||
|
magicEndCentDir64 = "PK\x06\x06"
|
||||||
|
sizeEndCentDir64 = struct.calcsize(structEndCentDir64)
|
||||||
|
|
||||||
|
_CD64_SIGNATURE = 0
|
||||||
|
_CD64_DIRECTORY_RECSIZE = 1
|
||||||
|
_CD64_CREATE_VERSION = 2
|
||||||
|
_CD64_EXTRACT_VERSION = 3
|
||||||
|
_CD64_DISK_NUMBER = 4
|
||||||
|
_CD64_DISK_NUMBER_START = 5
|
||||||
|
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
|
||||||
|
_CD64_NUMBER_ENTRIES_TOTAL = 7
|
||||||
|
_CD64_DIRECTORY_SIZE = 8
|
||||||
|
_CD64_OFFSET_START_CENTDIR = 9
|
||||||
|
|
||||||
def is_zipfile(filename):
|
def is_zipfile(filename):
|
||||||
"""Quickly see if file is a ZIP file by checking the magic number."""
|
"""Quickly see if file is a ZIP file by checking the magic number."""
|
||||||
try:
|
try:
|
||||||
|
@ -97,33 +172,31 @@ def _EndRecData64(fpin, offset, endrec):
|
||||||
"""
|
"""
|
||||||
Read the ZIP64 end-of-archive records and use that to update endrec
|
Read the ZIP64 end-of-archive records and use that to update endrec
|
||||||
"""
|
"""
|
||||||
locatorSize = struct.calcsize(structEndArchive64Locator)
|
fpin.seek(offset - sizeEndCentDir64Locator, 2)
|
||||||
fpin.seek(offset - locatorSize, 2)
|
data = fpin.read(sizeEndCentDir64Locator)
|
||||||
data = fpin.read(locatorSize)
|
sig, diskno, reloff, disks = struct.unpack(structEndCentDir64Locator, data)
|
||||||
sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
|
if sig != magicEndCentDir64Locator:
|
||||||
if sig != stringEndArchive64Locator:
|
|
||||||
return endrec
|
return endrec
|
||||||
|
|
||||||
if diskno != 0 or disks != 1:
|
if diskno != 0 or disks != 1:
|
||||||
raise BadZipfile("zipfiles that span multiple disks are not supported")
|
raise BadZipfile("zipfiles that span multiple disks are not supported")
|
||||||
|
|
||||||
# Assume no 'zip64 extensible data'
|
# Assume no 'zip64 extensible data'
|
||||||
endArchiveSize = struct.calcsize(structEndArchive64)
|
fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
|
||||||
fpin.seek(offset - locatorSize - endArchiveSize, 2)
|
data = fpin.read(sizeEndCentDir64)
|
||||||
data = fpin.read(endArchiveSize)
|
|
||||||
sig, sz, create_version, read_version, disk_num, disk_dir, \
|
sig, sz, create_version, read_version, disk_num, disk_dir, \
|
||||||
dircount, dircount2, dirsize, diroffset = \
|
dircount, dircount2, dirsize, diroffset = \
|
||||||
struct.unpack(structEndArchive64, data)
|
struct.unpack(structEndCentDir64, data)
|
||||||
if sig != stringEndArchive64:
|
if sig != magicEndCentDir64:
|
||||||
return endrec
|
return endrec
|
||||||
|
|
||||||
# Update the original endrec using data from the ZIP64 record
|
# Update the original endrec using data from the ZIP64 record
|
||||||
endrec[1] = disk_num
|
endrec[_ECD_DISK_NUMBER] = disk_num
|
||||||
endrec[2] = disk_dir
|
endrec[_ECD_DISK_START] = disk_dir
|
||||||
endrec[3] = dircount
|
endrec[_ECD_ENTRIES_THIS_DISK] = dircount
|
||||||
endrec[4] = dircount2
|
endrec[_ECD_ENTRIES_TOTAL] = dircount2
|
||||||
endrec[5] = dirsize
|
endrec[_ECD_SIZE] = dirsize
|
||||||
endrec[6] = diroffset
|
endrec[_ECD_OFFSET] = diroffset
|
||||||
return endrec
|
return endrec
|
||||||
|
|
||||||
|
|
||||||
|
@ -132,38 +205,59 @@ def _EndRecData(fpin):
|
||||||
|
|
||||||
The data is a list of the nine items in the ZIP "End of central dir"
|
The data is a list of the nine items in the ZIP "End of central dir"
|
||||||
record followed by a tenth item, the file seek offset of this record."""
|
record followed by a tenth item, the file seek offset of this record."""
|
||||||
fpin.seek(-22, 2) # Assume no archive comment.
|
|
||||||
filesize = fpin.tell() + 22 # Get file size
|
# Determine file size
|
||||||
|
fpin.seek(0, 2)
|
||||||
|
filesize = fpin.tell()
|
||||||
|
|
||||||
|
# Check to see if this is ZIP file with no archive comment (the
|
||||||
|
# "end of central directory" structure should be the last item in the
|
||||||
|
# file if this is the case).
|
||||||
|
fpin.seek(-sizeEndCentDir, 2)
|
||||||
data = fpin.read()
|
data = fpin.read()
|
||||||
if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
|
if data[0:4] == magicEndCentDir and data[-2:] == "\000\000":
|
||||||
endrec = struct.unpack(structEndArchive, data)
|
# the signature is correct and there's no comment, unpack structure
|
||||||
endrec = list(endrec)
|
endrec = struct.unpack(structEndCentDir, data)
|
||||||
endrec.append("") # Append the archive comment
|
endrec=list(endrec)
|
||||||
endrec.append(filesize - 22) # Append the record start offset
|
|
||||||
if endrec[-4] == 0xffffffff:
|
# Append a blank comment and record start offset
|
||||||
return _EndRecData64(fpin, -22, endrec)
|
endrec.append("")
|
||||||
|
endrec.append(filesize - sizeEndCentDir)
|
||||||
|
if endrec[_ECD_OFFSET] == 0xffffffff:
|
||||||
|
# the value for the "offset of the start of the central directory"
|
||||||
|
# indicates that there is a "Zip64 end of central directory"
|
||||||
|
# structure present, so go look for it
|
||||||
|
return _EndRecData64(fpin, -sizeEndCentDir, endrec)
|
||||||
|
|
||||||
return endrec
|
return endrec
|
||||||
# Search the last END_BLOCK bytes of the file for the record signature.
|
|
||||||
# The comment is appended to the ZIP file and has a 16 bit length.
|
# Either this is not a ZIP file, or it is a ZIP file with an archive
|
||||||
# So the comment may be up to 64K long. We limit the search for the
|
# comment. Search the end of the file for the "end of central directory"
|
||||||
# signature to a few Kbytes at the end of the file for efficiency.
|
# record signature. The comment is the last item in the ZIP file and may be
|
||||||
# also, the signature must not appear in the comment.
|
# up to 64K long. It is assumed that the "end of central directory" magic
|
||||||
END_BLOCK = min(filesize, 1024 * 4)
|
# number does not appear in the comment.
|
||||||
fpin.seek(filesize - END_BLOCK, 0)
|
maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
|
||||||
|
fpin.seek(maxCommentStart, 0)
|
||||||
data = fpin.read()
|
data = fpin.read()
|
||||||
start = data.rfind(stringEndArchive)
|
start = data.rfind(magicEndCentDir)
|
||||||
if start >= 0: # Correct signature string was found
|
if start >= 0:
|
||||||
endrec = struct.unpack(structEndArchive, data[start:start+22])
|
# found the magic number; attempt to unpack and interpret
|
||||||
endrec = list(endrec)
|
recData = data[start:start+sizeEndCentDir]
|
||||||
comment = data[start+22:]
|
endrec = list(struct.unpack(structEndCentDir, recData))
|
||||||
if endrec[7] == len(comment): # Comment length checks out
|
comment = data[start+sizeEndCentDir:]
|
||||||
|
# check that comment length is correct
|
||||||
|
if endrec[_ECD_COMMENT_SIZE] == len(comment):
|
||||||
# Append the archive comment and start offset
|
# Append the archive comment and start offset
|
||||||
endrec.append(comment)
|
endrec.append(comment)
|
||||||
endrec.append(filesize - END_BLOCK + start)
|
endrec.append(maxCommentStart + start)
|
||||||
if endrec[-4] == 0xffffffff:
|
if endrec[_ECD_OFFSET] == 0xffffffff:
|
||||||
return _EndRecData64(fpin, - END_BLOCK + start, endrec)
|
# There is apparently a "Zip64 end of central directory"
|
||||||
|
# structure present, so go look for it
|
||||||
|
return _EndRecData64(fpin, start - filesize, endrec)
|
||||||
return endrec
|
return endrec
|
||||||
return # Error, return None
|
|
||||||
|
# Unable to find a valid end of central directory structure
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
class ZipInfo (object):
|
class ZipInfo (object):
|
||||||
|
@ -250,13 +344,13 @@ class ZipInfo (object):
|
||||||
fmt = '<HHQQ'
|
fmt = '<HHQQ'
|
||||||
extra = extra + struct.pack(fmt,
|
extra = extra + struct.pack(fmt,
|
||||||
1, struct.calcsize(fmt)-4, file_size, compress_size)
|
1, struct.calcsize(fmt)-4, file_size, compress_size)
|
||||||
file_size = 0xffffffff # -1
|
file_size = 0xffffffff
|
||||||
compress_size = 0xffffffff # -1
|
compress_size = 0xffffffff
|
||||||
self.extract_version = max(45, self.extract_version)
|
self.extract_version = max(45, self.extract_version)
|
||||||
self.create_version = max(45, self.extract_version)
|
self.create_version = max(45, self.extract_version)
|
||||||
|
|
||||||
filename, flag_bits = self._encodeFilenameFlags()
|
filename, flag_bits = self._encodeFilenameFlags()
|
||||||
header = struct.pack(structFileHeader, stringFileHeader,
|
header = struct.pack(structFileHeader, magicFileHeader,
|
||||||
self.extract_version, self.reserved, flag_bits,
|
self.extract_version, self.reserved, flag_bits,
|
||||||
self.compress_type, dostime, dosdate, CRC,
|
self.compress_type, dostime, dosdate, CRC,
|
||||||
compress_size, file_size,
|
compress_size, file_size,
|
||||||
|
@ -299,16 +393,15 @@ class ZipInfo (object):
|
||||||
idx = 0
|
idx = 0
|
||||||
|
|
||||||
# ZIP64 extension (large files and/or large archives)
|
# ZIP64 extension (large files and/or large archives)
|
||||||
# XXX Is this correct? won't this exclude 2**32-1 byte files?
|
|
||||||
if self.file_size in (0xffffffffffffffffL, 0xffffffffL):
|
if self.file_size in (0xffffffffffffffffL, 0xffffffffL):
|
||||||
self.file_size = counts[idx]
|
self.file_size = counts[idx]
|
||||||
idx += 1
|
idx += 1
|
||||||
|
|
||||||
if self.compress_size == -1 or self.compress_size == 0xFFFFFFFFL:
|
if self.compress_size == 0xFFFFFFFFL:
|
||||||
self.compress_size = counts[idx]
|
self.compress_size = counts[idx]
|
||||||
idx += 1
|
idx += 1
|
||||||
|
|
||||||
if self.header_offset == -1 or self.header_offset == 0xffffffffL:
|
if self.header_offset == 0xffffffffL:
|
||||||
old = self.header_offset
|
old = self.header_offset
|
||||||
self.header_offset = counts[idx]
|
self.header_offset = counts[idx]
|
||||||
idx+=1
|
idx+=1
|
||||||
|
@ -572,7 +665,7 @@ class ZipExtFile:
|
||||||
class ZipFile:
|
class ZipFile:
|
||||||
""" Class with methods to open, read, write, close, list zip files.
|
""" Class with methods to open, read, write, close, list zip files.
|
||||||
|
|
||||||
z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
|
z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
|
||||||
|
|
||||||
file: Either the path to the file, or a file-like object.
|
file: Either the path to the file, or a file-like object.
|
||||||
If it is a path, the file will be opened and closed by ZipFile.
|
If it is a path, the file will be opened and closed by ZipFile.
|
||||||
|
@ -608,6 +701,7 @@ class ZipFile:
|
||||||
self.compression = compression # Method of compression
|
self.compression = compression # Method of compression
|
||||||
self.mode = key = mode.replace('b', '')[0]
|
self.mode = key = mode.replace('b', '')[0]
|
||||||
self.pwd = None
|
self.pwd = None
|
||||||
|
self.comment = ''
|
||||||
|
|
||||||
# Check if we were passed a file-like object
|
# Check if we were passed a file-like object
|
||||||
if isinstance(file, basestring):
|
if isinstance(file, basestring):
|
||||||
|
@ -663,18 +757,20 @@ class ZipFile:
|
||||||
raise BadZipfile, "File is not a zip file"
|
raise BadZipfile, "File is not a zip file"
|
||||||
if self.debug > 1:
|
if self.debug > 1:
|
||||||
print endrec
|
print endrec
|
||||||
size_cd = endrec[5] # bytes in central directory
|
size_cd = endrec[_ECD_SIZE] # bytes in central directory
|
||||||
offset_cd = endrec[6] # offset of central directory
|
offset_cd = endrec[_ECD_OFFSET] # offset of central directory
|
||||||
self.comment = endrec[8] # archive comment
|
self.comment = endrec[_ECD_COMMENT] # archive comment
|
||||||
# endrec[9] is the offset of the "End of Central Dir" record
|
|
||||||
if endrec[9] > ZIP64_LIMIT:
|
|
||||||
x = endrec[9] - size_cd - 56 - 20
|
|
||||||
else:
|
|
||||||
x = endrec[9] - size_cd
|
|
||||||
# "concat" is zero, unless zip was concatenated to another file
|
# "concat" is zero, unless zip was concatenated to another file
|
||||||
concat = x - offset_cd
|
concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
|
||||||
|
if endrec[_ECD_LOCATION] > ZIP64_LIMIT:
|
||||||
|
# If the offset of the "End of Central Dir" record requires Zip64
|
||||||
|
# extension structures, account for them
|
||||||
|
concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
|
||||||
|
|
||||||
if self.debug > 2:
|
if self.debug > 2:
|
||||||
print "given, inferred, offset", offset_cd, x, concat
|
inferred = concat + offset_cd
|
||||||
|
print "given, inferred, offset", offset_cd, inferred, concat
|
||||||
# self.start_dir: Position of start of central directory
|
# self.start_dir: Position of start of central directory
|
||||||
self.start_dir = offset_cd + concat
|
self.start_dir = offset_cd + concat
|
||||||
fp.seek(self.start_dir, 0)
|
fp.seek(self.start_dir, 0)
|
||||||
|
@ -682,9 +778,8 @@ class ZipFile:
|
||||||
fp = cStringIO.StringIO(data)
|
fp = cStringIO.StringIO(data)
|
||||||
total = 0
|
total = 0
|
||||||
while total < size_cd:
|
while total < size_cd:
|
||||||
centdir = fp.read(46)
|
centdir = fp.read(sizeCentralDir)
|
||||||
total = total + 46
|
if centdir[0:4] != magicCentralDir:
|
||||||
if centdir[0:4] != stringCentralDir:
|
|
||||||
raise BadZipfile, "Bad magic number for central directory"
|
raise BadZipfile, "Bad magic number for central directory"
|
||||||
centdir = struct.unpack(structCentralDir, centdir)
|
centdir = struct.unpack(structCentralDir, centdir)
|
||||||
if self.debug > 2:
|
if self.debug > 2:
|
||||||
|
@ -694,9 +789,6 @@ class ZipFile:
|
||||||
x = ZipInfo(filename)
|
x = ZipInfo(filename)
|
||||||
x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
|
x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
|
||||||
x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
|
x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
|
||||||
total = (total + centdir[_CD_FILENAME_LENGTH]
|
|
||||||
+ centdir[_CD_EXTRA_FIELD_LENGTH]
|
|
||||||
+ centdir[_CD_COMMENT_LENGTH])
|
|
||||||
x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
|
x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
|
||||||
(x.create_version, x.create_system, x.extract_version, x.reserved,
|
(x.create_version, x.create_system, x.extract_version, x.reserved,
|
||||||
x.flag_bits, x.compress_type, t, d,
|
x.flag_bits, x.compress_type, t, d,
|
||||||
|
@ -712,6 +804,12 @@ class ZipFile:
|
||||||
x.filename = x._decodeFilename()
|
x.filename = x._decodeFilename()
|
||||||
self.filelist.append(x)
|
self.filelist.append(x)
|
||||||
self.NameToInfo[x.filename] = x
|
self.NameToInfo[x.filename] = x
|
||||||
|
|
||||||
|
# update total bytes read from central directory
|
||||||
|
total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
|
||||||
|
+ centdir[_CD_EXTRA_FIELD_LENGTH]
|
||||||
|
+ centdir[_CD_COMMENT_LENGTH])
|
||||||
|
|
||||||
if self.debug > 2:
|
if self.debug > 2:
|
||||||
print "total", total
|
print "total", total
|
||||||
|
|
||||||
|
@ -743,7 +841,6 @@ class ZipFile:
|
||||||
except BadZipfile:
|
except BadZipfile:
|
||||||
return zinfo.filename
|
return zinfo.filename
|
||||||
|
|
||||||
|
|
||||||
def getinfo(self, name):
|
def getinfo(self, name):
|
||||||
"""Return the instance of ZipInfo given 'name'."""
|
"""Return the instance of ZipInfo given 'name'."""
|
||||||
info = self.NameToInfo.get(name)
|
info = self.NameToInfo.get(name)
|
||||||
|
@ -787,8 +884,8 @@ class ZipFile:
|
||||||
zef_file.seek(zinfo.header_offset, 0)
|
zef_file.seek(zinfo.header_offset, 0)
|
||||||
|
|
||||||
# Skip the file header:
|
# Skip the file header:
|
||||||
fheader = zef_file.read(30)
|
fheader = zef_file.read(sizeFileHeader)
|
||||||
if fheader[0:4] != stringFileHeader:
|
if fheader[0:4] != magicFileHeader:
|
||||||
raise BadZipfile, "Bad magic number for file header"
|
raise BadZipfile, "Bad magic number for file header"
|
||||||
|
|
||||||
fheader = struct.unpack(structFileHeader, fheader)
|
fheader = struct.unpack(structFileHeader, fheader)
|
||||||
|
@ -1048,15 +1145,15 @@ class ZipFile:
|
||||||
or zinfo.compress_size > ZIP64_LIMIT:
|
or zinfo.compress_size > ZIP64_LIMIT:
|
||||||
extra.append(zinfo.file_size)
|
extra.append(zinfo.file_size)
|
||||||
extra.append(zinfo.compress_size)
|
extra.append(zinfo.compress_size)
|
||||||
file_size = 0xffffffff #-1
|
file_size = 0xffffffff
|
||||||
compress_size = 0xffffffff #-1
|
compress_size = 0xffffffff
|
||||||
else:
|
else:
|
||||||
file_size = zinfo.file_size
|
file_size = zinfo.file_size
|
||||||
compress_size = zinfo.compress_size
|
compress_size = zinfo.compress_size
|
||||||
|
|
||||||
if zinfo.header_offset > ZIP64_LIMIT:
|
if zinfo.header_offset > ZIP64_LIMIT:
|
||||||
extra.append(zinfo.header_offset)
|
extra.append(zinfo.header_offset)
|
||||||
header_offset = 0xffffffffL # -1 32 bit
|
header_offset = 0xffffffffL
|
||||||
else:
|
else:
|
||||||
header_offset = zinfo.header_offset
|
header_offset = zinfo.header_offset
|
||||||
|
|
||||||
|
@ -1076,7 +1173,7 @@ class ZipFile:
|
||||||
try:
|
try:
|
||||||
filename, flag_bits = zinfo._encodeFilenameFlags()
|
filename, flag_bits = zinfo._encodeFilenameFlags()
|
||||||
centdir = struct.pack(structCentralDir,
|
centdir = struct.pack(structCentralDir,
|
||||||
stringCentralDir, create_version,
|
magicCentralDir, create_version,
|
||||||
zinfo.create_system, extract_version, zinfo.reserved,
|
zinfo.create_system, extract_version, zinfo.reserved,
|
||||||
flag_bits, zinfo.compress_type, dostime, dosdate,
|
flag_bits, zinfo.compress_type, dostime, dosdate,
|
||||||
zinfo.CRC, compress_size, file_size,
|
zinfo.CRC, compress_size, file_size,
|
||||||
|
@ -1100,27 +1197,35 @@ class ZipFile:
|
||||||
|
|
||||||
pos2 = self.fp.tell()
|
pos2 = self.fp.tell()
|
||||||
# Write end-of-zip-archive record
|
# Write end-of-zip-archive record
|
||||||
|
centDirOffset = pos1
|
||||||
if pos1 > ZIP64_LIMIT:
|
if pos1 > ZIP64_LIMIT:
|
||||||
# Need to write the ZIP64 end-of-archive records
|
# Need to write the ZIP64 end-of-archive records
|
||||||
zip64endrec = struct.pack(
|
zip64endrec = struct.pack(
|
||||||
structEndArchive64, stringEndArchive64,
|
structEndCentDir64, magicEndCentDir64,
|
||||||
44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
|
44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
|
||||||
self.fp.write(zip64endrec)
|
self.fp.write(zip64endrec)
|
||||||
|
|
||||||
zip64locrec = struct.pack(
|
zip64locrec = struct.pack(
|
||||||
structEndArchive64Locator,
|
structEndCentDir64Locator,
|
||||||
stringEndArchive64Locator, 0, pos2, 1)
|
magicEndCentDir64Locator, 0, pos2, 1)
|
||||||
self.fp.write(zip64locrec)
|
self.fp.write(zip64locrec)
|
||||||
|
centDirOffset = 0xFFFFFFFF
|
||||||
|
|
||||||
endrec = struct.pack(structEndArchive, stringEndArchive,
|
# check for valid comment length
|
||||||
0, 0, count, count, pos2 - pos1, 0xffffffffL, 0)
|
if len(self.comment) >= ZIP_MAX_COMMENT:
|
||||||
self.fp.write(endrec)
|
if self.debug > 0:
|
||||||
|
msg = 'Archive comment is too long; truncating to %d bytes' \
|
||||||
|
% ZIP_MAX_COMMENT
|
||||||
|
self.comment = self.comment[:ZIP_MAX_COMMENT]
|
||||||
|
|
||||||
else:
|
endrec = struct.pack(structEndCentDir, magicEndCentDir,
|
||||||
endrec = struct.pack(structEndArchive, stringEndArchive,
|
0, 0, count % ZIP_FILECOUNT_LIMIT,
|
||||||
0, 0, count, count, pos2 - pos1, pos1, 0)
|
count % ZIP_FILECOUNT_LIMIT, pos2 - pos1,
|
||||||
|
centDirOffset, len(self.comment))
|
||||||
self.fp.write(endrec)
|
self.fp.write(endrec)
|
||||||
|
self.fp.write(self.comment)
|
||||||
self.fp.flush()
|
self.fp.flush()
|
||||||
|
|
||||||
if not self._filePassed:
|
if not self._filePassed:
|
||||||
self.fp.close()
|
self.fp.close()
|
||||||
self.fp = None
|
self.fp = None
|
||||||
|
|
|
@ -33,6 +33,14 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #1622: Correct interpretation of various ZIP header fields.
|
||||||
|
|
||||||
|
- Issue #1526: Allow more than 64k files to be added to Zip64 file.
|
||||||
|
|
||||||
|
- Issue #1746: Correct handling of zipfile archive comments (previously
|
||||||
|
archives with comments over 4k were flagged as invalid). Allow writing
|
||||||
|
Zip files with archive comments by setting the 'comment' attribute of a ZipFile.
|
||||||
|
|
||||||
- Issue #449227: Now with the rlcompleter module, callable objects are added
|
- Issue #449227: Now with the rlcompleter module, callable objects are added
|
||||||
"(" when completed.
|
"(" when completed.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue