Patch #1446489 (zipfile: support for ZIP64)

This commit is contained in:
Ronald Oussoren 2006-06-15 08:14:18 +00:00
parent 0eac11826a
commit 143cefb846
5 changed files with 665 additions and 63 deletions

View File

@ -17,7 +17,8 @@ understanding of the format, as defined in
Note}. Note}.
This module does not currently handle ZIP files which have appended This module does not currently handle ZIP files which have appended
comments, or multi-disk ZIP files. comments, or multi-disk ZIP files. It can handle ZIP files that use the
ZIP64 extensions (that is, ZIP files that are more than 4 GByte in size).
The available attributes of this module are: The available attributes of this module are:
@ -25,6 +26,11 @@ The available attributes of this module are:
The error raised for bad ZIP files. The error raised for bad ZIP files.
\end{excdesc} \end{excdesc}
\begin{excdesc}{LargeZipFile}
The error raised when a ZIP file would require ZIP64 functionality but that
functionality has not been enabled.
\end{excdesc}
\begin{classdesc*}{ZipFile} \begin{classdesc*}{ZipFile}
The class for reading and writing ZIP files. See The class for reading and writing ZIP files. See
``\citetitle{ZipFile Objects}'' (section \ref{zipfile-objects}) for ``\citetitle{ZipFile Objects}'' (section \ref{zipfile-objects}) for
@ -77,7 +83,7 @@ The available attributes of this module are:
\subsection{ZipFile Objects \label{zipfile-objects}} \subsection{ZipFile Objects \label{zipfile-objects}}
\begin{classdesc}{ZipFile}{file\optional{, mode\optional{, compression}}} \begin{classdesc}{ZipFile}{file\optional{, mode\optional{, compression\optional{, allowZip64}}}}
Open a ZIP file, where \var{file} can be either a path to a file Open a ZIP file, where \var{file} can be either a path to a file
(a string) or a file-like object. The \var{mode} parameter (a string) or a file-like object. The \var{mode} parameter
should be \code{'r'} to read an existing file, \code{'w'} to should be \code{'r'} to read an existing file, \code{'w'} to
@ -100,6 +106,12 @@ cat myzip.zip >> python.exe
is specified but the \refmodule{zlib} module is not available, is specified but the \refmodule{zlib} module is not available,
\exception{RuntimeError} is also raised. The default is \exception{RuntimeError} is also raised. The default is
\constant{ZIP_STORED}. \constant{ZIP_STORED}.
If \var{allowZip64} is \code{True}, zipfile will create ZIP files that use
the ZIP64 extensions when the ZIP file is larger than 2 GBytes. If it is
false (the default), zipfile will raise an exception when the ZIP file would
require ZIP64 extensions. ZIP64 extensions are disabled by default because
the default zip and unzip commands on Unix (the InfoZIP utilities) don't
support these extensions.
\end{classdesc} \end{classdesc}
\begin{methoddesc}{close}{} \begin{methoddesc}{close}{}
@ -132,8 +144,8 @@ cat myzip.zip >> python.exe
\end{methoddesc} \end{methoddesc}
\begin{methoddesc}{testzip}{} \begin{methoddesc}{testzip}{}
Read all the files in the archive and check their CRC's. Return the Read all the files in the archive and check their CRC's and file
name of the first bad file, or else return \code{None}. headers. Return the name of the first bad file, or else return \code{None}.
\end{methoddesc} \end{methoddesc}
\begin{methoddesc}{write}{filename\optional{, arcname\optional{, \begin{methoddesc}{write}{filename\optional{, arcname\optional{,
@ -284,10 +296,6 @@ Instances have the following attributes:
Byte offset to the file header. Byte offset to the file header.
\end{memberdesc} \end{memberdesc}
\begin{memberdesc}[ZipInfo]{file_offset}
Byte offset to the start of the file data.
\end{memberdesc}
\begin{memberdesc}[ZipInfo]{CRC} \begin{memberdesc}[ZipInfo]{CRC}
CRC-32 of the uncompressed file. CRC-32 of the uncompressed file.
\end{memberdesc} \end{memberdesc}

View File

@ -4,7 +4,7 @@ try:
except ImportError: except ImportError:
zlib = None zlib = None
import zipfile, os, unittest import zipfile, os, unittest, sys, shutil
from StringIO import StringIO from StringIO import StringIO
from tempfile import TemporaryFile from tempfile import TemporaryFile
@ -28,14 +28,70 @@ class TestsWithSourceFile(unittest.TestCase):
zipfp = zipfile.ZipFile(f, "w", compression) zipfp = zipfile.ZipFile(f, "w", compression)
zipfp.write(TESTFN, "another"+os.extsep+"name") zipfp.write(TESTFN, "another"+os.extsep+"name")
zipfp.write(TESTFN, TESTFN) zipfp.write(TESTFN, TESTFN)
zipfp.writestr("strfile", self.data)
zipfp.close() zipfp.close()
# Read the ZIP archive # Read the ZIP archive
zipfp = zipfile.ZipFile(f, "r", compression) zipfp = zipfile.ZipFile(f, "r", compression)
self.assertEqual(zipfp.read(TESTFN), self.data) self.assertEqual(zipfp.read(TESTFN), self.data)
self.assertEqual(zipfp.read("another"+os.extsep+"name"), self.data) self.assertEqual(zipfp.read("another"+os.extsep+"name"), self.data)
self.assertEqual(zipfp.read("strfile"), self.data)
# Print the ZIP directory
fp = StringIO()
stdout = sys.stdout
try:
sys.stdout = fp
zipfp.printdir()
finally:
sys.stdout = stdout
directory = fp.getvalue()
lines = directory.splitlines()
self.assertEquals(len(lines), 4) # Number of files + header
self.assert_('File Name' in lines[0])
self.assert_('Modified' in lines[0])
self.assert_('Size' in lines[0])
fn, date, time, size = lines[1].split()
self.assertEquals(fn, 'another.name')
# XXX: timestamp is not tested
self.assertEquals(size, str(len(self.data)))
# Check the namelist
names = zipfp.namelist()
self.assertEquals(len(names), 3)
self.assert_(TESTFN in names)
self.assert_("another"+os.extsep+"name" in names)
self.assert_("strfile" in names)
# Check infolist
infos = zipfp.infolist()
names = [ i.filename for i in infos ]
self.assertEquals(len(names), 3)
self.assert_(TESTFN in names)
self.assert_("another"+os.extsep+"name" in names)
self.assert_("strfile" in names)
for i in infos:
self.assertEquals(i.file_size, len(self.data))
# check getinfo
for nm in (TESTFN, "another"+os.extsep+"name", "strfile"):
info = zipfp.getinfo(nm)
self.assertEquals(info.filename, nm)
self.assertEquals(info.file_size, len(self.data))
# Check that testzip doesn't raise an exception
zipfp.testzip()
zipfp.close() zipfp.close()
def testStored(self): def testStored(self):
for f in (TESTFN2, TemporaryFile(), StringIO()): for f in (TESTFN2, TemporaryFile(), StringIO()):
self.zipTest(f, zipfile.ZIP_STORED) self.zipTest(f, zipfile.ZIP_STORED)
@ -59,6 +115,197 @@ class TestsWithSourceFile(unittest.TestCase):
os.remove(TESTFN) os.remove(TESTFN)
os.remove(TESTFN2) os.remove(TESTFN2)
class TestZip64InSmallFiles(unittest.TestCase):
    """Test the ZIP64 functionality without using large files.

    setUp() lowers zipfile.ZIP64_LIMIT to 5 so that the small test data is
    already treated as "large"; tearDown() puts the saved limit back.
    See test_zipfile64 for proper tests with really large files.
    """

    def setUp(self):
        line_gen = ("Test of zipfile line %d." % i for i in range(0, 1000))
        self.data = '\n'.join(line_gen)

        # Make a source file with some lines.  This happens before the
        # limit is patched: unittest does not run tearDown() when setUp()
        # fails, so a failing write must not leave the lowered limit behind
        # for the tests that run afterwards.
        fp = open(TESTFN, "wb")
        try:
            fp.write(self.data)
        finally:
            fp.close()

        self._limit = zipfile.ZIP64_LIMIT
        zipfile.ZIP64_LIMIT = 5

    def largeFileExceptionTest(self, f, compression):
        """write() must raise LargeZipFile when ZIP64 is not allowed."""
        zipfp = zipfile.ZipFile(f, "w", compression)
        try:
            self.assertRaises(zipfile.LargeZipFile,
                    zipfp.write, TESTFN, "another"+os.extsep+"name")
        finally:
            zipfp.close()

    def largeFileExceptionTest2(self, f, compression):
        """writestr() must raise LargeZipFile when ZIP64 is not allowed."""
        zipfp = zipfile.ZipFile(f, "w", compression)
        try:
            self.assertRaises(zipfile.LargeZipFile,
                    zipfp.writestr, "another"+os.extsep+"name", self.data)
        finally:
            zipfp.close()

    def testLargeFileException(self):
        for f in (TESTFN2, TemporaryFile(), StringIO()):
            self.largeFileExceptionTest(f, zipfile.ZIP_STORED)
            self.largeFileExceptionTest2(f, zipfile.ZIP_STORED)

    def _checkPrintdir(self, zipfp, first_name):
        """Check the listing printed by printdir(): a header plus 3 files."""
        fp = StringIO()
        stdout = sys.stdout
        try:
            sys.stdout = fp
            zipfp.printdir()
        finally:
            sys.stdout = stdout

        lines = fp.getvalue().splitlines()
        self.assertEquals(len(lines), 4) # Number of files + header

        self.assert_('File Name' in lines[0])
        self.assert_('Modified' in lines[0])
        self.assert_('Size' in lines[0])

        fn, date, time, size = lines[1].split()
        # Compare with the name that was really written (it is built with
        # os.extsep) instead of a hard-coded 'another.name'.
        self.assertEquals(fn, first_name)
        # XXX: timestamp is not tested
        self.assertEquals(size, str(len(self.data)))

    def _checkMemberNames(self, names, expected):
        """Check that names holds exactly the expected member names."""
        self.assertEquals(len(names), len(expected))
        for nm in expected:
            self.assert_(nm in names)

    def zipTest(self, f, compression):
        """Write three members with ZIP64 allowed and check they read back."""
        arcname = "another" + os.extsep + "name"
        members = (TESTFN, arcname, "strfile")

        # Create the ZIP archive
        zipfp = zipfile.ZipFile(f, "w", compression, allowZip64=True)
        try:
            zipfp.write(TESTFN, arcname)
            zipfp.write(TESTFN, TESTFN)
            zipfp.writestr("strfile", self.data)
        finally:
            zipfp.close()

        # Read the ZIP archive; close it even when an assertion fails
        zipfp = zipfile.ZipFile(f, "r", compression)
        try:
            for nm in members:
                self.assertEqual(zipfp.read(nm), self.data)

            # Print the ZIP directory
            self._checkPrintdir(zipfp, arcname)

            # Check the namelist
            self._checkMemberNames(zipfp.namelist(), members)

            # Check infolist
            infos = zipfp.infolist()
            self._checkMemberNames([ i.filename for i in infos ], members)
            for i in infos:
                self.assertEquals(i.file_size, len(self.data))

            # check getinfo
            for nm in members:
                info = zipfp.getinfo(nm)
                self.assertEquals(info.filename, nm)
                self.assertEquals(info.file_size, len(self.data))

            # Check that testzip doesn't raise an exception
            zipfp.testzip()
        finally:
            zipfp.close()

    def testStored(self):
        for f in (TESTFN2, TemporaryFile(), StringIO()):
            self.zipTest(f, zipfile.ZIP_STORED)

    if zlib:
        def testDeflated(self):
            for f in (TESTFN2, TemporaryFile(), StringIO()):
                self.zipTest(f, zipfile.ZIP_DEFLATED)

    def testAbsoluteArcnames(self):
        zipfp = zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED, allowZip64=True)
        try:
            zipfp.write(TESTFN, "/absolute")
        finally:
            zipfp.close()

        zipfp = zipfile.ZipFile(TESTFN2, "r", zipfile.ZIP_STORED)
        try:
            self.assertEqual(zipfp.namelist(), ["absolute"])
        finally:
            zipfp.close()

    def tearDown(self):
        # Restore the real limit first so it is put back no matter what
        # happens while cleaning up the files.
        zipfile.ZIP64_LIMIT = self._limit
        for name in (TESTFN, TESTFN2):
            if os.path.exists(name):
                os.remove(name)
class PyZipFileTests(unittest.TestCase):
    """Tests for PyZipFile.writepy()."""

    def _sourcePath(self):
        """Return the path of this module with a .pyc/.pyo suffix cut to .py."""
        fn = __file__
        if fn.endswith('.pyc') or fn.endswith('.pyo'):
            fn = fn[:-1]
        return fn

    def _assertOnlyCompiled(self, zipfp, source):
        """Check the archive holds source + 'c' or 'o' but not source itself."""
        self.assert_(source not in zipfp.namelist())
        self.assert_(source + 'o' in zipfp.namelist() or
                     source + 'c' in zipfp.namelist())

    def testWritePyfile(self):
        fn = self._sourcePath()

        zipfp = zipfile.PyZipFile(TemporaryFile(), "w")
        try:
            zipfp.writepy(fn)
            self._assertOnlyCompiled(zipfp, os.path.basename(fn))
        finally:
            zipfp.close()

        zipfp = zipfile.PyZipFile(TemporaryFile(), "w")
        try:
            zipfp.writepy(fn, "testpackage")
            bn = "%s/%s"%("testpackage", os.path.basename(fn))
            self._assertOnlyCompiled(zipfp, bn)
        finally:
            zipfp.close()

    def testWritePythonPackage(self):
        import email
        packagedir = os.path.dirname(email.__file__)

        zipfp = zipfile.PyZipFile(TemporaryFile(), "w")
        try:
            zipfp.writepy(packagedir)

            # Check for a couple of modules at different levels of the hierarchy
            names = zipfp.namelist()
            self.assert_('email/__init__.pyo' in names or 'email/__init__.pyc' in names)
            self.assert_('email/mime/text.pyo' in names or 'email/mime/text.pyc' in names)
        finally:
            # The archive used to be left open here
            zipfp.close()

    def testWritePythonDirectory(self):
        os.mkdir(TESTFN2)
        try:
            sources = (
                ("mod1.py", "print 42\n"),
                ("mod2.py", "print 42 * 42\n"),
                ("mod2.txt", "bla bla bla\n"),
            )
            for name, text in sources:
                fp = open(os.path.join(TESTFN2, name), "w")
                try:
                    fp.write(text)
                finally:
                    fp.close()

            zipfp = zipfile.PyZipFile(TemporaryFile(), "w")
            try:
                zipfp.writepy(TESTFN2)

                names = zipfp.namelist()
                self.assert_('mod1.pyc' in names or 'mod1.pyo' in names)
                self.assert_('mod2.pyc' in names or 'mod2.pyo' in names)
                self.assert_('mod2.txt' not in names)
            finally:
                # The archive used to be left open here
                zipfp.close()

        finally:
            shutil.rmtree(TESTFN2)
class OtherTests(unittest.TestCase): class OtherTests(unittest.TestCase):
def testCloseErroneousFile(self): def testCloseErroneousFile(self):
# This test checks that the ZipFile constructor closes the file object # This test checks that the ZipFile constructor closes the file object
@ -103,7 +350,8 @@ class OtherTests(unittest.TestCase):
self.assertRaises(RuntimeError, zipf.testzip) self.assertRaises(RuntimeError, zipf.testzip)
def test_main(): def test_main():
run_unittest(TestsWithSourceFile, OtherTests) run_unittest(TestsWithSourceFile, TestZip64InSmallFiles, OtherTests, PyZipFileTests)
#run_unittest(TestZip64InSmallFiles)
if __name__ == "__main__": if __name__ == "__main__":
test_main() test_main()

View File

@ -0,0 +1,67 @@
# Tests of the full ZIP64 functionality of zipfile
# The test_support.requires call is the only reason for keeping this separate
# from test_zipfile
from test import test_support
test_support.requires(
'largefile',
'test requires loads of disk-space bytes and a long time to run'
)
# We can test part of the module without zlib.
try:
import zlib
except ImportError:
zlib = None
import zipfile, os, unittest
from StringIO import StringIO
from tempfile import TemporaryFile
from test.test_support import TESTFN, run_unittest
TESTFN2 = TESTFN + "2"
class TestsWithSourceFile(unittest.TestCase):
    """Write and read back archives that are larger than 4 GByte."""

    def setUp(self):
        line_gen = ("Test of zipfile line %d." % i for i in range(0, 1000000))
        self.data = '\n'.join(line_gen)

        # Make a source file with some lines
        fp = open(TESTFN, "wb")
        try:
            fp.write(self.data)
        finally:
            fp.close()

    def zipTest(self, f, compression):
        """Fill an archive with 1.5 * 4 GByte of members and read them back."""
        # Number of copies of self.data needed to get well past 4 GByte
        filecount = int(((1 << 32) // len(self.data)) * 1.5)

        # Create the ZIP archive
        zipfp = zipfile.ZipFile(f, "w", compression, allowZip64=True)
        try:
            for num in range(filecount):
                zipfp.writestr("testfn%d"%(num,), self.data)
        finally:
            zipfp.close()

        # Read the ZIP archive; close it even when a comparison fails
        zipfp = zipfile.ZipFile(f, "r", compression)
        try:
            for num in range(filecount):
                self.assertEqual(zipfp.read("testfn%d"%(num,)), self.data)
        finally:
            zipfp.close()

    def _zipTestAllTargets(self, compression):
        """Run zipTest() on a named file and on an anonymous temporary file."""
        self.zipTest(TESTFN2, compression)

        # Close the temporary file explicitly so that its (very large)
        # contents are released as soon as the check is done.
        tmp = TemporaryFile()
        try:
            self.zipTest(tmp, compression)
        finally:
            tmp.close()

    def testStored(self):
        self._zipTestAllTargets(zipfile.ZIP_STORED)

    if zlib:
        def testDeflated(self):
            self._zipTestAllTargets(zipfile.ZIP_DEFLATED)

    def tearDown(self):
        # Try to remove both files: a problem with the first one must not
        # leave the multi-gigabyte archive behind.
        for name in (TESTFN, TESTFN2):
            if os.path.exists(name):
                os.remove(name)
def test_main():
    """Run the TestsWithSourceFile test case through run_unittest()."""
    run_unittest(TestsWithSourceFile)
if __name__ == "__main__":
test_main()

View File

@ -1,7 +1,8 @@
"Read and write ZIP files." """
Read and write ZIP files.
"""
import struct, os, time, sys import struct, os, time, sys
import binascii import binascii, cStringIO
try: try:
import zlib # We may need its compression method import zlib # We may need its compression method
@ -9,12 +10,22 @@ except ImportError:
zlib = None zlib = None
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile", __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
"ZipInfo", "ZipFile", "PyZipFile"] "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
class BadZipfile(Exception): class BadZipfile(Exception):
pass pass
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that requires the ZIP64 extensions
    while those extensions are disabled.
    """
error = BadZipfile # The exception raised by this module error = BadZipfile # The exception raised by this module
ZIP64_LIMIT= (1 << 31) - 1
# constants for Zip file compression methods # constants for Zip file compression methods
ZIP_STORED = 0 ZIP_STORED = 0
ZIP_DEFLATED = 8 ZIP_DEFLATED = 8
@ -27,6 +38,11 @@ structCentralDir = "<4s4B4HlLL5HLl"# 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002" # magic number for central directory stringCentralDir = "PK\001\002" # magic number for central directory
structFileHeader = "<4s2B4HlLL2H" # 12 items, file header record, 30 bytes structFileHeader = "<4s2B4HlLL2H" # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004" # magic number for file header stringFileHeader = "PK\003\004" # magic number for file header
structEndArchive64Locator = "<4slql" # 4 items, locate Zip64 header, 20 bytes
stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header
structEndArchive64 = "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes
stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header
# indexes of entries in the central directory structure # indexes of entries in the central directory structure
_CD_SIGNATURE = 0 _CD_SIGNATURE = 0
@ -75,6 +91,40 @@ def is_zipfile(filename):
pass pass
return False return False
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin   -- seekable file object of the archive.
    offset -- start of the classic "End of Central Directory" record,
              counted from the end of the file (so it is negative).
    endrec -- list with the fields of the classic end record; it is
              updated in place and returned.

    The ZIP64 locator is looked for directly in front of the classic end
    record, and the ZIP64 end-of-archive record directly in front of the
    locator.  endrec is returned unchanged when either signature does not
    match or when the file is too short to hold the records.

    Raises BadZipfile when the locator describes a multi-disk archive.
    """
    locatorSize = struct.calcsize(structEndArchive64Locator)
    endArchiveSize = struct.calcsize(structEndArchive64)

    # Determine the file size so that every position can be validated
    # before seeking: an archive that merely carries the 0xffffffff marker
    # but is too small for the ZIP64 records must not make seek() fail.
    fpin.seek(0, 2)
    filesize = fpin.tell()

    locatorStart = filesize + offset - locatorSize
    if locatorStart < 0:
        return endrec
    fpin.seek(locatorStart, 0)
    data = fpin.read(locatorSize)
    if len(data) != locatorSize:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    recordStart = locatorStart - endArchiveSize
    if recordStart < 0:
        return endrec
    fpin.seek(recordStart, 0)
    data = fpin.read(endArchiveSize)
    if len(data) != endArchiveSize:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[1] = disk_num
    endrec[2] = disk_dir
    endrec[3] = dircount
    endrec[4] = dircount2
    endrec[5] = dirsize
    endrec[6] = diroffset
    return endrec
def _EndRecData(fpin): def _EndRecData(fpin):
"""Return data from the "End of Central Directory" record, or None. """Return data from the "End of Central Directory" record, or None.
@ -88,6 +138,8 @@ def _EndRecData(fpin):
endrec = list(endrec) endrec = list(endrec)
endrec.append("") # Append the archive comment endrec.append("") # Append the archive comment
endrec.append(filesize - 22) # Append the record start offset endrec.append(filesize - 22) # Append the record start offset
if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
return _EndRecData64(fpin, -22, endrec)
return endrec return endrec
# Search the last END_BLOCK bytes of the file for the record signature. # Search the last END_BLOCK bytes of the file for the record signature.
# The comment is appended to the ZIP file and has a 16 bit length. # The comment is appended to the ZIP file and has a 16 bit length.
@ -106,15 +158,39 @@ def _EndRecData(fpin):
# Append the archive comment and start offset # Append the archive comment and start offset
endrec.append(comment) endrec.append(comment)
endrec.append(filesize - END_BLOCK + start) endrec.append(filesize - END_BLOCK + start)
if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
return _EndRecData64(fpin, - END_BLOCK + start, endrec)
return endrec return endrec
return # Error, return None return # Error, return None
class ZipInfo: class ZipInfo (object):
"""Class with attributes describing each file in the ZIP archive.""" """Class with attributes describing each file in the ZIP archive."""
__slots__ = (
'orig_filename',
'filename',
'date_time',
'compress_type',
'comment',
'extra',
'create_system',
'create_version',
'extract_version',
'reserved',
'flag_bits',
'volume',
'internal_attr',
'external_attr',
'header_offset',
'CRC',
'compress_size',
'file_size',
)
def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
self.orig_filename = filename # Original file name in archive self.orig_filename = filename # Original file name in archive
# Terminate the file name at the first null byte. Null bytes in file # Terminate the file name at the first null byte. Null bytes in file
# names are used as tricks by viruses in archives. # names are used as tricks by viruses in archives.
null_byte = filename.find(chr(0)) null_byte = filename.find(chr(0))
@ -123,8 +199,9 @@ class ZipInfo:
# This is used to ensure paths in generated ZIP files always use # This is used to ensure paths in generated ZIP files always use
# forward slashes as the directory separator, as required by the # forward slashes as the directory separator, as required by the
# ZIP format specification. # ZIP format specification.
if os.sep != "/": if os.sep != "/" and os.sep in filename:
filename = filename.replace(os.sep, "/") filename = filename.replace(os.sep, "/")
self.filename = filename # Normalized file name self.filename = filename # Normalized file name
self.date_time = date_time # year, month, day, hour, min, sec self.date_time = date_time # year, month, day, hour, min, sec
# Standard values: # Standard values:
@ -145,7 +222,6 @@ class ZipInfo:
self.external_attr = 0 # External file attributes self.external_attr = 0 # External file attributes
# Other attributes are set by class ZipFile: # Other attributes are set by class ZipFile:
# header_offset Byte offset to the file header # header_offset Byte offset to the file header
# file_offset Byte offset to the start of the file data
# CRC CRC-32 of the uncompressed file # CRC CRC-32 of the uncompressed file
# compress_size Size of the compressed file # compress_size Size of the compressed file
# file_size Size of the uncompressed file # file_size Size of the uncompressed file
@ -162,29 +238,85 @@ class ZipInfo:
CRC = self.CRC CRC = self.CRC
compress_size = self.compress_size compress_size = self.compress_size
file_size = self.file_size file_size = self.file_size
extra = self.extra
if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
# File is larger than what fits into a 4 byte integer,
# fall back to the ZIP64 extension
fmt = '<hhqq'
extra = extra + struct.pack(fmt,
1, struct.calcsize(fmt)-4, file_size, compress_size)
file_size = 0xffffffff # -1
compress_size = 0xffffffff # -1
self.extract_version = max(45, self.extract_version)
self.create_version = max(45, self.extract_version)
header = struct.pack(structFileHeader, stringFileHeader, header = struct.pack(structFileHeader, stringFileHeader,
self.extract_version, self.reserved, self.flag_bits, self.extract_version, self.reserved, self.flag_bits,
self.compress_type, dostime, dosdate, CRC, self.compress_type, dostime, dosdate, CRC,
compress_size, file_size, compress_size, file_size,
len(self.filename), len(self.extra)) len(self.filename), len(extra))
return header + self.filename + self.extra return header + self.filename + extra
def _decodeExtra(self):
    """Apply the ZIP64 record found in self.extra, if any, to this entry.

    self.extra is walked as a sequence of records, each starting with a
    4 byte header: type and payload length, both little-endian 16 bit.
    For a record of type 1 (ZIP64) the 64 bit values of the payload
    replace, in this order, file_size, compress_size and header_offset,
    but only those that currently hold the placeholder -1 / 0xFFFFFFFF.
    Records of any other type are skipped.

    Raises RuntimeError when the ZIP64 record has an unexpected length and
    IndexError when it holds fewer values than there are placeholders.
    """
    # Try to decode the extra field.
    extra = self.extra
    unpack = struct.unpack
    # Stop when less than a complete record header is left over.
    while len(extra) >= 4:
        # Both header fields are unsigned.  Read as signed, a length of
        # 0xFFFC would come out as -4 and the slice at the bottom of the
        # loop would never shrink the data, so the loop would not end.
        tp, ln = unpack('<HH', extra[:4])
        if tp == 1:
            if ln >= 24:
                counts = unpack('<qqq', extra[4:28])
            elif ln == 16:
                counts = unpack('<qq', extra[4:20])
            elif ln == 8:
                counts = unpack('<q', extra[4:12])
            elif ln == 0:
                counts = ()
            else:
                raise RuntimeError("Corrupt extra field %s"%(ln,))

            idx = 0

            # ZIP64 extension (large files and/or large archives)
            if self.file_size == -1 or self.file_size == 0xFFFFFFFF:
                self.file_size = counts[idx]
                idx += 1

            if self.compress_size == -1 or self.compress_size == 0xFFFFFFFF:
                self.compress_size = counts[idx]
                idx += 1

            if self.header_offset == -1 or self.header_offset == 0xFFFFFFFF:
                self.header_offset = counts[idx]
                idx += 1

        extra = extra[ln+4:]
class ZipFile: class ZipFile:
""" Class with methods to open, read, write, close, list zip files. """ Class with methods to open, read, write, close, list zip files.
z = ZipFile(file, mode="r", compression=ZIP_STORED) z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
file: Either the path to the file, or a file-like object. file: Either the path to the file, or a file-like object.
If it is a path, the file will be opened and closed by ZipFile. If it is a path, the file will be opened and closed by ZipFile.
mode: The mode can be either read "r", write "w" or append "a". mode: The mode can be either read "r", write "w" or append "a".
compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib). compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
allowZip64: if True ZipFile will create files with ZIP64 extensions when
needed, otherwise it will raise an exception when this would
be necessary.
""" """
fp = None # Set here since __del__ checks it fp = None # Set here since __del__ checks it
def __init__(self, file, mode="r", compression=ZIP_STORED): def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
"""Open the ZIP file with mode read "r", write "w" or append "a".""" """Open the ZIP file with mode read "r", write "w" or append "a"."""
self._allowZip64 = allowZip64
self._didModify = False
if compression == ZIP_STORED: if compression == ZIP_STORED:
pass pass
elif compression == ZIP_DEFLATED: elif compression == ZIP_DEFLATED:
@ -250,6 +382,9 @@ class ZipFile:
offset_cd = endrec[6] # offset of central directory offset_cd = endrec[6] # offset of central directory
self.comment = endrec[8] # archive comment self.comment = endrec[8] # archive comment
# endrec[9] is the offset of the "End of Central Dir" record # endrec[9] is the offset of the "End of Central Dir" record
if endrec[9] > ZIP64_LIMIT:
x = endrec[9] - size_cd - 56 - 20
else:
x = endrec[9] - size_cd x = endrec[9] - size_cd
# "concat" is zero, unless zip was concatenated to another file # "concat" is zero, unless zip was concatenated to another file
concat = x - offset_cd concat = x - offset_cd
@ -258,6 +393,8 @@ class ZipFile:
# self.start_dir: Position of start of central directory # self.start_dir: Position of start of central directory
self.start_dir = offset_cd + concat self.start_dir = offset_cd + concat
fp.seek(self.start_dir, 0) fp.seek(self.start_dir, 0)
data = fp.read(size_cd)
fp = cStringIO.StringIO(data)
total = 0 total = 0
while total < size_cd: while total < size_cd:
centdir = fp.read(46) centdir = fp.read(46)
@ -275,8 +412,7 @@ class ZipFile:
total = (total + centdir[_CD_FILENAME_LENGTH] total = (total + centdir[_CD_FILENAME_LENGTH]
+ centdir[_CD_EXTRA_FIELD_LENGTH] + centdir[_CD_EXTRA_FIELD_LENGTH]
+ centdir[_CD_COMMENT_LENGTH]) + centdir[_CD_COMMENT_LENGTH])
x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
# file_offset must be computed below...
(x.create_version, x.create_system, x.extract_version, x.reserved, (x.create_version, x.create_system, x.extract_version, x.reserved,
x.flag_bits, x.compress_type, t, d, x.flag_bits, x.compress_type, t, d,
x.CRC, x.compress_size, x.file_size) = centdir[1:12] x.CRC, x.compress_size, x.file_size) = centdir[1:12]
@ -284,28 +420,14 @@ class ZipFile:
# Convert date/time code to (year, month, day, hour, min, sec) # Convert date/time code to (year, month, day, hour, min, sec)
x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F, x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
x._decodeExtra()
x.header_offset = x.header_offset + concat
self.filelist.append(x) self.filelist.append(x)
self.NameToInfo[x.filename] = x self.NameToInfo[x.filename] = x
if self.debug > 2: if self.debug > 2:
print "total", total print "total", total
for data in self.filelist:
fp.seek(data.header_offset, 0)
fheader = fp.read(30)
if fheader[0:4] != stringFileHeader:
raise BadZipfile, "Bad magic number for file header"
fheader = struct.unpack(structFileHeader, fheader)
# file_offset is computed here, since the extra field for
# the central directory and for the local file header
# refer to different fields, and they can have different
# lengths
data.file_offset = (data.header_offset + 30
+ fheader[_FH_FILENAME_LENGTH]
+ fheader[_FH_EXTRA_FIELD_LENGTH])
fname = fp.read(fheader[_FH_FILENAME_LENGTH])
if fname != data.orig_filename:
raise RuntimeError, \
'File name in directory "%s" and header "%s" differ.' % (
data.orig_filename, fname)
def namelist(self): def namelist(self):
"""Return a list of file names in the archive.""" """Return a list of file names in the archive."""
@ -334,6 +456,7 @@ class ZipFile:
except BadZipfile: except BadZipfile:
return zinfo.filename return zinfo.filename
def getinfo(self, name): def getinfo(self, name):
"""Return the instance of ZipInfo given 'name'.""" """Return the instance of ZipInfo given 'name'."""
return self.NameToInfo[name] return self.NameToInfo[name]
@ -347,7 +470,24 @@ class ZipFile:
"Attempt to read ZIP archive that was already closed" "Attempt to read ZIP archive that was already closed"
zinfo = self.getinfo(name) zinfo = self.getinfo(name)
filepos = self.fp.tell() filepos = self.fp.tell()
self.fp.seek(zinfo.file_offset, 0)
self.fp.seek(zinfo.header_offset, 0)
# Skip the file header:
fheader = self.fp.read(30)
if fheader[0:4] != stringFileHeader:
raise BadZipfile, "Bad magic number for file header"
fheader = struct.unpack(structFileHeader, fheader)
fname = self.fp.read(fheader[_FH_FILENAME_LENGTH])
if fheader[_FH_EXTRA_FIELD_LENGTH]:
self.fp.read(fheader[_FH_EXTRA_FIELD_LENGTH])
if fname != zinfo.orig_filename:
raise BadZipfile, \
'File name in directory "%s" and header "%s" differ.' % (
zinfo.orig_filename, fname)
bytes = self.fp.read(zinfo.compress_size) bytes = self.fp.read(zinfo.compress_size)
self.fp.seek(filepos, 0) self.fp.seek(filepos, 0)
if zinfo.compress_type == ZIP_STORED: if zinfo.compress_type == ZIP_STORED:
@ -388,6 +528,12 @@ class ZipFile:
if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED): if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
raise RuntimeError, \ raise RuntimeError, \
"That compression method is not supported" "That compression method is not supported"
if zinfo.file_size > ZIP64_LIMIT:
if not self._allowZip64:
raise LargeZipFile("Filesize would require ZIP64 extensions")
if zinfo.header_offset > ZIP64_LIMIT:
if not self._allowZip64:
raise LargeZipFile("Zipfile size would require ZIP64 extensions")
def write(self, filename, arcname=None, compress_type=None): def write(self, filename, arcname=None, compress_type=None):
"""Put the bytes from filename into the archive under the name """Put the bytes from filename into the archive under the name
@ -407,16 +553,19 @@ class ZipFile:
zinfo.compress_type = self.compression zinfo.compress_type = self.compression
else: else:
zinfo.compress_type = compress_type zinfo.compress_type = compress_type
self._writecheck(zinfo)
fp = open(filename, "rb") zinfo.file_size = st.st_size
zinfo.flag_bits = 0x00 zinfo.flag_bits = 0x00
zinfo.header_offset = self.fp.tell() # Start of header bytes zinfo.header_offset = self.fp.tell() # Start of header bytes
self._writecheck(zinfo)
self._didModify = True
fp = open(filename, "rb")
# Must overwrite CRC and sizes with correct data later # Must overwrite CRC and sizes with correct data later
zinfo.CRC = CRC = 0 zinfo.CRC = CRC = 0
zinfo.compress_size = compress_size = 0 zinfo.compress_size = compress_size = 0
zinfo.file_size = file_size = 0 zinfo.file_size = file_size = 0
self.fp.write(zinfo.FileHeader()) self.fp.write(zinfo.FileHeader())
zinfo.file_offset = self.fp.tell() # Start of file bytes
if zinfo.compress_type == ZIP_DEFLATED: if zinfo.compress_type == ZIP_DEFLATED:
cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
zlib.DEFLATED, -15) zlib.DEFLATED, -15)
@ -461,8 +610,10 @@ class ZipFile:
zinfo.compress_type = self.compression zinfo.compress_type = self.compression
else: else:
zinfo = zinfo_or_arcname zinfo = zinfo_or_arcname
self._writecheck(zinfo)
zinfo.file_size = len(bytes) # Uncompressed size zinfo.file_size = len(bytes) # Uncompressed size
zinfo.header_offset = self.fp.tell() # Start of header bytes
self._writecheck(zinfo)
self._didModify = True
zinfo.CRC = binascii.crc32(bytes) # CRC-32 checksum zinfo.CRC = binascii.crc32(bytes) # CRC-32 checksum
if zinfo.compress_type == ZIP_DEFLATED: if zinfo.compress_type == ZIP_DEFLATED:
co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
@ -473,8 +624,8 @@ class ZipFile:
zinfo.compress_size = zinfo.file_size zinfo.compress_size = zinfo.file_size
zinfo.header_offset = self.fp.tell() # Start of header bytes zinfo.header_offset = self.fp.tell() # Start of header bytes
self.fp.write(zinfo.FileHeader()) self.fp.write(zinfo.FileHeader())
zinfo.file_offset = self.fp.tell() # Start of file bytes
self.fp.write(bytes) self.fp.write(bytes)
self.fp.flush()
if zinfo.flag_bits & 0x08: if zinfo.flag_bits & 0x08:
# Write CRC and file sizes after the file data # Write CRC and file sizes after the file data
self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size, self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
@ -491,7 +642,8 @@ class ZipFile:
records.""" records."""
if self.fp is None: if self.fp is None:
return return
if self.mode in ("w", "a"): # write ending records
if self.mode in ("w", "a") and self._didModify: # write ending records
count = 0 count = 0
pos1 = self.fp.tell() pos1 = self.fp.tell()
for zinfo in self.filelist: # write central directory for zinfo in self.filelist: # write central directory
@ -499,20 +651,69 @@ class ZipFile:
dt = zinfo.date_time dt = zinfo.date_time
dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
extra = []
if zinfo.file_size > ZIP64_LIMIT \
or zinfo.compress_size > ZIP64_LIMIT:
extra.append(zinfo.file_size)
extra.append(zinfo.compress_size)
file_size = 0xffffffff #-1
compress_size = 0xffffffff #-1
else:
file_size = zinfo.file_size
compress_size = zinfo.compress_size
if zinfo.header_offset > ZIP64_LIMIT:
extra.append(zinfo.header_offset)
header_offset = 0xffffffff #-1
else:
header_offset = zinfo.header_offset
extra_data = zinfo.extra
if extra:
# Append a ZIP64 field to the extra's
extra_data = struct.pack(
'<hh' + 'q'*len(extra),
1, 8*len(extra), *extra) + extra_data
extract_version = max(45, zinfo.extract_version)
create_version = max(45, zinfo.create_version)
else:
extract_version = zinfo.extract_version
create_version = zinfo.create_version
centdir = struct.pack(structCentralDir, centdir = struct.pack(structCentralDir,
stringCentralDir, zinfo.create_version, stringCentralDir, create_version,
zinfo.create_system, zinfo.extract_version, zinfo.reserved, zinfo.create_system, extract_version, zinfo.reserved,
zinfo.flag_bits, zinfo.compress_type, dostime, dosdate, zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
zinfo.CRC, zinfo.compress_size, zinfo.file_size, zinfo.CRC, compress_size, file_size,
len(zinfo.filename), len(zinfo.extra), len(zinfo.comment), len(zinfo.filename), len(extra_data), len(zinfo.comment),
0, zinfo.internal_attr, zinfo.external_attr, 0, zinfo.internal_attr, zinfo.external_attr,
zinfo.header_offset) header_offset)
self.fp.write(centdir) self.fp.write(centdir)
self.fp.write(zinfo.filename) self.fp.write(zinfo.filename)
self.fp.write(zinfo.extra) self.fp.write(extra_data)
self.fp.write(zinfo.comment) self.fp.write(zinfo.comment)
pos2 = self.fp.tell() pos2 = self.fp.tell()
# Write end-of-zip-archive record # Write end-of-zip-archive record
if pos1 > ZIP64_LIMIT:
# Need to write the ZIP64 end-of-archive records
zip64endrec = struct.pack(
structEndArchive64, stringEndArchive64,
44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
self.fp.write(zip64endrec)
zip64locrec = struct.pack(
structEndArchive64Locator,
stringEndArchive64Locator, 0, pos2, 1)
self.fp.write(zip64locrec)
pos3 = self.fp.tell()
endrec = struct.pack(structEndArchive, stringEndArchive,
0, 0, count, count, pos2 - pos1, 0xffffffff, 0) # -1, 0)
self.fp.write(endrec)
else:
endrec = struct.pack(structEndArchive, stringEndArchive, endrec = struct.pack(structEndArchive, stringEndArchive,
0, 0, count, count, pos2 - pos1, pos1, 0) 0, 0, count, count, pos2 - pos1, pos1, 0)
self.fp.write(endrec) self.fp.write(endrec)
@ -619,3 +820,80 @@ class PyZipFile(ZipFile):
if basename: if basename:
archivename = "%s/%s" % (basename, archivename) archivename = "%s/%s" % (basename, archivename)
return (fname, archivename) return (fname, archivename)
def main(args=None):
    """Run the command-line interface of this module.

    ``args`` is the list of command-line arguments without the program
    name; when it is None, ``sys.argv[1:]`` is used.  The first argument
    selects the action:

        -l zipfile.zip           print a listing of the archive
        -t zipfile.zip           test the archive, reporting a bad member
        -e zipfile.zip target    extract the archive into directory target
        -c zipfile.zip src ...   create an archive from files/directories

    On a usage error the usage text is printed and ``sys.exit(1)`` is
    called.  The archive (and every extracted file) is closed even when
    an operation fails.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)

    def usage():
        # Single-argument print(...) produces the same output whether
        # print is a statement or a function.
        print(USAGE)
        sys.exit(1)

    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage()

    if args[0] == '-l':
        if len(args) != 2:
            usage()
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            usage()
        zf = ZipFile(args[1], 'r')
        try:
            # testzip() returns the name of the first bad member, or None.
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile is not None:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            usage()
        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            root = os.path.abspath(out)
            # rstrip keeps the prefix correct when root is a filesystem root.
            prefix = root.rstrip(os.sep) + os.sep
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)
                dest = os.path.abspath(tgt)
                # Member names come from the archive and may be absolute
                # or contain '..'; never write outside the target dir.
                if dest != root and not dest.startswith(prefix):
                    print("Skipping unsafe member name: %r" % (path,))
                    continue
                # A name ending in '/' is a directory entry, not a file.
                isdir = path.endswith('/')
                if isdir:
                    tgtdir = dest
                else:
                    tgtdir = os.path.dirname(dest)
                if not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                if isdir:
                    continue
                fp = open(dest, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            usage()

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; anything else
            # (e.g. a missing path or special file) is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            # close() writes the central directory, so it must always run.
            zf.close()

if __name__ == "__main__":
    main()

View File

@ -152,6 +152,7 @@ Extension Modules
aborts the db transaction safely when a modifier callback fails. aborts the db transaction safely when a modifier callback fails.
Fixes SF python patch/bug #1408584. Fixes SF python patch/bug #1408584.
- Patch #1446489: add support for the ZIP64 extensions to zipfile.
Library Library
------- -------