Patch #1446489 (zipfile: support for ZIP64)
This commit is contained in:
parent
0eac11826a
commit
143cefb846
|
@ -17,7 +17,8 @@ understanding of the format, as defined in
|
|||
Note}.
|
||||
|
||||
This module does not currently handle ZIP files which have appended
|
||||
comments, or multi-disk ZIP files.
|
||||
comments, or multi-disk ZIP files. It can handle ZIP files that use the
|
||||
ZIP64 extensions (that is, ZIP files that are more than 4 GByte in size).
|
||||
|
||||
The available attributes of this module are:
|
||||
|
||||
|
@ -25,6 +26,11 @@ The available attributes of this module are:
|
|||
The error raised for bad ZIP files.
|
||||
\end{excdesc}
|
||||
|
||||
\begin{excdesc}{LargeZipFile}
|
||||
The error raised when a ZIP file would require ZIP64 functionality but that
|
||||
has not been enabled.
|
||||
\end{excdesc}
|
||||
|
||||
\begin{classdesc*}{ZipFile}
|
||||
The class for reading and writing ZIP files. See
|
||||
``\citetitle{ZipFile Objects}'' (section \ref{zipfile-objects}) for
|
||||
|
@ -77,7 +83,7 @@ The available attributes of this module are:
|
|||
|
||||
\subsection{ZipFile Objects \label{zipfile-objects}}
|
||||
|
||||
\begin{classdesc}{ZipFile}{file\optional{, mode\optional{, compression}}}
|
||||
\begin{classdesc}{ZipFile}{file\optional{, mode\optional{, compression\optional{, allowZip64}}}}
|
||||
Open a ZIP file, where \var{file} can be either a path to a file
|
||||
(a string) or a file-like object. The \var{mode} parameter
|
||||
should be \code{'r'} to read an existing file, \code{'w'} to
|
||||
|
@ -100,6 +106,12 @@ cat myzip.zip >> python.exe
|
|||
is specified but the \refmodule{zlib} module is not available,
|
||||
\exception{RuntimeError} is also raised. The default is
|
||||
\constant{ZIP_STORED}.
|
||||
If \var{allowZip64} is \code{True}, zipfile will create zipfiles that use
|
||||
the ZIP64 extensions when the zipfile is larger than 2 GBytes. If it is
|
||||
false (the default), zipfile will raise an exception when the zipfile would
|
||||
require ZIP64 extensions. ZIP64 extensions are disabled by default because
|
||||
the default zip and unzip commands on Unix (the InfoZIP utilities) don't
|
||||
support these extensions.
|
||||
\end{classdesc}
|
||||
|
||||
\begin{methoddesc}{close}{}
|
||||
|
@ -132,8 +144,8 @@ cat myzip.zip >> python.exe
|
|||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}{testzip}{}
|
||||
Read all the files in the archive and check their CRC's. Return the
|
||||
name of the first bad file, or else return \code{None}.
|
||||
Read all the files in the archive and check their CRCs and file
|
||||
headers. Return the name of the first bad file, or else return \code{None}.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}{write}{filename\optional{, arcname\optional{,
|
||||
|
@ -284,10 +296,6 @@ Instances have the following attributes:
|
|||
Byte offset to the file header.
|
||||
\end{memberdesc}
|
||||
|
||||
\begin{memberdesc}[ZipInfo]{file_offset}
|
||||
Byte offset to the start of the file data.
|
||||
\end{memberdesc}
|
||||
|
||||
\begin{memberdesc}[ZipInfo]{CRC}
|
||||
CRC-32 of the uncompressed file.
|
||||
\end{memberdesc}
|
||||
|
|
|
@ -4,7 +4,7 @@ try:
|
|||
except ImportError:
|
||||
zlib = None
|
||||
|
||||
import zipfile, os, unittest
|
||||
import zipfile, os, unittest, sys, shutil
|
||||
|
||||
from StringIO import StringIO
|
||||
from tempfile import TemporaryFile
|
||||
|
@ -28,14 +28,70 @@ class TestsWithSourceFile(unittest.TestCase):
|
|||
zipfp = zipfile.ZipFile(f, "w", compression)
|
||||
zipfp.write(TESTFN, "another"+os.extsep+"name")
|
||||
zipfp.write(TESTFN, TESTFN)
|
||||
zipfp.writestr("strfile", self.data)
|
||||
zipfp.close()
|
||||
|
||||
# Read the ZIP archive
|
||||
zipfp = zipfile.ZipFile(f, "r", compression)
|
||||
self.assertEqual(zipfp.read(TESTFN), self.data)
|
||||
self.assertEqual(zipfp.read("another"+os.extsep+"name"), self.data)
|
||||
self.assertEqual(zipfp.read("strfile"), self.data)
|
||||
|
||||
# Print the ZIP directory
|
||||
fp = StringIO()
|
||||
stdout = sys.stdout
|
||||
try:
|
||||
sys.stdout = fp
|
||||
|
||||
zipfp.printdir()
|
||||
finally:
|
||||
sys.stdout = stdout
|
||||
|
||||
directory = fp.getvalue()
|
||||
lines = directory.splitlines()
|
||||
self.assertEquals(len(lines), 4) # Number of files + header
|
||||
|
||||
self.assert_('File Name' in lines[0])
|
||||
self.assert_('Modified' in lines[0])
|
||||
self.assert_('Size' in lines[0])
|
||||
|
||||
fn, date, time, size = lines[1].split()
|
||||
self.assertEquals(fn, 'another.name')
|
||||
# XXX: timestamp is not tested
|
||||
self.assertEquals(size, str(len(self.data)))
|
||||
|
||||
# Check the namelist
|
||||
names = zipfp.namelist()
|
||||
self.assertEquals(len(names), 3)
|
||||
self.assert_(TESTFN in names)
|
||||
self.assert_("another"+os.extsep+"name" in names)
|
||||
self.assert_("strfile" in names)
|
||||
|
||||
# Check infolist
|
||||
infos = zipfp.infolist()
|
||||
names = [ i.filename for i in infos ]
|
||||
self.assertEquals(len(names), 3)
|
||||
self.assert_(TESTFN in names)
|
||||
self.assert_("another"+os.extsep+"name" in names)
|
||||
self.assert_("strfile" in names)
|
||||
for i in infos:
|
||||
self.assertEquals(i.file_size, len(self.data))
|
||||
|
||||
# check getinfo
|
||||
for nm in (TESTFN, "another"+os.extsep+"name", "strfile"):
|
||||
info = zipfp.getinfo(nm)
|
||||
self.assertEquals(info.filename, nm)
|
||||
self.assertEquals(info.file_size, len(self.data))
|
||||
|
||||
# Check that testzip doesn't raise an exception
|
||||
zipfp.testzip()
|
||||
|
||||
|
||||
zipfp.close()
|
||||
|
||||
|
||||
|
||||
|
||||
def testStored(self):
|
||||
for f in (TESTFN2, TemporaryFile(), StringIO()):
|
||||
self.zipTest(f, zipfile.ZIP_STORED)
|
||||
|
@ -59,6 +115,197 @@ class TestsWithSourceFile(unittest.TestCase):
|
|||
os.remove(TESTFN)
|
||||
os.remove(TESTFN2)
|
||||
|
||||
class TestZip64InSmallFiles(unittest.TestCase):
    """Exercise the ZIP64 code paths without creating large files.

    setUp lowers zipfile.ZIP64_LIMIT to 5 so that the small test payload
    already exceeds the limit; tearDown restores the saved value.
    See test_zipfile64 for tests that use genuinely large archives.
    """

    def setUp(self):
        line_gen = ("Test of zipfile line %d." % i for i in range(0, 1000))
        self.data = '\n'.join(line_gen)

        # Make a source file with some lines
        fp = open(TESTFN, "wb")
        try:
            fp.write(self.data)
        finally:
            fp.close()

        # Patch the limit last: tearDown is not run when setUp fails, so a
        # failure above must not leave the lowered limit in place.
        self._limit = zipfile.ZIP64_LIMIT
        zipfile.ZIP64_LIMIT = 5

    def largeFileExceptionTest(self, f, compression):
        """write() must raise LargeZipFile when ZIP64 is not allowed."""
        zipfp = zipfile.ZipFile(f, "w", compression)
        try:
            self.assertRaises(zipfile.LargeZipFile,
                    zipfp.write, TESTFN, "another"+os.extsep+"name")
        finally:
            zipfp.close()

    def largeFileExceptionTest2(self, f, compression):
        """writestr() must raise LargeZipFile when ZIP64 is not allowed."""
        zipfp = zipfile.ZipFile(f, "w", compression)
        try:
            self.assertRaises(zipfile.LargeZipFile,
                    zipfp.writestr, "another"+os.extsep+"name", self.data)
        finally:
            zipfp.close()

    def testLargeFileException(self):
        for f in (TESTFN2, TemporaryFile(), StringIO()):
            self.largeFileExceptionTest(f, zipfile.ZIP_STORED)
            self.largeFileExceptionTest2(f, zipfile.ZIP_STORED)

    def zipTest(self, f, compression):
        """Write three members with ZIP64 enabled, then verify every reader API."""
        # Same name is used for writing and for every later check.
        arcname = "another" + os.extsep + "name"
        members = (TESTFN, arcname, "strfile")

        # Create the ZIP archive
        zipfp = zipfile.ZipFile(f, "w", compression, allowZip64=True)
        try:
            zipfp.write(TESTFN, arcname)
            zipfp.write(TESTFN, TESTFN)
            zipfp.writestr("strfile", self.data)
        finally:
            zipfp.close()

        # Read the ZIP archive
        zipfp = zipfile.ZipFile(f, "r", compression)
        try:
            for nm in members:
                self.assertEqual(zipfp.read(nm), self.data)

            # Print the ZIP directory
            fp = StringIO()
            stdout = sys.stdout
            try:
                sys.stdout = fp
                zipfp.printdir()
            finally:
                sys.stdout = stdout

            directory = fp.getvalue()
            lines = directory.splitlines()
            self.assertEquals(len(lines), 4) # Number of files + header

            self.assert_('File Name' in lines[0])
            self.assert_('Modified' in lines[0])
            self.assert_('Size' in lines[0])

            fn, date, time, size = lines[1].split()
            self.assertEquals(fn, arcname)
            # XXX: timestamp is not tested
            self.assertEquals(size, str(len(self.data)))

            # Check the namelist
            names = zipfp.namelist()
            self.assertEquals(len(names), 3)
            for nm in members:
                self.assert_(nm in names)

            # Check infolist
            infos = zipfp.infolist()
            names = [ i.filename for i in infos ]
            self.assertEquals(len(names), 3)
            for nm in members:
                self.assert_(nm in names)
            for i in infos:
                self.assertEquals(i.file_size, len(self.data))

            # check getinfo
            for nm in members:
                info = zipfp.getinfo(nm)
                self.assertEquals(info.filename, nm)
                self.assertEquals(info.file_size, len(self.data))

            # Check that testzip doesn't raise an exception
            zipfp.testzip()
        finally:
            zipfp.close()

    def testStored(self):
        for f in (TESTFN2, TemporaryFile(), StringIO()):
            self.zipTest(f, zipfile.ZIP_STORED)

    if zlib:
        def testDeflated(self):
            for f in (TESTFN2, TemporaryFile(), StringIO()):
                self.zipTest(f, zipfile.ZIP_DEFLATED)

    def testAbsoluteArcnames(self):
        zipfp = zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED, allowZip64=True)
        try:
            zipfp.write(TESTFN, "/absolute")
        finally:
            zipfp.close()

        zipfp = zipfile.ZipFile(TESTFN2, "r", zipfile.ZIP_STORED)
        try:
            self.assertEqual(zipfp.namelist(), ["absolute"])
        finally:
            zipfp.close()

    def tearDown(self):
        zipfile.ZIP64_LIMIT = self._limit
        # A test that failed early may not have created every file; do not
        # let the cleanup hide the real failure.
        for path in (TESTFN, TESTFN2):
            if os.path.exists(path):
                os.remove(path)
|
||||
|
||||
class PyZipFileTests(unittest.TestCase):
    """Tests for PyZipFile.writepy() on a module, a package and a directory."""

    def _sourceFile(self):
        """Return __file__ with a trailing 'c' or 'o' of .pyc/.pyo removed."""
        fn = __file__
        if fn.endswith('.pyc') or fn.endswith('.pyo'):
            fn = fn[:-1]
        return fn

    def _writeFile(self, path, text):
        """Create path containing text, closing the file even on error."""
        fp = open(path, "w")
        try:
            fp.write(text)
        finally:
            fp.close()

    def _checkCompiledOnly(self, zipfp, bn):
        """Assert the archive holds bn + 'c' or bn + 'o' but not bn itself."""
        names = zipfp.namelist()
        self.assert_(bn not in names)
        self.assert_(bn + 'o' in names or bn + 'c' in names)

    def testWritePyfile(self):
        fn = self._sourceFile()

        zipfp = zipfile.PyZipFile(TemporaryFile(), "w")
        try:
            zipfp.writepy(fn)
            self._checkCompiledOnly(zipfp, os.path.basename(fn))
        finally:
            zipfp.close()

        zipfp = zipfile.PyZipFile(TemporaryFile(), "w")
        try:
            zipfp.writepy(fn, "testpackage")
            bn = "%s/%s"%("testpackage", os.path.basename(fn))
            self._checkCompiledOnly(zipfp, bn)
        finally:
            zipfp.close()

    def testWritePythonPackage(self):
        import email
        packagedir = os.path.dirname(email.__file__)

        zipfp = zipfile.PyZipFile(TemporaryFile(), "w")
        try:
            zipfp.writepy(packagedir)

            # Check for a couple of modules at different levels of the hierarchy
            names = zipfp.namelist()
            self.assert_('email/__init__.pyo' in names or 'email/__init__.pyc' in names)
            self.assert_('email/mime/text.pyo' in names or 'email/mime/text.pyc' in names)
        finally:
            zipfp.close()

    def testWritePythonDirectory(self):
        os.mkdir(TESTFN2)
        try:
            self._writeFile(os.path.join(TESTFN2, "mod1.py"), "print 42\n")
            self._writeFile(os.path.join(TESTFN2, "mod2.py"), "print 42 * 42\n")
            self._writeFile(os.path.join(TESTFN2, "mod2.txt"), "bla bla bla\n")

            zipfp = zipfile.PyZipFile(TemporaryFile(), "w")
            try:
                zipfp.writepy(TESTFN2)

                names = zipfp.namelist()
                self.assert_('mod1.pyc' in names or 'mod1.pyo' in names)
                self.assert_('mod2.pyc' in names or 'mod2.pyo' in names)
                self.assert_('mod2.txt' not in names)
            finally:
                zipfp.close()

        finally:
            shutil.rmtree(TESTFN2)
|
||||
|
||||
|
||||
|
||||
class OtherTests(unittest.TestCase):
|
||||
def testCloseErroneousFile(self):
|
||||
# This test checks that the ZipFile constructor closes the file object
|
||||
|
@ -103,7 +350,8 @@ class OtherTests(unittest.TestCase):
|
|||
self.assertRaises(RuntimeError, zipf.testzip)
|
||||
|
||||
def test_main():
|
||||
run_unittest(TestsWithSourceFile, OtherTests)
|
||||
run_unittest(TestsWithSourceFile, TestZip64InSmallFiles, OtherTests, PyZipFileTests)
|
||||
#run_unittest(TestZip64InSmallFiles)
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_main()
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
# Tests of the full ZIP64 functionality of zipfile
|
||||
# The test_support.requires call is the only reason for keeping this separate
|
||||
# from test_zipfile
|
||||
from test import test_support
|
||||
test_support.requires(
|
||||
'largefile',
|
||||
'test requires loads of disk-space bytes and a long time to run'
|
||||
)
|
||||
|
||||
# We can test part of the module without zlib.
|
||||
try:
|
||||
import zlib
|
||||
except ImportError:
|
||||
zlib = None
|
||||
|
||||
import zipfile, os, unittest
|
||||
|
||||
from StringIO import StringIO
|
||||
from tempfile import TemporaryFile
|
||||
|
||||
from test.test_support import TESTFN, run_unittest
|
||||
|
||||
TESTFN2 = TESTFN + "2"
|
||||
|
||||
class TestsWithSourceFile(unittest.TestCase):
    """Round-trip enough members through a ZIP64 archive to pass 4 GByte."""

    def setUp(self):
        # One million short lines joined by newlines form the member payload.
        payload_lines = ["Test of zipfile line %d." % i
                         for i in range(0, 1000000)]
        self.data = '\n'.join(payload_lines)

        # Make a source file with some lines
        source = open(TESTFN, "wb")
        source.write(self.data)
        source.close()

    def zipTest(self, f, compression):
        """Write the payload repeatedly into f, then read every member back."""
        # 1.5 times the number of payload copies that make up 1 << 32 bytes.
        filecount = int(((1 << 32) / len(self.data)) * 1.5)
        member_names = ["testfn%d"%(num,) for num in range(filecount)]

        # Create the ZIP archive
        archive = zipfile.ZipFile(f, "w", compression, allowZip64=True)
        for member in member_names:
            archive.writestr(member, self.data)
        archive.close()

        # Read the ZIP archive
        archive = zipfile.ZipFile(f, "r", compression)
        for member in member_names:
            self.assertEqual(archive.read(member), self.data)
        archive.close()

    def testStored(self):
        for target in (TESTFN2, TemporaryFile()):
            self.zipTest(target, zipfile.ZIP_STORED)

    if zlib:
        def testDeflated(self):
            for target in (TESTFN2, TemporaryFile()):
                self.zipTest(target, zipfile.ZIP_DEFLATED)

    def tearDown(self):
        os.remove(TESTFN)
        os.remove(TESTFN2)
|
||||
|
||||
def test_main():
    """Run the TestsWithSourceFile test case through run_unittest."""
    run_unittest(TestsWithSourceFile)

# Allow running this test module directly as a script.
if __name__ == "__main__":
    test_main()
|
384
Lib/zipfile.py
384
Lib/zipfile.py
|
@ -1,7 +1,8 @@
|
|||
"Read and write ZIP files."
|
||||
|
||||
"""
|
||||
Read and write ZIP files.
|
||||
"""
|
||||
import struct, os, time, sys
|
||||
import binascii
|
||||
import binascii, cStringIO
|
||||
|
||||
try:
|
||||
import zlib # We may need its compression method
|
||||
|
@ -9,12 +10,22 @@ except ImportError:
|
|||
zlib = None
|
||||
|
||||
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
|
||||
"ZipInfo", "ZipFile", "PyZipFile"]
|
||||
"ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
|
||||
|
||||
class BadZipfile(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class LargeZipFile(Exception):
    """
    Raised while writing a zipfile when the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """
|
||||
|
||||
error = BadZipfile # The exception raised by this module
|
||||
|
||||
ZIP64_LIMIT= (1 << 31) - 1
|
||||
|
||||
# constants for Zip file compression methods
|
||||
ZIP_STORED = 0
|
||||
ZIP_DEFLATED = 8
|
||||
|
@ -27,6 +38,11 @@ structCentralDir = "<4s4B4HlLL5HLl"# 19 items, central directory, 46 bytes
|
|||
stringCentralDir = "PK\001\002" # magic number for central directory
|
||||
structFileHeader = "<4s2B4HlLL2H" # 12 items, file header record, 30 bytes
|
||||
stringFileHeader = "PK\003\004" # magic number for file header
|
||||
structEndArchive64Locator = "<4slql" # 4 items, locate Zip64 header, 20 bytes
|
||||
stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header
|
||||
structEndArchive64 = "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes
|
||||
stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header
|
||||
|
||||
|
||||
# indexes of entries in the central directory structure
|
||||
_CD_SIGNATURE = 0
|
||||
|
@ -75,6 +91,40 @@ def is_zipfile(filename):
|
|||
pass
|
||||
return False
|
||||
|
||||
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin   -- file object of the archive; it is repositioned by this call.
    offset -- position of the classic end-of-archive record, relative to
              the end of the file (seek whence 2).
    endrec -- list holding the classic end-of-archive fields; entries 1 to 6
              are overwritten in place with the values of the ZIP64 record.

    Returns endrec.  It is returned unchanged when no ZIP64 locator or
    ZIP64 end-of-archive record with the expected signature precedes the
    classic record, including when the file is too short to contain one.
    Raises BadZipfile when the locator describes a multi-disk archive.
    """
    locatorSize = struct.calcsize(structEndArchive64Locator)
    try:
        fpin.seek(offset - locatorSize, 2)
    except IOError:
        # The file is too small to hold a locator in front of the classic
        # record, so there is no ZIP64 data to merge.
        return endrec
    data = fpin.read(locatorSize)
    if len(data) != locatorSize:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    endArchiveSize = struct.calcsize(structEndArchive64)
    try:
        fpin.seek(offset - locatorSize - endArchiveSize, 2)
    except IOError:
        # Truncated file: the ZIP64 record cannot fit before the locator.
        return endrec
    data = fpin.read(endArchiveSize)
    if len(data) != endArchiveSize:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[1] = disk_num
    endrec[2] = disk_dir
    endrec[3] = dircount
    endrec[4] = dircount2
    endrec[5] = dirsize
    endrec[6] = diroffset
    return endrec
|
||||
|
||||
|
||||
def _EndRecData(fpin):
|
||||
"""Return data from the "End of Central Directory" record, or None.
|
||||
|
||||
|
@ -88,6 +138,8 @@ def _EndRecData(fpin):
|
|||
endrec = list(endrec)
|
||||
endrec.append("") # Append the archive comment
|
||||
endrec.append(filesize - 22) # Append the record start offset
|
||||
if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
|
||||
return _EndRecData64(fpin, -22, endrec)
|
||||
return endrec
|
||||
# Search the last END_BLOCK bytes of the file for the record signature.
|
||||
# The comment is appended to the ZIP file and has a 16 bit length.
|
||||
|
@ -106,25 +158,50 @@ def _EndRecData(fpin):
|
|||
# Append the archive comment and start offset
|
||||
endrec.append(comment)
|
||||
endrec.append(filesize - END_BLOCK + start)
|
||||
if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
|
||||
return _EndRecData64(fpin, - END_BLOCK + start, endrec)
|
||||
return endrec
|
||||
return # Error, return None
|
||||
|
||||
|
||||
class ZipInfo:
|
||||
class ZipInfo (object):
|
||||
"""Class with attributes describing each file in the ZIP archive."""
|
||||
|
||||
__slots__ = (
|
||||
'orig_filename',
|
||||
'filename',
|
||||
'date_time',
|
||||
'compress_type',
|
||||
'comment',
|
||||
'extra',
|
||||
'create_system',
|
||||
'create_version',
|
||||
'extract_version',
|
||||
'reserved',
|
||||
'flag_bits',
|
||||
'volume',
|
||||
'internal_attr',
|
||||
'external_attr',
|
||||
'header_offset',
|
||||
'CRC',
|
||||
'compress_size',
|
||||
'file_size',
|
||||
)
|
||||
|
||||
def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
|
||||
self.orig_filename = filename # Original file name in archive
|
||||
# Terminate the file name at the first null byte. Null bytes in file
|
||||
# names are used as tricks by viruses in archives.
|
||||
|
||||
# Terminate the file name at the first null byte. Null bytes in file
|
||||
# names are used as tricks by viruses in archives.
|
||||
null_byte = filename.find(chr(0))
|
||||
if null_byte >= 0:
|
||||
filename = filename[0:null_byte]
|
||||
# This is used to ensure paths in generated ZIP files always use
|
||||
# forward slashes as the directory separator, as required by the
|
||||
# ZIP format specification.
|
||||
if os.sep != "/":
|
||||
# This is used to ensure paths in generated ZIP files always use
|
||||
# forward slashes as the directory separator, as required by the
|
||||
# ZIP format specification.
|
||||
if os.sep != "/" and os.sep in filename:
|
||||
filename = filename.replace(os.sep, "/")
|
||||
|
||||
self.filename = filename # Normalized file name
|
||||
self.date_time = date_time # year, month, day, hour, min, sec
|
||||
# Standard values:
|
||||
|
@ -145,7 +222,6 @@ class ZipInfo:
|
|||
self.external_attr = 0 # External file attributes
|
||||
# Other attributes are set by class ZipFile:
|
||||
# header_offset Byte offset to the file header
|
||||
# file_offset Byte offset to the start of the file data
|
||||
# CRC CRC-32 of the uncompressed file
|
||||
# compress_size Size of the compressed file
|
||||
# file_size Size of the uncompressed file
|
||||
|
@ -162,29 +238,85 @@ class ZipInfo:
|
|||
CRC = self.CRC
|
||||
compress_size = self.compress_size
|
||||
file_size = self.file_size
|
||||
|
||||
extra = self.extra
|
||||
|
||||
if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
|
||||
# File is larger than what fits into a 4 byte integer,
|
||||
# fall back to the ZIP64 extension
|
||||
fmt = '<hhqq'
|
||||
extra = extra + struct.pack(fmt,
|
||||
1, struct.calcsize(fmt)-4, file_size, compress_size)
|
||||
file_size = 0xffffffff # -1
|
||||
compress_size = 0xffffffff # -1
|
||||
self.extract_version = max(45, self.extract_version)
|
||||
self.create_version = max(45, self.extract_version)
|
||||
|
||||
header = struct.pack(structFileHeader, stringFileHeader,
|
||||
self.extract_version, self.reserved, self.flag_bits,
|
||||
self.compress_type, dostime, dosdate, CRC,
|
||||
compress_size, file_size,
|
||||
len(self.filename), len(self.extra))
|
||||
return header + self.filename + self.extra
|
||||
len(self.filename), len(extra))
|
||||
return header + self.filename + extra
|
||||
|
||||
def _decodeExtra(self):
|
||||
# Try to decode the extra field.
|
||||
extra = self.extra
|
||||
unpack = struct.unpack
|
||||
while extra:
|
||||
tp, ln = unpack('<hh', extra[:4])
|
||||
if tp == 1:
|
||||
if ln >= 24:
|
||||
counts = unpack('<qqq', extra[4:28])
|
||||
elif ln == 16:
|
||||
counts = unpack('<qq', extra[4:20])
|
||||
elif ln == 8:
|
||||
counts = unpack('<q', extra[4:12])
|
||||
elif ln == 0:
|
||||
counts = ()
|
||||
else:
|
||||
raise RuntimeError, "Corrupt extra field %s"%(ln,)
|
||||
|
||||
idx = 0
|
||||
|
||||
# ZIP64 extension (large files and/or large archives)
|
||||
if self.file_size == -1 or self.file_size == 0xFFFFFFFFL:
|
||||
self.file_size = counts[idx]
|
||||
idx += 1
|
||||
|
||||
if self.compress_size == -1 or self.compress_size == 0xFFFFFFFFL:
|
||||
self.compress_size = counts[idx]
|
||||
idx += 1
|
||||
|
||||
if self.header_offset == -1 or self.header_offset == 0xffffffffL:
|
||||
old = self.header_offset
|
||||
self.header_offset = counts[idx]
|
||||
idx+=1
|
||||
|
||||
extra = extra[ln+4:]
|
||||
|
||||
|
||||
class ZipFile:
|
||||
""" Class with methods to open, read, write, close, list zip files.
|
||||
|
||||
z = ZipFile(file, mode="r", compression=ZIP_STORED)
|
||||
z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
|
||||
|
||||
file: Either the path to the file, or a file-like object.
|
||||
If it is a path, the file will be opened and closed by ZipFile.
|
||||
mode: The mode can be either read "r", write "w" or append "a".
|
||||
compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
|
||||
allowZip64: if True ZipFile will create files with ZIP64 extensions when
|
||||
needed, otherwise it will raise an exception when this would
|
||||
be necessary.
|
||||
|
||||
"""
|
||||
|
||||
fp = None # Set here since __del__ checks it
|
||||
|
||||
def __init__(self, file, mode="r", compression=ZIP_STORED):
|
||||
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
|
||||
"""Open the ZIP file with mode read "r", write "w" or append "a"."""
|
||||
self._allowZip64 = allowZip64
|
||||
self._didModify = False
|
||||
if compression == ZIP_STORED:
|
||||
pass
|
||||
elif compression == ZIP_DEFLATED:
|
||||
|
@ -250,7 +382,10 @@ class ZipFile:
|
|||
offset_cd = endrec[6] # offset of central directory
|
||||
self.comment = endrec[8] # archive comment
|
||||
# endrec[9] is the offset of the "End of Central Dir" record
|
||||
x = endrec[9] - size_cd
|
||||
if endrec[9] > ZIP64_LIMIT:
|
||||
x = endrec[9] - size_cd - 56 - 20
|
||||
else:
|
||||
x = endrec[9] - size_cd
|
||||
# "concat" is zero, unless zip was concatenated to another file
|
||||
concat = x - offset_cd
|
||||
if self.debug > 2:
|
||||
|
@ -258,6 +393,8 @@ class ZipFile:
|
|||
# self.start_dir: Position of start of central directory
|
||||
self.start_dir = offset_cd + concat
|
||||
fp.seek(self.start_dir, 0)
|
||||
data = fp.read(size_cd)
|
||||
fp = cStringIO.StringIO(data)
|
||||
total = 0
|
||||
while total < size_cd:
|
||||
centdir = fp.read(46)
|
||||
|
@ -275,8 +412,7 @@ class ZipFile:
|
|||
total = (total + centdir[_CD_FILENAME_LENGTH]
|
||||
+ centdir[_CD_EXTRA_FIELD_LENGTH]
|
||||
+ centdir[_CD_COMMENT_LENGTH])
|
||||
x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
|
||||
# file_offset must be computed below...
|
||||
x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
|
||||
(x.create_version, x.create_system, x.extract_version, x.reserved,
|
||||
x.flag_bits, x.compress_type, t, d,
|
||||
x.CRC, x.compress_size, x.file_size) = centdir[1:12]
|
||||
|
@ -284,28 +420,14 @@ class ZipFile:
|
|||
# Convert date/time code to (year, month, day, hour, min, sec)
|
||||
x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
|
||||
t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
|
||||
|
||||
x._decodeExtra()
|
||||
x.header_offset = x.header_offset + concat
|
||||
self.filelist.append(x)
|
||||
self.NameToInfo[x.filename] = x
|
||||
if self.debug > 2:
|
||||
print "total", total
|
||||
for data in self.filelist:
|
||||
fp.seek(data.header_offset, 0)
|
||||
fheader = fp.read(30)
|
||||
if fheader[0:4] != stringFileHeader:
|
||||
raise BadZipfile, "Bad magic number for file header"
|
||||
fheader = struct.unpack(structFileHeader, fheader)
|
||||
# file_offset is computed here, since the extra field for
|
||||
# the central directory and for the local file header
|
||||
# refer to different fields, and they can have different
|
||||
# lengths
|
||||
data.file_offset = (data.header_offset + 30
|
||||
+ fheader[_FH_FILENAME_LENGTH]
|
||||
+ fheader[_FH_EXTRA_FIELD_LENGTH])
|
||||
fname = fp.read(fheader[_FH_FILENAME_LENGTH])
|
||||
if fname != data.orig_filename:
|
||||
raise RuntimeError, \
|
||||
'File name in directory "%s" and header "%s" differ.' % (
|
||||
data.orig_filename, fname)
|
||||
|
||||
|
||||
def namelist(self):
|
||||
"""Return a list of file names in the archive."""
|
||||
|
@ -334,6 +456,7 @@ class ZipFile:
|
|||
except BadZipfile:
|
||||
return zinfo.filename
|
||||
|
||||
|
||||
def getinfo(self, name):
|
||||
"""Return the instance of ZipInfo given 'name'."""
|
||||
return self.NameToInfo[name]
|
||||
|
@ -347,7 +470,24 @@ class ZipFile:
|
|||
"Attempt to read ZIP archive that was already closed"
|
||||
zinfo = self.getinfo(name)
|
||||
filepos = self.fp.tell()
|
||||
self.fp.seek(zinfo.file_offset, 0)
|
||||
|
||||
self.fp.seek(zinfo.header_offset, 0)
|
||||
|
||||
# Skip the file header:
|
||||
fheader = self.fp.read(30)
|
||||
if fheader[0:4] != stringFileHeader:
|
||||
raise BadZipfile, "Bad magic number for file header"
|
||||
|
||||
fheader = struct.unpack(structFileHeader, fheader)
|
||||
fname = self.fp.read(fheader[_FH_FILENAME_LENGTH])
|
||||
if fheader[_FH_EXTRA_FIELD_LENGTH]:
|
||||
self.fp.read(fheader[_FH_EXTRA_FIELD_LENGTH])
|
||||
|
||||
if fname != zinfo.orig_filename:
|
||||
raise BadZipfile, \
|
||||
'File name in directory "%s" and header "%s" differ.' % (
|
||||
zinfo.orig_filename, fname)
|
||||
|
||||
bytes = self.fp.read(zinfo.compress_size)
|
||||
self.fp.seek(filepos, 0)
|
||||
if zinfo.compress_type == ZIP_STORED:
|
||||
|
@ -388,6 +528,12 @@ class ZipFile:
|
|||
if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
|
||||
raise RuntimeError, \
|
||||
"That compression method is not supported"
|
||||
if zinfo.file_size > ZIP64_LIMIT:
|
||||
if not self._allowZip64:
|
||||
raise LargeZipFile("Filesize would require ZIP64 extensions")
|
||||
if zinfo.header_offset > ZIP64_LIMIT:
|
||||
if not self._allowZip64:
|
||||
raise LargeZipFile("Zipfile size would require ZIP64 extensions")
|
||||
|
||||
def write(self, filename, arcname=None, compress_type=None):
|
||||
"""Put the bytes from filename into the archive under the name
|
||||
|
@ -407,16 +553,19 @@ class ZipFile:
|
|||
zinfo.compress_type = self.compression
|
||||
else:
|
||||
zinfo.compress_type = compress_type
|
||||
self._writecheck(zinfo)
|
||||
fp = open(filename, "rb")
|
||||
|
||||
zinfo.file_size = st.st_size
|
||||
zinfo.flag_bits = 0x00
|
||||
zinfo.header_offset = self.fp.tell() # Start of header bytes
|
||||
|
||||
self._writecheck(zinfo)
|
||||
self._didModify = True
|
||||
fp = open(filename, "rb")
|
||||
# Must overwrite CRC and sizes with correct data later
|
||||
zinfo.CRC = CRC = 0
|
||||
zinfo.compress_size = compress_size = 0
|
||||
zinfo.file_size = file_size = 0
|
||||
self.fp.write(zinfo.FileHeader())
|
||||
zinfo.file_offset = self.fp.tell() # Start of file bytes
|
||||
if zinfo.compress_type == ZIP_DEFLATED:
|
||||
cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
|
||||
zlib.DEFLATED, -15)
|
||||
|
@ -461,8 +610,10 @@ class ZipFile:
|
|||
zinfo.compress_type = self.compression
|
||||
else:
|
||||
zinfo = zinfo_or_arcname
|
||||
self._writecheck(zinfo)
|
||||
zinfo.file_size = len(bytes) # Uncompressed size
|
||||
zinfo.header_offset = self.fp.tell() # Start of header bytes
|
||||
self._writecheck(zinfo)
|
||||
self._didModify = True
|
||||
zinfo.CRC = binascii.crc32(bytes) # CRC-32 checksum
|
||||
if zinfo.compress_type == ZIP_DEFLATED:
|
||||
co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
|
||||
|
@ -473,8 +624,8 @@ class ZipFile:
|
|||
zinfo.compress_size = zinfo.file_size
|
||||
zinfo.header_offset = self.fp.tell() # Start of header bytes
|
||||
self.fp.write(zinfo.FileHeader())
|
||||
zinfo.file_offset = self.fp.tell() # Start of file bytes
|
||||
self.fp.write(bytes)
|
||||
self.fp.flush()
|
||||
if zinfo.flag_bits & 0x08:
|
||||
# Write CRC and file sizes after the file data
|
||||
self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
|
||||
|
@ -491,7 +642,8 @@ class ZipFile:
|
|||
records."""
|
||||
if self.fp is None:
|
||||
return
|
||||
if self.mode in ("w", "a"): # write ending records
|
||||
|
||||
if self.mode in ("w", "a") and self._didModify: # write ending records
|
||||
count = 0
|
||||
pos1 = self.fp.tell()
|
||||
for zinfo in self.filelist: # write central directory
|
||||
|
@ -499,23 +651,72 @@ class ZipFile:
|
|||
dt = zinfo.date_time
|
||||
dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
|
||||
dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
|
||||
extra = []
|
||||
if zinfo.file_size > ZIP64_LIMIT \
|
||||
or zinfo.compress_size > ZIP64_LIMIT:
|
||||
extra.append(zinfo.file_size)
|
||||
extra.append(zinfo.compress_size)
|
||||
file_size = 0xffffffff #-1
|
||||
compress_size = 0xffffffff #-1
|
||||
else:
|
||||
file_size = zinfo.file_size
|
||||
compress_size = zinfo.compress_size
|
||||
|
||||
if zinfo.header_offset > ZIP64_LIMIT:
|
||||
extra.append(zinfo.header_offset)
|
||||
header_offset = 0xffffffff #-1
|
||||
else:
|
||||
header_offset = zinfo.header_offset
|
||||
|
||||
extra_data = zinfo.extra
|
||||
if extra:
|
||||
# Prepend a ZIP64 extra field to the existing extra data
|
||||
extra_data = struct.pack(
|
||||
'<hh' + 'q'*len(extra),
|
||||
1, 8*len(extra), *extra) + extra_data
|
||||
|
||||
extract_version = max(45, zinfo.extract_version)
|
||||
create_version = max(45, zinfo.create_version)
|
||||
else:
|
||||
extract_version = zinfo.extract_version
|
||||
create_version = zinfo.create_version
|
||||
|
||||
centdir = struct.pack(structCentralDir,
|
||||
stringCentralDir, zinfo.create_version,
|
||||
zinfo.create_system, zinfo.extract_version, zinfo.reserved,
|
||||
stringCentralDir, create_version,
|
||||
zinfo.create_system, extract_version, zinfo.reserved,
|
||||
zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
|
||||
zinfo.CRC, zinfo.compress_size, zinfo.file_size,
|
||||
len(zinfo.filename), len(zinfo.extra), len(zinfo.comment),
|
||||
zinfo.CRC, compress_size, file_size,
|
||||
len(zinfo.filename), len(extra_data), len(zinfo.comment),
|
||||
0, zinfo.internal_attr, zinfo.external_attr,
|
||||
zinfo.header_offset)
|
||||
header_offset)
|
||||
self.fp.write(centdir)
|
||||
self.fp.write(zinfo.filename)
|
||||
self.fp.write(zinfo.extra)
|
||||
self.fp.write(extra_data)
|
||||
self.fp.write(zinfo.comment)
|
||||
|
||||
pos2 = self.fp.tell()
|
||||
# Write end-of-zip-archive record
|
||||
endrec = struct.pack(structEndArchive, stringEndArchive,
|
||||
0, 0, count, count, pos2 - pos1, pos1, 0)
|
||||
self.fp.write(endrec)
|
||||
if pos1 > ZIP64_LIMIT:
|
||||
# Need to write the ZIP64 end-of-archive records
|
||||
zip64endrec = struct.pack(
|
||||
structEndArchive64, stringEndArchive64,
|
||||
44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
|
||||
self.fp.write(zip64endrec)
|
||||
|
||||
zip64locrec = struct.pack(
|
||||
structEndArchive64Locator,
|
||||
stringEndArchive64Locator, 0, pos2, 1)
|
||||
self.fp.write(zip64locrec)
|
||||
|
||||
pos3 = self.fp.tell()
|
||||
endrec = struct.pack(structEndArchive, stringEndArchive,
|
||||
0, 0, count, count, pos2 - pos1, 0xffffffff, 0) # -1, 0)
|
||||
self.fp.write(endrec)
|
||||
|
||||
else:
|
||||
endrec = struct.pack(structEndArchive, stringEndArchive,
|
||||
0, 0, count, count, pos2 - pos1, pos1, 0)
|
||||
self.fp.write(endrec)
|
||||
self.fp.flush()
|
||||
if not self._filePassed:
|
||||
self.fp.close()
|
||||
|
@ -619,3 +820,80 @@ class PyZipFile(ZipFile):
|
|||
if basename:
|
||||
archivename = "%s/%s" % (basename, archivename)
|
||||
return (fname, archivename)
|
||||
|
||||
|
||||
def main(args = None):
    """Run the minimal command-line interface of this module.

    args is the list of command-line arguments (without the program
    name); when it is None, sys.argv[1:] is used.  The first argument
    selects the action:

        -l zipfile.zip          print a listing of the archive
        -t zipfile.zip          test the archive and report a bad member
        -e zipfile.zip target   extract every member below target
        -c zipfile.zip src ...  create an archive from files/directories

    On an unknown option or a wrong number of arguments the usage text
    is printed and sys.exit(1) is called.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            # Release the archive even if listing fails.
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            # The result of testzip() is reported below instead of
            # being thrown away.
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        out = args[2]
        try:
            for path in zf.namelist():
                # NOTE(review): member names are joined onto the target
                # directory without sanitising; names containing '..' or
                # absolute paths are not rejected here.
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgtdir = os.path.dirname(tgt)
                # An empty dirname means "current directory": nothing to
                # create, and os.makedirs('') would raise.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                if path.endswith('/'):
                    # Directory entry: the directory was created above and
                    # there is no file content to write.
                    continue
                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a regular file, or recurse into a directory; anything
            # else is silently ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()

if __name__ == "__main__":
    main()
|
||||
|
|
Loading…
Reference in New Issue