"""Read and write ZIP files.

Supports archives whose members are stored uncompressed (ZIP_STORED) or
deflated (ZIP_DEFLATED, requires zlib).  Archives that end with an archive
comment are not recognised: the end-of-archive record is expected to occupy
exactly the final 22 bytes of the file.
"""
# Written by James C. Ahlstrom jim@interet.com
# All rights transferred to CNRI pursuant to the Python contribution agreement
#
# Provenance: recovered from the patch "Zip file handling module, by Jim
# Ahlstrom." (commit 32abe6f7d0a82358efd0494992f3c388d7b24036, From: Guido
# van Rossum, Date: Fri, 31 Mar 2000 17:30:02 +0000), which created
# Lib/zipfile.py.  The copy this was recovered from had lost part of its
# text; see the NOTE(review) markers near the end of the module.

import binascii
import os
import py_compile
import struct
import time

try:
    import zlib  # We may need its compression method
except ImportError:
    zlib = None


class _BadZipfile(Exception):
    """Raised when a file is not a ZIP archive or its records are corrupt."""


# The module raises this exception under the name ``BadZipfile``; that name
# was previously never bound, so every such ``raise`` failed with NameError.
BadZipfile = _BadZipfile
error = _BadZipfile  # The exception raised by this module

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported

# Here are some struct module formats for reading headers.  The magic
# numbers are byte literals so that comparisons against data read from a
# file opened in binary mode behave the same whether reads yield str
# (Python 2) or bytes (Python 3).
structEndArchive = "<4s4H2lH"        # 9 items, end of archive, 22 bytes
stringEndArchive = b"PK\005\006"     # magic number for end of archive record
structCentralDir = "<4s4B4H3l5H2l"   # 19 items, central directory, 46 bytes
stringCentralDir = b"PK\001\002"     # magic number for central directory
structFileHeader = "<4s2B4H3l2H"     # 12 items, file header record, 30 bytes
stringFileHeader = b"PK\003\004"     # magic number for file header


def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    Will not accept a ZIP archive with an ending comment.

    Returns 1 when the last 22 bytes of the file start with the
    end-of-archive magic number and end with a zero comment length.
    Returns None otherwise, including when the file cannot be opened or is
    shorter than 22 bytes.
    """
    try:
        fpin = open(filename, "rb")
        try:
            fpin.seek(-22, 2)           # Seek to end-of-file record
            endrec = fpin.read()
        finally:
            fpin.close()                # never leak the handle on failure
    except (IOError, OSError):
        return None
    if endrec[0:4] == stringEndArchive and endrec[-2:] == b"\000\000":
        return 1                        # file has correct magic number
    return None


class ZipInfo:
    "Class with attributes describing each file in the ZIP archive"

    def __init__(self, filename="NoName", date_time=(1980, 1, 1, 0, 0, 0)):
        self.filename = filename        # Name of the file in the archive
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED  # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        self.create_system = 0          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # file_offset           Byte offset to the start of the file data
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed 30-byte local file header followed by the
        file name and the extra data.  When bit 0x08 of flag_bits is set the
        CRC and both sizes are written as zero, because they are written
        after the file data instead.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        # Seconds are stored with two-second resolution; floor division
        # keeps the value an integer.
        dostime = dt[3] << 11 | dt[4] << 5 | dt[5] // 2
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size
        header = struct.pack(structFileHeader, stringFileHeader,
                             self.extract_version, self.reserved,
                             self.flag_bits, self.compress_type,
                             dostime, dosdate, CRC,
                             compress_size, file_size,
                             len(self.filename), len(self.extra))
        return header + self.filename + self.extra


class ZipFile:
    "Class with methods to open, read, write, close, list zip files"

    def __init__(self, filename, mode="r", compression=ZIP_STORED):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        compression is the default method for members added with write();
        it must be ZIP_STORED or ZIP_DEFLATED.

        Raises RuntimeError for an unsupported compression method, for
        ZIP_DEFLATED when zlib is missing, and for an unknown mode.  Raises
        BadZipfile when mode is "r" and the file is not a usable archive.
        """
        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                    "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")
        self.debug = 0          # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        self.filename = filename
        self.mode = key = mode[0]
        if key == 'r':
            self.fp = open(filename, "rb")
            self._GetContents()
        elif key == 'w':
            self.fp = open(filename, "wb")
        elif key == 'a':
            fp = self.fp = open(filename, "r+b")
            try:
                fp.seek(-22, 2)         # Seek to end-of-file record
                endrec = fp.read()
            except (IOError, OSError):
                # The file is shorter than an end-of-archive record, so it
                # cannot be a zip file; fall through to plain appending.
                endrec = b""
            if (endrec[0:4] == stringEndArchive and
                    endrec[-2:] == b"\000\000"):
                self._GetContents()     # file is a zip file
                # seek to start of directory and overwrite
                fp.seek(self.start_dir, 0)
            else:                       # file is not a zip file, just append
                fp.seek(0, 2)
        else:
            raise RuntimeError('Mode must be "r", "w" or "a"')

    def _GetContents(self):
        """Read in the table of contents for the zip file.

        Fills self.filelist and self.NameToInfo from the central directory
        and sets self.start_dir to the position of the central directory.
        Each entry's local file header is then checked against the central
        directory.

        Raises BadZipfile on a bad magic number and RuntimeError when a file
        name differs between the central directory and the file header.
        """
        fp = self.fp
        fp.seek(-22, 2)                 # Start of end-of-archive record
        filesize = fp.tell() + 22       # Get file size
        endrec = fp.read(22)    # Archive must not end with a comment!
        if endrec[0:4] != stringEndArchive or endrec[-2:] != b"\000\000":
            raise BadZipfile(
                "File is not a zip file, or ends with a comment")
        endrec = struct.unpack(structEndArchive, endrec)
        if self.debug > 1:
            print("%s" % (endrec,))
        size_cd = endrec[5]             # bytes in central directory
        offset_cd = endrec[6]           # offset of central directory
        inferred = filesize - 22 - size_cd
        # "concat" is zero, unless zip was concatenated to another file
        concat = inferred - offset_cd
        if self.debug > 2:
            print("given, inferred, offset %s %s %s"
                  % (offset_cd, inferred, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        total = 0
        while total < size_cd:
            centdir = fp.read(46)
            total = total + 46
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print("%s" % (centdir,))
            filename = fp.read(centdir[12])
            # Create ZipInfo instance to store file information
            zinfo = ZipInfo(filename)
            zinfo.extra = fp.read(centdir[13])
            zinfo.comment = fp.read(centdir[14])
            total = total + centdir[12] + centdir[13] + centdir[14]
            zinfo.header_offset = centdir[18] + concat
            # Provisional value; replaced below once the local header has
            # been read, since its extra field may have a different length.
            zinfo.file_offset = (zinfo.header_offset + 30 +
                                 centdir[12] + centdir[13])
            (zinfo.create_version, zinfo.create_system,
             zinfo.extract_version, zinfo.reserved,
             zinfo.flag_bits, zinfo.compress_type, t, d,
             zinfo.CRC, zinfo.compress_size, zinfo.file_size) = centdir[1:12]
            (zinfo.volume, zinfo.internal_attr,
             zinfo.external_attr) = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec).
            # The parentheses around (t & 0x1F) matter: "*" binds tighter
            # than "&", so without them the mask would be 0x1F * 2.
            zinfo.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                               t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            if self.debug > 2:
                print("total %s" % total)
        for data in self.filelist:
            fp.seek(data.header_offset, 0)
            fheader = fp.read(30)
            if fheader[0:4] != stringFileHeader:
                raise BadZipfile("Bad magic number for file header")
            fheader = struct.unpack(structFileHeader, fheader)
            # The file data starts after the local header's own name and
            # extra field, whose lengths are items 10 and 11.
            data.file_offset = (data.header_offset + 30 +
                                fheader[10] + fheader[11])
            fname = fp.read(fheader[10])
            if fname != data.filename:
                raise RuntimeError(
                    'File name in Central Directory "%s" and '
                    'File Header "%s" differ.' % (data.filename, fname))

    def namelist(self):
        "Return a list of file names in the archive"
        return [data.filename for data in self.filelist]

    def infolist(self):
        "Return a list of class ZipInfo instances for files in the archive"
        return self.filelist

    def printdir(self):
        "Print a table of contents for the zip file"
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Returns the name of the first file that cannot be read, or None
        when every file reads cleanly.
        """
        for zinfo in self.filelist:
            try:
                self.read(zinfo.filename)       # Check CRC-32
            except Exception:
                # Deliberately broad: any failure to read a member marks
                # that member as bad.
                return zinfo.filename
        return None

    def getinfo(self, name):
        'Return the instance of ZipInfo given "name"'
        return self.NameToInfo[name]

    def read(self, name):
        """Return file bytes (as a string) for name.

        The file position of the archive is restored before returning.

        Raises RuntimeError when the archive is not open for "r" or "a",
        has been closed, or needs zlib that is missing; KeyError for an
        unknown name; BadZipfile for an unsupported compression method or a
        CRC-32 mismatch.
        """
        if self.mode not in ("r", "a"):
            raise RuntimeError('read() requires mode "r" or "a"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to read ZIP archive that was already closed")
        zinfo = self.getinfo(name)
        filepos = self.fp.tell()
        self.fp.seek(zinfo.file_offset, 0)
        data = self.fp.read(zinfo.compress_size)
        self.fp.seek(filepos, 0)
        if zinfo.compress_type == ZIP_STORED:
            pass
        elif zinfo.compress_type == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                    "De-compression requires the (missing) zlib module")
            # zlib compress/decompress code by Jeremy Hylton of CNRI
            dc = zlib.decompressobj(-15)
            data = dc.decompress(data)
            # need to feed in unused pad byte so that zlib won't choke
            ex = dc.decompress(b'Z') + dc.flush()
            if ex:
                data = data + ex
        else:
            raise BadZipfile(
                "Unsupported compression method %d for file %s"
                % (zinfo.compress_type, name))
        # The stored CRC is unpacked as a signed value, while crc32() may
        # return either sign; compare the low 32 bits only.
        crc = binascii.crc32(data)
        if (crc & 0xffffffff) != (zinfo.CRC & 0xffffffff):
            raise BadZipfile("Bad CRC-32 for file %s" % name)
        return data

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        Raises RuntimeError when the archive is not open for "w" or "a",
        has been closed, or the compression method cannot be used.  A
        duplicate name only produces a message when self.debug is set.
        """
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % (zinfo.filename,))
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError("That compression method is not supported")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        arcname defaults to filename and compress_type to the archive's
        default compression.  The member is written with flag bit 0x08 set:
        the header carries zero CRC and sizes, and the real values follow
        the file data.
        """
        st = os.stat(filename)
        date_time = time.localtime(st[8])[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            zinfo = ZipInfo(filename, date_time)
        else:
            zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = st[0] << 16       # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type
        self._writecheck(zinfo)
        fp = open(filename, "rb")
        try:
            zinfo.flag_bits = 0x08
            zinfo.header_offset = self.fp.tell()    # Start of header bytes
            self.fp.write(zinfo.FileHeader())
            zinfo.file_offset = self.fp.tell()      # Start of file bytes
            CRC = 0
            compress_size = 0
            file_size = 0
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                        zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = binascii.crc32(buf, CRC)
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        finally:
            fp.close()          # close the source file even if copying fails
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Write CRC and file sizes after the file data
        # NOTE(review): the recovered text breaks off inside this
        # struct.pack() call.  The three-long little-endian layout mirrors
        # the "3l" CRC/size fields of structFileHeader -- confirm against
        # the upstream file.
        self.fp.write(struct.pack("<lll", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
        # NOTE(review): presumably the new entry was also recorded here, as
        # _GetContents() does for entries it reads -- confirm.
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    # NOTE(review): the recovered text has a gap between the struct.pack()
    # call above and the fragment below.  Whatever definitions stood in the
    # gap are not reproduced here; restore them from the upstream file.
    # Note in particular that nothing in this recovered text writes the
    # central directory or closes self.fp.


# NOTE(review): orphaned tail of a definition whose header and opening lines
# were lost.  It is preserved verbatim (original indentation unknown) so
# nothing is dropped; it refers to names (file_py, file_pyo, file_pyc,
# basename, self) that are bound in the missing part.
#
#     ... >= os.stat(file_py)[8]:
#         fname = file_pyo    # Use .pyo file
#     elif not os.path.isfile(file_pyc) or \
#          os.stat(file_pyc)[8] < os.stat(file_py)[8]:
#         if self.debug:
#             print "Compiling", file_py
#         py_compile.compile(file_py, file_pyc)
#         fname = file_pyc
#     else:
#         fname = file_pyc
#     archivename = os.path.split(fname)[1]
#     if basename:
#         archivename = "%s/%s" % (basename, archivename)
#     return (fname, archivename)