diff --git a/Doc/lib/libzipfile.tex b/Doc/lib/libzipfile.tex index 3d81e50a36a..d3fca64df95 100644 --- a/Doc/lib/libzipfile.tex +++ b/Doc/lib/libzipfile.tex @@ -17,8 +17,10 @@ understanding of the format, as defined in {PKZIP Application Note}. This module does not currently handle ZIP files which have appended -comments, or multi-disk ZIP files. It can handle ZIP files that use the -ZIP64 extensions (that is ZIP files that are more than 4 GByte in size). +comments, or multi-disk ZIP files. It can handle ZIP files that use +the ZIP64 extensions (that is ZIP files that are more than 4 GByte in +size). It supports decryption of encrypted files in ZIP archives, but +it cannot currently create an encrypted file. The available attributes of this module are: @@ -138,9 +140,18 @@ cat myzip.zip >> python.exe Print a table of contents for the archive to \code{sys.stdout}. \end{methoddesc} -\begin{methoddesc}{read}{name} +\begin{methoddesc}{setpassword}{pwd} + Set \var{pwd} as default password to extract encrypted files. + \versionadded{2.6} +\end{methoddesc} + +\begin{methoddesc}{read}{name\optional{, pwd}} Return the bytes of the file in the archive. The archive must be - open for read or append. + open for read or append. \var{pwd} is the password used for encrypted + files and, if specified, it will override the default password set with + \method{setpassword()}. + + \versionchanged[\var{pwd} was added]{2.6} \end{methoddesc} \begin{methoddesc}{testzip}{} diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index 54684f3bbe9..2d206df5c32 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -349,8 +349,49 @@ class OtherTests(unittest.TestCase): # and report that the first file in the archive was corrupt. self.assertRaises(RuntimeError, zipf.testzip) + +class DecryptionTests(unittest.TestCase): + # This test checks that ZIP decryption works. Since the library does not + # support encryption at the moment, we use a pre-generated encrypted + # ZIP file + + data = ( + 'PK\x03\x04\x14\x00\x01\x00\x00\x00n\x92i.#y\xef?&\x00\x00\x00\x1a\x00' + '\x00\x00\x08\x00\x00\x00test.txt\xfa\x10\xa0gly|\xfa-\xc5\xc0=\xf9y' + '\x18\xe0\xa8r\xb3Z}Lg\xbc\xae\xf9|\x9b\x19\xe4\x8b\xba\xbb)\x8c\xb0\xdbl' + 'PK\x01\x02\x14\x00\x14\x00\x01\x00\x00\x00n\x92i.#y\xef?&\x00\x00\x00' + '\x1a\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x01\x00 \x00\xb6\x81' + '\x00\x00\x00\x00test.txtPK\x05\x06\x00\x00\x00\x00\x01\x00\x01\x006\x00' + '\x00\x00L\x00\x00\x00\x00\x00' ) + + plain = 'zipfile.py encryption test' + + def setUp(self): + fp = open(TESTFN, "wb") + fp.write(self.data) + fp.close() + self.zip = zipfile.ZipFile(TESTFN, "r") + + def tearDown(self): + self.zip.close() + os.unlink(TESTFN) + + def testNoPassword(self): + # Reading the encrypted file without password + # must generate a RunTime exception + self.assertRaises(RuntimeError, self.zip.read, "test.txt") + + def testBadPassword(self): + self.zip.setpassword("perl") + self.assertRaises(RuntimeError, self.zip.read, "test.txt") + + def testGoodPassword(self): + self.zip.setpassword("python") + self.assertEquals(self.zip.read("test.txt"), self.plain) + def test_main(): - run_unittest(TestsWithSourceFile, TestZip64InSmallFiles, OtherTests, PyZipFileTests) + run_unittest(TestsWithSourceFile, TestZip64InSmallFiles, OtherTests, + PyZipFileTests, DecryptionTests) #run_unittest(TestZip64InSmallFiles) if __name__ == "__main__": diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 5c3fff3e74e..93a0b75edb5 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -296,6 +296,65 @@ class ZipInfo (object): extra = extra[ln+4:] +class _ZipDecrypter: + """Class to handle decryption of files stored within a ZIP archive. + + ZIP supports a password-based form of encryption. Even though known + plaintext attacks have been found against it, it is still useful + for low-level securicy. + + Usage: + zd = _ZipDecrypter(mypwd) + plain_char = zd(cypher_char) + plain_text = map(zd, cypher_text) + """ + + def _GenerateCRCTable(): + """Generate a CRC-32 table. + + ZIP encryption uses the CRC32 one-byte primitive for scrambling some + internal keys. We noticed that a direct implementation is faster than + relying on binascii.crc32(). + """ + poly = 0xedb88320 + table = [0] * 256 + for i in range(256): + crc = i + for j in range(8): + if crc & 1: + crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly + else: + crc = ((crc >> 1) & 0x7FFFFFFF) + table[i] = crc + return table + crctable = _GenerateCRCTable() + + def _crc32(self, ch, crc): + """Compute the CRC32 primitive on one byte.""" + return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff] + + def __init__(self, pwd): + self.key0 = 305419896 + self.key1 = 591751049 + self.key2 = 878082192 + for p in pwd: + self._UpdateKeys(p) + + def _UpdateKeys(self, c): + self.key0 = self._crc32(c, self.key0) + self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295 + self.key1 = (self.key1 * 134775813 + 1) & 4294967295 + self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2) + + def __call__(self, c): + """Decrypt a single character.""" + c = ord(c) + k = self.key2 | 2 + c = c ^ (((k * (k^1)) >> 8) & 255) + c = chr(c) + self._UpdateKeys(c) + return c + class ZipFile: """ Class with methods to open, read, write, close, list zip files. @@ -330,6 +389,7 @@ class ZipFile: self.filelist = [] # List of ZipInfo instances for archive self.compression = compression # Method of compression self.mode = key = mode.replace('b', '')[0] + self.pwd = None # Check if we were passed a file-like object if isinstance(file, basestring): @@ -461,7 +521,11 @@ class ZipFile: """Return the instance of ZipInfo given 'name'.""" return self.NameToInfo[name] - def read(self, name): + def setpassword(self, pwd): + """Set default password for encrypted files.""" + self.pwd = pwd + + def read(self, name, pwd=None): """Return file bytes (as a string) for name.""" if self.mode not in ("r", "a"): raise RuntimeError, 'read() requires mode "r" or "a"' @@ -469,6 +533,13 @@ class ZipFile: raise RuntimeError, \ "Attempt to read ZIP archive that was already closed" zinfo = self.getinfo(name) + is_encrypted = zinfo.flag_bits & 0x1 + if is_encrypted: + if not pwd: + pwd = self.pwd + if not pwd: + raise RuntimeError, "File %s is encrypted, " \ + "password required for extraction" % name filepos = self.fp.tell() self.fp.seek(zinfo.header_offset, 0) @@ -489,6 +560,18 @@ class ZipFile: zinfo.orig_filename, fname) bytes = self.fp.read(zinfo.compress_size) + # Go with decryption + if is_encrypted: + zd = _ZipDecrypter(pwd) + # The first 12 bytes in the cypher stream is an encryption header + # used to strengthen the algorithm. The first 11 bytes are + # completely random, while the 12th contains the MSB of the CRC, + # and is used to check the correctness of the password. + h = map(zd, bytes[0:12]) + if ord(h[11]) != ((zinfo.CRC>>24)&255): + raise RuntimeError, "Bad password for file %s" % name + bytes = "".join(map(zd, bytes[12:])) + # Go with decompression self.fp.seek(filepos, 0) if zinfo.compress_type == ZIP_STORED: pass diff --git a/Misc/NEWS b/Misc/NEWS index 7b300443456..06ffc165d79 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -128,6 +128,8 @@ Core and builtins Library ------- +- Patch #698833: Support file decryption in zipfile. + - Patch #685268: Consider a package's __path__ in imputil. - Patch 1463026: Support default namespace in XMLGenerator.