diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index 71cfe977f3d..261747aa06b 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -186,8 +186,14 @@ ZipFile Objects .. note:: - Objects returned by :meth:`.open` can operate independently of the - ZipFile. + If the ZipFile was created by passing in a file-like object as the first + argument to the constructor, then the object returned by :meth:`.open` shares the + ZipFile's file pointer. Under these circumstances, the object returned by + :meth:`.open` should not be used after any additional operations are performed + on the ZipFile object. If the ZipFile was created by passing in a string (the + filename) as the first argument to the constructor, then :meth:`.open` will + create a new file object that will be held by the ZipExtFile, allowing it to + operate independently of the ZipFile. .. note:: diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index 95d3536fa05..f567c78ad8e 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -35,21 +35,9 @@ SMALL_TEST_DATA = [('_ziptest1', '1q2w3e4r5t'), ('ziptest2dir/ziptest3dir/_ziptest3', 'azsxdcfvgb'), ('ziptest2dir/ziptest3dir/ziptest4dir/_ziptest3', '6y7u8i9o0p')] -def getrandbytes(size): - return getrandbits(8 * size).to_bytes(size, 'little') - def getrandbytes(size): return bytes(bytearray.fromhex('%0*x' % (2 * size, getrandbits(8 * size)))) -def get_files(test): - yield TESTFN2 - with TemporaryFile() as f: - yield f - test.assertFalse(f.closed) - with io.BytesIO() as f: - yield f - test.assertFalse(f.closed) - class TestsWithSourceFile(unittest.TestCase): def setUp(self): self.line_gen = ["Zipfile test line %d. random float: %f" % (i, random()) @@ -1427,91 +1415,85 @@ class TestsWithMultipleOpens(unittest.TestCase): def test_same_file(self): # Verify that (when the ZipFile is in control of creating file objects) # multiple open() calls can be made without interfering with each other. - for f in get_files(self): - self.make_test_archive(f) - with zipfile.ZipFile(f, mode="r") as zipf: - with zipf.open('ones') as zopen1, zipf.open('ones') as zopen2: - data1 = zopen1.read(500) - data2 = zopen2.read(500) - data1 += zopen1.read() - data2 += zopen2.read() - self.assertEqual(data1, data2) - self.assertEqual(data1, self.data1) - - def test_different_file(self): - # Verify that (when the ZipFile is in control of creating file objects) - # multiple open() calls can be made without interfering with each other. - for f in get_files(self): - self.make_test_archive(f) - with zipfile.ZipFile(f, mode="r") as zipf: - with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2: - data1 = zopen1.read(500) - data2 = zopen2.read(500) - data1 += zopen1.read() - data2 += zopen2.read() - self.assertEqual(data1, self.data1) - self.assertEqual(data2, self.data2) - - def test_interleaved(self): - # Verify that (when the ZipFile is in control of creating file objects) - # multiple open() calls can be made without interfering with each other. - for f in get_files(self): - self.make_test_archive(f) - with zipfile.ZipFile(f, mode="r") as zipf: - with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2: - data1 = zopen1.read(500) - data2 = zopen2.read(500) - data1 += zopen1.read() - data2 += zopen2.read() - self.assertEqual(data1, self.data1) - self.assertEqual(data2, self.data2) - - def test_read_after_close(self): - for f in get_files(self): - self.make_test_archive(f) - zopen1 = zopen2 = None - try: - with zipfile.ZipFile(f, 'r') as zipf: - zopen1 = zipf.open('ones') - zopen2 = zipf.open('twos') + self.make_test_archive(TESTFN2) + with zipfile.ZipFile(TESTFN2, mode="r") as zipf: + with zipf.open('ones') as zopen1, zipf.open('ones') as zopen2: + data1 = zopen1.read(500) + data2 = zopen2.read(500) + data1 += zopen1.read() + data2 += zopen2.read() + self.assertEqual(data1, data2) + self.assertEqual(data1, self.data1) + + def test_different_file(self): + # Verify that (when the ZipFile is in control of creating file objects) + # multiple open() calls can be made without interfering with each other. + self.make_test_archive(TESTFN2) + with zipfile.ZipFile(TESTFN2, mode="r") as zipf: + with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2: data1 = zopen1.read(500) data2 = zopen2.read(500) data1 += zopen1.read() data2 += zopen2.read() - finally: - if zopen1: - zopen1.close() - if zopen2: - zopen2.close() self.assertEqual(data1, self.data1) self.assertEqual(data2, self.data2) + def test_interleaved(self): + # Verify that (when the ZipFile is in control of creating file objects) + # multiple open() calls can be made without interfering with each other. + self.make_test_archive(TESTFN2) + with zipfile.ZipFile(TESTFN2, mode="r") as zipf: + with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2: + data1 = zopen1.read(500) + data2 = zopen2.read(500) + data1 += zopen1.read() + data2 += zopen2.read() + self.assertEqual(data1, self.data1) + self.assertEqual(data2, self.data2) + + def test_read_after_close(self): + self.make_test_archive(TESTFN2) + zopen1 = zopen2 = None + try: + with zipfile.ZipFile(TESTFN2, 'r') as zipf: + zopen1 = zipf.open('ones') + zopen2 = zipf.open('twos') + data1 = zopen1.read(500) + data2 = zopen2.read(500) + data1 += zopen1.read() + data2 += zopen2.read() + finally: + if zopen1: + zopen1.close() + if zopen2: + zopen2.close() + self.assertEqual(data1, self.data1) + self.assertEqual(data2, self.data2) + def test_read_after_write(self): - for f in get_files(self): - with zipfile.ZipFile(f, 'w', zipfile.ZIP_DEFLATED) as zipf: - zipf.writestr('ones', self.data1) - zipf.writestr('twos', self.data2) - with zipf.open('ones') as zopen1: - data1 = zopen1.read(500) - self.assertEqual(data1, self.data1[:500]) - with zipfile.ZipFile(f, 'r') as zipf: - data1 = zipf.read('ones') - data2 = zipf.read('twos') - self.assertEqual(data1, self.data1) - self.assertEqual(data2, self.data2) + with zipfile.ZipFile(TESTFN2, 'w', zipfile.ZIP_DEFLATED) as zipf: + zipf.writestr('ones', self.data1) + zipf.writestr('twos', self.data2) + with zipf.open('ones') as zopen1: + data1 = zopen1.read(500) + self.assertEqual(data1, self.data1[:500]) + with zipfile.ZipFile(TESTFN2, 'r') as zipf: + data1 = zipf.read('ones') + data2 = zipf.read('twos') + self.assertEqual(data1, self.data1) + self.assertEqual(data2, self.data2) def test_write_after_read(self): - for f in get_files(self): - with zipfile.ZipFile(f, "w", zipfile.ZIP_DEFLATED) as zipf: - zipf.writestr('ones', self.data1) - with zipf.open('ones') as zopen1: - zopen1.read(500) - zipf.writestr('twos', self.data2) - with zipfile.ZipFile(f, 'r') as zipf: - data1 = zipf.read('ones') - data2 = zipf.read('twos') - self.assertEqual(data1, self.data1) - self.assertEqual(data2, self.data2) + with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_DEFLATED) as zipf: + zipf.writestr('ones', self.data1) + with zipf.open('ones') as zopen1: + zopen1.read(500) + zipf.writestr('twos', self.data2) + with zipfile.ZipFile(TESTFN2, 'r') as zipf: + data1 = zipf.read('ones') + data2 = zipf.read('twos') + self.assertEqual(data1, self.data1) + self.assertEqual(data2, self.data2) def test_many_opens(self): # Verify that read() and open() promptly close the file descriptor, diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 7619cfee78d..b77e6c88e46 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -498,25 +498,6 @@ compressor_names = { } -class _SharedFile: - def __init__(self, file, pos, close): - self._file = file - self._pos = pos - self._close = close - - def read(self, n=-1): - self._file.seek(self._pos) - data = self._file.read(n) - self._pos = self._file.tell() - return data - - def close(self): - if self._file is not None: - fileobj = self._file - self._file = None - self._close(fileobj) - - class ZipExtFile(io.BufferedIOBase): """File-like object for reading an archive member. Is returned by ZipFile.open(). @@ -762,7 +743,7 @@ class ZipFile(object): self.NameToInfo = {} # Find file info given name self.filelist = [] # List of ZipInfo instances for archive self.compression = compression # Method of compression - self.mode = mode + self.mode = key = mode.replace('b', '')[0] self.pwd = None self._comment = '' @@ -770,33 +751,28 @@ class ZipFile(object): if isinstance(file, basestring): self._filePassed = 0 self.filename = file - modeDict = {'r' : 'rb', 'w': 'w+b', 'a' : 'r+b', - 'r+b': 'w+b', 'w+b': 'wb'} - filemode = modeDict[mode] - while True: - try: - self.fp = io.open(file, filemode) - except IOError: - if filemode in modeDict: - filemode = modeDict[filemode] - continue + modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'} + try: + self.fp = open(file, modeDict[mode]) + except IOError: + if mode == 'a': + mode = key = 'w' + self.fp = open(file, modeDict[mode]) + else: raise - break else: self._filePassed = 1 self.fp = file self.filename = getattr(file, 'name', None) - self._fileRefCnt = 1 try: - if mode == 'r': + if key == 'r': self._RealGetContents() - elif mode == 'w': + elif key == 'w': # set the modified flag so central directory gets written # even if no files are added to the archive self._didModify = True - self.start_dir = 0 - elif mode == 'a': + elif key == 'a': try: # See if file is a zip file self._RealGetContents() @@ -809,13 +785,13 @@ class ZipFile(object): # set the modified flag so central directory gets written # even if no files are added to the archive self._didModify = True - self.start_dir = self.fp.tell() else: raise RuntimeError('Mode must be "r", "w" or "a"') except: fp = self.fp self.fp = None - self._fpclose(fp) + if not self._filePassed: + fp.close() raise def __enter__(self): @@ -966,17 +942,26 @@ class ZipFile(object): raise RuntimeError, \ "Attempt to read ZIP archive that was already closed" - # Make sure we have an info object - if isinstance(name, ZipInfo): - # 'name' is already an info object - zinfo = name + # Only open a new file for instances where we were not + # given a file object in the constructor + if self._filePassed: + zef_file = self.fp + should_close = False else: - # Get info object for name - zinfo = self.getinfo(name) + zef_file = open(self.filename, 'rb') + should_close = True - self._fileRefCnt += 1 - zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose) try: + # Make sure we have an info object + if isinstance(name, ZipInfo): + # 'name' is already an info object + zinfo = name + else: + # Get info object for name + zinfo = self.getinfo(name) + + zef_file.seek(zinfo.header_offset, 0) + # Skip the file header: fheader = zef_file.read(sizeFileHeader) if len(fheader) != sizeFileHeader: @@ -1021,9 +1006,11 @@ class ZipFile(object): if ord(h[11]) != check_byte: raise RuntimeError("Bad password for file", name) - return ZipExtFile(zef_file, mode, zinfo, zd, True) + return ZipExtFile(zef_file, mode, zinfo, zd, + close_fileobj=should_close) except: - zef_file.close() + if should_close: + zef_file.close() raise def extract(self, member, path=None, pwd=None): @@ -1154,7 +1141,6 @@ class ZipFile(object): zinfo.file_size = st.st_size zinfo.flag_bits = 0x00 - self.fp.seek(self.start_dir, 0) zinfo.header_offset = self.fp.tell() # Start of header bytes self._writecheck(zinfo) @@ -1168,7 +1154,6 @@ class ZipFile(object): self.filelist.append(zinfo) self.NameToInfo[zinfo.filename] = zinfo self.fp.write(zinfo.FileHeader(False)) - self.start_dir = self.fp.tell() return with open(filename, "rb") as fp: @@ -1211,10 +1196,10 @@ class ZipFile(object): raise RuntimeError('Compressed size larger than uncompressed size') # Seek backwards and write file header (which will now include # correct CRC and file sizes) - self.start_dir = self.fp.tell() # Preserve current position in file + position = self.fp.tell() # Preserve current position in file self.fp.seek(zinfo.header_offset, 0) self.fp.write(zinfo.FileHeader(zip64)) - self.fp.seek(self.start_dir, 0) + self.fp.seek(position, 0) self.filelist.append(zinfo) self.NameToInfo[zinfo.filename] = zinfo @@ -1243,7 +1228,6 @@ class ZipFile(object): zinfo.compress_type = compress_type zinfo.file_size = len(bytes) # Uncompressed size - self.fp.seek(self.start_dir, 0) zinfo.header_offset = self.fp.tell() # Start of header bytes self._writecheck(zinfo) self._didModify = True @@ -1267,7 +1251,6 @@ class ZipFile(object): self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size, zinfo.file_size)) self.fp.flush() - self.start_dir = self.fp.tell() self.filelist.append(zinfo) self.NameToInfo[zinfo.filename] = zinfo @@ -1283,7 +1266,7 @@ class ZipFile(object): try: if self.mode in ("w", "a") and self._didModify: # write ending records - self.fp.seek(self.start_dir, 0) + pos1 = self.fp.tell() for zinfo in self.filelist: # write central directory dt = zinfo.date_time dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] @@ -1346,8 +1329,8 @@ class ZipFile(object): pos2 = self.fp.tell() # Write end-of-zip-archive record centDirCount = len(self.filelist) - centDirSize = pos2 - self.start_dir - centDirOffset = self.start_dir + centDirSize = pos2 - pos1 + centDirOffset = pos1 requires_zip64 = None if centDirCount > ZIP_FILECOUNT_LIMIT: requires_zip64 = "Files count" @@ -1383,13 +1366,8 @@ class ZipFile(object): finally: fp = self.fp self.fp = None - self._fpclose(fp) - - def _fpclose(self, fp): - assert self._fileRefCnt > 0 - self._fileRefCnt -= 1 - if not self._fileRefCnt and not self._filePassed: - fp.close() + if not self._filePassed: + fp.close() class PyZipFile(ZipFile): diff --git a/Misc/NEWS b/Misc/NEWS index fa5acd860ba..6e466e46598 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -53,11 +53,6 @@ Library - Issue #23016: A warning no longer produces an AttributeError when sys.stderr is None. -- Issue #14099: ZipFile.open() no longer reopen the underlying file. Objects - returned by ZipFile.open() can now operate independently of the ZipFile even - if the ZipFile was created by passing in a file-like object as the first - argument to the constructor. - - Issue #21032. Fixed socket leak if HTTPConnection.getresponse() fails. Original patch by Martin Panter.