Issue #14099: ZipFile.open() no longer reopens the underlying file. Objects
returned by ZipFile.open() can now operate independently of the ZipFile even if the ZipFile was created by passing in a file-like object as the first argument to the constructor.
This commit is contained in:
parent
d87de83582
commit
1ad088f3ea
|
@ -219,14 +219,8 @@ ZipFile Objects
|
|||
|
||||
.. note::
|
||||
|
||||
If the ZipFile was created by passing in a file-like object as the first
|
||||
argument to the constructor, then the object returned by :meth:`.open` shares the
|
||||
ZipFile's file pointer. Under these circumstances, the object returned by
|
||||
:meth:`.open` should not be used after any additional operations are performed
|
||||
on the ZipFile object. If the ZipFile was created by passing in a string (the
|
||||
filename) as the first argument to the constructor, then :meth:`.open` will
|
||||
create a new file object that will be held by the ZipExtFile, allowing it to
|
||||
operate independently of the ZipFile.
|
||||
Objects returned by :meth:`.open` can operate independently of the
|
||||
ZipFile.
|
||||
|
||||
.. note::
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import contextlib
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
|
@ -25,6 +26,9 @@ SMALL_TEST_DATA = [('_ziptest1', '1q2w3e4r5t'),
|
|||
('ziptest2dir/ziptest3dir/_ziptest3', 'azsxdcfvgb'),
|
||||
('ziptest2dir/ziptest3dir/ziptest4dir/_ziptest3', '6y7u8i9o0p')]
|
||||
|
||||
def getrandbytes(size):
    """Return *size* bytes produced from a single getrandbits() draw."""
    nbits = size * 8
    value = getrandbits(nbits)
    # Serialise the integer as exactly `size` little-endian bytes.
    return value.to_bytes(size, 'little')
|
||||
|
||||
def get_files(test):
|
||||
yield TESTFN2
|
||||
with TemporaryFile() as f:
|
||||
|
@ -289,7 +293,7 @@ class AbstractTestsWithSourceFile:
|
|||
# than requested.
|
||||
for test_size in (1, 4095, 4096, 4097, 16384):
|
||||
file_size = test_size + 1
|
||||
junk = getrandbits(8 * file_size).to_bytes(file_size, 'little')
|
||||
junk = getrandbytes(file_size)
|
||||
with zipfile.ZipFile(io.BytesIO(), "w", self.compression) as zipf:
|
||||
zipf.writestr('foo', junk)
|
||||
with zipf.open('foo', 'r') as fp:
|
||||
|
@ -1635,46 +1639,111 @@ class LzmaTestsWithRandomBinaryFiles(AbstractTestsWithRandomBinaryFiles,
|
|||
|
||||
@requires_zlib
|
||||
class TestsWithMultipleOpens(unittest.TestCase):
|
||||
def setUp(self):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.data1 = b'111' + getrandbytes(10000)
|
||||
cls.data2 = b'222' + getrandbytes(10000)
|
||||
|
||||
def make_test_archive(self, f):
|
||||
# Create the ZIP archive
|
||||
with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_DEFLATED) as zipfp:
|
||||
zipfp.writestr('ones', '1'*FIXEDTEST_SIZE)
|
||||
zipfp.writestr('twos', '2'*FIXEDTEST_SIZE)
|
||||
with zipfile.ZipFile(f, "w", zipfile.ZIP_DEFLATED) as zipfp:
|
||||
zipfp.writestr('ones', self.data1)
|
||||
zipfp.writestr('twos', self.data2)
|
||||
|
||||
def test_same_file(self):
|
||||
# Verify that (when the ZipFile is in control of creating file objects)
|
||||
# multiple open() calls can be made without interfering with each other.
|
||||
with zipfile.ZipFile(TESTFN2, mode="r") as zipf:
|
||||
with zipf.open('ones') as zopen1, zipf.open('ones') as zopen2:
|
||||
data1 = zopen1.read(500)
|
||||
data2 = zopen2.read(500)
|
||||
data1 += zopen1.read(500)
|
||||
data2 += zopen2.read(500)
|
||||
self.assertEqual(data1, data2)
|
||||
for f in get_files(self):
|
||||
self.make_test_archive(f)
|
||||
with zipfile.ZipFile(f, mode="r") as zipf:
|
||||
with zipf.open('ones') as zopen1, zipf.open('ones') as zopen2:
|
||||
data1 = zopen1.read(500)
|
||||
data2 = zopen2.read(500)
|
||||
data1 += zopen1.read()
|
||||
data2 += zopen2.read()
|
||||
self.assertEqual(data1, data2)
|
||||
self.assertEqual(data1, self.data1)
|
||||
|
||||
def test_different_file(self):
|
||||
# Verify that (when the ZipFile is in control of creating file objects)
|
||||
# multiple open() calls can be made without interfering with each other.
|
||||
with zipfile.ZipFile(TESTFN2, mode="r") as zipf:
|
||||
with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2:
|
||||
data1 = zopen1.read(500)
|
||||
data2 = zopen2.read(500)
|
||||
data1 += zopen1.read(500)
|
||||
data2 += zopen2.read(500)
|
||||
self.assertEqual(data1, b'1'*FIXEDTEST_SIZE)
|
||||
self.assertEqual(data2, b'2'*FIXEDTEST_SIZE)
|
||||
for f in get_files(self):
|
||||
self.make_test_archive(f)
|
||||
with zipfile.ZipFile(f, mode="r") as zipf:
|
||||
with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2:
|
||||
data1 = zopen1.read(500)
|
||||
data2 = zopen2.read(500)
|
||||
data1 += zopen1.read()
|
||||
data2 += zopen2.read()
|
||||
self.assertEqual(data1, self.data1)
|
||||
self.assertEqual(data2, self.data2)
|
||||
|
||||
def test_interleaved(self):
|
||||
# Verify that (when the ZipFile is in control of creating file objects)
|
||||
# multiple open() calls can be made without interfering with each other.
|
||||
with zipfile.ZipFile(TESTFN2, mode="r") as zipf:
|
||||
with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2:
|
||||
for f in get_files(self):
|
||||
self.make_test_archive(f)
|
||||
with zipfile.ZipFile(f, mode="r") as zipf:
|
||||
with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2:
|
||||
data1 = zopen1.read(500)
|
||||
data2 = zopen2.read(500)
|
||||
data1 += zopen1.read()
|
||||
data2 += zopen2.read()
|
||||
self.assertEqual(data1, self.data1)
|
||||
self.assertEqual(data2, self.data2)
|
||||
|
||||
def test_read_after_close(self):
|
||||
for f in get_files(self):
|
||||
self.make_test_archive(f)
|
||||
with contextlib.ExitStack() as stack:
|
||||
with zipfile.ZipFile(f, 'r') as zipf:
|
||||
zopen1 = stack.enter_context(zipf.open('ones'))
|
||||
zopen2 = stack.enter_context(zipf.open('twos'))
|
||||
data1 = zopen1.read(500)
|
||||
data2 = zopen2.read(500)
|
||||
data1 += zopen1.read(500)
|
||||
data2 += zopen2.read(500)
|
||||
self.assertEqual(data1, b'1'*FIXEDTEST_SIZE)
|
||||
self.assertEqual(data2, b'2'*FIXEDTEST_SIZE)
|
||||
data1 += zopen1.read()
|
||||
data2 += zopen2.read()
|
||||
self.assertEqual(data1, self.data1)
|
||||
self.assertEqual(data2, self.data2)
|
||||
|
||||
def test_read_after_write(self):
|
||||
for f in get_files(self):
|
||||
with zipfile.ZipFile(f, 'w', zipfile.ZIP_DEFLATED) as zipf:
|
||||
zipf.writestr('ones', self.data1)
|
||||
zipf.writestr('twos', self.data2)
|
||||
with zipf.open('ones') as zopen1:
|
||||
data1 = zopen1.read(500)
|
||||
self.assertEqual(data1, self.data1[:500])
|
||||
with zipfile.ZipFile(f, 'r') as zipf:
|
||||
data1 = zipf.read('ones')
|
||||
data2 = zipf.read('twos')
|
||||
self.assertEqual(data1, self.data1)
|
||||
self.assertEqual(data2, self.data2)
|
||||
|
||||
def test_write_after_read(self):
|
||||
for f in get_files(self):
|
||||
with zipfile.ZipFile(f, "w", zipfile.ZIP_DEFLATED) as zipf:
|
||||
zipf.writestr('ones', self.data1)
|
||||
with zipf.open('ones') as zopen1:
|
||||
zopen1.read(500)
|
||||
zipf.writestr('twos', self.data2)
|
||||
with zipfile.ZipFile(f, 'r') as zipf:
|
||||
data1 = zipf.read('ones')
|
||||
data2 = zipf.read('twos')
|
||||
self.assertEqual(data1, self.data1)
|
||||
self.assertEqual(data2, self.data2)
|
||||
|
||||
def test_many_opens(self):
|
||||
# Verify that read() and open() promptly close the file descriptor,
|
||||
# and don't rely on the garbage collector to free resources.
|
||||
self.make_test_archive(TESTFN2)
|
||||
with zipfile.ZipFile(TESTFN2, mode="r") as zipf:
|
||||
for x in range(100):
|
||||
zipf.read('ones')
|
||||
with zipf.open('ones') as zopen1:
|
||||
pass
|
||||
with open(os.devnull) as f:
|
||||
self.assertLess(f.fileno(), 100)
|
||||
|
||||
def tearDown(self):
|
||||
unlink(TESTFN2)
|
||||
|
|
103
Lib/zipfile.py
103
Lib/zipfile.py
|
@ -624,6 +624,25 @@ def _get_decompressor(compress_type):
|
|||
raise NotImplementedError("compression type %d" % (compress_type,))
|
||||
|
||||
|
||||
class _SharedFile:
|
||||
def __init__(self, file, pos, close):
|
||||
self._file = file
|
||||
self._pos = pos
|
||||
self._close = close
|
||||
|
||||
def read(self, n=-1):
|
||||
self._file.seek(self._pos)
|
||||
data = self._file.read(n)
|
||||
self._pos = self._file.tell()
|
||||
return data
|
||||
|
||||
def close(self):
|
||||
if self._file is not None:
|
||||
fileobj = self._file
|
||||
self._file = None
|
||||
self._close(fileobj)
|
||||
|
||||
|
||||
class ZipExtFile(io.BufferedIOBase):
|
||||
"""File-like object for reading an archive member.
|
||||
Is returned by ZipFile.open().
|
||||
|
@ -909,7 +928,7 @@ class ZipFile:
|
|||
self.NameToInfo = {} # Find file info given name
|
||||
self.filelist = [] # List of ZipInfo instances for archive
|
||||
self.compression = compression # Method of compression
|
||||
self.mode = key = mode.replace('b', '')[0]
|
||||
self.mode = mode
|
||||
self.pwd = None
|
||||
self._comment = b''
|
||||
|
||||
|
@ -918,28 +937,33 @@ class ZipFile:
|
|||
# No, it's a filename
|
||||
self._filePassed = 0
|
||||
self.filename = file
|
||||
modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
|
||||
try:
|
||||
self.fp = io.open(file, modeDict[mode])
|
||||
except OSError:
|
||||
if mode == 'a':
|
||||
mode = key = 'w'
|
||||
self.fp = io.open(file, modeDict[mode])
|
||||
else:
|
||||
modeDict = {'r' : 'rb', 'w': 'w+b', 'a' : 'r+b',
|
||||
'r+b': 'w+b', 'w+b': 'wb'}
|
||||
filemode = modeDict[mode]
|
||||
while True:
|
||||
try:
|
||||
self.fp = io.open(file, filemode)
|
||||
except OSError:
|
||||
if filemode in modeDict:
|
||||
filemode = modeDict[filemode]
|
||||
continue
|
||||
raise
|
||||
break
|
||||
else:
|
||||
self._filePassed = 1
|
||||
self.fp = file
|
||||
self.filename = getattr(file, 'name', None)
|
||||
self._fileRefCnt = 1
|
||||
|
||||
try:
|
||||
if key == 'r':
|
||||
if mode == 'r':
|
||||
self._RealGetContents()
|
||||
elif key == 'w':
|
||||
elif mode == 'w':
|
||||
# set the modified flag so central directory gets written
|
||||
# even if no files are added to the archive
|
||||
self._didModify = True
|
||||
elif key == 'a':
|
||||
self.start_dir = 0
|
||||
elif mode == 'a':
|
||||
try:
|
||||
# See if file is a zip file
|
||||
self._RealGetContents()
|
||||
|
@ -952,13 +976,13 @@ class ZipFile:
|
|||
# set the modified flag so central directory gets written
|
||||
# even if no files are added to the archive
|
||||
self._didModify = True
|
||||
self.start_dir = self.fp.tell()
|
||||
else:
|
||||
raise RuntimeError('Mode must be "r", "w" or "a"')
|
||||
except:
|
||||
fp = self.fp
|
||||
self.fp = None
|
||||
if not self._filePassed:
|
||||
fp.close()
|
||||
self._fpclose(fp)
|
||||
raise
|
||||
|
||||
def __enter__(self):
|
||||
|
@ -1131,23 +1155,17 @@ class ZipFile:
|
|||
raise RuntimeError(
|
||||
"Attempt to read ZIP archive that was already closed")
|
||||
|
||||
# Only open a new file for instances where we were not
|
||||
# given a file object in the constructor
|
||||
if self._filePassed:
|
||||
zef_file = self.fp
|
||||
# Make sure we have an info object
|
||||
if isinstance(name, ZipInfo):
|
||||
# 'name' is already an info object
|
||||
zinfo = name
|
||||
else:
|
||||
zef_file = io.open(self.filename, 'rb')
|
||||
# Get info object for name
|
||||
zinfo = self.getinfo(name)
|
||||
|
||||
self._fileRefCnt += 1
|
||||
zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose)
|
||||
try:
|
||||
# Make sure we have an info object
|
||||
if isinstance(name, ZipInfo):
|
||||
# 'name' is already an info object
|
||||
zinfo = name
|
||||
else:
|
||||
# Get info object for name
|
||||
zinfo = self.getinfo(name)
|
||||
zef_file.seek(zinfo.header_offset, 0)
|
||||
|
||||
# Skip the file header:
|
||||
fheader = zef_file.read(sizeFileHeader)
|
||||
if len(fheader) != sizeFileHeader:
|
||||
|
@ -1206,11 +1224,9 @@ class ZipFile:
|
|||
if h[11] != check_byte:
|
||||
raise RuntimeError("Bad password for file", name)
|
||||
|
||||
return ZipExtFile(zef_file, mode, zinfo, zd,
|
||||
close_fileobj=not self._filePassed)
|
||||
return ZipExtFile(zef_file, mode, zinfo, zd, True)
|
||||
except:
|
||||
if not self._filePassed:
|
||||
zef_file.close()
|
||||
zef_file.close()
|
||||
raise
|
||||
|
||||
def extract(self, member, path=None, pwd=None):
|
||||
|
@ -1344,6 +1360,7 @@ class ZipFile:
|
|||
|
||||
zinfo.file_size = st.st_size
|
||||
zinfo.flag_bits = 0x00
|
||||
self.fp.seek(self.start_dir, 0)
|
||||
zinfo.header_offset = self.fp.tell() # Start of header bytes
|
||||
if zinfo.compress_type == ZIP_LZMA:
|
||||
# Compressed data includes an end-of-stream (EOS) marker
|
||||
|
@ -1360,6 +1377,7 @@ class ZipFile:
|
|||
self.filelist.append(zinfo)
|
||||
self.NameToInfo[zinfo.filename] = zinfo
|
||||
self.fp.write(zinfo.FileHeader(False))
|
||||
self.start_dir = self.fp.tell()
|
||||
return
|
||||
|
||||
cmpr = _get_compressor(zinfo.compress_type)
|
||||
|
@ -1398,10 +1416,10 @@ class ZipFile:
|
|||
raise RuntimeError('Compressed size larger than uncompressed size')
|
||||
# Seek backwards and write file header (which will now include
|
||||
# correct CRC and file sizes)
|
||||
position = self.fp.tell() # Preserve current position in file
|
||||
self.start_dir = self.fp.tell() # Preserve current position in file
|
||||
self.fp.seek(zinfo.header_offset, 0)
|
||||
self.fp.write(zinfo.FileHeader(zip64))
|
||||
self.fp.seek(position, 0)
|
||||
self.fp.seek(self.start_dir, 0)
|
||||
self.filelist.append(zinfo)
|
||||
self.NameToInfo[zinfo.filename] = zinfo
|
||||
|
||||
|
@ -1430,6 +1448,7 @@ class ZipFile:
|
|||
"Attempt to write to ZIP archive that was already closed")
|
||||
|
||||
zinfo.file_size = len(data) # Uncompressed size
|
||||
self.fp.seek(self.start_dir, 0)
|
||||
zinfo.header_offset = self.fp.tell() # Start of header data
|
||||
if compress_type is not None:
|
||||
zinfo.compress_type = compress_type
|
||||
|
@ -1458,6 +1477,7 @@ class ZipFile:
|
|||
self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
|
||||
zinfo.file_size))
|
||||
self.fp.flush()
|
||||
self.start_dir = self.fp.tell()
|
||||
self.filelist.append(zinfo)
|
||||
self.NameToInfo[zinfo.filename] = zinfo
|
||||
|
||||
|
@ -1473,7 +1493,7 @@ class ZipFile:
|
|||
|
||||
try:
|
||||
if self.mode in ("w", "a") and self._didModify: # write ending records
|
||||
pos1 = self.fp.tell()
|
||||
self.fp.seek(self.start_dir, 0)
|
||||
for zinfo in self.filelist: # write central directory
|
||||
dt = zinfo.date_time
|
||||
dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
|
||||
|
@ -1539,8 +1559,8 @@ class ZipFile:
|
|||
pos2 = self.fp.tell()
|
||||
# Write end-of-zip-archive record
|
||||
centDirCount = len(self.filelist)
|
||||
centDirSize = pos2 - pos1
|
||||
centDirOffset = pos1
|
||||
centDirSize = pos2 - self.start_dir
|
||||
centDirOffset = self.start_dir
|
||||
requires_zip64 = None
|
||||
if centDirCount > ZIP_FILECOUNT_LIMIT:
|
||||
requires_zip64 = "Files count"
|
||||
|
@ -1576,8 +1596,13 @@ class ZipFile:
|
|||
finally:
|
||||
fp = self.fp
|
||||
self.fp = None
|
||||
if not self._filePassed:
|
||||
fp.close()
|
||||
self._fpclose(fp)
|
||||
|
||||
def _fpclose(self, fp):
|
||||
assert self._fileRefCnt > 0
|
||||
self._fileRefCnt -= 1
|
||||
if not self._fileRefCnt and not self._filePassed:
|
||||
fp.close()
|
||||
|
||||
|
||||
class PyZipFile(ZipFile):
|
||||
|
|
|
@ -39,6 +39,11 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #14099: ZipFile.open() no longer reopens the underlying file. Objects
|
||||
returned by ZipFile.open() can now operate independently of the ZipFile even
|
||||
if the ZipFile was created by passing in a file-like object as the first
|
||||
argument to the constructor.
|
||||
|
||||
- Issue #22966: Fix __pycache__ pyc file name clobber when pyc_compile is
|
||||
asked to compile a source file containing multiple dots in the source file
|
||||
name.
|
||||
|
|
Loading…
Reference in New Issue