Issue #26039: zipfile.ZipFile.open() can now be used to write data into a ZIP

file, as well as for extracting data.  Patch by Thomas Kluyver.
This commit is contained in:
Serhiy Storchaka 2016-05-13 13:52:49 +03:00
parent 5d1110a952
commit 18ee29d0b8
5 changed files with 295 additions and 127 deletions

View File

@ -207,15 +207,15 @@ ZipFile Objects
.. index::
single: universal newlines; zipfile.ZipFile.open method
.. method:: ZipFile.open(name, mode='r', pwd=None)
.. method:: ZipFile.open(name, mode='r', pwd=None, force_zip64=False)
Extract a member from the archive as a file-like object (ZipExtFile). *name*
Access a member of the archive as a file-like object. *name*
is the name of the file in the archive, or a :class:`ZipInfo` object. The
*mode* parameter, if included, must be one of the following: ``'r'`` (the
default), ``'U'``, or ``'rU'``. Choosing ``'U'`` or ``'rU'`` will enable
:term:`universal newlines` support in the read-only object. *pwd* is the
password used for encrypted files. Calling :meth:`.open` on a closed
ZipFile will raise a :exc:`RuntimeError`.
default), ``'U'``, ``'rU'`` or ``'w'``. Choosing ``'U'`` or ``'rU'`` will
enable :term:`universal newlines` support in the read-only object. *pwd* is
the password used to decrypt encrypted ZIP files. Calling :meth:`.open` on
a closed ZipFile will raise a :exc:`RuntimeError`.
:meth:`~ZipFile.open` is also a context manager and therefore supports the
:keyword:`with` statement::
@ -224,17 +224,23 @@ ZipFile Objects
with myzip.open('eggs.txt') as myfile:
print(myfile.read())
.. note::
The file-like object is read-only and provides the following methods:
With *mode* ``'r'``, ``'U'`` or ``'rU'``, the file-like object
(``ZipExtFile``) is read-only and provides the following methods:
:meth:`~io.BufferedIOBase.read`, :meth:`~io.IOBase.readline`,
:meth:`~io.IOBase.readlines`, :meth:`__iter__`,
:meth:`~iterator.__next__`.
:meth:`~iterator.__next__`. These objects can operate independently of
the ZipFile.
.. note::
With ``mode='w'``, a writable file handle is returned, which supports the
:meth:`~io.BufferedIOBase.write` method. While a writable file handle is open,
attempting to read or write other files in the ZIP file will raise a
:exc:`RuntimeError`.
Objects returned by :meth:`.open` can operate independently of the
ZipFile.
When writing a file, if the file size is not known in advance but may exceed
2 GiB, pass ``force_zip64=True`` to ensure that the header format is
capable of supporting large files. If the file size is known in advance,
construct a :class:`ZipInfo` object with :attr:`~ZipInfo.file_size` set, and
use that as the *name* parameter.
.. note::
@ -246,6 +252,10 @@ ZipFile Objects
The ``'U'`` or ``'rU'`` mode. Use :class:`io.TextIOWrapper` for reading
compressed text files in :term:`universal newlines` mode.
.. versionchanged:: 3.6
:meth:`open` can now be used to write files into the archive with the
``mode='w'`` option.
.. method:: ZipFile.extract(member, path=None, pwd=None)
Extract a member from the archive to the current working directory; *member*

View File

@ -350,6 +350,10 @@ A new :meth:`ZipInfo.is_dir() <zipfile.ZipInfo.is_dir>` method can be used
to check if the :class:`~zipfile.ZipInfo` instance represents a directory.
(Contributed by Thomas Kluyver in :issue:`26039`.)
The :meth:`ZipFile.open() <zipfile.ZipFile.open>` method can now be used to
write data into a ZIP file, as well as for extracting data.
(Contributed by Thomas Kluyver in :issue:`26039`.)
zlib
----

View File

@ -61,6 +61,9 @@ class AbstractTestsWithSourceFile:
zipfp.write(TESTFN, "another.name")
zipfp.write(TESTFN, TESTFN)
zipfp.writestr("strfile", self.data)
with zipfp.open('written-open-w', mode='w') as f:
for line in self.line_gen:
f.write(line)
def zip_test(self, f, compression):
self.make_test_archive(f, compression)
@ -76,7 +79,7 @@ class AbstractTestsWithSourceFile:
zipfp.printdir(file=fp)
directory = fp.getvalue()
lines = directory.splitlines()
self.assertEqual(len(lines), 4) # Number of files + header
self.assertEqual(len(lines), 5) # Number of files + header
self.assertIn('File Name', lines[0])
self.assertIn('Modified', lines[0])
@ -90,23 +93,25 @@ class AbstractTestsWithSourceFile:
# Check the namelist
names = zipfp.namelist()
self.assertEqual(len(names), 3)
self.assertEqual(len(names), 4)
self.assertIn(TESTFN, names)
self.assertIn("another.name", names)
self.assertIn("strfile", names)
self.assertIn("written-open-w", names)
# Check infolist
infos = zipfp.infolist()
names = [i.filename for i in infos]
self.assertEqual(len(names), 3)
self.assertEqual(len(names), 4)
self.assertIn(TESTFN, names)
self.assertIn("another.name", names)
self.assertIn("strfile", names)
self.assertIn("written-open-w", names)
for i in infos:
self.assertEqual(i.file_size, len(self.data))
# check getinfo
for nm in (TESTFN, "another.name", "strfile"):
for nm in (TESTFN, "another.name", "strfile", "written-open-w"):
info = zipfp.getinfo(nm)
self.assertEqual(info.filename, nm)
self.assertEqual(info.file_size, len(self.data))
@ -372,14 +377,18 @@ class StoredTestsWithSourceFile(AbstractTestsWithSourceFile,
test_low_compression = None
def zip_test_writestr_permissions(self, f, compression):
# Make sure that writestr creates files with mode 0600,
# when it is passed a name rather than a ZipInfo instance.
# Make sure that writestr and open(... mode='w') create files with
# mode 0600, when they are passed a name rather than a ZipInfo
# instance.
self.make_test_archive(f, compression)
with zipfile.ZipFile(f, "r") as zipfp:
zinfo = zipfp.getinfo('strfile')
self.assertEqual(zinfo.external_attr, 0o600 << 16)
zinfo2 = zipfp.getinfo('written-open-w')
self.assertEqual(zinfo2.external_attr, 0o600 << 16)
def test_writestr_permissions(self):
for f in get_files(self):
self.zip_test_writestr_permissions(f, zipfile.ZIP_STORED)
@ -451,6 +460,10 @@ class StoredTestsWithSourceFile(AbstractTestsWithSourceFile,
with zipfile.ZipFile(TESTFN2, mode="r") as zipfp:
self.assertRaises(RuntimeError, zipfp.write, TESTFN)
with zipfile.ZipFile(TESTFN2, mode="r") as zipfp:
with self.assertRaises(RuntimeError):
zipfp.open(TESTFN, mode='w')
def test_add_file_before_1980(self):
# Set atime and mtime to 1970-01-01
os.utime(TESTFN, (0, 0))
@ -1428,6 +1441,35 @@ class OtherTests(unittest.TestCase):
# testzip returns the name of the first corrupt file, or None
self.assertIsNone(zipf.testzip())
def test_open_conflicting_handles(self):
# It's only possible to open one writable file handle at a time
msg1 = b"It's fun to charter an accountant!"
msg2 = b"And sail the wide accountant sea"
msg3 = b"To find, explore the funds offshore"
with zipfile.ZipFile(TESTFN2, 'w', zipfile.ZIP_STORED) as zipf:
with zipf.open('foo', mode='w') as w2:
w2.write(msg1)
with zipf.open('bar', mode='w') as w1:
with self.assertRaises(RuntimeError):
zipf.open('handle', mode='w')
with self.assertRaises(RuntimeError):
zipf.open('foo', mode='r')
with self.assertRaises(RuntimeError):
zipf.writestr('str', 'abcde')
with self.assertRaises(RuntimeError):
zipf.write(__file__, 'file')
with self.assertRaises(RuntimeError):
zipf.close()
w1.write(msg2)
with zipf.open('baz', mode='w') as w2:
w2.write(msg3)
with zipfile.ZipFile(TESTFN2, 'r') as zipf:
self.assertEqual(zipf.read('foo'), msg1)
self.assertEqual(zipf.read('bar'), msg2)
self.assertEqual(zipf.read('baz'), msg3)
self.assertEqual(zipf.namelist(), ['foo', 'bar', 'baz'])
def tearDown(self):
unlink(TESTFN)
unlink(TESTFN2)
@ -1761,6 +1803,22 @@ class UnseekableTests(unittest.TestCase):
with zipf.open('twos') as zopen:
self.assertEqual(zopen.read(), b'222')
def test_open_write(self):
for wrapper in (lambda f: f), Tellable, Unseekable:
with self.subTest(wrapper=wrapper):
f = io.BytesIO()
f.write(b'abc')
bf = io.BufferedWriter(f)
with zipfile.ZipFile(wrapper(bf), 'w', zipfile.ZIP_STORED) as zipf:
with zipf.open('ones', 'w') as zopen:
zopen.write(b'111')
with zipf.open('twos', 'w') as zopen:
zopen.write(b'222')
self.assertEqual(f.getvalue()[:5], b'abcPK')
with zipfile.ZipFile(f) as zipf:
self.assertEqual(zipf.read('ones'), b'111')
self.assertEqual(zipf.read('twos'), b'222')
@requires_zlib
class TestsWithMultipleOpens(unittest.TestCase):
@ -1870,6 +1928,19 @@ class TestsWithMultipleOpens(unittest.TestCase):
with open(os.devnull) as f:
self.assertLess(f.fileno(), 100)
def test_write_while_reading(self):
with zipfile.ZipFile(TESTFN2, 'w', zipfile.ZIP_DEFLATED) as zipf:
zipf.writestr('ones', self.data1)
with zipfile.ZipFile(TESTFN2, 'a', zipfile.ZIP_DEFLATED) as zipf:
with zipf.open('ones', 'r') as r1:
data1 = r1.read(500)
with zipf.open('twos', 'w') as w1:
w1.write(self.data2)
data1 += r1.read()
self.assertEqual(data1, self.data1)
with zipfile.ZipFile(TESTFN2) as zipf:
self.assertEqual(zipf.read('twos'), self.data2)
def tearDown(self):
unlink(TESTFN2)

View File

@ -686,14 +686,19 @@ def _get_decompressor(compress_type):
class _SharedFile:
def __init__(self, file, pos, close, lock):
def __init__(self, file, pos, close, lock, writing):
self._file = file
self._pos = pos
self._close = close
self._lock = lock
self._writing = writing
def read(self, n=-1):
with self._lock:
if self._writing():
raise RuntimeError("Can't read from the ZIP file while there "
"is an open writing handle on it. "
"Close the writing handle before trying to read.")
self._file.seek(self._pos)
data = self._file.read(n)
self._pos = self._file.tell()
@ -993,6 +998,76 @@ class ZipExtFile(io.BufferedIOBase):
super().close()
class _ZipWriteFile(io.BufferedIOBase):
def __init__(self, zf, zinfo, zip64):
self._zinfo = zinfo
self._zip64 = zip64
self._zipfile = zf
self._compressor = _get_compressor(zinfo.compress_type)
self._file_size = 0
self._compress_size = 0
self._crc = 0
@property
def _fileobj(self):
return self._zipfile.fp
def writable(self):
return True
def write(self, data):
nbytes = len(data)
self._file_size += nbytes
self._crc = crc32(data, self._crc)
if self._compressor:
data = self._compressor.compress(data)
self._compress_size += len(data)
self._fileobj.write(data)
return nbytes
def close(self):
super().close()
# Flush any data from the compressor, and update header info
if self._compressor:
buf = self._compressor.flush()
self._compress_size += len(buf)
self._fileobj.write(buf)
self._zinfo.compress_size = self._compress_size
else:
self._zinfo.compress_size = self._file_size
self._zinfo.CRC = self._crc
self._zinfo.file_size = self._file_size
# Write updated header info
if self._zinfo.flag_bits & 0x08:
# Write CRC and file sizes after the file data
fmt = '<LQQ' if self._zip64 else '<LLL'
self._fileobj.write(struct.pack(fmt, self._zinfo.CRC,
self._zinfo.compress_size, self._zinfo.file_size))
self._zipfile.start_dir = self._fileobj.tell()
else:
if not self._zip64:
if self._file_size > ZIP64_LIMIT:
raise RuntimeError('File size unexpectedly exceeded ZIP64 '
'limit')
if self._compress_size > ZIP64_LIMIT:
raise RuntimeError('Compressed size unexpectedly exceeded '
'ZIP64 limit')
# Seek backwards and write file header (which will now include
# correct CRC and file sizes)
# Preserve current position in file
self._zipfile.start_dir = self._fileobj.tell()
self._fileobj.seek(self._zinfo.header_offset)
self._fileobj.write(self._zinfo.FileHeader(self._zip64))
self._fileobj.seek(self._zipfile.start_dir)
self._zipfile._writing = False
# Successfully written: Add file to our caches
self._zipfile.filelist.append(self._zinfo)
self._zipfile.NameToInfo[self._zinfo.filename] = self._zinfo
class ZipFile:
""" Class with methods to open, read, write, close, list zip files.
@ -1055,6 +1130,7 @@ class ZipFile:
self._fileRefCnt = 1
self._lock = threading.RLock()
self._seekable = True
self._writing = False
try:
if mode == 'r':
@ -1267,30 +1343,59 @@ class ZipFile:
with self.open(name, "r", pwd) as fp:
return fp.read()
def open(self, name, mode="r", pwd=None):
"""Return file-like object for 'name'."""
if mode not in ("r", "U", "rU"):
raise RuntimeError('open() requires mode "r", "U", or "rU"')
def open(self, name, mode="r", pwd=None, force_zip64=False):
"""Return file-like object for 'name'.
name is a string for the file name within the ZIP file, or a ZipInfo
object.
mode should be 'r' to read a file already in the ZIP file, or 'w' to
write to a file newly added to the archive.
pwd is the password to decrypt files (only used for reading).
When writing, if the file size is not known in advance but may exceed
2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
files. If the size is known in advance, it is best to pass a ZipInfo
instance for name, with zinfo.file_size set.
"""
if mode not in {"r", "w", "U", "rU"}:
raise RuntimeError('open() requires mode "r", "w", "U", or "rU"')
if 'U' in mode:
import warnings
warnings.warn("'U' mode is deprecated",
DeprecationWarning, 2)
if pwd and not isinstance(pwd, bytes):
raise TypeError("pwd: expected bytes, got %s" % type(pwd))
if pwd and (mode == "w"):
raise ValueError("pwd is only supported for reading files")
if not self.fp:
raise RuntimeError(
"Attempt to read ZIP archive that was already closed")
"Attempt to use ZIP archive that was already closed")
# Make sure we have an info object
if isinstance(name, ZipInfo):
# 'name' is already an info object
zinfo = name
elif mode == 'w':
zinfo = ZipInfo(name)
zinfo.compress_type = self.compression
else:
# Get info object for name
zinfo = self.getinfo(name)
if mode == 'w':
return self._open_to_write(zinfo, force_zip64=force_zip64)
if self._writing:
raise RuntimeError("Can't read from the ZIP file while there "
"is an open writing handle on it. "
"Close the writing handle before trying to read.")
# Open for reading:
self._fileRefCnt += 1
zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose, self._lock)
zef_file = _SharedFile(self.fp, zinfo.header_offset,
self._fpclose, self._lock, lambda: self._writing)
try:
# Skip the file header:
fheader = zef_file.read(sizeFileHeader)
@ -1355,6 +1460,49 @@ class ZipFile:
zef_file.close()
raise
def _open_to_write(self, zinfo, force_zip64=False):
if force_zip64 and not self._allowZip64:
raise ValueError(
"force_zip64 is True, but allowZip64 was False when opening "
"the ZIP file."
)
if self._writing:
raise RuntimeError("Can't write to the ZIP file while there is "
"another write handle open on it. "
"Close the first handle before opening another.")
# Sizes and CRC are overwritten with correct data after processing the file
if not hasattr(zinfo, 'file_size'):
zinfo.file_size = 0
zinfo.compress_size = 0
zinfo.CRC = 0
zinfo.flag_bits = 0x00
if zinfo.compress_type == ZIP_LZMA:
# Compressed data includes an end-of-stream (EOS) marker
zinfo.flag_bits |= 0x02
if not self._seekable:
zinfo.flag_bits |= 0x08
if not zinfo.external_attr:
zinfo.external_attr = 0o600 << 16 # permissions: ?rw-------
# Compressed size can be larger than uncompressed size
zip64 = self._allowZip64 and \
(force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
if self._seekable:
self.fp.seek(self.start_dir)
zinfo.header_offset = self.fp.tell()
self._writecheck(zinfo)
self._didModify = True
self.fp.write(zinfo.FileHeader(zip64))
self._writing = True
return _ZipWriteFile(self, zinfo, zip64)
def extract(self, member, path=None, pwd=None):
"""Extract a member from the archive to the current working directory,
using its full name. Its file information is extracted as accurately
@ -1464,6 +1612,10 @@ class ZipFile:
if not self.fp:
raise RuntimeError(
"Attempt to write to ZIP archive that was already closed")
if self._writing:
raise RuntimeError(
"Can't write to ZIP archive while an open writing handle exists"
)
zinfo = ZipInfo.from_file(filename, arcname)
@ -1476,6 +1628,7 @@ class ZipFile:
else:
zinfo.compress_type = self.compression
if zinfo.is_dir():
with self._lock:
if self._seekable:
self.fp.seek(self.start_dir)
@ -1487,64 +1640,13 @@ class ZipFile:
self._writecheck(zinfo)
self._didModify = True
if zinfo.is_dir():
self.filelist.append(zinfo)
self.NameToInfo[zinfo.filename] = zinfo
self.fp.write(zinfo.FileHeader(False))
self.start_dir = self.fp.tell()
return
cmpr = _get_compressor(zinfo.compress_type)
if not self._seekable:
zinfo.flag_bits |= 0x08
with open(filename, "rb") as fp:
# Must overwrite CRC and sizes with correct data later
zinfo.CRC = CRC = 0
zinfo.compress_size = compress_size = 0
# Compressed size can be larger than uncompressed size
zip64 = self._allowZip64 and \
zinfo.file_size * 1.05 > ZIP64_LIMIT
self.fp.write(zinfo.FileHeader(zip64))
file_size = 0
while 1:
buf = fp.read(1024 * 8)
if not buf:
break
file_size = file_size + len(buf)
CRC = crc32(buf, CRC)
if cmpr:
buf = cmpr.compress(buf)
compress_size = compress_size + len(buf)
self.fp.write(buf)
if cmpr:
buf = cmpr.flush()
compress_size = compress_size + len(buf)
self.fp.write(buf)
zinfo.compress_size = compress_size
else:
zinfo.compress_size = file_size
zinfo.CRC = CRC
zinfo.file_size = file_size
if zinfo.flag_bits & 0x08:
# Write CRC and file sizes after the file data
fmt = '<LQQ' if zip64 else '<LLL'
self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
zinfo.file_size))
self.start_dir = self.fp.tell()
else:
if not zip64 and self._allowZip64:
if file_size > ZIP64_LIMIT:
raise RuntimeError('File size has increased during compressing')
if compress_size > ZIP64_LIMIT:
raise RuntimeError('Compressed size larger than uncompressed size')
# Seek backwards and write file header (which will now include
# correct CRC and file sizes)
self.start_dir = self.fp.tell() # Preserve current position in file
self.fp.seek(zinfo.header_offset)
self.fp.write(zinfo.FileHeader(zip64))
self.fp.seek(self.start_dir)
self.filelist.append(zinfo)
self.NameToInfo[zinfo.filename] = zinfo
with open(filename, "rb") as src, self.open(zinfo, 'w') as dest:
shutil.copyfileobj(src, dest, 1024*8)
def writestr(self, zinfo_or_arcname, data, compress_type=None):
"""Write a file into the archive. The contents is 'data', which
@ -1569,45 +1671,18 @@ class ZipFile:
if not self.fp:
raise RuntimeError(
"Attempt to write to ZIP archive that was already closed")
if self._writing:
raise RuntimeError(
"Can't write to ZIP archive while an open writing handle exists."
)
if compress_type is not None:
zinfo.compress_type = compress_type
zinfo.file_size = len(data) # Uncompressed size
with self._lock:
if self._seekable:
self.fp.seek(self.start_dir)
zinfo.header_offset = self.fp.tell() # Start of header data
if compress_type is not None:
zinfo.compress_type = compress_type
zinfo.header_offset = self.fp.tell() # Start of header data
if compress_type is not None:
zinfo.compress_type = compress_type
if zinfo.compress_type == ZIP_LZMA:
# Compressed data includes an end-of-stream (EOS) marker
zinfo.flag_bits |= 0x02
self._writecheck(zinfo)
self._didModify = True
zinfo.CRC = crc32(data) # CRC-32 checksum
co = _get_compressor(zinfo.compress_type)
if co:
data = co.compress(data) + co.flush()
zinfo.compress_size = len(data) # Compressed size
else:
zinfo.compress_size = zinfo.file_size
zip64 = zinfo.file_size > ZIP64_LIMIT or \
zinfo.compress_size > ZIP64_LIMIT
if zip64 and not self._allowZip64:
raise LargeZipFile("Filesize would require ZIP64 extensions")
self.fp.write(zinfo.FileHeader(zip64))
self.fp.write(data)
if zinfo.flag_bits & 0x08:
# Write CRC and file sizes after the file data
fmt = '<LQQ' if zip64 else '<LLL'
self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
zinfo.file_size))
self.fp.flush()
self.start_dir = self.fp.tell()
self.filelist.append(zinfo)
self.NameToInfo[zinfo.filename] = zinfo
with self.open(zinfo, mode='w') as dest:
dest.write(data)
def __del__(self):
"""Call the "close()" method in case the user forgot."""
@ -1619,6 +1694,11 @@ class ZipFile:
if self.fp is None:
return
if self._writing:
raise RuntimeError("Can't close the ZIP file while there is "
"an open writing handle on it. "
"Close the writing handle before closing the zip.")
try:
if self.mode in ('w', 'x', 'a') and self._didModify: # write ending records
with self._lock:

View File

@ -277,6 +277,9 @@ Core and Builtins
Library
-------
- Issue #26039: zipfile.ZipFile.open() can now be used to write data into a ZIP
file, as well as for extracting data. Patch by Thomas Kluyver.
- Issue #26892: Honor debuglevel flag in urllib.request.HTTPHandler. Patch
contributed by Chi Hsuan Yen.