Issue #23252: Added support for writing ZIP files to unseekable streams.

This commit is contained in:
Serhiy Storchaka 2015-03-23 01:09:35 +02:00
parent f07a4b663d
commit 77d899726f
5 changed files with 120 additions and 35 deletions

View File

@ -140,6 +140,7 @@ ZipFile Objects
ZIP file, then a new ZIP archive is appended to the file. This is meant for
adding a ZIP archive to another file (such as :file:`python.exe`). If
*mode* is ``a`` and the file does not exist at all, it is created.
If *mode* is ``r`` or ``a``, the file should be seekable.
*compression* is the ZIP compression method to use when writing the archive,
and should be :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`,
:const:`ZIP_BZIP2` or :const:`ZIP_LZMA`; unrecognized
@ -171,6 +172,9 @@ ZipFile Objects
.. versionchanged:: 3.4
ZIP64 extensions are enabled by default.
.. versionchanged:: 3.5
Added support for writing to unseekable streams.
.. method:: ZipFile.close()
@ -328,7 +332,6 @@ ZipFile Objects
If ``arcname`` (or ``filename``, if ``arcname`` is not given) contains a null
byte, the name of the file in the archive will be truncated at the null byte.
.. method:: ZipFile.writestr(zinfo_or_arcname, bytes[, compress_type])
Write the string *bytes* to the archive; *zinfo_or_arcname* is either the file

View File

@ -448,6 +448,12 @@ faulthandler
:func:`~faulthandler.dump_traceback_later` functions now accept file
descriptors. (Contributed by Wei Wu in :issue:`23566`.)
zipfile
-------
* Added support for writing ZIP files to unseekable streams.
(Contributed by Serhiy Storchaka in :issue:`23252`.)
Optimizations
=============

View File

@ -1685,25 +1685,63 @@ class Tellable:
self.offset = 0
def write(self, data):
self.offset += self.fp.write(data)
n = self.fp.write(data)
self.offset += n
return n
def tell(self):
return self.offset
def flush(self):
pass
self.fp.flush()
class Unseekable:
def __init__(self, fp):
self.fp = fp
def write(self, data):
return self.fp.write(data)
def flush(self):
self.fp.flush()
class UnseekableTests(unittest.TestCase):
def test_writestr_tellable(self):
f = io.BytesIO()
with zipfile.ZipFile(Tellable(f), 'w', zipfile.ZIP_STORED) as zipfp:
zipfp.writestr('ones', b'111')
zipfp.writestr('twos', b'222')
with zipfile.ZipFile(f, mode='r') as zipf:
with zipf.open('ones') as zopen:
self.assertEqual(zopen.read(), b'111')
with zipf.open('twos') as zopen:
self.assertEqual(zopen.read(), b'222')
def test_writestr(self):
for wrapper in (lambda f: f), Tellable, Unseekable:
with self.subTest(wrapper=wrapper):
f = io.BytesIO()
f.write(b'abc')
bf = io.BufferedWriter(f)
with zipfile.ZipFile(wrapper(bf), 'w', zipfile.ZIP_STORED) as zipfp:
zipfp.writestr('ones', b'111')
zipfp.writestr('twos', b'222')
self.assertEqual(f.getvalue()[:5], b'abcPK')
with zipfile.ZipFile(f, mode='r') as zipf:
with zipf.open('ones') as zopen:
self.assertEqual(zopen.read(), b'111')
with zipf.open('twos') as zopen:
self.assertEqual(zopen.read(), b'222')
def test_write(self):
for wrapper in (lambda f: f), Tellable, Unseekable:
with self.subTest(wrapper=wrapper):
f = io.BytesIO()
f.write(b'abc')
bf = io.BufferedWriter(f)
with zipfile.ZipFile(wrapper(bf), 'w', zipfile.ZIP_STORED) as zipfp:
self.addCleanup(unlink, TESTFN)
with open(TESTFN, 'wb') as f2:
f2.write(b'111')
zipfp.write(TESTFN, 'ones')
with open(TESTFN, 'wb') as f2:
f2.write(b'222')
zipfp.write(TESTFN, 'twos')
self.assertEqual(f.getvalue()[:5], b'abcPK')
with zipfile.ZipFile(f, mode='r') as zipf:
with zipf.open('ones') as zopen:
self.assertEqual(zopen.read(), b'111')
with zipf.open('twos') as zopen:
self.assertEqual(zopen.read(), b'222')
@requires_zlib

View File

@ -667,6 +667,26 @@ class _SharedFile:
self._file = None
self._close(fileobj)
# Provide the tell method for unseekable stream
class _Tellable:
def __init__(self, fp):
self.fp = fp
self.offset = 0
def write(self, data):
n = self.fp.write(data)
self.offset += n
return n
def tell(self):
return self.offset
def flush(self):
self.fp.flush()
def close(self):
self.fp.close()
class ZipExtFile(io.BufferedIOBase):
"""File-like object for reading an archive member.
@ -994,6 +1014,7 @@ class ZipFile:
self.filename = getattr(file, 'name', None)
self._fileRefCnt = 1
self._lock = threading.RLock()
self._seekable = True
try:
if mode == 'r':
@ -1002,13 +1023,24 @@ class ZipFile:
# set the modified flag so central directory gets written
# even if no files are added to the archive
self._didModify = True
self.start_dir = self.fp.tell()
try:
self.start_dir = self.fp.tell()
except (AttributeError, OSError):
self.fp = _Tellable(self.fp)
self.start_dir = 0
self._seekable = False
else:
# Some file-like objects can provide tell() but not seek()
try:
self.fp.seek(self.start_dir)
except (AttributeError, OSError):
self._seekable = False
elif mode == 'a':
try:
# See if file is a zip file
self._RealGetContents()
# seek to start of directory and overwrite
self.fp.seek(self.start_dir, 0)
self.fp.seek(self.start_dir)
except BadZipFile:
# file is not a zip file, just append
self.fp.seek(0, 2)
@ -1415,7 +1447,8 @@ class ZipFile:
zinfo.file_size = st.st_size
zinfo.flag_bits = 0x00
with self._lock:
self.fp.seek(self.start_dir, 0)
if self._seekable:
self.fp.seek(self.start_dir)
zinfo.header_offset = self.fp.tell() # Start of header bytes
if zinfo.compress_type == ZIP_LZMA:
# Compressed data includes an end-of-stream (EOS) marker
@ -1436,6 +1469,8 @@ class ZipFile:
return
cmpr = _get_compressor(zinfo.compress_type)
if not self._seekable:
zinfo.flag_bits |= 0x08
with open(filename, "rb") as fp:
# Must overwrite CRC and sizes with correct data later
zinfo.CRC = CRC = 0
@ -1464,17 +1499,24 @@ class ZipFile:
zinfo.compress_size = file_size
zinfo.CRC = CRC
zinfo.file_size = file_size
if not zip64 and self._allowZip64:
if file_size > ZIP64_LIMIT:
raise RuntimeError('File size has increased during compressing')
if compress_size > ZIP64_LIMIT:
raise RuntimeError('Compressed size larger than uncompressed size')
# Seek backwards and write file header (which will now include
# correct CRC and file sizes)
self.start_dir = self.fp.tell() # Preserve current position in file
self.fp.seek(zinfo.header_offset, 0)
self.fp.write(zinfo.FileHeader(zip64))
self.fp.seek(self.start_dir, 0)
if zinfo.flag_bits & 0x08:
# Write CRC and file sizes after the file data
fmt = '<LQQ' if zip64 else '<LLL'
self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
zinfo.file_size))
self.start_dir = self.fp.tell()
else:
if not zip64 and self._allowZip64:
if file_size > ZIP64_LIMIT:
raise RuntimeError('File size has increased during compressing')
if compress_size > ZIP64_LIMIT:
raise RuntimeError('Compressed size larger than uncompressed size')
# Seek backwards and write file header (which will now include
# correct CRC and file sizes)
self.start_dir = self.fp.tell() # Preserve current position in file
self.fp.seek(zinfo.header_offset)
self.fp.write(zinfo.FileHeader(zip64))
self.fp.seek(self.start_dir)
self.filelist.append(zinfo)
self.NameToInfo[zinfo.filename] = zinfo
@ -1504,11 +1546,8 @@ class ZipFile:
zinfo.file_size = len(data) # Uncompressed size
with self._lock:
try:
if self._seekable:
self.fp.seek(self.start_dir)
except (AttributeError, io.UnsupportedOperation):
# Some file-like objects can provide tell() but not seek()
pass
zinfo.header_offset = self.fp.tell() # Start of header data
if compress_type is not None:
zinfo.compress_type = compress_type
@ -1557,11 +1596,8 @@ class ZipFile:
try:
if self.mode in ("w", "a") and self._didModify: # write ending records
with self._lock:
try:
if self._seekable:
self.fp.seek(self.start_dir)
except (AttributeError, io.UnsupportedOperation):
# Some file-like objects can provide tell() but not seek()
pass
self._write_end_record()
finally:
fp = self.fp

View File

@ -23,6 +23,8 @@ Core and Builtins
Library
-------
- Issue #23252: Added support for writing ZIP files to unseekable streams.
- Issue #21526: Tkinter now supports new boolean type in Tcl 8.5.
- Issue #23647: Increase impalib's MAXLINE to accommodate modern mailbox sizes.