Issue #14099: Writing to ZipFile and reading multiple ZipExtFiles is
threadsafe now.

Serhiy Storchaka 2015-01-26 13:53:38 +02:00
parent b67c516d8b
commit f15e524026
2 changed files with 209 additions and 195 deletions
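
What the change means in practice: several threads may now read different members of one ZipFile object at the same time, each through its own ZipExtFile, and writes from several threads are serialized. A small illustration, not part of the patch (the file name demo.zip is just a placeholder):

import threading
import zipfile

# Build a small archive to read back.
with zipfile.ZipFile("demo.zip", "w") as zf:
    for i in range(4):
        zf.writestr("member%d.txt" % i, b"x" * 100000)

results = {}

def reader(zf, name):
    # Each thread gets its own ZipExtFile; the position of the shared
    # underlying file is protected by the lock this patch introduces.
    with zf.open(name) as member:
        results[name] = len(member.read())

with zipfile.ZipFile("demo.zip") as zf:
    threads = [threading.Thread(target=reader, args=(zf, "member%d.txt" % i))
               for i in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

assert all(size == 100000 for size in results.values())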

Lib/zipfile.py

@@ -13,6 +13,7 @@ import stat
 import shutil
 import struct
 import binascii
+import threading

 try:
@@ -647,16 +648,18 @@ def _get_decompressor(compress_type):

 class _SharedFile:
-    def __init__(self, file, pos, close):
+    def __init__(self, file, pos, close, lock):
         self._file = file
         self._pos = pos
         self._close = close
+        self._lock = lock

     def read(self, n=-1):
-        self._file.seek(self._pos)
-        data = self._file.read(n)
-        self._pos = self._file.tell()
-        return data
+        with self._lock:
+            self._file.seek(self._pos)
+            data = self._file.read(n)
+            self._pos = self._file.tell()
+            return data

     def close(self):
         if self._file is not None:
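
The lock handed to _SharedFile closes a race between readers that share one underlying file object: without it, another reader's seek() can land between this reader's seek() and read(), so the data comes from the wrong offset. A minimal illustration of that interleaving using a plain io.BytesIO (not the zipfile internals):

import io

shared = io.BytesIO(b"A" * 10 + b"B" * 10)
pos_a, pos_b = 0, 10     # reader A owns the A bytes, reader B the B bytes

# Unsynchronized interleaving: B's seek slips in between A's seek and read.
shared.seek(pos_a)       # reader A positions the shared offset
shared.seek(pos_b)       # reader B repositions it before A reads
data_a = shared.read(5)  # reader A now receives reader B's bytes
assert data_a == b"BBBBB"

# With the lock, _SharedFile.read() performs seek(), read() and tell() as one
# atomic step, so this interleaving is no longer possible.
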
@@ -990,6 +993,7 @@ class ZipFile:
         self.fp = file
         self.filename = getattr(file, 'name', None)
         self._fileRefCnt = 1
+        self._lock = threading.RLock()

         try:
             if mode == 'r':
@@ -1214,7 +1218,7 @@ class ZipFile:
             zinfo = self.getinfo(name)

         self._fileRefCnt += 1
-        zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose)
+        zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose, self._lock)
         try:
             # Skip the file header:
             fheader = zef_file.read(sizeFileHeader)
@@ -1410,68 +1414,69 @@ class ZipFile:

         zinfo.file_size = st.st_size
         zinfo.flag_bits = 0x00
-        self.fp.seek(self.start_dir, 0)
-        zinfo.header_offset = self.fp.tell()    # Start of header bytes
-        if zinfo.compress_type == ZIP_LZMA:
-            # Compressed data includes an end-of-stream (EOS) marker
-            zinfo.flag_bits |= 0x02
-
-        self._writecheck(zinfo)
-        self._didModify = True
-
-        if isdir:
-            zinfo.file_size = 0
-            zinfo.compress_size = 0
-            zinfo.CRC = 0
-            zinfo.external_attr |= 0x10  # MS-DOS directory flag
-            self.filelist.append(zinfo)
-            self.NameToInfo[zinfo.filename] = zinfo
-            self.fp.write(zinfo.FileHeader(False))
-            self.start_dir = self.fp.tell()
-            return
-
-        cmpr = _get_compressor(zinfo.compress_type)
-        with open(filename, "rb") as fp:
-            # Must overwrite CRC and sizes with correct data later
-            zinfo.CRC = CRC = 0
-            zinfo.compress_size = compress_size = 0
-            # Compressed size can be larger than uncompressed size
-            zip64 = self._allowZip64 and \
-                    zinfo.file_size * 1.05 > ZIP64_LIMIT
-            self.fp.write(zinfo.FileHeader(zip64))
-            file_size = 0
-            while 1:
-                buf = fp.read(1024 * 8)
-                if not buf:
-                    break
-                file_size = file_size + len(buf)
-                CRC = crc32(buf, CRC) & 0xffffffff
-                if cmpr:
-                    buf = cmpr.compress(buf)
-                    compress_size = compress_size + len(buf)
-                self.fp.write(buf)
-        if cmpr:
-            buf = cmpr.flush()
-            compress_size = compress_size + len(buf)
-            self.fp.write(buf)
-            zinfo.compress_size = compress_size
-        else:
-            zinfo.compress_size = file_size
-        zinfo.CRC = CRC
-        zinfo.file_size = file_size
-        if not zip64 and self._allowZip64:
-            if file_size > ZIP64_LIMIT:
-                raise RuntimeError('File size has increased during compressing')
-            if compress_size > ZIP64_LIMIT:
-                raise RuntimeError('Compressed size larger than uncompressed size')
-        # Seek backwards and write file header (which will now include
-        # correct CRC and file sizes)
-        self.start_dir = self.fp.tell()    # Preserve current position in file
-        self.fp.seek(zinfo.header_offset, 0)
-        self.fp.write(zinfo.FileHeader(zip64))
-        self.fp.seek(self.start_dir, 0)
-        self.filelist.append(zinfo)
-        self.NameToInfo[zinfo.filename] = zinfo
+        with self._lock:
+            self.fp.seek(self.start_dir, 0)
+            zinfo.header_offset = self.fp.tell()    # Start of header bytes
+            if zinfo.compress_type == ZIP_LZMA:
+                # Compressed data includes an end-of-stream (EOS) marker
+                zinfo.flag_bits |= 0x02
+
+            self._writecheck(zinfo)
+            self._didModify = True
+
+            if isdir:
+                zinfo.file_size = 0
+                zinfo.compress_size = 0
+                zinfo.CRC = 0
+                zinfo.external_attr |= 0x10  # MS-DOS directory flag
+                self.filelist.append(zinfo)
+                self.NameToInfo[zinfo.filename] = zinfo
+                self.fp.write(zinfo.FileHeader(False))
+                self.start_dir = self.fp.tell()
+                return
+
+            cmpr = _get_compressor(zinfo.compress_type)
+            with open(filename, "rb") as fp:
+                # Must overwrite CRC and sizes with correct data later
+                zinfo.CRC = CRC = 0
+                zinfo.compress_size = compress_size = 0
+                # Compressed size can be larger than uncompressed size
+                zip64 = self._allowZip64 and \
+                        zinfo.file_size * 1.05 > ZIP64_LIMIT
+                self.fp.write(zinfo.FileHeader(zip64))
+                file_size = 0
+                while 1:
+                    buf = fp.read(1024 * 8)
+                    if not buf:
+                        break
+                    file_size = file_size + len(buf)
+                    CRC = crc32(buf, CRC) & 0xffffffff
+                    if cmpr:
+                        buf = cmpr.compress(buf)
+                        compress_size = compress_size + len(buf)
+                    self.fp.write(buf)
+            if cmpr:
+                buf = cmpr.flush()
+                compress_size = compress_size + len(buf)
+                self.fp.write(buf)
+                zinfo.compress_size = compress_size
+            else:
+                zinfo.compress_size = file_size
+            zinfo.CRC = CRC
+            zinfo.file_size = file_size
+            if not zip64 and self._allowZip64:
+                if file_size > ZIP64_LIMIT:
+                    raise RuntimeError('File size has increased during compressing')
+                if compress_size > ZIP64_LIMIT:
+                    raise RuntimeError('Compressed size larger than uncompressed size')
+            # Seek backwards and write file header (which will now include
+            # correct CRC and file sizes)
+            self.start_dir = self.fp.tell()    # Preserve current position in file
+            self.fp.seek(zinfo.header_offset, 0)
+            self.fp.write(zinfo.FileHeader(zip64))
+            self.fp.seek(self.start_dir, 0)
+            self.filelist.append(zinfo)
+            self.NameToInfo[zinfo.filename] = zinfo

     def writestr(self, zinfo_or_arcname, data, compress_type=None):
         """Write a file into the archive. The contents is 'data', which
@@ -1498,38 +1503,39 @@ class ZipFile:
                   "Attempt to write to ZIP archive that was already closed")

         zinfo.file_size = len(data)    # Uncompressed size
-        self.fp.seek(self.start_dir, 0)
-        zinfo.header_offset = self.fp.tell()    # Start of header data
-        if compress_type is not None:
-            zinfo.compress_type = compress_type
-        if zinfo.compress_type == ZIP_LZMA:
-            # Compressed data includes an end-of-stream (EOS) marker
-            zinfo.flag_bits |= 0x02
-
-        self._writecheck(zinfo)
-        self._didModify = True
-
-        zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum
-        co = _get_compressor(zinfo.compress_type)
-        if co:
-            data = co.compress(data) + co.flush()
-            zinfo.compress_size = len(data)    # Compressed size
-        else:
-            zinfo.compress_size = zinfo.file_size
-        zip64 = zinfo.file_size > ZIP64_LIMIT or \
-                zinfo.compress_size > ZIP64_LIMIT
-        if zip64 and not self._allowZip64:
-            raise LargeZipFile("Filesize would require ZIP64 extensions")
-        self.fp.write(zinfo.FileHeader(zip64))
-        self.fp.write(data)
-        if zinfo.flag_bits & 0x08:
-            # Write CRC and file sizes after the file data
-            fmt = '<LQQ' if zip64 else '<LLL'
-            self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
-                                      zinfo.file_size))
-        self.fp.flush()
-        self.start_dir = self.fp.tell()
-        self.filelist.append(zinfo)
-        self.NameToInfo[zinfo.filename] = zinfo
+        with self._lock:
+            self.fp.seek(self.start_dir, 0)
+            zinfo.header_offset = self.fp.tell()    # Start of header data
+            if compress_type is not None:
+                zinfo.compress_type = compress_type
+            if zinfo.compress_type == ZIP_LZMA:
+                # Compressed data includes an end-of-stream (EOS) marker
+                zinfo.flag_bits |= 0x02
+
+            self._writecheck(zinfo)
+            self._didModify = True
+
+            zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum
+            co = _get_compressor(zinfo.compress_type)
+            if co:
+                data = co.compress(data) + co.flush()
+                zinfo.compress_size = len(data)    # Compressed size
+            else:
+                zinfo.compress_size = zinfo.file_size
+            zip64 = zinfo.file_size > ZIP64_LIMIT or \
+                    zinfo.compress_size > ZIP64_LIMIT
+            if zip64 and not self._allowZip64:
+                raise LargeZipFile("Filesize would require ZIP64 extensions")
+            self.fp.write(zinfo.FileHeader(zip64))
+            self.fp.write(data)
+            if zinfo.flag_bits & 0x08:
+                # Write CRC and file sizes after the file data
+                fmt = '<LQQ' if zip64 else '<LLL'
+                self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
+                                          zinfo.file_size))
+            self.fp.flush()
+            self.start_dir = self.fp.tell()
+            self.filelist.append(zinfo)
+            self.NameToInfo[zinfo.filename] = zinfo

     def __del__(self):
         """Call the "close()" method in case the user forgot."""
@@ -1543,111 +1549,116 @@ class ZipFile:

         try:
             if self.mode in ("w", "a") and self._didModify: # write ending records
-                self.fp.seek(self.start_dir, 0)
-                for zinfo in self.filelist:    # write central directory
-                    dt = zinfo.date_time
-                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
-                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
-                    extra = []
-                    if zinfo.file_size > ZIP64_LIMIT \
-                       or zinfo.compress_size > ZIP64_LIMIT:
-                        extra.append(zinfo.file_size)
-                        extra.append(zinfo.compress_size)
-                        file_size = 0xffffffff
-                        compress_size = 0xffffffff
-                    else:
-                        file_size = zinfo.file_size
-                        compress_size = zinfo.compress_size
-
-                    if zinfo.header_offset > ZIP64_LIMIT:
-                        extra.append(zinfo.header_offset)
-                        header_offset = 0xffffffff
-                    else:
-                        header_offset = zinfo.header_offset
-
-                    extra_data = zinfo.extra
-                    min_version = 0
-                    if extra:
-                        # Append a ZIP64 field to the extra's
-                        extra_data = struct.pack(
-                                '<HH' + 'Q'*len(extra),
-                                1, 8*len(extra), *extra) + extra_data
-
-                        min_version = ZIP64_VERSION
-
-                    if zinfo.compress_type == ZIP_BZIP2:
-                        min_version = max(BZIP2_VERSION, min_version)
-                    elif zinfo.compress_type == ZIP_LZMA:
-                        min_version = max(LZMA_VERSION, min_version)
-
-                    extract_version = max(min_version, zinfo.extract_version)
-                    create_version = max(min_version, zinfo.create_version)
-                    try:
-                        filename, flag_bits = zinfo._encodeFilenameFlags()
-                        centdir = struct.pack(structCentralDir,
-                                              stringCentralDir, create_version,
-                                              zinfo.create_system, extract_version, zinfo.reserved,
-                                              flag_bits, zinfo.compress_type, dostime, dosdate,
-                                              zinfo.CRC, compress_size, file_size,
-                                              len(filename), len(extra_data), len(zinfo.comment),
-                                              0, zinfo.internal_attr, zinfo.external_attr,
-                                              header_offset)
-                    except DeprecationWarning:
-                        print((structCentralDir, stringCentralDir, create_version,
-                               zinfo.create_system, extract_version, zinfo.reserved,
-                               zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
-                               zinfo.CRC, compress_size, file_size,
-                               len(zinfo.filename), len(extra_data), len(zinfo.comment),
-                               0, zinfo.internal_attr, zinfo.external_attr,
-                               header_offset), file=sys.stderr)
-                        raise
-                    self.fp.write(centdir)
-                    self.fp.write(filename)
-                    self.fp.write(extra_data)
-                    self.fp.write(zinfo.comment)
-
-                pos2 = self.fp.tell()
-                # Write end-of-zip-archive record
-                centDirCount = len(self.filelist)
-                centDirSize = pos2 - self.start_dir
-                centDirOffset = self.start_dir
-                requires_zip64 = None
-                if centDirCount > ZIP_FILECOUNT_LIMIT:
-                    requires_zip64 = "Files count"
-                elif centDirOffset > ZIP64_LIMIT:
-                    requires_zip64 = "Central directory offset"
-                elif centDirSize > ZIP64_LIMIT:
-                    requires_zip64 = "Central directory size"
-                if requires_zip64:
-                    # Need to write the ZIP64 end-of-archive records
-                    if not self._allowZip64:
-                        raise LargeZipFile(requires_zip64 +
-                                           " would require ZIP64 extensions")
-                    zip64endrec = struct.pack(
-                            structEndArchive64, stringEndArchive64,
-                            44, 45, 45, 0, 0, centDirCount, centDirCount,
-                            centDirSize, centDirOffset)
-                    self.fp.write(zip64endrec)
-
-                    zip64locrec = struct.pack(
-                            structEndArchive64Locator,
-                            stringEndArchive64Locator, 0, pos2, 1)
-                    self.fp.write(zip64locrec)
-                    centDirCount = min(centDirCount, 0xFFFF)
-                    centDirSize = min(centDirSize, 0xFFFFFFFF)
-                    centDirOffset = min(centDirOffset, 0xFFFFFFFF)
-
-                endrec = struct.pack(structEndArchive, stringEndArchive,
-                                     0, 0, centDirCount, centDirCount,
-                                     centDirSize, centDirOffset, len(self._comment))
-                self.fp.write(endrec)
-                self.fp.write(self._comment)
-                self.fp.flush()
+                with self._lock:
+                    self.fp.seek(self.start_dir, 0)
+                    self._write_end_record()
         finally:
             fp = self.fp
             self.fp = None
             self._fpclose(fp)

+    def _write_end_record(self):
+        self.fp.seek(self.start_dir, 0)
+        for zinfo in self.filelist:    # write central directory
+            dt = zinfo.date_time
+            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
+            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
+            extra = []
+            if zinfo.file_size > ZIP64_LIMIT \
+               or zinfo.compress_size > ZIP64_LIMIT:
+                extra.append(zinfo.file_size)
+                extra.append(zinfo.compress_size)
+                file_size = 0xffffffff
+                compress_size = 0xffffffff
+            else:
+                file_size = zinfo.file_size
+                compress_size = zinfo.compress_size
+
+            if zinfo.header_offset > ZIP64_LIMIT:
+                extra.append(zinfo.header_offset)
+                header_offset = 0xffffffff
+            else:
+                header_offset = zinfo.header_offset
+
+            extra_data = zinfo.extra
+            min_version = 0
+            if extra:
+                # Append a ZIP64 field to the extra's
+                extra_data = struct.pack(
+                        '<HH' + 'Q'*len(extra),
+                        1, 8*len(extra), *extra) + extra_data
+
+                min_version = ZIP64_VERSION
+
+            if zinfo.compress_type == ZIP_BZIP2:
+                min_version = max(BZIP2_VERSION, min_version)
+            elif zinfo.compress_type == ZIP_LZMA:
+                min_version = max(LZMA_VERSION, min_version)
+
+            extract_version = max(min_version, zinfo.extract_version)
+            create_version = max(min_version, zinfo.create_version)
+            try:
+                filename, flag_bits = zinfo._encodeFilenameFlags()
+                centdir = struct.pack(structCentralDir,
+                                      stringCentralDir, create_version,
+                                      zinfo.create_system, extract_version, zinfo.reserved,
+                                      flag_bits, zinfo.compress_type, dostime, dosdate,
+                                      zinfo.CRC, compress_size, file_size,
+                                      len(filename), len(extra_data), len(zinfo.comment),
+                                      0, zinfo.internal_attr, zinfo.external_attr,
+                                      header_offset)
+            except DeprecationWarning:
+                print((structCentralDir, stringCentralDir, create_version,
+                       zinfo.create_system, extract_version, zinfo.reserved,
+                       zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
+                       zinfo.CRC, compress_size, file_size,
+                       len(zinfo.filename), len(extra_data), len(zinfo.comment),
+                       0, zinfo.internal_attr, zinfo.external_attr,
+                       header_offset), file=sys.stderr)
+                raise
+            self.fp.write(centdir)
+            self.fp.write(filename)
+            self.fp.write(extra_data)
+            self.fp.write(zinfo.comment)
+
+        pos2 = self.fp.tell()
+        # Write end-of-zip-archive record
+        centDirCount = len(self.filelist)
+        centDirSize = pos2 - self.start_dir
+        centDirOffset = self.start_dir
+        requires_zip64 = None
+        if centDirCount > ZIP_FILECOUNT_LIMIT:
+            requires_zip64 = "Files count"
+        elif centDirOffset > ZIP64_LIMIT:
+            requires_zip64 = "Central directory offset"
+        elif centDirSize > ZIP64_LIMIT:
+            requires_zip64 = "Central directory size"
+        if requires_zip64:
+            # Need to write the ZIP64 end-of-archive records
+            if not self._allowZip64:
+                raise LargeZipFile(requires_zip64 +
+                                   " would require ZIP64 extensions")
+            zip64endrec = struct.pack(
+                    structEndArchive64, stringEndArchive64,
+                    44, 45, 45, 0, 0, centDirCount, centDirCount,
+                    centDirSize, centDirOffset)
+            self.fp.write(zip64endrec)
+
+            zip64locrec = struct.pack(
+                    structEndArchive64Locator,
+                    stringEndArchive64Locator, 0, pos2, 1)
+            self.fp.write(zip64locrec)
+            centDirCount = min(centDirCount, 0xFFFF)
+            centDirSize = min(centDirSize, 0xFFFFFFFF)
+            centDirOffset = min(centDirOffset, 0xFFFFFFFF)
+
+        endrec = struct.pack(structEndArchive, stringEndArchive,
+                             0, 0, centDirCount, centDirCount,
+                             centDirSize, centDirOffset, len(self._comment))
+        self.fp.write(endrec)
+        self.fp.write(self._comment)
+        self.fp.flush()
+
     def _fpclose(self, fp):
         assert self._fileRefCnt > 0
         self._fileRefCnt -= 1

Misc/NEWS

@@ -218,6 +218,9 @@ Core and Builtins
 Library
 -------

+- Issue #14099: Writing to ZipFile and reading multiple ZipExtFiles is
+  threadsafe now.
+
 - Issue #19361: JSON decoder now raises JSONDecodeError instead of ValueError.

 - Issue #18518: timeit now rejects statements which can't be compiled outside