diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index a1f5b1809ab..d0789b69215 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -13,6 +13,7 @@
 import stat
 import shutil
 import struct
 import binascii
+import threading
 
 try:
@@ -647,16 +648,18 @@ def _get_decompressor(compress_type):
 
 
 class _SharedFile:
-    def __init__(self, file, pos, close):
+    def __init__(self, file, pos, close, lock):
         self._file = file
         self._pos = pos
         self._close = close
+        self._lock = lock
 
     def read(self, n=-1):
-        self._file.seek(self._pos)
-        data = self._file.read(n)
-        self._pos = self._file.tell()
-        return data
+        with self._lock:
+            self._file.seek(self._pos)
+            data = self._file.read(n)
+            self._pos = self._file.tell()
+            return data
 
     def close(self):
         if self._file is not None:
@@ -990,6 +993,7 @@ class ZipFile:
         self.fp = file
         self.filename = getattr(file, 'name', None)
         self._fileRefCnt = 1
+        self._lock = threading.RLock()
 
         try:
             if mode == 'r':
@@ -1214,7 +1218,7 @@ class ZipFile:
             zinfo = self.getinfo(name)
 
         self._fileRefCnt += 1
-        zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose)
+        zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose, self._lock)
         try:
             # Skip the file header:
             fheader = zef_file.read(sizeFileHeader)
@@ -1410,68 +1414,69 @@ class ZipFile:
 
         zinfo.file_size = st.st_size
         zinfo.flag_bits = 0x00
-        self.fp.seek(self.start_dir, 0)
-        zinfo.header_offset = self.fp.tell()    # Start of header bytes
-        if zinfo.compress_type == ZIP_LZMA:
-            # Compressed data includes an end-of-stream (EOS) marker
-            zinfo.flag_bits |= 0x02
+        with self._lock:
+            self.fp.seek(self.start_dir, 0)
+            zinfo.header_offset = self.fp.tell()    # Start of header bytes
+            if zinfo.compress_type == ZIP_LZMA:
+                # Compressed data includes an end-of-stream (EOS) marker
+                zinfo.flag_bits |= 0x02
 
-        self._writecheck(zinfo)
-        self._didModify = True
+            self._writecheck(zinfo)
+            self._didModify = True
 
-        if isdir:
-            zinfo.file_size = 0
-            zinfo.compress_size = 0
-            zinfo.CRC = 0
-            zinfo.external_attr |= 0x10  # MS-DOS directory flag
+            if isdir:
+                zinfo.file_size = 0
+                zinfo.compress_size = 0
+                zinfo.CRC = 0
+                zinfo.external_attr |= 0x10  # MS-DOS directory flag
+                self.filelist.append(zinfo)
+                self.NameToInfo[zinfo.filename] = zinfo
+                self.fp.write(zinfo.FileHeader(False))
+                self.start_dir = self.fp.tell()
+                return
+
+            cmpr = _get_compressor(zinfo.compress_type)
+            with open(filename, "rb") as fp:
+                # Must overwrite CRC and sizes with correct data later
+                zinfo.CRC = CRC = 0
+                zinfo.compress_size = compress_size = 0
+                # Compressed size can be larger than uncompressed size
+                zip64 = self._allowZip64 and \
+                        zinfo.file_size * 1.05 > ZIP64_LIMIT
+                self.fp.write(zinfo.FileHeader(zip64))
+                file_size = 0
+                while 1:
+                    buf = fp.read(1024 * 8)
+                    if not buf:
+                        break
+                    file_size = file_size + len(buf)
+                    CRC = crc32(buf, CRC) & 0xffffffff
+                    if cmpr:
+                        buf = cmpr.compress(buf)
+                        compress_size = compress_size + len(buf)
+                    self.fp.write(buf)
+            if cmpr:
+                buf = cmpr.flush()
+                compress_size = compress_size + len(buf)
+                self.fp.write(buf)
+                zinfo.compress_size = compress_size
+            else:
+                zinfo.compress_size = file_size
+            zinfo.CRC = CRC
+            zinfo.file_size = file_size
+            if not zip64 and self._allowZip64:
+                if file_size > ZIP64_LIMIT:
+                    raise RuntimeError('File size has increased during compressing')
+                if compress_size > ZIP64_LIMIT:
+                    raise RuntimeError('Compressed size larger than uncompressed size')
+            # Seek backwards and write file header (which will now include
+            # correct CRC and file sizes)
+            self.start_dir = self.fp.tell()       # Preserve current position in file
+            self.fp.seek(zinfo.header_offset, 0)
+            self.fp.write(zinfo.FileHeader(zip64))
+            self.fp.seek(self.start_dir, 0)
             self.filelist.append(zinfo)
             self.NameToInfo[zinfo.filename] = zinfo
-            self.fp.write(zinfo.FileHeader(False))
-            self.start_dir = self.fp.tell()
-            return
-
-        cmpr = _get_compressor(zinfo.compress_type)
-        with open(filename, "rb") as fp:
-            # Must overwrite CRC and sizes with correct data later
-            zinfo.CRC = CRC = 0
-            zinfo.compress_size = compress_size = 0
-            # Compressed size can be larger than uncompressed size
-            zip64 = self._allowZip64 and \
-                    zinfo.file_size * 1.05 > ZIP64_LIMIT
-            self.fp.write(zinfo.FileHeader(zip64))
-            file_size = 0
-            while 1:
-                buf = fp.read(1024 * 8)
-                if not buf:
-                    break
-                file_size = file_size + len(buf)
-                CRC = crc32(buf, CRC) & 0xffffffff
-                if cmpr:
-                    buf = cmpr.compress(buf)
-                    compress_size = compress_size + len(buf)
-                self.fp.write(buf)
-        if cmpr:
-            buf = cmpr.flush()
-            compress_size = compress_size + len(buf)
-            self.fp.write(buf)
-            zinfo.compress_size = compress_size
-        else:
-            zinfo.compress_size = file_size
-        zinfo.CRC = CRC
-        zinfo.file_size = file_size
-        if not zip64 and self._allowZip64:
-            if file_size > ZIP64_LIMIT:
-                raise RuntimeError('File size has increased during compressing')
-            if compress_size > ZIP64_LIMIT:
-                raise RuntimeError('Compressed size larger than uncompressed size')
-        # Seek backwards and write file header (which will now include
-        # correct CRC and file sizes)
-        self.start_dir = self.fp.tell()       # Preserve current position in file
-        self.fp.seek(zinfo.header_offset, 0)
-        self.fp.write(zinfo.FileHeader(zip64))
-        self.fp.seek(self.start_dir, 0)
-        self.filelist.append(zinfo)
-        self.NameToInfo[zinfo.filename] = zinfo
 
     def writestr(self, zinfo_or_arcname, data, compress_type=None):
         """Write a file into the archive.  The contents is 'data', which
@@ -1498,38 +1503,39 @@ class ZipFile:
                 "Attempt to write to ZIP archive that was already closed")
 
         zinfo.file_size = len(data)            # Uncompressed size
-        self.fp.seek(self.start_dir, 0)
-        zinfo.header_offset = self.fp.tell()    # Start of header data
-        if compress_type is not None:
-            zinfo.compress_type = compress_type
-        if zinfo.compress_type == ZIP_LZMA:
-            # Compressed data includes an end-of-stream (EOS) marker
-            zinfo.flag_bits |= 0x02
+        with self._lock:
+            self.fp.seek(self.start_dir, 0)
+            zinfo.header_offset = self.fp.tell()    # Start of header data
+            if compress_type is not None:
+                zinfo.compress_type = compress_type
+            if zinfo.compress_type == ZIP_LZMA:
+                # Compressed data includes an end-of-stream (EOS) marker
+                zinfo.flag_bits |= 0x02
 
-        self._writecheck(zinfo)
-        self._didModify = True
-        zinfo.CRC = crc32(data) & 0xffffffff       # CRC-32 checksum
-        co = _get_compressor(zinfo.compress_type)
-        if co:
-            data = co.compress(data) + co.flush()
-            zinfo.compress_size = len(data)    # Compressed size
-        else:
-            zinfo.compress_size = zinfo.file_size
-        zip64 = zinfo.file_size > ZIP64_LIMIT or \
-            zinfo.compress_size > ZIP64_LIMIT
-        if zip64 and not self._allowZip64:
-            raise LargeZipFile("Filesize would require ZIP64 extensions")
-        self.fp.write(zinfo.FileHeader(zip64))
-        self.fp.write(data)
-        if zinfo.flag_bits & 0x08:
-            # Write CRC and file sizes after the file data
-            fmt = '<LQQ' if zip64 else '<LLL'
-            self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
-                                      zinfo.file_size))
-        self.fp.flush()
-        self.start_dir = self.fp.tell()
-        self.filelist.append(zinfo)
-        self.NameToInfo[zinfo.filename] = zinfo
+            self._writecheck(zinfo)
+            self._didModify = True
+            zinfo.CRC = crc32(data) & 0xffffffff       # CRC-32 checksum
+            co = _get_compressor(zinfo.compress_type)
+            if co:
+                data = co.compress(data) + co.flush()
+                zinfo.compress_size = len(data)    # Compressed size
+            else:
+                zinfo.compress_size = zinfo.file_size
+            zip64 = zinfo.file_size > ZIP64_LIMIT or \
+                zinfo.compress_size > ZIP64_LIMIT
+            if zip64 and not self._allowZip64:
+                raise LargeZipFile("Filesize would require ZIP64 extensions")
+            self.fp.write(zinfo.FileHeader(zip64))
+            self.fp.write(data)
+            if zinfo.flag_bits & 0x08:
+                # Write CRC and file sizes after the file data
+                fmt = '<LQQ' if zip64 else '<LLL'
+                self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
+                                          zinfo.file_size))
+            self.fp.flush()
+            self.start_dir = self.fp.tell()
+            self.filelist.append(zinfo)
+            self.NameToInfo[zinfo.filename] = zinfo
 
     def close(self):
         """Close the file, and for mode 'w' and 'a' write the ending
@@ -1539,111 +1545,116 @@ class ZipFile:
 
         try:
             if self.mode in ('w', 'a') and self._didModify: # write ending records
-                self.fp.seek(self.start_dir, 0)
-                for zinfo in self.filelist:         # write central directory
-                    dt = zinfo.date_time
-                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
-                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
-                    extra = []
-                    if zinfo.file_size > ZIP64_LIMIT \
-                       or zinfo.compress_size > ZIP64_LIMIT:
-                        extra.append(zinfo.file_size)
-                        extra.append(zinfo.compress_size)
-                        file_size = 0xffffffff
-                        compress_size = 0xffffffff
-                    else:
-                        file_size = zinfo.file_size
-                        compress_size = zinfo.compress_size
-
-                    if zinfo.header_offset > ZIP64_LIMIT:
-                        extra.append(zinfo.header_offset)
-                        header_offset = 0xffffffff
-                    else:
-                        header_offset = zinfo.header_offset
-
-                    extra_data = zinfo.extra
-                    min_version = 0
-                    if extra:
-                        # Append a ZIP64 field to the extra's
-                        extra_data = struct.pack(
-                            '<HH' + 'Q'*len(extra),
-                            1, 8*len(extra), *extra) + extra_data
-
-                        min_version = ZIP64_VERSION
-
-                    if zinfo.compress_type == ZIP_BZIP2:
-                        min_version = max(BZIP2_VERSION, min_version)
-                    elif zinfo.compress_type == ZIP_LZMA:
-                        min_version = max(LZMA_VERSION, min_version)
-
-                    extract_version = max(min_version, zinfo.extract_version)
-                    create_version = max(min_version, zinfo.create_version)
-                    try:
-                        filename, flag_bits = zinfo._encodeFilenameFlags()
-                        centdir = struct.pack(structCentralDir,
-                                              stringCentralDir, create_version,
-                                              zinfo.create_system, extract_version, zinfo.reserved,
-                                              flag_bits, zinfo.compress_type, dostime, dosdate,
-                                              zinfo.CRC, compress_size, file_size,
-                                              len(filename), len(extra_data), len(zinfo.comment),
-                                              0, zinfo.internal_attr, zinfo.external_attr,
-                                              header_offset)
-                    except DeprecationWarning:
-                        print((structCentralDir, stringCentralDir, create_version,
-                               zinfo.create_system, extract_version, zinfo.reserved,
-                               zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
-                               zinfo.CRC, compress_size, file_size,
-                               len(zinfo.filename), len(extra_data), len(zinfo.comment),
-                               0, zinfo.internal_attr, zinfo.external_attr,
-                               header_offset), file=sys.stderr)
-                        raise
-                    self.fp.write(centdir)
-                    self.fp.write(filename)
-                    self.fp.write(extra_data)
-                    self.fp.write(zinfo.comment)
-
-                pos2 = self.fp.tell()
-                # Write end-of-zip-archive record
-                centDirCount = len(self.filelist)
-                centDirSize = pos2 - self.start_dir
-                centDirOffset = self.start_dir
-                requires_zip64 = None
-                if centDirCount > ZIP_FILECOUNT_LIMIT:
-                    requires_zip64 = "Files count"
-                elif centDirOffset > ZIP64_LIMIT:
-                    requires_zip64 = "Central directory offset"
-                elif centDirSize > ZIP64_LIMIT:
-                    requires_zip64 = "Central directory size"
-                if requires_zip64:
-                    # Need to write the ZIP64 end-of-archive records
-                    if not self._allowZip64:
-                        raise LargeZipFile(requires_zip64 +
-                                           " would require ZIP64 extensions")
-                    zip64endrec = struct.pack(
-                        structEndArchive64, stringEndArchive64,
-                        44, 45, 45, 0, 0, centDirCount, centDirCount,
-                        centDirSize, centDirOffset)
-                    self.fp.write(zip64endrec)
-
-                    zip64locrec = struct.pack(
-                        structEndArchive64Locator,
-                        stringEndArchive64Locator, 0, pos2, 1)
-                    self.fp.write(zip64locrec)
-                    centDirCount = min(centDirCount, 0xFFFF)
-                    centDirSize = min(centDirSize, 0xFFFFFFFF)
-                    centDirOffset = min(centDirOffset, 0xFFFFFFFF)
-
-                endrec = struct.pack(structEndArchive, stringEndArchive,
-                                     0, 0, centDirCount, centDirCount,
-                                     centDirSize, centDirOffset, len(self._comment))
-                self.fp.write(endrec)
-                self.fp.write(self._comment)
-                self.fp.flush()
+                with self._lock:
+                    self.fp.seek(self.start_dir, 0)
+                    self._write_end_record()
         finally:
             fp = self.fp
             self.fp = None
             self._fpclose(fp)
 
+    def _write_end_record(self):
+        self.fp.seek(self.start_dir, 0)
+        for zinfo in self.filelist:         # write central directory
+            dt = zinfo.date_time
+            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
+            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
+            extra = []
+            if zinfo.file_size > ZIP64_LIMIT \
+               or zinfo.compress_size > ZIP64_LIMIT:
+                extra.append(zinfo.file_size)
+                extra.append(zinfo.compress_size)
+                file_size = 0xffffffff
+                compress_size = 0xffffffff
+            else:
+                file_size = zinfo.file_size
+                compress_size = zinfo.compress_size
+
+            if zinfo.header_offset > ZIP64_LIMIT:
+                extra.append(zinfo.header_offset)
+                header_offset = 0xffffffff
+            else:
+                header_offset = zinfo.header_offset
+
+            extra_data = zinfo.extra
+            min_version = 0
+            if extra:
+                # Append a ZIP64 field to the extra's
+                extra_data = struct.pack(
+                    '<HH' + 'Q'*len(extra),
+                    1, 8*len(extra), *extra) + extra_data
+
+                min_version = ZIP64_VERSION
+
+            if zinfo.compress_type == ZIP_BZIP2:
+                min_version = max(BZIP2_VERSION, min_version)
+            elif zinfo.compress_type == ZIP_LZMA:
+                min_version = max(LZMA_VERSION, min_version)
+
+            extract_version = max(min_version, zinfo.extract_version)
+            create_version = max(min_version, zinfo.create_version)
+            try:
+                filename, flag_bits = zinfo._encodeFilenameFlags()
+                centdir = struct.pack(structCentralDir,
+                                      stringCentralDir, create_version,
+                                      zinfo.create_system, extract_version, zinfo.reserved,
+                                      flag_bits, zinfo.compress_type, dostime, dosdate,
+                                      zinfo.CRC, compress_size, file_size,
+                                      len(filename), len(extra_data), len(zinfo.comment),
+                                      0, zinfo.internal_attr, zinfo.external_attr,
+                                      header_offset)
+            except DeprecationWarning:
+                print((structCentralDir, stringCentralDir, create_version,
+                       zinfo.create_system, extract_version, zinfo.reserved,
+                       zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
+                       zinfo.CRC, compress_size, file_size,
+                       len(zinfo.filename), len(extra_data), len(zinfo.comment),
+                       0, zinfo.internal_attr, zinfo.external_attr,
+                       header_offset), file=sys.stderr)
+                raise
+            self.fp.write(centdir)
+            self.fp.write(filename)
+            self.fp.write(extra_data)
+            self.fp.write(zinfo.comment)
+
+        pos2 = self.fp.tell()
+        # Write end-of-zip-archive record
+        centDirCount = len(self.filelist)
+        centDirSize = pos2 - self.start_dir
+        centDirOffset = self.start_dir
+        requires_zip64 = None
+        if centDirCount > ZIP_FILECOUNT_LIMIT:
+            requires_zip64 = "Files count"
+        elif centDirOffset > ZIP64_LIMIT:
+            requires_zip64 = "Central directory offset"
+        elif centDirSize > ZIP64_LIMIT:
+            requires_zip64 = "Central directory size"
+        if requires_zip64:
+            # Need to write the ZIP64 end-of-archive records
+            if not self._allowZip64:
+                raise LargeZipFile(requires_zip64 +
+                                   " would require ZIP64 extensions")
+            zip64endrec = struct.pack(
+                structEndArchive64, stringEndArchive64,
+                44, 45, 45, 0, 0, centDirCount, centDirCount,
+                centDirSize, centDirOffset)
+            self.fp.write(zip64endrec)
+
+            zip64locrec = struct.pack(
+                structEndArchive64Locator,
+                stringEndArchive64Locator, 0, pos2, 1)
+            self.fp.write(zip64locrec)
+            centDirCount = min(centDirCount, 0xFFFF)
+            centDirSize = min(centDirSize, 0xFFFFFFFF)
+            centDirOffset = min(centDirOffset, 0xFFFFFFFF)
+
+        endrec = struct.pack(structEndArchive, stringEndArchive,
+                             0, 0, centDirCount, centDirCount,
+                             centDirSize, centDirOffset, len(self._comment))
+        self.fp.write(endrec)
+        self.fp.write(self._comment)
+        self.fp.flush()
+
     def _fpclose(self, fp):
         assert self._fileRefCnt > 0
         self._fileRefCnt -= 1
diff --git a/Misc/NEWS b/Misc/NEWS
index 784542868b6..21f1ac9e161 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -218,6 +218,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #14099: Writing to ZipFile and reading multiple ZipExtFiles is
+  threadsafe now.
+
 - Issue #19361: JSON decoder now raises JSONDecodeError instead of ValueError.
 
 - Issue #18518: timeit now rejects statements which can't be compiled outside
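
Reviewer note (not part of the patch): the NEWS entry says that reading multiple
ZipExtFiles from one ZipFile is now threadsafe. A minimal sketch of the usage
this enables is below; it assumes a pre-existing archive named "example.zip" and
is illustrative only, not taken from the patch or its tests.

    import threading
    import zipfile

    def read_member(zf, name, sizes):
        # Each thread gets its own ZipExtFile from ZipFile.open(); the shared
        # underlying file object is wrapped in _SharedFile, so the seek/read
        # pair inside each read() call is serialized by the ZipFile's RLock.
        with zf.open(name) as member:
            sizes[name] = len(member.read())

    with zipfile.ZipFile("example.zip") as zf:
        sizes = {}
        threads = [threading.Thread(target=read_member, args=(zf, name, sizes))
                   for name in zf.namelist()]
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        print(sizes)

Writing is covered by the same mechanism: write(), writestr() and close() all
take the same lock before seeking to self.start_dir, so concurrent writers
cannot interleave their local header, file data and central-directory output.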