diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index d278e06a453..d18a77017fa 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -415,6 +415,49 @@ class StoredTestsWithSourceFile(AbstractTestsWithSourceFile, f.seek(len(data)) with zipfile.ZipFile(f, "r") as zipfp: self.assertEqual(zipfp.namelist(), [TESTFN]) + self.assertEqual(zipfp.read(TESTFN), self.data) + with open(TESTFN2, 'rb') as f: + self.assertEqual(f.read(len(data)), data) + zipfiledata = f.read() + with io.BytesIO(zipfiledata) as bio, zipfile.ZipFile(bio) as zipfp: + self.assertEqual(zipfp.namelist(), [TESTFN]) + self.assertEqual(zipfp.read(TESTFN), self.data) + + def test_read_concatenated_zip_file(self): + with io.BytesIO() as bio: + with zipfile.ZipFile(bio, 'w', zipfile.ZIP_STORED) as zipfp: + zipfp.write(TESTFN, TESTFN) + zipfiledata = bio.getvalue() + data = b'I am not a ZipFile!'*10 + with open(TESTFN2, 'wb') as f: + f.write(data) + f.write(zipfiledata) + + with zipfile.ZipFile(TESTFN2) as zipfp: + self.assertEqual(zipfp.namelist(), [TESTFN]) + self.assertEqual(zipfp.read(TESTFN), self.data) + + def test_append_to_concatenated_zip_file(self): + with io.BytesIO() as bio: + with zipfile.ZipFile(bio, 'w', zipfile.ZIP_STORED) as zipfp: + zipfp.write(TESTFN, TESTFN) + zipfiledata = bio.getvalue() + data = b'I am not a ZipFile!'*1000000 + with open(TESTFN2, 'wb') as f: + f.write(data) + f.write(zipfiledata) + + with zipfile.ZipFile(TESTFN2, 'a') as zipfp: + self.assertEqual(zipfp.namelist(), [TESTFN]) + zipfp.writestr('strfile', self.data) + + with open(TESTFN2, 'rb') as f: + self.assertEqual(f.read(len(data)), data) + zipfiledata = f.read() + with io.BytesIO(zipfiledata) as bio, zipfile.ZipFile(bio) as zipfp: + self.assertEqual(zipfp.namelist(), [TESTFN, 'strfile']) + self.assertEqual(zipfp.read(TESTFN), self.data) + self.assertEqual(zipfp.read('strfile'), self.data) def test_ignores_newline_at_end(self): with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED) as zipfp: diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 56a2479fb38..2476717c963 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -1029,10 +1029,10 @@ class ZipFile: # even if no files are added to the archive self._didModify = True try: - self.start_dir = self.fp.tell() + self.start_dir = self._start_disk = self.fp.tell() except (AttributeError, OSError): self.fp = _Tellable(self.fp) - self.start_dir = 0 + self.start_dir = self._start_disk = 0 self._seekable = False else: # Some file-like objects can provide tell() but not seek() @@ -1053,7 +1053,7 @@ class ZipFile: # set the modified flag so central directory gets written # even if no files are added to the archive self._didModify = True - self.start_dir = self.fp.tell() + self.start_dir = self._start_disk = self.fp.tell() else: raise RuntimeError("Mode must be 'r', 'w', 'x', or 'a'") except: @@ -1097,17 +1097,18 @@ class ZipFile: offset_cd = endrec[_ECD_OFFSET] # offset of central directory self._comment = endrec[_ECD_COMMENT] # archive comment - # "concat" is zero, unless zip was concatenated to another file - concat = endrec[_ECD_LOCATION] - size_cd - offset_cd + # self._start_disk: Position of the start of ZIP archive + # It is zero, unless ZIP was concatenated to another file + self._start_disk = endrec[_ECD_LOCATION] - size_cd - offset_cd if endrec[_ECD_SIGNATURE] == stringEndArchive64: # If Zip64 extension structures are present, account for them - concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) + self._start_disk -= (sizeEndCentDir64 + sizeEndCentDir64Locator) if self.debug > 2: - inferred = concat + offset_cd - print("given, inferred, offset", offset_cd, inferred, concat) + inferred = self._start_disk + offset_cd + print("given, inferred, offset", offset_cd, inferred, self._start_disk) # self.start_dir: Position of start of central directory - self.start_dir = offset_cd + concat + self.start_dir = offset_cd + self._start_disk fp.seek(self.start_dir, 0) data = fp.read(size_cd) fp = io.BytesIO(data) @@ -1147,7 +1148,7 @@ class ZipFile: t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) x._decodeExtra() - x.header_offset = x.header_offset + concat + x.header_offset = x.header_offset + self._start_disk self.filelist.append(x) self.NameToInfo[x.filename] = x @@ -1627,11 +1628,10 @@ class ZipFile: file_size = zinfo.file_size compress_size = zinfo.compress_size - if zinfo.header_offset > ZIP64_LIMIT: - extra.append(zinfo.header_offset) + header_offset = zinfo.header_offset - self._start_disk + if header_offset > ZIP64_LIMIT: + extra.append(header_offset) header_offset = 0xffffffff - else: - header_offset = zinfo.header_offset extra_data = zinfo.extra min_version = 0 @@ -1678,7 +1678,7 @@ class ZipFile: # Write end-of-zip-archive record centDirCount = len(self.filelist) centDirSize = pos2 - self.start_dir - centDirOffset = self.start_dir + centDirOffset = self.start_dir - self._start_disk requires_zip64 = None if centDirCount > ZIP_FILECOUNT_LIMIT: requires_zip64 = "Files count" diff --git a/Misc/NEWS b/Misc/NEWS index 369aa8b03f6..36a7f30a436 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -92,6 +92,10 @@ Core and Builtins Library ------- +- Issue #26293: Fixed writing ZIP files that starts not from the start of the + file. Offsets in ZIP file now are relative to the start of the archive in + conforming to the specification. + - Issue #28321: Fixed writing non-BMP characters with binary format in plistlib. - Issue #28322: Fixed possible crashes when unpickle itertools objects from