mirror of https://github.com/python/cpython
gh-88339: enable fast seeking of uncompressed unencrypted zipfile.ZipExtFile (GH-27737)
Avoid reading all of the intermediate data in uncompressed items in a zip file when the user seeks forward. Contributed by: @JuniorJPDJ
This commit is contained in:
parent
56af5a200d
commit
330f1d5828
|
@ -2032,6 +2032,7 @@ class OtherTests(unittest.TestCase):
|
|||
fp.seek(bloc, os.SEEK_CUR)
|
||||
self.assertEqual(fp.tell(), bloc)
|
||||
self.assertEqual(fp.read(5), txt[bloc:bloc+5])
|
||||
self.assertEqual(fp.tell(), bloc + 5)
|
||||
fp.seek(0, os.SEEK_END)
|
||||
self.assertEqual(fp.tell(), len(txt))
|
||||
fp.seek(0, os.SEEK_SET)
|
||||
|
@ -2049,6 +2050,7 @@ class OtherTests(unittest.TestCase):
|
|||
fp.seek(bloc, os.SEEK_CUR)
|
||||
self.assertEqual(fp.tell(), bloc)
|
||||
self.assertEqual(fp.read(5), txt[bloc:bloc+5])
|
||||
self.assertEqual(fp.tell(), bloc + 5)
|
||||
fp.seek(0, os.SEEK_END)
|
||||
self.assertEqual(fp.tell(), len(txt))
|
||||
fp.seek(0, os.SEEK_SET)
|
||||
|
|
|
@ -847,6 +847,7 @@ class ZipExtFile(io.BufferedIOBase):
|
|||
self._orig_compress_size = zipinfo.compress_size
|
||||
self._orig_file_size = zipinfo.file_size
|
||||
self._orig_start_crc = self._running_crc
|
||||
self._orig_crc = self._expected_crc
|
||||
self._seekable = True
|
||||
except AttributeError:
|
||||
pass
|
||||
|
@ -1069,17 +1070,17 @@ class ZipExtFile(io.BufferedIOBase):
|
|||
raise ValueError("I/O operation on closed file.")
|
||||
return self._seekable
|
||||
|
||||
def seek(self, offset, whence=0):
|
||||
def seek(self, offset, whence=os.SEEK_SET):
|
||||
if self.closed:
|
||||
raise ValueError("seek on closed file.")
|
||||
if not self._seekable:
|
||||
raise io.UnsupportedOperation("underlying stream is not seekable")
|
||||
curr_pos = self.tell()
|
||||
if whence == 0: # Seek from start of file
|
||||
if whence == os.SEEK_SET:
|
||||
new_pos = offset
|
||||
elif whence == 1: # Seek from current position
|
||||
elif whence == os.SEEK_CUR:
|
||||
new_pos = curr_pos + offset
|
||||
elif whence == 2: # Seek from EOF
|
||||
elif whence == os.SEEK_END:
|
||||
new_pos = self._orig_file_size + offset
|
||||
else:
|
||||
raise ValueError("whence must be os.SEEK_SET (0), "
|
||||
|
@ -1094,7 +1095,19 @@ class ZipExtFile(io.BufferedIOBase):
|
|||
read_offset = new_pos - curr_pos
|
||||
buff_offset = read_offset + self._offset
|
||||
|
||||
if buff_offset >= 0 and buff_offset < len(self._readbuffer):
|
||||
# Fast seek uncompressed unencrypted file
|
||||
if self._compress_type == ZIP_STORED and self._decrypter is None and read_offset > 0:
|
||||
# disable CRC checking after first seeking - it would be invalid
|
||||
self._expected_crc = None
|
||||
# seek actual file taking already buffered data into account
|
||||
read_offset -= len(self._readbuffer) - self._offset
|
||||
self._fileobj.seek(read_offset, os.SEEK_CUR)
|
||||
self._left -= read_offset
|
||||
read_offset = 0
|
||||
# flush read buffer
|
||||
self._readbuffer = b''
|
||||
self._offset = 0
|
||||
elif buff_offset >= 0 and buff_offset < len(self._readbuffer):
|
||||
# Just move the _offset index if the new position is in the _readbuffer
|
||||
self._offset = buff_offset
|
||||
read_offset = 0
|
||||
|
@ -1102,6 +1115,7 @@ class ZipExtFile(io.BufferedIOBase):
|
|||
# Position is before the current position. Reset the ZipExtFile
|
||||
self._fileobj.seek(self._orig_compress_start)
|
||||
self._running_crc = self._orig_start_crc
|
||||
self._expected_crc = self._orig_crc
|
||||
self._compress_left = self._orig_compress_size
|
||||
self._left = self._orig_file_size
|
||||
self._readbuffer = b''
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Enable fast seeking of uncompressed unencrypted :class:`zipfile.ZipExtFile`
|
Loading…
Reference in New Issue