From 353e54edc42168e0f830a3f346040652831b8b85 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 22 Jan 2013 17:13:26 +0200 Subject: [PATCH] Issue #1159051: GzipFile now raises EOFError when reading a corrupted file with truncated header or footer. Added tests for reading truncated gzip and bzip2 files. --- Lib/gzip.py | 68 ++++++++++++++++++++----------------------- Lib/test/test_bz2.py | 18 ++++++++++++ Lib/test/test_gzip.py | 18 ++++++++++++ Misc/NEWS | 3 ++ 4 files changed, 71 insertions(+), 36 deletions(-) diff --git a/Lib/gzip.py b/Lib/gzip.py index a613bae876d..a2f23679fa8 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -21,9 +21,6 @@ def write32u(output, value): # or unsigned. output.write(struct.pack(" self.extrasize: - self._read(readsize) - readsize = min(self.max_read_chunk, readsize * 2) - except EOFError: - if size > self.extrasize: - size = self.extrasize + while size > self.extrasize: + if not self._read(readsize): + if size > self.extrasize: + size = self.extrasize + break + readsize = min(self.max_read_chunk, readsize * 2) offset = self.offset - self.extrastart chunk = self.extrabuf[offset: offset + size] @@ -277,7 +274,7 @@ class GzipFile(io.BufferedIOBase): def _read(self, size=1024): if self.fileobj is None: - raise EOFError, "Reached EOF" + return False if self._new_member: # If the _new_member flag is set, we have to @@ -288,7 +285,7 @@ class GzipFile(io.BufferedIOBase): pos = self.fileobj.tell() # Save current position self.fileobj.seek(0, 2) # Seek to end of file if pos == self.fileobj.tell(): - raise EOFError, "Reached EOF" + return False else: self.fileobj.seek( pos ) # Return to original position @@ -305,9 +302,10 @@ class GzipFile(io.BufferedIOBase): if buf == "": uncompress = self.decompress.flush() + self.fileobj.seek(-len(self.decompress.unused_data), 1) self._read_eof() self._add_read_data( uncompress ) - raise EOFError, 'Reached EOF' + return False uncompress = self.decompress.decompress(buf) self._add_read_data( uncompress ) @@ -317,13 +315,14 @@ class GzipFile(io.BufferedIOBase): # so seek back to the start of the unused data, finish up # this member, and read a new gzip header. # (The number of bytes to seek back is the length of the unused - # data, minus 8 because _read_eof() will rewind a further 8 bytes) - self.fileobj.seek( -len(self.decompress.unused_data)+8, 1) + # data) + self.fileobj.seek(-len(self.decompress.unused_data), 1) # Check the CRC and file size, and set the flag so we read # a new member on the next call self._read_eof() self._new_member = True + return True def _add_read_data(self, data): self.crc = zlib.crc32(data, self.crc) & 0xffffffffL @@ -334,14 +333,11 @@ class GzipFile(io.BufferedIOBase): self.size = self.size + len(data) def _read_eof(self): - # We've read to the end of the file, so we have to rewind in order - # to reread the 8 bytes containing the CRC and the file size. + # We've read to the end of the file. # We check the that the computed CRC and size of the # uncompressed data matches the stored values. Note that the size # stored is the true file size mod 2**32. - self.fileobj.seek(-8, 1) - crc32 = read32(self.fileobj) - isize = read32(self.fileobj) # may exceed 2GB + crc32, isize = struct.unpack("