mirror of https://github.com/python/cpython
Fix gzip to properly deal with CRCs being signed values in Python 2.x and to
read 32-bit values as unsigned to start with, rather than applying signedness fixups all over the place afterwards. This hopefully fixes the test_tarfile failure on the alpha/tru64 buildbot.
This commit is contained in:
parent
73f57b0c05
commit
79b4ba8fd7
44
Lib/gzip.py
44
Lib/gzip.py
|
@ -16,28 +16,16 @@ FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
|
||||||
READ, WRITE = 1, 2
|
READ, WRITE = 1, 2
|
||||||
|
|
||||||
def U32(i):
|
def U32(i):
|
||||||
"""Return i as an unsigned integer, assuming it fits in 32 bits.
|
"""Return the low-order 32 bits, as a non-negative int or long."""
|
||||||
|
|
||||||
If it's >= 2GB when viewed as a 32-bit unsigned int, return a long.
|
|
||||||
"""
|
|
||||||
if i < 0:
|
|
||||||
i += 1L << 32
|
|
||||||
return i
|
|
||||||
|
|
||||||
def LOWU32(i):
|
|
||||||
"""Return the low-order 32 bits of an int, as a non-negative int."""
|
|
||||||
return i & 0xFFFFFFFFL
|
return i & 0xFFFFFFFFL
|
||||||
|
|
||||||
def write32(output, value):
|
|
||||||
output.write(struct.pack("<l", value))
|
|
||||||
|
|
||||||
def write32u(output, value):
|
def write32u(output, value):
|
||||||
# The L format writes the bit pattern correctly whether signed
|
# The L format writes the bit pattern correctly whether signed
|
||||||
# or unsigned.
|
# or unsigned.
|
||||||
output.write(struct.pack("<L", value))
|
output.write(struct.pack("<L", value))
|
||||||
|
|
||||||
def read32(input):
|
def read32(input):
|
||||||
return struct.unpack("<l", input.read(4))[0]
|
return struct.unpack("<I", input.read(4))[0]
|
||||||
|
|
||||||
def open(filename, mode="rb", compresslevel=9):
|
def open(filename, mode="rb", compresslevel=9):
|
||||||
"""Shorthand for GzipFile(filename, mode, compresslevel).
|
"""Shorthand for GzipFile(filename, mode, compresslevel).
|
||||||
|
@ -141,7 +129,7 @@ class GzipFile:
|
||||||
|
|
||||||
def _init_write(self, filename):
|
def _init_write(self, filename):
|
||||||
self.name = filename
|
self.name = filename
|
||||||
self.crc = zlib.crc32("")
|
self.crc = zlib.crc32("") & 0xffffffffL
|
||||||
self.size = 0
|
self.size = 0
|
||||||
self.writebuf = []
|
self.writebuf = []
|
||||||
self.bufsize = 0
|
self.bufsize = 0
|
||||||
|
@ -163,7 +151,7 @@ class GzipFile:
|
||||||
self.fileobj.write(fname + '\000')
|
self.fileobj.write(fname + '\000')
|
||||||
|
|
||||||
def _init_read(self):
|
def _init_read(self):
|
||||||
self.crc = zlib.crc32("")
|
self.crc = zlib.crc32("") & 0xffffffffL
|
||||||
self.size = 0
|
self.size = 0
|
||||||
|
|
||||||
def _read_gzip_header(self):
|
def _read_gzip_header(self):
|
||||||
|
@ -209,7 +197,7 @@ class GzipFile:
|
||||||
raise ValueError, "write() on closed GzipFile object"
|
raise ValueError, "write() on closed GzipFile object"
|
||||||
if len(data) > 0:
|
if len(data) > 0:
|
||||||
self.size = self.size + len(data)
|
self.size = self.size + len(data)
|
||||||
self.crc = zlib.crc32(data, self.crc)
|
self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
|
||||||
self.fileobj.write( self.compress.compress(data) )
|
self.fileobj.write( self.compress.compress(data) )
|
||||||
self.offset += len(data)
|
self.offset += len(data)
|
||||||
|
|
||||||
|
@ -301,7 +289,7 @@ class GzipFile:
|
||||||
self._new_member = True
|
self._new_member = True
|
||||||
|
|
||||||
def _add_read_data(self, data):
|
def _add_read_data(self, data):
|
||||||
self.crc = zlib.crc32(data, self.crc)
|
self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
|
||||||
self.extrabuf = self.extrabuf + data
|
self.extrabuf = self.extrabuf + data
|
||||||
self.extrasize = self.extrasize + len(data)
|
self.extrasize = self.extrasize + len(data)
|
||||||
self.size = self.size + len(data)
|
self.size = self.size + len(data)
|
||||||
|
@ -314,25 +302,19 @@ class GzipFile:
|
||||||
# stored is the true file size mod 2**32.
|
# stored is the true file size mod 2**32.
|
||||||
self.fileobj.seek(-8, 1)
|
self.fileobj.seek(-8, 1)
|
||||||
crc32 = read32(self.fileobj)
|
crc32 = read32(self.fileobj)
|
||||||
isize = U32(read32(self.fileobj)) # may exceed 2GB
|
isize = read32(self.fileobj) # may exceed 2GB
|
||||||
if U32(crc32) != U32(self.crc):
|
if crc32 != self.crc:
|
||||||
raise IOError("CRC check failed %s != %s" % (hex(U32(crc32)),
|
raise IOError("CRC check failed %s != %s" % (hex(crc32),
|
||||||
hex(U32(self.crc))))
|
hex(self.crc)))
|
||||||
elif isize != LOWU32(self.size):
|
elif isize != self.size:
|
||||||
raise IOError, "Incorrect length of data produced"
|
raise IOError, "Incorrect length of data produced"
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
if self.mode == WRITE:
|
if self.mode == WRITE:
|
||||||
self.fileobj.write(self.compress.flush())
|
self.fileobj.write(self.compress.flush())
|
||||||
# The native zlib crc is an unsigned 32-bit integer, but
|
write32u(self.fileobj, self.crc)
|
||||||
# the Python wrapper implicitly casts that to a signed C
|
|
||||||
# long. So, on a 32-bit box self.crc may "look negative",
|
|
||||||
# while the same crc on a 64-bit box may "look positive".
|
|
||||||
# To avoid irksome warnings from the `struct` module, force
|
|
||||||
# it to look positive on all boxes.
|
|
||||||
write32u(self.fileobj, LOWU32(self.crc))
|
|
||||||
# self.size may exceed 2GB, or even 4GB
|
# self.size may exceed 2GB, or even 4GB
|
||||||
write32u(self.fileobj, LOWU32(self.size))
|
write32u(self.fileobj, self.size)
|
||||||
self.fileobj = None
|
self.fileobj = None
|
||||||
elif self.mode == READ:
|
elif self.mode == READ:
|
||||||
self.fileobj = None
|
self.fileobj = None
|
||||||
|
|
Loading…
Reference in New Issue