Issue #22341: Drop Python 2 workaround and document CRC initial value
Also align the parameter naming in binascii to be consistent with zlib.
This commit is contained in:
parent
7dda421bff
commit
b82032f935
|
@ -110,15 +110,16 @@ The :mod:`binascii` module defines the following functions:
|
||||||
possibly the last fragment).
|
possibly the last fragment).
|
||||||
|
|
||||||
|
|
||||||
.. function:: crc_hqx(data, crc)
|
.. function:: crc_hqx(data, value)
|
||||||
|
|
||||||
Compute the binhex4 crc value of *data*, starting with an initial *crc* and
|
Compute the binhex4 crc value of *data*, starting with *value* as the
|
||||||
returning the result.
|
initial crc, and return the result.
|
||||||
|
|
||||||
|
|
||||||
.. function:: crc32(data[, crc])
|
.. function:: crc32(data[, value])
|
||||||
|
|
||||||
Compute CRC-32, the 32-bit checksum of data, starting with an initial crc. This
|
Compute CRC-32, the 32-bit checksum of *data*, starting with an
|
||||||
|
initial CRC of *value*. The default initial CRC is zero. The algorithm
|
||||||
is consistent with the ZIP file checksum. Since the algorithm is designed for
|
is consistent with the ZIP file checksum. Since the algorithm is designed for
|
||||||
use as a checksum algorithm, it is not suitable for use as a general hash
|
use as a checksum algorithm, it is not suitable for use as a general hash
|
||||||
algorithm. Use as follows::
|
algorithm. Use as follows::
|
||||||
|
@ -126,15 +127,13 @@ The :mod:`binascii` module defines the following functions:
|
||||||
print(binascii.crc32(b"hello world"))
|
print(binascii.crc32(b"hello world"))
|
||||||
# Or, in two pieces:
|
# Or, in two pieces:
|
||||||
crc = binascii.crc32(b"hello")
|
crc = binascii.crc32(b"hello")
|
||||||
crc = binascii.crc32(b" world", crc) & 0xffffffff
|
crc = binascii.crc32(b" world", crc)
|
||||||
print('crc32 = {:#010x}'.format(crc))
|
print('crc32 = {:#010x}'.format(crc))
|
||||||
|
|
||||||
.. note::
|
.. versionchanged:: 3.0
|
||||||
To generate the same numeric value across all Python versions and
|
The result is always unsigned.
|
||||||
platforms use crc32(data) & 0xffffffff. If you are only using
|
To generate the same numeric value across all Python versions and
|
||||||
the checksum in packed binary format this is not necessary as the
|
platforms, use ``crc32(data) & 0xffffffff``.
|
||||||
return value is the correct 32bit binary representation
|
|
||||||
regardless of sign.
|
|
||||||
|
|
||||||
|
|
||||||
.. function:: b2a_hex(data)
|
.. function:: b2a_hex(data)
|
||||||
|
|
|
@ -31,22 +31,19 @@ The available exception and functions in this module are:
|
||||||
.. function:: adler32(data[, value])
|
.. function:: adler32(data[, value])
|
||||||
|
|
||||||
Computes an Adler-32 checksum of *data*. (An Adler-32 checksum is almost as
|
Computes an Adler-32 checksum of *data*. (An Adler-32 checksum is almost as
|
||||||
reliable as a CRC32 but can be computed much more quickly.) If *value* is
|
reliable as a CRC32 but can be computed much more quickly.) The result
|
||||||
present, it is used as the starting value of the checksum; otherwise, a fixed
|
is an unsigned 32-bit integer. If *value* is present, it is used as
|
||||||
default value is used. This allows computing a running checksum over the
|
the starting value of the checksum; otherwise, a default value of 1
|
||||||
|
is used. Passing in *value* allows computing a running checksum over the
|
||||||
concatenation of several inputs. The algorithm is not cryptographically
|
concatenation of several inputs. The algorithm is not cryptographically
|
||||||
strong, and should not be used for authentication or digital signatures. Since
|
strong, and should not be used for authentication or digital signatures. Since
|
||||||
the algorithm is designed for use as a checksum algorithm, it is not suitable
|
the algorithm is designed for use as a checksum algorithm, it is not suitable
|
||||||
for use as a general hash algorithm.
|
for use as a general hash algorithm.
|
||||||
|
|
||||||
Always returns an unsigned 32-bit integer.
|
.. versionchanged:: 3.0
|
||||||
|
Always returns an unsigned value.
|
||||||
.. note::
|
To generate the same numeric value across all Python versions and
|
||||||
To generate the same numeric value across all Python versions and
|
platforms, use ``adler32(data) & 0xffffffff``.
|
||||||
platforms use adler32(data) & 0xffffffff. If you are only using
|
|
||||||
the checksum in packed binary format this is not necessary as the
|
|
||||||
return value is the correct 32bit binary representation
|
|
||||||
regardless of sign.
|
|
||||||
|
|
||||||
|
|
||||||
.. function:: compress(data[, level])
|
.. function:: compress(data[, level])
|
||||||
|
@ -97,23 +94,19 @@ The available exception and functions in this module are:
|
||||||
single: Cyclic Redundancy Check
|
single: Cyclic Redundancy Check
|
||||||
single: checksum; Cyclic Redundancy Check
|
single: checksum; Cyclic Redundancy Check
|
||||||
|
|
||||||
Computes a CRC (Cyclic Redundancy Check) checksum of *data*. If *value* is
|
Computes a CRC (Cyclic Redundancy Check) checksum of *data*. The
|
||||||
present, it is used as the starting value of the checksum; otherwise, a fixed
|
result is an unsigned 32-bit integer. If *value* is present, it is used
|
||||||
default value is used. This allows computing a running checksum over the
|
as the starting value of the checksum; otherwise, a default value of 0
|
||||||
|
is used. Passing in *value* allows computing a running checksum over the
|
||||||
concatenation of several inputs. The algorithm is not cryptographically
|
concatenation of several inputs. The algorithm is not cryptographically
|
||||||
strong, and should not be used for authentication or digital signatures. Since
|
strong, and should not be used for authentication or digital signatures. Since
|
||||||
the algorithm is designed for use as a checksum algorithm, it is not suitable
|
the algorithm is designed for use as a checksum algorithm, it is not suitable
|
||||||
for use as a general hash algorithm.
|
for use as a general hash algorithm.
|
||||||
|
|
||||||
Always returns an unsigned 32-bit integer.
|
.. versionchanged:: 3.0
|
||||||
|
Always returns an unsigned value.
|
||||||
.. note::
|
|
||||||
|
|
||||||
To generate the same numeric value across all Python versions and
|
To generate the same numeric value across all Python versions and
|
||||||
platforms, use ``crc32(data) & 0xffffffff``. If you are only using
|
platforms, use ``crc32(data) & 0xffffffff``.
|
||||||
the checksum in packed binary format this is not necessary as the
|
|
||||||
return value is the correct 32-bit binary representation
|
|
||||||
regardless of sign.
|
|
||||||
|
|
||||||
|
|
||||||
.. function:: decompress(data[, wbits[, bufsize]])
|
.. function:: decompress(data[, wbits[, bufsize]])
|
||||||
|
|
|
@ -210,7 +210,7 @@ class GzipFile(_compression.BaseStream):
|
||||||
|
|
||||||
def _init_write(self, filename):
|
def _init_write(self, filename):
|
||||||
self.name = filename
|
self.name = filename
|
||||||
self.crc = zlib.crc32(b"") & 0xffffffff
|
self.crc = zlib.crc32(b"")
|
||||||
self.size = 0
|
self.size = 0
|
||||||
self.writebuf = []
|
self.writebuf = []
|
||||||
self.bufsize = 0
|
self.bufsize = 0
|
||||||
|
@ -261,7 +261,7 @@ class GzipFile(_compression.BaseStream):
|
||||||
if length > 0:
|
if length > 0:
|
||||||
self.fileobj.write(self.compress.compress(data))
|
self.fileobj.write(self.compress.compress(data))
|
||||||
self.size += length
|
self.size += length
|
||||||
self.crc = zlib.crc32(data, self.crc) & 0xffffffff
|
self.crc = zlib.crc32(data, self.crc)
|
||||||
self.offset += length
|
self.offset += length
|
||||||
|
|
||||||
return length
|
return length
|
||||||
|
@ -381,7 +381,7 @@ class _GzipReader(_compression.DecompressReader):
|
||||||
self._last_mtime = None
|
self._last_mtime = None
|
||||||
|
|
||||||
def _init_read(self):
|
def _init_read(self):
|
||||||
self._crc = zlib.crc32(b"") & 0xffffffff
|
self._crc = zlib.crc32(b"")
|
||||||
self._stream_size = 0 # Decompressed size of unconcatenated stream
|
self._stream_size = 0 # Decompressed size of unconcatenated stream
|
||||||
|
|
||||||
def _read_exact(self, n):
|
def _read_exact(self, n):
|
||||||
|
@ -485,7 +485,7 @@ class _GzipReader(_compression.DecompressReader):
|
||||||
return uncompress
|
return uncompress
|
||||||
|
|
||||||
def _add_read_data(self, data):
|
def _add_read_data(self, data):
|
||||||
self._crc = zlib.crc32(data, self._crc) & 0xffffffff
|
self._crc = zlib.crc32(data, self._crc)
|
||||||
self._stream_size = self._stream_size + len(data)
|
self._stream_size = self._stream_size + len(data)
|
||||||
|
|
||||||
def _read_eof(self):
|
def _read_eof(self):
|
||||||
|
|
|
@ -459,13 +459,7 @@ class _Stream:
|
||||||
self.fileobj.write(self.buf)
|
self.fileobj.write(self.buf)
|
||||||
self.buf = b""
|
self.buf = b""
|
||||||
if self.comptype == "gz":
|
if self.comptype == "gz":
|
||||||
# The native zlib crc is an unsigned 32-bit integer, but
|
self.fileobj.write(struct.pack("<L", self.crc))
|
||||||
# the Python wrapper implicitly casts that to a signed C
|
|
||||||
# long. So, on a 32-bit box self.crc may "look negative",
|
|
||||||
# while the same crc on a 64-bit box may "look positive".
|
|
||||||
# To avoid irksome warnings from the `struct` module, force
|
|
||||||
# it to look positive on all boxes.
|
|
||||||
self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
|
|
||||||
self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
|
self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
|
||||||
finally:
|
finally:
|
||||||
if not self._extfileobj:
|
if not self._extfileobj:
|
||||||
|
|
|
@ -47,16 +47,11 @@ class ChecksumTestCase(unittest.TestCase):
|
||||||
self.assertEqual(zlib.adler32(b"", 1), 1)
|
self.assertEqual(zlib.adler32(b"", 1), 1)
|
||||||
self.assertEqual(zlib.adler32(b"", 432), 432)
|
self.assertEqual(zlib.adler32(b"", 432), 432)
|
||||||
|
|
||||||
def assertEqual32(self, seen, expected):
|
|
||||||
# 32-bit values masked -- checksums on 32- vs 64- bit machines
|
|
||||||
# This is important if bit 31 (0x80000000L) is set.
|
|
||||||
self.assertEqual(seen & 0x0FFFFFFFF, expected & 0x0FFFFFFFF)
|
|
||||||
|
|
||||||
def test_penguins(self):
|
def test_penguins(self):
|
||||||
self.assertEqual32(zlib.crc32(b"penguin", 0), 0x0e5c1a120)
|
self.assertEqual(zlib.crc32(b"penguin", 0), 0x0e5c1a120)
|
||||||
self.assertEqual32(zlib.crc32(b"penguin", 1), 0x43b6aa94)
|
self.assertEqual(zlib.crc32(b"penguin", 1), 0x43b6aa94)
|
||||||
self.assertEqual32(zlib.adler32(b"penguin", 0), 0x0bcf02f6)
|
self.assertEqual(zlib.adler32(b"penguin", 0), 0x0bcf02f6)
|
||||||
self.assertEqual32(zlib.adler32(b"penguin", 1), 0x0bd602f7)
|
self.assertEqual(zlib.adler32(b"penguin", 1), 0x0bd602f7)
|
||||||
|
|
||||||
self.assertEqual(zlib.crc32(b"penguin"), zlib.crc32(b"penguin", 0))
|
self.assertEqual(zlib.crc32(b"penguin"), zlib.crc32(b"penguin", 0))
|
||||||
self.assertEqual(zlib.adler32(b"penguin"),zlib.adler32(b"penguin",1))
|
self.assertEqual(zlib.adler32(b"penguin"),zlib.adler32(b"penguin",1))
|
||||||
|
|
|
@ -734,7 +734,7 @@ class ZipExtFile(io.BufferedIOBase):
|
||||||
|
|
||||||
if hasattr(zipinfo, 'CRC'):
|
if hasattr(zipinfo, 'CRC'):
|
||||||
self._expected_crc = zipinfo.CRC
|
self._expected_crc = zipinfo.CRC
|
||||||
self._running_crc = crc32(b'') & 0xffffffff
|
self._running_crc = crc32(b'')
|
||||||
else:
|
else:
|
||||||
self._expected_crc = None
|
self._expected_crc = None
|
||||||
|
|
||||||
|
@ -856,7 +856,7 @@ class ZipExtFile(io.BufferedIOBase):
|
||||||
if self._expected_crc is None:
|
if self._expected_crc is None:
|
||||||
# No need to compute the CRC if we don't have a reference value
|
# No need to compute the CRC if we don't have a reference value
|
||||||
return
|
return
|
||||||
self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
|
self._running_crc = crc32(newdata, self._running_crc)
|
||||||
# Check the CRC if we're at the end of the file
|
# Check the CRC if we're at the end of the file
|
||||||
if self._eof and self._running_crc != self._expected_crc:
|
if self._eof and self._running_crc != self._expected_crc:
|
||||||
raise BadZipFile("Bad CRC-32 for file %r" % self.name)
|
raise BadZipFile("Bad CRC-32 for file %r" % self.name)
|
||||||
|
@ -1492,7 +1492,7 @@ class ZipFile:
|
||||||
if not buf:
|
if not buf:
|
||||||
break
|
break
|
||||||
file_size = file_size + len(buf)
|
file_size = file_size + len(buf)
|
||||||
CRC = crc32(buf, CRC) & 0xffffffff
|
CRC = crc32(buf, CRC)
|
||||||
if cmpr:
|
if cmpr:
|
||||||
buf = cmpr.compress(buf)
|
buf = cmpr.compress(buf)
|
||||||
compress_size = compress_size + len(buf)
|
compress_size = compress_size + len(buf)
|
||||||
|
@ -1567,7 +1567,7 @@ class ZipFile:
|
||||||
|
|
||||||
self._writecheck(zinfo)
|
self._writecheck(zinfo)
|
||||||
self._didModify = True
|
self._didModify = True
|
||||||
zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum
|
zinfo.CRC = crc32(data) # CRC-32 checksum
|
||||||
co = _get_compressor(zinfo.compress_type)
|
co = _get_compressor(zinfo.compress_type)
|
||||||
if co:
|
if co:
|
||||||
data = co.compress(data) + co.flush()
|
data = co.compress(data) + co.flush()
|
||||||
|
|
Loading…
Reference in New Issue