Issue #22341: Drop Python 2 workaround and document CRC initial value
Also align the parameter naming in binascii to be consistent with zlib.
This commit is contained in:
parent
7dda421bff
commit
b82032f935
|
@ -110,15 +110,16 @@ The :mod:`binascii` module defines the following functions:
|
|||
possibly the last fragment).
|
||||
|
||||
|
||||
.. function:: crc_hqx(data, crc)
|
||||
.. function:: crc_hqx(data, value)
|
||||
|
||||
Compute the binhex4 crc value of *data*, starting with an initial *crc* and
|
||||
returning the result.
|
||||
Compute the binhex4 crc value of *data*, starting with *value* as the
|
||||
initial crc, and return the result.
|
||||
|
||||
|
||||
.. function:: crc32(data[, crc])
|
||||
.. function:: crc32(data[, value])
|
||||
|
||||
Compute CRC-32, the 32-bit checksum of data, starting with an initial crc. This
|
||||
Compute CRC-32, the 32-bit checksum of *data*, starting with an
|
||||
initial CRC of *value*. The default initial CRC is zero. The algorithm
|
||||
is consistent with the ZIP file checksum. Since the algorithm is designed for
|
||||
use as a checksum algorithm, it is not suitable for use as a general hash
|
||||
algorithm. Use as follows::
|
||||
|
@ -126,15 +127,13 @@ The :mod:`binascii` module defines the following functions:
|
|||
print(binascii.crc32(b"hello world"))
|
||||
# Or, in two pieces:
|
||||
crc = binascii.crc32(b"hello")
|
||||
crc = binascii.crc32(b" world", crc) & 0xffffffff
|
||||
crc = binascii.crc32(b" world", crc)
|
||||
print('crc32 = {:#010x}'.format(crc))
|
||||
|
||||
.. note::
|
||||
.. versionchanged:: 3.0
|
||||
The result is always unsigned.
|
||||
To generate the same numeric value across all Python versions and
|
||||
platforms use crc32(data) & 0xffffffff. If you are only using
|
||||
the checksum in packed binary format this is not necessary as the
|
||||
return value is the correct 32-bit binary representation
|
||||
regardless of sign.
|
||||
platforms, use ``crc32(data) & 0xffffffff``.
|
||||
|
||||
|
||||
.. function:: b2a_hex(data)
|
||||
|
|
|
@ -31,22 +31,19 @@ The available exception and functions in this module are:
|
|||
.. function:: adler32(data[, value])
|
||||
|
||||
Computes an Adler-32 checksum of *data*. (An Adler-32 checksum is almost as
|
||||
reliable as a CRC32 but can be computed much more quickly.) If *value* is
|
||||
present, it is used as the starting value of the checksum; otherwise, a fixed
|
||||
default value is used. This allows computing a running checksum over the
|
||||
reliable as a CRC32 but can be computed much more quickly.) The result
|
||||
is an unsigned 32-bit integer. If *value* is present, it is used as
|
||||
the starting value of the checksum; otherwise, a default value of 1
|
||||
is used. Passing in *value* allows computing a running checksum over the
|
||||
concatenation of several inputs. The algorithm is not cryptographically
|
||||
strong, and should not be used for authentication or digital signatures. Since
|
||||
the algorithm is designed for use as a checksum algorithm, it is not suitable
|
||||
for use as a general hash algorithm.
|
||||
|
||||
Always returns an unsigned 32-bit integer.
|
||||
|
||||
.. note::
|
||||
.. versionchanged:: 3.0
|
||||
Always returns an unsigned value.
|
||||
To generate the same numeric value across all Python versions and
|
||||
platforms use adler32(data) & 0xffffffff. If you are only using
|
||||
the checksum in packed binary format this is not necessary as the
|
||||
return value is the correct 32-bit binary representation
|
||||
regardless of sign.
|
||||
platforms, use ``adler32(data) & 0xffffffff``.
|
||||
|
||||
|
||||
.. function:: compress(data[, level])
|
||||
|
@ -97,23 +94,19 @@ The available exception and functions in this module are:
|
|||
single: Cyclic Redundancy Check
|
||||
single: checksum; Cyclic Redundancy Check
|
||||
|
||||
Computes a CRC (Cyclic Redundancy Check) checksum of *data*. If *value* is
|
||||
present, it is used as the starting value of the checksum; otherwise, a fixed
|
||||
default value is used. This allows computing a running checksum over the
|
||||
Computes a CRC (Cyclic Redundancy Check) checksum of *data*. The
|
||||
result is an unsigned 32-bit integer. If *value* is present, it is used
|
||||
as the starting value of the checksum; otherwise, a default value of 0
|
||||
is used. Passing in *value* allows computing a running checksum over the
|
||||
concatenation of several inputs. The algorithm is not cryptographically
|
||||
strong, and should not be used for authentication or digital signatures. Since
|
||||
the algorithm is designed for use as a checksum algorithm, it is not suitable
|
||||
for use as a general hash algorithm.
|
||||
|
||||
Always returns an unsigned 32-bit integer.
|
||||
|
||||
.. note::
|
||||
|
||||
.. versionchanged:: 3.0
|
||||
Always returns an unsigned value.
|
||||
To generate the same numeric value across all Python versions and
|
||||
platforms, use ``crc32(data) & 0xffffffff``. If you are only using
|
||||
the checksum in packed binary format this is not necessary as the
|
||||
return value is the correct 32-bit binary representation
|
||||
regardless of sign.
|
||||
platforms, use ``crc32(data) & 0xffffffff``.
|
||||
|
||||
|
||||
.. function:: decompress(data[, wbits[, bufsize]])
|
||||
|
|
|
@ -210,7 +210,7 @@ class GzipFile(_compression.BaseStream):
|
|||
|
||||
def _init_write(self, filename):
|
||||
self.name = filename
|
||||
self.crc = zlib.crc32(b"") & 0xffffffff
|
||||
self.crc = zlib.crc32(b"")
|
||||
self.size = 0
|
||||
self.writebuf = []
|
||||
self.bufsize = 0
|
||||
|
@ -261,7 +261,7 @@ class GzipFile(_compression.BaseStream):
|
|||
if length > 0:
|
||||
self.fileobj.write(self.compress.compress(data))
|
||||
self.size += length
|
||||
self.crc = zlib.crc32(data, self.crc) & 0xffffffff
|
||||
self.crc = zlib.crc32(data, self.crc)
|
||||
self.offset += length
|
||||
|
||||
return length
|
||||
|
@ -381,7 +381,7 @@ class _GzipReader(_compression.DecompressReader):
|
|||
self._last_mtime = None
|
||||
|
||||
def _init_read(self):
|
||||
self._crc = zlib.crc32(b"") & 0xffffffff
|
||||
self._crc = zlib.crc32(b"")
|
||||
self._stream_size = 0 # Decompressed size of unconcatenated stream
|
||||
|
||||
def _read_exact(self, n):
|
||||
|
@ -485,7 +485,7 @@ class _GzipReader(_compression.DecompressReader):
|
|||
return uncompress
|
||||
|
||||
def _add_read_data(self, data):
|
||||
self._crc = zlib.crc32(data, self._crc) & 0xffffffff
|
||||
self._crc = zlib.crc32(data, self._crc)
|
||||
self._stream_size = self._stream_size + len(data)
|
||||
|
||||
def _read_eof(self):
|
||||
|
|
|
@ -459,13 +459,7 @@ class _Stream:
|
|||
self.fileobj.write(self.buf)
|
||||
self.buf = b""
|
||||
if self.comptype == "gz":
|
||||
# The native zlib crc is an unsigned 32-bit integer, but
|
||||
# the Python wrapper implicitly casts that to a signed C
|
||||
# long. So, on a 32-bit box self.crc may "look negative",
|
||||
# while the same crc on a 64-bit box may "look positive".
|
||||
# To avoid irksome warnings from the `struct` module, force
|
||||
# it to look positive on all boxes.
|
||||
self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
|
||||
self.fileobj.write(struct.pack("<L", self.crc))
|
||||
self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
|
||||
finally:
|
||||
if not self._extfileobj:
|
||||
|
|
|
@ -47,16 +47,11 @@ class ChecksumTestCase(unittest.TestCase):
|
|||
self.assertEqual(zlib.adler32(b"", 1), 1)
|
||||
self.assertEqual(zlib.adler32(b"", 432), 432)
|
||||
|
||||
def assertEqual32(self, seen, expected):
|
||||
# 32-bit values masked -- checksums on 32- vs 64-bit machines
|
||||
# This is important if bit 31 (0x80000000) is set.
|
||||
self.assertEqual(seen & 0x0FFFFFFFF, expected & 0x0FFFFFFFF)
|
||||
|
||||
def test_penguins(self):
|
||||
self.assertEqual32(zlib.crc32(b"penguin", 0), 0x0e5c1a120)
|
||||
self.assertEqual32(zlib.crc32(b"penguin", 1), 0x43b6aa94)
|
||||
self.assertEqual32(zlib.adler32(b"penguin", 0), 0x0bcf02f6)
|
||||
self.assertEqual32(zlib.adler32(b"penguin", 1), 0x0bd602f7)
|
||||
self.assertEqual(zlib.crc32(b"penguin", 0), 0x0e5c1a120)
|
||||
self.assertEqual(zlib.crc32(b"penguin", 1), 0x43b6aa94)
|
||||
self.assertEqual(zlib.adler32(b"penguin", 0), 0x0bcf02f6)
|
||||
self.assertEqual(zlib.adler32(b"penguin", 1), 0x0bd602f7)
|
||||
|
||||
self.assertEqual(zlib.crc32(b"penguin"), zlib.crc32(b"penguin", 0))
|
||||
self.assertEqual(zlib.adler32(b"penguin"),zlib.adler32(b"penguin",1))
|
||||
|
|
|
@ -734,7 +734,7 @@ class ZipExtFile(io.BufferedIOBase):
|
|||
|
||||
if hasattr(zipinfo, 'CRC'):
|
||||
self._expected_crc = zipinfo.CRC
|
||||
self._running_crc = crc32(b'') & 0xffffffff
|
||||
self._running_crc = crc32(b'')
|
||||
else:
|
||||
self._expected_crc = None
|
||||
|
||||
|
@ -856,7 +856,7 @@ class ZipExtFile(io.BufferedIOBase):
|
|||
if self._expected_crc is None:
|
||||
# No need to compute the CRC if we don't have a reference value
|
||||
return
|
||||
self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
|
||||
self._running_crc = crc32(newdata, self._running_crc)
|
||||
# Check the CRC if we're at the end of the file
|
||||
if self._eof and self._running_crc != self._expected_crc:
|
||||
raise BadZipFile("Bad CRC-32 for file %r" % self.name)
|
||||
|
@ -1492,7 +1492,7 @@ class ZipFile:
|
|||
if not buf:
|
||||
break
|
||||
file_size = file_size + len(buf)
|
||||
CRC = crc32(buf, CRC) & 0xffffffff
|
||||
CRC = crc32(buf, CRC)
|
||||
if cmpr:
|
||||
buf = cmpr.compress(buf)
|
||||
compress_size = compress_size + len(buf)
|
||||
|
@ -1567,7 +1567,7 @@ class ZipFile:
|
|||
|
||||
self._writecheck(zinfo)
|
||||
self._didModify = True
|
||||
zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum
|
||||
zinfo.CRC = crc32(data) # CRC-32 checksum
|
||||
co = _get_compressor(zinfo.compress_type)
|
||||
if co:
|
||||
data = co.compress(data) + co.flush()
|
||||
|
|
Loading…
Reference in New Issue