diff --git a/Lib/gzip.py b/Lib/gzip.py index 983e0cee07f..0fa4ddfdb84 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -147,7 +147,7 @@ class GzipFile: def _init_write(self, filename): self.name = filename - self.crc = zlib.crc32("") & 0xffffffff + self.crc = zlib.crc32(b"") & 0xffffffff self.size = 0 self.writebuf = [] self.bufsize = 0 @@ -177,7 +177,7 @@ class GzipFile: self.fileobj.write(fname + b'\000') def _init_read(self): - self.crc = zlib.crc32("") & 0xffffffff + self.crc = zlib.crc32(b"") & 0xffffffff self.size = 0 def _read_gzip_header(self): diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 8eb80f88ee9..ef316b53eeb 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -400,7 +400,7 @@ class _Stream: except ImportError: raise CompressionError("zlib module is not available") self.zlib = zlib - self.crc = zlib.crc32("") + self.crc = zlib.crc32(b"") if mode == "r": self._init_read_gz() else: diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index 3718ca7533f..2ea10a1dcc1 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -41,12 +41,12 @@ class ChecksumTestCase(unittest.TestCase): self.assertEqual(zlib.adler32(b"penguin"),zlib.adler32(b"penguin",1)) def test_crc32_adler32_unsigned(self): - foo = 'abcdefghijklmnop' + foo = b'abcdefghijklmnop' # explicitly test signed behavior self.assertEqual(zlib.crc32(foo), 2486878355) - self.assertEqual(zlib.crc32('spam'), 1138425661) + self.assertEqual(zlib.crc32(b'spam'), 1138425661) self.assertEqual(zlib.adler32(foo+foo), 3573550353) - self.assertEqual(zlib.adler32('spam'), 72286642) + self.assertEqual(zlib.adler32(b'spam'), 72286642) def test_same_as_binascii_crc32(self): foo = b'abcdefghijklmnop' @@ -63,7 +63,18 @@ class ExceptionTestCase(unittest.TestCase): # specifying compression level out of range causes an error # (but -1 is Z_DEFAULT_COMPRESSION and apparently the zlib # accepts 0 too) - self.assertRaises(zlib.error, zlib.compress, 'ERROR', 10) + self.assertRaises(zlib.error, zlib.compress, b'ERROR', 10) + + def test_badargs(self): + self.assertRaises(TypeError, zlib.adler32) + self.assertRaises(TypeError, zlib.crc32) + self.assertRaises(TypeError, zlib.compress) + self.assertRaises(TypeError, zlib.decompress) + for arg in (42, None, '', 'abc', (), []): + self.assertRaises(TypeError, zlib.adler32, arg) + self.assertRaises(TypeError, zlib.crc32, arg) + self.assertRaises(TypeError, zlib.compress, arg) + self.assertRaises(TypeError, zlib.decompress, arg) def test_badcompressobj(self): # verify failure on building compress object with bad params @@ -93,8 +104,9 @@ class CompressTestCase(unittest.TestCase): # compress more data data = HAMLET_SCENE * 128 x = zlib.compress(data) - self.assertEqual(zlib.decompress(x), data) - + self.assertEqual(zlib.compress(bytearray(data)), x) + for ob in x, bytearray(x): + self.assertEqual(zlib.decompress(ob), data) @@ -102,17 +114,22 @@ class CompressObjectTestCase(unittest.TestCase): # Test compression object def test_pair(self): # straightforward compress/decompress objects - data = HAMLET_SCENE * 128 - co = zlib.compressobj() - x1 = co.compress(data) - x2 = co.flush() - self.assertRaises(zlib.error, co.flush) # second flush should not work - dco = zlib.decompressobj() - y1 = dco.decompress(x1 + x2) - y2 = dco.flush() - self.assertEqual(data, y1 + y2) - self.assertTrue(isinstance(dco.unconsumed_tail, bytes)) - self.assertTrue(isinstance(dco.unused_data, bytes)) + datasrc = HAMLET_SCENE * 128 + datazip = zlib.compress(datasrc) + # should compress both bytes and bytearray data + for data in (datasrc, bytearray(datasrc)): + co = zlib.compressobj() + x1 = co.compress(data) + x2 = co.flush() + self.assertRaises(zlib.error, co.flush) # second flush should not work + self.assertEqual(x1 + x2, datazip) + for v1, v2 in ((x1, x2), (bytearray(x1), bytearray(x2))): + dco = zlib.decompressobj() + y1 = dco.decompress(v1 + v2) + y2 = dco.flush() + self.assertEqual(data, y1 + y2) + self.assertTrue(isinstance(dco.unconsumed_tail, bytes)) + self.assertTrue(isinstance(dco.unused_data, bytes)) def test_compressoptions(self): # specify lots of options to compressobj() @@ -173,7 +190,7 @@ class CompressObjectTestCase(unittest.TestCase): bufs.append(dco.flush()) else: while True: - chunk = dco.decompress('') + chunk = dco.decompress(b'') if chunk: bufs.append(chunk) else: @@ -241,7 +258,7 @@ class CompressObjectTestCase(unittest.TestCase): bufs.append(dco.flush()) else: while chunk: - chunk = dco.decompress('', max_length) + chunk = dco.decompress(b'', max_length) self.assertFalse(len(chunk) > max_length, 'chunk too big (%d>%d)' % (len(chunk),max_length)) bufs.append(chunk) @@ -253,7 +270,7 @@ class CompressObjectTestCase(unittest.TestCase): def test_maxlenmisc(self): # Misc tests of max_length dco = zlib.decompressobj() - self.assertRaises(ValueError, dco.decompress, "", -1) + self.assertRaises(ValueError, dco.decompress, b"", -1) self.assertEqual(b'', dco.unconsumed_tail) def test_flushes(self): diff --git a/Misc/NEWS b/Misc/NEWS index c69ca6ab6cc..4b5d102361c 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -58,6 +58,10 @@ Core and Builtins Library ------- +- Issue #4757: `zlib.compress` and other methods in the zlib module now + raise a TypeError when given an `str` object (rather than a `bytes`-like + object). Patch by Victor Stinner and Florent Xicluna. + - Issue #7349: Make methods of file objects in the io module accept None as an argument where file-like objects (ie StringIO and BytesIO) accept them to mean the same as passing no argument. diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index 6b818e54c80..2f2e214906f 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -107,7 +107,7 @@ PyZlib_compress(PyObject *self, PyObject *args) z_stream zst; /* require Python string object, optional 'level' arg */ - if (!PyArg_ParseTuple(args, "s*|i:compress", &pinput, &level)) + if (!PyArg_ParseTuple(args, "y*|i:compress", &pinput, &level)) return NULL; input = pinput.buf; length = pinput.len; @@ -190,7 +190,7 @@ PyZlib_decompress(PyObject *self, PyObject *args) Py_ssize_t r_strlen=DEFAULTALLOC; z_stream zst; - if (!PyArg_ParseTuple(args, "s*|in:decompress", + if (!PyArg_ParseTuple(args, "y*|in:decompress", &pinput, &wsize, &r_strlen)) return NULL; input = pinput.buf; @@ -402,7 +402,7 @@ PyZlib_objcompress(compobject *self, PyObject *args) Byte *input; unsigned long start_total_out; - if (!PyArg_ParseTuple(args, "s*:compress", &pinput)) + if (!PyArg_ParseTuple(args, "y*:compress", &pinput)) return NULL; input = pinput.buf; inplen = pinput.len; @@ -484,7 +484,7 @@ PyZlib_objdecompress(compobject *self, PyObject *args) Byte *input; unsigned long start_total_out; - if (!PyArg_ParseTuple(args, "s*|i:decompress", &pinput, + if (!PyArg_ParseTuple(args, "y*|i:decompress", &pinput, &max_length)) return NULL; input = pinput.buf; @@ -912,8 +912,8 @@ PyZlib_adler32(PyObject *self, PyObject *args) unsigned int adler32val = 1; /* adler32(0L, Z_NULL, 0) */ Py_buffer pbuf; - if (!PyArg_ParseTuple(args, "s*|I:adler32", &pbuf, &adler32val)) - return NULL; + if (!PyArg_ParseTuple(args, "y*|I:adler32", &pbuf, &adler32val)) + return NULL; /* Releasing the GIL for very small buffers is inefficient and may lower performance */ if (pbuf.len > 1024*5) { @@ -921,7 +921,7 @@ PyZlib_adler32(PyObject *self, PyObject *args) adler32val = adler32(adler32val, pbuf.buf, pbuf.len); Py_END_ALLOW_THREADS } else { - adler32val = adler32(adler32val, pbuf.buf, pbuf.len); + adler32val = adler32(adler32val, pbuf.buf, pbuf.len); } PyBuffer_Release(&pbuf); return PyLong_FromUnsignedLong(adler32val & 0xffffffffU); @@ -940,7 +940,7 @@ PyZlib_crc32(PyObject *self, PyObject *args) Py_buffer pbuf; int signed_val; - if (!PyArg_ParseTuple(args, "s*|I:crc32", &pbuf, &crc32val)) + if (!PyArg_ParseTuple(args, "y*|I:crc32", &pbuf, &crc32val)) return NULL; /* Releasing the GIL for very small buffers is inefficient and may lower performance */