From 84544c1020241afdd0422449d621ce9b762cb6d1 Mon Sep 17 00:00:00 2001 From: Martin Panter Date: Sat, 23 Jul 2016 03:02:07 +0000 Subject: [PATCH] =?UTF-8?q?Issue=20#27130:=20Fix=20handling=20of=20buffers?= =?UTF-8?q?=20exceeding=20UINT=5FMAX=20in=20=E2=80=9Czlib=E2=80=9D=20modul?= =?UTF-8?q?e?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch by Xiang Zhang. --- Doc/howto/clinic.rst | 12 +- Lib/test/test_zlib.py | 64 ++- Misc/NEWS | 5 + Modules/clinic/zlibmodule.c.h | 20 +- Modules/zlibmodule.c | 747 +++++++++++++++++----------------- 5 files changed, 433 insertions(+), 415 deletions(-) diff --git a/Doc/howto/clinic.rst b/Doc/howto/clinic.rst index b04edea1897..7f275a4659f 100644 --- a/Doc/howto/clinic.rst +++ b/Doc/howto/clinic.rst @@ -1249,17 +1249,17 @@ Here's the simplest example of a custom converter, from ``Modules/zlibmodule.c`` /*[python input] - class capped_uint_converter(CConverter): - type = 'unsigned int' - converter = 'capped_uint_converter' + class ssize_t_converter(CConverter): + type = 'Py_ssize_t' + converter = 'ssize_t_converter' [python start generated code]*/ /*[python end generated code: output=da39a3ee5e6b4b0d input=35521e4e733823c7]*/ -This block adds a converter to Argument Clinic named ``capped_uint``. Parameters -declared as ``capped_uint`` will be declared as type ``unsigned int``, and will +This block adds a converter to Argument Clinic named ``ssize_t``. Parameters +declared as ``ssize_t`` will be declared as type ``Py_ssize_t``, and will be parsed by the ``'O&'`` format unit, which will call the -``capped_uint_converter`` converter function. ``capped_uint`` variables +``ssize_t_converter`` converter function. ``ssize_t`` variables automatically support default values. More sophisticated custom converters can insert custom C code to diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index 78ecade950a..6fea893993b 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -121,6 +121,8 @@ class ExceptionTestCase(unittest.TestCase): def test_overflow(self): with self.assertRaisesRegex(OverflowError, 'int too large'): zlib.decompress(b'', 15, sys.maxsize + 1) + with self.assertRaisesRegex(OverflowError, 'int too large'): + zlib.decompressobj().decompress(b'', sys.maxsize + 1) with self.assertRaisesRegex(OverflowError, 'int too large'): zlib.decompressobj().flush(sys.maxsize + 1) @@ -188,15 +190,6 @@ class CompressTestCase(BaseCompressTestCase, unittest.TestCase): def test_big_decompress_buffer(self, size): self.check_big_decompress_buffer(size, zlib.decompress) - @bigmemtest(size=_4G + 100, memuse=1, dry_run=False) - def test_length_overflow(self, size): - data = b'x' * size - try: - self.assertRaises(OverflowError, zlib.compress, data, 1) - self.assertRaises(OverflowError, zlib.decompress, data) - finally: - data = None - @bigmemtest(size=_4G, memuse=1) def test_large_bufsize(self, size): # Test decompress(bufsize) parameter greater than the internal limit @@ -209,6 +202,16 @@ class CompressTestCase(BaseCompressTestCase, unittest.TestCase): compressed = zlib.compress(data, 1) self.assertEqual(zlib.decompress(compressed, 15, CustomInt()), data) + @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform') + @bigmemtest(size=_4G + 100, memuse=4) + def test_64bit_compress(self, size): + data = b'x' * size + try: + comp = zlib.compress(data, 0) + self.assertEqual(zlib.decompress(comp), data) + finally: + comp = data = None + class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase): # Test compression object @@ -678,16 +681,45 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase): decompress = lambda s: d.decompress(s) + d.flush() self.check_big_decompress_buffer(size, decompress) - @bigmemtest(size=_4G + 100, memuse=1, dry_run=False) - def test_length_overflow(self, size): + @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform') + @bigmemtest(size=_4G + 100, memuse=4) + def test_64bit_compress(self, size): data = b'x' * size - c = zlib.compressobj(1) - d = zlib.decompressobj() + co = zlib.compressobj(0) + do = zlib.decompressobj() try: - self.assertRaises(OverflowError, c.compress, data) - self.assertRaises(OverflowError, d.decompress, data) + comp = co.compress(data) + co.flush() + uncomp = do.decompress(comp) + do.flush() + self.assertEqual(uncomp, data) finally: - data = None + comp = uncomp = data = None + + @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform') + @bigmemtest(size=_4G + 100, memuse=3) + def test_large_unused_data(self, size): + data = b'abcdefghijklmnop' + unused = b'x' * size + comp = zlib.compress(data) + unused + do = zlib.decompressobj() + try: + uncomp = do.decompress(comp) + do.flush() + self.assertEqual(unused, do.unused_data) + self.assertEqual(uncomp, data) + finally: + unused = comp = do = None + + @unittest.skipUnless(sys.maxsize > 2**32, 'requires 64bit platform') + @bigmemtest(size=_4G + 100, memuse=5) + def test_large_unconsumed_tail(self, size): + data = b'x' * size + do = zlib.decompressobj() + try: + comp = zlib.compress(data, 0) + uncomp = do.decompress(comp, 1) + do.flush() + self.assertEqual(uncomp, data) + self.assertEqual(do.unconsumed_tail, b'') + finally: + comp = uncomp = data = None def test_wbits(self): # wbits=0 only supported since zlib v1.2.3.5 diff --git a/Misc/NEWS b/Misc/NEWS index 115182434b6..921c89a5ff6 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -31,6 +31,11 @@ Core and Builtins Library ------- +- Issue #27130: In the "zlib" module, fix handling of large buffers + (typically 4 GiB) when compressing and decompressing. Previously, inputs + were limited to 4 GiB, and compression and decompression operations did not + properly handle results of 4 GiB. + - Issue #27533: Release GIL in nt._isdir - Issue #17711: Fixed unpickling by the persistent ID with protocol 0. diff --git a/Modules/clinic/zlibmodule.c.h b/Modules/clinic/zlibmodule.c.h index 71da0273a70..b1af7ce57ca 100644 --- a/Modules/clinic/zlibmodule.c.h +++ b/Modules/clinic/zlibmodule.c.h @@ -57,7 +57,7 @@ PyDoc_STRVAR(zlib_decompress__doc__, static PyObject * zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits, - unsigned int bufsize); + Py_ssize_t bufsize); static PyObject * zlib_decompress(PyObject *module, PyObject *args) @@ -65,10 +65,10 @@ zlib_decompress(PyObject *module, PyObject *args) PyObject *return_value = NULL; Py_buffer data = {NULL, NULL}; int wbits = MAX_WBITS; - unsigned int bufsize = DEF_BUF_SIZE; + Py_ssize_t bufsize = DEF_BUF_SIZE; if (!PyArg_ParseTuple(args, "y*|iO&:decompress", - &data, &wbits, capped_uint_converter, &bufsize)) + &data, &wbits, ssize_t_converter, &bufsize)) goto exit; return_value = zlib_decompress_impl(module, &data, wbits, bufsize); @@ -236,17 +236,17 @@ PyDoc_STRVAR(zlib_Decompress_decompress__doc__, static PyObject * zlib_Decompress_decompress_impl(compobject *self, Py_buffer *data, - unsigned int max_length); + Py_ssize_t max_length); static PyObject * zlib_Decompress_decompress(compobject *self, PyObject *args) { PyObject *return_value = NULL; Py_buffer data = {NULL, NULL}; - unsigned int max_length = 0; + Py_ssize_t max_length = 0; if (!PyArg_ParseTuple(args, "y*|O&:decompress", - &data, capped_uint_converter, &max_length)) + &data, ssize_t_converter, &max_length)) goto exit; return_value = zlib_Decompress_decompress_impl(self, &data, max_length); @@ -348,16 +348,16 @@ PyDoc_STRVAR(zlib_Decompress_flush__doc__, {"flush", (PyCFunction)zlib_Decompress_flush, METH_VARARGS, zlib_Decompress_flush__doc__}, static PyObject * -zlib_Decompress_flush_impl(compobject *self, unsigned int length); +zlib_Decompress_flush_impl(compobject *self, Py_ssize_t length); static PyObject * zlib_Decompress_flush(compobject *self, PyObject *args) { PyObject *return_value = NULL; - unsigned int length = DEF_BUF_SIZE; + Py_ssize_t length = DEF_BUF_SIZE; if (!PyArg_ParseTuple(args, "|O&:flush", - capped_uint_converter, &length)) + ssize_t_converter, &length)) goto exit; return_value = zlib_Decompress_flush_impl(self, length); @@ -442,4 +442,4 @@ exit: #ifndef ZLIB_COMPRESS_COPY_METHODDEF #define ZLIB_COMPRESS_COPY_METHODDEF #endif /* !defined(ZLIB_COMPRESS_COPY_METHODDEF) */ -/*[clinic end generated code: output=8545565b1a1822de input=a9049054013a1b77]*/ +/*[clinic end generated code: output=7711ef02d1d5776c input=a9049054013a1b77]*/ diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index ab5b65d264d..fccb6163e80 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -1,5 +1,5 @@ /* zlibmodule.c -- gzip-compatible data compression */ -/* See http://www.gzip.org/zlib/ */ +/* See http://zlib.net/ */ /* Windows users: read Python's PCbuild\readme.txt */ @@ -23,7 +23,7 @@ #endif #if defined(ZLIB_VERNUM) && ZLIB_VERNUM >= 0x1221 -#define AT_LEAST_ZLIB_1_2_2_1 +# define AT_LEAST_ZLIB_1_2_2_1 #endif /* The following parameters are copied from zutil.h, version 0.95 */ @@ -115,6 +115,7 @@ newcompobject(PyTypeObject *type) #ifdef WITH_THREAD self->lock = PyThread_allocate_lock(); if (self->lock == NULL) { + Py_DECREF(self); PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock"); return NULL; } @@ -138,6 +139,63 @@ PyZlib_Free(voidpf ctx, void *ptr) PyMem_RawFree(ptr); } +static void +arrange_input_buffer(z_stream *zst, Py_ssize_t *remains) +{ + zst->avail_in = Py_MIN((size_t)*remains, UINT_MAX); + *remains -= zst->avail_in; +} + +static Py_ssize_t +arrange_output_buffer_with_maximum(z_stream *zst, PyObject **buffer, + Py_ssize_t length, + Py_ssize_t max_length) +{ + Py_ssize_t occupied; + + if (*buffer == NULL) { + if (!(*buffer = PyBytes_FromStringAndSize(NULL, length))) + return -1; + occupied = 0; + } + else { + occupied = zst->next_out - (Byte *)PyBytes_AS_STRING(*buffer); + + if (length == occupied) { + Py_ssize_t new_length; + assert(length <= max_length); + /* can not scale the buffer over max_length */ + if (length == max_length) + return -2; + if (length <= (max_length >> 1)) + new_length = length << 1; + else + new_length = max_length; + if (_PyBytes_Resize(buffer, new_length) < 0) + return -1; + length = new_length; + } + } + + zst->avail_out = Py_MIN((size_t)(length - occupied), UINT_MAX); + zst->next_out = (Byte *)PyBytes_AS_STRING(*buffer) + occupied; + + return length; +} + +static Py_ssize_t +arrange_output_buffer(z_stream *zst, PyObject **buffer, Py_ssize_t length) +{ + Py_ssize_t ret; + + ret = arrange_output_buffer_with_maximum(zst, buffer, length, + PY_SSIZE_T_MAX); + if (ret == -2) + PyErr_NoMemory(); + + return ret; +} + /*[clinic input] zlib.compress @@ -154,50 +212,30 @@ static PyObject * zlib_compress_impl(PyObject *module, Py_buffer *bytes, int level) /*[clinic end generated code: output=ae64c2c3076321a0 input=be3abe9934bda4b3]*/ { - PyObject *ReturnVal = NULL; - Byte *input, *output = NULL; - unsigned int length; - int err; + PyObject *RetVal = NULL; + Byte *ibuf; + Py_ssize_t ibuflen, obuflen = DEF_BUF_SIZE; + int err, flush; z_stream zst; - if ((size_t)bytes->len > UINT_MAX) { - PyErr_SetString(PyExc_OverflowError, - "Size does not fit in an unsigned int"); - goto error; - } - input = bytes->buf; - length = (unsigned int)bytes->len; - - zst.avail_out = length + length/1000 + 12 + 1; - - output = (Byte*)PyMem_Malloc(zst.avail_out); - if (output == NULL) { - PyErr_SetString(PyExc_MemoryError, - "Can't allocate memory to compress data"); - goto error; - } - - /* Past the point of no return. From here on out, we need to make sure - we clean up mallocs & INCREFs. */ + ibuf = bytes->buf; + ibuflen = bytes->len; zst.opaque = NULL; zst.zalloc = PyZlib_Malloc; zst.zfree = PyZlib_Free; - zst.next_out = (Byte *)output; - zst.next_in = (Byte *)input; - zst.avail_in = length; + zst.next_in = ibuf; err = deflateInit(&zst, level); - switch(err) { - case(Z_OK): + switch (err) { + case Z_OK: break; - case(Z_MEM_ERROR): + case Z_MEM_ERROR: PyErr_SetString(PyExc_MemoryError, "Out of memory while compressing data"); goto error; - case(Z_STREAM_ERROR): - PyErr_SetString(ZlibError, - "Bad compression level"); + case Z_STREAM_ERROR: + PyErr_SetString(ZlibError, "Bad compression level"); goto error; default: deflateEnd(&zst); @@ -205,41 +243,59 @@ zlib_compress_impl(PyObject *module, Py_buffer *bytes, int level) goto error; } - Py_BEGIN_ALLOW_THREADS; - err = deflate(&zst, Z_FINISH); - Py_END_ALLOW_THREADS; + do { + arrange_input_buffer(&zst, &ibuflen); + flush = ibuflen == 0 ? Z_FINISH : Z_NO_FLUSH; - if (err != Z_STREAM_END) { - zlib_error(zst, err, "while compressing data"); - deflateEnd(&zst); - goto error; + do { + obuflen = arrange_output_buffer(&zst, &RetVal, obuflen); + if (obuflen < 0) { + deflateEnd(&zst); + goto error; + } + + Py_BEGIN_ALLOW_THREADS + err = deflate(&zst, flush); + Py_END_ALLOW_THREADS + + if (err == Z_STREAM_ERROR) { + deflateEnd(&zst); + zlib_error(zst, err, "while compressing data"); + goto error; + } + + } while (zst.avail_out == 0); + assert(zst.avail_in == 0); + + } while (flush != Z_FINISH); + assert(err == Z_STREAM_END); + + err = deflateEnd(&zst); + if (err == Z_OK) { + if (_PyBytes_Resize(&RetVal, zst.next_out - + (Byte *)PyBytes_AS_STRING(RetVal)) < 0) + goto error; + return RetVal; } - - err=deflateEnd(&zst); - if (err == Z_OK) - ReturnVal = PyBytes_FromStringAndSize((char *)output, - zst.total_out); else zlib_error(zst, err, "while finishing compression"); - error: - PyMem_Free(output); - - return ReturnVal; + Py_XDECREF(RetVal); + return NULL; } /*[python input] -class capped_uint_converter(CConverter): - type = 'unsigned int' - converter = 'capped_uint_converter' +class ssize_t_converter(CConverter): + type = 'Py_ssize_t' + converter = 'ssize_t_converter' c_ignored_default = "0" [python start generated code]*/ -/*[python end generated code: output=da39a3ee5e6b4b0d input=35521e4e733823c7]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=5f34ba1b394cb8e7]*/ static int -capped_uint_converter(PyObject *obj, void *ptr) +ssize_t_converter(PyObject *obj, void *ptr) { PyObject *long_obj; Py_ssize_t val; @@ -253,19 +309,7 @@ capped_uint_converter(PyObject *obj, void *ptr) if (val == -1 && PyErr_Occurred()) { return 0; } - if (val < 0) { - PyErr_SetString(PyExc_ValueError, - "value must be positive"); - return 0; - } - - if ((size_t)val > UINT_MAX) { - *(unsigned int *)ptr = UINT_MAX; - } - else { - *(unsigned int *)ptr = Py_SAFE_DOWNCAST(val, Py_ssize_t, - unsigned int); - } + *(Py_ssize_t *)ptr = val; return 1; } @@ -276,7 +320,7 @@ zlib.decompress Compressed data. wbits: int(c_default="MAX_WBITS") = MAX_WBITS The window buffer size and container format. - bufsize: capped_uint(c_default="DEF_BUF_SIZE") = DEF_BUF_SIZE + bufsize: ssize_t(c_default="DEF_BUF_SIZE") = DEF_BUF_SIZE The initial output buffer size. / @@ -285,44 +329,36 @@ Returns a bytes object containing the uncompressed data. static PyObject * zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits, - unsigned int bufsize) -/*[clinic end generated code: output=475b36ead58b243d input=75123b0d4ff0541d]*/ + Py_ssize_t bufsize) +/*[clinic end generated code: output=77c7e35111dc8c42 input=c13dd2c5696cd17f]*/ { - PyObject *result_str = NULL; - Byte *input; - unsigned int length; - int err; - unsigned int new_bufsize; + PyObject *RetVal = NULL; + Byte *ibuf; + Py_ssize_t ibuflen; + int err, flush; z_stream zst; - if ((size_t)data->len > UINT_MAX) { - PyErr_SetString(PyExc_OverflowError, - "Size does not fit in an unsigned int"); - goto error; - } - input = data->buf; - length = (unsigned int)data->len; - - if (bufsize == 0) + if (bufsize < 0) { + PyErr_SetString(PyExc_ValueError, "bufsize must be non-negative"); + return NULL; + } else if (bufsize == 0) { bufsize = 1; + } - zst.avail_in = length; - zst.avail_out = bufsize; - - if (!(result_str = PyBytes_FromStringAndSize(NULL, bufsize))) - goto error; + ibuf = data->buf; + ibuflen = data->len; zst.opaque = NULL; zst.zalloc = PyZlib_Malloc; zst.zfree = PyZlib_Free; - zst.next_out = (Byte *)PyBytes_AS_STRING(result_str); - zst.next_in = (Byte *)input; + zst.avail_in = 0; + zst.next_in = ibuf; err = inflateInit2(&zst, wbits); - switch(err) { - case(Z_OK): + switch (err) { + case Z_OK: break; - case(Z_MEM_ERROR): + case Z_MEM_ERROR: PyErr_SetString(PyExc_MemoryError, "Out of memory while decompressing data"); goto error; @@ -333,46 +369,46 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits, } do { - Py_BEGIN_ALLOW_THREADS - err=inflate(&zst, Z_FINISH); - Py_END_ALLOW_THREADS + arrange_input_buffer(&zst, &ibuflen); + flush = ibuflen == 0 ? Z_FINISH : Z_NO_FLUSH; - switch(err) { - case(Z_STREAM_END): - break; - case(Z_BUF_ERROR): - /* - * If there is at least 1 byte of room according to zst.avail_out - * and we get this error, assume that it means zlib cannot - * process the inflate call() due to an error in the data. - */ - if (zst.avail_out > 0) { + do { + bufsize = arrange_output_buffer(&zst, &RetVal, bufsize); + if (bufsize < 0) { + inflateEnd(&zst); + goto error; + } + + Py_BEGIN_ALLOW_THREADS + err = inflate(&zst, flush); + Py_END_ALLOW_THREADS + + switch (err) { + case Z_OK: /* fall through */ + case Z_BUF_ERROR: /* fall through */ + case Z_STREAM_END: + break; + case Z_MEM_ERROR: + inflateEnd(&zst); + PyErr_SetString(PyExc_MemoryError, + "Out of memory while decompressing data"); + goto error; + default: + inflateEnd(&zst); zlib_error(zst, err, "while decompressing data"); - inflateEnd(&zst); goto error; } - /* fall through */ - case(Z_OK): - /* need more memory */ - if (bufsize <= (UINT_MAX >> 1)) - new_bufsize = bufsize << 1; - else - new_bufsize = UINT_MAX; - if (_PyBytes_Resize(&result_str, new_bufsize) < 0) { - inflateEnd(&zst); - goto error; - } - zst.next_out = - (unsigned char *)PyBytes_AS_STRING(result_str) + bufsize; - zst.avail_out = bufsize; - bufsize = new_bufsize; - break; - default: - inflateEnd(&zst); - zlib_error(zst, err, "while decompressing data"); - goto error; - } - } while (err != Z_STREAM_END); + + } while (zst.avail_out == 0); + + } while (err != Z_STREAM_END && ibuflen != 0); + + + if (err != Z_STREAM_END) { + inflateEnd(&zst); + zlib_error(zst, err, "while decompressing data"); + goto error; + } err = inflateEnd(&zst); if (err != Z_OK) { @@ -380,13 +416,14 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits, goto error; } - if (_PyBytes_Resize(&result_str, zst.total_out) < 0) + if (_PyBytes_Resize(&RetVal, zst.next_out - + (Byte *)PyBytes_AS_STRING(RetVal)) < 0) goto error; - return result_str; + return RetVal; error: - Py_XDECREF(result_str); + Py_XDECREF(RetVal); return NULL; } @@ -433,7 +470,7 @@ zlib_compressobj_impl(PyObject *module, int level, int method, int wbits, } self = newcompobject(&Comptype); - if (self==NULL) + if (self == NULL) goto error; self->zst.opaque = NULL; self->zst.zalloc = PyZlib_Malloc; @@ -441,8 +478,8 @@ zlib_compressobj_impl(PyObject *module, int level, int method, int wbits, self->zst.next_in = NULL; self->zst.avail_in = 0; err = deflateInit2(&self->zst, level, method, wbits, memLevel, strategy); - switch(err) { - case (Z_OK): + switch (err) { + case Z_OK: self->is_initialised = 1; if (zdict->buf == NULL) { goto success; @@ -450,9 +487,9 @@ zlib_compressobj_impl(PyObject *module, int level, int method, int wbits, err = deflateSetDictionary(&self->zst, zdict->buf, (unsigned int)zdict->len); switch (err) { - case (Z_OK): + case Z_OK: goto success; - case (Z_STREAM_ERROR): + case Z_STREAM_ERROR: PyErr_SetString(PyExc_ValueError, "Invalid dictionary"); goto error; default: @@ -460,11 +497,11 @@ zlib_compressobj_impl(PyObject *module, int level, int method, int wbits, goto error; } } - case (Z_MEM_ERROR): + case Z_MEM_ERROR: PyErr_SetString(PyExc_MemoryError, "Can't allocate memory for compression object"); goto error; - case(Z_STREAM_ERROR): + case Z_STREAM_ERROR: PyErr_SetString(PyExc_ValueError, "Invalid initialization option"); goto error; default: @@ -475,7 +512,7 @@ zlib_compressobj_impl(PyObject *module, int level, int method, int wbits, error: Py_CLEAR(self); success: - return (PyObject*)self; + return (PyObject *)self; } static int @@ -493,7 +530,7 @@ set_inflate_zdict(compobject *self) PyBuffer_Release(&zdict_buf); return -1; } - err = inflateSetDictionary(&(self->zst), + err = inflateSetDictionary(&self->zst, zdict_buf.buf, (unsigned int)zdict_buf.len); PyBuffer_Release(&zdict_buf); if (err != Z_OK) { @@ -530,7 +567,7 @@ zlib_decompressobj_impl(PyObject *module, int wbits, PyObject *zdict) self = newcompobject(&Decomptype); if (self == NULL) - return(NULL); + return NULL; self->zst.opaque = NULL; self->zst.zalloc = PyZlib_Malloc; self->zst.zfree = PyZlib_Free; @@ -541,8 +578,8 @@ zlib_decompressobj_impl(PyObject *module, int wbits, PyObject *zdict) self->zdict = zdict; } err = inflateInit2(&self->zst, wbits); - switch(err) { - case (Z_OK): + switch (err) { + case Z_OK: self->is_initialised = 1; if (self->zdict != NULL && wbits < 0) { #ifdef AT_LEAST_ZLIB_1_2_2_1 @@ -558,12 +595,12 @@ zlib_decompressobj_impl(PyObject *module, int wbits, PyObject *zdict) return NULL; #endif } - return (PyObject*)self; - case(Z_STREAM_ERROR): + return (PyObject *)self; + case Z_STREAM_ERROR: Py_DECREF(self); PyErr_SetString(PyExc_ValueError, "Invalid initialization option"); return NULL; - case (Z_MEM_ERROR): + case Z_MEM_ERROR: Py_DECREF(self); PyErr_SetString(PyExc_MemoryError, "Can't allocate memory for decompression object"); @@ -621,113 +658,90 @@ static PyObject * zlib_Compress_compress_impl(compobject *self, Py_buffer *data) /*[clinic end generated code: output=5d5cd791cbc6a7f4 input=0d95908d6e64fab8]*/ { + PyObject *RetVal = NULL; + Py_ssize_t ibuflen, obuflen = DEF_BUF_SIZE; int err; - unsigned int inplen; - unsigned int length = DEF_BUF_SIZE, new_length; - PyObject *RetVal; - Byte *input; - unsigned long start_total_out; - if ((size_t)data->len > UINT_MAX) { - PyErr_SetString(PyExc_OverflowError, - "Size does not fit in an unsigned int"); - return NULL; - } - input = data->buf; - inplen = (unsigned int)data->len; - - if (!(RetVal = PyBytes_FromStringAndSize(NULL, length))) - return NULL; + self->zst.next_in = data->buf; + ibuflen = data->len; ENTER_ZLIB(self); - start_total_out = self->zst.total_out; - self->zst.avail_in = inplen; - self->zst.next_in = input; - self->zst.avail_out = length; - self->zst.next_out = (unsigned char *)PyBytes_AS_STRING(RetVal); + do { + arrange_input_buffer(&self->zst, &ibuflen); - Py_BEGIN_ALLOW_THREADS - err = deflate(&(self->zst), Z_NO_FLUSH); - Py_END_ALLOW_THREADS + do { + obuflen = arrange_output_buffer(&self->zst, &RetVal, obuflen); + if (obuflen < 0) + goto error; - /* while Z_OK and the output buffer is full, there might be more output, - so extend the output buffer and try again */ - while (err == Z_OK && self->zst.avail_out == 0) { - if (length <= (UINT_MAX >> 1)) - new_length = length << 1; - else - new_length = UINT_MAX; - if (_PyBytes_Resize(&RetVal, new_length) < 0) { - Py_CLEAR(RetVal); - goto done; - } - self->zst.next_out = - (unsigned char *)PyBytes_AS_STRING(RetVal) + length; - self->zst.avail_out = length; - length = new_length; + Py_BEGIN_ALLOW_THREADS + err = deflate(&self->zst, Z_NO_FLUSH); + Py_END_ALLOW_THREADS - Py_BEGIN_ALLOW_THREADS - err = deflate(&(self->zst), Z_NO_FLUSH); - Py_END_ALLOW_THREADS - } - /* We will only get Z_BUF_ERROR if the output buffer was full but - there wasn't more output when we tried again, so it is not an error - condition. - */ + if (err == Z_STREAM_ERROR) { + zlib_error(self->zst, err, "while compressing data"); + goto error; + } - if (err != Z_OK && err != Z_BUF_ERROR) { - zlib_error(self->zst, err, "while compressing data"); - Py_CLEAR(RetVal); - goto done; - } - if (_PyBytes_Resize(&RetVal, self->zst.total_out - start_total_out) < 0) { - Py_CLEAR(RetVal); - } + } while (self->zst.avail_out == 0); + assert(self->zst.avail_in == 0); - done: + } while (ibuflen != 0); + + if (_PyBytes_Resize(&RetVal, self->zst.next_out - + (Byte *)PyBytes_AS_STRING(RetVal)) == 0) + goto success; + + error: + Py_CLEAR(RetVal); + success: LEAVE_ZLIB(self); return RetVal; } -/* Helper for objdecompress() and unflush(). Saves any unconsumed input data in +/* Helper for objdecompress() and flush(). Saves any unconsumed input data in self->unused_data or self->unconsumed_tail, as appropriate. */ static int -save_unconsumed_input(compobject *self, int err) +save_unconsumed_input(compobject *self, Py_buffer *data, int err) { if (err == Z_STREAM_END) { /* The end of the compressed data has been reached. Store the leftover input data in self->unused_data. */ if (self->zst.avail_in > 0) { Py_ssize_t old_size = PyBytes_GET_SIZE(self->unused_data); - Py_ssize_t new_size; + Py_ssize_t new_size, left_size; PyObject *new_data; - if ((size_t)self->zst.avail_in > (size_t)UINT_MAX - (size_t)old_size) { + left_size = (Byte *)data->buf + data->len - self->zst.next_in; + if (left_size > (PY_SSIZE_T_MAX - old_size)) { PyErr_NoMemory(); return -1; } - new_size = old_size + self->zst.avail_in; + new_size = old_size + left_size; new_data = PyBytes_FromStringAndSize(NULL, new_size); if (new_data == NULL) return -1; Py_MEMCPY(PyBytes_AS_STRING(new_data), PyBytes_AS_STRING(self->unused_data), old_size); Py_MEMCPY(PyBytes_AS_STRING(new_data) + old_size, - self->zst.next_in, self->zst.avail_in); + self->zst.next_in, left_size); Py_SETREF(self->unused_data, new_data); self->zst.avail_in = 0; } } + if (self->zst.avail_in > 0 || PyBytes_GET_SIZE(self->unconsumed_tail)) { /* This code handles two distinct cases: 1. Output limit was reached. Save leftover input in unconsumed_tail. 2. All input data was consumed. Clear unconsumed_tail. */ + Py_ssize_t left_size = (Byte *)data->buf + data->len - self->zst.next_in; PyObject *new_data = PyBytes_FromStringAndSize( - (char *)self->zst.next_in, self->zst.avail_in); + (char *)self->zst.next_in, left_size); if (new_data == NULL) return -1; Py_SETREF(self->unconsumed_tail, new_data); } + return 0; } @@ -736,7 +750,7 @@ zlib.Decompress.decompress data: Py_buffer The binary data to decompress. - max_length: capped_uint = 0 + max_length: ssize_t = 0 The maximum allowable length of the decompressed data. Unconsumed input data will be stored in the unconsumed_tail attribute. @@ -751,85 +765,72 @@ Call the flush() method to clear these buffers. static PyObject * zlib_Decompress_decompress_impl(compobject *self, Py_buffer *data, - unsigned int max_length) -/*[clinic end generated code: output=b82e2a2c19f5fe7b input=68b6508ab07c2cf0]*/ + Py_ssize_t max_length) +/*[clinic end generated code: output=6e5173c74e710352 input=d6de9b53c4566b8a]*/ { - int err; - unsigned int old_length, length = DEF_BUF_SIZE; + int err = Z_OK; + Py_ssize_t ibuflen, obuflen = DEF_BUF_SIZE, hard_limit; PyObject *RetVal = NULL; - unsigned long start_total_out; - if ((size_t)data->len > UINT_MAX) { - PyErr_SetString(PyExc_OverflowError, - "Size does not fit in an unsigned int"); + if (max_length < 0) { + PyErr_SetString(PyExc_ValueError, "max_length must be non-negative"); return NULL; - } + } else if (max_length == 0) + hard_limit = PY_SSIZE_T_MAX; + else + hard_limit = max_length; + + self->zst.next_in = data->buf; + ibuflen = data->len; /* limit amount of data allocated to max_length */ - if (max_length && length > max_length) - length = max_length; - if (!(RetVal = PyBytes_FromStringAndSize(NULL, length))) - return NULL; + if (max_length && obuflen > max_length) + obuflen = max_length; ENTER_ZLIB(self); - start_total_out = self->zst.total_out; - self->zst.avail_in = (unsigned int)data->len; - self->zst.next_in = data->buf; - self->zst.avail_out = length; - self->zst.next_out = (unsigned char *)PyBytes_AS_STRING(RetVal); + do { + arrange_input_buffer(&self->zst, &ibuflen); - Py_BEGIN_ALLOW_THREADS - err = inflate(&(self->zst), Z_SYNC_FLUSH); - Py_END_ALLOW_THREADS + do { + obuflen = arrange_output_buffer_with_maximum(&self->zst, &RetVal, + obuflen, hard_limit); + if (obuflen == -2) { + if (max_length > 0) { + goto save; + } + PyErr_NoMemory(); + } + if (obuflen < 0) { + goto abort; + } - if (err == Z_NEED_DICT && self->zdict != NULL) { - if (set_inflate_zdict(self) < 0) { - Py_DECREF(RetVal); - RetVal = NULL; - goto error; - } + Py_BEGIN_ALLOW_THREADS + err = inflate(&self->zst, Z_SYNC_FLUSH); + Py_END_ALLOW_THREADS - /* Repeat the call to inflate. */ - Py_BEGIN_ALLOW_THREADS - err = inflate(&(self->zst), Z_SYNC_FLUSH); - Py_END_ALLOW_THREADS - } + switch (err) { + case Z_OK: /* fall through */ + case Z_BUF_ERROR: /* fall through */ + case Z_STREAM_END: + break; + default: + if (err == Z_NEED_DICT && self->zdict != NULL) { + if (set_inflate_zdict(self) < 0) + goto abort; + else + break; + } + goto save; + } - /* While Z_OK and the output buffer is full, there might be more output. - So extend the output buffer and try again. - */ - while (err == Z_OK && self->zst.avail_out == 0) { - /* If max_length set, don't continue decompressing if we've already - reached the limit. - */ - if (max_length && length >= max_length) - break; + } while (self->zst.avail_out == 0 || err == Z_NEED_DICT); - /* otherwise, ... */ - old_length = length; - length = length << 1; - if (max_length && length > max_length) - length = max_length; + } while (err != Z_STREAM_END && ibuflen != 0); - if (_PyBytes_Resize(&RetVal, length) < 0) { - Py_CLEAR(RetVal); - goto error; - } - self->zst.next_out = - (unsigned char *)PyBytes_AS_STRING(RetVal) + old_length; - self->zst.avail_out = length - old_length; - - Py_BEGIN_ALLOW_THREADS - err = inflate(&(self->zst), Z_SYNC_FLUSH); - Py_END_ALLOW_THREADS - } - - if (save_unconsumed_input(self, err) < 0) { - Py_DECREF(RetVal); - RetVal = NULL; - goto error; - } + save: + if (save_unconsumed_input(self, data, err) < 0) + goto abort; if (err == Z_STREAM_END) { /* This is the logical place to call inflateEnd, but the old behaviour @@ -841,16 +842,16 @@ zlib_Decompress_decompress_impl(compobject *self, Py_buffer *data, not an error condition. */ zlib_error(self->zst, err, "while decompressing data"); - Py_DECREF(RetVal); - RetVal = NULL; - goto error; + goto abort; } - if (_PyBytes_Resize(&RetVal, self->zst.total_out - start_total_out) < 0) { - Py_CLEAR(RetVal); - } + if (_PyBytes_Resize(&RetVal, self->zst.next_out - + (Byte *)PyBytes_AS_STRING(RetVal)) == 0) + goto success; - error: + abort: + Py_CLEAR(RetVal); + success: LEAVE_ZLIB(self); return RetVal; } @@ -873,9 +874,8 @@ zlib_Compress_flush_impl(compobject *self, int mode) /*[clinic end generated code: output=a203f4cefc9de727 input=73ed066794bd15bc]*/ { int err; - unsigned int length = DEF_BUF_SIZE, new_length; - PyObject *RetVal; - unsigned long start_total_out; + Py_ssize_t length = DEF_BUF_SIZE; + PyObject *RetVal = NULL; /* Flushing with Z_NO_FLUSH is a no-op, so there's no point in doing any work at all; just return an empty string. */ @@ -883,50 +883,37 @@ zlib_Compress_flush_impl(compobject *self, int mode) return PyBytes_FromStringAndSize(NULL, 0); } - if (!(RetVal = PyBytes_FromStringAndSize(NULL, length))) - return NULL; - ENTER_ZLIB(self); - start_total_out = self->zst.total_out; self->zst.avail_in = 0; - self->zst.avail_out = length; - self->zst.next_out = (unsigned char *)PyBytes_AS_STRING(RetVal); - Py_BEGIN_ALLOW_THREADS - err = deflate(&(self->zst), mode); - Py_END_ALLOW_THREADS - - /* while Z_OK and the output buffer is full, there might be more output, - so extend the output buffer and try again */ - while (err == Z_OK && self->zst.avail_out == 0) { - if (length <= (UINT_MAX >> 1)) - new_length = length << 1; - else - new_length = UINT_MAX; - if (_PyBytes_Resize(&RetVal, new_length) < 0) { + do { + length = arrange_output_buffer(&self->zst, &RetVal, length); + if (length < 0) { Py_CLEAR(RetVal); goto error; } - self->zst.next_out = - (unsigned char *)PyBytes_AS_STRING(RetVal) + length; - self->zst.avail_out = length; - length = new_length; Py_BEGIN_ALLOW_THREADS - err = deflate(&(self->zst), mode); + err = deflate(&self->zst, mode); Py_END_ALLOW_THREADS - } + + if (err == Z_STREAM_ERROR) { + zlib_error(self->zst, err, "while flushing"); + Py_CLEAR(RetVal); + goto error; + } + } while (self->zst.avail_out == 0); + assert(self->zst.avail_in == 0); /* If mode is Z_FINISH, we also have to call deflateEnd() to free various data structures. Note we should only get Z_STREAM_END when mode is Z_FINISH, but checking both for safety*/ if (err == Z_STREAM_END && mode == Z_FINISH) { - err = deflateEnd(&(self->zst)); + err = deflateEnd(&self->zst); if (err != Z_OK) { zlib_error(self->zst, err, "while finishing compression"); - Py_DECREF(RetVal); - RetVal = NULL; + Py_CLEAR(RetVal); goto error; } else @@ -936,20 +923,18 @@ zlib_Compress_flush_impl(compobject *self, int mode) but there wasn't more output when we tried again, so it is not an error condition. */ - } else if (err!=Z_OK && err!=Z_BUF_ERROR) { + } else if (err != Z_OK && err != Z_BUF_ERROR) { zlib_error(self->zst, err, "while flushing"); - Py_DECREF(RetVal); - RetVal = NULL; + Py_CLEAR(RetVal); goto error; } - if (_PyBytes_Resize(&RetVal, self->zst.total_out - start_total_out) < 0) { + if (_PyBytes_Resize(&RetVal, self->zst.next_out - + (Byte *)PyBytes_AS_STRING(RetVal)) < 0) Py_CLEAR(RetVal); - } error: LEAVE_ZLIB(self); - return RetVal; } @@ -976,13 +961,13 @@ zlib_Compress_copy_impl(compobject *self) */ ENTER_ZLIB(self); err = deflateCopy(&retval->zst, &self->zst); - switch(err) { - case(Z_OK): + switch (err) { + case Z_OK: break; - case(Z_STREAM_ERROR): + case Z_STREAM_ERROR: PyErr_SetString(PyExc_ValueError, "Inconsistent stream state"); goto error; - case(Z_MEM_ERROR): + case Z_MEM_ERROR: PyErr_SetString(PyExc_MemoryError, "Can't allocate memory for compression object"); goto error; @@ -1031,13 +1016,13 @@ zlib_Decompress_copy_impl(compobject *self) */ ENTER_ZLIB(self); err = inflateCopy(&retval->zst, &self->zst); - switch(err) { - case(Z_OK): + switch (err) { + case Z_OK: break; - case(Z_STREAM_ERROR): + case Z_STREAM_ERROR: PyErr_SetString(PyExc_ValueError, "Inconsistent stream state"); goto error; - case(Z_MEM_ERROR): + case Z_MEM_ERROR: PyErr_SetString(PyExc_MemoryError, "Can't allocate memory for decompression object"); goto error; @@ -1070,7 +1055,7 @@ error: /*[clinic input] zlib.Decompress.flush - length: capped_uint(c_default="DEF_BUF_SIZE") = zlib.DEF_BUF_SIZE + length: ssize_t(c_default="DEF_BUF_SIZE") = zlib.DEF_BUF_SIZE the initial size of the output buffer. / @@ -1078,88 +1063,84 @@ Return a bytes object containing any remaining decompressed data. [clinic start generated code]*/ static PyObject * -zlib_Decompress_flush_impl(compobject *self, unsigned int length) -/*[clinic end generated code: output=db6fb753ab698e22 input=1bb961eb21b62aa0]*/ +zlib_Decompress_flush_impl(compobject *self, Py_ssize_t length) +/*[clinic end generated code: output=68c75ea127cbe654 input=aa4ec37f3aef4da0]*/ { - int err; - unsigned int new_length; - PyObject * retval = NULL; - unsigned long start_total_out; - Py_ssize_t size; + int err, flush; + Py_buffer data; + PyObject *RetVal = NULL; + Py_ssize_t ibuflen; - if (length == 0) { + if (length <= 0) { PyErr_SetString(PyExc_ValueError, "length must be greater than zero"); return NULL; } - if (!(retval = PyBytes_FromStringAndSize(NULL, length))) + if (PyObject_GetBuffer(self->unconsumed_tail, &data, PyBUF_SIMPLE) == -1) return NULL; - ENTER_ZLIB(self); - size = PyBytes_GET_SIZE(self->unconsumed_tail); + self->zst.next_in = data.buf; + ibuflen = data.len; - start_total_out = self->zst.total_out; - /* save_unconsumed_input() ensures that unconsumed_tail length is lesser - or equal than UINT_MAX */ - self->zst.avail_in = Py_SAFE_DOWNCAST(size, Py_ssize_t, unsigned int); - self->zst.next_in = (Byte *)PyBytes_AS_STRING(self->unconsumed_tail); - self->zst.avail_out = length; - self->zst.next_out = (Byte *)PyBytes_AS_STRING(retval); + do { + arrange_input_buffer(&self->zst, &ibuflen); + flush = ibuflen == 0 ? Z_FINISH : Z_NO_FLUSH; - Py_BEGIN_ALLOW_THREADS - err = inflate(&(self->zst), Z_FINISH); - Py_END_ALLOW_THREADS + do { + length = arrange_output_buffer(&self->zst, &RetVal, length); + if (length < 0) + goto abort; - /* while Z_OK and the output buffer is full, there might be more output, - so extend the output buffer and try again */ - while ((err == Z_OK || err == Z_BUF_ERROR) && self->zst.avail_out == 0) { - if (length <= (UINT_MAX >> 1)) - new_length = length << 1; - else - new_length = UINT_MAX; - if (_PyBytes_Resize(&retval, new_length) < 0) { - Py_CLEAR(retval); - goto error; - } - self->zst.next_out = (Byte *)PyBytes_AS_STRING(retval) + length; - self->zst.avail_out = length; - length = new_length; + Py_BEGIN_ALLOW_THREADS + err = inflate(&self->zst, flush); + Py_END_ALLOW_THREADS - Py_BEGIN_ALLOW_THREADS - err = inflate(&(self->zst), Z_FINISH); - Py_END_ALLOW_THREADS - } + switch (err) { + case Z_OK: /* fall through */ + case Z_BUF_ERROR: /* fall through */ + case Z_STREAM_END: + break; + default: + if (err == Z_NEED_DICT && self->zdict != NULL) { + if (set_inflate_zdict(self) < 0) + goto abort; + else + break; + } + goto save; + } - if (save_unconsumed_input(self, err) < 0) { - Py_DECREF(retval); - retval = NULL; - goto error; - } + } while (self->zst.avail_out == 0 || err == Z_NEED_DICT); + + } while (err != Z_STREAM_END && ibuflen != 0); + + save: + if (save_unconsumed_input(self, &data, err) < 0) + goto abort; /* If at end of stream, clean up any memory allocated by zlib. */ if (err == Z_STREAM_END) { self->eof = 1; self->is_initialised = 0; - err = inflateEnd(&(self->zst)); + err = inflateEnd(&self->zst); if (err != Z_OK) { zlib_error(self->zst, err, "while finishing decompression"); - Py_DECREF(retval); - retval = NULL; - goto error; + goto abort; } } - if (_PyBytes_Resize(&retval, self->zst.total_out - start_total_out) < 0) { - Py_CLEAR(retval); - } - -error: + if (_PyBytes_Resize(&RetVal, self->zst.next_out - + (Byte *)PyBytes_AS_STRING(RetVal)) == 0) + goto success; + abort: + Py_CLEAR(RetVal); + success: + PyBuffer_Release(&data); LEAVE_ZLIB(self); - - return retval; + return RetVal; } #include "clinic/zlibmodule.c.h"