diff --git a/Doc/howto/clinic.rst b/Doc/howto/clinic.rst
index 7524c4ad901..b04edea1897 100644
--- a/Doc/howto/clinic.rst
+++ b/Doc/howto/clinic.rst
@@ -1249,18 +1249,18 @@ Here's the simplest example of a custom converter, from ``Modules/zlibmodule.c``
 
     /*[python input]
 
-    class uint_converter(CConverter):
+    class capped_uint_converter(CConverter):
         type = 'unsigned int'
-        converter = 'uint_converter'
+        converter = 'capped_uint_converter'
 
     [python start generated code]*/
-    /*[python end generated code: checksum=da39a3ee5e6b4b0d3255bfef95601890afd80709]*/
+    /*[python end generated code: output=da39a3ee5e6b4b0d input=35521e4e733823c7]*/
 
-This block adds a converter to Argument Clinic named ``uint``.  Parameters
-declared as ``uint`` will be declared as type ``unsigned int``, and will
-be parsed by the ``'O&'`` format unit, which will call the ``uint_converter``
-converter function.
-``uint`` variables automatically support default values.
+This block adds a converter to Argument Clinic named ``capped_uint``.  Parameters
+declared as ``capped_uint`` will be declared as type ``unsigned int``, and will
+be parsed by the ``'O&'`` format unit, which will call the
+``capped_uint_converter`` converter function.  ``capped_uint`` variables
+automatically support default values.
 
 More sophisticated custom converters can insert custom C code to handle
 initialization and cleanup.
diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py
index 359d6dd0f5e..dfb4c25416c 100644
--- a/Lib/test/support/__init__.py
+++ b/Lib/test/support/__init__.py
@@ -1608,12 +1608,15 @@ class _MemoryWatchdog:
 def bigmemtest(size, memuse, dry_run=True):
     """Decorator for bigmem tests.
 
-    'minsize' is the minimum useful size for the test (in arbitrary,
-    test-interpreted units.) 'memuse' is the number of 'bytes per size' for
-    the test, or a good estimate of it.
+    'size' is a requested size for the test (in arbitrary, test-interpreted
+    units.) 'memuse' is the number of bytes per unit for the test, or a good
+    estimate of it.  For example, a test that needs two byte buffers, of 4 GiB
+    each, could be decorated with @bigmemtest(size=_4G, memuse=2).
 
-    if 'dry_run' is False, it means the test doesn't support dummy runs
-    when -M is not specified.
+    The 'size' argument is normally passed to the decorated test method as an
+    extra argument.  If 'dry_run' is true, the value passed to the test method
+    may be less than the requested value.  If 'dry_run' is false, it means the
+    test doesn't support dummy runs when -M is not specified.
     """
     def decorator(f):
         def wrapper(self):
diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py
index d8408e15cd4..3c51673a922 100644
--- a/Lib/test/test_gzip.py
+++ b/Lib/test/test_gzip.py
@@ -3,6 +3,7 @@
 
 import unittest
 from test import support
+from test.support import bigmemtest, _4G
 import os
 import io
 import struct
@@ -116,6 +117,14 @@ class TestGzip(BaseTest):
             self.assertEqual(f.tell(), nread)
         self.assertEqual(b''.join(blocks), data1 * 50)
 
+    @bigmemtest(size=_4G, memuse=1)
+    def test_read_large(self, size):
+        # Read chunk size over UINT_MAX should be supported, despite zlib's
+        # limitation per low-level call
+        compressed = gzip.compress(data1, compresslevel=1)
+        f = gzip.GzipFile(fileobj=io.BytesIO(compressed), mode='rb')
+        self.assertEqual(f.read(size), data1)
+
     def test_io_on_closed_object(self):
         # Test that I/O operations on closed GzipFile objects raise a
         # ValueError, just like the corresponding functions on file objects.
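A note on the bigmemtest contract documented above: the requested size is handed to the decorated test method as an extra argument, and a dry run (the default) may substitute a smaller value. The sketch below is a simplified, hypothetical model of that contract, not the real decorator in Lib/test/support/__init__.py, which also honours the -M command-line memory limit:

    def bigmemtest_sketch(size, memuse, dry_run=True):
        # Toy stand-in for test.support.bigmemtest, for illustration only.
        def decorator(f):
            def wrapper(self):
                if dry_run:
                    # A dry run may pass less than the requested size; the
                    # test must cope with whatever value it receives.
                    passed = min(size, 8 * 1024 * 1024)  # arbitrary small cap
                else:
                    passed = size
                return f(self, passed)
            return wrapper
        return decorator

This is why test_read_large() above accepts a size parameter even though the decorator names the full _4G request.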
diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py
index 7154e139bc9..88c415bc4eb 100644
--- a/Lib/test/test_zlib.py
+++ b/Lib/test/test_zlib.py
@@ -122,11 +122,17 @@ class ExceptionTestCase(unittest.TestCase):
         self.assertRaises(ValueError, zlib.decompressobj().flush, 0)
         self.assertRaises(ValueError, zlib.decompressobj().flush, -1)
 
+    @support.cpython_only
+    def test_overflow(self):
+        with self.assertRaisesRegex(OverflowError, 'int too large'):
+            zlib.decompress(b'', 15, sys.maxsize + 1)
+        with self.assertRaisesRegex(OverflowError, 'int too large'):
+            zlib.decompressobj().flush(sys.maxsize + 1)
+
 
 class BaseCompressTestCase(object):
     def check_big_compress_buffer(self, size, compress_func):
         _1M = 1024 * 1024
-        fmt = "%%0%dx" % (2 * _1M)
         # Generate 10MB worth of random, and expand it by repeating it.
         # The assumption is that zlib's memory is not big enough to exploit
         # such spread out redundancy.
@@ -196,6 +202,18 @@ class CompressTestCase(BaseCompressTestCase, unittest.TestCase):
         finally:
             data = None
 
+    @bigmemtest(size=_4G, memuse=1)
+    def test_large_bufsize(self, size):
+        # Test decompress(bufsize) parameter greater than the internal limit
+        data = HAMLET_SCENE * 10
+        compressed = zlib.compress(data, 1)
+        self.assertEqual(zlib.decompress(compressed, 15, size), data)
+
+    def test_custom_bufsize(self):
+        data = HAMLET_SCENE * 10
+        compressed = zlib.compress(data, 1)
+        self.assertEqual(zlib.decompress(compressed, 15, CustomInt()), data)
+
 
 class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
     # Test compression object
@@ -364,6 +382,21 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
         self.assertRaises(ValueError, dco.decompress, b"", -1)
         self.assertEqual(b'', dco.unconsumed_tail)
 
+    def test_maxlen_large(self):
+        # Sizes up to sys.maxsize should be accepted, although zlib is
+        # internally limited to expressing sizes with unsigned int
+        data = HAMLET_SCENE * 10
+        self.assertGreater(len(data), zlib.DEF_BUF_SIZE)
+        compressed = zlib.compress(data, 1)
+        dco = zlib.decompressobj()
+        self.assertEqual(dco.decompress(compressed, sys.maxsize), data)
+
+    def test_maxlen_custom(self):
+        data = HAMLET_SCENE * 10
+        compressed = zlib.compress(data, 1)
+        dco = zlib.decompressobj()
+        self.assertEqual(dco.decompress(compressed, CustomInt()), data[:100])
+
     def test_clear_unconsumed_tail(self):
         # Issue #12050: calling decompress() without providing max_length
         # should clear the unconsumed_tail attribute.
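The CustomInt() arguments in test_custom_bufsize, test_maxlen_custom and (in the next hunk) test_flush_custom_length exercise the integer coercion performed by the new converter: any object whose type implements __int__ is usable as a size. A minimal demonstration, assuming this patch is applied (later Python versions moved such parameters to __index__, so the exact duck typing may differ there); CustomInt is the helper class added at the bottom of this test file:

    import zlib

    class CustomInt:
        def __int__(self):
            return 100

    blob = zlib.compress(b'x' * 1000, 1)
    # bufsize is coerced through CustomInt().__int__(), i.e. 100; the
    # output buffer still grows as needed, so the full data comes back.
    assert zlib.decompress(blob, 15, CustomInt()) == b'x' * 1000

By contrast, dco.decompress(compressed, CustomInt()) in test_maxlen_custom really does truncate the result to 100 bytes, because max_length is a hard limit rather than an initial buffer size.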
@@ -537,6 +570,22 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
             data = zlib.compress(input2)
         self.assertEqual(dco.flush(), input1[1:])
 
+    @bigmemtest(size=_4G, memuse=1)
+    def test_flush_large_length(self, size):
+        # Test flush(length) parameter greater than internal limit UINT_MAX
+        input = HAMLET_SCENE * 10
+        data = zlib.compress(input, 1)
+        dco = zlib.decompressobj()
+        dco.decompress(data, 1)
+        self.assertEqual(dco.flush(size), input[1:])
+
+    def test_flush_custom_length(self):
+        input = HAMLET_SCENE * 10
+        data = zlib.compress(input, 1)
+        dco = zlib.decompressobj()
+        dco.decompress(data, 1)
+        self.assertEqual(dco.flush(CustomInt()), input[1:])
+
     @requires_Compress_copy
     def test_compresscopy(self):
         # Test copying a compression object
@@ -725,5 +774,10 @@ LAERTES
 """
 
 
+class CustomInt:
+    def __int__(self):
+        return 100
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS b/Misc/NEWS
index 60806d1ddfe..9f11892d803 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -77,6 +77,13 @@ Core and Builtins
 Library
 -------
 
+- Issue #25626: Change three zlib functions to accept sizes that fit in
+  Py_ssize_t, but internally cap those sizes to UINT_MAX.  This resolves a
+  regression in 3.5 where GzipFile.read() failed to read chunks larger than 2
+  or 4 GiB.  The change affects the zlib.Decompress.decompress() max_length
+  parameter, the zlib.decompress() bufsize parameter, and the
+  zlib.Decompress.flush() length parameter.
+
 - Issue #25583: Avoid incorrect errors raised by os.makedirs(exist_ok=True)
   when the OS gives priority to errors such as EACCES over EEXIST.
 
diff --git a/Modules/clinic/zlibmodule.c.h b/Modules/clinic/zlibmodule.c.h
index 35661a5a46e..c5cdf427b55 100644
--- a/Modules/clinic/zlibmodule.c.h
+++ b/Modules/clinic/zlibmodule.c.h
@@ -68,7 +68,7 @@ zlib_decompress(PyModuleDef *module, PyObject *args)
     unsigned int bufsize = DEF_BUF_SIZE;
 
     if (!PyArg_ParseTuple(args, "y*|iO&:decompress",
-        &data, &wbits, uint_converter, &bufsize))
+        &data, &wbits, capped_uint_converter, &bufsize))
         goto exit;
     return_value = zlib_decompress_impl(module, &data, wbits, bufsize);
 
@@ -242,7 +242,7 @@ zlib_Decompress_decompress(compobject *self, PyObject *args)
     unsigned int max_length = 0;
 
     if (!PyArg_ParseTuple(args, "y*|O&:decompress",
-        &data, uint_converter, &max_length))
+        &data, capped_uint_converter, &max_length))
         goto exit;
     return_value = zlib_Decompress_decompress_impl(self, &data, max_length);
 
@@ -353,7 +353,7 @@ zlib_Decompress_flush(compobject *self, PyObject *args)
     unsigned int length = DEF_BUF_SIZE;
 
     if (!PyArg_ParseTuple(args, "|O&:flush",
-        uint_converter, &length))
+        capped_uint_converter, &length))
         goto exit;
     return_value = zlib_Decompress_flush_impl(self, length);
 
@@ -438,4 +438,4 @@ exit:
 #ifndef ZLIB_COMPRESS_COPY_METHODDEF
 #define ZLIB_COMPRESS_COPY_METHODDEF
 #endif /* !defined(ZLIB_COMPRESS_COPY_METHODDEF) */
-/*[clinic end generated code: output=56ed1147bbbb4788 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=7734aec079550bc8 input=a9049054013a1b77]*/
diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c
index 1997b40d182..37307be39dd 100644
--- a/Modules/zlibmodule.c
+++ b/Modules/zlibmodule.c
@@ -226,42 +226,42 @@ zlib_compress_impl(PyModuleDef *module, Py_buffer *bytes, int level)
 
 /*[python input]
 
-class uint_converter(CConverter):
+class capped_uint_converter(CConverter):
     type = 'unsigned int'
-    converter = 'uint_converter'
+    converter = 'capped_uint_converter'
     c_ignored_default = "0"
 
 [python start generated code]*/
-/*[python end generated code: output=da39a3ee5e6b4b0d input=22263855f7a3ebfd]*/
+/*[python end generated code: output=da39a3ee5e6b4b0d input=35521e4e733823c7]*/
 
 static int
-uint_converter(PyObject *obj, void *ptr)
+capped_uint_converter(PyObject *obj, void *ptr)
 {
-    long val;
-    unsigned long uval;
+    PyObject *long_obj;
+    Py_ssize_t val;
 
-    val = PyLong_AsLong(obj);
+    long_obj = (PyObject *)_PyLong_FromNbInt(obj);
+    if (long_obj == NULL) {
+        return 0;
+    }
+    val = PyLong_AsSsize_t(long_obj);
+    Py_DECREF(long_obj);
     if (val == -1 && PyErr_Occurred()) {
-        uval = PyLong_AsUnsignedLong(obj);
-        if (uval == (unsigned long)-1 && PyErr_Occurred())
-            return 0;
+        return 0;
     }
-    else {
-        if (val < 0) {
-            PyErr_SetString(PyExc_ValueError,
-                            "value must be positive");
-            return 0;
-        }
-        uval = (unsigned long)val;
-    }
-
-    if (uval > UINT_MAX) {
-        PyErr_SetString(PyExc_OverflowError,
-                        "Python int too large for C unsigned int");
+    if (val < 0) {
+        PyErr_SetString(PyExc_ValueError,
+                        "value must be positive");
         return 0;
     }
-    *(unsigned int *)ptr = Py_SAFE_DOWNCAST(uval, unsigned long, unsigned int);
+    if ((size_t)val > UINT_MAX) {
+        *(unsigned int *)ptr = UINT_MAX;
+    }
+    else {
+        *(unsigned int *)ptr = Py_SAFE_DOWNCAST(val, Py_ssize_t,
+                                                unsigned int);
+    }
 
     return 1;
 }
 
@@ -272,7 +272,7 @@ zlib.decompress
         Compressed data.
     wbits: int(c_default="MAX_WBITS") = MAX_WBITS
         The window buffer size.
-    bufsize: uint(c_default="DEF_BUF_SIZE") = DEF_BUF_SIZE
+    bufsize: capped_uint(c_default="DEF_BUF_SIZE") = DEF_BUF_SIZE
         The initial output buffer size.
     /
 
@@ -282,7 +282,7 @@ Returns a bytes object containing the uncompressed data.
 static PyObject *
 zlib_decompress_impl(PyModuleDef *module, Py_buffer *data, int wbits,
                      unsigned int bufsize)
-/*[clinic end generated code: output=444d0987f3429574 input=0f4b9abb7103f50e]*/
+/*[clinic end generated code: output=444d0987f3429574 input=da095118b3243b27]*/
 {
     PyObject *result_str = NULL;
     Byte *input;
@@ -691,7 +691,7 @@ zlib.Decompress.decompress
 
     data: Py_buffer
         The binary data to decompress.
-    max_length: uint = 0
+    max_length: capped_uint = 0
         The maximum allowable length of the decompressed data.
         Unconsumed input data will be stored in the unconsumed_tail attribute.
     /
@@ -707,7 +707,7 @@ Call the flush() method to clear these buffers.
 static PyObject *
 zlib_Decompress_decompress_impl(compobject *self, Py_buffer *data,
                                 unsigned int max_length)
-/*[clinic end generated code: output=b82e2a2c19f5fe7b input=02cfc047377cec86]*/
+/*[clinic end generated code: output=b82e2a2c19f5fe7b input=68b6508ab07c2cf0]*/
 {
     int err;
     unsigned int old_length, length = DEF_BUF_SIZE;
@@ -1048,7 +1048,7 @@ error:
 /*[clinic input]
 zlib.Decompress.flush
 
-    length: uint(c_default="DEF_BUF_SIZE") = zlib.DEF_BUF_SIZE
+    length: capped_uint(c_default="DEF_BUF_SIZE") = zlib.DEF_BUF_SIZE
         the initial size of the output buffer.
     /
 
@@ -1057,7 +1057,7 @@ Return a bytes object containing any remaining decompressed data.
 
 static PyObject *
 zlib_Decompress_flush_impl(compobject *self, unsigned int length)
-/*[clinic end generated code: output=db6fb753ab698e22 input=1580956505978993]*/
+/*[clinic end generated code: output=db6fb753ab698e22 input=1bb961eb21b62aa0]*/
 {
     int err;
     unsigned int new_length;
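Taken together, capped_uint_converter accepts any integer-like object via its nb_int slot, raises OverflowError for values that do not fit in Py_ssize_t, rejects negatives with ValueError, and silently caps anything above UINT_MAX instead of raising OverflowError as the old uint_converter did. A rough Python paraphrase of the C logic, assuming the common case of a 32-bit unsigned int (the C constant UINT_MAX is platform-defined):

    import sys

    UINT_MAX = 2**32 - 1  # assumption; platform-defined in C

    def capped_uint(obj):
        val = int(obj)  # the C code coerces via the nb_int slot (__int__)
        if not (-sys.maxsize - 1 <= val <= sys.maxsize):
            # PyLong_AsSsize_t fails first, hence the OverflowError that
            # test_overflow expects for sizes beyond sys.maxsize.
            raise OverflowError('Python int too large to convert to C ssize_t')
        if val < 0:
            raise ValueError('value must be positive')
        return min(val, UINT_MAX)  # capped, not rejected

Capping instead of erroring is what lets GzipFile.read() request chunks larger than 4 GiB again: each low-level zlib call still sees at most UINT_MAX, but the Python-level API no longer refuses the size outright, which is also why test_maxlen_large passes sys.maxsize successfully while test_overflow only fails past it.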