diff --git a/Doc/library/zlib.rst b/Doc/library/zlib.rst index 1869bb8aac5..565f3f7211a 100644 --- a/Doc/library/zlib.rst +++ b/Doc/library/zlib.rst @@ -69,9 +69,22 @@ The available exception and functions in this module are: *method* is the compression algorithm. Currently, the only supported value is ``DEFLATED``. - *wbits* is the base two logarithm of the size of the window buffer. This - should be an integer from ``8`` to ``15``. Higher values give better - compression, but use more memory. + The *wbits* argument controls the size of the history buffer (or the + "window size") used when compressing data, and whether a header and + trailer are included in the output. It can take several ranges of values: + + * +9 to +15: The base-two logarithm of the window size, which + therefore ranges between 512 and 32768. Larger values produce + better compression at the expense of greater memory usage. The + resulting output will include a zlib-specific header and trailer. + + * −9 to −15: Uses the absolute value of *wbits* as the + window size logarithm, while producing a raw output stream with no + header or trailing checksum. + + * +25 to +31 = 16 + (9 to 15): Uses the low 4 bits of the value as the + window size logarithm, while including a basic :program:`gzip` header + and trailing checksum in the output. The *memLevel* argument controls the amount of memory used for the internal compression state. Valid values range from ``1`` to ``9``. @@ -113,20 +126,39 @@ The available exception and functions in this module are: .. function:: decompress(data[, wbits[, bufsize]]) Decompresses the bytes in *data*, returning a bytes object containing the - uncompressed data. The *wbits* parameter controls the size of the window - buffer, and is discussed further below. + uncompressed data. The *wbits* parameter depends on + the format of *data*, and is discussed further below. If *bufsize* is given, it is used as the initial size of the output buffer. 
Raises the :exc:`error` exception if any error occurs. - The absolute value of *wbits* is the base two logarithm of the size of the - history buffer (the "window size") used when compressing data. Its absolute - value should be between 8 and 15 for the most recent versions of the zlib - library, larger values resulting in better compression at the expense of greater - memory usage. When decompressing a stream, *wbits* must not be smaller + .. _decompress-wbits: + + The *wbits* parameter controls the size of the history buffer + (or "window size"), and what header and trailer format is expected. + It is similar to the parameter for :func:`compressobj`, but accepts + more ranges of values: + + * +8 to +15: The base-two logarithm of the window size. The input + must include a zlib header and trailer. + + * 0: Automatically determine the window size from the zlib header. + + * −8 to −15: Uses the absolute value of *wbits* as the window size + logarithm. The input must be a raw stream with no header or trailer. + + * +24 to +31 = 16 + (8 to 15): Uses the low 4 bits of the value as + the window size logarithm. The input must include a gzip header and + trailer. + + * +40 to +47 = 32 + (8 to 15): Uses the low 4 bits of the value as + the window size logarithm, and automatically accepts either + the zlib or gzip format. + + When decompressing a stream, the window size must not be smaller than the size originally used to compress the stream; using a too-small - value will result in an exception. The default value is therefore the - highest value, 15. When *wbits* is negative, the standard - :program:`gzip` header is suppressed. + value may result in an :exc:`error` exception. The default *wbits* value + is 15, which corresponds to the largest window size and requires a zlib + header and trailer to be included. *bufsize* is the initial size of the buffer used to hold decompressed data. 
If more space is required, the buffer size will be increased as needed, so you @@ -139,7 +171,9 @@ The available exception and functions in this module are: Returns a decompression object, to be used for decompressing data streams that won't fit into memory at once. - The *wbits* parameter controls the size of the window buffer. + The *wbits* parameter controls the size of the history buffer (or the + "window size"), and what header and trailer format is expected. It has + the same meaning as `described for decompress() <#decompress-wbits>`__. The *zdict* parameter specifies a predefined compression dictionary. If provided, this must be the same dictionary as was used by the compressor that diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index 1cafcb59875..72a15965369 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -680,6 +680,49 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase): finally: data = None + def test_wbits(self): + co = zlib.compressobj(level=1, wbits=15) + zlib15 = co.compress(HAMLET_SCENE) + co.flush() + self.assertEqual(zlib.decompress(zlib15, 15), HAMLET_SCENE) + self.assertEqual(zlib.decompress(zlib15, 0), HAMLET_SCENE) + self.assertEqual(zlib.decompress(zlib15, 32 + 15), HAMLET_SCENE) + with self.assertRaisesRegex(zlib.error, 'invalid window size'): + zlib.decompress(zlib15, 14) + dco = zlib.decompressobj(wbits=32 + 15) + self.assertEqual(dco.decompress(zlib15), HAMLET_SCENE) + dco = zlib.decompressobj(wbits=14) + with self.assertRaisesRegex(zlib.error, 'invalid window size'): + dco.decompress(zlib15) + + co = zlib.compressobj(level=1, wbits=9) + zlib9 = co.compress(HAMLET_SCENE) + co.flush() + self.assertEqual(zlib.decompress(zlib9, 9), HAMLET_SCENE) + self.assertEqual(zlib.decompress(zlib9, 15), HAMLET_SCENE) + self.assertEqual(zlib.decompress(zlib9, 0), HAMLET_SCENE) + self.assertEqual(zlib.decompress(zlib9, 32 + 9), HAMLET_SCENE) + dco = zlib.decompressobj(wbits=32 + 9) + 
self.assertEqual(dco.decompress(zlib9), HAMLET_SCENE) + + co = zlib.compressobj(level=1, wbits=-15) + deflate15 = co.compress(HAMLET_SCENE) + co.flush() + self.assertEqual(zlib.decompress(deflate15, -15), HAMLET_SCENE) + dco = zlib.decompressobj(wbits=-15) + self.assertEqual(dco.decompress(deflate15), HAMLET_SCENE) + + co = zlib.compressobj(level=1, wbits=-9) + deflate9 = co.compress(HAMLET_SCENE) + co.flush() + self.assertEqual(zlib.decompress(deflate9, -9), HAMLET_SCENE) + self.assertEqual(zlib.decompress(deflate9, -15), HAMLET_SCENE) + dco = zlib.decompressobj(wbits=-9) + self.assertEqual(dco.decompress(deflate9), HAMLET_SCENE) + + co = zlib.compressobj(level=1, wbits=16 + 15) + gzip = co.compress(HAMLET_SCENE) + co.flush() + self.assertEqual(zlib.decompress(gzip, 16 + 15), HAMLET_SCENE) + self.assertEqual(zlib.decompress(gzip, 32 + 15), HAMLET_SCENE) + dco = zlib.decompressobj(32 + 15) + self.assertEqual(dco.decompress(gzip), HAMLET_SCENE) + def genblock(seed, length, step=1024, generator=random): """length-byte stream of random data from a seed (in step-byte blocks).""" diff --git a/Modules/clinic/zlibmodule.c.h b/Modules/clinic/zlibmodule.c.h index 2d75bc912ec..602ae1df827 100644 --- a/Modules/clinic/zlibmodule.c.h +++ b/Modules/clinic/zlibmodule.c.h @@ -48,7 +48,7 @@ PyDoc_STRVAR(zlib_decompress__doc__, " data\n" " Compressed data.\n" " wbits\n" -" The window buffer size.\n" +" The window buffer size and container format.\n" " bufsize\n" " The initial output buffer size."); @@ -95,7 +95,10 @@ PyDoc_STRVAR(zlib_compressobj__doc__, " method\n" " The compression algorithm. If given, this must be DEFLATED.\n" " wbits\n" -" The base two logarithm of the window size (range: 8..15).\n" +" +9 to +15: The base-two logarithm of the window size. 
Include a zlib\n" +" container.\n" +" -9 to -15: Generate a raw stream.\n" +" +25 to +31: Include a gzip container.\n" " memLevel\n" " Controls the amount of memory used for internal compression state.\n" " Valid values range from 1 to 9. Higher values result in higher memory\n" @@ -146,7 +149,7 @@ PyDoc_STRVAR(zlib_decompressobj__doc__, "Return a decompressor object.\n" "\n" " wbits\n" -" The window buffer size.\n" +" The window buffer size and container format.\n" " zdict\n" " The predefined compression dictionary. This must be the same\n" " dictionary as used by the compressor that produced the input data."); @@ -439,4 +442,4 @@ exit: #ifndef ZLIB_COMPRESS_COPY_METHODDEF #define ZLIB_COMPRESS_COPY_METHODDEF #endif /* !defined(ZLIB_COMPRESS_COPY_METHODDEF) */ -/*[clinic end generated code: output=cf81e1deae3af0ce input=a9049054013a1b77]*/ +/*[clinic end generated code: output=f31627b314a7bd2f input=a9049054013a1b77]*/ diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index eb627287826..02c747ee822 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -271,7 +271,7 @@ zlib.decompress data: Py_buffer Compressed data. wbits: int(c_default="MAX_WBITS") = MAX_WBITS - The window buffer size. + The window buffer size and container format. bufsize: capped_uint(c_default="DEF_BUF_SIZE") = DEF_BUF_SIZE The initial output buffer size. / @@ -282,7 +282,7 @@ Returns a bytes object containing the uncompressed data. static PyObject * zlib_decompress_impl(PyModuleDef *module, Py_buffer *data, int wbits, unsigned int bufsize) -/*[clinic end generated code: output=444d0987f3429574 input=da095118b3243b27]*/ +/*[clinic end generated code: output=444d0987f3429574 input=75123b0d4ff0541d]*/ { PyObject *result_str = NULL; Byte *input; @@ -396,7 +396,10 @@ zlib.compressobj method: int(c_default="DEFLATED") = DEFLATED The compression algorithm. If given, this must be DEFLATED. 
wbits: int(c_default="MAX_WBITS") = MAX_WBITS - The base two logarithm of the window size (range: 8..15). + +9 to +15: The base-two logarithm of the window size. Include a zlib + container. + -9 to -15: Generate a raw stream. + +25 to +31: Include a gzip container. memLevel: int(c_default="DEF_MEM_LEVEL") = DEF_MEM_LEVEL Controls the amount of memory used for internal compression state. Valid values range from 1 to 9. Higher values result in higher memory @@ -414,7 +417,7 @@ Return a compressor object. static PyObject * zlib_compressobj_impl(PyModuleDef *module, int level, int method, int wbits, int memLevel, int strategy, Py_buffer *zdict) -/*[clinic end generated code: output=2949bbb9a5723ccd input=de2ffab6e910cd8b]*/ +/*[clinic end generated code: output=2949bbb9a5723ccd input=2fa3d026f90ab8d5]*/ { compobject *self = NULL; int err; @@ -475,7 +478,7 @@ zlib_compressobj_impl(PyModuleDef *module, int level, int method, int wbits, zlib.decompressobj wbits: int(c_default="MAX_WBITS") = MAX_WBITS - The window buffer size. + The window buffer size and container format. zdict: object(c_default="NULL") = b'' The predefined compression dictionary. This must be the same dictionary as used by the compressor that produced the input data. @@ -485,7 +488,7 @@ Return a decompressor object. 
static PyObject * zlib_decompressobj_impl(PyModuleDef *module, int wbits, PyObject *zdict) -/*[clinic end generated code: output=8ccd583fbd631798 input=67f05145a6920127]*/ +/*[clinic end generated code: output=8ccd583fbd631798 input=d3832b8511fc977b]*/ { int err; compobject *self; @@ -1329,7 +1332,7 @@ PyDoc_STRVAR(zlib_module_documentation, "decompress(string,[wbits],[bufsize]) -- Decompresses a compressed string.\n" "decompressobj([wbits[, zdict]]]) -- Return a decompressor object.\n" "\n" -"'wbits' is window buffer size.\n" +"'wbits' is window buffer size and container format.\n" "Compressor objects support compress() and flush() methods; decompressor\n" "objects support decompress() and flush().");