mirror of https://github.com/python/cpython
gh-95534: Improve gzip reading speed by 10% (#97664)
Change summary: + There is now a `gzip.READ_BUFFER_SIZE` constant that is 128 KiB. Other programs that read in 128 KiB chunks: pigz and cat. So this seems to be best practice among good programs. It is also faster than reading in 8 KiB chunks. + A `zlib._ZlibDecompressor` was added. This is the `_bz2.BZ2Decompressor` ported to zlib. Since the `zlib.Decompress` object is better for in-memory decompression, the `_ZlibDecompressor` is hidden. It only makes sense for file decompression, and that is now already implemented in the gzip library. No need to bother the users with this. + The ZlibDecompressor uses the older CPython `arrange_output_buffer` functions, as those are faster and more appropriate for the use case. + `GzipFile.read` has been optimized. There is no longer an `unconsumed_tail` member to write back to the padded file. This is instead handled by the ZlibDecompressor itself, which has an internal buffer. `_add_read_data` has been inlined, as it was just two calls. EDIT: While I am adding improvements anyway, I figured I could add another one-liner optimization to the `python -m gzip` application. It previously read chunks of `io.DEFAULT_BUFFER_SIZE`, but has now been updated to use `READ_BUFFER_SIZE` chunks.
This commit is contained in:
parent
bb38b39b33
commit
eae7dad402
24
Lib/gzip.py
24
Lib/gzip.py
|
@ -21,6 +21,8 @@ _COMPRESS_LEVEL_FAST = 1
|
||||||
_COMPRESS_LEVEL_TRADEOFF = 6
|
_COMPRESS_LEVEL_TRADEOFF = 6
|
||||||
_COMPRESS_LEVEL_BEST = 9
|
_COMPRESS_LEVEL_BEST = 9
|
||||||
|
|
||||||
|
READ_BUFFER_SIZE = 128 * 1024
|
||||||
|
|
||||||
|
|
||||||
def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_BEST,
|
def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_BEST,
|
||||||
encoding=None, errors=None, newline=None):
|
encoding=None, errors=None, newline=None):
|
||||||
|
@ -446,7 +448,7 @@ def _read_gzip_header(fp):
|
||||||
|
|
||||||
class _GzipReader(_compression.DecompressReader):
|
class _GzipReader(_compression.DecompressReader):
|
||||||
def __init__(self, fp):
|
def __init__(self, fp):
|
||||||
super().__init__(_PaddedFile(fp), zlib.decompressobj,
|
super().__init__(_PaddedFile(fp), zlib._ZlibDecompressor,
|
||||||
wbits=-zlib.MAX_WBITS)
|
wbits=-zlib.MAX_WBITS)
|
||||||
# Set flag indicating start of a new member
|
# Set flag indicating start of a new member
|
||||||
self._new_member = True
|
self._new_member = True
|
||||||
|
@ -494,12 +496,13 @@ class _GzipReader(_compression.DecompressReader):
|
||||||
self._new_member = False
|
self._new_member = False
|
||||||
|
|
||||||
# Read a chunk of data from the file
|
# Read a chunk of data from the file
|
||||||
buf = self._fp.read(io.DEFAULT_BUFFER_SIZE)
|
if self._decompressor.needs_input:
|
||||||
|
buf = self._fp.read(READ_BUFFER_SIZE)
|
||||||
|
uncompress = self._decompressor.decompress(buf, size)
|
||||||
|
else:
|
||||||
|
uncompress = self._decompressor.decompress(b"", size)
|
||||||
|
|
||||||
uncompress = self._decompressor.decompress(buf, size)
|
if self._decompressor.unused_data != b"":
|
||||||
if self._decompressor.unconsumed_tail != b"":
|
|
||||||
self._fp.prepend(self._decompressor.unconsumed_tail)
|
|
||||||
elif self._decompressor.unused_data != b"":
|
|
||||||
# Prepend the already read bytes to the fileobj so they can
|
# Prepend the already read bytes to the fileobj so they can
|
||||||
# be seen by _read_eof() and _read_gzip_header()
|
# be seen by _read_eof() and _read_gzip_header()
|
||||||
self._fp.prepend(self._decompressor.unused_data)
|
self._fp.prepend(self._decompressor.unused_data)
|
||||||
|
@ -510,14 +513,11 @@ class _GzipReader(_compression.DecompressReader):
|
||||||
raise EOFError("Compressed file ended before the "
|
raise EOFError("Compressed file ended before the "
|
||||||
"end-of-stream marker was reached")
|
"end-of-stream marker was reached")
|
||||||
|
|
||||||
self._add_read_data( uncompress )
|
self._crc = zlib.crc32(uncompress, self._crc)
|
||||||
|
self._stream_size += len(uncompress)
|
||||||
self._pos += len(uncompress)
|
self._pos += len(uncompress)
|
||||||
return uncompress
|
return uncompress
|
||||||
|
|
||||||
def _add_read_data(self, data):
|
|
||||||
self._crc = zlib.crc32(data, self._crc)
|
|
||||||
self._stream_size = self._stream_size + len(data)
|
|
||||||
|
|
||||||
def _read_eof(self):
|
def _read_eof(self):
|
||||||
# We've read to the end of the file
|
# We've read to the end of the file
|
||||||
# We check that the computed CRC and size of the
|
# We check that the computed CRC and size of the
|
||||||
|
@ -647,7 +647,7 @@ def main():
|
||||||
f = builtins.open(arg, "rb")
|
f = builtins.open(arg, "rb")
|
||||||
g = open(arg + ".gz", "wb")
|
g = open(arg + ".gz", "wb")
|
||||||
while True:
|
while True:
|
||||||
chunk = f.read(io.DEFAULT_BUFFER_SIZE)
|
chunk = f.read(READ_BUFFER_SIZE)
|
||||||
if not chunk:
|
if not chunk:
|
||||||
break
|
break
|
||||||
g.write(chunk)
|
g.write(chunk)
|
||||||
|
|
|
@ -944,6 +944,173 @@ LAERTES
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class ZlibDecompressorTest():
|
||||||
|
# Test adopted from test_bz2.py
|
||||||
|
TEXT = HAMLET_SCENE
|
||||||
|
DATA = zlib.compress(HAMLET_SCENE)
|
||||||
|
BAD_DATA = b"Not a valid deflate block"
|
||||||
|
def test_Constructor(self):
|
||||||
|
self.assertRaises(TypeError, zlib._ZlibDecompressor, 42)
|
||||||
|
|
||||||
|
def testDecompress(self):
|
||||||
|
zlibd = zlib._ZlibDecompressor()
|
||||||
|
self.assertRaises(TypeError, zlibd.decompress)
|
||||||
|
text = zlibd.decompress(self.DATA)
|
||||||
|
self.assertEqual(text, self.TEXT)
|
||||||
|
|
||||||
|
def testDecompressChunks10(self):
|
||||||
|
zlibd = zlib._ZlibDecompressor()
|
||||||
|
text = b''
|
||||||
|
n = 0
|
||||||
|
while True:
|
||||||
|
str = self.DATA[n*10:(n+1)*10]
|
||||||
|
if not str:
|
||||||
|
break
|
||||||
|
text += zlibd.decompress(str)
|
||||||
|
n += 1
|
||||||
|
self.assertEqual(text, self.TEXT)
|
||||||
|
|
||||||
|
def testDecompressUnusedData(self):
|
||||||
|
zlibd = zlib._ZlibDecompressor()
|
||||||
|
unused_data = b"this is unused data"
|
||||||
|
text = zlibd.decompress(self.DATA+unused_data)
|
||||||
|
self.assertEqual(text, self.TEXT)
|
||||||
|
self.assertEqual(zlibd.unused_data, unused_data)
|
||||||
|
|
||||||
|
def testEOFError(self):
|
||||||
|
zlibd = zlib._ZlibDecompressor()
|
||||||
|
text = zlibd.decompress(self.DATA)
|
||||||
|
self.assertRaises(EOFError, zlibd.decompress, b"anything")
|
||||||
|
self.assertRaises(EOFError, zlibd.decompress, b"")
|
||||||
|
|
||||||
|
@support.skip_if_pgo_task
|
||||||
|
@bigmemtest(size=_4G + 100, memuse=3.3)
|
||||||
|
def testDecompress4G(self, size):
|
||||||
|
# "Test zlib._ZlibDecompressor.decompress() with >4GiB input"
|
||||||
|
blocksize = 10 * 1024 * 1024
|
||||||
|
block = random.randbytes(blocksize)
|
||||||
|
try:
|
||||||
|
data = block * (size // blocksize + 1)
|
||||||
|
compressed = zlib.compress(data)
|
||||||
|
zlibd = zlib._ZlibDecompressor()
|
||||||
|
decompressed = zlibd.decompress(compressed)
|
||||||
|
self.assertTrue(decompressed == data)
|
||||||
|
finally:
|
||||||
|
data = None
|
||||||
|
compressed = None
|
||||||
|
decompressed = None
|
||||||
|
|
||||||
|
def testPickle(self):
|
||||||
|
for proto in range(pickle.HIGHEST_PROTOCOL + 1):
|
||||||
|
with self.assertRaises(TypeError):
|
||||||
|
pickle.dumps(zlib._ZlibDecompressor(), proto)
|
||||||
|
|
||||||
|
def testDecompressorChunksMaxsize(self):
|
||||||
|
zlibd = zlib._ZlibDecompressor()
|
||||||
|
max_length = 100
|
||||||
|
out = []
|
||||||
|
|
||||||
|
# Feed some input
|
||||||
|
len_ = len(self.BIG_DATA) - 64
|
||||||
|
out.append(zlibd.decompress(self.BIG_DATA[:len_],
|
||||||
|
max_length=max_length))
|
||||||
|
self.assertFalse(zlibd.needs_input)
|
||||||
|
self.assertEqual(len(out[-1]), max_length)
|
||||||
|
|
||||||
|
# Retrieve more data without providing more input
|
||||||
|
out.append(zlibd.decompress(b'', max_length=max_length))
|
||||||
|
self.assertFalse(zlibd.needs_input)
|
||||||
|
self.assertEqual(len(out[-1]), max_length)
|
||||||
|
|
||||||
|
# Retrieve more data while providing more input
|
||||||
|
out.append(zlibd.decompress(self.BIG_DATA[len_:],
|
||||||
|
max_length=max_length))
|
||||||
|
self.assertLessEqual(len(out[-1]), max_length)
|
||||||
|
|
||||||
|
# Retrieve remaining uncompressed data
|
||||||
|
while not zlibd.eof:
|
||||||
|
out.append(zlibd.decompress(b'', max_length=max_length))
|
||||||
|
self.assertLessEqual(len(out[-1]), max_length)
|
||||||
|
|
||||||
|
out = b"".join(out)
|
||||||
|
self.assertEqual(out, self.BIG_TEXT)
|
||||||
|
self.assertEqual(zlibd.unused_data, b"")
|
||||||
|
|
||||||
|
def test_decompressor_inputbuf_1(self):
|
||||||
|
# Test reusing input buffer after moving existing
|
||||||
|
# contents to beginning
|
||||||
|
zlibd = zlib._ZlibDecompressor()
|
||||||
|
out = []
|
||||||
|
|
||||||
|
# Create input buffer and fill it
|
||||||
|
self.assertEqual(zlibd.decompress(self.DATA[:100],
|
||||||
|
max_length=0), b'')
|
||||||
|
|
||||||
|
# Retrieve some results, freeing capacity at beginning
|
||||||
|
# of input buffer
|
||||||
|
out.append(zlibd.decompress(b'', 2))
|
||||||
|
|
||||||
|
# Add more data that fits into input buffer after
|
||||||
|
# moving existing data to beginning
|
||||||
|
out.append(zlibd.decompress(self.DATA[100:105], 15))
|
||||||
|
|
||||||
|
# Decompress rest of data
|
||||||
|
out.append(zlibd.decompress(self.DATA[105:]))
|
||||||
|
self.assertEqual(b''.join(out), self.TEXT)
|
||||||
|
|
||||||
|
def test_decompressor_inputbuf_2(self):
|
||||||
|
# Test reusing input buffer by appending data at the
|
||||||
|
# end right away
|
||||||
|
zlibd = zlib._ZlibDecompressor()
|
||||||
|
out = []
|
||||||
|
|
||||||
|
# Create input buffer and empty it
|
||||||
|
self.assertEqual(zlibd.decompress(self.DATA[:200],
|
||||||
|
max_length=0), b'')
|
||||||
|
out.append(zlibd.decompress(b''))
|
||||||
|
|
||||||
|
# Fill buffer with new data
|
||||||
|
out.append(zlibd.decompress(self.DATA[200:280], 2))
|
||||||
|
|
||||||
|
# Append some more data, not enough to require resize
|
||||||
|
out.append(zlibd.decompress(self.DATA[280:300], 2))
|
||||||
|
|
||||||
|
# Decompress rest of data
|
||||||
|
out.append(zlibd.decompress(self.DATA[300:]))
|
||||||
|
self.assertEqual(b''.join(out), self.TEXT)
|
||||||
|
|
||||||
|
def test_decompressor_inputbuf_3(self):
|
||||||
|
# Test reusing input buffer after extending it
|
||||||
|
|
||||||
|
zlibd = zlib._ZlibDecompressor()
|
||||||
|
out = []
|
||||||
|
|
||||||
|
# Create almost full input buffer
|
||||||
|
out.append(zlibd.decompress(self.DATA[:200], 5))
|
||||||
|
|
||||||
|
# Add even more data to it, requiring resize
|
||||||
|
out.append(zlibd.decompress(self.DATA[200:300], 5))
|
||||||
|
|
||||||
|
# Decompress rest of data
|
||||||
|
out.append(zlibd.decompress(self.DATA[300:]))
|
||||||
|
self.assertEqual(b''.join(out), self.TEXT)
|
||||||
|
|
||||||
|
def test_failure(self):
|
||||||
|
zlibd = zlib._ZlibDecompressor()
|
||||||
|
self.assertRaises(Exception, zlibd.decompress, self.BAD_DATA * 30)
|
||||||
|
# Previously, a second call could crash due to internal inconsistency
|
||||||
|
self.assertRaises(Exception, zlibd.decompress, self.BAD_DATA * 30)
|
||||||
|
|
||||||
|
@support.refcount_test
|
||||||
|
def test_refleaks_in___init__(self):
|
||||||
|
gettotalrefcount = support.get_attribute(sys, 'gettotalrefcount')
|
||||||
|
zlibd = zlib._ZlibDecompressor()
|
||||||
|
refs_before = gettotalrefcount()
|
||||||
|
for i in range(100):
|
||||||
|
zlibd.__init__()
|
||||||
|
self.assertAlmostEqual(gettotalrefcount() - refs_before, 0, delta=10)
|
||||||
|
|
||||||
|
|
||||||
class CustomInt:
|
class CustomInt:
|
||||||
def __index__(self):
|
def __index__(self):
|
||||||
return 100
|
return 100
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
:meth:`gzip.GzipFile.read` reads 10% faster.
|
|
@ -897,6 +897,104 @@ exit:
|
||||||
return return_value;
|
return return_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PyDoc_STRVAR(zlib_ZlibDecompressor_decompress__doc__,
|
||||||
|
"decompress($self, /, data, max_length=-1)\n"
|
||||||
|
"--\n"
|
||||||
|
"\n"
|
||||||
|
"Decompress *data*, returning uncompressed data as bytes.\n"
|
||||||
|
"\n"
|
||||||
|
"If *max_length* is nonnegative, returns at most *max_length* bytes of\n"
|
||||||
|
"decompressed data. If this limit is reached and further output can be\n"
|
||||||
|
"produced, *self.needs_input* will be set to ``False``. In this case, the next\n"
|
||||||
|
"call to *decompress()* may provide *data* as b\'\' to obtain more of the output.\n"
|
||||||
|
"\n"
|
||||||
|
"If all of the input data was decompressed and returned (either because this\n"
|
||||||
|
"was less than *max_length* bytes, or because *max_length* was negative),\n"
|
||||||
|
"*self.needs_input* will be set to True.\n"
|
||||||
|
"\n"
|
||||||
|
"Attempting to decompress data after the end of stream is reached raises an\n"
|
||||||
|
"EOFError. Any data found after the end of the stream is ignored and saved in\n"
|
||||||
|
"the unused_data attribute.");
|
||||||
|
|
||||||
|
#define ZLIB_ZLIBDECOMPRESSOR_DECOMPRESS_METHODDEF \
|
||||||
|
{"decompress", _PyCFunction_CAST(zlib_ZlibDecompressor_decompress), METH_FASTCALL|METH_KEYWORDS, zlib_ZlibDecompressor_decompress__doc__},
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
zlib_ZlibDecompressor_decompress_impl(ZlibDecompressor *self,
|
||||||
|
Py_buffer *data, Py_ssize_t max_length);
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
zlib_ZlibDecompressor_decompress(ZlibDecompressor *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
|
||||||
|
{
|
||||||
|
PyObject *return_value = NULL;
|
||||||
|
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
|
||||||
|
|
||||||
|
#define NUM_KEYWORDS 2
|
||||||
|
static struct {
|
||||||
|
PyGC_Head _this_is_not_used;
|
||||||
|
PyObject_VAR_HEAD
|
||||||
|
PyObject *ob_item[NUM_KEYWORDS];
|
||||||
|
} _kwtuple = {
|
||||||
|
.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
|
||||||
|
.ob_item = { &_Py_ID(data), &_Py_ID(max_length), },
|
||||||
|
};
|
||||||
|
#undef NUM_KEYWORDS
|
||||||
|
#define KWTUPLE (&_kwtuple.ob_base.ob_base)
|
||||||
|
|
||||||
|
#else // !Py_BUILD_CORE
|
||||||
|
# define KWTUPLE NULL
|
||||||
|
#endif // !Py_BUILD_CORE
|
||||||
|
|
||||||
|
static const char * const _keywords[] = {"data", "max_length", NULL};
|
||||||
|
static _PyArg_Parser _parser = {
|
||||||
|
.keywords = _keywords,
|
||||||
|
.fname = "decompress",
|
||||||
|
.kwtuple = KWTUPLE,
|
||||||
|
};
|
||||||
|
#undef KWTUPLE
|
||||||
|
PyObject *argsbuf[2];
|
||||||
|
Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
|
||||||
|
Py_buffer data = {NULL, NULL};
|
||||||
|
Py_ssize_t max_length = -1;
|
||||||
|
|
||||||
|
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 2, 0, argsbuf);
|
||||||
|
if (!args) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
if (!PyBuffer_IsContiguous(&data, 'C')) {
|
||||||
|
_PyArg_BadArgument("decompress", "argument 'data'", "contiguous buffer", args[0]);
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
if (!noptargs) {
|
||||||
|
goto skip_optional_pos;
|
||||||
|
}
|
||||||
|
{
|
||||||
|
Py_ssize_t ival = -1;
|
||||||
|
PyObject *iobj = _PyNumber_Index(args[1]);
|
||||||
|
if (iobj != NULL) {
|
||||||
|
ival = PyLong_AsSsize_t(iobj);
|
||||||
|
Py_DECREF(iobj);
|
||||||
|
}
|
||||||
|
if (ival == -1 && PyErr_Occurred()) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
max_length = ival;
|
||||||
|
}
|
||||||
|
skip_optional_pos:
|
||||||
|
return_value = zlib_ZlibDecompressor_decompress_impl(self, &data, max_length);
|
||||||
|
|
||||||
|
exit:
|
||||||
|
/* Cleanup for data */
|
||||||
|
if (data.obj) {
|
||||||
|
PyBuffer_Release(&data);
|
||||||
|
}
|
||||||
|
|
||||||
|
return return_value;
|
||||||
|
}
|
||||||
|
|
||||||
PyDoc_STRVAR(zlib_adler32__doc__,
|
PyDoc_STRVAR(zlib_adler32__doc__,
|
||||||
"adler32($module, data, value=1, /)\n"
|
"adler32($module, data, value=1, /)\n"
|
||||||
"--\n"
|
"--\n"
|
||||||
|
@ -1031,4 +1129,4 @@ exit:
|
||||||
#ifndef ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF
|
#ifndef ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF
|
||||||
#define ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF
|
#define ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF
|
||||||
#endif /* !defined(ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF) */
|
#endif /* !defined(ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF) */
|
||||||
/*[clinic end generated code: output=9e5f9911d0c273e1 input=a9049054013a1b77]*/
|
/*[clinic end generated code: output=57ff7b511ab23132 input=a9049054013a1b77]*/
|
||||||
|
|
|
@ -8,6 +8,11 @@
|
||||||
#include "Python.h"
|
#include "Python.h"
|
||||||
#include "structmember.h" // PyMemberDef
|
#include "structmember.h" // PyMemberDef
|
||||||
#include "zlib.h"
|
#include "zlib.h"
|
||||||
|
#include "stdbool.h"
|
||||||
|
|
||||||
|
#if defined(ZLIB_VERNUM) && ZLIB_VERNUM < 0x1221
|
||||||
|
#error "At least zlib version 1.2.2.1 is required"
|
||||||
|
#endif
|
||||||
|
|
||||||
// Blocks output buffer wrappers
|
// Blocks output buffer wrappers
|
||||||
#include "pycore_blocks_output_buffer.h"
|
#include "pycore_blocks_output_buffer.h"
|
||||||
|
@ -171,9 +176,6 @@ OutputBuffer_WindowOnError(_BlocksOutputBuffer *buffer, _Uint32Window *window)
|
||||||
} } while (0)
|
} } while (0)
|
||||||
#define LEAVE_ZLIB(obj) PyThread_release_lock((obj)->lock);
|
#define LEAVE_ZLIB(obj) PyThread_release_lock((obj)->lock);
|
||||||
|
|
||||||
#if defined(ZLIB_VERNUM) && ZLIB_VERNUM >= 0x1221
|
|
||||||
# define AT_LEAST_ZLIB_1_2_2_1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* The following parameters are copied from zutil.h, version 0.95 */
|
/* The following parameters are copied from zutil.h, version 0.95 */
|
||||||
#define DEFLATED 8
|
#define DEFLATED 8
|
||||||
|
@ -185,12 +187,14 @@ OutputBuffer_WindowOnError(_BlocksOutputBuffer *buffer, _Uint32Window *window)
|
||||||
|
|
||||||
/* Initial buffer size. */
|
/* Initial buffer size. */
|
||||||
#define DEF_BUF_SIZE (16*1024)
|
#define DEF_BUF_SIZE (16*1024)
|
||||||
|
#define DEF_MAX_INITIAL_BUF_SIZE (16 * 1024 * 1024)
|
||||||
|
|
||||||
static PyModuleDef zlibmodule;
|
static PyModuleDef zlibmodule;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
PyTypeObject *Comptype;
|
PyTypeObject *Comptype;
|
||||||
PyTypeObject *Decomptype;
|
PyTypeObject *Decomptype;
|
||||||
|
PyTypeObject *ZlibDecompressorType;
|
||||||
PyObject *ZlibError;
|
PyObject *ZlibError;
|
||||||
} zlibstate;
|
} zlibstate;
|
||||||
|
|
||||||
|
@ -209,7 +213,7 @@ typedef struct
|
||||||
PyObject *unused_data;
|
PyObject *unused_data;
|
||||||
PyObject *unconsumed_tail;
|
PyObject *unconsumed_tail;
|
||||||
char eof;
|
char eof;
|
||||||
int is_initialised;
|
bool is_initialised;
|
||||||
PyObject *zdict;
|
PyObject *zdict;
|
||||||
PyThread_type_lock lock;
|
PyThread_type_lock lock;
|
||||||
} compobject;
|
} compobject;
|
||||||
|
@ -320,7 +324,7 @@ static PyObject *
|
||||||
zlib_compress_impl(PyObject *module, Py_buffer *data, int level, int wbits)
|
zlib_compress_impl(PyObject *module, Py_buffer *data, int level, int wbits)
|
||||||
/*[clinic end generated code: output=46bd152fadd66df2 input=c4d06ee5782a7e3f]*/
|
/*[clinic end generated code: output=46bd152fadd66df2 input=c4d06ee5782a7e3f]*/
|
||||||
{
|
{
|
||||||
PyObject *RetVal;
|
PyObject *return_value;
|
||||||
int flush;
|
int flush;
|
||||||
z_stream zst;
|
z_stream zst;
|
||||||
_BlocksOutputBuffer buffer = {.list = NULL};
|
_BlocksOutputBuffer buffer = {.list = NULL};
|
||||||
|
@ -387,11 +391,11 @@ zlib_compress_impl(PyObject *module, Py_buffer *data, int level, int wbits)
|
||||||
|
|
||||||
err = deflateEnd(&zst);
|
err = deflateEnd(&zst);
|
||||||
if (err == Z_OK) {
|
if (err == Z_OK) {
|
||||||
RetVal = OutputBuffer_Finish(&buffer, zst.avail_out);
|
return_value = OutputBuffer_Finish(&buffer, zst.avail_out);
|
||||||
if (RetVal == NULL) {
|
if (return_value == NULL) {
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
return RetVal;
|
return return_value;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
zlib_error(state, zst, err, "while finishing compression");
|
zlib_error(state, zst, err, "while finishing compression");
|
||||||
|
@ -419,7 +423,7 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits,
|
||||||
Py_ssize_t bufsize)
|
Py_ssize_t bufsize)
|
||||||
/*[clinic end generated code: output=77c7e35111dc8c42 input=a9ac17beff1f893f]*/
|
/*[clinic end generated code: output=77c7e35111dc8c42 input=a9ac17beff1f893f]*/
|
||||||
{
|
{
|
||||||
PyObject *RetVal;
|
PyObject *return_value;
|
||||||
Byte *ibuf;
|
Byte *ibuf;
|
||||||
Py_ssize_t ibuflen;
|
Py_ssize_t ibuflen;
|
||||||
int err, flush;
|
int err, flush;
|
||||||
|
@ -514,9 +518,9 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits,
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
RetVal = OutputBuffer_WindowFinish(&buffer, &window, zst.avail_out);
|
return_value = OutputBuffer_WindowFinish(&buffer, &window, zst.avail_out);
|
||||||
if (RetVal != NULL) {
|
if (return_value != NULL) {
|
||||||
return RetVal;
|
return return_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
error:
|
error:
|
||||||
|
@ -675,18 +679,10 @@ zlib_decompressobj_impl(PyObject *module, int wbits, PyObject *zdict)
|
||||||
case Z_OK:
|
case Z_OK:
|
||||||
self->is_initialised = 1;
|
self->is_initialised = 1;
|
||||||
if (self->zdict != NULL && wbits < 0) {
|
if (self->zdict != NULL && wbits < 0) {
|
||||||
#ifdef AT_LEAST_ZLIB_1_2_2_1
|
|
||||||
if (set_inflate_zdict(state, self) < 0) {
|
if (set_inflate_zdict(state, self) < 0) {
|
||||||
Py_DECREF(self);
|
Py_DECREF(self);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
PyErr_Format(state->ZlibError,
|
|
||||||
"zlib version %s does not allow raw inflate with dictionary",
|
|
||||||
ZLIB_VERSION);
|
|
||||||
Py_DECREF(self);
|
|
||||||
return NULL;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
return (PyObject *)self;
|
return (PyObject *)self;
|
||||||
case Z_STREAM_ERROR:
|
case Z_STREAM_ERROR:
|
||||||
|
@ -753,7 +749,7 @@ zlib_Compress_compress_impl(compobject *self, PyTypeObject *cls,
|
||||||
Py_buffer *data)
|
Py_buffer *data)
|
||||||
/*[clinic end generated code: output=6731b3f0ff357ca6 input=04d00f65ab01d260]*/
|
/*[clinic end generated code: output=6731b3f0ff357ca6 input=04d00f65ab01d260]*/
|
||||||
{
|
{
|
||||||
PyObject *RetVal;
|
PyObject *return_value;
|
||||||
int err;
|
int err;
|
||||||
_BlocksOutputBuffer buffer = {.list = NULL};
|
_BlocksOutputBuffer buffer = {.list = NULL};
|
||||||
zlibstate *state = PyType_GetModuleState(cls);
|
zlibstate *state = PyType_GetModuleState(cls);
|
||||||
|
@ -791,17 +787,17 @@ zlib_Compress_compress_impl(compobject *self, PyTypeObject *cls,
|
||||||
|
|
||||||
} while (ibuflen != 0);
|
} while (ibuflen != 0);
|
||||||
|
|
||||||
RetVal = OutputBuffer_Finish(&buffer, self->zst.avail_out);
|
return_value = OutputBuffer_Finish(&buffer, self->zst.avail_out);
|
||||||
if (RetVal != NULL) {
|
if (return_value != NULL) {
|
||||||
goto success;
|
goto success;
|
||||||
}
|
}
|
||||||
|
|
||||||
error:
|
error:
|
||||||
OutputBuffer_OnError(&buffer);
|
OutputBuffer_OnError(&buffer);
|
||||||
RetVal = NULL;
|
return_value = NULL;
|
||||||
success:
|
success:
|
||||||
LEAVE_ZLIB(self);
|
LEAVE_ZLIB(self);
|
||||||
return RetVal;
|
return return_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Helper for objdecompress() and flush(). Saves any unconsumed input data in
|
/* Helper for objdecompress() and flush(). Saves any unconsumed input data in
|
||||||
|
@ -875,7 +871,7 @@ zlib_Decompress_decompress_impl(compobject *self, PyTypeObject *cls,
|
||||||
{
|
{
|
||||||
int err = Z_OK;
|
int err = Z_OK;
|
||||||
Py_ssize_t ibuflen;
|
Py_ssize_t ibuflen;
|
||||||
PyObject *RetVal;
|
PyObject *return_value;
|
||||||
_BlocksOutputBuffer buffer = {.list = NULL};
|
_BlocksOutputBuffer buffer = {.list = NULL};
|
||||||
|
|
||||||
PyObject *module = PyType_GetModule(cls);
|
PyObject *module = PyType_GetModule(cls);
|
||||||
|
@ -953,17 +949,17 @@ zlib_Decompress_decompress_impl(compobject *self, PyTypeObject *cls,
|
||||||
goto abort;
|
goto abort;
|
||||||
}
|
}
|
||||||
|
|
||||||
RetVal = OutputBuffer_Finish(&buffer, self->zst.avail_out);
|
return_value = OutputBuffer_Finish(&buffer, self->zst.avail_out);
|
||||||
if (RetVal != NULL) {
|
if (return_value != NULL) {
|
||||||
goto success;
|
goto success;
|
||||||
}
|
}
|
||||||
|
|
||||||
abort:
|
abort:
|
||||||
OutputBuffer_OnError(&buffer);
|
OutputBuffer_OnError(&buffer);
|
||||||
RetVal = NULL;
|
return_value = NULL;
|
||||||
success:
|
success:
|
||||||
LEAVE_ZLIB(self);
|
LEAVE_ZLIB(self);
|
||||||
return RetVal;
|
return return_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*[clinic input]
|
/*[clinic input]
|
||||||
|
@ -985,7 +981,7 @@ zlib_Compress_flush_impl(compobject *self, PyTypeObject *cls, int mode)
|
||||||
/*[clinic end generated code: output=c7efd13efd62add2 input=286146e29442eb6c]*/
|
/*[clinic end generated code: output=c7efd13efd62add2 input=286146e29442eb6c]*/
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
PyObject *RetVal;
|
PyObject *return_value;
|
||||||
_BlocksOutputBuffer buffer = {.list = NULL};
|
_BlocksOutputBuffer buffer = {.list = NULL};
|
||||||
|
|
||||||
zlibstate *state = PyType_GetModuleState(cls);
|
zlibstate *state = PyType_GetModuleState(cls);
|
||||||
|
@ -1042,17 +1038,17 @@ zlib_Compress_flush_impl(compobject *self, PyTypeObject *cls, int mode)
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
RetVal = OutputBuffer_Finish(&buffer, self->zst.avail_out);
|
return_value = OutputBuffer_Finish(&buffer, self->zst.avail_out);
|
||||||
if (RetVal != NULL) {
|
if (return_value != NULL) {
|
||||||
goto success;
|
goto success;
|
||||||
}
|
}
|
||||||
|
|
||||||
error:
|
error:
|
||||||
OutputBuffer_OnError(&buffer);
|
OutputBuffer_OnError(&buffer);
|
||||||
RetVal = NULL;
|
return_value = NULL;
|
||||||
success:
|
success:
|
||||||
LEAVE_ZLIB(self);
|
LEAVE_ZLIB(self);
|
||||||
return RetVal;
|
return return_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_ZLIB_COPY
|
#ifdef HAVE_ZLIB_COPY
|
||||||
|
@ -1071,14 +1067,14 @@ zlib_Compress_copy_impl(compobject *self, PyTypeObject *cls)
|
||||||
{
|
{
|
||||||
zlibstate *state = PyType_GetModuleState(cls);
|
zlibstate *state = PyType_GetModuleState(cls);
|
||||||
|
|
||||||
compobject *retval = newcompobject(state->Comptype);
|
compobject *return_value = newcompobject(state->Comptype);
|
||||||
if (!retval) return NULL;
|
if (!return_value) return NULL;
|
||||||
|
|
||||||
/* Copy the zstream state
|
/* Copy the zstream state
|
||||||
* We use ENTER_ZLIB / LEAVE_ZLIB to make this thread-safe
|
* We use ENTER_ZLIB / LEAVE_ZLIB to make this thread-safe
|
||||||
*/
|
*/
|
||||||
ENTER_ZLIB(self);
|
ENTER_ZLIB(self);
|
||||||
int err = deflateCopy(&retval->zst, &self->zst);
|
int err = deflateCopy(&return_value->zst, &self->zst);
|
||||||
switch (err) {
|
switch (err) {
|
||||||
case Z_OK:
|
case Z_OK:
|
||||||
break;
|
break;
|
||||||
|
@ -1094,22 +1090,22 @@ zlib_Compress_copy_impl(compobject *self, PyTypeObject *cls)
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
Py_INCREF(self->unused_data);
|
Py_INCREF(self->unused_data);
|
||||||
Py_XSETREF(retval->unused_data, self->unused_data);
|
Py_XSETREF(return_value->unused_data, self->unused_data);
|
||||||
Py_INCREF(self->unconsumed_tail);
|
Py_INCREF(self->unconsumed_tail);
|
||||||
Py_XSETREF(retval->unconsumed_tail, self->unconsumed_tail);
|
Py_XSETREF(return_value->unconsumed_tail, self->unconsumed_tail);
|
||||||
Py_XINCREF(self->zdict);
|
Py_XINCREF(self->zdict);
|
||||||
Py_XSETREF(retval->zdict, self->zdict);
|
Py_XSETREF(return_value->zdict, self->zdict);
|
||||||
retval->eof = self->eof;
|
return_value->eof = self->eof;
|
||||||
|
|
||||||
/* Mark it as being initialized */
|
/* Mark it as being initialized */
|
||||||
retval->is_initialised = 1;
|
return_value->is_initialised = 1;
|
||||||
|
|
||||||
LEAVE_ZLIB(self);
|
LEAVE_ZLIB(self);
|
||||||
return (PyObject *)retval;
|
return (PyObject *)return_value;
|
||||||
|
|
||||||
error:
|
error:
|
||||||
LEAVE_ZLIB(self);
|
LEAVE_ZLIB(self);
|
||||||
Py_XDECREF(retval);
|
Py_XDECREF(return_value);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1158,14 +1154,14 @@ zlib_Decompress_copy_impl(compobject *self, PyTypeObject *cls)
|
||||||
{
|
{
|
||||||
zlibstate *state = PyType_GetModuleState(cls);
|
zlibstate *state = PyType_GetModuleState(cls);
|
||||||
|
|
||||||
compobject *retval = newcompobject(state->Decomptype);
|
compobject *return_value = newcompobject(state->Decomptype);
|
||||||
if (!retval) return NULL;
|
if (!return_value) return NULL;
|
||||||
|
|
||||||
/* Copy the zstream state
|
/* Copy the zstream state
|
||||||
* We use ENTER_ZLIB / LEAVE_ZLIB to make this thread-safe
|
* We use ENTER_ZLIB / LEAVE_ZLIB to make this thread-safe
|
||||||
*/
|
*/
|
||||||
ENTER_ZLIB(self);
|
ENTER_ZLIB(self);
|
||||||
int err = inflateCopy(&retval->zst, &self->zst);
|
int err = inflateCopy(&return_value->zst, &self->zst);
|
||||||
switch (err) {
|
switch (err) {
|
||||||
case Z_OK:
|
case Z_OK:
|
||||||
break;
|
break;
|
||||||
|
@ -1182,22 +1178,22 @@ zlib_Decompress_copy_impl(compobject *self, PyTypeObject *cls)
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_INCREF(self->unused_data);
|
Py_INCREF(self->unused_data);
|
||||||
Py_XSETREF(retval->unused_data, self->unused_data);
|
Py_XSETREF(return_value->unused_data, self->unused_data);
|
||||||
Py_INCREF(self->unconsumed_tail);
|
Py_INCREF(self->unconsumed_tail);
|
||||||
Py_XSETREF(retval->unconsumed_tail, self->unconsumed_tail);
|
Py_XSETREF(return_value->unconsumed_tail, self->unconsumed_tail);
|
||||||
Py_XINCREF(self->zdict);
|
Py_XINCREF(self->zdict);
|
||||||
Py_XSETREF(retval->zdict, self->zdict);
|
Py_XSETREF(return_value->zdict, self->zdict);
|
||||||
retval->eof = self->eof;
|
return_value->eof = self->eof;
|
||||||
|
|
||||||
/* Mark it as being initialized */
|
/* Mark it as being initialized */
|
||||||
retval->is_initialised = 1;
|
return_value->is_initialised = 1;
|
||||||
|
|
||||||
LEAVE_ZLIB(self);
|
LEAVE_ZLIB(self);
|
||||||
return (PyObject *)retval;
|
return (PyObject *)return_value;
|
||||||
|
|
||||||
error:
|
error:
|
||||||
LEAVE_ZLIB(self);
|
LEAVE_ZLIB(self);
|
||||||
Py_XDECREF(retval);
|
Py_XDECREF(return_value);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1252,7 +1248,7 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls,
|
||||||
{
|
{
|
||||||
int err, flush;
|
int err, flush;
|
||||||
Py_buffer data;
|
Py_buffer data;
|
||||||
PyObject *RetVal;
|
PyObject *return_value;
|
||||||
Py_ssize_t ibuflen;
|
Py_ssize_t ibuflen;
|
||||||
_BlocksOutputBuffer buffer = {.list = NULL};
|
_BlocksOutputBuffer buffer = {.list = NULL};
|
||||||
_Uint32Window window; // output buffer's UINT32_MAX sliding window
|
_Uint32Window window; // output buffer's UINT32_MAX sliding window
|
||||||
|
@ -1306,13 +1302,6 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls,
|
||||||
case Z_STREAM_END:
|
case Z_STREAM_END:
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
if (err == Z_NEED_DICT && self->zdict != NULL) {
|
|
||||||
if (set_inflate_zdict(state, self) < 0) {
|
|
||||||
goto abort;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
goto save;
|
goto save;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1336,18 +1325,475 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
RetVal = OutputBuffer_WindowFinish(&buffer, &window, self->zst.avail_out);
|
return_value = OutputBuffer_WindowFinish(&buffer, &window, self->zst.avail_out);
|
||||||
if (RetVal != NULL) {
|
if (return_value != NULL) {
|
||||||
goto success;
|
goto success;
|
||||||
}
|
}
|
||||||
|
|
||||||
abort:
|
abort:
|
||||||
OutputBuffer_WindowOnError(&buffer, &window);
|
OutputBuffer_WindowOnError(&buffer, &window);
|
||||||
RetVal = NULL;
|
return_value = NULL;
|
||||||
success:
|
success:
|
||||||
PyBuffer_Release(&data);
|
PyBuffer_Release(&data);
|
||||||
LEAVE_ZLIB(self);
|
LEAVE_ZLIB(self);
|
||||||
return RetVal;
|
return return_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
PyObject_HEAD
|
||||||
|
z_stream zst;
|
||||||
|
PyObject *zdict;
|
||||||
|
PyThread_type_lock lock;
|
||||||
|
PyObject *unused_data;
|
||||||
|
uint8_t *input_buffer;
|
||||||
|
Py_ssize_t input_buffer_size;
|
||||||
|
/* zst>avail_in is only 32 bit, so we store the true length
|
||||||
|
separately. Conversion and looping is encapsulated in
|
||||||
|
decompress_buf() */
|
||||||
|
Py_ssize_t avail_in_real;
|
||||||
|
bool is_initialised;
|
||||||
|
char eof; /* T_BOOL expects a char */
|
||||||
|
char needs_input;
|
||||||
|
} ZlibDecompressor;
|
||||||
|
|
||||||
|
/*[clinic input]
|
||||||
|
class zlib.ZlibDecompressor "ZlibDecompressor *" "&ZlibDecompressorType"
|
||||||
|
[clinic start generated code]*/
|
||||||
|
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=0658178ab94645df]*/
|
||||||
|
|
||||||
|
static void
|
||||||
|
ZlibDecompressor_dealloc(ZlibDecompressor *self)
|
||||||
|
{
|
||||||
|
PyObject *type = (PyObject *)Py_TYPE(self);
|
||||||
|
PyThread_free_lock(self->lock);
|
||||||
|
if (self->is_initialised) {
|
||||||
|
inflateEnd(&self->zst);
|
||||||
|
}
|
||||||
|
PyMem_Free(self->input_buffer);
|
||||||
|
Py_CLEAR(self->unused_data);
|
||||||
|
Py_CLEAR(self->zdict);
|
||||||
|
PyObject_Free(self);
|
||||||
|
Py_DECREF(type);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
set_inflate_zdict_ZlibDecompressor(zlibstate *state, ZlibDecompressor *self)
|
||||||
|
{
|
||||||
|
Py_buffer zdict_buf;
|
||||||
|
if (PyObject_GetBuffer(self->zdict, &zdict_buf, PyBUF_SIMPLE) == -1) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if ((size_t)zdict_buf.len > UINT_MAX) {
|
||||||
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
|
"zdict length does not fit in an unsigned int");
|
||||||
|
PyBuffer_Release(&zdict_buf);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
int err;
|
||||||
|
err = inflateSetDictionary(&self->zst,
|
||||||
|
zdict_buf.buf, (unsigned int)zdict_buf.len);
|
||||||
|
PyBuffer_Release(&zdict_buf);
|
||||||
|
if (err != Z_OK) {
|
||||||
|
zlib_error(state, self->zst, err, "while setting zdict");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static Py_ssize_t
|
||||||
|
arrange_output_buffer_with_maximum(uint32_t *avail_out,
|
||||||
|
uint8_t **next_out,
|
||||||
|
PyObject **buffer,
|
||||||
|
Py_ssize_t length,
|
||||||
|
Py_ssize_t max_length)
|
||||||
|
{
|
||||||
|
Py_ssize_t occupied;
|
||||||
|
|
||||||
|
if (*buffer == NULL) {
|
||||||
|
if (!(*buffer = PyBytes_FromStringAndSize(NULL, length)))
|
||||||
|
return -1;
|
||||||
|
occupied = 0;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
occupied = *next_out - (uint8_t *)PyBytes_AS_STRING(*buffer);
|
||||||
|
|
||||||
|
if (length == occupied) {
|
||||||
|
Py_ssize_t new_length;
|
||||||
|
assert(length <= max_length);
|
||||||
|
/* can not scale the buffer over max_length */
|
||||||
|
if (length == max_length)
|
||||||
|
return -2;
|
||||||
|
if (length <= (max_length >> 1))
|
||||||
|
new_length = length << 1;
|
||||||
|
else
|
||||||
|
new_length = max_length;
|
||||||
|
if (_PyBytes_Resize(buffer, new_length) < 0)
|
||||||
|
return -1;
|
||||||
|
length = new_length;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
*avail_out = (uint32_t)Py_MIN((size_t)(length - occupied), UINT32_MAX);
|
||||||
|
*next_out = (uint8_t *)PyBytes_AS_STRING(*buffer) + occupied;
|
||||||
|
|
||||||
|
return length;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline Py_ssize_t
|
||||||
|
arrange_output_buffer(uint32_t *avail_out,
|
||||||
|
uint8_t **next_out,
|
||||||
|
PyObject **buffer,
|
||||||
|
Py_ssize_t length)
|
||||||
|
{
|
||||||
|
Py_ssize_t ret;
|
||||||
|
|
||||||
|
ret = arrange_output_buffer_with_maximum(avail_out, next_out, buffer,
|
||||||
|
length,
|
||||||
|
PY_SSIZE_T_MAX);
|
||||||
|
if (ret == -2)
|
||||||
|
PyErr_NoMemory();
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Decompress data of length self->avail_in_real in self->state.next_in. The
|
||||||
|
output buffer is allocated dynamically and returned. If the max_length is
|
||||||
|
of sufficiently low size, max_length is allocated immediately. At most
|
||||||
|
max_length bytes are returned, so some of the input may not be consumed.
|
||||||
|
self->state.next_in and self->avail_in_real are updated to reflect the
|
||||||
|
consumed input. */
|
||||||
|
static PyObject*
|
||||||
|
decompress_buf(ZlibDecompressor *self, Py_ssize_t max_length)
|
||||||
|
{
|
||||||
|
/* data_size is strictly positive, but because we repeatedly have to
|
||||||
|
compare against max_length and PyBytes_GET_SIZE we declare it as
|
||||||
|
signed */
|
||||||
|
PyObject *return_value = NULL;
|
||||||
|
Py_ssize_t hard_limit;
|
||||||
|
Py_ssize_t obuflen;
|
||||||
|
zlibstate *state = PyType_GetModuleState(Py_TYPE(self));
|
||||||
|
|
||||||
|
int err = Z_OK;
|
||||||
|
|
||||||
|
/* When sys.maxsize is passed as default use DEF_BUF_SIZE as start buffer.
|
||||||
|
In this particular case the data may not necessarily be very big, so
|
||||||
|
it is better to grow dynamically.*/
|
||||||
|
if ((max_length < 0) || max_length == PY_SSIZE_T_MAX) {
|
||||||
|
hard_limit = PY_SSIZE_T_MAX;
|
||||||
|
obuflen = DEF_BUF_SIZE;
|
||||||
|
} else {
|
||||||
|
/* Assume that decompressor is used in file decompression with a fixed
|
||||||
|
block size of max_length. In that case we will reach max_length almost
|
||||||
|
always (except at the end of the file). So it makes sense to allocate
|
||||||
|
max_length. */
|
||||||
|
hard_limit = max_length;
|
||||||
|
obuflen = max_length;
|
||||||
|
if (obuflen > DEF_MAX_INITIAL_BUF_SIZE){
|
||||||
|
// Safeguard against memory overflow.
|
||||||
|
obuflen = DEF_MAX_INITIAL_BUF_SIZE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
do {
|
||||||
|
arrange_input_buffer(&(self->zst), &(self->avail_in_real));
|
||||||
|
|
||||||
|
do {
|
||||||
|
obuflen = arrange_output_buffer_with_maximum(&(self->zst.avail_out),
|
||||||
|
&(self->zst.next_out),
|
||||||
|
&return_value,
|
||||||
|
obuflen,
|
||||||
|
hard_limit);
|
||||||
|
if (obuflen == -1){
|
||||||
|
PyErr_SetString(PyExc_MemoryError,
|
||||||
|
"Insufficient memory for buffer allocation");
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
else if (obuflen == -2) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Py_BEGIN_ALLOW_THREADS
|
||||||
|
err = inflate(&self->zst, Z_SYNC_FLUSH);
|
||||||
|
Py_END_ALLOW_THREADS
|
||||||
|
switch (err) {
|
||||||
|
case Z_OK: /* fall through */
|
||||||
|
case Z_BUF_ERROR: /* fall through */
|
||||||
|
case Z_STREAM_END:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
if (err == Z_NEED_DICT) {
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} while (self->zst.avail_out == 0);
|
||||||
|
} while(err != Z_STREAM_END && self->avail_in_real != 0);
|
||||||
|
|
||||||
|
if (err == Z_STREAM_END) {
|
||||||
|
self->eof = 1;
|
||||||
|
self->is_initialised = 0;
|
||||||
|
/* Unlike the Decompress object we call inflateEnd here as there are no
|
||||||
|
backwards compatibility issues */
|
||||||
|
err = inflateEnd(&self->zst);
|
||||||
|
if (err != Z_OK) {
|
||||||
|
zlib_error(state, self->zst, err, "while finishing decompression");
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
} else if (err != Z_OK && err != Z_BUF_ERROR) {
|
||||||
|
zlib_error(state, self->zst, err, "while decompressing data");
|
||||||
|
}
|
||||||
|
|
||||||
|
self->avail_in_real += self->zst.avail_in;
|
||||||
|
|
||||||
|
if (_PyBytes_Resize(&return_value, self->zst.next_out -
|
||||||
|
(uint8_t *)PyBytes_AS_STRING(return_value)) != 0) {
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
goto success;
|
||||||
|
error:
|
||||||
|
Py_CLEAR(return_value);
|
||||||
|
success:
|
||||||
|
return return_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
decompress(ZlibDecompressor *self, uint8_t *data,
|
||||||
|
size_t len, Py_ssize_t max_length)
|
||||||
|
{
|
||||||
|
bool input_buffer_in_use;
|
||||||
|
PyObject *result;
|
||||||
|
|
||||||
|
/* Prepend unconsumed input if necessary */
|
||||||
|
if (self->zst.next_in != NULL) {
|
||||||
|
size_t avail_now, avail_total;
|
||||||
|
|
||||||
|
/* Number of bytes we can append to input buffer */
|
||||||
|
avail_now = (self->input_buffer + self->input_buffer_size)
|
||||||
|
- (self->zst.next_in + self->avail_in_real);
|
||||||
|
|
||||||
|
/* Number of bytes we can append if we move existing
|
||||||
|
contents to beginning of buffer (overwriting
|
||||||
|
consumed input) */
|
||||||
|
avail_total = self->input_buffer_size - self->avail_in_real;
|
||||||
|
|
||||||
|
if (avail_total < len) {
|
||||||
|
size_t offset = self->zst.next_in - self->input_buffer;
|
||||||
|
uint8_t *tmp;
|
||||||
|
size_t new_size = self->input_buffer_size + len - avail_now;
|
||||||
|
|
||||||
|
/* Assign to temporary variable first, so we don't
|
||||||
|
lose address of allocated buffer if realloc fails */
|
||||||
|
tmp = PyMem_Realloc(self->input_buffer, new_size);
|
||||||
|
if (tmp == NULL) {
|
||||||
|
PyErr_SetNone(PyExc_MemoryError);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
self->input_buffer = tmp;
|
||||||
|
self->input_buffer_size = new_size;
|
||||||
|
|
||||||
|
self->zst.next_in = self->input_buffer + offset;
|
||||||
|
}
|
||||||
|
else if (avail_now < len) {
|
||||||
|
memmove(self->input_buffer, self->zst.next_in,
|
||||||
|
self->avail_in_real);
|
||||||
|
self->zst.next_in = self->input_buffer;
|
||||||
|
}
|
||||||
|
memcpy((void*)(self->zst.next_in + self->avail_in_real), data, len);
|
||||||
|
self->avail_in_real += len;
|
||||||
|
input_buffer_in_use = 1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
self->zst.next_in = data;
|
||||||
|
self->avail_in_real = len;
|
||||||
|
input_buffer_in_use = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
result = decompress_buf(self, max_length);
|
||||||
|
if(result == NULL) {
|
||||||
|
self->zst.next_in = NULL;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (self->eof) {
|
||||||
|
self->needs_input = 0;
|
||||||
|
|
||||||
|
if (self->avail_in_real > 0) {
|
||||||
|
PyObject *unused_data = PyBytes_FromStringAndSize(
|
||||||
|
(char *)self->zst.next_in, self->avail_in_real);
|
||||||
|
if (unused_data == NULL) {
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
Py_XSETREF(self->unused_data, unused_data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (self->avail_in_real == 0) {
|
||||||
|
self->zst.next_in = NULL;
|
||||||
|
self->needs_input = 1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
self->needs_input = 0;
|
||||||
|
|
||||||
|
/* If we did not use the input buffer, we now have
|
||||||
|
to copy the tail from the caller's buffer into the
|
||||||
|
input buffer */
|
||||||
|
if (!input_buffer_in_use) {
|
||||||
|
|
||||||
|
/* Discard buffer if it's too small
|
||||||
|
(resizing it may needlessly copy the current contents) */
|
||||||
|
if (self->input_buffer != NULL &&
|
||||||
|
self->input_buffer_size < self->avail_in_real) {
|
||||||
|
PyMem_Free(self->input_buffer);
|
||||||
|
self->input_buffer = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Allocate if necessary */
|
||||||
|
if (self->input_buffer == NULL) {
|
||||||
|
self->input_buffer = PyMem_Malloc(self->avail_in_real);
|
||||||
|
if (self->input_buffer == NULL) {
|
||||||
|
PyErr_SetNone(PyExc_MemoryError);
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
self->input_buffer_size = self->avail_in_real;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copy tail */
|
||||||
|
memcpy(self->input_buffer, self->zst.next_in, self->avail_in_real);
|
||||||
|
self->zst.next_in = self->input_buffer;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
|
||||||
|
error:
|
||||||
|
Py_XDECREF(result);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*[clinic input]
|
||||||
|
zlib.ZlibDecompressor.decompress
|
||||||
|
|
||||||
|
data: Py_buffer
|
||||||
|
max_length: Py_ssize_t=-1
|
||||||
|
|
||||||
|
Decompress *data*, returning uncompressed data as bytes.
|
||||||
|
|
||||||
|
If *max_length* is nonnegative, returns at most *max_length* bytes of
|
||||||
|
decompressed data. If this limit is reached and further output can be
|
||||||
|
produced, *self.needs_input* will be set to ``False``. In this case, the next
|
||||||
|
call to *decompress()* may provide *data* as b'' to obtain more of the output.
|
||||||
|
|
||||||
|
If all of the input data was decompressed and returned (either because this
|
||||||
|
was less than *max_length* bytes, or because *max_length* was negative),
|
||||||
|
*self.needs_input* will be set to True.
|
||||||
|
|
||||||
|
Attempting to decompress data after the end of stream is reached raises an
|
||||||
|
EOFError. Any data found after the end of the stream is ignored and saved in
|
||||||
|
the unused_data attribute.
|
||||||
|
[clinic start generated code]*/
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
zlib_ZlibDecompressor_decompress_impl(ZlibDecompressor *self,
|
||||||
|
Py_buffer *data, Py_ssize_t max_length)
|
||||||
|
/*[clinic end generated code: output=990d32787b775f85 input=0b29d99715250b96]*/
|
||||||
|
|
||||||
|
{
|
||||||
|
PyObject *result = NULL;
|
||||||
|
|
||||||
|
ENTER_ZLIB(self);
|
||||||
|
if (self->eof) {
|
||||||
|
PyErr_SetString(PyExc_EOFError, "End of stream already reached");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
result = decompress(self, data->buf, data->len, max_length);
|
||||||
|
}
|
||||||
|
LEAVE_ZLIB(self);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
PyDoc_STRVAR(ZlibDecompressor__new____doc__,
|
||||||
|
"_ZlibDecompressor(wbits=15, zdict=b\'\')\n"
|
||||||
|
"--\n"
|
||||||
|
"\n"
|
||||||
|
"Create a decompressor object for decompressing data incrementally.\n"
|
||||||
|
"\n"
|
||||||
|
" wbits = 15\n"
|
||||||
|
" zdict\n"
|
||||||
|
" The predefined compression dictionary. This is a sequence of bytes\n"
|
||||||
|
" (such as a bytes object) containing subsequences that are expected\n"
|
||||||
|
" to occur frequently in the data that is to be compressed. Those\n"
|
||||||
|
" subsequences that are expected to be most common should come at the\n"
|
||||||
|
" end of the dictionary. This must be the same dictionary as used by the\n"
|
||||||
|
" compressor that produced the input data.\n"
|
||||||
|
"\n");
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
ZlibDecompressor__new__(PyTypeObject *cls,
|
||||||
|
PyObject *args,
|
||||||
|
PyObject *kwargs)
|
||||||
|
{
|
||||||
|
static char *keywords[] = {"wbits", "zdict", NULL};
|
||||||
|
static char *format = "|iO:_ZlibDecompressor";
|
||||||
|
int wbits = MAX_WBITS;
|
||||||
|
PyObject *zdict = NULL;
|
||||||
|
zlibstate *state = PyType_GetModuleState(cls);
|
||||||
|
|
||||||
|
if (!PyArg_ParseTupleAndKeywords(
|
||||||
|
args, kwargs, format, keywords, &wbits, &zdict)) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
ZlibDecompressor *self = PyObject_New(ZlibDecompressor, cls);
|
||||||
|
self->eof = 0;
|
||||||
|
self->needs_input = 1;
|
||||||
|
self->avail_in_real = 0;
|
||||||
|
self->input_buffer = NULL;
|
||||||
|
self->input_buffer_size = 0;
|
||||||
|
if (zdict != NULL) {
|
||||||
|
Py_INCREF(zdict);
|
||||||
|
}
|
||||||
|
self->zdict = zdict;
|
||||||
|
self->zst.opaque = NULL;
|
||||||
|
self->zst.zalloc = PyZlib_Malloc;
|
||||||
|
self->zst.zfree = PyZlib_Free;
|
||||||
|
self->zst.next_in = NULL;
|
||||||
|
self->zst.avail_in = 0;
|
||||||
|
self->unused_data = PyBytes_FromStringAndSize(NULL, 0);
|
||||||
|
if (self->unused_data == NULL) {
|
||||||
|
Py_CLEAR(self);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
self->lock = PyThread_allocate_lock();
|
||||||
|
if (self->lock == NULL) {
|
||||||
|
Py_DECREF(self);
|
||||||
|
PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
int err = inflateInit2(&(self->zst), wbits);
|
||||||
|
switch (err) {
|
||||||
|
case Z_OK:
|
||||||
|
self->is_initialised = 1;
|
||||||
|
if (self->zdict != NULL && wbits < 0) {
|
||||||
|
if (set_inflate_zdict_ZlibDecompressor(state, self) < 0) {
|
||||||
|
Py_DECREF(self);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return (PyObject *)self;
|
||||||
|
case Z_STREAM_ERROR:
|
||||||
|
Py_DECREF(self);
|
||||||
|
PyErr_SetString(PyExc_ValueError, "Invalid initialization option");
|
||||||
|
return NULL;
|
||||||
|
case Z_MEM_ERROR:
|
||||||
|
Py_DECREF(self);
|
||||||
|
PyErr_SetString(PyExc_MemoryError,
|
||||||
|
"Can't allocate memory for decompression object");
|
||||||
|
return NULL;
|
||||||
|
default:
|
||||||
|
zlib_error(state, self->zst, err, "while creating decompression object");
|
||||||
|
Py_DECREF(self);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#include "clinic/zlibmodule.c.h"
|
#include "clinic/zlibmodule.c.h"
|
||||||
|
@ -1372,6 +1818,11 @@ static PyMethodDef Decomp_methods[] =
|
||||||
{NULL, NULL}
|
{NULL, NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static PyMethodDef ZlibDecompressor_methods[] = {
|
||||||
|
ZLIB_ZLIBDECOMPRESSOR_DECOMPRESS_METHODDEF
|
||||||
|
{NULL}
|
||||||
|
};
|
||||||
|
|
||||||
#define COMP_OFF(x) offsetof(compobject, x)
|
#define COMP_OFF(x) offsetof(compobject, x)
|
||||||
static PyMemberDef Decomp_members[] = {
|
static PyMemberDef Decomp_members[] = {
|
||||||
{"unused_data", T_OBJECT, COMP_OFF(unused_data), READONLY},
|
{"unused_data", T_OBJECT, COMP_OFF(unused_data), READONLY},
|
||||||
|
@ -1380,6 +1831,26 @@ static PyMemberDef Decomp_members[] = {
|
||||||
{NULL},
|
{NULL},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
PyDoc_STRVAR(ZlibDecompressor_eof__doc__,
|
||||||
|
"True if the end-of-stream marker has been reached.");
|
||||||
|
|
||||||
|
PyDoc_STRVAR(ZlibDecompressor_unused_data__doc__,
|
||||||
|
"Data found after the end of the compressed stream.");
|
||||||
|
|
||||||
|
PyDoc_STRVAR(ZlibDecompressor_needs_input_doc,
|
||||||
|
"True if more input is needed before more decompressed data can be produced.");
|
||||||
|
|
||||||
|
static PyMemberDef ZlibDecompressor_members[] = {
|
||||||
|
{"eof", T_BOOL, offsetof(ZlibDecompressor, eof),
|
||||||
|
READONLY, ZlibDecompressor_eof__doc__},
|
||||||
|
{"unused_data", T_OBJECT_EX, offsetof(ZlibDecompressor, unused_data),
|
||||||
|
READONLY, ZlibDecompressor_unused_data__doc__},
|
||||||
|
{"needs_input", T_BOOL, offsetof(ZlibDecompressor, needs_input), READONLY,
|
||||||
|
ZlibDecompressor_needs_input_doc},
|
||||||
|
{NULL},
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
/*[clinic input]
|
/*[clinic input]
|
||||||
zlib.adler32
|
zlib.adler32
|
||||||
|
|
||||||
|
@ -1497,6 +1968,25 @@ static PyType_Spec Decomptype_spec = {
|
||||||
.slots = Decomptype_slots,
|
.slots = Decomptype_slots,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static PyType_Slot ZlibDecompressor_type_slots[] = {
|
||||||
|
{Py_tp_dealloc, ZlibDecompressor_dealloc},
|
||||||
|
{Py_tp_members, ZlibDecompressor_members},
|
||||||
|
{Py_tp_new, ZlibDecompressor__new__},
|
||||||
|
{Py_tp_doc, (char *)ZlibDecompressor__new____doc__},
|
||||||
|
{Py_tp_methods, ZlibDecompressor_methods},
|
||||||
|
{0, 0},
|
||||||
|
};
|
||||||
|
|
||||||
|
static PyType_Spec ZlibDecompressor_type_spec = {
|
||||||
|
.name = "zlib._ZlibDecompressor",
|
||||||
|
.basicsize = sizeof(ZlibDecompressor),
|
||||||
|
// Calling PyType_GetModuleState() on a subclass is not safe.
|
||||||
|
// ZlibDecompressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
|
||||||
|
// which prevents to create a subclass.
|
||||||
|
// So calling PyType_GetModuleState() in this file is always safe.
|
||||||
|
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
|
||||||
|
.slots = ZlibDecompressor_type_slots,
|
||||||
|
};
|
||||||
PyDoc_STRVAR(zlib_module_documentation,
|
PyDoc_STRVAR(zlib_module_documentation,
|
||||||
"The functions in this module allow compression and decompression using the\n"
|
"The functions in this module allow compression and decompression using the\n"
|
||||||
"zlib library, which is based on GNU zip.\n"
|
"zlib library, which is based on GNU zip.\n"
|
||||||
|
@ -1518,6 +2008,7 @@ zlib_clear(PyObject *mod)
|
||||||
zlibstate *state = get_zlib_state(mod);
|
zlibstate *state = get_zlib_state(mod);
|
||||||
Py_CLEAR(state->Comptype);
|
Py_CLEAR(state->Comptype);
|
||||||
Py_CLEAR(state->Decomptype);
|
Py_CLEAR(state->Decomptype);
|
||||||
|
Py_CLEAR(state->ZlibDecompressorType);
|
||||||
Py_CLEAR(state->ZlibError);
|
Py_CLEAR(state->ZlibError);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -1528,6 +2019,7 @@ zlib_traverse(PyObject *mod, visitproc visit, void *arg)
|
||||||
zlibstate *state = get_zlib_state(mod);
|
zlibstate *state = get_zlib_state(mod);
|
||||||
Py_VISIT(state->Comptype);
|
Py_VISIT(state->Comptype);
|
||||||
Py_VISIT(state->Decomptype);
|
Py_VISIT(state->Decomptype);
|
||||||
|
Py_VISIT(state->ZlibDecompressorType);
|
||||||
Py_VISIT(state->ZlibError);
|
Py_VISIT(state->ZlibError);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -1555,6 +2047,12 @@ zlib_exec(PyObject *mod)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
state->ZlibDecompressorType = (PyTypeObject *)PyType_FromModuleAndSpec(
|
||||||
|
mod, &ZlibDecompressor_type_spec, NULL);
|
||||||
|
if (state->ZlibDecompressorType == NULL) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
state->ZlibError = PyErr_NewException("zlib.error", NULL, NULL);
|
state->ZlibError = PyErr_NewException("zlib.error", NULL, NULL);
|
||||||
if (state->ZlibError == NULL) {
|
if (state->ZlibError == NULL) {
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -1565,6 +2063,12 @@ zlib_exec(PyObject *mod)
|
||||||
Py_DECREF(state->ZlibError);
|
Py_DECREF(state->ZlibError);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
Py_INCREF(state->ZlibDecompressorType);
|
||||||
|
if (PyModule_AddObject(mod, "_ZlibDecompressor",
|
||||||
|
(PyObject *)state->ZlibDecompressorType) < 0) {
|
||||||
|
Py_DECREF(state->ZlibDecompressorType);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
#define ZLIB_ADD_INT_MACRO(c) \
|
#define ZLIB_ADD_INT_MACRO(c) \
|
||||||
do { \
|
do { \
|
||||||
|
|
Loading…
Reference in New Issue