mirror of https://github.com/python/cpython
[ #403753 ] zlib decompress; uncontrollable memory usage
Mostly by Toby Dickenson and Titus Brown. Add an optional argument to a decompression object's decompress() method. The argument specifies the maximum length of the return value. If the uncompressed data exceeds this length, the excess data is stored as the unconsumed_tail attribute. (Not to be confused with unused_data, which is a separate issue.) Difference from SF patch: Default value for unconsumed_tail is "" rather than None. It's simpler if the attribute is always a string.
This commit is contained in:
parent
7a59445e37
commit
511e2cacc4
|
@ -120,7 +120,7 @@ prevents compressing any more data. After calling
|
|||
action is to delete the object.
|
||||
\end{methoddesc}
|
||||
|
||||
Decompression objects support the following methods, and a single attribute:
|
||||
Decompression objects support the following methods, and two attributes:
|
||||
|
||||
\begin{memberdesc}{unused_data}
|
||||
A string which contains any unused data from the last string fed to
|
||||
|
@ -135,13 +135,27 @@ reading data and feeding it into a decompression object's
|
|||
no longer the empty string.
|
||||
\end{memberdesc}
|
||||
|
||||
\begin{methoddesc}[Decompress]{decompress}{string}
|
||||
\begin{memberdesc}{unconsumed_tail}
|
||||
A string that contains any data that was not consumed by the last
|
||||
\method{decompress()} call because it exceeded the limit for the
|
||||
uncompressed data buffer.
|
||||
\end{memberdesc}
|
||||
|
||||
\begin{methoddesc}[Decompress]{decompress}{string}{\optional{max_length}}
|
||||
Decompress \var{string}, returning a string containing the
|
||||
uncompressed data corresponding to at least part of the data in
|
||||
\var{string}. This data should be concatenated to the output produced
|
||||
by any preceding calls to the
|
||||
\method{decompress()} method. Some of the input data may be preserved
|
||||
in internal buffers for later processing.
|
||||
|
||||
If the optional parameter \var{max_length} is supplied then the return value
|
||||
will be no longer than \var{max_length}. This may mean that not all of the
|
||||
compressed input can be processed, and any unconsumed data will be stored
|
||||
in the attribute \member{unconsumed_tail}. This string must be passed
|
||||
to a subsequent call to \method{decompress()} if decompression is to
|
||||
continue. If \var{max_length} is not supplied then the whole input is
|
||||
decompressed, and \member{unconsumed_tail} is an empty string.
|
||||
\end{methoddesc}
|
||||
|
||||
\begin{methoddesc}[Decompress]{flush}{}
|
||||
|
|
|
@ -8,4 +8,7 @@ normal compression/decompression succeeded
|
|||
compress/decompression obj succeeded
|
||||
decompress with init options succeeded
|
||||
decompressobj with init options succeeded
|
||||
should be '': ''
|
||||
max_length decompressobj succeeded
|
||||
unconsumed_tail should be '': ''
|
||||
Testing on 17K of random data
|
||||
|
|
|
@ -76,6 +76,36 @@ if decomp2 != buf:
|
|||
else:
|
||||
print "decompressobj with init options succeeded"
|
||||
|
||||
print "should be '':", `deco.unconsumed_tail`
|
||||
|
||||
# Check a decompression object with max_length specified
|
||||
deco = zlib.decompressobj(-12)
|
||||
cb = combuf
|
||||
bufs = []
|
||||
while cb:
|
||||
max_length = 1 + len(cb)/10
|
||||
chunk = deco.decompress(cb, max_length)
|
||||
if len(chunk) > max_length:
|
||||
print 'chunk too big (%d>%d)' % (len(chunk),max_length)
|
||||
bufs.append(chunk)
|
||||
cb = deco.unconsumed_tail
|
||||
bufs.append(deco.flush())
|
||||
decomp2 = ''.join(buf)
|
||||
if decomp2 != buf:
|
||||
print "max_length decompressobj failed"
|
||||
else:
|
||||
print "max_length decompressobj succeeded"
|
||||
|
||||
# Misc tests of max_length
|
||||
deco = zlib.decompressobj(-12)
|
||||
try:
|
||||
deco.decompress("", -1)
|
||||
except ValueError:
|
||||
pass
|
||||
else:
|
||||
print "failed to raise value error on bad max_length"
|
||||
print "unconsumed_tail should be '':", `deco.unconsumed_tail`
|
||||
|
||||
# Test flush() with the various options, using all the different levels
|
||||
# in order to provide more variations.
|
||||
sync_opt = ['Z_NO_FLUSH', 'Z_SYNC_FLUSH', 'Z_FULL_FLUSH']
|
||||
|
|
|
@ -78,6 +78,7 @@ typedef struct
|
|||
PyObject_HEAD
|
||||
z_stream zst;
|
||||
PyObject *unused_data;
|
||||
PyObject *unconsumed_tail;
|
||||
int is_initialised;
|
||||
} compobject;
|
||||
|
||||
|
@ -100,6 +101,15 @@ newcompobject(PyTypeObject *type)
|
|||
return NULL;
|
||||
self->is_initialised = 0;
|
||||
self->unused_data = PyString_FromString("");
|
||||
if (self->unused_data == NULL) {
|
||||
Py_DECREF(self);
|
||||
return NULL;
|
||||
}
|
||||
self->unconsumed_tail = PyString_FromString("");
|
||||
if (self->unconsumed_tail == NULL) {
|
||||
Py_DECREF(self);
|
||||
return NULL;
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
|
@ -485,6 +495,7 @@ Comp_dealloc(compobject *self)
|
|||
if (self->is_initialised)
|
||||
deflateEnd(&self->zst);
|
||||
Py_XDECREF(self->unused_data);
|
||||
Py_XDECREF(self->unconsumed_tail);
|
||||
PyObject_Del(self);
|
||||
|
||||
LEAVE_ZLIB
|
||||
|
@ -498,6 +509,7 @@ Decomp_dealloc(compobject *self)
|
|||
if (self->is_initialised)
|
||||
inflateEnd(&self->zst);
|
||||
Py_XDECREF(self->unused_data);
|
||||
Py_XDECREF(self->unconsumed_tail);
|
||||
PyObject_Del(self);
|
||||
|
||||
LEAVE_ZLIB
|
||||
|
@ -595,27 +607,41 @@ PyZlib_objcompress(compobject *self, PyObject *args)
|
|||
}
|
||||
|
||||
static char decomp_decompress__doc__[] =
|
||||
"decompress(data) -- Return a string containing the decompressed version of the data.\n\n"
|
||||
"decompress(data, max_length) -- Return a string containing\n"
|
||||
"the decompressed version of the data.\n\n"
|
||||
"After calling this function, some of the input data may still\n"
|
||||
"be stored in internal buffers for later processing.\n"
|
||||
"Call the flush() method to clear these buffers."
|
||||
"Call the flush() method to clear these buffers.\n"
|
||||
"If the max_length parameter is specified then the return value will be\n"
|
||||
"no longer than max_length. Unconsumed input data will be stored in\n"
|
||||
"the unconsumed_tail attribute."
|
||||
;
|
||||
|
||||
static PyObject *
|
||||
PyZlib_objdecompress(compobject *self, PyObject *args)
|
||||
{
|
||||
int err, inplen, length = DEFAULTALLOC;
|
||||
int err, inplen, old_length, length = DEFAULTALLOC;
|
||||
int max_length = 0;
|
||||
PyObject *RetVal;
|
||||
Byte *input;
|
||||
unsigned long start_total_out;
|
||||
int return_error;
|
||||
PyObject * inputString;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "S:decompress", &inputString))
|
||||
if (!PyArg_ParseTuple(args, "S|i:decompress", &inputString, &max_length))
|
||||
return NULL;
|
||||
if (max_length < 0) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"max_length must be greater than zero");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (PyString_AsStringAndSize(inputString, (char**)&input, &inplen) == -1)
|
||||
return NULL;
|
||||
|
||||
/* limit amount of data allocated to max_length */
|
||||
if (max_length && length > max_length)
|
||||
length = max_length;
|
||||
if (!(RetVal = PyString_FromStringAndSize(NULL, length))) {
|
||||
PyErr_SetString(PyExc_MemoryError,
|
||||
"Can't allocate memory to compress data");
|
||||
|
@ -637,23 +663,46 @@ PyZlib_objdecompress(compobject *self, PyObject *args)
|
|||
err = inflate(&(self->zst), Z_SYNC_FLUSH);
|
||||
Py_END_ALLOW_THREADS
|
||||
|
||||
/* while Z_OK and the output buffer is full, there might be more output,
|
||||
so extend the output buffer and try again */
|
||||
/* While Z_OK and the output buffer is full, there might be more output.
|
||||
So extend the output buffer and try again.
|
||||
*/
|
||||
while (err == Z_OK && self->zst.avail_out == 0) {
|
||||
if (_PyString_Resize(&RetVal, length << 1) == -1) {
|
||||
/* If max_length set, don't continue decompressing if we've already
|
||||
reached the limit.
|
||||
*/
|
||||
if (max_length && length >= max_length)
|
||||
break;
|
||||
|
||||
/* otherwise, ... */
|
||||
old_length = length;
|
||||
length = length << 1;
|
||||
if (max_length && length > max_length)
|
||||
length = max_length;
|
||||
|
||||
if (_PyString_Resize(&RetVal, length) == -1) {
|
||||
PyErr_SetString(PyExc_MemoryError,
|
||||
"Can't allocate memory to compress data");
|
||||
return_error = 1;
|
||||
break;
|
||||
}
|
||||
self->zst.next_out = (unsigned char *)PyString_AsString(RetVal) + length;
|
||||
self->zst.avail_out = length;
|
||||
length = length << 1;
|
||||
self->zst.next_out = (unsigned char *)PyString_AsString(RetVal)+old_length;
|
||||
self->zst.avail_out = length - old_length;
|
||||
|
||||
Py_BEGIN_ALLOW_THREADS
|
||||
err = inflate(&(self->zst), Z_SYNC_FLUSH);
|
||||
Py_END_ALLOW_THREADS
|
||||
}
|
||||
|
||||
/* Not all of the compressed data could be accommodated in the output buffer
|
||||
of specified size. Return the unconsumed tail in an attribute.*/
|
||||
if(max_length) {
|
||||
Py_DECREF(self->unconsumed_tail);
|
||||
self->unconsumed_tail = PyString_FromStringAndSize(self->zst.next_in,
|
||||
self->zst.avail_in);
|
||||
if(!self->unconsumed_tail)
|
||||
return_error = 1;
|
||||
}
|
||||
|
||||
/* The end of the compressed data has been reached, so set the unused_data
|
||||
attribute to a string containing the remainder of the data in the string.
|
||||
Note that this is also a logical place to call inflateEnd, but the old
|
||||
|
@ -885,6 +934,11 @@ Decomp_getattr(compobject *self, char *name)
|
|||
Py_INCREF(self->unused_data);
|
||||
retval = self->unused_data;
|
||||
}
|
||||
else if (strcmp(name, "unconsumed_tail") == 0)
|
||||
{
|
||||
Py_INCREF(self->unconsumed_tail);
|
||||
retval = self->unconsumed_tail;
|
||||
}
|
||||
else
|
||||
retval = Py_FindMethod(Decomp_methods, (PyObject *)self, name);
|
||||
|
||||
|
|
Loading…
Reference in New Issue