[ #403753 ] zlib decompress; uncontrollable memory usage

Mostly by Toby Dickenson and Titus Brown.

Add an optional argument to a decompression object's decompress()
method.  The argument specifies the maximum length of the return
value.  If the uncompressed data exceeds this length, the excess data
is stored as the unconsumed_tail attribute.  (Not to be confused with
unused_data, which is a separate issue.)

Difference from SF patch: Default value for unconsumed_tail is ""
rather than None.  It's simpler if the attribute is always a string.
This commit is contained in:
Jeremy Hylton 2001-10-16 20:39:49 +00:00
parent 7a59445e37
commit 511e2cacc4
4 changed files with 113 additions and 12 deletions

View File

@ -120,7 +120,7 @@ prevents compressing any more data. After calling
action is to delete the object.
\end{methoddesc}
Decompression objects support the following methods, and a single attribute:
Decompression objects support the following methods, and two attributes:
\begin{memberdesc}{unused_data}
A string which contains any unused data from the last string fed to
@ -135,13 +135,27 @@ reading data and feeding it into a decompression object's
no longer the empty string.
\end{memberdesc}
\begin{methoddesc}[Decompress]{decompress}{string}
\begin{memberdesc}{unconsumed_tail}
A string that contains any data that was not consumed by the last
\method{decompress()} call because consuming it would have exceeded the
limit on the size of the uncompressed data buffer.
\end{memberdesc}
\begin{methoddesc}[Decompress]{decompress}{string}{\optional{max_length}}
Decompress \var{string}, returning a string containing the
uncompressed data corresponding to at least part of the data in
\var{string}. This data should be concatenated to the output produced
by any preceding calls to the
\method{decompress()} method. Some of the input data may be preserved
in internal buffers for later processing.
If the optional parameter \var{max_length} is supplied then the return value
will be no longer than \var{max_length}. This may mean that not all of the
compressed input can be processed, in which case the unconsumed data is stored
in the attribute \member{unconsumed_tail}. This string must be passed
to a subsequent call to \method{decompress()} if decompression is to
continue. If \var{max_length} is not supplied then the whole input is
decompressed, and \member{unconsumed_tail} is an empty string.
\end{methoddesc}
\begin{methoddesc}[Decompress]{flush}{}

View File

@ -8,4 +8,7 @@ normal compression/decompression succeeded
compress/decompression obj succeeded
decompress with init options succeeded
decompressobj with init options succeeded
should be '': ''
max_length decompressobj succeeded
unconsumed_tail should be '': ''
Testing on 17K of random data

View File

@ -76,6 +76,36 @@ if decomp2 != buf:
else:
print "decompressobj with init options succeeded"
print "should be '':", `deco.unconsumed_tail`
# Check a decompression object with max_length specified
deco = zlib.decompressobj(-12)
cb = combuf
bufs = []
while cb:
max_length = 1 + len(cb)/10
chunk = deco.decompress(cb, max_length)
if len(chunk) > max_length:
print 'chunk too big (%d>%d)' % (len(chunk),max_length)
bufs.append(chunk)
cb = deco.unconsumed_tail
bufs.append(deco.flush())
decomp2 = ''.join(buf)
if decomp2 != buf:
print "max_length decompressobj failed"
else:
print "max_length decompressobj succeeded"
# Misc tests of max_length:
# - a negative max_length passed to decompress() is expected to raise
#   ValueError; anything else is reported as a failure;
# - afterwards unconsumed_tail is printed and is expected to be ''.
deco = zlib.decompressobj(-12)
try:
deco.decompress("", -1)
except ValueError:
pass
else:
print "failed to raise value error on bad max_length"
print "unconsumed_tail should be '':", `deco.unconsumed_tail`
# Test flush() with the various options, using all the different levels
# in order to provide more variations.
sync_opt = ['Z_NO_FLUSH', 'Z_SYNC_FLUSH', 'Z_FULL_FLUSH']

View File

@ -78,6 +78,7 @@ typedef struct
PyObject_HEAD
z_stream zst;
PyObject *unused_data;
PyObject *unconsumed_tail;
int is_initialised;
} compobject;
@ -100,6 +101,15 @@ newcompobject(PyTypeObject *type)
return NULL;
self->is_initialised = 0;
self->unused_data = PyString_FromString("");
if (self->unused_data == NULL) {
Py_DECREF(self);
return NULL;
}
self->unconsumed_tail = PyString_FromString("");
if (self->unconsumed_tail == NULL) {
Py_DECREF(self);
return NULL;
}
return self;
}
@ -485,6 +495,7 @@ Comp_dealloc(compobject *self)
if (self->is_initialised)
deflateEnd(&self->zst);
Py_XDECREF(self->unused_data);
Py_XDECREF(self->unconsumed_tail);
PyObject_Del(self);
LEAVE_ZLIB
@ -498,6 +509,7 @@ Decomp_dealloc(compobject *self)
if (self->is_initialised)
inflateEnd(&self->zst);
Py_XDECREF(self->unused_data);
Py_XDECREF(self->unconsumed_tail);
PyObject_Del(self);
LEAVE_ZLIB
@ -595,27 +607,41 @@ PyZlib_objcompress(compobject *self, PyObject *args)
}
static char decomp_decompress__doc__[] =
"decompress(data) -- Return a string containing the decompressed version of the data.\n\n"
"decompress(data, max_length) -- Return a string containing\n"
"the decompressed version of the data.\n\n"
"After calling this function, some of the input data may still\n"
"be stored in internal buffers for later processing.\n"
"Call the flush() method to clear these buffers."
"Call the flush() method to clear these buffers.\n"
"If the max_length parameter is specified then the return value will be\n"
"no longer than max_length. Unconsumed input data will be stored in\n"
"the unconsumed_tail attribute."
;
static PyObject *
PyZlib_objdecompress(compobject *self, PyObject *args)
{
int err, inplen, length = DEFAULTALLOC;
int err, inplen, old_length, length = DEFAULTALLOC;
int max_length = 0;
PyObject *RetVal;
Byte *input;
unsigned long start_total_out;
int return_error;
PyObject * inputString;
if (!PyArg_ParseTuple(args, "S:decompress", &inputString))
if (!PyArg_ParseTuple(args, "S|i:decompress", &inputString, &max_length))
return NULL;
if (max_length < 0) {
PyErr_SetString(PyExc_ValueError,
"max_length must be greater than zero");
return NULL;
}
if (PyString_AsStringAndSize(inputString, (char**)&input, &inplen) == -1)
return NULL;
/* limit amount of data allocated to max_length */
if (max_length && length > max_length)
length = max_length;
if (!(RetVal = PyString_FromStringAndSize(NULL, length))) {
PyErr_SetString(PyExc_MemoryError,
"Can't allocate memory to compress data");
@ -637,23 +663,46 @@ PyZlib_objdecompress(compobject *self, PyObject *args)
err = inflate(&(self->zst), Z_SYNC_FLUSH);
Py_END_ALLOW_THREADS
/* while Z_OK and the output buffer is full, there might be more output,
so extend the output buffer and try again */
/* While Z_OK and the output buffer is full, there might be more output.
So extend the output buffer and try again.
*/
while (err == Z_OK && self->zst.avail_out == 0) {
if (_PyString_Resize(&RetVal, length << 1) == -1) {
/* If max_length set, don't continue decompressing if we've already
reached the limit.
*/
if (max_length && length >= max_length)
break;
/* otherwise, ... */
old_length = length;
length = length << 1;
if (max_length && length > max_length)
length = max_length;
if (_PyString_Resize(&RetVal, length) == -1) {
PyErr_SetString(PyExc_MemoryError,
"Can't allocate memory to compress data");
return_error = 1;
break;
}
self->zst.next_out = (unsigned char *)PyString_AsString(RetVal) + length;
self->zst.avail_out = length;
length = length << 1;
self->zst.next_out = (unsigned char *)PyString_AsString(RetVal)+old_length;
self->zst.avail_out = length - old_length;
Py_BEGIN_ALLOW_THREADS
err = inflate(&(self->zst), Z_SYNC_FLUSH);
Py_END_ALLOW_THREADS
}
/* Not all of the compressed data could be accommodated in the output buffer
of the specified size. Return the unconsumed tail in an attribute. */
if(max_length) {
Py_DECREF(self->unconsumed_tail);
self->unconsumed_tail = PyString_FromStringAndSize(self->zst.next_in,
self->zst.avail_in);
if(!self->unconsumed_tail)
return_error = 1;
}
/* The end of the compressed data has been reached, so set the unused_data
attribute to a string containing the remainder of the data in the string.
Note that this is also a logical place to call inflateEnd, but the old
@ -885,6 +934,11 @@ Decomp_getattr(compobject *self, char *name)
Py_INCREF(self->unused_data);
retval = self->unused_data;
}
else if (strcmp(name, "unconsumed_tail") == 0)
{
Py_INCREF(self->unconsumed_tail);
retval = self->unconsumed_tail;
}
else
retval = Py_FindMethod(Decomp_methods, (PyObject *)self, name);