Issue #12646: Add an 'eof' attribute to zlib.Decompress.

This will make it easier to detect truncated input streams.

Also, make zlib's error messages more consistent.
This commit is contained in:
Nadeem Vawda 2011-08-13 15:22:40 +02:00
parent 74b6abf61f
commit 1c38546e49
4 changed files with 48 additions and 14 deletions

View File

@ -152,7 +152,7 @@ Compression objects support the following methods:
compress a set of data that share a common initial prefix.
Decompression objects support the following methods, and two attributes:
Decompression objects support the following methods and attributes:
.. attribute:: Decompress.unused_data
@ -162,13 +162,6 @@ Decompression objects support the following methods, and two attributes:
available. If the whole bytestring turned out to contain compressed data, this is
``b""``, an empty bytes object.
The only way to determine where a bytestring of compressed data ends is by actually
decompressing it. This means that when compressed data is part of a
larger file, you can only find the end of it by reading data and feeding it
followed by some non-empty bytestring into a decompression object's
:meth:`decompress` method until the :attr:`unused_data` attribute is no longer
empty.
.. attribute:: Decompress.unconsumed_tail
@ -179,6 +172,17 @@ Decompression objects support the following methods, and two attributes:
:meth:`decompress` method call in order to get correct output.
.. attribute:: Decompress.eof
A boolean indicating whether the end of the compressed data stream has been
reached.
This makes it possible to distinguish between a properly-formed compressed
stream, and an incomplete or truncated one.
.. versionadded:: 3.3
.. method:: Decompress.decompress(data[, max_length])
Decompress *data*, returning a bytes object containing the uncompressed data

View File

@ -447,6 +447,26 @@ class CompressObjectTestCase(BaseCompressTestCase, unittest.TestCase):
y += dco.flush()
self.assertEqual(y, b'foo')
def test_decompress_eof(self):
x = b'x\x9cK\xcb\xcf\x07\x00\x02\x82\x01E' # 'foo'
dco = zlib.decompressobj()
self.assertFalse(dco.eof)
dco.decompress(x[:-5])
self.assertFalse(dco.eof)
dco.decompress(x[-5:])
self.assertTrue(dco.eof)
dco.flush()
self.assertTrue(dco.eof)
def test_decompress_eof_incomplete_stream(self):
x = b'x\x9cK\xcb\xcf\x07\x00\x02\x82\x01E' # 'foo'
dco = zlib.decompressobj()
self.assertFalse(dco.eof)
dco.decompress(x[:-5])
self.assertFalse(dco.eof)
dco.flush()
self.assertFalse(dco.eof)
if hasattr(zlib.compressobj(), "copy"):
def test_compresscopy(self):
# Test copying a compression object

View File

@ -254,6 +254,9 @@ Core and Builtins
Library
-------
- Issue #12646: Add an 'eof' attribute to zlib.Decompress, to make it easier to
detect truncated input streams.
- Issue #11513: Fix exception handling in ``tarfile.TarFile.gzopen()`` when
the file cannot be opened.

View File

@ -43,6 +43,7 @@ typedef struct
z_stream zst;
PyObject *unused_data;
PyObject *unconsumed_tail;
char eof;
int is_initialised;
#ifdef WITH_THREAD
PyThread_type_lock lock;
@ -89,6 +90,7 @@ newcompobject(PyTypeObject *type)
self = PyObject_New(compobject, type);
if (self == NULL)
return NULL;
self->eof = 0;
self->is_initialised = 0;
self->unused_data = PyBytes_FromStringAndSize("", 0);
if (self->unused_data == NULL) {
@ -291,7 +293,7 @@ PyZlib_decompress(PyObject *self, PyObject *args)
err = inflateEnd(&zst);
if (err != Z_OK) {
zlib_error(zst, err, "while finishing data decompression");
zlib_error(zst, err, "while finishing decompression");
goto error;
}
@ -476,7 +478,7 @@ PyZlib_objcompress(compobject *self, PyObject *args)
*/
if (err != Z_OK && err != Z_BUF_ERROR) {
zlib_error(self->zst, err, "while compressing");
zlib_error(self->zst, err, "while compressing data");
Py_DECREF(RetVal);
RetVal = NULL;
goto error;
@ -611,12 +613,13 @@ PyZlib_objdecompress(compobject *self, PyObject *args)
Py_DECREF(RetVal);
goto error;
}
self->eof = 1;
/* We will only get Z_BUF_ERROR if the output buffer was full
but there wasn't more output when we tried again, so it is
not an error condition.
*/
} else if (err != Z_OK && err != Z_BUF_ERROR) {
zlib_error(self->zst, err, "while decompressing");
zlib_error(self->zst, err, "while decompressing data");
Py_DECREF(RetVal);
RetVal = NULL;
goto error;
@ -697,7 +700,7 @@ PyZlib_flush(compobject *self, PyObject *args)
if (err == Z_STREAM_END && flushmode == Z_FINISH) {
err = deflateEnd(&(self->zst));
if (err != Z_OK) {
zlib_error(self->zst, err, "from deflateEnd()");
zlib_error(self->zst, err, "while finishing compression");
Py_DECREF(RetVal);
RetVal = NULL;
goto error;
@ -765,6 +768,7 @@ PyZlib_copy(compobject *self)
Py_XDECREF(retval->unconsumed_tail);
retval->unused_data = self->unused_data;
retval->unconsumed_tail = self->unconsumed_tail;
retval->eof = self->eof;
/* Mark it as being initialized */
retval->is_initialised = 1;
@ -816,6 +820,7 @@ PyZlib_uncopy(compobject *self)
Py_XDECREF(retval->unconsumed_tail);
retval->unused_data = self->unused_data;
retval->unconsumed_tail = self->unconsumed_tail;
retval->eof = self->eof;
/* Mark it as being initialized */
retval->is_initialised = 1;
@ -885,10 +890,11 @@ PyZlib_unflush(compobject *self, PyObject *args)
various data structures. Note we should only get Z_STREAM_END when
flushmode is Z_FINISH */
if (err == Z_STREAM_END) {
err = inflateEnd(&(self->zst));
self->eof = 1;
self->is_initialised = 0;
err = inflateEnd(&(self->zst));
if (err != Z_OK) {
zlib_error(self->zst, err, "from inflateEnd()");
zlib_error(self->zst, err, "while finishing decompression");
Py_DECREF(retval);
retval = NULL;
goto error;
@ -936,6 +942,7 @@ static PyMethodDef Decomp_methods[] =
/* Data attributes exposed on decompression objects.  Every entry is
   READONLY; each maps an attribute name to a field located through
   COMP_OFF(). */
static PyMemberDef Decomp_members[] = {
{"unused_data", T_OBJECT, COMP_OFF(unused_data), READONLY},
{"unconsumed_tail", T_OBJECT, COMP_OFF(unconsumed_tail), READONLY},
/* eof is declared as a char in the object struct and exposed as T_BOOL. */
{"eof", T_BOOL, COMP_OFF(eof), READONLY},
{NULL},  /* end-of-table marker */
};