Fix 64-bit safety issue in BZ2Compressor and BZ2Decompressor.

This commit is contained in:
Nadeem Vawda 2011-04-12 23:02:42 +02:00
parent b30f1b4106
commit ea4b46f9a9
2 changed files with 59 additions and 10 deletions

View File

@ -1,10 +1,11 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from test import support from test import support
from test.support import TESTFN from test.support import TESTFN, precisionbigmemtest, _4G
import unittest import unittest
from io import BytesIO from io import BytesIO
import os import os
import random
import subprocess import subprocess
import sys import sys
@ -415,6 +416,23 @@ class BZ2CompressorTest(BaseTest):
data += bz2c.flush() data += bz2c.flush()
self.assertEqual(self.decompress(data), self.TEXT) self.assertEqual(self.decompress(data), self.TEXT)
@precisionbigmemtest(size=_4G + 100, memuse=2)
def testCompress4G(self, size):
    # "Test BZ2Compressor.compress()/flush() on an input larger than 4GiB"
    # Round-trips `size` bytes of b"x" through BZ2Compressor and
    # bz2.decompress(), then checks the length and content of the result.
    compressor = BZ2Compressor()
    payload = b"x" * size
    try:
        packed = compressor.compress(payload) + compressor.flush()
    finally:
        # Drop the reference to the huge input before decompressing.
        payload = None
    restored = bz2.decompress(packed)
    try:
        self.assertEqual(len(restored), size)
        # Stripping every b"x" must leave nothing behind.
        leftover = restored.strip(b"x")
        self.assertEqual(len(leftover), 0)
    finally:
        # Release the decompressed buffer even when an assertion fails.
        restored = None
class BZ2DecompressorTest(BaseTest): class BZ2DecompressorTest(BaseTest):
def test_Constructor(self): def test_Constructor(self):
self.assertRaises(TypeError, BZ2Decompressor, 42) self.assertRaises(TypeError, BZ2Decompressor, 42)
@ -453,6 +471,22 @@ class BZ2DecompressorTest(BaseTest):
text = bz2d.decompress(self.DATA) text = bz2d.decompress(self.DATA)
self.assertRaises(EOFError, bz2d.decompress, b"anything") self.assertRaises(EOFError, bz2d.decompress, b"anything")
@precisionbigmemtest(size=_4G + 100, memuse=3)
def testDecompress4G(self, size):
    # "Test BZ2Decompressor.decompress() on an input larger than 4GiB"
    # Builds the input by repeating one 10 MiB block of random bytes until
    # it is longer than `size`, compresses it in one shot, and checks that
    # a single BZ2Decompressor.decompress() call returns the same bytes.
    chunk_len = 10 * 1024 * 1024
    chunk = random.getrandbits(chunk_len * 8).to_bytes(chunk_len, 'little')
    try:
        repeats = size // chunk_len + 1
        data = chunk * repeats
        compressed = bz2.compress(data)
        decompressor = BZ2Decompressor()
        decompressed = decompressor.decompress(compressed)
        # Plain == inside assertTrue is kept as in the original comparison.
        self.assertTrue(decompressed == data)
    finally:
        # Rebinding (rather than del) is safe even for names never bound
        # because an earlier statement in the try body raised.
        data = None
        compressed = None
        decompressed = None
class FuncTest(BaseTest): class FuncTest(BaseTest):
"Test module functions" "Test module functions"

View File

@ -36,6 +36,8 @@
#define RELEASE_LOCK(obj) #define RELEASE_LOCK(obj)
#endif #endif
#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
typedef struct { typedef struct {
PyObject_HEAD PyObject_HEAD
@ -145,8 +147,10 @@ compress(BZ2Compressor *c, char *data, size_t len, int action)
if (result == NULL) if (result == NULL)
return NULL; return NULL;
c->bzs.next_in = data; c->bzs.next_in = data;
/* FIXME This is not 64-bit clean - avail_in is an int. */ /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
c->bzs.avail_in = len; Do compression in chunks of no more than UINT_MAX bytes each. */
c->bzs.avail_in = MIN(len, UINT_MAX);
len -= c->bzs.avail_in;
c->bzs.next_out = PyBytes_AS_STRING(result); c->bzs.next_out = PyBytes_AS_STRING(result);
c->bzs.avail_out = PyBytes_GET_SIZE(result); c->bzs.avail_out = PyBytes_GET_SIZE(result);
for (;;) { for (;;) {
@ -161,6 +165,11 @@ compress(BZ2Compressor *c, char *data, size_t len, int action)
if (catch_bz2_error(bzerror)) if (catch_bz2_error(bzerror))
goto error; goto error;
if (c->bzs.avail_in == 0 && len > 0) {
c->bzs.avail_in = MIN(len, UINT_MAX);
len -= c->bzs.avail_in;
}
/* In regular compression mode, stop when input data is exhausted. /* In regular compression mode, stop when input data is exhausted.
In flushing mode, stop when all buffered data has been flushed. */ In flushing mode, stop when all buffered data has been flushed. */
if ((action == BZ_RUN && c->bzs.avail_in == 0) || if ((action == BZ_RUN && c->bzs.avail_in == 0) ||
@ -354,8 +363,10 @@ decompress(BZ2Decompressor *d, char *data, size_t len)
if (result == NULL) if (result == NULL)
return result; return result;
d->bzs.next_in = data; d->bzs.next_in = data;
/* FIXME This is not 64-bit clean - avail_in is an int. */ /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
d->bzs.avail_in = len; Do decompression in chunks of no more than UINT_MAX bytes each. */
d->bzs.avail_in = MIN(len, UINT_MAX);
len -= d->bzs.avail_in;
d->bzs.next_out = PyBytes_AS_STRING(result); d->bzs.next_out = PyBytes_AS_STRING(result);
d->bzs.avail_out = PyBytes_GET_SIZE(result); d->bzs.avail_out = PyBytes_GET_SIZE(result);
for (;;) { for (;;) {
@ -371,17 +382,21 @@ decompress(BZ2Decompressor *d, char *data, size_t len)
goto error; goto error;
if (bzerror == BZ_STREAM_END) { if (bzerror == BZ_STREAM_END) {
d->eof = 1; d->eof = 1;
if (d->bzs.avail_in > 0) { /* Save leftover input to unused_data */ len += d->bzs.avail_in;
if (len > 0) { /* Save leftover input to unused_data */
Py_CLEAR(d->unused_data); Py_CLEAR(d->unused_data);
d->unused_data = PyBytes_FromStringAndSize(d->bzs.next_in, d->unused_data = PyBytes_FromStringAndSize(d->bzs.next_in, len);
d->bzs.avail_in);
if (d->unused_data == NULL) if (d->unused_data == NULL)
goto error; goto error;
} }
break; break;
} }
if (d->bzs.avail_in == 0) if (d->bzs.avail_in == 0) {
break; if (len == 0)
break;
d->bzs.avail_in = MIN(len, UINT_MAX);
len -= d->bzs.avail_in;
}
if (d->bzs.avail_out == 0) { if (d->bzs.avail_out == 0) {
if (grow_buffer(&result) < 0) if (grow_buffer(&result) < 0)
goto error; goto error;