diff --git a/Lib/test/test_bz2.py b/Lib/test/test_bz2.py index cee38e0fd17..3567b362574 100644 --- a/Lib/test/test_bz2.py +++ b/Lib/test/test_bz2.py @@ -1,10 +1,11 @@ #!/usr/bin/env python3 from test import support -from test.support import TESTFN +from test.support import TESTFN, precisionbigmemtest, _4G import unittest from io import BytesIO import os +import random import subprocess import sys @@ -415,6 +416,23 @@ class BZ2CompressorTest(BaseTest): data += bz2c.flush() self.assertEqual(self.decompress(data), self.TEXT) + @precisionbigmemtest(size=_4G + 100, memuse=2) + def testCompress4G(self, size): + # "Test BZ2Compressor.compress()/flush() with >4GiB input" + bz2c = BZ2Compressor() + data = b"x" * size + try: + compressed = bz2c.compress(data) + compressed += bz2c.flush() + finally: + data = None # Release memory + data = bz2.decompress(compressed) + try: + self.assertEqual(len(data), size) + self.assertEqual(len(data.strip(b"x")), 0) + finally: + data = None + class BZ2DecompressorTest(BaseTest): def test_Constructor(self): self.assertRaises(TypeError, BZ2Decompressor, 42) @@ -453,6 +471,22 @@ class BZ2DecompressorTest(BaseTest): text = bz2d.decompress(self.DATA) self.assertRaises(EOFError, bz2d.decompress, b"anything") + @precisionbigmemtest(size=_4G + 100, memuse=3) + def testDecompress4G(self, size): + # "Test BZ2Decompressor.decompress() with >4GiB input" + blocksize = 10 * 1024 * 1024 + block = random.getrandbits(blocksize * 8).to_bytes(blocksize, 'little') + try: + data = block * (size // blocksize + 1) + compressed = bz2.compress(data) + bz2d = BZ2Decompressor() + decompressed = bz2d.decompress(compressed) + self.assertTrue(decompressed == data) + finally: + data = None + compressed = None + decompressed = None + class FuncTest(BaseTest): "Test module functions" diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c index 522b3e56585..d329c146261 100644 --- a/Modules/_bz2module.c +++ b/Modules/_bz2module.c @@ -36,6 +36,8 @@ #define RELEASE_LOCK(obj) #endif +#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y)) + typedef struct { PyObject_HEAD @@ -145,8 +147,10 @@ compress(BZ2Compressor *c, char *data, size_t len, int action) if (result == NULL) return NULL; c->bzs.next_in = data; - /* FIXME This is not 64-bit clean - avail_in is an int. */ - c->bzs.avail_in = len; + /* On a 64-bit system, len might not fit in avail_in (an unsigned int). + Do compression in chunks of no more than UINT_MAX bytes each. */ + c->bzs.avail_in = MIN(len, UINT_MAX); + len -= c->bzs.avail_in; c->bzs.next_out = PyBytes_AS_STRING(result); c->bzs.avail_out = PyBytes_GET_SIZE(result); for (;;) { @@ -161,6 +165,11 @@ compress(BZ2Compressor *c, char *data, size_t len, int action) if (catch_bz2_error(bzerror)) goto error; + if (c->bzs.avail_in == 0 && len > 0) { + c->bzs.avail_in = MIN(len, UINT_MAX); + len -= c->bzs.avail_in; + } + /* In regular compression mode, stop when input data is exhausted. In flushing mode, stop when all buffered data has been flushed. */ if ((action == BZ_RUN && c->bzs.avail_in == 0) || @@ -354,8 +363,10 @@ decompress(BZ2Decompressor *d, char *data, size_t len) if (result == NULL) return result; d->bzs.next_in = data; - /* FIXME This is not 64-bit clean - avail_in is an int. */ - d->bzs.avail_in = len; + /* On a 64-bit system, len might not fit in avail_in (an unsigned int). + Do decompression in chunks of no more than UINT_MAX bytes each. */ + d->bzs.avail_in = MIN(len, UINT_MAX); + len -= d->bzs.avail_in; d->bzs.next_out = PyBytes_AS_STRING(result); d->bzs.avail_out = PyBytes_GET_SIZE(result); for (;;) { @@ -371,17 +382,21 @@ decompress(BZ2Decompressor *d, char *data, size_t len) goto error; if (bzerror == BZ_STREAM_END) { d->eof = 1; - if (d->bzs.avail_in > 0) { /* Save leftover input to unused_data */ + len += d->bzs.avail_in; + if (len > 0) { /* Save leftover input to unused_data */ Py_CLEAR(d->unused_data); - d->unused_data = PyBytes_FromStringAndSize(d->bzs.next_in, - d->bzs.avail_in); + d->unused_data = PyBytes_FromStringAndSize(d->bzs.next_in, len); if (d->unused_data == NULL) goto error; } break; } - if (d->bzs.avail_in == 0) - break; + if (d->bzs.avail_in == 0) { + if (len == 0) + break; + d->bzs.avail_in = MIN(len, UINT_MAX); + len -= d->bzs.avail_in; + } if (d->bzs.avail_out == 0) { if (grow_buffer(&result) < 0) goto error;