Issue #1625: BZ2File and bz2.decompress() now support multi-stream files.
Initial patch by Nir Aides.
This commit is contained in:
parent
c556e10b94
commit
55b4338874
47
Lib/bz2.py
47
Lib/bz2.py
|
@ -76,6 +76,10 @@ class BZ2File(io.BufferedIOBase):
|
||||||
mode = "wb"
|
mode = "wb"
|
||||||
mode_code = _MODE_WRITE
|
mode_code = _MODE_WRITE
|
||||||
self._compressor = BZ2Compressor()
|
self._compressor = BZ2Compressor()
|
||||||
|
elif mode in ("a", "ab"):
|
||||||
|
mode = "ab"
|
||||||
|
mode_code = _MODE_WRITE
|
||||||
|
self._compressor = BZ2Compressor()
|
||||||
else:
|
else:
|
||||||
raise ValueError("Invalid mode: {!r}".format(mode))
|
raise ValueError("Invalid mode: {!r}".format(mode))
|
||||||
|
|
||||||
|
@ -161,14 +165,25 @@ class BZ2File(io.BufferedIOBase):
|
||||||
def _fill_buffer(self):
|
def _fill_buffer(self):
|
||||||
if self._buffer:
|
if self._buffer:
|
||||||
return True
|
return True
|
||||||
if self._decompressor.eof:
|
|
||||||
self._mode = _MODE_READ_EOF
|
if self._decompressor.unused_data:
|
||||||
self._size = self._pos
|
rawblock = self._decompressor.unused_data
|
||||||
return False
|
else:
|
||||||
rawblock = self._fp.read(_BUFFER_SIZE)
|
rawblock = self._fp.read(_BUFFER_SIZE)
|
||||||
|
|
||||||
if not rawblock:
|
if not rawblock:
|
||||||
raise EOFError("Compressed file ended before the "
|
if self._decompressor.eof:
|
||||||
"end-of-stream marker was reached")
|
self._mode = _MODE_READ_EOF
|
||||||
|
self._size = self._pos
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
raise EOFError("Compressed file ended before the "
|
||||||
|
"end-of-stream marker was reached")
|
||||||
|
|
||||||
|
# Continue to next stream.
|
||||||
|
if self._decompressor.eof:
|
||||||
|
self._decompressor = BZ2Decompressor()
|
||||||
|
|
||||||
self._buffer = self._decompressor.decompress(rawblock)
|
self._buffer = self._decompressor.decompress(rawblock)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@ -384,9 +399,15 @@ def decompress(data):
|
||||||
"""
|
"""
|
||||||
if len(data) == 0:
|
if len(data) == 0:
|
||||||
return b""
|
return b""
|
||||||
decomp = BZ2Decompressor()
|
|
||||||
result = decomp.decompress(data)
|
result = b""
|
||||||
if not decomp.eof:
|
while True:
|
||||||
raise ValueError("Compressed data ended before the "
|
decomp = BZ2Decompressor()
|
||||||
"end-of-stream marker was reached")
|
result += decomp.decompress(data)
|
||||||
return result
|
if not decomp.eof:
|
||||||
|
raise ValueError("Compressed data ended before the "
|
||||||
|
"end-of-stream marker was reached")
|
||||||
|
if not decomp.unused_data:
|
||||||
|
return result
|
||||||
|
# There is unused data left over. Proceed to next stream.
|
||||||
|
data = decomp.unused_data
|
||||||
|
|
|
@ -84,9 +84,9 @@ class BZ2FileTest(BaseTest):
|
||||||
else:
|
else:
|
||||||
return self.DATA
|
return self.DATA
|
||||||
|
|
||||||
def createTempFile(self, crlf=False):
|
def createTempFile(self, crlf=False, streams=1):
|
||||||
with open(self.filename, "wb") as f:
|
with open(self.filename, "wb") as f:
|
||||||
f.write(self.getData(crlf))
|
f.write(self.getData(crlf) * streams)
|
||||||
|
|
||||||
def testRead(self):
|
def testRead(self):
|
||||||
# "Test BZ2File.read()"
|
# "Test BZ2File.read()"
|
||||||
|
@ -95,6 +95,26 @@ class BZ2FileTest(BaseTest):
|
||||||
self.assertRaises(TypeError, bz2f.read, None)
|
self.assertRaises(TypeError, bz2f.read, None)
|
||||||
self.assertEqual(bz2f.read(), self.TEXT)
|
self.assertEqual(bz2f.read(), self.TEXT)
|
||||||
|
|
||||||
|
def testReadMultiStream(self):
|
||||||
|
# "Test BZ2File.read() with a multi stream archive"
|
||||||
|
self.createTempFile(streams=5)
|
||||||
|
with BZ2File(self.filename) as bz2f:
|
||||||
|
self.assertRaises(TypeError, bz2f.read, None)
|
||||||
|
self.assertEqual(bz2f.read(), self.TEXT * 5)
|
||||||
|
|
||||||
|
def testReadMonkeyMultiStream(self):
|
||||||
|
# "Test BZ2File.read() with a multi stream archive in which stream"
|
||||||
|
# "end is aligned with internal buffer size"
|
||||||
|
buffer_size = bz2._BUFFER_SIZE
|
||||||
|
bz2._BUFFER_SIZE = len(self.DATA)
|
||||||
|
try:
|
||||||
|
self.createTempFile(streams=5)
|
||||||
|
with BZ2File(self.filename) as bz2f:
|
||||||
|
self.assertRaises(TypeError, bz2f.read, None)
|
||||||
|
self.assertEqual(bz2f.read(), self.TEXT * 5)
|
||||||
|
finally:
|
||||||
|
bz2._BUFFER_SIZE = buffer_size
|
||||||
|
|
||||||
def testRead0(self):
|
def testRead0(self):
|
||||||
# "Test BZ2File.read(0)"
|
# "Test BZ2File.read(0)"
|
||||||
self.createTempFile()
|
self.createTempFile()
|
||||||
|
@ -114,6 +134,18 @@ class BZ2FileTest(BaseTest):
|
||||||
text += str
|
text += str
|
||||||
self.assertEqual(text, self.TEXT)
|
self.assertEqual(text, self.TEXT)
|
||||||
|
|
||||||
|
def testReadChunk10MultiStream(self):
|
||||||
|
# "Test BZ2File.read() in chunks of 10 bytes with a multi stream archive"
|
||||||
|
self.createTempFile(streams=5)
|
||||||
|
with BZ2File(self.filename) as bz2f:
|
||||||
|
text = b''
|
||||||
|
while 1:
|
||||||
|
str = bz2f.read(10)
|
||||||
|
if not str:
|
||||||
|
break
|
||||||
|
text += str
|
||||||
|
self.assertEqual(text, self.TEXT * 5)
|
||||||
|
|
||||||
def testRead100(self):
|
def testRead100(self):
|
||||||
# "Test BZ2File.read(100)"
|
# "Test BZ2File.read(100)"
|
||||||
self.createTempFile()
|
self.createTempFile()
|
||||||
|
@ -151,6 +183,15 @@ class BZ2FileTest(BaseTest):
|
||||||
for line in sio.readlines():
|
for line in sio.readlines():
|
||||||
self.assertEqual(bz2f.readline(), line)
|
self.assertEqual(bz2f.readline(), line)
|
||||||
|
|
||||||
|
def testReadLineMultiStream(self):
|
||||||
|
# "Test BZ2File.readline() with a multi stream archive"
|
||||||
|
self.createTempFile(streams=5)
|
||||||
|
with BZ2File(self.filename) as bz2f:
|
||||||
|
self.assertRaises(TypeError, bz2f.readline, None)
|
||||||
|
sio = BytesIO(self.TEXT * 5)
|
||||||
|
for line in sio.readlines():
|
||||||
|
self.assertEqual(bz2f.readline(), line)
|
||||||
|
|
||||||
def testReadLines(self):
|
def testReadLines(self):
|
||||||
# "Test BZ2File.readlines()"
|
# "Test BZ2File.readlines()"
|
||||||
self.createTempFile()
|
self.createTempFile()
|
||||||
|
@ -159,6 +200,14 @@ class BZ2FileTest(BaseTest):
|
||||||
sio = BytesIO(self.TEXT)
|
sio = BytesIO(self.TEXT)
|
||||||
self.assertEqual(bz2f.readlines(), sio.readlines())
|
self.assertEqual(bz2f.readlines(), sio.readlines())
|
||||||
|
|
||||||
|
def testReadLinesMultiStream(self):
|
||||||
|
# "Test BZ2File.readlines() with a multi stream archive"
|
||||||
|
self.createTempFile(streams=5)
|
||||||
|
with BZ2File(self.filename) as bz2f:
|
||||||
|
self.assertRaises(TypeError, bz2f.readlines, None)
|
||||||
|
sio = BytesIO(self.TEXT * 5)
|
||||||
|
self.assertEqual(bz2f.readlines(), sio.readlines())
|
||||||
|
|
||||||
def testIterator(self):
|
def testIterator(self):
|
||||||
# "Test iter(BZ2File)"
|
# "Test iter(BZ2File)"
|
||||||
self.createTempFile()
|
self.createTempFile()
|
||||||
|
@ -166,6 +215,13 @@ class BZ2FileTest(BaseTest):
|
||||||
sio = BytesIO(self.TEXT)
|
sio = BytesIO(self.TEXT)
|
||||||
self.assertEqual(list(iter(bz2f)), sio.readlines())
|
self.assertEqual(list(iter(bz2f)), sio.readlines())
|
||||||
|
|
||||||
|
def testIteratorMultiStream(self):
|
||||||
|
# "Test iter(BZ2File) with a multi stream archive"
|
||||||
|
self.createTempFile(streams=5)
|
||||||
|
with BZ2File(self.filename) as bz2f:
|
||||||
|
sio = BytesIO(self.TEXT * 5)
|
||||||
|
self.assertEqual(list(iter(bz2f)), sio.readlines())
|
||||||
|
|
||||||
def testClosedIteratorDeadlock(self):
|
def testClosedIteratorDeadlock(self):
|
||||||
# "Test that iteration on a closed bz2file releases the lock."
|
# "Test that iteration on a closed bz2file releases the lock."
|
||||||
# http://bugs.python.org/issue3309
|
# http://bugs.python.org/issue3309
|
||||||
|
@ -217,6 +273,17 @@ class BZ2FileTest(BaseTest):
|
||||||
self.assertRaises(IOError, bz2f.write, b"a")
|
self.assertRaises(IOError, bz2f.write, b"a")
|
||||||
self.assertRaises(IOError, bz2f.writelines, [b"a"])
|
self.assertRaises(IOError, bz2f.writelines, [b"a"])
|
||||||
|
|
||||||
|
def testAppend(self):
|
||||||
|
# "Test BZ2File.write() in append mode"
|
||||||
|
with BZ2File(self.filename, "w") as bz2f:
|
||||||
|
self.assertRaises(TypeError, bz2f.write)
|
||||||
|
bz2f.write(self.TEXT)
|
||||||
|
with BZ2File(self.filename, "a") as bz2f:
|
||||||
|
self.assertRaises(TypeError, bz2f.write)
|
||||||
|
bz2f.write(self.TEXT)
|
||||||
|
with open(self.filename, 'rb') as f:
|
||||||
|
self.assertEqual(self.decompress(f.read()), self.TEXT * 2)
|
||||||
|
|
||||||
def testSeekForward(self):
|
def testSeekForward(self):
|
||||||
# "Test BZ2File.seek(150, 0)"
|
# "Test BZ2File.seek(150, 0)"
|
||||||
self.createTempFile()
|
self.createTempFile()
|
||||||
|
@ -225,6 +292,14 @@ class BZ2FileTest(BaseTest):
|
||||||
bz2f.seek(150)
|
bz2f.seek(150)
|
||||||
self.assertEqual(bz2f.read(), self.TEXT[150:])
|
self.assertEqual(bz2f.read(), self.TEXT[150:])
|
||||||
|
|
||||||
|
def testSeekForwardMultiStream(self):
|
||||||
|
# "Test BZ2File.seek(150, 0) across stream boundaries"
|
||||||
|
self.createTempFile(streams=2)
|
||||||
|
with BZ2File(self.filename) as bz2f:
|
||||||
|
self.assertRaises(TypeError, bz2f.seek)
|
||||||
|
bz2f.seek(len(self.TEXT) + 150)
|
||||||
|
self.assertEqual(bz2f.read(), self.TEXT[150:])
|
||||||
|
|
||||||
def testSeekBackwards(self):
|
def testSeekBackwards(self):
|
||||||
# "Test BZ2File.seek(-150, 1)"
|
# "Test BZ2File.seek(-150, 1)"
|
||||||
self.createTempFile()
|
self.createTempFile()
|
||||||
|
@ -233,6 +308,16 @@ class BZ2FileTest(BaseTest):
|
||||||
bz2f.seek(-150, 1)
|
bz2f.seek(-150, 1)
|
||||||
self.assertEqual(bz2f.read(), self.TEXT[500-150:])
|
self.assertEqual(bz2f.read(), self.TEXT[500-150:])
|
||||||
|
|
||||||
|
def testSeekBackwardsMultiStream(self):
|
||||||
|
# "Test BZ2File.seek(-150, 1) across stream boundaries"
|
||||||
|
self.createTempFile(streams=2)
|
||||||
|
with BZ2File(self.filename) as bz2f:
|
||||||
|
readto = len(self.TEXT) + 100
|
||||||
|
while readto > 0:
|
||||||
|
readto -= len(bz2f.read(readto))
|
||||||
|
bz2f.seek(-150, 1)
|
||||||
|
self.assertEqual(bz2f.read(), self.TEXT[100-150:] + self.TEXT)
|
||||||
|
|
||||||
def testSeekBackwardsFromEnd(self):
|
def testSeekBackwardsFromEnd(self):
|
||||||
# "Test BZ2File.seek(-150, 2)"
|
# "Test BZ2File.seek(-150, 2)"
|
||||||
self.createTempFile()
|
self.createTempFile()
|
||||||
|
@ -240,6 +325,13 @@ class BZ2FileTest(BaseTest):
|
||||||
bz2f.seek(-150, 2)
|
bz2f.seek(-150, 2)
|
||||||
self.assertEqual(bz2f.read(), self.TEXT[len(self.TEXT)-150:])
|
self.assertEqual(bz2f.read(), self.TEXT[len(self.TEXT)-150:])
|
||||||
|
|
||||||
|
def testSeekBackwardsFromEndMultiStream(self):
|
||||||
|
# "Test BZ2File.seek(-1000, 2) across stream boundaries"
|
||||||
|
self.createTempFile(streams=2)
|
||||||
|
with BZ2File(self.filename) as bz2f:
|
||||||
|
bz2f.seek(-1000, 2)
|
||||||
|
self.assertEqual(bz2f.read(), (self.TEXT * 2)[-1000:])
|
||||||
|
|
||||||
def testSeekPostEnd(self):
|
def testSeekPostEnd(self):
|
||||||
# "Test BZ2File.seek(150000)"
|
# "Test BZ2File.seek(150000)"
|
||||||
self.createTempFile()
|
self.createTempFile()
|
||||||
|
@ -248,6 +340,14 @@ class BZ2FileTest(BaseTest):
|
||||||
self.assertEqual(bz2f.tell(), len(self.TEXT))
|
self.assertEqual(bz2f.tell(), len(self.TEXT))
|
||||||
self.assertEqual(bz2f.read(), b"")
|
self.assertEqual(bz2f.read(), b"")
|
||||||
|
|
||||||
|
def testSeekPostEndMultiStream(self):
|
||||||
|
# "Test BZ2File.seek(150000) with a multi stream archive"
|
||||||
|
self.createTempFile(streams=5)
|
||||||
|
with BZ2File(self.filename) as bz2f:
|
||||||
|
bz2f.seek(150000)
|
||||||
|
self.assertEqual(bz2f.tell(), len(self.TEXT) * 5)
|
||||||
|
self.assertEqual(bz2f.read(), b"")
|
||||||
|
|
||||||
def testSeekPostEndTwice(self):
|
def testSeekPostEndTwice(self):
|
||||||
# "Test BZ2File.seek(150000) twice"
|
# "Test BZ2File.seek(150000) twice"
|
||||||
self.createTempFile()
|
self.createTempFile()
|
||||||
|
@ -257,6 +357,15 @@ class BZ2FileTest(BaseTest):
|
||||||
self.assertEqual(bz2f.tell(), len(self.TEXT))
|
self.assertEqual(bz2f.tell(), len(self.TEXT))
|
||||||
self.assertEqual(bz2f.read(), b"")
|
self.assertEqual(bz2f.read(), b"")
|
||||||
|
|
||||||
|
def testSeekPostEndTwiceMultiStream(self):
|
||||||
|
# "Test BZ2File.seek(150000) twice with a multi stream archive"
|
||||||
|
self.createTempFile(streams=5)
|
||||||
|
with BZ2File(self.filename) as bz2f:
|
||||||
|
bz2f.seek(150000)
|
||||||
|
bz2f.seek(150000)
|
||||||
|
self.assertEqual(bz2f.tell(), len(self.TEXT) * 5)
|
||||||
|
self.assertEqual(bz2f.read(), b"")
|
||||||
|
|
||||||
def testSeekPreStart(self):
|
def testSeekPreStart(self):
|
||||||
# "Test BZ2File.seek(-150, 0)"
|
# "Test BZ2File.seek(-150, 0)"
|
||||||
self.createTempFile()
|
self.createTempFile()
|
||||||
|
@ -265,6 +374,14 @@ class BZ2FileTest(BaseTest):
|
||||||
self.assertEqual(bz2f.tell(), 0)
|
self.assertEqual(bz2f.tell(), 0)
|
||||||
self.assertEqual(bz2f.read(), self.TEXT)
|
self.assertEqual(bz2f.read(), self.TEXT)
|
||||||
|
|
||||||
|
def testSeekPreStartMultiStream(self):
|
||||||
|
# "Test BZ2File.seek(-150, 0) with a multi stream archive"
|
||||||
|
self.createTempFile(streams=2)
|
||||||
|
with BZ2File(self.filename) as bz2f:
|
||||||
|
bz2f.seek(-150)
|
||||||
|
self.assertEqual(bz2f.tell(), 0)
|
||||||
|
self.assertEqual(bz2f.read(), self.TEXT * 2)
|
||||||
|
|
||||||
def testFileno(self):
|
def testFileno(self):
|
||||||
# "Test BZ2File.fileno()"
|
# "Test BZ2File.fileno()"
|
||||||
self.createTempFile()
|
self.createTempFile()
|
||||||
|
@ -510,6 +627,11 @@ class FuncTest(BaseTest):
|
||||||
# "Test decompress() function with incomplete data"
|
# "Test decompress() function with incomplete data"
|
||||||
self.assertRaises(ValueError, bz2.decompress, self.DATA[:-10])
|
self.assertRaises(ValueError, bz2.decompress, self.DATA[:-10])
|
||||||
|
|
||||||
|
def testDecompressMultiStream(self):
|
||||||
|
# "Test decompress() function for data with multiple streams"
|
||||||
|
text = bz2.decompress(self.DATA * 5)
|
||||||
|
self.assertEqual(text, self.TEXT * 5)
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
support.run_unittest(
|
support.run_unittest(
|
||||||
BZ2FileTest,
|
BZ2FileTest,
|
||||||
|
|
|
@ -161,6 +161,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #1625: BZ2File and bz2.decompress() now support multi-stream files.
|
||||||
|
Initial patch by Nir Aides.
|
||||||
|
|
||||||
- Issue #12175: BufferedReader.read(-1) now calls raw.readall() if available.
|
- Issue #12175: BufferedReader.read(-1) now calls raw.readall() if available.
|
||||||
|
|
||||||
- Issue #12175: FileIO.readall() now only reads the file position and size
|
- Issue #12175: FileIO.readall() now only reads the file position and size
|
||||||
|
|
Loading…
Reference in New Issue