Merge heads

Nadeem Vawda 2011-05-27 02:03:06 +02:00
commit 4e18ac850f
4 changed files with 192 additions and 23 deletions

View File

@@ -37,14 +37,18 @@ All of the classes in this module may safely be accessed from multiple threads.
    *fileobj*), or operate directly on a named file (named by *filename*).
    Exactly one of these two parameters should be provided.

-   The *mode* argument can be either ``'r'`` for reading (default), or ``'w'``
-   for writing.
+   The *mode* argument can be either ``'r'`` for reading (default), ``'w'`` for
+   overwriting, or ``'a'`` for appending. If *fileobj* is provided, a mode of
+   ``'w'`` does not truncate the file, and is instead equivalent to ``'a'``.

    The *buffering* argument is ignored. Its use is deprecated.

-   If *mode* is ``'w'``, *compresslevel* can be a number between ``1`` and
-   ``9`` specifying the level of compression: ``1`` produces the least
-   compression, and ``9`` (default) produces the most compression.
+   If *mode* is ``'w'`` or ``'a'``, *compresslevel* can be a number between
+   ``1`` and ``9`` specifying the level of compression: ``1`` produces the
+   least compression, and ``9`` (default) produces the most compression.
+
+   If *mode* is ``'r'``, the input file may be the concatenation of multiple
+   compressed streams.

    :class:`BZ2File` provides all of the members specified by the
    :class:`io.BufferedIOBase`, except for :meth:`detach` and :meth:`truncate`.
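A brief sketch of how the modes documented above combine once this patch is applied (not part of the commit itself; the file name ``example.bz2`` is illustrative)::

   import bz2

   # Write one stream, then append a second one using the new 'a' mode.
   with bz2.BZ2File("example.bz2", "w", compresslevel=9) as f:
       f.write(b"first stream\n")
   with bz2.BZ2File("example.bz2", "a", compresslevel=1) as f:
       f.write(b"second stream\n")

   # In 'r' mode the concatenated streams are read back transparently.
   with bz2.BZ2File("example.bz2") as f:
       assert f.read() == b"first stream\nsecond stream\n"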
@@ -70,6 +74,10 @@ All of the classes in this module may safely be accessed from multiple threads.
    .. versionchanged:: 3.3
       The *fileobj* argument to the constructor was added.

+   .. versionchanged:: 3.3
+      The ``'a'`` (append) mode was added, along with support for reading
+      multi-stream files.
+

 Incremental (de)compression
 ---------------------------
@@ -106,14 +114,20 @@ Incremental (de)compression
    incrementally. For one-shot compression, use the :func:`decompress` function
    instead.

+   .. note::
+      This class does not transparently handle inputs containing multiple
+      compressed streams, unlike :func:`decompress` and :class:`BZ2File`. If
+      you need to decompress a multi-stream input with :class:`BZ2Decompressor`,
+      you must use a new decompressor for each stream.
+
    .. method:: decompress(data)

       Provide data to the decompressor object. Returns a chunk of decompressed
       data if possible, or an empty byte string otherwise.

-      Attempting to decompress data after the end of stream is reached raises
-      an :exc:`EOFError`. If any data is found after the end of the stream, it
-      is ignored and saved in the :attr:`unused_data` attribute.
+      Attempting to decompress data after the end of the current stream is
+      reached raises an :exc:`EOFError`. If any data is found after the end of
+      the stream, it is ignored and saved in the :attr:`unused_data` attribute.

    .. attribute:: eof
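The note above implies a simple pattern for multi-stream input: use a fresh :class:`BZ2Decompressor` per stream, feeding it the previous decompressor's :attr:`unused_data`. A minimal sketch (illustrative data, not part of this patch)::

   import bz2

   data = bz2.compress(b"stream one") + bz2.compress(b"stream two")

   result = b""
   while data:
       decomp = bz2.BZ2Decompressor()
       result += decomp.decompress(data)
       if not decomp.eof:
           raise ValueError("input ended before the end-of-stream marker")
       data = decomp.unused_data   # start of the next stream, if any

   assert result == b"stream onestream two"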
@@ -127,6 +141,9 @@ Incremental (de)compression

       Data found after the end of the compressed stream.

+      If this attribute is accessed before the end of the stream has been
+      reached, its value will be ``b''``.
+

 One-shot (de)compression
 ------------------------
@@ -145,5 +162,11 @@ One-shot (de)compression

    Decompress *data*.

+   If *data* is the concatenation of multiple compressed streams, decompress
+   all of the streams.
+
    For incremental decompression, use a :class:`BZ2Decompressor` instead.

+   .. versionchanged:: 3.3
+      Support for multi-stream inputs was added.
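With this patch applied, concatenated outputs of :func:`compress` round-trip through the one-shot :func:`decompress` directly; a minimal sketch::

   import bz2

   payload = bz2.compress(b"spam") + bz2.compress(b"eggs")
   assert bz2.decompress(payload) == b"spameggs"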

View File

@@ -76,6 +76,10 @@ class BZ2File(io.BufferedIOBase):
             mode = "wb"
             mode_code = _MODE_WRITE
             self._compressor = BZ2Compressor()
+        elif mode in ("a", "ab"):
+            mode = "ab"
+            mode_code = _MODE_WRITE
+            self._compressor = BZ2Compressor()
         else:
             raise ValueError("Invalid mode: {!r}".format(mode))
@@ -161,14 +165,25 @@ class BZ2File(io.BufferedIOBase):
     def _fill_buffer(self):
         if self._buffer:
             return True
-        if self._decompressor.eof:
-            self._mode = _MODE_READ_EOF
-            self._size = self._pos
-            return False
-        rawblock = self._fp.read(_BUFFER_SIZE)
+
+        if self._decompressor.unused_data:
+            rawblock = self._decompressor.unused_data
+        else:
+            rawblock = self._fp.read(_BUFFER_SIZE)
+
         if not rawblock:
-            raise EOFError("Compressed file ended before the "
-                           "end-of-stream marker was reached")
+            if self._decompressor.eof:
+                self._mode = _MODE_READ_EOF
+                self._size = self._pos
+                return False
+            else:
+                raise EOFError("Compressed file ended before the "
+                               "end-of-stream marker was reached")
+
+        # Continue to next stream.
+        if self._decompressor.eof:
+            self._decompressor = BZ2Decompressor()
+
         self._buffer = self._decompressor.decompress(rawblock)
         return True
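Outside of BZ2File, the refill logic above corresponds roughly to the following standalone loop (a simplified sketch, not the module's code; ``path`` and the 8 KiB chunk size are arbitrary):

import bz2

def read_all_streams(path, chunk_size=8192):
    """Decompress a possibly multi-stream .bz2 file chunk by chunk."""
    output = b""
    with open(path, "rb") as fp:
        decomp = bz2.BZ2Decompressor()
        while True:
            # Prefer leftover bytes from the previous stream, else read more.
            rawblock = decomp.unused_data or fp.read(chunk_size)
            if not rawblock:
                if decomp.eof:
                    return output   # clean end of the last stream
                raise EOFError("file ended before the end-of-stream marker")
            if decomp.eof:
                # The previous stream is finished; start on the next one.
                decomp = bz2.BZ2Decompressor()
            output += decomp.decompress(rawblock)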
@@ -384,9 +399,15 @@ def decompress(data):
     """
     if len(data) == 0:
         return b""
-    decomp = BZ2Decompressor()
-    result = decomp.decompress(data)
-    if not decomp.eof:
-        raise ValueError("Compressed data ended before the "
-                         "end-of-stream marker was reached")
-    return result
+
+    result = b""
+    while True:
+        decomp = BZ2Decompressor()
+        result += decomp.decompress(data)
+        if not decomp.eof:
+            raise ValueError("Compressed data ended before the "
+                             "end-of-stream marker was reached")
+        if not decomp.unused_data:
+            return result
+        # There is unused data left over. Proceed to next stream.
+        data = decomp.unused_data

View File

@@ -84,9 +84,9 @@ class BZ2FileTest(BaseTest):
         else:
             return self.DATA

-    def createTempFile(self, crlf=False):
+    def createTempFile(self, crlf=False, streams=1):
         with open(self.filename, "wb") as f:
-            f.write(self.getData(crlf))
+            f.write(self.getData(crlf) * streams)

     def testRead(self):
         # "Test BZ2File.read()"
@@ -95,6 +95,26 @@ class BZ2FileTest(BaseTest):
self.assertRaises(TypeError, bz2f.read, None)
self.assertEqual(bz2f.read(), self.TEXT)
def testReadMultiStream(self):
# "Test BZ2File.read() with a multi stream archive"
self.createTempFile(streams=5)
with BZ2File(self.filename) as bz2f:
self.assertRaises(TypeError, bz2f.read, None)
self.assertEqual(bz2f.read(), self.TEXT * 5)
def testReadMonkeyMultiStream(self):
# "Test BZ2File.read() with a multi stream archive in which stream"
# "end is alined with internal buffer size"
buffer_size = bz2._BUFFER_SIZE
bz2._BUFFER_SIZE = len(self.DATA)
try:
self.createTempFile(streams=5)
with BZ2File(self.filename) as bz2f:
self.assertRaises(TypeError, bz2f.read, None)
self.assertEqual(bz2f.read(), self.TEXT * 5)
finally:
bz2._BUFFER_SIZE = buffer_size
def testRead0(self):
# "Test BBZ2File.read(0)"
self.createTempFile()
@@ -114,6 +134,18 @@ class BZ2FileTest(BaseTest):
text += str
self.assertEqual(text, self.TEXT)
def testReadChunk10MultiStream(self):
# "Test BZ2File.read() in chunks of 10 bytes with a multi stream archive"
self.createTempFile(streams=5)
with BZ2File(self.filename) as bz2f:
text = b''
while 1:
str = bz2f.read(10)
if not str:
break
text += str
self.assertEqual(text, self.TEXT * 5)
def testRead100(self):
# "Test BZ2File.read(100)"
self.createTempFile()
@@ -151,6 +183,15 @@ class BZ2FileTest(BaseTest):
for line in sio.readlines():
self.assertEqual(bz2f.readline(), line)
def testReadLineMultiStream(self):
# "Test BZ2File.readline() with a multi stream archive"
self.createTempFile(streams=5)
with BZ2File(self.filename) as bz2f:
self.assertRaises(TypeError, bz2f.readline, None)
sio = BytesIO(self.TEXT * 5)
for line in sio.readlines():
self.assertEqual(bz2f.readline(), line)
def testReadLines(self):
# "Test BZ2File.readlines()"
self.createTempFile()
@@ -159,6 +200,14 @@ class BZ2FileTest(BaseTest):
sio = BytesIO(self.TEXT)
self.assertEqual(bz2f.readlines(), sio.readlines())
def testReadLinesMultiStream(self):
# "Test BZ2File.readlines() with a multi stream archive"
self.createTempFile(streams=5)
with BZ2File(self.filename) as bz2f:
self.assertRaises(TypeError, bz2f.readlines, None)
sio = BytesIO(self.TEXT * 5)
self.assertEqual(bz2f.readlines(), sio.readlines())
def testIterator(self):
# "Test iter(BZ2File)"
self.createTempFile()
@@ -166,6 +215,13 @@ class BZ2FileTest(BaseTest):
sio = BytesIO(self.TEXT)
self.assertEqual(list(iter(bz2f)), sio.readlines())
def testIteratorMultiStream(self):
# "Test iter(BZ2File) with a multi stream archive"
self.createTempFile(streams=5)
with BZ2File(self.filename) as bz2f:
sio = BytesIO(self.TEXT * 5)
self.assertEqual(list(iter(bz2f)), sio.readlines())
def testClosedIteratorDeadlock(self):
# "Test that iteration on a closed bz2file releases the lock."
# http://bugs.python.org/issue3309
@@ -217,6 +273,17 @@ class BZ2FileTest(BaseTest):
self.assertRaises(IOError, bz2f.write, b"a")
self.assertRaises(IOError, bz2f.writelines, [b"a"])
def testAppend(self):
# "Test BZ2File.write()"
with BZ2File(self.filename, "w") as bz2f:
self.assertRaises(TypeError, bz2f.write)
bz2f.write(self.TEXT)
with BZ2File(self.filename, "a") as bz2f:
self.assertRaises(TypeError, bz2f.write)
bz2f.write(self.TEXT)
with open(self.filename, 'rb') as f:
self.assertEqual(self.decompress(f.read()), self.TEXT * 2)
def testSeekForward(self):
# "Test BZ2File.seek(150, 0)"
self.createTempFile()
@@ -225,6 +292,14 @@ class BZ2FileTest(BaseTest):
bz2f.seek(150)
self.assertEqual(bz2f.read(), self.TEXT[150:])
def testSeekForwardMultiStream(self):
# "Test BZ2File.seek(150, 0) across stream boundaries"
self.createTempFile(streams=2)
with BZ2File(self.filename) as bz2f:
self.assertRaises(TypeError, bz2f.seek)
bz2f.seek(len(self.TEXT) + 150)
self.assertEqual(bz2f.read(), self.TEXT[150:])
def testSeekBackwards(self):
# "Test BZ2File.seek(-150, 1)"
self.createTempFile()
@@ -233,6 +308,16 @@ class BZ2FileTest(BaseTest):
bz2f.seek(-150, 1)
self.assertEqual(bz2f.read(), self.TEXT[500-150:])
def testSeekBackwardsMultiStream(self):
# "Test BZ2File.seek(-150, 1) across stream boundaries"
self.createTempFile(streams=2)
with BZ2File(self.filename) as bz2f:
readto = len(self.TEXT) + 100
while readto > 0:
readto -= len(bz2f.read(readto))
bz2f.seek(-150, 1)
self.assertEqual(bz2f.read(), self.TEXT[100-150:] + self.TEXT)
def testSeekBackwardsFromEnd(self):
# "Test BZ2File.seek(-150, 2)"
self.createTempFile()
@@ -240,6 +325,13 @@ class BZ2FileTest(BaseTest):
bz2f.seek(-150, 2)
self.assertEqual(bz2f.read(), self.TEXT[len(self.TEXT)-150:])
def testSeekBackwardsFromEndMultiStream(self):
# "Test BZ2File.seek(-1000, 2) across stream boundaries"
self.createTempFile(streams=2)
with BZ2File(self.filename) as bz2f:
bz2f.seek(-1000, 2)
self.assertEqual(bz2f.read(), (self.TEXT * 2)[-1000:])
def testSeekPostEnd(self):
# "Test BZ2File.seek(150000)"
self.createTempFile()
@@ -248,6 +340,14 @@ class BZ2FileTest(BaseTest):
self.assertEqual(bz2f.tell(), len(self.TEXT))
self.assertEqual(bz2f.read(), b"")
def testSeekPostEndMultiStream(self):
# "Test BZ2File.seek(150000)"
self.createTempFile(streams=5)
with BZ2File(self.filename) as bz2f:
bz2f.seek(150000)
self.assertEqual(bz2f.tell(), len(self.TEXT) * 5)
self.assertEqual(bz2f.read(), b"")
def testSeekPostEndTwice(self):
# "Test BZ2File.seek(150000) twice"
self.createTempFile()
@@ -257,6 +357,15 @@ class BZ2FileTest(BaseTest):
self.assertEqual(bz2f.tell(), len(self.TEXT))
self.assertEqual(bz2f.read(), b"")
def testSeekPostEndTwiceMultiStream(self):
# "Test BZ2File.seek(150000) twice with a multi stream archive"
self.createTempFile(streams=5)
with BZ2File(self.filename) as bz2f:
bz2f.seek(150000)
bz2f.seek(150000)
self.assertEqual(bz2f.tell(), len(self.TEXT) * 5)
self.assertEqual(bz2f.read(), b"")
def testSeekPreStart(self):
# "Test BZ2File.seek(-150, 0)"
self.createTempFile()
@@ -265,6 +374,14 @@ class BZ2FileTest(BaseTest):
self.assertEqual(bz2f.tell(), 0)
self.assertEqual(bz2f.read(), self.TEXT)
def testSeekPreStartMultiStream(self):
# "Test BZ2File.seek(-150, 0) with a multi stream archive"
self.createTempFile(streams=2)
with BZ2File(self.filename) as bz2f:
bz2f.seek(-150)
self.assertEqual(bz2f.tell(), 0)
self.assertEqual(bz2f.read(), self.TEXT * 2)
def testFileno(self):
# "Test BZ2File.fileno()"
self.createTempFile()
@@ -510,6 +627,11 @@ class FuncTest(BaseTest):
# "Test decompress() function with incomplete data"
self.assertRaises(ValueError, bz2.decompress, self.DATA[:-10])
def testDecompressMultiStream(self):
# "Test decompress() function for data with multiple streams"
text = bz2.decompress(self.DATA * 5)
self.assertEqual(text, self.TEXT * 5)
def test_main():
support.run_unittest(
BZ2FileTest,

View File

@@ -161,6 +161,9 @@ Core and Builtins
Library
-------
- Issue #1625: BZ2File and bz2.decompress() now support multi-stream files.
Initial patch by Nir Aides.
- Issue #8796: codecs.open() calls the builtin open() function instead of using
StreamReaderWriter. Deprecate StreamReader, StreamWriter, StreamReaderWriter,
StreamRecoder and EncodedFile() of the codec module. Use the builtin open()