Closes #13989: Add support for text modes to gzip.open().

Also, add tests for gzip.open().
This commit is contained in:
Nadeem Vawda 2012-05-06 15:04:01 +02:00
parent 54c74ece28
commit 7e126205e6
4 changed files with 157 additions and 21 deletions

View File

@ -13,9 +13,11 @@ like the GNU programs :program:`gzip` and :program:`gunzip` would.
The data compression is provided by the :mod:`zlib` module.
The :mod:`gzip` module provides the :class:`GzipFile` class. The :class:`GzipFile`
class reads and writes :program:`gzip`\ -format files, automatically compressing
or decompressing the data so that it looks like an ordinary :term:`file object`.
The :mod:`gzip` module provides the :class:`GzipFile` class, as well as the
:func:`gzip.open`, :func:`compress` and :func:`decompress` convenience
functions. The :class:`GzipFile` class reads and writes :program:`gzip`\ -format
files, automatically compressing or decompressing the data so that it looks like
an ordinary :term:`file object`.
Note that additional file formats which can be decompressed by the
:program:`gzip` and :program:`gunzip` programs, such as those produced by
@ -24,6 +26,32 @@ Note that additional file formats which can be decompressed by the
The module defines the following items:
.. function:: open(filename, mode='rb', compresslevel=9, encoding=None, errors=None, newline=None)
Open *filename* as a gzip-compressed file in binary or text mode.
Returns a :term:`file object`.
The *mode* argument can be any of ``'r'``, ``'rb'``, ``'a'``, ``'ab'``,
``'w'``, or ``'wb'`` for binary mode, or ``'rt'``, ``'at'``, or ``'wt'`` for
text mode. The default is ``'rb'``.
The *compresslevel* argument is an integer from 1 to 9, as for the
:class:`GzipFile` constructor.
For binary mode, this function is equivalent to the :class:`GzipFile`
constructor: ``GzipFile(filename, mode, compresslevel)``. In this case, the
*encoding*, *errors* and *newline* arguments must not be provided.
For text mode, a :class:`GzipFile` object is created, and wrapped in an
:class:`io.TextIOWrapper` instance with the specified encoding, error
handling behavior, and line ending(s).
.. versionchanged:: 3.3
Support for text mode was added, along with the *encoding*, *errors* and
*newline* arguments.
.. class:: GzipFile(filename=None, mode=None, compresslevel=9, fileobj=None, mtime=None)
Constructor for the :class:`GzipFile` class, which simulates most of the
@ -46,9 +74,9 @@ The module defines the following items:
or ``'wb'``, depending on whether the file will be read or written. The default
is the mode of *fileobj* if discernible; otherwise, the default is ``'rb'``.
Note that the file is always opened in binary mode; text mode is not
supported. If you need to read a compressed file in text mode, wrap your
:class:`GzipFile` with an :class:`io.TextIOWrapper`.
Note that the file is always opened in binary mode. To open a compressed file
in text mode, use :func:`gzip.open` (or wrap your :class:`GzipFile` with an
:class:`io.TextIOWrapper`).
The *compresslevel* argument is an integer from ``1`` to ``9`` controlling the
level of compression; ``1`` is fastest and produces the least compression, and
@ -97,12 +125,6 @@ The module defines the following items:
The :meth:`io.BufferedIOBase.read1` method is now implemented.
.. function:: open(filename, mode='rb', compresslevel=9)
This is a shorthand for ``GzipFile(filename, mode, compresslevel)``.
The *filename* argument is required; *mode* defaults to ``'rb'`` and
*compresslevel* defaults to ``9``.
.. function:: compress(data, compresslevel=9)
Compress the *data*, returning a :class:`bytes` object containing

View File

@ -16,6 +16,39 @@ FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
READ, WRITE = 1, 2
def open(filename, mode="rb", compresslevel=9,
         encoding=None, errors=None, newline=None):
    """Open a gzip-compressed file in binary or text mode.

    The mode argument can be "r", "rb", "w", "wb", "a" or "ab" for binary mode,
    or "rt", "wt" or "at" for text mode. The default mode is "rb", and the
    default compresslevel is 9.

    For binary mode, this function is equivalent to the GzipFile constructor:
    GzipFile(filename, mode, compresslevel). In this case, the encoding, errors
    and newline arguments must not be provided.

    For text mode, a GzipFile object is created, and wrapped in an
    io.TextIOWrapper instance with the specified encoding, error handling
    behavior, and line ending(s).

    Raises ValueError if mode contains both "t" and "b", or if encoding,
    errors or newline is given for a binary mode.

    """
    text_mode = "t" in mode
    if text_mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        # The text-layer arguments are meaningless without a TextIOWrapper.
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    # GzipFile only understands binary modes, so drop the "t" flag.
    binary_file = GzipFile(filename, mode.replace("t", ""), compresslevel)
    if not text_mode:
        return binary_file
    try:
        return io.TextIOWrapper(binary_file, encoding, errors, newline)
    except Exception:
        # The wrapper could not be built (for example, an unknown encoding);
        # close the GzipFile we just opened instead of leaking it.
        binary_file.close()
        raise
def write32u(output, value):
# The L format writes the bit pattern correctly whether signed
# or unsigned.
@ -24,15 +57,6 @@ def write32u(output, value):
def read32(input):
    """Read 4 bytes from *input* and return them as an unsigned integer.

    The bytes are decoded as a little-endian 32-bit value ("<I").
    """
    packed = input.read(4)
    (value,) = struct.unpack("<I", packed)
    return value
def open(filename, mode="rb", compresslevel=9):
    """Create and return a GzipFile for *filename*.

    Convenience wrapper around the GzipFile constructor. *filename* must be
    supplied; *mode* falls back to 'rb' and *compresslevel* to 9 when
    omitted.

    """
    return GzipFile(filename=filename, mode=mode, compresslevel=compresslevel)
class _PaddedFile:
"""Minimal read-only file object that prepends a string to the contents
of an actual file. Shouldn't be used outside of gzip.py, as it lacks

View File

@ -374,6 +374,94 @@ class TestGzip(unittest.TestCase):
datac = gzip.compress(data)
self.assertEqual(gzip.decompress(datac), data)
# Test the 'open' convenience function.
def test_open_binary(self):
    # Round-trip data through gzip.open() using the explicit binary modes
    # "wb", "rb" and "ab".
    payload = data1 * 50

    # Write, then check the file on disk with an independent decompressor.
    with gzip.open(self.filename, "wb") as gz:
        gz.write(payload)
    with open(self.filename, "rb") as raw:
        on_disk = gzip.decompress(raw.read())
        self.assertEqual(on_disk, payload)

    # Read the same file back through gzip.open().
    with gzip.open(self.filename, "rb") as gz:
        self.assertEqual(gz.read(), payload)

    # Append a second copy; the file must now decompress to both copies.
    with gzip.open(self.filename, "ab") as gz:
        gz.write(payload)
    with open(self.filename, "rb") as raw:
        on_disk = gzip.decompress(raw.read())
        self.assertEqual(on_disk, payload * 2)
def test_open_default_binary(self):
    # Mode strings with neither "b" nor "t" ("w", "r", "a") must behave as
    # binary modes.
    payload = data1 * 50

    # Write, then check the file on disk with an independent decompressor.
    with gzip.open(self.filename, "w") as gz:
        gz.write(payload)
    with open(self.filename, "rb") as raw:
        on_disk = gzip.decompress(raw.read())
        self.assertEqual(on_disk, payload)

    # Read the same file back through gzip.open().
    with gzip.open(self.filename, "r") as gz:
        self.assertEqual(gz.read(), payload)

    # Append a second copy; the file must now decompress to both copies.
    with gzip.open(self.filename, "a") as gz:
        gz.write(payload)
    with open(self.filename, "rb") as raw:
        on_disk = gzip.decompress(raw.read())
        self.assertEqual(on_disk, payload * 2)
def test_open_text(self):
    # Test text modes ("wt", "rt", "at").
    import os  # local import: only os.linesep is needed here

    uncompressed = data1.decode("ascii") * 50
    # With the default newline=None, text-mode writes translate "\n" to
    # os.linesep, so the raw decompressed bytes must be compared against
    # the translated text rather than the original string.
    uncompressed_raw = uncompressed.replace("\n", os.linesep)
    # The encoding is given explicitly so the test does not depend on the
    # locale's default encoding.
    with gzip.open(self.filename, "wt", encoding="ascii") as f:
        f.write(uncompressed)
    with open(self.filename, "rb") as f:
        file_data = gzip.decompress(f.read()).decode("ascii")
        self.assertEqual(file_data, uncompressed_raw)
    # Reading in text mode uses universal newlines, which maps the
    # platform line ending back to "\n".
    with gzip.open(self.filename, "rt", encoding="ascii") as f:
        self.assertEqual(f.read(), uncompressed)
    with gzip.open(self.filename, "at", encoding="ascii") as f:
        f.write(uncompressed)
    with open(self.filename, "rb") as f:
        file_data = gzip.decompress(f.read()).decode("ascii")
        self.assertEqual(file_data, uncompressed_raw * 2)
def test_open_bad_params(self):
    # Invalid parameter combinations must be rejected with ValueError.

    # "b" and "t" cannot be combined in one mode string.
    with self.assertRaises(ValueError):
        gzip.open(self.filename, "wbt")

    # Text-layer keyword arguments are not accepted in binary mode.
    text_only_kwargs = (
        {"encoding": "utf-8"},
        {"errors": "ignore"},
        {"newline": "\n"},
    )
    for kwargs in text_only_kwargs:
        with self.assertRaises(ValueError):
            gzip.open(self.filename, "rb", **kwargs)
def test_open_with_encoding(self):
    # Test non-default encoding.
    import os  # local import: only os.linesep is needed here

    uncompressed = data1.decode("ascii") * 50
    # With the default newline=None, text-mode writes translate "\n" to
    # os.linesep, so the raw decoded file contents must be compared
    # against the translated text.
    uncompressed_raw = uncompressed.replace("\n", os.linesep)
    with gzip.open(self.filename, "wt", encoding="utf-16") as f:
        f.write(uncompressed)
    with open(self.filename, "rb") as f:
        file_data = gzip.decompress(f.read()).decode("utf-16")
        self.assertEqual(file_data, uncompressed_raw)
    # Reading in text mode uses universal newlines, which maps the
    # platform line ending back to "\n".
    with gzip.open(self.filename, "rt", encoding="utf-16") as f:
        self.assertEqual(f.read(), uncompressed)
def test_open_with_encoding_error_handler(self):
    # A byte that is not valid ASCII must be dropped when the file is read
    # in text mode with errors="ignore".
    with gzip.open(self.filename, "wb") as writer:
        writer.write(b"foo\xffbar")
    reader = gzip.open(self.filename, "rt", encoding="ascii", errors="ignore")
    with reader:
        decoded = reader.read()
        self.assertEqual(decoded, "foobar")
def test_open_with_newline(self):
    # Test with explicit newline (universal newline mode disabled).
    uncompressed = data1.decode("ascii") * 50
    # Write with newline="\n" so no line-ending translation happens.
    # With the default, "\n" would be written as os.linesep, and on a
    # platform whose line separator contains "\r" the read below would
    # split the data into many lines.
    with gzip.open(self.filename, "wt", encoding="ascii", newline="\n") as f:
        f.write(uncompressed)
    # With newline="\r" only "\r" terminates a line, so the data is expected
    # to come back as a single line.
    with gzip.open(self.filename, "rt", encoding="ascii", newline="\r") as f:
        self.assertEqual(f.readlines(), [uncompressed])
def test_main(verbose=None):
    """Run the TestGzip test case via support.run_unittest().

    The *verbose* argument is accepted but not used by this function.
    """
    test_classes = (TestGzip,)
    support.run_unittest(*test_classes)

View File

@ -17,6 +17,8 @@ Core and Builtins
Library
-------
- Issue #13989: Add support for text mode to gzip.open().
- Issue #14127: The os.stat() result object now provides three additional
fields: st_ctime_ns, st_mtime_ns, and st_atime_ns, providing those times as an
integer with nanosecond resolution. The functions os.utime(), os.lutimes(),