Closes #13989: Add support for text modes to gzip.open().
Also, add tests for gzip.open().
This commit is contained in:
parent
54c74ece28
commit
7e126205e6
|
@ -13,9 +13,11 @@ like the GNU programs :program:`gzip` and :program:`gunzip` would.
|
|||
|
||||
The data compression is provided by the :mod:`zlib` module.
|
||||
|
||||
The :mod:`gzip` module provides the :class:`GzipFile` class. The :class:`GzipFile`
|
||||
class reads and writes :program:`gzip`\ -format files, automatically compressing
|
||||
or decompressing the data so that it looks like an ordinary :term:`file object`.
|
||||
The :mod:`gzip` module provides the :class:`GzipFile` class, as well as the
|
||||
:func:`gzip.open`, :func:`compress` and :func:`decompress` convenience
|
||||
functions. The :class:`GzipFile` class reads and writes :program:`gzip`\ -format
|
||||
files, automatically compressing or decompressing the data so that it looks like
|
||||
an ordinary :term:`file object`.
|
||||
|
||||
Note that additional file formats which can be decompressed by the
|
||||
:program:`gzip` and :program:`gunzip` programs, such as those produced by
|
||||
|
@ -24,6 +26,32 @@ Note that additional file formats which can be decompressed by the
|
|||
The module defines the following items:
|
||||
|
||||
|
||||
.. function:: open(filename, mode='rb', compresslevel=9, encoding=None, errors=None, newline=None)
|
||||
|
||||
Open *filename* as a gzip-compressed file in binary or text mode.
|
||||
|
||||
Returns a :term:`file object`.
|
||||
|
||||
The *mode* argument can be any of ``'r'``, ``'rb'``, ``'a'``, ``'ab'``,
|
||||
``'w'``, or ``'wb'`` for binary mode, or ``'rt'``, ``'at'``, or ``'wt'`` for
|
||||
text mode. The default is ``'rb'``.
|
||||
|
||||
The *compresslevel* argument is an integer from 1 to 9, as for the
|
||||
:class:`GzipFile` constructor.
|
||||
|
||||
For binary mode, this function is equivalent to the :class:`GzipFile`
|
||||
constructor: ``GzipFile(filename, mode, compresslevel)``. In this case, the
|
||||
*encoding*, *errors* and *newline* arguments must not be provided.
|
||||
|
||||
For text mode, a :class:`GzipFile` object is created, and wrapped in an
|
||||
:class:`io.TextIOWrapper` instance with the specified encoding, error
|
||||
handling behavior, and line ending(s).
|
||||
|
||||
.. versionchanged:: 3.3
|
||||
Support for text mode was added, along with the *encoding*, *errors* and
|
||||
*newline* arguments.
|
||||
|
||||
|
||||
.. class:: GzipFile(filename=None, mode=None, compresslevel=9, fileobj=None, mtime=None)
|
||||
|
||||
Constructor for the :class:`GzipFile` class, which simulates most of the
|
||||
|
@ -46,9 +74,9 @@ The module defines the following items:
|
|||
or ``'wb'``, depending on whether the file will be read or written. The default
|
||||
is the mode of *fileobj* if discernible; otherwise, the default is ``'rb'``.
|
||||
|
||||
Note that the file is always opened in binary mode; text mode is not
|
||||
supported. If you need to read a compressed file in text mode, wrap your
|
||||
:class:`GzipFile` with an :class:`io.TextIOWrapper`.
|
||||
Note that the file is always opened in binary mode. To open a compressed file
|
||||
in text mode, use :func:`gzip.open` (or wrap your :class:`GzipFile` with an
|
||||
:class:`io.TextIOWrapper`).
|
||||
|
||||
The *compresslevel* argument is an integer from ``1`` to ``9`` controlling the
|
||||
level of compression; ``1`` is fastest and produces the least compression, and
|
||||
|
@ -97,12 +125,6 @@ The module defines the following items:
|
|||
The :meth:`io.BufferedIOBase.read1` method is now implemented.
|
||||
|
||||
|
||||
.. function:: open(filename, mode='rb', compresslevel=9)
|
||||
|
||||
This is a shorthand for ``GzipFile(filename,`` ``mode,`` ``compresslevel)``.
|
||||
The *filename* argument is required; *mode* defaults to ``'rb'`` and
|
||||
*compresslevel* defaults to ``9``.
|
||||
|
||||
.. function:: compress(data, compresslevel=9)
|
||||
|
||||
Compress the *data*, returning a :class:`bytes` object containing
|
||||
|
|
42
Lib/gzip.py
42
Lib/gzip.py
|
@ -16,6 +16,39 @@ FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
|
|||
|
||||
READ, WRITE = 1, 2
|
||||
|
||||
def open(filename, mode="rb", compresslevel=9,
         encoding=None, errors=None, newline=None):
    """Open a gzip-compressed file in binary or text mode.

    The mode argument can be "r", "rb", "w", "wb", "a" or "ab" for binary mode,
    or "rt", "wt" or "at" for text mode. The default mode is "rb", and the
    default compresslevel is 9.

    For binary mode, this function is equivalent to the GzipFile constructor:
    GzipFile(filename, mode, compresslevel). In this case, the encoding, errors
    and newline arguments must not be provided.

    For text mode, a GzipFile object is created, and wrapped in an
    io.TextIOWrapper instance with the specified encoding, error handling
    behavior, and line ending(s).

    Raises ValueError if mode contains both "t" and "b", or if encoding,
    errors or newline is given in binary mode.

    """
    text_mode = "t" in mode
    if text_mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        # Text-only options are rejected in binary mode; checked in the same
        # order (encoding, errors, newline) so the first offender is reported.
        text_only_args = (("encoding", encoding),
                          ("errors", errors),
                          ("newline", newline))
        for arg_name, arg_value in text_only_args:
            if arg_value is not None:
                raise ValueError("Argument %r not supported in binary mode"
                                 % (arg_name,))

    # GzipFile only understands binary modes, so strip the "t" flag.
    binary_file = GzipFile(filename, mode.replace("t", ""), compresslevel)
    if not text_mode:
        return binary_file

    try:
        return io.TextIOWrapper(binary_file, encoding, errors, newline)
    except BaseException:
        # The wrapper could not be built (e.g. unknown encoding); do not
        # leave the underlying compressed file open.
        binary_file.close()
        raise
|
||||
|
||||
def write32u(output, value):
|
||||
# The L format writes the bit pattern correctly whether signed
|
||||
# or unsigned.
|
||||
|
@ -24,15 +57,6 @@ def write32u(output, value):
|
|||
def read32(input):
    """Read four bytes from *input* and return them decoded as a
    little-endian unsigned 32-bit integer."""
    raw = input.read(4)
    (value,) = struct.unpack("<I", raw)
    return value
|
||||
|
||||
def open(filename, mode="rb", compresslevel=9):
    """Convenience wrapper around the GzipFile constructor.

    Only *filename* must be supplied. When omitted, *mode* is 'rb' and
    *compresslevel* is 9; all three values are handed to GzipFile as-is.

    """
    gzip_file = GzipFile(filename, mode, compresslevel)
    return gzip_file
|
||||
|
||||
class _PaddedFile:
|
||||
"""Minimal read-only file object that prepends a string to the contents
|
||||
of an actual file. Shouldn't be used outside of gzip.py, as it lacks
|
||||
|
|
|
@ -374,6 +374,94 @@ class TestGzip(unittest.TestCase):
|
|||
datac = gzip.compress(data)
|
||||
self.assertEqual(gzip.decompress(datac), data)
|
||||
|
||||
# Test the 'open' convenience function.
|
||||
|
||||
def test_open_binary(self):
    # Round-trip data through gzip.open() with the explicit binary modes
    # "wb", "rb" and "ab".
    payload = data1 * 50

    # Write, then verify the on-disk bytes independently of gzip.open().
    with gzip.open(self.filename, "wb") as gz:
        gz.write(payload)
    with open(self.filename, "rb") as raw:
        on_disk = gzip.decompress(raw.read())
    self.assertEqual(on_disk, payload)

    # Read back through gzip.open().
    with gzip.open(self.filename, "rb") as gz:
        self.assertEqual(gz.read(), payload)

    # Append; the file must now decompress to the payload twice over.
    with gzip.open(self.filename, "ab") as gz:
        gz.write(payload)
    with open(self.filename, "rb") as raw:
        on_disk = gzip.decompress(raw.read())
    self.assertEqual(on_disk, payload * 2)
|
||||
|
||||
def test_open_default_binary(self):
    # Mode strings with neither "b" nor "t" ("w", "r", "a") must behave
    # as binary modes.
    payload = data1 * 50

    # Write, then verify the on-disk bytes independently of gzip.open().
    with gzip.open(self.filename, "w") as gz:
        gz.write(payload)
    with open(self.filename, "rb") as raw:
        on_disk = gzip.decompress(raw.read())
    self.assertEqual(on_disk, payload)

    # Read back through gzip.open().
    with gzip.open(self.filename, "r") as gz:
        self.assertEqual(gz.read(), payload)

    # Append; the file must now decompress to the payload twice over.
    with gzip.open(self.filename, "a") as gz:
        gz.write(payload)
    with open(self.filename, "rb") as raw:
        on_disk = gzip.decompress(raw.read())
    self.assertEqual(on_disk, payload * 2)
|
||||
|
||||
def test_open_text(self):
    # Test text modes ("wt", "rt", "at").
    import os  # local import: the file-level import block is not visible here

    uncompressed = data1.decode("ascii") * 50
    # With the default newline setting, text-mode writes translate "\n" to
    # os.linesep, so the bytes on disk must be compared against the
    # translated text rather than the original string.
    uncompressed_raw = uncompressed.replace("\n", os.linesep)
    # The encoding is given explicitly so the test does not depend on the
    # locale's default encoding.
    with gzip.open(self.filename, "wt", encoding="ascii") as f:
        f.write(uncompressed)
    with open(self.filename, "rb") as f:
        file_data = gzip.decompress(f.read()).decode("ascii")
    self.assertEqual(file_data, uncompressed_raw)
    # Reading in text mode applies universal newlines, giving back "\n".
    with gzip.open(self.filename, "rt", encoding="ascii") as f:
        self.assertEqual(f.read(), uncompressed)
    with gzip.open(self.filename, "at", encoding="ascii") as f:
        f.write(uncompressed)
    with open(self.filename, "rb") as f:
        file_data = gzip.decompress(f.read()).decode("ascii")
    self.assertEqual(file_data, uncompressed_raw * 2)
|
||||
|
||||
def test_open_bad_params(self):
    # Invalid parameter combinations must be rejected with ValueError.

    # "b" and "t" are mutually exclusive.
    with self.assertRaises(ValueError):
        gzip.open(self.filename, "wbt")

    # Text-only keyword arguments are not allowed in binary mode.
    text_only_kwargs = (
        {"encoding": "utf-8"},
        {"errors": "ignore"},
        {"newline": "\n"},
    )
    for kwargs in text_only_kwargs:
        with self.assertRaises(ValueError):
            gzip.open(self.filename, "rb", **kwargs)
|
||||
|
||||
def test_open_with_encoding(self):
    # Test non-default encoding.
    import os  # local import: the file-level import block is not visible here

    uncompressed = data1.decode("ascii") * 50
    # With the default newline setting, text-mode writes translate "\n" to
    # os.linesep, so the decoded on-disk data must be compared against the
    # translated text.
    uncompressed_raw = uncompressed.replace("\n", os.linesep)
    with gzip.open(self.filename, "wt", encoding="utf-16") as f:
        f.write(uncompressed)
    with open(self.filename, "rb") as f:
        file_data = gzip.decompress(f.read()).decode("utf-16")
    self.assertEqual(file_data, uncompressed_raw)
    # Reading in text mode applies universal newlines, giving back "\n".
    with gzip.open(self.filename, "rt", encoding="utf-16") as f:
        self.assertEqual(f.read(), uncompressed)
|
||||
|
||||
def test_open_with_encoding_error_handler(self):
    # A byte that is not valid ASCII must be dropped when the file is read
    # in text mode with errors="ignore".
    with gzip.open(self.filename, "wb") as gz:
        gz.write(b"foo\xffbar")
    reader = gzip.open(self.filename, "rt", encoding="ascii",
                       errors="ignore")
    with reader:
        self.assertEqual(reader.read(), "foobar")
|
||||
|
||||
def test_open_with_newline(self):
    # Test with explicit newline (universal newline mode disabled).
    uncompressed = data1.decode("ascii") * 50
    # Write with newline="\n" so no platform-specific translation takes
    # place; otherwise "\n" may be stored as "\r\n" and the "\r"-delimited
    # read below would split the data into many lines.
    with gzip.open(self.filename, "wt", encoding="ascii", newline="\n") as f:
        f.write(uncompressed)
    # With newline="\r" only "\r" terminates a line, so the whole content is
    # expected to come back as a single "line".
    # NOTE(review): this assumes data1 contains no "\r" - confirm.
    with gzip.open(self.filename, "rt", encoding="ascii", newline="\r") as f:
        self.assertEqual(f.readlines(), [uncompressed])
|
||||
|
||||
def test_main(verbose=None):
    """Run the TestGzip test case; *verbose* is accepted but not used."""
    test_cases = (TestGzip,)
    support.run_unittest(*test_cases)
|
||||
|
||||
|
|
|
@ -17,6 +17,8 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #13989: Add support for text mode to gzip.open().
|
||||
|
||||
- Issue #14127: The os.stat() result object now provides three additional
|
||||
fields: st_ctime_ns, st_mtime_ns, and st_atime_ns, providing those times as an
|
||||
integer with nanosecond resolution. The functions os.utime(), os.lutimes(),
|
||||
|
|
Loading…
Reference in New Issue