From bca63b362d23f154a5ed7fe43e4146977bba181e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 23 Mar 2015 14:59:48 +0200 Subject: [PATCH] Issue #23688: Added support of arbitrary bytes-like objects and avoided unnecessary copying of memoryview in gzip.GzipFile.write(). Original patch by Wolfgang Maier. --- Doc/library/gzip.rst | 4 ++++ Lib/gzip.py | 19 +++++++++++-------- Lib/test/test_gzip.py | 37 +++++++++++++++++++++++++++++++++++++ Misc/NEWS | 4 ++++ 4 files changed, 56 insertions(+), 8 deletions(-) diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst index 78536fab51d..5ea57b7c99d 100644 --- a/Doc/library/gzip.rst +++ b/Doc/library/gzip.rst @@ -137,6 +137,10 @@ The module defines the following items: .. versionchanged:: 3.4 Added support for the ``'x'`` and ``'xb'`` modes. + .. versionchanged:: 3.5 + Added support for writing arbitrary + :term:`bytes-like objects <bytes-like object>`. + .. function:: compress(data, compresslevel=9) diff --git a/Lib/gzip.py b/Lib/gzip.py index f934d4f1c2b..21d83e6414d 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -334,17 +334,20 @@ class GzipFile(io.BufferedIOBase): if self.fileobj is None: raise ValueError("write() on closed GzipFile object") - # Convert data type if called by io.BufferedWriter. 
- if isinstance(data, memoryview): - data = data.tobytes() + if isinstance(data, bytes): + length = len(data) + else: + # accept any data that supports the buffer protocol + data = memoryview(data) + length = data.nbytes - if len(data) > 0: - self.size = self.size + len(data) + if length > 0: + self.fileobj.write(self.compress.compress(data)) + self.size += length self.crc = zlib.crc32(data, self.crc) & 0xffffffff - self.fileobj.write( self.compress.compress(data) ) - self.offset += len(data) + self.offset += length - return len(data) + return length def read(self, size=-1): self._check_closed() diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index b7a7e03c96a..c0be3a1f2dd 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -6,6 +6,7 @@ from test import support import os import io import struct +import array gzip = support.import_module('gzip') data1 = b""" int length=DEFAULTALLOC, err = Z_OK; @@ -43,6 +44,14 @@ class BaseTest(unittest.TestCase): class TestGzip(BaseTest): + def write_and_read_back(self, data, mode='b'): + b_data = bytes(data) + with gzip.GzipFile(self.filename, 'w'+mode) as f: + l = f.write(data) + self.assertEqual(l, len(b_data)) + with gzip.GzipFile(self.filename, 'r'+mode) as f: + self.assertEqual(f.read(), b_data) + def test_write(self): with gzip.GzipFile(self.filename, 'wb') as f: f.write(data1 * 50) @@ -57,6 +66,34 @@ class TestGzip(BaseTest): # Test multiple close() calls. f.close() + # The following test_write_xy methods test that write accepts + # the corresponding bytes-like object type as input + # and that the data written equals bytes(xy) in all cases. 
+ def test_write_memoryview(self): + self.write_and_read_back(memoryview(data1 * 50)) + m = memoryview(bytes(range(256))) + data = m.cast('B', shape=[8,8,4]) + self.write_and_read_back(data) + + def test_write_bytearray(self): + self.write_and_read_back(bytearray(data1 * 50)) + + def test_write_array(self): + self.write_and_read_back(array.array('I', data1 * 40)) + + def test_write_incompatible_type(self): + # Test that non-bytes-like types raise TypeError. + # Issue #21560: attempts to write incompatible types + # should not affect the state of the fileobject + with gzip.GzipFile(self.filename, 'wb') as f: + with self.assertRaises(TypeError): + f.write('') + with self.assertRaises(TypeError): + f.write([]) + f.write(data1) + with gzip.GzipFile(self.filename, 'rb') as f: + self.assertEqual(f.read(), data1) + def test_read(self): self.test_write() # Try reading. diff --git a/Misc/NEWS b/Misc/NEWS index 0d362d8b6a7..f59297b1582 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -23,6 +23,10 @@ Core and Builtins Library ------- +- Issue #23688: Added support of arbitrary bytes-like objects and avoided + unnecessary copying of memoryview in gzip.GzipFile.write(). + Original patch by Wolfgang Maier. + - Issue #23252: Added support for writing ZIP files to unseekable streams. - Issue #21526: Tkinter now supports new boolean type in Tcl 8.5.