Merged revisions 77288 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r77288 | antoine.pitrou | 2010-01-03 23:29:56 +0100 (dim., 03 janv. 2010) | 5 lines

  Issue #7471: Improve the performance of GzipFile's buffering mechanism,
  and make it implement the `io.BufferedIOBase` ABC to allow for further
  speedups by wrapping it in an `io.BufferedReader`. Patch by Nir Aides.
........
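For illustration only (not part of the patch, and the file name is hypothetical), a minimal sketch of the wrapping the log message describes:

import gzip
import io

# GzipFile now implements the io.BufferedIOBase ABC, so it can be handed to
# io.BufferedReader; line iteration then goes through the buffered reader's
# faster readline() machinery.
with io.BufferedReader(gzip.GzipFile("example.txt.gz", "rb")) as reader:
    for line in reader:
        print(line)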
This commit is contained in:
parent a81d881e13
commit b1f8835b21
Lib/gzip.py (99 lines changed)
@@ -8,6 +8,7 @@ but random access is not allowed."""
 import struct, sys, time, os
 import zlib
 import builtins
+import io
 
 __all__ = ["GzipFile","open"]
 
@@ -44,7 +45,7 @@ def open(filename, mode="rb", compresslevel=9):
     """
     return GzipFile(filename, mode, compresslevel)
 
-class GzipFile:
+class GzipFile(io.BufferedIOBase):
     """The GzipFile class simulates most of the methods of a file object with
     the exception of the readinto() and truncate() methods.
 
@@ -109,8 +110,12 @@ class GzipFile:
             self.mode = READ
             # Set flag indicating start of a new member
             self._new_member = True
+            # Buffer data read from gzip file. extrastart is offset in
+            # stream where buffer starts. extrasize is number of
+            # bytes remaining in buffer from current stream position.
             self.extrabuf = b""
             self.extrasize = 0
+            self.extrastart = 0
             self.name = filename
             # Starts small, scales exponentially
             self.min_readsize = 100
@@ -214,7 +219,6 @@ class GzipFile:
         if flag & FHCRC:
             self.fileobj.read(2)     # Read & discard the 16-bit header CRC
 
-
     def write(self,data):
         if self.mode != WRITE:
             import errno
@@ -222,12 +226,19 @@ class GzipFile:
         if self.fileobj is None:
             raise ValueError("write() on closed GzipFile object")
 
+        # Convert data type if called by io.BufferedWriter.
+        if isinstance(data, memoryview):
+            data = data.tobytes()
+
         if len(data) > 0:
             self.size = self.size + len(data)
             self.crc = zlib.crc32(data, self.crc) & 0xffffffff
             self.fileobj.write( self.compress.compress(data) )
             self.offset += len(data)
 
+        return len(data)
+
     def read(self, size=-1):
         if self.mode != READ:
             import errno
@@ -253,15 +264,14 @@ class GzipFile:
                 if size > self.extrasize:
                     size = self.extrasize
 
-        chunk = self.extrabuf[:size]
-        self.extrabuf = self.extrabuf[size:]
+        offset = self.offset - self.extrastart
+        chunk = self.extrabuf[offset: offset + size]
         self.extrasize = self.extrasize - size
 
         self.offset += size
         return chunk
 
     def _unread(self, buf):
-        self.extrabuf = buf + self.extrabuf
         self.extrasize = len(buf) + self.extrasize
         self.offset -= len(buf)
 
@@ -317,8 +327,10 @@ class GzipFile:
 
     def _add_read_data(self, data):
         self.crc = zlib.crc32(data, self.crc) & 0xffffffff
-        self.extrabuf = self.extrabuf + data
+        offset = self.offset - self.extrastart
+        self.extrabuf = self.extrabuf[offset:] + data
         self.extrasize = self.extrasize + len(data)
+        self.extrastart = self.offset
         self.size = self.size + len(data)
 
     def _read_eof(self):
@@ -336,6 +348,10 @@ class GzipFile:
         elif isize != (self.size & 0xffffffff):
             raise IOError("Incorrect length of data produced")
 
+    @property
+    def closed(self):
+        return self.fileobj is None
+
     def close(self):
         if self.fileobj is None:
             return
@@ -351,15 +367,6 @@ class GzipFile:
             self.myfileobj.close()
             self.myfileobj = None
 
-    def __del__(self):
-        try:
-            if (self.myfileobj is None and
-                self.fileobj is None):
-                return
-        except AttributeError:
-            return
-        self.close()
-
     def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
         if self.mode == WRITE:
             # Ensure the compressor's buffer is flushed
@@ -374,12 +381,6 @@ class GzipFile:
         """
         return self.fileobj.fileno()
 
-    def isatty(self):
-        return False
-
-    def tell(self):
-        return self.offset
-
     def rewind(self):
         '''Return the uncompressed stream file position indicator to the
            beginning of the file'''
@@ -389,8 +390,18 @@ class GzipFile:
         self._new_member = True
         self.extrabuf = b""
         self.extrasize = 0
+        self.extrastart = 0
         self.offset = 0
 
+    def readable(self):
+        return self.mode == READ
+
+    def writable(self):
+        return self.mode == WRITE
+
+    def seekable(self):
+        return True
+
     def seek(self, offset, whence=0):
         if whence:
             if whence == 1:
@@ -414,8 +425,18 @@ class GzipFile:
                 self.read(1024)
             self.read(count % 1024)
 
+        return self.offset
+
     def readline(self, size=-1):
         if size < 0:
+            # Shortcut common case - newline found in buffer.
+            offset = self.offset - self.extrastart
+            i = self.extrabuf.find(b'\n', offset) + 1
+            if i > 0:
+                self.extrasize -= i - offset
+                self.offset += i - offset
+                return self.extrabuf[offset: i]
+
             size = sys.maxsize
             readsize = self.min_readsize
         else:
@@ -445,42 +466,6 @@ class GzipFile:
             self.min_readsize = min(readsize, self.min_readsize * 2, 512)
         return b''.join(bufs) # Return resulting line
 
-    def readlines(self, sizehint=0):
-        # Negative numbers result in reading all the lines
-        if sizehint <= 0:
-            sizehint = sys.maxsize
-        L = []
-        while sizehint > 0:
-            line = self.readline()
-            if line == b"":
-                break
-            L.append(line)
-            sizehint = sizehint - len(line)
-
-        return L
-
-    def writelines(self, L):
-        for line in L:
-            self.write(line)
-
-    def __iter__(self):
-        return self
-
-    def __next__(self):
-        line = self.readline()
-        if line:
-            return line
-        else:
-            raise StopIteration
-
-    def __enter__(self):
-        if self.fileobj is None:
-            raise ValueError("I/O operation on closed GzipFile object")
-        return self
-
-    def __exit__(self, *args):
-        self.close()
-
 
 def _test():
     # Act like gzip; with -d, act like gunzip.
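The buffering changes above stop re-slicing the front of extrabuf on every read; the buffer keeps its data and the reader's progress is tracked as an offset from extrastart. The following standalone sketch only illustrates that idea; it is not code from gzip.py, and the class name is made up:

# Sketch of the offset-tracking buffer strategy (illustrative, not gzip.py code).
class OffsetBuffer:
    def __init__(self):
        self.buf = b""    # buffered data
        self.start = 0    # stream position where self.buf begins
        self.pos = 0      # caller's current stream position

    def add(self, data):
        # Drop only the prefix the caller has already consumed, then append.
        consumed = self.pos - self.start
        self.buf = self.buf[consumed:] + data
        self.start = self.pos

    def read(self, size):
        # Serve bytes by computing an offset instead of shrinking the buffer.
        offset = self.pos - self.start
        chunk = self.buf[offset: offset + size]
        self.pos += len(chunk)
        return chunk

# Tiny demonstration of the behaviour:
buf = OffsetBuffer()
buf.add(b"hello\nworld\n")
assert buf.read(6) == b"hello\n"
assert buf.read(6) == b"world\n"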
Lib/test/test_gzip.py
@@ -5,6 +5,7 @@
 import unittest
 from test import support
 import os
+import io
 import struct
 gzip = support.import_module('gzip')
 
@@ -80,6 +81,16 @@ class TestGzip(unittest.TestCase):
         zgfile.close()
         self.assertEquals(contents, b'a'*201)
 
+    def test_buffered_reader(self):
+        # Issue #7471: a GzipFile can be wrapped in a BufferedReader for
+        # performance.
+        self.test_write()
+
+        f = gzip.GzipFile(self.filename, 'rb')
+        with io.BufferedReader(f) as r:
+            lines = [line for line in r]
+
+        self.assertEqual(lines, 50 * data1.splitlines(True))
+
     def test_readline(self):
         self.test_write()
 
Misc/NEWS
@@ -191,7 +191,11 @@ C-API
 Library
 -------
 
+- Issue #7471: Improve the performance of GzipFile's buffering mechanism,
+  and make it implement the `io.BufferedIOBase` ABC to allow for further
+  speedups by wrapping it in an `io.BufferedReader`. Patch by Nir Aides.
+
 - Issue #3972: http.client.HTTPConnection now accepts an optional source_address
   parameter to allow specifying where your connections come from.
 
 - socket.create_connection now accepts an optional source_address parameter.
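To get a rough feel for the speedup the entry above refers to, one could time line iteration with and without the BufferedReader wrapper. This is only an illustrative sketch; the file name and repetition count are arbitrary, and no such benchmark ships with the patch:

import gzip
import io
import timeit

FILENAME = "example.txt.gz"  # hypothetical gzip file with many text lines

def iterate_plain():
    # Iterate lines directly over the GzipFile object.
    with gzip.GzipFile(FILENAME, "rb") as f:
        for line in f:
            pass

def iterate_buffered():
    # Iterate lines through an io.BufferedReader wrapper.
    with io.BufferedReader(gzip.GzipFile(FILENAME, "rb")) as f:
        for line in f:
            pass

# Compare wall-clock time for repeated line iteration over the same file.
print("plain    :", timeit.timeit(iterate_plain, number=10))
print("buffered :", timeit.timeit(iterate_buffered, number=10))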