Merged revisions 77288 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk ........ r77288 | antoine.pitrou | 2010-01-03 23:29:56 +0100 (dim., 03 janv. 2010) | 5 lines Issue #7471: Improve the performance of GzipFile's buffering mechanism, and make it implement the `io.BufferedIOBase` ABC to allow for further speedups by wrapping it in an `io.BufferedReader`. Patch by Nir Aides. ........
This commit is contained in:
parent
a81d881e13
commit
b1f8835b21
99
Lib/gzip.py
99
Lib/gzip.py
|
@ -8,6 +8,7 @@ but random access is not allowed."""
|
||||||
import struct, sys, time, os
|
import struct, sys, time, os
|
||||||
import zlib
|
import zlib
|
||||||
import builtins
|
import builtins
|
||||||
|
import io
|
||||||
|
|
||||||
__all__ = ["GzipFile","open"]
|
__all__ = ["GzipFile","open"]
|
||||||
|
|
||||||
|
@ -44,7 +45,7 @@ def open(filename, mode="rb", compresslevel=9):
|
||||||
"""
|
"""
|
||||||
return GzipFile(filename, mode, compresslevel)
|
return GzipFile(filename, mode, compresslevel)
|
||||||
|
|
||||||
class GzipFile:
|
class GzipFile(io.BufferedIOBase):
|
||||||
"""The GzipFile class simulates most of the methods of a file object with
|
"""The GzipFile class simulates most of the methods of a file object with
|
||||||
the exception of the readinto() and truncate() methods.
|
the exception of the readinto() and truncate() methods.
|
||||||
|
|
||||||
|
@ -109,8 +110,12 @@ class GzipFile:
|
||||||
self.mode = READ
|
self.mode = READ
|
||||||
# Set flag indicating start of a new member
|
# Set flag indicating start of a new member
|
||||||
self._new_member = True
|
self._new_member = True
|
||||||
|
# Buffer data read from gzip file. extrastart is offset in
|
||||||
|
# stream where buffer starts. extrasize is number of
|
||||||
|
# bytes remaining in buffer from current stream position.
|
||||||
self.extrabuf = b""
|
self.extrabuf = b""
|
||||||
self.extrasize = 0
|
self.extrasize = 0
|
||||||
|
self.extrastart = 0
|
||||||
self.name = filename
|
self.name = filename
|
||||||
# Starts small, scales exponentially
|
# Starts small, scales exponentially
|
||||||
self.min_readsize = 100
|
self.min_readsize = 100
|
||||||
|
@ -214,7 +219,6 @@ class GzipFile:
|
||||||
if flag & FHCRC:
|
if flag & FHCRC:
|
||||||
self.fileobj.read(2) # Read & discard the 16-bit header CRC
|
self.fileobj.read(2) # Read & discard the 16-bit header CRC
|
||||||
|
|
||||||
|
|
||||||
def write(self,data):
|
def write(self,data):
|
||||||
if self.mode != WRITE:
|
if self.mode != WRITE:
|
||||||
import errno
|
import errno
|
||||||
|
@ -222,12 +226,19 @@ class GzipFile:
|
||||||
|
|
||||||
if self.fileobj is None:
|
if self.fileobj is None:
|
||||||
raise ValueError("write() on closed GzipFile object")
|
raise ValueError("write() on closed GzipFile object")
|
||||||
|
|
||||||
|
# Convert data type if called by io.BufferedWriter.
|
||||||
|
if isinstance(data, memoryview):
|
||||||
|
data = data.tobytes()
|
||||||
|
|
||||||
if len(data) > 0:
|
if len(data) > 0:
|
||||||
self.size = self.size + len(data)
|
self.size = self.size + len(data)
|
||||||
self.crc = zlib.crc32(data, self.crc) & 0xffffffff
|
self.crc = zlib.crc32(data, self.crc) & 0xffffffff
|
||||||
self.fileobj.write( self.compress.compress(data) )
|
self.fileobj.write( self.compress.compress(data) )
|
||||||
self.offset += len(data)
|
self.offset += len(data)
|
||||||
|
|
||||||
|
return len(data)
|
||||||
|
|
||||||
def read(self, size=-1):
|
def read(self, size=-1):
|
||||||
if self.mode != READ:
|
if self.mode != READ:
|
||||||
import errno
|
import errno
|
||||||
|
@ -253,15 +264,14 @@ class GzipFile:
|
||||||
if size > self.extrasize:
|
if size > self.extrasize:
|
||||||
size = self.extrasize
|
size = self.extrasize
|
||||||
|
|
||||||
chunk = self.extrabuf[:size]
|
offset = self.offset - self.extrastart
|
||||||
self.extrabuf = self.extrabuf[size:]
|
chunk = self.extrabuf[offset: offset + size]
|
||||||
self.extrasize = self.extrasize - size
|
self.extrasize = self.extrasize - size
|
||||||
|
|
||||||
self.offset += size
|
self.offset += size
|
||||||
return chunk
|
return chunk
|
||||||
|
|
||||||
def _unread(self, buf):
|
def _unread(self, buf):
|
||||||
self.extrabuf = buf + self.extrabuf
|
|
||||||
self.extrasize = len(buf) + self.extrasize
|
self.extrasize = len(buf) + self.extrasize
|
||||||
self.offset -= len(buf)
|
self.offset -= len(buf)
|
||||||
|
|
||||||
|
@ -317,8 +327,10 @@ class GzipFile:
|
||||||
|
|
||||||
def _add_read_data(self, data):
|
def _add_read_data(self, data):
|
||||||
self.crc = zlib.crc32(data, self.crc) & 0xffffffff
|
self.crc = zlib.crc32(data, self.crc) & 0xffffffff
|
||||||
self.extrabuf = self.extrabuf + data
|
offset = self.offset - self.extrastart
|
||||||
|
self.extrabuf = self.extrabuf[offset:] + data
|
||||||
self.extrasize = self.extrasize + len(data)
|
self.extrasize = self.extrasize + len(data)
|
||||||
|
self.extrastart = self.offset
|
||||||
self.size = self.size + len(data)
|
self.size = self.size + len(data)
|
||||||
|
|
||||||
def _read_eof(self):
|
def _read_eof(self):
|
||||||
|
@ -336,6 +348,10 @@ class GzipFile:
|
||||||
elif isize != (self.size & 0xffffffff):
|
elif isize != (self.size & 0xffffffff):
|
||||||
raise IOError("Incorrect length of data produced")
|
raise IOError("Incorrect length of data produced")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def closed(self):
|
||||||
|
return self.fileobj is None
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
if self.fileobj is None:
|
if self.fileobj is None:
|
||||||
return
|
return
|
||||||
|
@ -351,15 +367,6 @@ class GzipFile:
|
||||||
self.myfileobj.close()
|
self.myfileobj.close()
|
||||||
self.myfileobj = None
|
self.myfileobj = None
|
||||||
|
|
||||||
def __del__(self):
|
|
||||||
try:
|
|
||||||
if (self.myfileobj is None and
|
|
||||||
self.fileobj is None):
|
|
||||||
return
|
|
||||||
except AttributeError:
|
|
||||||
return
|
|
||||||
self.close()
|
|
||||||
|
|
||||||
def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
|
def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
|
||||||
if self.mode == WRITE:
|
if self.mode == WRITE:
|
||||||
# Ensure the compressor's buffer is flushed
|
# Ensure the compressor's buffer is flushed
|
||||||
|
@ -374,12 +381,6 @@ class GzipFile:
|
||||||
"""
|
"""
|
||||||
return self.fileobj.fileno()
|
return self.fileobj.fileno()
|
||||||
|
|
||||||
def isatty(self):
|
|
||||||
return False
|
|
||||||
|
|
||||||
def tell(self):
|
|
||||||
return self.offset
|
|
||||||
|
|
||||||
def rewind(self):
|
def rewind(self):
|
||||||
'''Return the uncompressed stream file position indicator to the
|
'''Return the uncompressed stream file position indicator to the
|
||||||
beginning of the file'''
|
beginning of the file'''
|
||||||
|
@ -389,8 +390,18 @@ class GzipFile:
|
||||||
self._new_member = True
|
self._new_member = True
|
||||||
self.extrabuf = b""
|
self.extrabuf = b""
|
||||||
self.extrasize = 0
|
self.extrasize = 0
|
||||||
|
self.extrastart = 0
|
||||||
self.offset = 0
|
self.offset = 0
|
||||||
|
|
||||||
|
def readable(self):
|
||||||
|
return self.mode == READ
|
||||||
|
|
||||||
|
def writable(self):
|
||||||
|
return self.mode == WRITE
|
||||||
|
|
||||||
|
def seekable(self):
|
||||||
|
return True
|
||||||
|
|
||||||
def seek(self, offset, whence=0):
|
def seek(self, offset, whence=0):
|
||||||
if whence:
|
if whence:
|
||||||
if whence == 1:
|
if whence == 1:
|
||||||
|
@ -414,8 +425,18 @@ class GzipFile:
|
||||||
self.read(1024)
|
self.read(1024)
|
||||||
self.read(count % 1024)
|
self.read(count % 1024)
|
||||||
|
|
||||||
|
return self.offset
|
||||||
|
|
||||||
def readline(self, size=-1):
|
def readline(self, size=-1):
|
||||||
if size < 0:
|
if size < 0:
|
||||||
|
# Shortcut common case - newline found in buffer.
|
||||||
|
offset = self.offset - self.extrastart
|
||||||
|
i = self.extrabuf.find(b'\n', offset) + 1
|
||||||
|
if i > 0:
|
||||||
|
self.extrasize -= i - offset
|
||||||
|
self.offset += i - offset
|
||||||
|
return self.extrabuf[offset: i]
|
||||||
|
|
||||||
size = sys.maxsize
|
size = sys.maxsize
|
||||||
readsize = self.min_readsize
|
readsize = self.min_readsize
|
||||||
else:
|
else:
|
||||||
|
@ -445,42 +466,6 @@ class GzipFile:
|
||||||
self.min_readsize = min(readsize, self.min_readsize * 2, 512)
|
self.min_readsize = min(readsize, self.min_readsize * 2, 512)
|
||||||
return b''.join(bufs) # Return resulting line
|
return b''.join(bufs) # Return resulting line
|
||||||
|
|
||||||
def readlines(self, sizehint=0):
|
|
||||||
# Negative numbers result in reading all the lines
|
|
||||||
if sizehint <= 0:
|
|
||||||
sizehint = sys.maxsize
|
|
||||||
L = []
|
|
||||||
while sizehint > 0:
|
|
||||||
line = self.readline()
|
|
||||||
if line == b"":
|
|
||||||
break
|
|
||||||
L.append(line)
|
|
||||||
sizehint = sizehint - len(line)
|
|
||||||
|
|
||||||
return L
|
|
||||||
|
|
||||||
def writelines(self, L):
|
|
||||||
for line in L:
|
|
||||||
self.write(line)
|
|
||||||
|
|
||||||
def __iter__(self):
|
|
||||||
return self
|
|
||||||
|
|
||||||
def __next__(self):
|
|
||||||
line = self.readline()
|
|
||||||
if line:
|
|
||||||
return line
|
|
||||||
else:
|
|
||||||
raise StopIteration
|
|
||||||
|
|
||||||
def __enter__(self):
|
|
||||||
if self.fileobj is None:
|
|
||||||
raise ValueError("I/O operation on closed GzipFile object")
|
|
||||||
return self
|
|
||||||
|
|
||||||
def __exit__(self, *args):
|
|
||||||
self.close()
|
|
||||||
|
|
||||||
|
|
||||||
def _test():
|
def _test():
|
||||||
# Act like gzip; with -d, act like gunzip.
|
# Act like gzip; with -d, act like gunzip.
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
import unittest
|
import unittest
|
||||||
from test import support
|
from test import support
|
||||||
import os
|
import os
|
||||||
|
import io
|
||||||
import struct
|
import struct
|
||||||
gzip = support.import_module('gzip')
|
gzip = support.import_module('gzip')
|
||||||
|
|
||||||
|
@ -80,6 +81,16 @@ class TestGzip(unittest.TestCase):
|
||||||
zgfile.close()
|
zgfile.close()
|
||||||
self.assertEquals(contents, b'a'*201)
|
self.assertEquals(contents, b'a'*201)
|
||||||
|
|
||||||
|
def test_buffered_reader(self):
|
||||||
|
# Issue #7471: a GzipFile can be wrapped in a BufferedReader for
|
||||||
|
# performance.
|
||||||
|
self.test_write()
|
||||||
|
|
||||||
|
f = gzip.GzipFile(self.filename, 'rb')
|
||||||
|
with io.BufferedReader(f) as r:
|
||||||
|
lines = [line for line in r]
|
||||||
|
|
||||||
|
self.assertEqual(lines, 50 * data1.splitlines(True))
|
||||||
|
|
||||||
def test_readline(self):
|
def test_readline(self):
|
||||||
self.test_write()
|
self.test_write()
|
||||||
|
|
|
@ -191,7 +191,11 @@ C-API
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
- Issue #3972: http.client.HTTPConnection now accepts an optional source_address
|
- Issue #7471: Improve the performance of GzipFile's buffering mechanism,
|
||||||
|
and make it implement the `io.BufferedIOBase` ABC to allow for further
|
||||||
|
speedups by wrapping it in an `io.BufferedReader`. Patch by Nir Aides.
|
||||||
|
|
||||||
|
- Issue #3972: http.client.HTTPConnection now accepts an optional source_address
|
||||||
parameter to allow specifying where your connections come from.
|
parameter to allow specifying where your connections come from.
|
||||||
|
|
||||||
- socket.create_connection now accepts an optional source_address parameter.
|
- socket.create_connection now accepts an optional source_address parameter.
|
||||||
|
|
Loading…
Reference in New Issue