(This fix is really by Jeremy)

Here's my suggested replacement for gzip.py for 1.5.1.  I've
re-implemented the readline and readlines methods, added an _unread
method, and tweaked read and _read.

I tried a more elaborate buffer scheme for unread (using a list of
strings and string.join), but it was both harder to follow and slower.
This version is a lot faster than the current version and is still
pretty simple.
This commit is contained in:
Guido van Rossum 1998-01-27 19:29:45 +00:00
parent 7570669a08
commit b16a3b8450
1 changed files with 29 additions and 22 deletions

View File

@ -81,7 +81,6 @@ class GzipFile:
elif self.mode == READ: elif self.mode == READ:
self._read_gzip_header() self._read_gzip_header()
def __repr__(self): def __repr__(self):
s = repr(self.fileobj) s = repr(self.fileobj)
return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>' return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'
@ -162,18 +161,19 @@ class GzipFile:
if self.extrasize <= 0 and self.fileobj is None: if self.extrasize <= 0 and self.fileobj is None:
return '' return ''
if not size: readsize = 1024
# get the whole thing if not size: # get the whole thing
try: try:
while 1: while 1:
self._read() self._read(readsize)
readsize = readsize * 2
except EOFError: except EOFError:
size = self.extrasize size = self.extrasize
else: else: # just get some more of it
# just get some more of it
try: try:
while size > self.extrasize: while size > self.extrasize:
self._read() self._read(readsize)
readsize = readsize * 2
except EOFError: except EOFError:
pass pass
@ -183,8 +183,15 @@ class GzipFile:
return chunk return chunk
def _read(self): def _unread(self, buf):
buf = self.fileobj.read(1024) self.extrabuf = buf + self.extrabuf
self.extrasize = len(buf) + self.extrasize
def _read(self, size=1024):
try:
buf = self.fileobj.read(size)
except AttributeError:
raise EOFError, "Reached EOF"
if buf == "": if buf == "":
uncompress = self.decompress.flush() uncompress = self.decompress.flush()
if uncompress == "": if uncompress == "":
@ -237,21 +244,21 @@ class GzipFile:
return 0 return 0
def readline(self): def readline(self):
# XXX This function isn't implemented in a very efficient way bufs = []
line="" readsize = 100
while 1: while 1:
c = self.read(1) c = self.read(readsize)
line = line + c i = string.find(c, '\n')
if c=='\n' or c=="": break if i >= 0 or c == '':
return line bufs.append(c[:i])
self._unread(c[i+1:])
return string.join(bufs, '')
bufs.append(c)
readsize = readsize * 2
def readlines(self): def readlines(self):
L=[] buf = self.read()
line = self.readline() return string.split(buf, '\n')
while line!="":
L.append(line)
line = self.readline()
return L
def writelines(self, L): def writelines(self, L):
for line in L: for line in L: