(This fix is really by Jeremy)

Here's my suggested replacement for gzip.py for 1.5.1. I've re-implemented the methods readline and readlines, added an _unread method, and tweaked read and _read. I tried a more elaborate buffer scheme for _unread (using a list of strings and string.join), but it turned out to be both more complicated and slower. This version is a lot faster than the current version and is still pretty simple.
1998-01-27 19:29:45 +00:00 · 1998-01-27 19:29:45 +00:00 · b16a3b8450
parent 7570669a08
commit b16a3b8450
1 changed files with 29 additions and 22 deletions
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@ -81,7 +81,6 @@ class GzipFile:
 	elif self.mode == READ:
 	    self._read_gzip_header()
    def __repr__(self):
 	s = repr(self.fileobj)
 	return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'
@ -162,18 +161,19 @@ class GzipFile:
 	if self.extrasize <= 0 and self.fileobj is None:
 	    return ''
-	if not size:
+	readsize = 1024
-	    # get the whole thing
+	if not size:	    # get the whole thing
 	    try:
 		while 1:
-		    self._read()
+		    self._read(readsize)
 		    readsize = readsize * 2
 	    except EOFError:
 		size = self.extrasize
-	else:
+	else:	            # just get some more of it
 	    # just get some more of it
 	    try:
 		while size > self.extrasize:
-		    self._read()
+		    self._read(readsize)
 		    readsize = readsize * 2
 	    except EOFError:
 		pass
@ -183,8 +183,15 @@ class GzipFile:
 	return chunk
-    def _read(self):
+    def _unread(self, buf):
-	buf = self.fileobj.read(1024)
+	self.extrabuf = buf + self.extrabuf
 	self.extrasize = len(buf) + self.extrasize
    def _read(self, size=1024):
 	try:
 	    buf = self.fileobj.read(size)
 	except AttributeError:
 	    raise EOFError, "Reached EOF"
 	if buf == "":
 	    uncompress = self.decompress.flush()
 	    if uncompress == "":
@ -237,21 +244,21 @@ class GzipFile:
 	return 0
    def readline(self):
-	# XXX This function isn't implemented in a very efficient way
+	bufs = []
-	line=""
+	readsize = 100
 	while 1:
-	    c = self.read(1)
+	    c = self.read(readsize)
-	    line = line + c
+	    i = string.find(c, '\n')
-	    if c=='\n' or c=="": break
+	    if i >= 0 or c == '':
-	return line
+		bufs.append(c[:i])
 		self._unread(c[i+1:])
 		return string.join(bufs, '')
 	    bufs.append(c)
 	    readsize = readsize * 2
    def readlines(self):
-	L=[]
+	buf = self.read()
-	line = self.readline()
+	return string.split(buf, '\n')
 	while line!="":
 	    L.append(line)
 	    line = self.readline()
 	return L
    def writelines(self, L):
 	for line in L: