Patch #1268314: Cache lines in StreamReader.readline for performance.

Will backport to Python 2.4.
This commit is contained in:
Martin v. Löwis 2005-09-18 08:34:39 +00:00
parent 8b291e2d66
commit 4ed673877d
2 changed files with 39 additions and 0 deletions

View File

@ -232,6 +232,7 @@ class StreamReader(Codec):
# For str->str decoding this will stay a str # For str->str decoding this will stay a str
# For str->unicode decoding the first read will promote it to unicode # For str->unicode decoding the first read will promote it to unicode
self.charbuffer = "" self.charbuffer = ""
self.linebuffer = None
def decode(self, input, errors='strict'): def decode(self, input, errors='strict'):
raise NotImplementedError raise NotImplementedError
@ -264,6 +265,11 @@ class StreamReader(Codec):
optional encoding endings or state markers are available optional encoding endings or state markers are available
on the stream, these should be read too. on the stream, these should be read too.
""" """
# If we have lines cached, first merge them back into characters
if self.linebuffer:
self.charbuffer = "".join(self.linebuffer)
self.linebuffer = None
# read until we get the required number of characters (if available) # read until we get the required number of characters (if available)
while True: while True:
# can the request be satisfied from the character buffer? # can the request be satisfied from the character buffer?
@ -316,6 +322,20 @@ class StreamReader(Codec):
read() method. read() method.
""" """
# If we have lines cached from an earlier read, return
# them unconditionally
if self.linebuffer:
line = self.linebuffer[0]
del self.linebuffer[0]
if len(self.linebuffer) == 1:
# revert to charbuffer mode; we might need more data
# next time
self.charbuffer = self.linebuffer[0]
self.linebuffer = None
if not keepends:
line = line.splitlines(False)[0]
return line
readsize = size or 72 readsize = size or 72
line = "" line = ""
# If size is given, we call read() only once # If size is given, we call read() only once
@ -331,6 +351,22 @@ class StreamReader(Codec):
line += data line += data
lines = line.splitlines(True) lines = line.splitlines(True)
if lines: if lines:
if len(lines) > 1:
# More than one line result; the first line is a full line
# to return
line = lines[0]
del lines[0]
if len(lines) > 1:
# cache the remaining lines
lines[-1] += self.charbuffer
self.linebuffer = lines
self.charbuffer = None
else:
# only one remaining line, put it back into charbuffer
self.charbuffer = lines[0] + self.charbuffer
if not keepends:
line = line.splitlines(False)[0]
break
line0withend = lines[0] line0withend = lines[0]
line0withoutend = lines[0].splitlines(False)[0] line0withoutend = lines[0].splitlines(False)[0]
if line0withend != line0withoutend: # We really have a line end if line0withend != line0withoutend: # We really have a line end
@ -376,6 +412,7 @@ class StreamReader(Codec):
""" """
self.bytebuffer = "" self.bytebuffer = ""
self.charbuffer = u"" self.charbuffer = u""
self.linebuffer = None
def seek(self, offset, whence=0): def seek(self, offset, whence=0):
""" Set the input stream's current position. """ Set the input stream's current position.

View File

@ -219,6 +219,8 @@ Extension Modules
Library Library
------- -------
- Patch #1268314: Cache lines in StreamReader.readline for performance.
- Bug #1290505: Fix clearing the regex cache for time.strptime(). - Bug #1290505: Fix clearing the regex cache for time.strptime().
- Bug #1167128: Fix size of a symlink in a tarfile to be 0. - Bug #1167128: Fix size of a symlink in a tarfile to be 0.