#2523: binary buffered reading is quadratic
This commit is contained in:
parent
711419388c
commit
c66f909f43
91
Lib/io.py
91
Lib/io.py
|
@ -893,8 +893,12 @@ class BufferedReader(_BufferedIOMixin):
|
|||
"""
|
||||
raw._checkReadable()
|
||||
_BufferedIOMixin.__init__(self, raw)
|
||||
self._read_buf = b""
|
||||
self.buffer_size = buffer_size
|
||||
self._reset_read_buf()
|
||||
|
||||
def _reset_read_buf(self):
|
||||
self._read_buf = b""
|
||||
self._read_pos = 0
|
||||
|
||||
def read(self, n=None):
|
||||
"""Read n bytes.
|
||||
|
@ -904,25 +908,50 @@ class BufferedReader(_BufferedIOMixin):
|
|||
mode. If n is negative, read until EOF or until read() would
|
||||
block.
|
||||
"""
|
||||
if n is None:
|
||||
n = -1
|
||||
nodata_val = b""
|
||||
while n < 0 or len(self._read_buf) < n:
|
||||
to_read = max(self.buffer_size,
|
||||
n if n is not None else 2*len(self._read_buf))
|
||||
current = self.raw.read(to_read)
|
||||
if current in (b"", None):
|
||||
nodata_val = current
|
||||
empty_values = (b"", None)
|
||||
buf = self._read_buf
|
||||
pos = self._read_pos
|
||||
|
||||
# Special case for when the number of bytes to read is unspecified.
|
||||
if n is None or n == -1:
|
||||
self._reset_read_buf()
|
||||
chunks = [buf[pos:]] # Strip the consumed bytes.
|
||||
current_size = 0
|
||||
while True:
|
||||
# Read until EOF or until read() would block.
|
||||
chunk = self.raw.read()
|
||||
if chunk in empty_values:
|
||||
nodata_val = chunk
|
||||
break
|
||||
current_size += len(chunk)
|
||||
chunks.append(chunk)
|
||||
return b"".join(chunks) or nodata_val
|
||||
|
||||
# The number of bytes to read is specified, return at most n bytes.
|
||||
avail = len(buf) - pos # Length of the available buffered data.
|
||||
if n <= avail:
|
||||
# Fast path: the data to read is fully buffered.
|
||||
self._read_pos += n
|
||||
return buf[pos:pos+n]
|
||||
# Slow path: read from the stream until enough bytes are read,
|
||||
# or until an EOF occurs or until read() would block.
|
||||
chunks = [buf[pos:]]
|
||||
wanted = max(self.buffer_size, n)
|
||||
while avail < n:
|
||||
chunk = self.raw.read(wanted)
|
||||
if chunk in empty_values:
|
||||
nodata_val = chunk
|
||||
break
|
||||
self._read_buf += current
|
||||
if self._read_buf:
|
||||
if n < 0:
|
||||
n = len(self._read_buf)
|
||||
out = self._read_buf[:n]
|
||||
self._read_buf = self._read_buf[n:]
|
||||
else:
|
||||
out = nodata_val
|
||||
return out
|
||||
avail += len(chunk)
|
||||
chunks.append(chunk)
|
||||
# n is more then avail only when an EOF occurred or when
|
||||
# read() would have blocked.
|
||||
n = min(n, avail)
|
||||
out = b"".join(chunks)
|
||||
self._read_buf = out[n:] # Save the extra data in the buffer.
|
||||
self._read_pos = 0
|
||||
return out[:n] if out else nodata_val
|
||||
|
||||
def peek(self, n=0):
|
||||
"""Returns buffered bytes without advancing the position.
|
||||
|
@ -932,13 +961,14 @@ class BufferedReader(_BufferedIOMixin):
|
|||
than self.buffer_size.
|
||||
"""
|
||||
want = min(n, self.buffer_size)
|
||||
have = len(self._read_buf)
|
||||
have = len(self._read_buf) - self._read_pos
|
||||
if have < want:
|
||||
to_read = self.buffer_size - have
|
||||
current = self.raw.read(to_read)
|
||||
if current:
|
||||
self._read_buf += current
|
||||
return self._read_buf
|
||||
self._read_buf = self._read_buf[self._read_pos:] + current
|
||||
self._read_pos = 0
|
||||
return self._read_buf[self._read_pos:]
|
||||
|
||||
def read1(self, n):
|
||||
"""Reads up to n bytes, with at most one read() system call."""
|
||||
|
@ -947,16 +977,16 @@ class BufferedReader(_BufferedIOMixin):
|
|||
if n <= 0:
|
||||
return b""
|
||||
self.peek(1)
|
||||
return self.read(min(n, len(self._read_buf)))
|
||||
return self.read(min(n, len(self._read_buf) - self._read_pos))
|
||||
|
||||
def tell(self):
|
||||
return self.raw.tell() - len(self._read_buf)
|
||||
return self.raw.tell() - len(self._read_buf) + self._read_pos
|
||||
|
||||
def seek(self, pos, whence=0):
|
||||
if whence == 1:
|
||||
pos -= len(self._read_buf)
|
||||
pos -= len(self._read_buf) - self._read_pos
|
||||
pos = self.raw.seek(pos, whence)
|
||||
self._read_buf = b""
|
||||
self._reset_read_buf()
|
||||
return pos
|
||||
|
||||
|
||||
|
@ -1125,14 +1155,14 @@ class BufferedRandom(BufferedWriter, BufferedReader):
|
|||
# First do the raw seek, then empty the read buffer, so that
|
||||
# if the raw seek fails, we don't lose buffered data forever.
|
||||
pos = self.raw.seek(pos, whence)
|
||||
self._read_buf = b""
|
||||
self._reset_read_buf()
|
||||
return pos
|
||||
|
||||
def tell(self):
|
||||
if (self._write_buf):
|
||||
if self._write_buf:
|
||||
return self.raw.tell() + len(self._write_buf)
|
||||
else:
|
||||
return self.raw.tell() - len(self._read_buf)
|
||||
return BufferedReader.tell(self)
|
||||
|
||||
def truncate(self, pos=None):
|
||||
if pos is None:
|
||||
|
@ -1161,8 +1191,9 @@ class BufferedRandom(BufferedWriter, BufferedReader):
|
|||
|
||||
def write(self, b):
|
||||
if self._read_buf:
|
||||
self.raw.seek(-len(self._read_buf), 1) # Undo readahead
|
||||
self._read_buf = b""
|
||||
# Undo readahead
|
||||
self.raw.seek(self._read_pos - len(self._read_buf), 1)
|
||||
self._reset_read_buf()
|
||||
return BufferedWriter.write(self, b)
|
||||
|
||||
|
||||
|
|
|
@ -22,6 +22,9 @@ Library
|
|||
file name rather than a ZipInfo instance, so files are extracted with
|
||||
mode 0600 rather than 000 under Unix.
|
||||
|
||||
- Issue #2523: Fix quadratic behaviour when read()ing a binary file without
|
||||
asking for a specific length.
|
||||
|
||||
|
||||
What's new in Python 3.0b2?
|
||||
===========================
|
||||
|
|
Loading…
Reference in New Issue