bpo-34010: Fix tarfile read performance regression (GH-8020)
During buffered read, use a list followed by join instead of extending a bytes object. This is how it was done before, but it was changed in commit b506dc32c1.
(cherry picked from commit 12a08c4760)
Co-authored-by: hajoscher <hajoscher@gmail.com>
This commit is contained in:
parent
2cbd1bb1f3
commit
c1b75b5fb9
|
@ -532,7 +532,7 @@ class _Stream:
|
|||
if not buf:
|
||||
break
|
||||
t.append(buf)
|
||||
buf = "".join(t)
|
||||
buf = b"".join(t)
|
||||
else:
|
||||
buf = self._read(size)
|
||||
self.pos += len(buf)
|
||||
|
@ -545,6 +545,7 @@ class _Stream:
|
|||
return self.__read(size)
|
||||
|
||||
c = len(self.dbuf)
|
||||
t = [self.dbuf]
|
||||
while c < size:
|
||||
buf = self.__read(self.bufsize)
|
||||
if not buf:
|
||||
|
@ -553,26 +554,27 @@ class _Stream:
|
|||
buf = self.cmp.decompress(buf)
|
||||
except self.exception:
|
||||
raise ReadError("invalid compressed data")
|
||||
self.dbuf += buf
|
||||
t.append(buf)
|
||||
c += len(buf)
|
||||
buf = self.dbuf[:size]
|
||||
self.dbuf = self.dbuf[size:]
|
||||
return buf
|
||||
t = b"".join(t)
|
||||
self.dbuf = t[size:]
|
||||
return t[:size]
|
||||
|
||||
def __read(self, size):
|
||||
"""Return size bytes from stream. If internal buffer is empty,
|
||||
read another block from the stream.
|
||||
"""
|
||||
c = len(self.buf)
|
||||
t = [self.buf]
|
||||
while c < size:
|
||||
buf = self.fileobj.read(self.bufsize)
|
||||
if not buf:
|
||||
break
|
||||
self.buf += buf
|
||||
t.append(buf)
|
||||
c += len(buf)
|
||||
buf = self.buf[:size]
|
||||
self.buf = self.buf[size:]
|
||||
return buf
|
||||
t = b"".join(t)
|
||||
self.buf = t[size:]
|
||||
return t[:size]
|
||||
# class _Stream
|
||||
|
||||
class _StreamProxy(object):
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Fixed a performance regression for reading streams with tarfile. The
|
||||
buffered read should use a list, instead of appending to a bytes object.
|
Loading…
Reference in New Issue