bpo-34010: Fix tarfile read performance regression (GH-8020)

During a buffered read, accumulate chunks in a list and join them once at the end instead of repeatedly extending a bytes object.
This is how it was done before the change in commit b506dc32c1. (A short illustration of the difference follows the commit metadata below.)
(cherry picked from commit 12a08c4760)

Co-authored-by: hajoscher <hajoscher@gmail.com>
Miss Islington (bot) 2018-07-04 01:32:41 -07:00 committed by GitHub
parent 2cbd1bb1f3
commit c1b75b5fb9
2 changed files with 13 additions and 9 deletions
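
Background (not part of the commit): repeatedly extending a bytes object copies the entire accumulated buffer on every iteration, so the read loop becomes quadratic in the total amount of data, whereas appending chunks to a list and joining once at the end stays linear. A minimal sketch of the two patterns; the chunk size, chunk count, and timing harness are illustrative choices, not taken from the commit:

    import timeit

    CHUNK = b"x" * 10240        # illustrative 10 KiB chunk
    N = 1000                    # illustrative number of simulated reads

    def concat_bytes():
        # Regressed pattern: each += copies the whole accumulated buffer.
        buf = b""
        for _ in range(N):
            buf += CHUNK
        return buf

    def join_list():
        # Restored pattern: collect chunks in a list, join once at the end.
        t = []
        for _ in range(N):
            t.append(CHUNK)
        return b"".join(t)

    print("bytes += :", timeit.timeit(concat_bytes, number=1))
    print("list+join:", timeit.timeit(join_list, number=1))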

Lib/tarfile.py

@@ -532,7 +532,7 @@ class _Stream:
                 if not buf:
                     break
                 t.append(buf)
-            buf = "".join(t)
+            buf = b"".join(t)
         else:
             buf = self._read(size)
         self.pos += len(buf)
@@ -545,6 +545,7 @@
             return self.__read(size)
 
         c = len(self.dbuf)
+        t = [self.dbuf]
         while c < size:
             buf = self.__read(self.bufsize)
             if not buf:
@@ -553,26 +554,27 @@
                 buf = self.cmp.decompress(buf)
             except self.exception:
                 raise ReadError("invalid compressed data")
-            self.dbuf += buf
+            t.append(buf)
             c += len(buf)
-        buf = self.dbuf[:size]
-        self.dbuf = self.dbuf[size:]
-        return buf
+        t = b"".join(t)
+        self.dbuf = t[size:]
+        return t[:size]
 
     def __read(self, size):
         """Return size bytes from stream. If internal buffer is empty,
            read another block from the stream.
         """
         c = len(self.buf)
+        t = [self.buf]
         while c < size:
             buf = self.fileobj.read(self.bufsize)
             if not buf:
                 break
-            self.buf += buf
+            t.append(buf)
             c += len(buf)
-        buf = self.buf[:size]
-        self.buf = self.buf[size:]
-        return buf
+        t = b"".join(t)
+        self.buf = t[size:]
+        return t[:size]
 
 # class _Stream
 
 class _StreamProxy(object):
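
For readability, here is _Stream.__read as it reads with the patch applied, reassembled from the added lines in the hunk above; the inline comments are added here and are not in the source:

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]                 # start from any leftover buffered data
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)              # O(1) list append instead of bytes +=
            c += len(buf)
        t = b"".join(t)                # one linear-time concatenation
        self.buf = t[size:]            # keep the unread remainder for the next call
        return t[:size]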

Misc/NEWS.d entry (new file)

@@ -0,0 +1,2 @@
+Fixed a performance regression for reading streams with tarfile. The
+buffered read now uses a list and join instead of appending to a bytes object.
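
As context for "reading streams": the _Stream code patched above backs tarfile's pipe/stream modes such as "r|gz", which cannot seek and therefore read every member through the buffered-read loops shown in the diff. A usage sketch (the archive name is a placeholder, not from the commit):

    import tarfile

    # Stream mode ("r|gz") reads sequentially through _Stream._read()/__read(),
    # so large members hit the buffered-read loop that regressed.
    with tarfile.open("archive.tar.gz", mode="r|gz") as tar:
        for member in tar:
            f = tar.extractfile(member)
            if f is None:           # directories and special entries carry no data
                continue
            data = f.read()         # goes through the patched buffered read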