Issue #16034 follow-up: Apply optimizations to the lzma module.
This commit is contained in:
parent
06eecead03
commit
186370b433
99
Lib/lzma.py
99
Lib/lzma.py
|
@ -110,7 +110,8 @@ class LZMAFile(io.BufferedIOBase):
|
|||
# stream will need a separate decompressor object.
|
||||
self._init_args = {"format":format, "filters":filters}
|
||||
self._decompressor = LZMADecompressor(**self._init_args)
|
||||
self._buffer = None
|
||||
self._buffer = b""
|
||||
self._buffer_offset = 0
|
||||
elif mode in ("w", "wb", "a", "ab"):
|
||||
if format is None:
|
||||
format = FORMAT_XZ
|
||||
|
@ -143,7 +144,7 @@ class LZMAFile(io.BufferedIOBase):
|
|||
try:
|
||||
if self._mode in (_MODE_READ, _MODE_READ_EOF):
|
||||
self._decompressor = None
|
||||
self._buffer = None
|
||||
self._buffer = b""
|
||||
elif self._mode == _MODE_WRITE:
|
||||
self._fp.write(self._compressor.flush())
|
||||
self._compressor = None
|
||||
|
@ -187,15 +188,18 @@ class LZMAFile(io.BufferedIOBase):
|
|||
raise ValueError("I/O operation on closed file")
|
||||
|
||||
def _check_can_read(self):
|
||||
if not self.readable():
|
||||
if self._mode not in (_MODE_READ, _MODE_READ_EOF):
|
||||
self._check_not_closed()
|
||||
raise io.UnsupportedOperation("File not open for reading")
|
||||
|
||||
def _check_can_write(self):
|
||||
if not self.writable():
|
||||
if self._mode != _MODE_WRITE:
|
||||
self._check_not_closed()
|
||||
raise io.UnsupportedOperation("File not open for writing")
|
||||
|
||||
def _check_can_seek(self):
|
||||
if not self.readable():
|
||||
if self._mode not in (_MODE_READ, _MODE_READ_EOF):
|
||||
self._check_not_closed()
|
||||
raise io.UnsupportedOperation("Seeking is only supported "
|
||||
"on files open for reading")
|
||||
if not self._fp.seekable():
|
||||
|
@ -204,16 +208,13 @@ class LZMAFile(io.BufferedIOBase):
|
|||
|
||||
# Fill the readahead buffer if it is empty. Returns False on EOF.
|
||||
def _fill_buffer(self):
|
||||
if self._mode == _MODE_READ_EOF:
|
||||
return False
|
||||
# Depending on the input data, our call to the decompressor may not
|
||||
# return any data. In this case, try again after reading another block.
|
||||
while True:
|
||||
if self._buffer:
|
||||
return True
|
||||
|
||||
if self._decompressor.unused_data:
|
||||
rawblock = self._decompressor.unused_data
|
||||
else:
|
||||
rawblock = self._fp.read(_BUFFER_SIZE)
|
||||
while self._buffer_offset == len(self._buffer):
|
||||
rawblock = (self._decompressor.unused_data or
|
||||
self._fp.read(_BUFFER_SIZE))
|
||||
|
||||
if not rawblock:
|
||||
if self._decompressor.eof:
|
||||
|
@ -229,30 +230,48 @@ class LZMAFile(io.BufferedIOBase):
|
|||
self._decompressor = LZMADecompressor(**self._init_args)
|
||||
|
||||
self._buffer = self._decompressor.decompress(rawblock)
|
||||
self._buffer_offset = 0
|
||||
return True
|
||||
|
||||
# Read data until EOF.
|
||||
# If return_data is false, consume the data without returning it.
|
||||
def _read_all(self, return_data=True):
|
||||
# The loop assumes that _buffer_offset is 0. Ensure that this is true.
|
||||
self._buffer = self._buffer[self._buffer_offset:]
|
||||
self._buffer_offset = 0
|
||||
|
||||
blocks = []
|
||||
while self._fill_buffer():
|
||||
if return_data:
|
||||
blocks.append(self._buffer)
|
||||
self._pos += len(self._buffer)
|
||||
self._buffer = None
|
||||
self._buffer = b""
|
||||
if return_data:
|
||||
return b"".join(blocks)
|
||||
|
||||
# Read a block of up to n bytes.
|
||||
# If return_data is false, consume the data without returning it.
|
||||
def _read_block(self, n, return_data=True):
|
||||
# If we have enough data buffered, return immediately.
|
||||
end = self._buffer_offset + n
|
||||
if end <= len(self._buffer):
|
||||
data = self._buffer[self._buffer_offset : end]
|
||||
self._buffer_offset = end
|
||||
self._pos += len(data)
|
||||
return data if return_data else None
|
||||
|
||||
# The loop assumes that _buffer_offset is 0. Ensure that this is true.
|
||||
self._buffer = self._buffer[self._buffer_offset:]
|
||||
self._buffer_offset = 0
|
||||
|
||||
blocks = []
|
||||
while n > 0 and self._fill_buffer():
|
||||
if n < len(self._buffer):
|
||||
data = self._buffer[:n]
|
||||
self._buffer = self._buffer[n:]
|
||||
self._buffer_offset = n
|
||||
else:
|
||||
data = self._buffer
|
||||
self._buffer = None
|
||||
self._buffer = b""
|
||||
if return_data:
|
||||
blocks.append(data)
|
||||
self._pos += len(data)
|
||||
|
@ -267,9 +286,9 @@ class LZMAFile(io.BufferedIOBase):
|
|||
The exact number of bytes returned is unspecified.
|
||||
"""
|
||||
self._check_can_read()
|
||||
if self._mode == _MODE_READ_EOF or not self._fill_buffer():
|
||||
if not self._fill_buffer():
|
||||
return b""
|
||||
return self._buffer
|
||||
return self._buffer[self._buffer_offset:]
|
||||
|
||||
def read(self, size=-1):
|
||||
"""Read up to size uncompressed bytes from the file.
|
||||
|
@ -278,7 +297,7 @@ class LZMAFile(io.BufferedIOBase):
|
|||
Returns b"" if the file is already at EOF.
|
||||
"""
|
||||
self._check_can_read()
|
||||
if self._mode == _MODE_READ_EOF or size == 0:
|
||||
if size == 0:
|
||||
return b""
|
||||
elif size < 0:
|
||||
return self._read_all()
|
||||
|
@ -295,18 +314,40 @@ class LZMAFile(io.BufferedIOBase):
|
|||
# this does not give enough data for the decompressor to make progress.
|
||||
# In this case we make multiple reads, to avoid returning b"".
|
||||
self._check_can_read()
|
||||
if (size == 0 or self._mode == _MODE_READ_EOF or
|
||||
not self._fill_buffer()):
|
||||
if (size == 0 or
|
||||
# Only call _fill_buffer() if the buffer is actually empty.
|
||||
# This gives a significant speedup if *size* is small.
|
||||
(self._buffer_offset == len(self._buffer) and not self._fill_buffer())):
|
||||
return b""
|
||||
if 0 < size < len(self._buffer):
|
||||
data = self._buffer[:size]
|
||||
self._buffer = self._buffer[size:]
|
||||
if size > 0:
|
||||
data = self._buffer[self._buffer_offset :
|
||||
self._buffer_offset + size]
|
||||
self._buffer_offset += len(data)
|
||||
else:
|
||||
data = self._buffer
|
||||
self._buffer = None
|
||||
data = self._buffer[self._buffer_offset:]
|
||||
self._buffer = b""
|
||||
self._buffer_offset = 0
|
||||
self._pos += len(data)
|
||||
return data
|
||||
|
||||
def readline(self, size=-1):
|
||||
"""Read a line of uncompressed bytes from the file.
|
||||
|
||||
The terminating newline (if present) is retained. If size is
|
||||
non-negative, no more than size bytes will be read (in which
|
||||
case the line may be incomplete). Returns b'' if already at EOF.
|
||||
"""
|
||||
self._check_can_read()
|
||||
# Shortcut for the common case - the whole line is in the buffer.
|
||||
if size < 0:
|
||||
end = self._buffer.find(b"\n", self._buffer_offset) + 1
|
||||
if end > 0:
|
||||
line = self._buffer[self._buffer_offset : end]
|
||||
self._buffer_offset = end
|
||||
self._pos += len(line)
|
||||
return line
|
||||
return io.BufferedIOBase.readline(self, size)
|
||||
|
||||
def write(self, data):
|
||||
"""Write a bytes object to the file.
|
||||
|
||||
|
@ -326,7 +367,8 @@ class LZMAFile(io.BufferedIOBase):
|
|||
self._mode = _MODE_READ
|
||||
self._pos = 0
|
||||
self._decompressor = LZMADecompressor(**self._init_args)
|
||||
self._buffer = None
|
||||
self._buffer = b""
|
||||
self._buffer_offset = 0
|
||||
|
||||
def seek(self, offset, whence=0):
|
||||
"""Change the file position.
|
||||
|
@ -365,8 +407,7 @@ class LZMAFile(io.BufferedIOBase):
|
|||
offset -= self._pos
|
||||
|
||||
# Read and discard data until we reach the desired position.
|
||||
if self._mode != _MODE_READ_EOF:
|
||||
self._read_block(offset, return_data=False)
|
||||
self._read_block(offset, return_data=False)
|
||||
|
||||
return self._pos
|
||||
|
||||
|
|
Loading…
Reference in New Issue