Issue #2058: Remove the buf attribute and add __slots__ to the
TarInfo class in order to reduce tarfile's memory usage.
This commit is contained in:
parent
13d4a61075
commit
c2ea8c6c3a
|
@ -767,7 +767,7 @@ class ExFileObject(object):
|
||||||
self.fileobj = _FileInFile(tarfile.fileobj,
|
self.fileobj = _FileInFile(tarfile.fileobj,
|
||||||
tarinfo.offset_data,
|
tarinfo.offset_data,
|
||||||
tarinfo.size,
|
tarinfo.size,
|
||||||
getattr(tarinfo, "sparse", None))
|
tarinfo.sparse)
|
||||||
self.name = tarinfo.name
|
self.name = tarinfo.name
|
||||||
self.mode = "r"
|
self.mode = "r"
|
||||||
self.closed = False
|
self.closed = False
|
||||||
|
@ -906,6 +906,12 @@ class TarInfo(object):
|
||||||
usually created internally.
|
usually created internally.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
__slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
|
||||||
|
"chksum", "type", "linkname", "uname", "gname",
|
||||||
|
"devmajor", "devminor",
|
||||||
|
"offset", "offset_data", "pax_headers", "sparse",
|
||||||
|
"tarfile", "_sparse_structs", "_link_target")
|
||||||
|
|
||||||
def __init__(self, name=""):
|
def __init__(self, name=""):
|
||||||
"""Construct a TarInfo object. name is the optional name
|
"""Construct a TarInfo object. name is the optional name
|
||||||
of the member.
|
of the member.
|
||||||
|
@ -927,6 +933,7 @@ class TarInfo(object):
|
||||||
self.offset = 0 # the tar header starts here
|
self.offset = 0 # the tar header starts here
|
||||||
self.offset_data = 0 # the file's data starts here
|
self.offset_data = 0 # the file's data starts here
|
||||||
|
|
||||||
|
self.sparse = None # sparse member information
|
||||||
self.pax_headers = {} # pax header information
|
self.pax_headers = {} # pax header information
|
||||||
|
|
||||||
# In pax headers the "name" and "linkname" fields are called
|
# In pax headers the "name" and "linkname" fields are called
|
||||||
|
@ -1181,7 +1188,6 @@ class TarInfo(object):
|
||||||
raise HeaderError("bad checksum")
|
raise HeaderError("bad checksum")
|
||||||
|
|
||||||
obj = cls()
|
obj = cls()
|
||||||
obj.buf = buf
|
|
||||||
obj.name = nts(buf[0:100], encoding, errors)
|
obj.name = nts(buf[0:100], encoding, errors)
|
||||||
obj.mode = nti(buf[100:108])
|
obj.mode = nti(buf[100:108])
|
||||||
obj.uid = nti(buf[108:116])
|
obj.uid = nti(buf[108:116])
|
||||||
|
@ -1202,6 +1208,24 @@ class TarInfo(object):
|
||||||
if obj.type == AREGTYPE and obj.name.endswith("/"):
|
if obj.type == AREGTYPE and obj.name.endswith("/"):
|
||||||
obj.type = DIRTYPE
|
obj.type = DIRTYPE
|
||||||
|
|
||||||
|
# The old GNU sparse format occupies some of the unused
|
||||||
|
# space in the buffer for up to 4 sparse structures.
|
||||||
|
# Save them for later processing in _proc_sparse().
|
||||||
|
if obj.type == GNUTYPE_SPARSE:
|
||||||
|
pos = 386
|
||||||
|
structs = []
|
||||||
|
for i in range(4):
|
||||||
|
try:
|
||||||
|
offset = nti(buf[pos:pos + 12])
|
||||||
|
numbytes = nti(buf[pos + 12:pos + 24])
|
||||||
|
except ValueError:
|
||||||
|
break
|
||||||
|
structs.append((offset, numbytes))
|
||||||
|
pos += 24
|
||||||
|
isextended = bool(buf[482])
|
||||||
|
origsize = nti(buf[483:495])
|
||||||
|
obj._sparse_structs = (structs, isextended, origsize)
|
||||||
|
|
||||||
# Remove redundant slashes from directories.
|
# Remove redundant slashes from directories.
|
||||||
if obj.isdir():
|
if obj.isdir():
|
||||||
obj.name = obj.name.rstrip("/")
|
obj.name = obj.name.rstrip("/")
|
||||||
|
@ -1288,31 +1312,11 @@ class TarInfo(object):
|
||||||
def _proc_sparse(self, tarfile):
|
def _proc_sparse(self, tarfile):
|
||||||
"""Process a GNU sparse header plus extra headers.
|
"""Process a GNU sparse header plus extra headers.
|
||||||
"""
|
"""
|
||||||
buf = self.buf
|
# We already collected some sparse structures in frombuf().
|
||||||
sp = _ringbuffer()
|
structs, isextended, origsize = self._sparse_structs
|
||||||
pos = 386
|
del self._sparse_structs
|
||||||
lastpos = 0
|
|
||||||
realpos = 0
|
|
||||||
# There are 4 possible sparse structs in the
|
|
||||||
# first header.
|
|
||||||
for i in range(4):
|
|
||||||
try:
|
|
||||||
offset = nti(buf[pos:pos + 12])
|
|
||||||
numbytes = nti(buf[pos + 12:pos + 24])
|
|
||||||
except ValueError:
|
|
||||||
break
|
|
||||||
if offset > lastpos:
|
|
||||||
sp.append(_hole(lastpos, offset - lastpos))
|
|
||||||
sp.append(_data(offset, numbytes, realpos))
|
|
||||||
realpos += numbytes
|
|
||||||
lastpos = offset + numbytes
|
|
||||||
pos += 24
|
|
||||||
|
|
||||||
isextended = bool(buf[482])
|
# Collect sparse structures from extended header blocks.
|
||||||
origsize = nti(buf[483:495])
|
|
||||||
|
|
||||||
# If the isextended flag is given,
|
|
||||||
# there are extra headers to process.
|
|
||||||
while isextended:
|
while isextended:
|
||||||
buf = tarfile.fileobj.read(BLOCKSIZE)
|
buf = tarfile.fileobj.read(BLOCKSIZE)
|
||||||
pos = 0
|
pos = 0
|
||||||
|
@ -1322,18 +1326,23 @@ class TarInfo(object):
|
||||||
numbytes = nti(buf[pos + 12:pos + 24])
|
numbytes = nti(buf[pos + 12:pos + 24])
|
||||||
except ValueError:
|
except ValueError:
|
||||||
break
|
break
|
||||||
if offset > lastpos:
|
structs.append((offset, numbytes))
|
||||||
sp.append(_hole(lastpos, offset - lastpos))
|
|
||||||
sp.append(_data(offset, numbytes, realpos))
|
|
||||||
realpos += numbytes
|
|
||||||
lastpos = offset + numbytes
|
|
||||||
pos += 24
|
pos += 24
|
||||||
isextended = bool(buf[504])
|
isextended = bool(buf[504])
|
||||||
|
|
||||||
|
# Transform the sparse structures to something we can use
|
||||||
|
# in ExFileObject.
|
||||||
|
self.sparse = _ringbuffer()
|
||||||
|
lastpos = 0
|
||||||
|
realpos = 0
|
||||||
|
for offset, numbytes in structs:
|
||||||
|
if offset > lastpos:
|
||||||
|
self.sparse.append(_hole(lastpos, offset - lastpos))
|
||||||
|
self.sparse.append(_data(offset, numbytes, realpos))
|
||||||
|
realpos += numbytes
|
||||||
|
lastpos = offset + numbytes
|
||||||
if lastpos < origsize:
|
if lastpos < origsize:
|
||||||
sp.append(_hole(lastpos, origsize - lastpos))
|
self.sparse.append(_hole(lastpos, origsize - lastpos))
|
||||||
|
|
||||||
self.sparse = sp
|
|
||||||
|
|
||||||
self.offset_data = tarfile.fileobj.tell()
|
self.offset_data = tarfile.fileobj.tell()
|
||||||
tarfile.offset = self.offset_data + self._block(self.size)
|
tarfile.offset = self.offset_data + self._block(self.size)
|
||||||
|
|
|
@ -29,6 +29,9 @@ Extension Modules
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #2058: Remove the buf attribute and add __slots__ to the TarInfo
|
||||||
|
class in order to reduce tarfile's memory usage.
|
||||||
|
|
||||||
- Bug #2606: Avoid calling .sort() on a dict_keys object.
|
- Bug #2606: Avoid calling .sort() on a dict_keys object.
|
||||||
|
|
||||||
- The bundled libffi copy is now in sync with the recently released
|
- The bundled libffi copy is now in sync with the recently released
|
||||||
|
|
Loading…
Reference in New Issue