Issue #2058: Remove the buf attribute and add __slots__ to the TarInfo class in order to reduce tarfile's memory usage.
This commit is contained in:
Lars Gustäbel 2008-04-14 10:05:48 +00:00
parent 13d4a61075
commit c2ea8c6c3a
2 changed files with 46 additions and 34 deletions

View File

@ -767,7 +767,7 @@ class ExFileObject(object):
self.fileobj = _FileInFile(tarfile.fileobj, self.fileobj = _FileInFile(tarfile.fileobj,
tarinfo.offset_data, tarinfo.offset_data,
tarinfo.size, tarinfo.size,
getattr(tarinfo, "sparse", None)) tarinfo.sparse)
self.name = tarinfo.name self.name = tarinfo.name
self.mode = "r" self.mode = "r"
self.closed = False self.closed = False
@ -906,6 +906,12 @@ class TarInfo(object):
usually created internally. usually created internally.
""" """
__slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
"chksum", "type", "linkname", "uname", "gname",
"devmajor", "devminor",
"offset", "offset_data", "pax_headers", "sparse",
"tarfile", "_sparse_structs", "_link_target")
def __init__(self, name=""): def __init__(self, name=""):
"""Construct a TarInfo object. name is the optional name """Construct a TarInfo object. name is the optional name
of the member. of the member.
@ -927,6 +933,7 @@ class TarInfo(object):
self.offset = 0 # the tar header starts here self.offset = 0 # the tar header starts here
self.offset_data = 0 # the file's data starts here self.offset_data = 0 # the file's data starts here
self.sparse = None # sparse member information
self.pax_headers = {} # pax header information self.pax_headers = {} # pax header information
# In pax headers the "name" and "linkname" field are called # In pax headers the "name" and "linkname" field are called
@ -1181,7 +1188,6 @@ class TarInfo(object):
raise HeaderError("bad checksum") raise HeaderError("bad checksum")
obj = cls() obj = cls()
obj.buf = buf
obj.name = nts(buf[0:100], encoding, errors) obj.name = nts(buf[0:100], encoding, errors)
obj.mode = nti(buf[100:108]) obj.mode = nti(buf[100:108])
obj.uid = nti(buf[108:116]) obj.uid = nti(buf[108:116])
@ -1202,6 +1208,24 @@ class TarInfo(object):
if obj.type == AREGTYPE and obj.name.endswith("/"): if obj.type == AREGTYPE and obj.name.endswith("/"):
obj.type = DIRTYPE obj.type = DIRTYPE
# The old GNU sparse format occupies some of the unused
# space in the buffer for up to 4 sparse structures.
# Save them for later processing in _proc_sparse().
if obj.type == GNUTYPE_SPARSE:
pos = 386
structs = []
for i in range(4):
try:
offset = nti(buf[pos:pos + 12])
numbytes = nti(buf[pos + 12:pos + 24])
except ValueError:
break
structs.append((offset, numbytes))
pos += 24
isextended = bool(buf[482])
origsize = nti(buf[483:495])
obj._sparse_structs = (structs, isextended, origsize)
# Remove redundant slashes from directories. # Remove redundant slashes from directories.
if obj.isdir(): if obj.isdir():
obj.name = obj.name.rstrip("/") obj.name = obj.name.rstrip("/")
@ -1288,31 +1312,11 @@ class TarInfo(object):
def _proc_sparse(self, tarfile): def _proc_sparse(self, tarfile):
"""Process a GNU sparse header plus extra headers. """Process a GNU sparse header plus extra headers.
""" """
buf = self.buf # We already collected some sparse structures in frombuf().
sp = _ringbuffer() structs, isextended, origsize = self._sparse_structs
pos = 386 del self._sparse_structs
lastpos = 0
realpos = 0
# There are 4 possible sparse structs in the
# first header.
for i in range(4):
try:
offset = nti(buf[pos:pos + 12])
numbytes = nti(buf[pos + 12:pos + 24])
except ValueError:
break
if offset > lastpos:
sp.append(_hole(lastpos, offset - lastpos))
sp.append(_data(offset, numbytes, realpos))
realpos += numbytes
lastpos = offset + numbytes
pos += 24
isextended = bool(buf[482]) # Collect sparse structures from extended header blocks.
origsize = nti(buf[483:495])
# If the isextended flag is given,
# there are extra headers to process.
while isextended: while isextended:
buf = tarfile.fileobj.read(BLOCKSIZE) buf = tarfile.fileobj.read(BLOCKSIZE)
pos = 0 pos = 0
@ -1322,18 +1326,23 @@ class TarInfo(object):
numbytes = nti(buf[pos + 12:pos + 24]) numbytes = nti(buf[pos + 12:pos + 24])
except ValueError: except ValueError:
break break
if offset > lastpos: structs.append((offset, numbytes))
sp.append(_hole(lastpos, offset - lastpos))
sp.append(_data(offset, numbytes, realpos))
realpos += numbytes
lastpos = offset + numbytes
pos += 24 pos += 24
isextended = bool(buf[504]) isextended = bool(buf[504])
# Transform the sparse structures to something we can use
# in ExFileObject.
self.sparse = _ringbuffer()
lastpos = 0
realpos = 0
for offset, numbytes in structs:
if offset > lastpos:
self.sparse.append(_hole(lastpos, offset - lastpos))
self.sparse.append(_data(offset, numbytes, realpos))
realpos += numbytes
lastpos = offset + numbytes
if lastpos < origsize: if lastpos < origsize:
sp.append(_hole(lastpos, origsize - lastpos)) self.sparse.append(_hole(lastpos, origsize - lastpos))
self.sparse = sp
self.offset_data = tarfile.fileobj.tell() self.offset_data = tarfile.fileobj.tell()
tarfile.offset = self.offset_data + self._block(self.size) tarfile.offset = self.offset_data + self._block(self.size)

View File

@ -29,6 +29,9 @@ Extension Modules
Library Library
------- -------
- Issue #2058: Remove the buf attribute and add __slots__ to the TarInfo
class in order to reduce tarfile's memory usage.
- Bug #2606: Avoid calling .sort() on a dict_keys object. - Bug #2606: Avoid calling .sort() on a dict_keys object.
- The bundled libffi copy is now in sync with the recently released - The bundled libffi copy is now in sync with the recently released