Issue #2058: Remove the buf attribute and add __slots__ to the

TarInfo class in order to reduce tarfile's memory usage.
This commit is contained in:
Lars Gustäbel 2008-04-14 10:05:48 +00:00
parent 13d4a61075
commit c2ea8c6c3a
2 changed files with 46 additions and 34 deletions

View File

@ -767,7 +767,7 @@ class ExFileObject(object):
self.fileobj = _FileInFile(tarfile.fileobj,
tarinfo.offset_data,
tarinfo.size,
getattr(tarinfo, "sparse", None))
tarinfo.sparse)
self.name = tarinfo.name
self.mode = "r"
self.closed = False
@ -906,6 +906,12 @@ class TarInfo(object):
usually created internally.
"""
__slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
"chksum", "type", "linkname", "uname", "gname",
"devmajor", "devminor",
"offset", "offset_data", "pax_headers", "sparse",
"tarfile", "_sparse_structs", "_link_target")
def __init__(self, name=""):
"""Construct a TarInfo object. name is the optional name
of the member.
@ -927,6 +933,7 @@ class TarInfo(object):
self.offset = 0 # the tar header starts here
self.offset_data = 0 # the file's data starts here
self.sparse = None # sparse member information
self.pax_headers = {} # pax header information
# In pax headers the "name" and "linkname" field are called
@ -1181,7 +1188,6 @@ class TarInfo(object):
raise HeaderError("bad checksum")
obj = cls()
obj.buf = buf
obj.name = nts(buf[0:100], encoding, errors)
obj.mode = nti(buf[100:108])
obj.uid = nti(buf[108:116])
@ -1202,6 +1208,24 @@ class TarInfo(object):
if obj.type == AREGTYPE and obj.name.endswith("/"):
obj.type = DIRTYPE
# The old GNU sparse format occupies some of the unused
# space in the buffer for up to 4 sparse structures.
# Save the them for later processing in _proc_sparse().
if obj.type == GNUTYPE_SPARSE:
pos = 386
structs = []
for i in range(4):
try:
offset = nti(buf[pos:pos + 12])
numbytes = nti(buf[pos + 12:pos + 24])
except ValueError:
break
structs.append((offset, numbytes))
pos += 24
isextended = bool(buf[482])
origsize = nti(buf[483:495])
obj._sparse_structs = (structs, isextended, origsize)
# Remove redundant slashes from directories.
if obj.isdir():
obj.name = obj.name.rstrip("/")
@ -1288,31 +1312,11 @@ class TarInfo(object):
def _proc_sparse(self, tarfile):
"""Process a GNU sparse header plus extra headers.
"""
buf = self.buf
sp = _ringbuffer()
pos = 386
lastpos = 0
realpos = 0
# There are 4 possible sparse structs in the
# first header.
for i in range(4):
try:
offset = nti(buf[pos:pos + 12])
numbytes = nti(buf[pos + 12:pos + 24])
except ValueError:
break
if offset > lastpos:
sp.append(_hole(lastpos, offset - lastpos))
sp.append(_data(offset, numbytes, realpos))
realpos += numbytes
lastpos = offset + numbytes
pos += 24
# We already collected some sparse structures in frombuf().
structs, isextended, origsize = self._sparse_structs
del self._sparse_structs
isextended = bool(buf[482])
origsize = nti(buf[483:495])
# If the isextended flag is given,
# there are extra headers to process.
# Collect sparse structures from extended header blocks.
while isextended:
buf = tarfile.fileobj.read(BLOCKSIZE)
pos = 0
@ -1322,18 +1326,23 @@ class TarInfo(object):
numbytes = nti(buf[pos + 12:pos + 24])
except ValueError:
break
if offset > lastpos:
sp.append(_hole(lastpos, offset - lastpos))
sp.append(_data(offset, numbytes, realpos))
realpos += numbytes
lastpos = offset + numbytes
structs.append((offset, numbytes))
pos += 24
isextended = bool(buf[504])
# Transform the sparse structures to something we can use
# in ExFileObject.
self.sparse = _ringbuffer()
lastpos = 0
realpos = 0
for offset, numbytes in structs:
if offset > lastpos:
self.sparse.append(_hole(lastpos, offset - lastpos))
self.sparse.append(_data(offset, numbytes, realpos))
realpos += numbytes
lastpos = offset + numbytes
if lastpos < origsize:
sp.append(_hole(lastpos, origsize - lastpos))
self.sparse = sp
self.sparse.append(_hole(lastpos, origsize - lastpos))
self.offset_data = tarfile.fileobj.tell()
tarfile.offset = self.offset_data + self._block(self.size)

View File

@ -29,6 +29,9 @@ Extension Modules
Library
-------
- Issue #2058: Remove the buf attribute and add __slots__ to the TarInfo
class in order to reduce tarfile's memory usage.
- Bug #2606: Avoid calling .sort() on a dict_keys object.
- The bundled libffi copy is now in sync with the recently released