Issue #13815: TarFile.extractfile() now returns io.BufferedReader objects.

The ExFileObject class was removed; some of its code was moved into _FileInFile.
This commit is contained in:
Lars Gustäbel 2012-05-05 18:15:03 +02:00
parent ef5a4636d0
commit 7a919e9930
4 changed files with 80 additions and 200 deletions

View File

@ -376,15 +376,12 @@ be finalized; only the internally used file object will be closed. See the
.. method:: TarFile.extractfile(member)
Extract a member from the archive as a file object. *member* may be a filename
or a :class:`TarInfo` object. If *member* is a regular file, a :term:`file-like
object` is returned. If *member* is a link, a file-like object is constructed from
the link's target. If *member* is none of the above, :const:`None` is returned.
or a :class:`TarInfo` object. If *member* is a regular file or a link, an
:class:`io.BufferedReader` object is returned. Otherwise, :const:`None` is
returned.
.. note::
The file-like object is read-only. It provides the methods
:meth:`read`, :meth:`readline`, :meth:`readlines`, :meth:`seek`, :meth:`tell`,
and :meth:`close`, and also supports iteration over its lines.
.. versionchanged:: 3.3
Return an :class:`io.BufferedReader` object.
.. method:: TarFile.add(name, arcname=None, recursive=True, exclude=None, *, filter=None)

View File

@ -668,6 +668,8 @@ class _FileInFile(object):
self.offset = offset
self.size = size
self.position = 0
self.name = getattr(fileobj, "name", None)
self.closed = False
if blockinfo is None:
blockinfo = [(0, size)]
@ -686,10 +688,16 @@ class _FileInFile(object):
if lastpos < self.size:
self.map.append((False, lastpos, self.size, None))
def seekable(self):
if not hasattr(self.fileobj, "seekable"):
# XXX gzip.GzipFile and bz2.BZ2File
def flush(self):
pass
def readable(self):
return True
def writable(self):
return False
def seekable(self):
return self.fileobj.seekable()
def tell(self):
@ -697,10 +705,21 @@ class _FileInFile(object):
"""
return self.position
def seek(self, position):
def seek(self, position, whence=io.SEEK_SET):
"""Seek to a position in the file.
"""
self.position = position
if whence == io.SEEK_SET:
self.position = min(max(position, 0), self.size)
elif whence == io.SEEK_CUR:
if position < 0:
self.position = max(self.position + position, 0)
else:
self.position = min(self.position + position, self.size)
elif whence == io.SEEK_END:
self.position = max(min(self.size + position, self.size), 0)
else:
raise ValueError("Invalid argument")
return self.position
def read(self, size=None):
"""Read data from the file.
@ -729,146 +748,16 @@ class _FileInFile(object):
size -= length
self.position += length
return buf
#class _FileInFile
class ExFileObject(object):
"""File-like object for reading an archive member.
Is returned by TarFile.extractfile().
"""
blocksize = 1024
def __init__(self, tarfile, tarinfo):
self.fileobj = _FileInFile(tarfile.fileobj,
tarinfo.offset_data,
tarinfo.size,
tarinfo.sparse)
self.name = tarinfo.name
self.mode = "r"
self.closed = False
self.size = tarinfo.size
self.position = 0
self.buffer = b""
def readable(self):
return True
def writable(self):
return False
def seekable(self):
return self.fileobj.seekable()
def read(self, size=None):
"""Read at most size bytes from the file. If size is not
present or None, read all data until EOF is reached.
"""
if self.closed:
raise ValueError("I/O operation on closed file")
buf = b""
if self.buffer:
if size is None:
buf = self.buffer
self.buffer = b""
else:
buf = self.buffer[:size]
self.buffer = self.buffer[size:]
if size is None:
buf += self.fileobj.read()
else:
buf += self.fileobj.read(size - len(buf))
self.position += len(buf)
return buf
# XXX TextIOWrapper uses the read1() method.
read1 = read
def readline(self, size=-1):
"""Read one entire line from the file. If size is present
and non-negative, return a string with at most that
size, which may be an incomplete line.
"""
if self.closed:
raise ValueError("I/O operation on closed file")
pos = self.buffer.find(b"\n") + 1
if pos == 0:
# no newline found.
while True:
buf = self.fileobj.read(self.blocksize)
self.buffer += buf
if not buf or b"\n" in buf:
pos = self.buffer.find(b"\n") + 1
if pos == 0:
# no newline found.
pos = len(self.buffer)
break
if size != -1:
pos = min(size, pos)
buf = self.buffer[:pos]
self.buffer = self.buffer[pos:]
self.position += len(buf)
return buf
def readlines(self):
"""Return a list with all remaining lines.
"""
result = []
while True:
line = self.readline()
if not line: break
result.append(line)
return result
def tell(self):
"""Return the current file position.
"""
if self.closed:
raise ValueError("I/O operation on closed file")
return self.position
def seek(self, pos, whence=io.SEEK_SET):
"""Seek to a position in the file.
"""
if self.closed:
raise ValueError("I/O operation on closed file")
if whence == io.SEEK_SET:
self.position = min(max(pos, 0), self.size)
elif whence == io.SEEK_CUR:
if pos < 0:
self.position = max(self.position + pos, 0)
else:
self.position = min(self.position + pos, self.size)
elif whence == io.SEEK_END:
self.position = max(min(self.size + pos, self.size), 0)
else:
raise ValueError("Invalid argument")
self.buffer = b""
self.fileobj.seek(self.position)
def readinto(self, b):
buf = self.read(len(b))
b[:len(buf)] = buf
return len(buf)
def close(self):
"""Close the file object.
"""
self.closed = True
#class _FileInFile
def __iter__(self):
"""Get an iterator over the file's lines.
"""
while True:
line = self.readline()
if not line:
break
yield line
#class ExFileObject
#------------------
# Exported Classes
@ -1554,7 +1443,8 @@ class TarFile(object):
tarinfo = TarInfo # The default TarInfo class to use.
fileobject = ExFileObject # The default ExFileObject class to use.
fileobject = None # The file-object class used by extractfile(); if None,
# io.BufferedReader is used.
def __init__(self, name=None, mode="r", fileobj=None, format=None,
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
@ -2178,12 +2068,9 @@ class TarFile(object):
def extractfile(self, member):
"""Extract a member from the archive as a file object. `member' may be
a filename or a TarInfo object. If `member' is a regular file, a
file-like object is returned. If `member' is a link, a file-like
object is constructed from the link's target. If `member' is none of
the above, None is returned.
The file-like object is read-only and provides the following
methods: read(), readline(), readlines(), seek() and tell()
a filename or a TarInfo object. If `member' is a regular file or a
link, an io.BufferedReader object is returned. Otherwise, None is
returned.
"""
self._check("r")
@ -2192,12 +2079,13 @@ class TarFile(object):
else:
tarinfo = member
if tarinfo.isreg():
return self.fileobject(self, tarinfo)
elif tarinfo.type not in SUPPORTED_TYPES:
# If a member's type is unknown, it is treated as a
# regular file.
if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
# Members with unknown types are treated as regular files.
if self.fileobject is None:
fileobj = _FileInFile(self.fileobj, tarinfo.offset_data, tarinfo.size, tarinfo.sparse)
return io.BufferedReader(fileobj)
else:
# Keep the traditional pre-3.3 API intact.
return self.fileobject(self, tarinfo)
elif tarinfo.islnk() or tarinfo.issym():

View File

@ -56,13 +56,10 @@ class UstarReadTest(ReadTest):
def test_fileobj_regular_file(self):
tarinfo = self.tar.getmember("ustar/regtype")
fobj = self.tar.extractfile(tarinfo)
try:
with self.tar.extractfile(tarinfo) as fobj:
data = fobj.read()
self.assertTrue((len(data), md5sum(data)) == (tarinfo.size, md5_regtype),
"regular file extraction failed")
finally:
fobj.close()
def test_fileobj_readlines(self):
self.tar.extract("ustar/regtype", TEMPDIR)
@ -70,8 +67,7 @@ class UstarReadTest(ReadTest):
with open(os.path.join(TEMPDIR, "ustar/regtype"), "r") as fobj1:
lines1 = fobj1.readlines()
fobj = self.tar.extractfile(tarinfo)
try:
with self.tar.extractfile(tarinfo) as fobj:
fobj2 = io.TextIOWrapper(fobj)
lines2 = fobj2.readlines()
self.assertTrue(lines1 == lines2,
@ -81,21 +77,16 @@ class UstarReadTest(ReadTest):
self.assertTrue(lines2[83] ==
"I will gladly admit that Python is not the fastest running scripting language.\n",
"fileobj.readlines() failed")
finally:
fobj.close()
def test_fileobj_iter(self):
self.tar.extract("ustar/regtype", TEMPDIR)
tarinfo = self.tar.getmember("ustar/regtype")
with open(os.path.join(TEMPDIR, "ustar/regtype"), "r") as fobj1:
lines1 = fobj1.readlines()
fobj2 = self.tar.extractfile(tarinfo)
try:
with self.tar.extractfile(tarinfo) as fobj2:
lines2 = list(io.TextIOWrapper(fobj2))
self.assertTrue(lines1 == lines2,
"fileobj.__iter__() failed")
finally:
fobj2.close()
def test_fileobj_seek(self):
self.tar.extract("ustar/regtype", TEMPDIR)
@ -147,17 +138,24 @@ class UstarReadTest(ReadTest):
"read() after readline() failed")
fobj.close()
def test_fileobj_text(self):
with self.tar.extractfile("ustar/regtype") as fobj:
fobj = io.TextIOWrapper(fobj)
data = fobj.read().encode("iso8859-1")
self.assertEqual(md5sum(data), md5_regtype)
try:
fobj.seek(100)
except AttributeError:
# Issue #13815: seek() complained about a missing
# flush() method.
self.fail("seeking failed in text mode")
# Test if symbolic and hard links are resolved by extractfile(). The
# test link members each point to a regular member whose data is
# supposed to be exported.
def _test_fileobj_link(self, lnktype, regtype):
a = self.tar.extractfile(lnktype)
b = self.tar.extractfile(regtype)
try:
with self.tar.extractfile(lnktype) as a, self.tar.extractfile(regtype) as b:
self.assertEqual(a.name, b.name)
finally:
a.close()
b.close()
def test_fileobj_link1(self):
self._test_fileobj_link("ustar/lnktype", "ustar/regtype")
@ -265,9 +263,8 @@ class MiscReadTest(CommonReadTest):
t = tar.next()
name = t.name
offset = t.offset
f = tar.extractfile(t)
with tar.extractfile(t) as f:
data = f.read()
f.close()
finally:
tar.close()
@ -439,7 +436,7 @@ class StreamReadTest(CommonReadTest):
for tarinfo in self.tar:
if not tarinfo.isreg():
continue
fobj = self.tar.extractfile(tarinfo)
with self.tar.extractfile(tarinfo) as fobj:
while True:
try:
buf = fobj.read(512)
@ -447,18 +444,17 @@ class StreamReadTest(CommonReadTest):
self.fail("simple read-through using TarFile.extractfile() failed")
if not buf:
break
fobj.close()
def test_fileobj_regular_file(self):
tarinfo = self.tar.next() # get "regtype" (can't use getmember)
fobj = self.tar.extractfile(tarinfo)
with self.tar.extractfile(tarinfo) as fobj:
data = fobj.read()
self.assertTrue((len(data), md5sum(data)) == (tarinfo.size, md5_regtype),
"regular file extraction failed")
def test_provoke_stream_error(self):
tarinfos = self.tar.getmembers()
f = self.tar.extractfile(tarinfos[0]) # read the first member
with self.tar.extractfile(tarinfos[0]) as f: # read the first member
self.assertRaises(tarfile.StreamError, f.read)
def test_compare_members(self):
@ -1484,12 +1480,9 @@ class AppendTest(unittest.TestCase):
with tarfile.open(tarname, encoding="iso8859-1") as src:
t = src.getmember("ustar/regtype")
t.name = "foo"
f = src.extractfile(t)
try:
with src.extractfile(t) as f:
with tarfile.open(self.tarname, mode) as tar:
tar.addfile(t, f)
finally:
f.close()
def _test(self, names=["bar"], fileobj=None):
with tarfile.open(self.tarname, fileobj=fileobj) as tar:

View File

@ -89,6 +89,8 @@ Core and Builtins
Library
-------
- Issue #13815: TarFile.extractfile() now returns io.BufferedReader objects.
- Issue #14371: Support bzip2 in zipfile module.
Patch by Serhiy Storchaka.