Issue #13815: TarFile.extractfile() now returns io.BufferedReader objects.
The ExFileObject class was removed; some of its code went into _FileInFile.
This commit is contained in:
parent
ef5a4636d0
commit
7a919e9930
|
@ -376,15 +376,12 @@ be finalized; only the internally used file object will be closed. See the
|
|||
.. method:: TarFile.extractfile(member)
|
||||
|
||||
Extract a member from the archive as a file object. *member* may be a filename
|
||||
or a :class:`TarInfo` object. If *member* is a regular file, a :term:`file-like
|
||||
object` is returned. If *member* is a link, a file-like object is constructed from
|
||||
the link's target. If *member* is none of the above, :const:`None` is returned.
|
||||
or a :class:`TarInfo` object. If *member* is a regular file or a link, an
|
||||
:class:`io.BufferedReader` object is returned. Otherwise, :const:`None` is
|
||||
returned.
|
||||
|
||||
.. note::
|
||||
|
||||
The file-like object is read-only. It provides the methods
|
||||
:meth:`read`, :meth:`readline`, :meth:`readlines`, :meth:`seek`, :meth:`tell`,
|
||||
and :meth:`close`, and also supports iteration over its lines.
|
||||
.. versionchanged:: 3.3
|
||||
Return an :class:`io.BufferedReader` object.
|
||||
|
||||
|
||||
.. method:: TarFile.add(name, arcname=None, recursive=True, exclude=None, *, filter=None)
|
||||
|
|
194
Lib/tarfile.py
194
Lib/tarfile.py
|
@ -668,6 +668,8 @@ class _FileInFile(object):
|
|||
self.offset = offset
|
||||
self.size = size
|
||||
self.position = 0
|
||||
self.name = getattr(fileobj, "name", None)
|
||||
self.closed = False
|
||||
|
||||
if blockinfo is None:
|
||||
blockinfo = [(0, size)]
|
||||
|
@ -686,10 +688,16 @@ class _FileInFile(object):
|
|||
if lastpos < self.size:
|
||||
self.map.append((False, lastpos, self.size, None))
|
||||
|
||||
def seekable(self):
|
||||
if not hasattr(self.fileobj, "seekable"):
|
||||
# XXX gzip.GzipFile and bz2.BZ2File
|
||||
def flush(self):
|
||||
pass
|
||||
|
||||
def readable(self):
|
||||
return True
|
||||
|
||||
def writable(self):
|
||||
return False
|
||||
|
||||
def seekable(self):
|
||||
return self.fileobj.seekable()
|
||||
|
||||
def tell(self):
|
||||
|
@ -697,10 +705,21 @@ class _FileInFile(object):
|
|||
"""
|
||||
return self.position
|
||||
|
||||
def seek(self, position):
|
||||
def seek(self, position, whence=io.SEEK_SET):
|
||||
"""Seek to a position in the file.
|
||||
"""
|
||||
self.position = position
|
||||
if whence == io.SEEK_SET:
|
||||
self.position = min(max(position, 0), self.size)
|
||||
elif whence == io.SEEK_CUR:
|
||||
if position < 0:
|
||||
self.position = max(self.position + position, 0)
|
||||
else:
|
||||
self.position = min(self.position + position, self.size)
|
||||
elif whence == io.SEEK_END:
|
||||
self.position = max(min(self.size + position, self.size), 0)
|
||||
else:
|
||||
raise ValueError("Invalid argument")
|
||||
return self.position
|
||||
|
||||
def read(self, size=None):
|
||||
"""Read data from the file.
|
||||
|
@ -729,146 +748,16 @@ class _FileInFile(object):
|
|||
size -= length
|
||||
self.position += length
|
||||
return buf
|
||||
#class _FileInFile
|
||||
|
||||
|
||||
class ExFileObject(object):
|
||||
"""File-like object for reading an archive member.
|
||||
Is returned by TarFile.extractfile().
|
||||
"""
|
||||
blocksize = 1024
|
||||
|
||||
def __init__(self, tarfile, tarinfo):
|
||||
self.fileobj = _FileInFile(tarfile.fileobj,
|
||||
tarinfo.offset_data,
|
||||
tarinfo.size,
|
||||
tarinfo.sparse)
|
||||
self.name = tarinfo.name
|
||||
self.mode = "r"
|
||||
self.closed = False
|
||||
self.size = tarinfo.size
|
||||
|
||||
self.position = 0
|
||||
self.buffer = b""
|
||||
|
||||
def readable(self):
|
||||
return True
|
||||
|
||||
def writable(self):
|
||||
return False
|
||||
|
||||
def seekable(self):
|
||||
return self.fileobj.seekable()
|
||||
|
||||
def read(self, size=None):
|
||||
"""Read at most size bytes from the file. If size is not
|
||||
present or None, read all data until EOF is reached.
|
||||
"""
|
||||
if self.closed:
|
||||
raise ValueError("I/O operation on closed file")
|
||||
|
||||
buf = b""
|
||||
if self.buffer:
|
||||
if size is None:
|
||||
buf = self.buffer
|
||||
self.buffer = b""
|
||||
else:
|
||||
buf = self.buffer[:size]
|
||||
self.buffer = self.buffer[size:]
|
||||
|
||||
if size is None:
|
||||
buf += self.fileobj.read()
|
||||
else:
|
||||
buf += self.fileobj.read(size - len(buf))
|
||||
|
||||
self.position += len(buf)
|
||||
return buf
|
||||
|
||||
# XXX TextIOWrapper uses the read1() method.
|
||||
read1 = read
|
||||
|
||||
def readline(self, size=-1):
|
||||
"""Read one entire line from the file. If size is present
|
||||
and non-negative, return a string with at most that
|
||||
size, which may be an incomplete line.
|
||||
"""
|
||||
if self.closed:
|
||||
raise ValueError("I/O operation on closed file")
|
||||
|
||||
pos = self.buffer.find(b"\n") + 1
|
||||
if pos == 0:
|
||||
# no newline found.
|
||||
while True:
|
||||
buf = self.fileobj.read(self.blocksize)
|
||||
self.buffer += buf
|
||||
if not buf or b"\n" in buf:
|
||||
pos = self.buffer.find(b"\n") + 1
|
||||
if pos == 0:
|
||||
# no newline found.
|
||||
pos = len(self.buffer)
|
||||
break
|
||||
|
||||
if size != -1:
|
||||
pos = min(size, pos)
|
||||
|
||||
buf = self.buffer[:pos]
|
||||
self.buffer = self.buffer[pos:]
|
||||
self.position += len(buf)
|
||||
return buf
|
||||
|
||||
def readlines(self):
|
||||
"""Return a list with all remaining lines.
|
||||
"""
|
||||
result = []
|
||||
while True:
|
||||
line = self.readline()
|
||||
if not line: break
|
||||
result.append(line)
|
||||
return result
|
||||
|
||||
def tell(self):
|
||||
"""Return the current file position.
|
||||
"""
|
||||
if self.closed:
|
||||
raise ValueError("I/O operation on closed file")
|
||||
|
||||
return self.position
|
||||
|
||||
def seek(self, pos, whence=io.SEEK_SET):
|
||||
"""Seek to a position in the file.
|
||||
"""
|
||||
if self.closed:
|
||||
raise ValueError("I/O operation on closed file")
|
||||
|
||||
if whence == io.SEEK_SET:
|
||||
self.position = min(max(pos, 0), self.size)
|
||||
elif whence == io.SEEK_CUR:
|
||||
if pos < 0:
|
||||
self.position = max(self.position + pos, 0)
|
||||
else:
|
||||
self.position = min(self.position + pos, self.size)
|
||||
elif whence == io.SEEK_END:
|
||||
self.position = max(min(self.size + pos, self.size), 0)
|
||||
else:
|
||||
raise ValueError("Invalid argument")
|
||||
|
||||
self.buffer = b""
|
||||
self.fileobj.seek(self.position)
|
||||
def readinto(self, b):
|
||||
buf = self.read(len(b))
|
||||
b[:len(buf)] = buf
|
||||
return len(buf)
|
||||
|
||||
def close(self):
|
||||
"""Close the file object.
|
||||
"""
|
||||
self.closed = True
|
||||
#class _FileInFile
|
||||
|
||||
def __iter__(self):
|
||||
"""Get an iterator over the file's lines.
|
||||
"""
|
||||
while True:
|
||||
line = self.readline()
|
||||
if not line:
|
||||
break
|
||||
yield line
|
||||
#class ExFileObject
|
||||
|
||||
#------------------
|
||||
# Exported Classes
|
||||
|
@ -1554,7 +1443,8 @@ class TarFile(object):
|
|||
|
||||
tarinfo = TarInfo # The default TarInfo class to use.
|
||||
|
||||
fileobject = ExFileObject # The default ExFileObject class to use.
|
||||
fileobject = None # The file-object for extractfile() or
|
||||
# io.BufferedReader if None.
|
||||
|
||||
def __init__(self, name=None, mode="r", fileobj=None, format=None,
|
||||
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
|
||||
|
@ -2178,12 +2068,9 @@ class TarFile(object):
|
|||
|
||||
def extractfile(self, member):
|
||||
"""Extract a member from the archive as a file object. `member' may be
|
||||
a filename or a TarInfo object. If `member' is a regular file, a
|
||||
file-like object is returned. If `member' is a link, a file-like
|
||||
object is constructed from the link's target. If `member' is none of
|
||||
the above, None is returned.
|
||||
The file-like object is read-only and provides the following
|
||||
methods: read(), readline(), readlines(), seek() and tell()
|
||||
a filename or a TarInfo object. If `member' is a regular file or a
|
||||
link, an io.BufferedReader object is returned. Otherwise, None is
|
||||
returned.
|
||||
"""
|
||||
self._check("r")
|
||||
|
||||
|
@ -2192,12 +2079,13 @@ class TarFile(object):
|
|||
else:
|
||||
tarinfo = member
|
||||
|
||||
if tarinfo.isreg():
|
||||
return self.fileobject(self, tarinfo)
|
||||
|
||||
elif tarinfo.type not in SUPPORTED_TYPES:
|
||||
# If a member's type is unknown, it is treated as a
|
||||
# regular file.
|
||||
if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
|
||||
# Members with unknown types are treated as regular files.
|
||||
if self.fileobject is None:
|
||||
fileobj = _FileInFile(self.fileobj, tarinfo.offset_data, tarinfo.size, tarinfo.sparse)
|
||||
return io.BufferedReader(fileobj)
|
||||
else:
|
||||
# Keep the traditional pre-3.3 API intact.
|
||||
return self.fileobject(self, tarinfo)
|
||||
|
||||
elif tarinfo.islnk() or tarinfo.issym():
|
||||
|
|
|
@ -56,13 +56,10 @@ class UstarReadTest(ReadTest):
|
|||
|
||||
def test_fileobj_regular_file(self):
|
||||
tarinfo = self.tar.getmember("ustar/regtype")
|
||||
fobj = self.tar.extractfile(tarinfo)
|
||||
try:
|
||||
with self.tar.extractfile(tarinfo) as fobj:
|
||||
data = fobj.read()
|
||||
self.assertTrue((len(data), md5sum(data)) == (tarinfo.size, md5_regtype),
|
||||
"regular file extraction failed")
|
||||
finally:
|
||||
fobj.close()
|
||||
|
||||
def test_fileobj_readlines(self):
|
||||
self.tar.extract("ustar/regtype", TEMPDIR)
|
||||
|
@ -70,8 +67,7 @@ class UstarReadTest(ReadTest):
|
|||
with open(os.path.join(TEMPDIR, "ustar/regtype"), "r") as fobj1:
|
||||
lines1 = fobj1.readlines()
|
||||
|
||||
fobj = self.tar.extractfile(tarinfo)
|
||||
try:
|
||||
with self.tar.extractfile(tarinfo) as fobj:
|
||||
fobj2 = io.TextIOWrapper(fobj)
|
||||
lines2 = fobj2.readlines()
|
||||
self.assertTrue(lines1 == lines2,
|
||||
|
@ -81,21 +77,16 @@ class UstarReadTest(ReadTest):
|
|||
self.assertTrue(lines2[83] ==
|
||||
"I will gladly admit that Python is not the fastest running scripting language.\n",
|
||||
"fileobj.readlines() failed")
|
||||
finally:
|
||||
fobj.close()
|
||||
|
||||
def test_fileobj_iter(self):
|
||||
self.tar.extract("ustar/regtype", TEMPDIR)
|
||||
tarinfo = self.tar.getmember("ustar/regtype")
|
||||
with open(os.path.join(TEMPDIR, "ustar/regtype"), "r") as fobj1:
|
||||
lines1 = fobj1.readlines()
|
||||
fobj2 = self.tar.extractfile(tarinfo)
|
||||
try:
|
||||
with self.tar.extractfile(tarinfo) as fobj2:
|
||||
lines2 = list(io.TextIOWrapper(fobj2))
|
||||
self.assertTrue(lines1 == lines2,
|
||||
"fileobj.__iter__() failed")
|
||||
finally:
|
||||
fobj2.close()
|
||||
|
||||
def test_fileobj_seek(self):
|
||||
self.tar.extract("ustar/regtype", TEMPDIR)
|
||||
|
@ -147,17 +138,24 @@ class UstarReadTest(ReadTest):
|
|||
"read() after readline() failed")
|
||||
fobj.close()
|
||||
|
||||
def test_fileobj_text(self):
|
||||
with self.tar.extractfile("ustar/regtype") as fobj:
|
||||
fobj = io.TextIOWrapper(fobj)
|
||||
data = fobj.read().encode("iso8859-1")
|
||||
self.assertEqual(md5sum(data), md5_regtype)
|
||||
try:
|
||||
fobj.seek(100)
|
||||
except AttributeError:
|
||||
# Issue #13815: seek() complained about a missing
|
||||
# flush() method.
|
||||
self.fail("seeking failed in text mode")
|
||||
|
||||
# Test if symbolic and hard links are resolved by extractfile(). The
|
||||
# test link members each point to a regular member whose data is
|
||||
# supposed to be exported.
|
||||
def _test_fileobj_link(self, lnktype, regtype):
|
||||
a = self.tar.extractfile(lnktype)
|
||||
b = self.tar.extractfile(regtype)
|
||||
try:
|
||||
with self.tar.extractfile(lnktype) as a, self.tar.extractfile(regtype) as b:
|
||||
self.assertEqual(a.name, b.name)
|
||||
finally:
|
||||
a.close()
|
||||
b.close()
|
||||
|
||||
def test_fileobj_link1(self):
|
||||
self._test_fileobj_link("ustar/lnktype", "ustar/regtype")
|
||||
|
@ -265,9 +263,8 @@ class MiscReadTest(CommonReadTest):
|
|||
t = tar.next()
|
||||
name = t.name
|
||||
offset = t.offset
|
||||
f = tar.extractfile(t)
|
||||
with tar.extractfile(t) as f:
|
||||
data = f.read()
|
||||
f.close()
|
||||
finally:
|
||||
tar.close()
|
||||
|
||||
|
@ -439,7 +436,7 @@ class StreamReadTest(CommonReadTest):
|
|||
for tarinfo in self.tar:
|
||||
if not tarinfo.isreg():
|
||||
continue
|
||||
fobj = self.tar.extractfile(tarinfo)
|
||||
with self.tar.extractfile(tarinfo) as fobj:
|
||||
while True:
|
||||
try:
|
||||
buf = fobj.read(512)
|
||||
|
@ -447,18 +444,17 @@ class StreamReadTest(CommonReadTest):
|
|||
self.fail("simple read-through using TarFile.extractfile() failed")
|
||||
if not buf:
|
||||
break
|
||||
fobj.close()
|
||||
|
||||
def test_fileobj_regular_file(self):
|
||||
tarinfo = self.tar.next() # get "regtype" (can't use getmember)
|
||||
fobj = self.tar.extractfile(tarinfo)
|
||||
with self.tar.extractfile(tarinfo) as fobj:
|
||||
data = fobj.read()
|
||||
self.assertTrue((len(data), md5sum(data)) == (tarinfo.size, md5_regtype),
|
||||
"regular file extraction failed")
|
||||
|
||||
def test_provoke_stream_error(self):
|
||||
tarinfos = self.tar.getmembers()
|
||||
f = self.tar.extractfile(tarinfos[0]) # read the first member
|
||||
with self.tar.extractfile(tarinfos[0]) as f: # read the first member
|
||||
self.assertRaises(tarfile.StreamError, f.read)
|
||||
|
||||
def test_compare_members(self):
|
||||
|
@ -1484,12 +1480,9 @@ class AppendTest(unittest.TestCase):
|
|||
with tarfile.open(tarname, encoding="iso8859-1") as src:
|
||||
t = src.getmember("ustar/regtype")
|
||||
t.name = "foo"
|
||||
f = src.extractfile(t)
|
||||
try:
|
||||
with src.extractfile(t) as f:
|
||||
with tarfile.open(self.tarname, mode) as tar:
|
||||
tar.addfile(t, f)
|
||||
finally:
|
||||
f.close()
|
||||
|
||||
def _test(self, names=["bar"], fileobj=None):
|
||||
with tarfile.open(self.tarname, fileobj=fileobj) as tar:
|
||||
|
|
Loading…
Reference in New Issue