Merged revisions 76443 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk ........ r76443 | lars.gustaebel | 2009-11-22 19:30:53 +0100 (Sun, 22 Nov 2009) | 24 lines Issue #6123: Fix opening empty archives and files. (Note that an empty archive is not the same as an empty file. An empty archive contains no members and is correctly terminated with an EOF block full of zeros. An empty file contains no data at all.) The problem was that although tarfile was able to create empty archives, it failed to open them raising a ReadError. On the other hand, tarfile opened empty files without error in most read modes and presented them as empty archives. (However, some modes still raised errors: "r|gz" raised ReadError, but "r:gz" worked, "r:bz2" even raised EOFError.) In order to get a more fine-grained control over the various internal error conditions I now split up the HeaderError exception into a number of meaningful sub-exceptions. This makes it easier in the TarFile.next() method to react to the different conditions in the correct way. The visible change in its behaviour now is that tarfile will open empty archives correctly and raise ReadError consistently for empty files. ........
This commit is contained in:
parent
f08adb36d8
commit
9520a430ef
|
@ -194,7 +194,7 @@ def nti(s):
|
||||||
try:
|
try:
|
||||||
n = int(nts(s, "ascii", "strict") or "0", 8)
|
n = int(nts(s, "ascii", "strict") or "0", 8)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
raise HeaderError("invalid header")
|
raise InvalidHeaderError("invalid header")
|
||||||
else:
|
else:
|
||||||
n = 0
|
n = 0
|
||||||
for i in range(len(s) - 1):
|
for i in range(len(s) - 1):
|
||||||
|
@ -325,8 +325,23 @@ class StreamError(TarError):
|
||||||
"""Exception for unsupported operations on stream-like TarFiles."""
|
"""Exception for unsupported operations on stream-like TarFiles."""
|
||||||
pass
|
pass
|
||||||
class HeaderError(TarError):
|
class HeaderError(TarError):
|
||||||
|
"""Base exception for header errors."""
|
||||||
|
pass
|
||||||
|
class EmptyHeaderError(HeaderError):
|
||||||
|
"""Exception for empty headers."""
|
||||||
|
pass
|
||||||
|
class TruncatedHeaderError(HeaderError):
|
||||||
|
"""Exception for truncated headers."""
|
||||||
|
pass
|
||||||
|
class EOFHeaderError(HeaderError):
|
||||||
|
"""Exception for end of file headers."""
|
||||||
|
pass
|
||||||
|
class InvalidHeaderError(HeaderError):
|
||||||
"""Exception for invalid headers."""
|
"""Exception for invalid headers."""
|
||||||
pass
|
pass
|
||||||
|
class SubsequentHeaderError(HeaderError):
|
||||||
|
"""Exception for missing and invalid extended headers."""
|
||||||
|
pass
|
||||||
|
|
||||||
#---------------------------
|
#---------------------------
|
||||||
# internal stream interface
|
# internal stream interface
|
||||||
|
@ -1171,14 +1186,16 @@ class TarInfo(object):
|
||||||
def frombuf(cls, buf, encoding, errors):
|
def frombuf(cls, buf, encoding, errors):
|
||||||
"""Construct a TarInfo object from a 512 byte bytes object.
|
"""Construct a TarInfo object from a 512 byte bytes object.
|
||||||
"""
|
"""
|
||||||
|
if len(buf) == 0:
|
||||||
|
raise EmptyHeaderError("empty header")
|
||||||
if len(buf) != BLOCKSIZE:
|
if len(buf) != BLOCKSIZE:
|
||||||
raise HeaderError("truncated header")
|
raise TruncatedHeaderError("truncated header")
|
||||||
if buf.count(NUL) == BLOCKSIZE:
|
if buf.count(NUL) == BLOCKSIZE:
|
||||||
raise HeaderError("empty header")
|
raise EOFHeaderError("end of file header")
|
||||||
|
|
||||||
chksum = nti(buf[148:156])
|
chksum = nti(buf[148:156])
|
||||||
if chksum not in calc_chksums(buf):
|
if chksum not in calc_chksums(buf):
|
||||||
raise HeaderError("bad checksum")
|
raise InvalidHeaderError("bad checksum")
|
||||||
|
|
||||||
obj = cls()
|
obj = cls()
|
||||||
obj.name = nts(buf[0:100], encoding, errors)
|
obj.name = nts(buf[0:100], encoding, errors)
|
||||||
|
@ -1234,8 +1251,6 @@ class TarInfo(object):
|
||||||
tarfile.
|
tarfile.
|
||||||
"""
|
"""
|
||||||
buf = tarfile.fileobj.read(BLOCKSIZE)
|
buf = tarfile.fileobj.read(BLOCKSIZE)
|
||||||
if not buf:
|
|
||||||
return
|
|
||||||
obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
|
obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
|
||||||
obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
|
obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
|
||||||
return obj._proc_member(tarfile)
|
return obj._proc_member(tarfile)
|
||||||
|
@ -1288,9 +1303,10 @@ class TarInfo(object):
|
||||||
buf = tarfile.fileobj.read(self._block(self.size))
|
buf = tarfile.fileobj.read(self._block(self.size))
|
||||||
|
|
||||||
# Fetch the next header and process it.
|
# Fetch the next header and process it.
|
||||||
next = self.fromtarfile(tarfile)
|
try:
|
||||||
if next is None:
|
next = self.fromtarfile(tarfile)
|
||||||
raise HeaderError("missing subsequent header")
|
except HeaderError:
|
||||||
|
raise SubsequentHeaderError("missing or bad subsequent header")
|
||||||
|
|
||||||
# Patch the TarInfo object from the next header with
|
# Patch the TarInfo object from the next header with
|
||||||
# the longname information.
|
# the longname information.
|
||||||
|
@ -1380,12 +1396,12 @@ class TarInfo(object):
|
||||||
pos += length
|
pos += length
|
||||||
|
|
||||||
# Fetch the next header.
|
# Fetch the next header.
|
||||||
next = self.fromtarfile(tarfile)
|
try:
|
||||||
|
next = self.fromtarfile(tarfile)
|
||||||
|
except HeaderError:
|
||||||
|
raise SubsequentHeaderError("missing or bad subsequent header")
|
||||||
|
|
||||||
if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
|
if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
|
||||||
if next is None:
|
|
||||||
raise HeaderError("missing subsequent header")
|
|
||||||
|
|
||||||
# Patch the TarInfo object with the extended header info.
|
# Patch the TarInfo object with the extended header info.
|
||||||
next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
|
next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
|
||||||
next.offset = self.offset
|
next.offset = self.offset
|
||||||
|
@ -1557,12 +1573,16 @@ class TarFile(object):
|
||||||
if self.mode == "a":
|
if self.mode == "a":
|
||||||
# Move to the end of the archive,
|
# Move to the end of the archive,
|
||||||
# before the first empty block.
|
# before the first empty block.
|
||||||
self.firstmember = None
|
|
||||||
while True:
|
while True:
|
||||||
if self.next() is None:
|
self.fileobj.seek(self.offset)
|
||||||
if self.offset > 0:
|
try:
|
||||||
self.fileobj.seek(self.fileobj.tell() - BLOCKSIZE)
|
tarinfo = self.tarinfo.fromtarfile(self)
|
||||||
|
self.members.append(tarinfo)
|
||||||
|
except EOFHeaderError:
|
||||||
|
self.fileobj.seek(self.offset)
|
||||||
break
|
break
|
||||||
|
except HeaderError as e:
|
||||||
|
raise ReadError(str(e))
|
||||||
|
|
||||||
if self.mode in "aw":
|
if self.mode in "aw":
|
||||||
self._loaded = True
|
self._loaded = True
|
||||||
|
@ -1715,7 +1735,7 @@ class TarFile(object):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
t = cls.taropen(name, mode, fileobj, **kwargs)
|
t = cls.taropen(name, mode, fileobj, **kwargs)
|
||||||
except IOError:
|
except (IOError, EOFError):
|
||||||
raise ReadError("not a bzip2 file")
|
raise ReadError("not a bzip2 file")
|
||||||
t._extfileobj = False
|
t._extfileobj = False
|
||||||
return t
|
return t
|
||||||
|
@ -2287,24 +2307,37 @@ class TarFile(object):
|
||||||
|
|
||||||
# Read the next block.
|
# Read the next block.
|
||||||
self.fileobj.seek(self.offset)
|
self.fileobj.seek(self.offset)
|
||||||
|
tarinfo = None
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
tarinfo = self.tarinfo.fromtarfile(self)
|
tarinfo = self.tarinfo.fromtarfile(self)
|
||||||
if tarinfo is None:
|
except EOFHeaderError as e:
|
||||||
return
|
|
||||||
self.members.append(tarinfo)
|
|
||||||
|
|
||||||
except HeaderError as e:
|
|
||||||
if self.ignore_zeros:
|
if self.ignore_zeros:
|
||||||
self._dbg(2, "0x%X: %s" % (self.offset, e))
|
self._dbg(2, "0x%X: %s" % (self.offset, e))
|
||||||
self.offset += BLOCKSIZE
|
self.offset += BLOCKSIZE
|
||||||
continue
|
continue
|
||||||
else:
|
except InvalidHeaderError as e:
|
||||||
if self.offset == 0:
|
if self.ignore_zeros:
|
||||||
raise ReadError(str(e))
|
self._dbg(2, "0x%X: %s" % (self.offset, e))
|
||||||
return None
|
self.offset += BLOCKSIZE
|
||||||
|
continue
|
||||||
|
elif self.offset == 0:
|
||||||
|
raise ReadError(str(e))
|
||||||
|
except EmptyHeaderError:
|
||||||
|
if self.offset == 0:
|
||||||
|
raise ReadError("empty file")
|
||||||
|
except TruncatedHeaderError as e:
|
||||||
|
if self.offset == 0:
|
||||||
|
raise ReadError(str(e))
|
||||||
|
except SubsequentHeaderError as e:
|
||||||
|
raise ReadError(str(e))
|
||||||
break
|
break
|
||||||
|
|
||||||
|
if tarinfo is not None:
|
||||||
|
self.members.append(tarinfo)
|
||||||
|
else:
|
||||||
|
self._loaded = True
|
||||||
|
|
||||||
return tarinfo
|
return tarinfo
|
||||||
|
|
||||||
#--------------------------------------------------------------------------
|
#--------------------------------------------------------------------------
|
||||||
|
|
|
@ -135,7 +135,53 @@ class UstarReadTest(ReadTest):
|
||||||
fobj.close()
|
fobj.close()
|
||||||
|
|
||||||
|
|
||||||
class MiscReadTest(ReadTest):
|
class CommonReadTest(ReadTest):
|
||||||
|
|
||||||
|
def test_empty_tarfile(self):
|
||||||
|
# Test for issue6123: Allow opening empty archives.
|
||||||
|
# This test checks if tarfile.open() is able to open an empty tar
|
||||||
|
# archive successfully. Note that an empty tar archive is not the
|
||||||
|
# same as an empty file!
|
||||||
|
tarfile.open(tmpname, self.mode.replace("r", "w")).close()
|
||||||
|
try:
|
||||||
|
tar = tarfile.open(tmpname, self.mode)
|
||||||
|
tar.getnames()
|
||||||
|
except tarfile.ReadError:
|
||||||
|
self.fail("tarfile.open() failed on empty archive")
|
||||||
|
self.assertListEqual(tar.getmembers(), [])
|
||||||
|
|
||||||
|
def test_null_tarfile(self):
|
||||||
|
# Test for issue6123: Allow opening empty archives.
|
||||||
|
# This test guarantees that tarfile.open() does not treat an empty
|
||||||
|
# file as an empty tar archive.
|
||||||
|
open(tmpname, "wb").close()
|
||||||
|
self.assertRaises(tarfile.ReadError, tarfile.open, tmpname, self.mode)
|
||||||
|
self.assertRaises(tarfile.ReadError, tarfile.open, tmpname)
|
||||||
|
|
||||||
|
def test_ignore_zeros(self):
|
||||||
|
# Test TarFile's ignore_zeros option.
|
||||||
|
if self.mode.endswith(":gz"):
|
||||||
|
_open = gzip.GzipFile
|
||||||
|
elif self.mode.endswith(":bz2"):
|
||||||
|
_open = bz2.BZ2File
|
||||||
|
else:
|
||||||
|
_open = open
|
||||||
|
|
||||||
|
for char in (b'\0', b'a'):
|
||||||
|
# Test if EOFHeaderError ('\0') and InvalidHeaderError ('a')
|
||||||
|
# are ignored correctly.
|
||||||
|
fobj = _open(tmpname, "wb")
|
||||||
|
fobj.write(char * 1024)
|
||||||
|
fobj.write(tarfile.TarInfo("foo").tobuf())
|
||||||
|
fobj.close()
|
||||||
|
|
||||||
|
tar = tarfile.open(tmpname, mode="r", ignore_zeros=True)
|
||||||
|
self.assertListEqual(tar.getnames(), ["foo"],
|
||||||
|
"ignore_zeros=True should have skipped the %r-blocks" % char)
|
||||||
|
tar.close()
|
||||||
|
|
||||||
|
|
||||||
|
class MiscReadTest(CommonReadTest):
|
||||||
|
|
||||||
def test_no_name_argument(self):
|
def test_no_name_argument(self):
|
||||||
fobj = open(self.tarname, "rb")
|
fobj = open(self.tarname, "rb")
|
||||||
|
@ -264,7 +310,7 @@ class MiscReadTest(ReadTest):
|
||||||
tar.close()
|
tar.close()
|
||||||
|
|
||||||
|
|
||||||
class StreamReadTest(ReadTest):
|
class StreamReadTest(CommonReadTest):
|
||||||
|
|
||||||
mode="r|"
|
mode="r|"
|
||||||
|
|
||||||
|
@ -1079,12 +1125,12 @@ class AppendTest(unittest.TestCase):
|
||||||
self._test()
|
self._test()
|
||||||
|
|
||||||
def test_empty(self):
|
def test_empty(self):
|
||||||
open(self.tarname, "w").close()
|
tarfile.open(self.tarname, "w:").close()
|
||||||
self._add_testfile()
|
self._add_testfile()
|
||||||
self._test()
|
self._test()
|
||||||
|
|
||||||
def test_empty_fileobj(self):
|
def test_empty_fileobj(self):
|
||||||
fobj = io.BytesIO()
|
fobj = io.BytesIO(b"\0" * 1024)
|
||||||
self._add_testfile(fobj)
|
self._add_testfile(fobj)
|
||||||
fobj.seek(0)
|
fobj.seek(0)
|
||||||
self._test(fileobj=fobj)
|
self._test(fileobj=fobj)
|
||||||
|
@ -1114,6 +1160,29 @@ class AppendTest(unittest.TestCase):
|
||||||
self._create_testtar("w:bz2")
|
self._create_testtar("w:bz2")
|
||||||
self.assertRaises(tarfile.ReadError, tarfile.open, tmpname, "a")
|
self.assertRaises(tarfile.ReadError, tarfile.open, tmpname, "a")
|
||||||
|
|
||||||
|
# Append mode is supposed to fail if the tarfile to append to
|
||||||
|
# does not end with a zero block.
|
||||||
|
def _test_error(self, data):
|
||||||
|
open(self.tarname, "wb").write(data)
|
||||||
|
self.assertRaises(tarfile.ReadError, self._add_testfile)
|
||||||
|
|
||||||
|
def test_null(self):
|
||||||
|
self._test_error(b"")
|
||||||
|
|
||||||
|
def test_incomplete(self):
|
||||||
|
self._test_error(b"\0" * 13)
|
||||||
|
|
||||||
|
def test_premature_eof(self):
|
||||||
|
data = tarfile.TarInfo("foo").tobuf()
|
||||||
|
self._test_error(data)
|
||||||
|
|
||||||
|
def test_trailing_garbage(self):
|
||||||
|
data = tarfile.TarInfo("foo").tobuf()
|
||||||
|
self._test_error(data + b"\0" * 13)
|
||||||
|
|
||||||
|
def test_invalid(self):
|
||||||
|
self._test_error(b"a" * 512)
|
||||||
|
|
||||||
|
|
||||||
class LimitsTest(unittest.TestCase):
|
class LimitsTest(unittest.TestCase):
|
||||||
|
|
||||||
|
@ -1228,10 +1297,16 @@ class Bz2PartialReadTest(unittest.TestCase):
|
||||||
raise AssertionError("infinite loop detected in tarfile.open()")
|
raise AssertionError("infinite loop detected in tarfile.open()")
|
||||||
self.hit_eof = self.tell() == len(self.getvalue())
|
self.hit_eof = self.tell() == len(self.getvalue())
|
||||||
return super(MyBytesIO, self).read(n)
|
return super(MyBytesIO, self).read(n)
|
||||||
|
def seek(self, *args):
|
||||||
|
self.hit_eof = False
|
||||||
|
return super(MyBytesIO, self).seek(*args)
|
||||||
|
|
||||||
data = bz2.compress(tarfile.TarInfo("foo").tobuf())
|
data = bz2.compress(tarfile.TarInfo("foo").tobuf())
|
||||||
for x in range(len(data) + 1):
|
for x in range(len(data) + 1):
|
||||||
tarfile.open(fileobj=MyBytesIO(data[:x]), mode=mode)
|
try:
|
||||||
|
tarfile.open(fileobj=MyBytesIO(data[:x]), mode=mode)
|
||||||
|
except tarfile.ReadError:
|
||||||
|
pass # we have no interest in ReadErrors
|
||||||
|
|
||||||
def test_partial_input(self):
|
def test_partial_input(self):
|
||||||
self._test_partial_input("r")
|
self._test_partial_input("r")
|
||||||
|
|
|
@ -140,6 +140,9 @@ C-API
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #6123: tarfile now opens empty archives correctly and consistently
|
||||||
|
raises ReadError on empty files.
|
||||||
|
|
||||||
- Issue #7354: distutils.tests.test_msvc9compiler - dragfullwindows can
|
- Issue #7354: distutils.tests.test_msvc9compiler - dragfullwindows can
|
||||||
be 2.
|
be 2.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue