Issue #19920: TarFile.list() no longer fails when it outputs a listing

containing non-encodable characters.  Added tests for TarFile.list().
Based on patch by Vajrasky Kok.
This commit is contained in:
Serhiy Storchaka 2014-02-05 20:53:36 +02:00
parent 1812bd44d5
commit 3b4f1594ff
3 changed files with 100 additions and 11 deletions

View File

@ -281,6 +281,12 @@ def filemode(mode):
DeprecationWarning, 2) DeprecationWarning, 2)
return stat.filemode(mode) return stat.filemode(mode)
def _safe_print(s):
    """Print *s* to sys.stdout followed by a single space instead of a newline.

    Characters that the encoding of sys.stdout cannot represent are written
    as backslash escape sequences rather than raising UnicodeEncodeError.
    When sys.stdout exposes no encoding, *s* is printed unchanged.
    """
    encoding = getattr(sys.stdout, 'encoding', None)
    if encoding is not None:
        # Round-trip through the target encoding so that anything it cannot
        # represent is already escaped when print() encodes the text again.
        s = s.encode(encoding, 'backslashreplace').decode(encoding)
    print(s, end=' ')
class TarError(Exception): class TarError(Exception):
"""Base exception.""" """Base exception."""
@ -1870,24 +1876,24 @@ class TarFile(object):
for tarinfo in self: for tarinfo in self:
if verbose: if verbose:
print(stat.filemode(tarinfo.mode), end=' ') _safe_print(stat.filemode(tarinfo.mode))
print("%s/%s" % (tarinfo.uname or tarinfo.uid, _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
tarinfo.gname or tarinfo.gid), end=' ') tarinfo.gname or tarinfo.gid))
if tarinfo.ischr() or tarinfo.isblk(): if tarinfo.ischr() or tarinfo.isblk():
print("%10s" % ("%d,%d" \ _safe_print("%10s" %
% (tarinfo.devmajor, tarinfo.devminor)), end=' ') ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
else: else:
print("%10d" % tarinfo.size, end=' ') _safe_print("%10d" % tarinfo.size)
print("%d-%02d-%02d %02d:%02d:%02d" \ _safe_print("%d-%02d-%02d %02d:%02d:%02d" \
% time.localtime(tarinfo.mtime)[:6], end=' ') % time.localtime(tarinfo.mtime)[:6])
print(tarinfo.name + ("/" if tarinfo.isdir() else ""), end=' ') _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))
if verbose: if verbose:
if tarinfo.issym(): if tarinfo.issym():
print("->", tarinfo.linkname, end=' ') _safe_print("-> " + tarinfo.linkname)
if tarinfo.islnk(): if tarinfo.islnk():
print("link to", tarinfo.linkname, end=' ') _safe_print("link to " + tarinfo.linkname)
print() print()
def add(self, name, arcname=None, recursive=True, exclude=None, *, filter=None): def add(self, name, arcname=None, recursive=True, exclude=None, *, filter=None):

View File

@ -217,6 +217,84 @@ class LzmaUstarReadTest(LzmaTest, UstarReadTest):
pass pass
class ListTest(ReadTest, unittest.TestCase):
    """Check what TarFile.list() writes when sys.stdout is ASCII-encoded."""

    # Override setUp to use default encoding (UTF-8)
    def setUp(self):
        self.tar = tarfile.open(self.tarname, mode=self.mode)

    def _listing(self, verbose):
        """Return the raw bytes list() emits to an ASCII-encoded stdout."""
        stream = io.TextIOWrapper(io.BytesIO(), 'ascii', newline='\n')
        with support.swap_attr(sys, 'stdout', stream):
            self.tar.list(verbose=verbose)
        return stream.detach().getvalue()

    def test_list(self):
        out = self._listing(verbose=False)

        for name in (
            (b'ustar/conttype'),
            (b'ustar/regtype'),
            (b'ustar/lnktype'),
            (b'ustar' + (b'/12345' * 40) + b'67/longname'),
            (b'./ustar/linktest2/symtype'),
            (b'./ustar/linktest2/lnktype'),
            # Directories must be listed with a trailing slash
            (b'ustar/dirtype/'),
            (b'ustar/dirtype-with-size/'),
        ):
            self.assertIn(name, out)

        # Names with unencodable characters must still be printed; the
        # expected output spells them as backslash escapes
        for escaped in (
            (br'ustar/umlauts-'
             br'\udcc4\udcd6\udcdc\udce4\udcf6\udcfc\udcdf'),
            (br'misc/regtype-hpux-signed-chksum-'
             br'\udcc4\udcd6\udcdc\udce4\udcf6\udcfc\udcdf'),
            (br'misc/regtype-old-v7-signed-chksum-'
             br'\udcc4\udcd6\udcdc\udce4\udcf6\udcfc\udcdf'),
            (br'pax/bad-pax-\udce4\udcf6\udcfc'),
            (br'pax/hdrcharset-\udce4\udcf6\udcfc'),
        ):
            self.assertIn(escaped, out)

        # Without the verbose flag each member occupies exactly one line and
        # carries none of the 'ls -l'-like columns:
        # ...
        # ustar/conttype
        # ustar/regtype
        # ...
        self.assertRegex(out, br'ustar/conttype ?\r?\n'
                              br'ustar/regtype ?\r?\n')

        # Link targets are only shown in verbose mode
        for marker in (b'link to', b'->'):
            self.assertNotIn(marker, out)

    def test_list_verbose(self):
        out = self._listing(verbose=True)

        # With the verbose flag each member still occupies one line, now
        # prefixed with the 'ls -l'-like columns:
        # ...
        # ?rw-r--r-- tarfile/tarfile 7011 2003-01-06 07:19:43 ustar/conttype
        # ?rw-r--r-- tarfile/tarfile 7011 2003-01-06 07:19:43 ustar/regtype
        # ...
        entry = (br'-rw-r--r-- tarfile/tarfile\s+7011 '
                 br'\d{4}-\d\d-\d\d\s+\d\d:\d\d:\d\d '
                 br'ustar/\w+type ?\r?\n')
        self.assertRegex(out, entry * 2)

        # Symbolic and hard links must show what they point to
        for link_line in (
            (b'ustar/symtype -> regtype'),
            (b'./ustar/linktest2/symtype -> ../linktest1/regtype'),
            (b'./ustar/linktest2/lnktype link to '
             b'./ustar/linktest1/regtype'),
            (b'gnu' + (b'/123' * 125) + b'/longlink link to gnu' +
             (b'/123' * 125) + b'/longname'),
            (b'pax' + (b'/123' * 125) + b'/longlink link to pax' +
             (b'/123' * 125) + b'/longname'),
        ):
            self.assertIn(link_line, out)
# Re-runs every ListTest case with GzipTest mixed in.
# NOTE(review): GzipTest is defined outside this view; presumably it selects
# the gzip-compressed fixture archive and mode -- confirm.
class GzipListTest(GzipTest, ListTest):
    pass
# Re-runs every ListTest case with Bz2Test mixed in.
# NOTE(review): Bz2Test is defined outside this view; presumably it selects
# the bzip2-compressed fixture archive and mode -- confirm.
class Bz2ListTest(Bz2Test, ListTest):
    pass
# Re-runs every ListTest case with LzmaTest mixed in.
# NOTE(review): LzmaTest is defined outside this view; presumably it selects
# the LZMA-compressed fixture archive and mode -- confirm.
class LzmaListTest(LzmaTest, ListTest):
    pass
class CommonReadTest(ReadTest): class CommonReadTest(ReadTest):
def test_empty_tarfile(self): def test_empty_tarfile(self):

View File

@ -45,6 +45,9 @@ Core and Builtins
Library Library
------- -------
- Issue #19920: TarFile.list() no longer fails when it outputs a listing
containing non-encodable characters. Based on patch by Vajrasky Kok.
- Issue #20515: Fix NULL pointer dereference introduced by issue #20368. - Issue #20515: Fix NULL pointer dereference introduced by issue #20368.
- Issue #19186: Restore namespacing of expat symbols inside the pyexpat module. - Issue #19186: Restore namespacing of expat symbols inside the pyexpat module.
@ -321,6 +324,8 @@ IDLE
Tests Tests
----- -----
- Issue #19920: Added tests for TarFile.list(). Based on patch by Vajrasky Kok.
- Issue #19990: Added tests for the imghdr module. Based on patch by - Issue #19990: Added tests for the imghdr module. Based on patch by
Claudiu Popa. Claudiu Popa.