bpo-41316: Make tarfile follow specs for FNAME (GH-21511)

tarfile writes full path to FNAME field of GZIP format instead of just basename if user specified absolute path. Some archive viewers may process file incorrectly. Also it creates security issue because anyone can know structure of directories on system and know username or other personal information.

RFC1952 says about FNAME:
This is the original name of the file being compressed, with any directory components removed.

So tarfile must remove directory names from FNAME and write only basename of file.

Automerge-Triggered-By: @jaraco
This commit is contained in:
Artem Bulgakov 2020-09-07 19:46:33 +03:00 committed by GitHub
parent fd4cafd470
commit 22748a83d9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 17 additions and 1 deletions

View File

@ -420,6 +420,8 @@ class _Stream:
self.__write(b"\037\213\010\010" + timestamp + b"\002\377") self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
if self.name.endswith(".gz"): if self.name.endswith(".gz"):
self.name = self.name[:-3] self.name = self.name[:-3]
# Honor "directory components removed" from RFC1952
self.name = os.path.basename(self.name)
# RFC1952 says we must use ISO-8859-1 for the FNAME field. # RFC1952 says we must use ISO-8859-1 for the FNAME field.
self.__write(self.name.encode("iso-8859-1", "replace") + NUL) self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

View File

@ -1417,12 +1417,15 @@ class WriteTest(WriteTestBase, unittest.TestCase):
pax_headers={'non': 'empty'}) pax_headers={'non': 'empty'})
self.assertFalse(f.closed) self.assertFalse(f.closed)
class GzipWriteTest(GzipTest, WriteTest): class GzipWriteTest(GzipTest, WriteTest):
pass pass
class Bz2WriteTest(Bz2Test, WriteTest): class Bz2WriteTest(Bz2Test, WriteTest):
pass pass
class LzmaWriteTest(LzmaTest, WriteTest): class LzmaWriteTest(LzmaTest, WriteTest):
pass pass
@ -1465,8 +1468,17 @@ class StreamWriteTest(WriteTestBase, unittest.TestCase):
finally: finally:
os.umask(original_umask) os.umask(original_umask)
class GzipStreamWriteTest(GzipTest, StreamWriteTest): class GzipStreamWriteTest(GzipTest, StreamWriteTest):
pass def test_source_directory_not_leaked(self):
"""
Ensure the source directory is not included in the tar header
per bpo-41316.
"""
tarfile.open(tmpname, self.mode).close()
payload = pathlib.Path(tmpname).read_text(encoding='latin-1')
assert os.path.dirname(tmpname) not in payload
class Bz2StreamWriteTest(Bz2Test, StreamWriteTest): class Bz2StreamWriteTest(Bz2Test, StreamWriteTest):
decompressor = bz2.BZ2Decompressor if bz2 else None decompressor = bz2.BZ2Decompressor if bz2 else None

View File

@ -242,6 +242,7 @@ Colm Buckley
Erik de Bueger Erik de Bueger
Jan-Hein Bührman Jan-Hein Bührman
Lars Buitinck Lars Buitinck
Artem Bulgakov
Dick Bulterman Dick Bulterman
Bill Bumgarner Bill Bumgarner
Jimmy Burgett Jimmy Burgett

View File

@ -0,0 +1 @@
Fix the :mod:`tarfile` module to write only basename of TAR file to GZIP compression header.