diff --git a/Lib/tarfile.py b/Lib/tarfile.py index e2b60532f69..6769066cabd 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -1249,6 +1249,8 @@ class TarInfo(object): length, keyword = match.groups() length = int(length) + if length == 0: + raise InvalidHeaderError("invalid header") value = buf[match.end(2) + 1:match.start(1) + length - 1] # Normally, we could just use "utf-8" as the encoding and "strict" diff --git a/Lib/test/recursion.tar b/Lib/test/recursion.tar new file mode 100644 index 00000000000..b8237251964 Binary files /dev/null and b/Lib/test/recursion.tar differ diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index d60d35b5be0..3ddeb97f526 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -429,6 +429,13 @@ class CommonReadTest(ReadTest): with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"): tar.extractfile(t).read() + def test_length_zero_header(self): + # bpo-39017 (CVE-2019-20907): reading a zero-length header should fail + # with an exception + with self.assertRaisesRegex(tarfile.ReadError, "file could not be opened successfully"): + with tarfile.open(support.findfile('recursion.tar')) as tar: + pass + class MiscReadTestBase(CommonReadTest): def requires_name_attribute(self): pass diff --git a/Misc/NEWS.d/next/Library/2020-07-12-22-16-58.bpo-39017.x3Cg-9.rst b/Misc/NEWS.d/next/Library/2020-07-12-22-16-58.bpo-39017.x3Cg-9.rst new file mode 100644 index 00000000000..ad26676f8b8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-07-12-22-16-58.bpo-39017.x3Cg-9.rst @@ -0,0 +1 @@ +Avoid infinite loop when reading specially crafted TAR files using the tarfile module (CVE-2019-20907).