From db91e0fe2417f075693a194a492b1699829871e7 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 31 Oct 2017 14:05:53 +0200 Subject: [PATCH] bpo-31897: Convert unexpected errors when read bogus binary plists into InvalidFileException. (#4171) --- Lib/plistlib.py | 5 +- Lib/test/test_plistlib.py | 68 +++++++++++++++++-- .../2017-10-30-11-04-56.bpo-31897.yjwdEb.rst | 2 + 3 files changed, 70 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2017-10-30-11-04-56.bpo-31897.yjwdEb.rst diff --git a/Lib/plistlib.py b/Lib/plistlib.py index 8262fb049e5..2113a2dc57b 100644 --- a/Lib/plistlib.py +++ b/Lib/plistlib.py @@ -557,7 +557,8 @@ class _BinaryPlistParser: self._object_offsets = self._read_ints(num_objects, offset_size) return self._read_object(self._object_offsets[top_object]) - except (OSError, IndexError, struct.error): + except (OSError, IndexError, struct.error, OverflowError, + UnicodeDecodeError): raise InvalidFileException() def _get_size(self, tokenL): @@ -575,6 +576,8 @@ class _BinaryPlistParser: if size in _BINARY_FORMAT: return struct.unpack('>' + _BINARY_FORMAT[size] * n, data) else: + if not size or len(data) != size * n: + raise InvalidFileException() return tuple(int.from_bytes(data[i: i + size], 'big') for i in range(0, size * n, size)) diff --git a/Lib/test/test_plistlib.py b/Lib/test/test_plistlib.py index 58b885f0228..7dd179a2c71 100644 --- a/Lib/test/test_plistlib.py +++ b/Lib/test/test_plistlib.py @@ -353,11 +353,13 @@ class TestPlistlib(unittest.TestCase): testString = "string containing %s" % c if i >= 32 or c in "\r\n\t": # \r, \n and \t are the only legal control chars in XML - plistlib.dumps(testString, fmt=plistlib.FMT_XML) + data = plistlib.dumps(testString, fmt=plistlib.FMT_XML) + if c != "\r": + self.assertEqual(plistlib.loads(data), testString) else: - self.assertRaises(ValueError, - plistlib.dumps, - testString) + with self.assertRaises(ValueError): + plistlib.dumps(testString, fmt=plistlib.FMT_XML) + plistlib.dumps(testString, fmt=plistlib.FMT_BINARY) def test_non_bmp_characters(self): pl = {'python': '\U0001f40d'} @@ -366,6 +368,14 @@ class TestPlistlib(unittest.TestCase): data = plistlib.dumps(pl, fmt=fmt) self.assertEqual(plistlib.loads(data), pl) + def test_lone_surrogates(self): + for fmt in ALL_FORMATS: + with self.subTest(fmt=fmt): + with self.assertRaises(UnicodeEncodeError): + plistlib.dumps('\ud8ff', fmt=fmt) + with self.assertRaises(UnicodeEncodeError): + plistlib.dumps('\udcff', fmt=fmt) + def test_nondictroot(self): for fmt in ALL_FORMATS: with self.subTest(fmt=fmt): @@ -442,6 +452,56 @@ class TestPlistlib(unittest.TestCase): data = plistlib.dumps(d, fmt=plistlib.FMT_BINARY) self.assertEqual(plistlib.loads(data), d) + def test_invalid_binary(self): + for data in [ + # too short data + b'', + # too large offset_table_offset and nonstandard offset_size + b'\x00\x08' + b'\x00\x00\x00\x00\x00\x00\x03\x01' + b'\x00\x00\x00\x00\x00\x00\x00\x01' + b'\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x2a', + # integer overflow in offset_table_offset + b'\x00\x08' + b'\x00\x00\x00\x00\x00\x00\x01\x01' + b'\x00\x00\x00\x00\x00\x00\x00\x01' + b'\x00\x00\x00\x00\x00\x00\x00\x00' + b'\xff\xff\xff\xff\xff\xff\xff\xff', + # offset_size = 0 + b'\x00\x08' + b'\x00\x00\x00\x00\x00\x00\x00\x01' + b'\x00\x00\x00\x00\x00\x00\x00\x01' + b'\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x09', + # ref_size = 0 + b'\xa1\x01\x00\x08\x0a' + b'\x00\x00\x00\x00\x00\x00\x01\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x02' + b'\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x0b', + # integer overflow in offset + b'\x00\xff\xff\xff\xff\xff\xff\xff\xff' + b'\x00\x00\x00\x00\x00\x00\x08\x01' + b'\x00\x00\x00\x00\x00\x00\x00\x01' + b'\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x09', + # invalid ASCII + b'\x51\xff\x08' + b'\x00\x00\x00\x00\x00\x00\x01\x01' + b'\x00\x00\x00\x00\x00\x00\x00\x01' + b'\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x0a', + # invalid UTF-16 + b'\x61\xd8\x00\x08' + b'\x00\x00\x00\x00\x00\x00\x01\x01' + b'\x00\x00\x00\x00\x00\x00\x00\x01' + b'\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x0b', + ]: + with self.assertRaises(plistlib.InvalidFileException): + plistlib.loads(b'bplist00' + data, fmt=plistlib.FMT_BINARY) + class TestPlistlibDeprecated(unittest.TestCase): def test_io_deprecated(self): diff --git a/Misc/NEWS.d/next/Library/2017-10-30-11-04-56.bpo-31897.yjwdEb.rst b/Misc/NEWS.d/next/Library/2017-10-30-11-04-56.bpo-31897.yjwdEb.rst new file mode 100644 index 00000000000..229472cb489 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-10-30-11-04-56.bpo-31897.yjwdEb.rst @@ -0,0 +1,2 @@ +plistlib now catches more errors when read binary plists and raises +InvalidFileException instead of unexpected exceptions.