bpo-32072: Fix issues with binary plists. (GH-4455) (#4654)
* Fixed saving bytearrays.
* Identical objects will be saved only once.
* Equal references will be load as identical objects.
* Added support for saving and loading recursive data structures.
(cherry picked from commit a897aeeef6
)
This commit is contained in:
parent
c91bf742e5
commit
8cd31082fb
|
@ -590,6 +590,8 @@ class InvalidFileException (ValueError):
|
|||
|
||||
_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}
|
||||
|
||||
_undefined = object()
|
||||
|
||||
class _BinaryPlistParser:
|
||||
"""
|
||||
Read or write a binary plist file, following the description of the binary
|
||||
|
@ -620,7 +622,8 @@ class _BinaryPlistParser:
|
|||
) = struct.unpack('>6xBBQQQ', trailer)
|
||||
self._fp.seek(offset_table_offset)
|
||||
self._object_offsets = self._read_ints(num_objects, offset_size)
|
||||
return self._read_object(self._object_offsets[top_object])
|
||||
self._objects = [_undefined] * num_objects
|
||||
return self._read_object(top_object)
|
||||
|
||||
except (OSError, IndexError, struct.error, OverflowError,
|
||||
UnicodeDecodeError):
|
||||
|
@ -649,62 +652,68 @@ class _BinaryPlistParser:
|
|||
def _read_refs(self, n):
|
||||
return self._read_ints(n, self._ref_size)
|
||||
|
||||
def _read_object(self, offset):
|
||||
def _read_object(self, ref):
|
||||
"""
|
||||
read the object at offset.
|
||||
read the object by reference.
|
||||
|
||||
May recursively read sub-objects (content of an array/dict/set)
|
||||
"""
|
||||
result = self._objects[ref]
|
||||
if result is not _undefined:
|
||||
return result
|
||||
|
||||
offset = self._object_offsets[ref]
|
||||
self._fp.seek(offset)
|
||||
token = self._fp.read(1)[0]
|
||||
tokenH, tokenL = token & 0xF0, token & 0x0F
|
||||
|
||||
if token == 0x00:
|
||||
return None
|
||||
result = None
|
||||
|
||||
elif token == 0x08:
|
||||
return False
|
||||
result = False
|
||||
|
||||
elif token == 0x09:
|
||||
return True
|
||||
result = True
|
||||
|
||||
# The referenced source code also mentions URL (0x0c, 0x0d) and
|
||||
# UUID (0x0e), but neither can be generated using the Cocoa libraries.
|
||||
|
||||
elif token == 0x0f:
|
||||
return b''
|
||||
result = b''
|
||||
|
||||
elif tokenH == 0x10: # int
|
||||
return int.from_bytes(self._fp.read(1 << tokenL),
|
||||
'big', signed=tokenL >= 3)
|
||||
result = int.from_bytes(self._fp.read(1 << tokenL),
|
||||
'big', signed=tokenL >= 3)
|
||||
|
||||
elif token == 0x22: # real
|
||||
return struct.unpack('>f', self._fp.read(4))[0]
|
||||
result = struct.unpack('>f', self._fp.read(4))[0]
|
||||
|
||||
elif token == 0x23: # real
|
||||
return struct.unpack('>d', self._fp.read(8))[0]
|
||||
result = struct.unpack('>d', self._fp.read(8))[0]
|
||||
|
||||
elif token == 0x33: # date
|
||||
f = struct.unpack('>d', self._fp.read(8))[0]
|
||||
# timestamp 0 of binary plists corresponds to 1/1/2001
|
||||
# (year of Mac OS X 10.0), instead of 1/1/1970.
|
||||
return datetime.datetime(2001, 1, 1) + datetime.timedelta(seconds=f)
|
||||
result = (datetime.datetime(2001, 1, 1) +
|
||||
datetime.timedelta(seconds=f))
|
||||
|
||||
elif tokenH == 0x40: # data
|
||||
s = self._get_size(tokenL)
|
||||
if self._use_builtin_types:
|
||||
return self._fp.read(s)
|
||||
result = self._fp.read(s)
|
||||
else:
|
||||
return Data(self._fp.read(s))
|
||||
result = Data(self._fp.read(s))
|
||||
|
||||
elif tokenH == 0x50: # ascii string
|
||||
s = self._get_size(tokenL)
|
||||
result = self._fp.read(s).decode('ascii')
|
||||
return result
|
||||
result = result
|
||||
|
||||
elif tokenH == 0x60: # unicode string
|
||||
s = self._get_size(tokenL)
|
||||
return self._fp.read(s * 2).decode('utf-16be')
|
||||
result = self._fp.read(s * 2).decode('utf-16be')
|
||||
|
||||
# tokenH == 0x80 is documented as 'UID' and appears to be used for
|
||||
# keyed-archiving, not in plists.
|
||||
|
@ -712,8 +721,9 @@ class _BinaryPlistParser:
|
|||
elif tokenH == 0xA0: # array
|
||||
s = self._get_size(tokenL)
|
||||
obj_refs = self._read_refs(s)
|
||||
return [self._read_object(self._object_offsets[x])
|
||||
for x in obj_refs]
|
||||
result = []
|
||||
self._objects[ref] = result
|
||||
result.extend(self._read_object(x) for x in obj_refs)
|
||||
|
||||
# tokenH == 0xB0 is documented as 'ordset', but is not actually
|
||||
# implemented in the Apple reference code.
|
||||
|
@ -726,12 +736,15 @@ class _BinaryPlistParser:
|
|||
key_refs = self._read_refs(s)
|
||||
obj_refs = self._read_refs(s)
|
||||
result = self._dict_type()
|
||||
self._objects[ref] = result
|
||||
for k, o in zip(key_refs, obj_refs):
|
||||
result[self._read_object(self._object_offsets[k])
|
||||
] = self._read_object(self._object_offsets[o])
|
||||
return result
|
||||
result[self._read_object(k)] = self._read_object(o)
|
||||
|
||||
raise InvalidFileException()
|
||||
else:
|
||||
raise InvalidFileException()
|
||||
|
||||
self._objects[ref] = result
|
||||
return result
|
||||
|
||||
def _count_to_size(count):
|
||||
if count < 1 << 8:
|
||||
|
@ -746,6 +759,8 @@ def _count_to_size(count):
|
|||
else:
|
||||
return 8
|
||||
|
||||
_scalars = (str, int, float, datetime.datetime, bytes)
|
||||
|
||||
class _BinaryPlistWriter (object):
|
||||
def __init__(self, fp, sort_keys, skipkeys):
|
||||
self._fp = fp
|
||||
|
@ -801,8 +816,7 @@ class _BinaryPlistWriter (object):
|
|||
# First check if the object is in the object table, not used for
|
||||
# containers to ensure that two subcontainers with the same contents
|
||||
# will be serialized as distinct values.
|
||||
if isinstance(value, (
|
||||
str, int, float, datetime.datetime, bytes, bytearray)):
|
||||
if isinstance(value, _scalars):
|
||||
if (type(value), value) in self._objtable:
|
||||
return
|
||||
|
||||
|
@ -810,15 +824,17 @@ class _BinaryPlistWriter (object):
|
|||
if (type(value.data), value.data) in self._objtable:
|
||||
return
|
||||
|
||||
elif id(value) in self._objidtable:
|
||||
return
|
||||
|
||||
# Add to objectreference map
|
||||
refnum = len(self._objlist)
|
||||
self._objlist.append(value)
|
||||
try:
|
||||
if isinstance(value, Data):
|
||||
self._objtable[(type(value.data), value.data)] = refnum
|
||||
else:
|
||||
self._objtable[(type(value), value)] = refnum
|
||||
except TypeError:
|
||||
if isinstance(value, _scalars):
|
||||
self._objtable[(type(value), value)] = refnum
|
||||
elif isinstance(value, Data):
|
||||
self._objtable[(type(value.data), value.data)] = refnum
|
||||
else:
|
||||
self._objidtable[id(value)] = refnum
|
||||
|
||||
# And finally recurse into containers
|
||||
|
@ -845,12 +861,11 @@ class _BinaryPlistWriter (object):
|
|||
self._flatten(o)
|
||||
|
||||
def _getrefnum(self, value):
|
||||
try:
|
||||
if isinstance(value, Data):
|
||||
return self._objtable[(type(value.data), value.data)]
|
||||
else:
|
||||
return self._objtable[(type(value), value)]
|
||||
except TypeError:
|
||||
if isinstance(value, _scalars):
|
||||
return self._objtable[(type(value), value)]
|
||||
elif isinstance(value, Data):
|
||||
return self._objtable[(type(value.data), value.data)]
|
||||
else:
|
||||
return self._objidtable[id(value)]
|
||||
|
||||
def _write_size(self, token, size):
|
||||
|
|
|
@ -169,6 +169,17 @@ class TestPlistlib(unittest.TestCase):
|
|||
self.assertRaises(OverflowError, plistlib.dumps,
|
||||
pl, fmt=fmt)
|
||||
|
||||
def test_bytearray(self):
|
||||
for pl in (b'<binary gunk>', b"<lots of binary gunk>\0\1\2\3" * 10):
|
||||
for fmt in ALL_FORMATS:
|
||||
with self.subTest(pl=pl, fmt=fmt):
|
||||
data = plistlib.dumps(bytearray(pl), fmt=fmt)
|
||||
pl2 = plistlib.loads(data)
|
||||
self.assertIsInstance(pl2, bytes)
|
||||
self.assertEqual(pl2, pl)
|
||||
data2 = plistlib.dumps(pl2, fmt=fmt)
|
||||
self.assertEqual(data, data2)
|
||||
|
||||
def test_bytes(self):
|
||||
pl = self._create()
|
||||
data = plistlib.dumps(pl)
|
||||
|
@ -432,6 +443,9 @@ class TestPlistlib(unittest.TestCase):
|
|||
pl2 = plistlib.loads(data)
|
||||
self.assertEqual(dict(pl), dict(pl2))
|
||||
|
||||
|
||||
class TestBinaryPlistlib(unittest.TestCase):
|
||||
|
||||
def test_nonstandard_refs_size(self):
|
||||
# Issue #21538: Refs and offsets are 24-bit integers
|
||||
data = (b'bplist00'
|
||||
|
@ -444,6 +458,47 @@ class TestPlistlib(unittest.TestCase):
|
|||
b'\x00\x00\x00\x00\x00\x00\x00\x13')
|
||||
self.assertEqual(plistlib.loads(data), {'a': 'b'})
|
||||
|
||||
def test_dump_duplicates(self):
|
||||
# Test effectiveness of saving duplicated objects
|
||||
for x in (None, False, True, 12345, 123.45, 'abcde', b'abcde',
|
||||
datetime.datetime(2004, 10, 26, 10, 33, 33),
|
||||
plistlib.Data(b'abcde'), bytearray(b'abcde'),
|
||||
[12, 345], (12, 345), {'12': 345}):
|
||||
with self.subTest(x=x):
|
||||
data = plistlib.dumps([x]*1000, fmt=plistlib.FMT_BINARY)
|
||||
self.assertLess(len(data), 1100, repr(data))
|
||||
|
||||
def test_identity(self):
|
||||
for x in (None, False, True, 12345, 123.45, 'abcde', b'abcde',
|
||||
datetime.datetime(2004, 10, 26, 10, 33, 33),
|
||||
plistlib.Data(b'abcde'), bytearray(b'abcde'),
|
||||
[12, 345], (12, 345), {'12': 345}):
|
||||
with self.subTest(x=x):
|
||||
data = plistlib.dumps([x]*2, fmt=plistlib.FMT_BINARY)
|
||||
a, b = plistlib.loads(data)
|
||||
if isinstance(x, tuple):
|
||||
x = list(x)
|
||||
self.assertEqual(a, x)
|
||||
self.assertEqual(b, x)
|
||||
self.assertIs(a, b)
|
||||
|
||||
def test_cycles(self):
|
||||
# recursive list
|
||||
a = []
|
||||
a.append(a)
|
||||
b = plistlib.loads(plistlib.dumps(a, fmt=plistlib.FMT_BINARY))
|
||||
self.assertIs(b[0], b)
|
||||
# recursive tuple
|
||||
a = ([],)
|
||||
a[0].append(a)
|
||||
b = plistlib.loads(plistlib.dumps(a, fmt=plistlib.FMT_BINARY))
|
||||
self.assertIs(b[0][0], b)
|
||||
# recursive dict
|
||||
a = {}
|
||||
a['x'] = a
|
||||
b = plistlib.loads(plistlib.dumps(a, fmt=plistlib.FMT_BINARY))
|
||||
self.assertIs(b['x'], b)
|
||||
|
||||
def test_large_timestamp(self):
|
||||
# Issue #26709: 32-bit timestamp out of range
|
||||
for ts in -2**31-1, 2**31:
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
Fixed issues with binary plists:
|
||||
|
||||
* Fixed saving bytearrays.
|
||||
* Identical objects will be saved only once.
|
||||
* Equal references will be load as identical objects.
|
||||
* Added support for saving and loading recursive data structures.
|
Loading…
Reference in New Issue