mirror of https://github.com/python/cpython
gh-120380: fix Python implementation of `pickle.Pickler` for `bytes` and `bytearray` objects in protocol version 5. (GH-120422)
This commit is contained in:
parent
83d3d7aace
commit
7595e6743a
|
@ -782,14 +782,10 @@ class _Pickler:
|
||||||
self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
|
self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
|
||||||
dispatch[float] = save_float
|
dispatch[float] = save_float
|
||||||
|
|
||||||
def save_bytes(self, obj):
|
def _save_bytes_no_memo(self, obj):
|
||||||
if self.proto < 3:
|
# helper for writing bytes objects for protocol >= 3
|
||||||
if not obj: # bytes object is empty
|
# without memoizing them
|
||||||
self.save_reduce(bytes, (), obj=obj)
|
assert self.proto >= 3
|
||||||
else:
|
|
||||||
self.save_reduce(codecs.encode,
|
|
||||||
(str(obj, 'latin1'), 'latin1'), obj=obj)
|
|
||||||
return
|
|
||||||
n = len(obj)
|
n = len(obj)
|
||||||
if n <= 0xff:
|
if n <= 0xff:
|
||||||
self.write(SHORT_BINBYTES + pack("<B", n) + obj)
|
self.write(SHORT_BINBYTES + pack("<B", n) + obj)
|
||||||
|
@ -799,9 +795,29 @@ class _Pickler:
|
||||||
self._write_large_bytes(BINBYTES + pack("<I", n), obj)
|
self._write_large_bytes(BINBYTES + pack("<I", n), obj)
|
||||||
else:
|
else:
|
||||||
self.write(BINBYTES + pack("<I", n) + obj)
|
self.write(BINBYTES + pack("<I", n) + obj)
|
||||||
|
|
||||||
|
def save_bytes(self, obj):
|
||||||
|
if self.proto < 3:
|
||||||
|
if not obj: # bytes object is empty
|
||||||
|
self.save_reduce(bytes, (), obj=obj)
|
||||||
|
else:
|
||||||
|
self.save_reduce(codecs.encode,
|
||||||
|
(str(obj, 'latin1'), 'latin1'), obj=obj)
|
||||||
|
return
|
||||||
|
self._save_bytes_no_memo(obj)
|
||||||
self.memoize(obj)
|
self.memoize(obj)
|
||||||
dispatch[bytes] = save_bytes
|
dispatch[bytes] = save_bytes
|
||||||
|
|
||||||
|
def _save_bytearray_no_memo(self, obj):
|
||||||
|
# helper for writing bytearray objects for protocol >= 5
|
||||||
|
# without memoizing them
|
||||||
|
assert self.proto >= 5
|
||||||
|
n = len(obj)
|
||||||
|
if n >= self.framer._FRAME_SIZE_TARGET:
|
||||||
|
self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
|
||||||
|
else:
|
||||||
|
self.write(BYTEARRAY8 + pack("<Q", n) + obj)
|
||||||
|
|
||||||
def save_bytearray(self, obj):
|
def save_bytearray(self, obj):
|
||||||
if self.proto < 5:
|
if self.proto < 5:
|
||||||
if not obj: # bytearray is empty
|
if not obj: # bytearray is empty
|
||||||
|
@ -809,11 +825,7 @@ class _Pickler:
|
||||||
else:
|
else:
|
||||||
self.save_reduce(bytearray, (bytes(obj),), obj=obj)
|
self.save_reduce(bytearray, (bytes(obj),), obj=obj)
|
||||||
return
|
return
|
||||||
n = len(obj)
|
self._save_bytearray_no_memo(obj)
|
||||||
if n >= self.framer._FRAME_SIZE_TARGET:
|
|
||||||
self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
|
|
||||||
else:
|
|
||||||
self.write(BYTEARRAY8 + pack("<Q", n) + obj)
|
|
||||||
self.memoize(obj)
|
self.memoize(obj)
|
||||||
dispatch[bytearray] = save_bytearray
|
dispatch[bytearray] = save_bytearray
|
||||||
|
|
||||||
|
@ -832,10 +844,18 @@ class _Pickler:
|
||||||
if in_band:
|
if in_band:
|
||||||
# Write data in-band
|
# Write data in-band
|
||||||
# XXX The C implementation avoids a copy here
|
# XXX The C implementation avoids a copy here
|
||||||
|
buf = m.tobytes()
|
||||||
|
in_memo = id(buf) in self.memo
|
||||||
if m.readonly:
|
if m.readonly:
|
||||||
self.save_bytes(m.tobytes())
|
if in_memo:
|
||||||
|
self._save_bytes_no_memo(buf)
|
||||||
|
else:
|
||||||
|
self.save_bytes(buf)
|
||||||
else:
|
else:
|
||||||
self.save_bytearray(m.tobytes())
|
if in_memo:
|
||||||
|
self._save_bytearray_no_memo(buf)
|
||||||
|
else:
|
||||||
|
self.save_bytearray(buf)
|
||||||
else:
|
else:
|
||||||
# Write data out-of-band
|
# Write data out-of-band
|
||||||
self.write(NEXT_BUFFER)
|
self.write(NEXT_BUFFER)
|
||||||
|
|
|
@ -1845,6 +1845,25 @@ class AbstractPickleTests:
|
||||||
p = self.dumps(s, proto)
|
p = self.dumps(s, proto)
|
||||||
self.assert_is_copy(s, self.loads(p))
|
self.assert_is_copy(s, self.loads(p))
|
||||||
|
|
||||||
|
def test_bytes_memoization(self):
|
||||||
|
for proto in protocols:
|
||||||
|
for array_type in [bytes, ZeroCopyBytes]:
|
||||||
|
for s in b'', b'xyz', b'xyz'*100:
|
||||||
|
with self.subTest(proto=proto, array_type=array_type, s=s, independent=False):
|
||||||
|
b = array_type(s)
|
||||||
|
p = self.dumps((b, b), proto)
|
||||||
|
x, y = self.loads(p)
|
||||||
|
self.assertIs(x, y)
|
||||||
|
self.assert_is_copy((b, b), (x, y))
|
||||||
|
|
||||||
|
with self.subTest(proto=proto, array_type=array_type, s=s, independent=True):
|
||||||
|
b1, b2 = array_type(s), array_type(s)
|
||||||
|
p = self.dumps((b1, b2), proto)
|
||||||
|
# Note that the two objects returned by self.loads(p) might be
|
||||||
|
# identical (the very same object), but this is not always the
|
||||||
|
# case if the content is large (equality holds in both cases).
|
||||||
|
self.assert_is_copy((b1, b2), self.loads(p))
|
||||||
|
|
||||||
def test_bytearray(self):
|
def test_bytearray(self):
|
||||||
for proto in protocols:
|
for proto in protocols:
|
||||||
for s in b'', b'xyz', b'xyz'*100:
|
for s in b'', b'xyz', b'xyz'*100:
|
||||||
|
@ -1864,13 +1883,31 @@ class AbstractPickleTests:
|
||||||
self.assertNotIn(b'bytearray', p)
|
self.assertNotIn(b'bytearray', p)
|
||||||
self.assertTrue(opcode_in_pickle(pickle.BYTEARRAY8, p))
|
self.assertTrue(opcode_in_pickle(pickle.BYTEARRAY8, p))
|
||||||
|
|
||||||
def test_bytearray_memoization_bug(self):
|
def test_bytearray_memoization(self):
|
||||||
for proto in protocols:
|
for proto in protocols:
|
||||||
for s in b'', b'xyz', b'xyz'*100:
|
for array_type in [bytearray, ZeroCopyBytearray]:
|
||||||
b = bytearray(s)
|
for s in b'', b'xyz', b'xyz'*100:
|
||||||
p = self.dumps((b, b), proto)
|
with self.subTest(proto=proto, array_type=array_type, s=s, independent=False):
|
||||||
b1, b2 = self.loads(p)
|
b = array_type(s)
|
||||||
self.assertIs(b1, b2)
|
p = self.dumps((b, b), proto)
|
||||||
|
b1, b2 = self.loads(p)
|
||||||
|
self.assertIs(b1, b2)
|
||||||
|
|
||||||
|
with self.subTest(proto=proto, array_type=array_type, s=s, independent=True):
|
||||||
|
b1a, b2a = array_type(s), array_type(s)
|
||||||
|
# Unlike bytes, equal but independent bytearray objects are
|
||||||
|
# never identical.
|
||||||
|
self.assertIsNot(b1a, b2a)
|
||||||
|
|
||||||
|
p = self.dumps((b1a, b2a), proto)
|
||||||
|
b1b, b2b = self.loads(p)
|
||||||
|
self.assertIsNot(b1b, b2b)
|
||||||
|
|
||||||
|
self.assertIsNot(b1a, b1b)
|
||||||
|
self.assert_is_copy(b1a, b1b)
|
||||||
|
|
||||||
|
self.assertIsNot(b2a, b2b)
|
||||||
|
self.assert_is_copy(b2a, b2b)
|
||||||
|
|
||||||
def test_ints(self):
|
def test_ints(self):
|
||||||
for proto in protocols:
|
for proto in protocols:
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
Fix the Python implementation of :class:`pickle.Pickler` for :class:`bytes` and
|
||||||
|
:class:`bytearray` objects when using protocol version 5. Patch by Bénédikt
|
||||||
|
Tran.
|
Loading…
Reference in New Issue