Issue #13505: Make pickling of bytes objects compatible with Python 2.

Initial patch by sbt.
This commit is contained in:
Alexandre Vassalotti 2011-12-13 13:08:09 -05:00
parent 7b7e39a61f
commit 3bfc65a25b
5 changed files with 94 additions and 57 deletions

View File

@ -487,7 +487,11 @@ class _Pickler:
def save_bytes(self, obj, pack=struct.pack):
if self.proto < 3:
self.save_reduce(bytes, (list(obj),), obj=obj)
if len(obj) == 0:
self.save_reduce(bytes, (), obj=obj)
else:
self.save_reduce(codecs.encode,
(str(obj, 'latin1'), 'latin1'), obj=obj)
return
n = len(obj)
if n < 256:

View File

@ -2083,27 +2083,22 @@ _dis_test = r"""
29: ( MARK
30: d DICT (MARK at 29)
31: p PUT 2
34: c GLOBAL '__builtin__ bytes'
53: p PUT 3
56: ( MARK
57: ( MARK
58: l LIST (MARK at 57)
34: c GLOBAL '_codecs encode'
50: p PUT 3
53: ( MARK
54: V UNICODE 'abc'
59: p PUT 4
62: L LONG 97
67: a APPEND
68: L LONG 98
73: a APPEND
74: L LONG 99
79: a APPEND
80: t TUPLE (MARK at 56)
81: p PUT 5
84: R REDUCE
85: p PUT 6
88: V UNICODE 'def'
93: p PUT 7
96: s SETITEM
97: a APPEND
98: . STOP
62: V UNICODE 'latin1'
70: p PUT 5
73: t TUPLE (MARK at 53)
74: p PUT 6
77: R REDUCE
78: p PUT 7
81: V UNICODE 'def'
86: p PUT 8
89: s SETITEM
90: a APPEND
91: . STOP
highest protocol among opcodes = 0
Try again with a "binary" pickle.
@ -2122,25 +2117,22 @@ Try again with a "binary" pickle.
14: q BINPUT 1
16: } EMPTY_DICT
17: q BINPUT 2
19: c GLOBAL '__builtin__ bytes'
38: q BINPUT 3
40: ( MARK
41: ] EMPTY_LIST
42: q BINPUT 4
44: ( MARK
45: K BININT1 97
47: K BININT1 98
49: K BININT1 99
51: e APPENDS (MARK at 44)
52: t TUPLE (MARK at 40)
53: q BINPUT 5
55: R REDUCE
56: q BINPUT 6
58: X BINUNICODE 'def'
66: q BINPUT 7
68: s SETITEM
69: e APPENDS (MARK at 3)
70: . STOP
19: c GLOBAL '_codecs encode'
35: q BINPUT 3
37: ( MARK
38: X BINUNICODE 'abc'
46: q BINPUT 4
48: X BINUNICODE 'latin1'
59: q BINPUT 5
61: t TUPLE (MARK at 37)
62: q BINPUT 6
64: R REDUCE
65: q BINPUT 7
67: X BINUNICODE 'def'
75: q BINPUT 8
77: s SETITEM
78: e APPENDS (MARK at 3)
79: . STOP
highest protocol among opcodes = 1
Exercise the INST/OBJ/BUILD family.

View File

@ -636,9 +636,15 @@ class AbstractPickleTests(unittest.TestCase):
def test_bytes(self):
for proto in protocols:
for u in b'', b'xyz', b'xyz'*100:
p = self.dumps(u)
self.assertEqual(self.loads(p), u)
for s in b'', b'xyz', b'xyz'*100:
p = self.dumps(s)
self.assertEqual(self.loads(p), s)
for s in [bytes([i]) for i in range(256)]:
p = self.dumps(s)
self.assertEqual(self.loads(p), s)
for s in [bytes([i, i]) for i in range(256)]:
p = self.dumps(s)
self.assertEqual(self.loads(p), s)
def test_ints(self):
import sys

View File

@ -10,6 +10,9 @@ What's New in Python 3.2.3?
Core and Builtins
-----------------
- Issue #13505: Pickle bytes objects in a way that is compatible with
Python 2 when using protocols <= 2.
- Issue #11147: Fix an unused argument in _Py_ANNOTATE_MEMORY_ORDER. (Fix
given by Campbell Barton).

View File

@ -369,7 +369,7 @@ typedef struct UnpicklerObject {
char *errors; /* Name of the error handling scheme to use when
decoding strings. The default value is
"strict". */
Py_ssize_t *marks; /* Mark stack, used for unpickling container
Py_ssize_t *marks; /* Mark stack, used for unpickling container
objects. */
Py_ssize_t num_marks; /* Number of marks in the mark stack. */
Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
@ -1700,26 +1700,58 @@ save_bytes(PicklerObject *self, PyObject *obj)
if (self->proto < 3) {
/* Older pickle protocols do not have an opcode for pickling bytes
objects. Therefore, we need to fake the copy protocol (i.e.,
the __reduce__ method) to permit bytes object unpickling. */
the __reduce__ method) to permit bytes object unpickling.
Here we use a hack to be compatible with Python 2. Since in Python
2 'bytes' is just an alias for 'str' (which has different
parameters than the actual bytes object), we use codecs.encode
to create the appropriate 'str' object when unpickled using
Python 2 *and* the appropriate 'bytes' object when unpickled
using Python 3. Again this is a hack and we don't need to do this
with newer protocols. */
static PyObject *codecs_encode = NULL;
PyObject *reduce_value = NULL;
PyObject *bytelist = NULL;
int status;
bytelist = PySequence_List(obj);
if (bytelist == NULL)
return -1;
reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
bytelist);
if (reduce_value == NULL) {
Py_DECREF(bytelist);
return -1;
if (codecs_encode == NULL) {
PyObject *codecs_module = PyImport_ImportModule("codecs");
if (codecs_module == NULL) {
return -1;
}
codecs_encode = PyObject_GetAttrString(codecs_module, "encode");
Py_DECREF(codecs_module);
if (codecs_encode == NULL) {
return -1;
}
}
if (PyBytes_GET_SIZE(obj) == 0) {
reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
}
else {
static PyObject *latin1 = NULL;
PyObject *unicode_str =
PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
PyBytes_GET_SIZE(obj),
"strict");
if (unicode_str == NULL)
return -1;
if (latin1 == NULL) {
latin1 = PyUnicode_InternFromString("latin1");
if (latin1 == NULL)
return -1;
}
reduce_value = Py_BuildValue("(O(OO))",
codecs_encode, unicode_str, latin1);
Py_DECREF(unicode_str);
}
if (reduce_value == NULL)
return -1;
/* save_reduce() will memoize the object automatically. */
status = save_reduce(self, reduce_value, obj);
Py_DECREF(reduce_value);
Py_DECREF(bytelist);
return status;
}
else {
@ -1727,7 +1759,7 @@ save_bytes(PicklerObject *self, PyObject *obj)
char header[5];
Py_ssize_t len;
size = PyBytes_Size(obj);
size = PyBytes_GET_SIZE(obj);
if (size < 0)
return -1;