Issue #13505: Make pickling of bytes objects compatible with Python 2.
Initial patch by sbt.
This commit is contained in:
parent
7b7e39a61f
commit
3bfc65a25b
|
@ -487,7 +487,11 @@ class _Pickler:
|
|||
|
||||
def save_bytes(self, obj, pack=struct.pack):
|
||||
if self.proto < 3:
|
||||
self.save_reduce(bytes, (list(obj),), obj=obj)
|
||||
if len(obj) == 0:
|
||||
self.save_reduce(bytes, (), obj=obj)
|
||||
else:
|
||||
self.save_reduce(codecs.encode,
|
||||
(str(obj, 'latin1'), 'latin1'), obj=obj)
|
||||
return
|
||||
n = len(obj)
|
||||
if n < 256:
|
||||
|
|
|
@ -2083,27 +2083,22 @@ _dis_test = r"""
|
|||
29: ( MARK
|
||||
30: d DICT (MARK at 29)
|
||||
31: p PUT 2
|
||||
34: c GLOBAL '__builtin__ bytes'
|
||||
53: p PUT 3
|
||||
56: ( MARK
|
||||
57: ( MARK
|
||||
58: l LIST (MARK at 57)
|
||||
34: c GLOBAL '_codecs encode'
|
||||
50: p PUT 3
|
||||
53: ( MARK
|
||||
54: V UNICODE 'abc'
|
||||
59: p PUT 4
|
||||
62: L LONG 97
|
||||
67: a APPEND
|
||||
68: L LONG 98
|
||||
73: a APPEND
|
||||
74: L LONG 99
|
||||
79: a APPEND
|
||||
80: t TUPLE (MARK at 56)
|
||||
81: p PUT 5
|
||||
84: R REDUCE
|
||||
85: p PUT 6
|
||||
88: V UNICODE 'def'
|
||||
93: p PUT 7
|
||||
96: s SETITEM
|
||||
97: a APPEND
|
||||
98: . STOP
|
||||
62: V UNICODE 'latin1'
|
||||
70: p PUT 5
|
||||
73: t TUPLE (MARK at 53)
|
||||
74: p PUT 6
|
||||
77: R REDUCE
|
||||
78: p PUT 7
|
||||
81: V UNICODE 'def'
|
||||
86: p PUT 8
|
||||
89: s SETITEM
|
||||
90: a APPEND
|
||||
91: . STOP
|
||||
highest protocol among opcodes = 0
|
||||
|
||||
Try again with a "binary" pickle.
|
||||
|
@ -2122,25 +2117,22 @@ Try again with a "binary" pickle.
|
|||
14: q BINPUT 1
|
||||
16: } EMPTY_DICT
|
||||
17: q BINPUT 2
|
||||
19: c GLOBAL '__builtin__ bytes'
|
||||
38: q BINPUT 3
|
||||
40: ( MARK
|
||||
41: ] EMPTY_LIST
|
||||
42: q BINPUT 4
|
||||
44: ( MARK
|
||||
45: K BININT1 97
|
||||
47: K BININT1 98
|
||||
49: K BININT1 99
|
||||
51: e APPENDS (MARK at 44)
|
||||
52: t TUPLE (MARK at 40)
|
||||
53: q BINPUT 5
|
||||
55: R REDUCE
|
||||
56: q BINPUT 6
|
||||
58: X BINUNICODE 'def'
|
||||
66: q BINPUT 7
|
||||
68: s SETITEM
|
||||
69: e APPENDS (MARK at 3)
|
||||
70: . STOP
|
||||
19: c GLOBAL '_codecs encode'
|
||||
35: q BINPUT 3
|
||||
37: ( MARK
|
||||
38: X BINUNICODE 'abc'
|
||||
46: q BINPUT 4
|
||||
48: X BINUNICODE 'latin1'
|
||||
59: q BINPUT 5
|
||||
61: t TUPLE (MARK at 37)
|
||||
62: q BINPUT 6
|
||||
64: R REDUCE
|
||||
65: q BINPUT 7
|
||||
67: X BINUNICODE 'def'
|
||||
75: q BINPUT 8
|
||||
77: s SETITEM
|
||||
78: e APPENDS (MARK at 3)
|
||||
79: . STOP
|
||||
highest protocol among opcodes = 1
|
||||
|
||||
Exercise the INST/OBJ/BUILD family.
|
||||
|
|
|
@ -636,9 +636,15 @@ class AbstractPickleTests(unittest.TestCase):
|
|||
|
||||
def test_bytes(self):
|
||||
for proto in protocols:
|
||||
for u in b'', b'xyz', b'xyz'*100:
|
||||
p = self.dumps(u)
|
||||
self.assertEqual(self.loads(p), u)
|
||||
for s in b'', b'xyz', b'xyz'*100:
|
||||
p = self.dumps(s)
|
||||
self.assertEqual(self.loads(p), s)
|
||||
for s in [bytes([i]) for i in range(256)]:
|
||||
p = self.dumps(s)
|
||||
self.assertEqual(self.loads(p), s)
|
||||
for s in [bytes([i, i]) for i in range(256)]:
|
||||
p = self.dumps(s)
|
||||
self.assertEqual(self.loads(p), s)
|
||||
|
||||
def test_ints(self):
|
||||
import sys
|
||||
|
|
|
@ -10,6 +10,9 @@ What's New in Python 3.2.3?
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issue #13505: Pickle bytes objects in a way that is compatible with
|
||||
Python 2 when using protocols <= 2.
|
||||
|
||||
- Issue #11147: Fix an unused argument in _Py_ANNOTATE_MEMORY_ORDER. (Fix
|
||||
given by Campbell Barton).
|
||||
|
||||
|
|
|
@ -369,7 +369,7 @@ typedef struct UnpicklerObject {
|
|||
char *errors; /* Name of errors handling scheme to use when
|
||||
decoding strings. The default value is
|
||||
"strict". */
|
||||
Py_ssize_t *marks; /* Mark stack, used for unpickling container
|
||||
Py_ssize_t *marks; /* Mark stack, used for unpickling container
|
||||
objects. */
|
||||
Py_ssize_t num_marks; /* Number of marks in the mark stack. */
|
||||
Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
|
||||
|
@ -1700,26 +1700,58 @@ save_bytes(PicklerObject *self, PyObject *obj)
|
|||
if (self->proto < 3) {
|
||||
/* Older pickle protocols do not have an opcode for pickling bytes
|
||||
objects. Therefore, we need to fake the copy protocol (i.e.,
|
||||
the __reduce__ method) to permit bytes object unpickling. */
|
||||
the __reduce__ method) to permit bytes object unpickling.
|
||||
|
||||
Here we use a hack to be compatible with Python 2. Since in Python
|
||||
2 'bytes' is just an alias for 'str' (which has different
|
||||
parameters than the actual bytes object), we use codecs.encode
|
||||
to create the appropriate 'str' object when unpickled using
|
||||
Python 2 *and* the appropriate 'bytes' object when unpickled
|
||||
using Python 3. Again this is a hack and we don't need to do this
|
||||
with newer protocols. */
|
||||
static PyObject *codecs_encode = NULL;
|
||||
PyObject *reduce_value = NULL;
|
||||
PyObject *bytelist = NULL;
|
||||
int status;
|
||||
|
||||
bytelist = PySequence_List(obj);
|
||||
if (bytelist == NULL)
|
||||
return -1;
|
||||
|
||||
reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
|
||||
bytelist);
|
||||
if (reduce_value == NULL) {
|
||||
Py_DECREF(bytelist);
|
||||
return -1;
|
||||
if (codecs_encode == NULL) {
|
||||
PyObject *codecs_module = PyImport_ImportModule("codecs");
|
||||
if (codecs_module == NULL) {
|
||||
return -1;
|
||||
}
|
||||
codecs_encode = PyObject_GetAttrString(codecs_module, "encode");
|
||||
Py_DECREF(codecs_module);
|
||||
if (codecs_encode == NULL) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (PyBytes_GET_SIZE(obj) == 0) {
|
||||
reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
|
||||
}
|
||||
else {
|
||||
static PyObject *latin1 = NULL;
|
||||
PyObject *unicode_str =
|
||||
PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
|
||||
PyBytes_GET_SIZE(obj),
|
||||
"strict");
|
||||
if (unicode_str == NULL)
|
||||
return -1;
|
||||
if (latin1 == NULL) {
|
||||
latin1 = PyUnicode_InternFromString("latin1");
|
||||
if (latin1 == NULL)
|
||||
return -1;
|
||||
}
|
||||
reduce_value = Py_BuildValue("(O(OO))",
|
||||
codecs_encode, unicode_str, latin1);
|
||||
Py_DECREF(unicode_str);
|
||||
}
|
||||
|
||||
if (reduce_value == NULL)
|
||||
return -1;
|
||||
|
||||
/* save_reduce() will memoize the object automatically. */
|
||||
status = save_reduce(self, reduce_value, obj);
|
||||
Py_DECREF(reduce_value);
|
||||
Py_DECREF(bytelist);
|
||||
return status;
|
||||
}
|
||||
else {
|
||||
|
@ -1727,7 +1759,7 @@ save_bytes(PicklerObject *self, PyObject *obj)
|
|||
char header[5];
|
||||
Py_ssize_t len;
|
||||
|
||||
size = PyBytes_Size(obj);
|
||||
size = PyBytes_GET_SIZE(obj);
|
||||
if (size < 0)
|
||||
return -1;
|
||||
|
||||
|
|
Loading…
Reference in New Issue