bpo-32037: Use the INT opcode for 32-bit integers in protocol 0 pickles. (#4407)

This commit is contained in:
Serhiy Storchaka 2017-11-16 09:44:43 +02:00 committed by GitHub
parent 0a2abdfca2
commit 3daaafb700
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 69 additions and 62 deletions

View File

@ -674,7 +674,10 @@ class _Pickler:
else:
self.write(LONG4 + pack("<i", n) + encoded)
return
self.write(LONG + repr(obj).encode("ascii") + b'L\n')
if -0x80000000 <= obj <= 0x7fffffff:
self.write(INT + repr(obj).encode("ascii") + b'\n')
else:
self.write(LONG + repr(obj).encode("ascii") + b'L\n')
dispatch[int] = save_long
def save_float(self, obj):

View File

@ -2480,35 +2480,35 @@ _dis_test = r"""
0: ( MARK
1: l LIST (MARK at 0)
2: p PUT 0
5: L LONG 1
9: a APPEND
10: L LONG 2
14: a APPEND
15: ( MARK
16: L LONG 3
20: L LONG 4
24: t TUPLE (MARK at 15)
25: p PUT 1
28: a APPEND
29: ( MARK
30: d DICT (MARK at 29)
31: p PUT 2
34: c GLOBAL '_codecs encode'
50: p PUT 3
53: ( MARK
54: V UNICODE 'abc'
59: p PUT 4
62: V UNICODE 'latin1'
70: p PUT 5
73: t TUPLE (MARK at 53)
74: p PUT 6
77: R REDUCE
78: p PUT 7
81: V UNICODE 'def'
86: p PUT 8
89: s SETITEM
90: a APPEND
91: . STOP
5: I INT 1
8: a APPEND
9: I INT 2
12: a APPEND
13: ( MARK
14: I INT 3
17: I INT 4
20: t TUPLE (MARK at 13)
21: p PUT 1
24: a APPEND
25: ( MARK
26: d DICT (MARK at 25)
27: p PUT 2
30: c GLOBAL '_codecs encode'
46: p PUT 3
49: ( MARK
50: V UNICODE 'abc'
55: p PUT 4
58: V UNICODE 'latin1'
66: p PUT 5
69: t TUPLE (MARK at 49)
70: p PUT 6
73: R REDUCE
74: p PUT 7
77: V UNICODE 'def'
82: p PUT 8
85: s SETITEM
86: a APPEND
87: . STOP
highest protocol among opcodes = 0
Try again with a "binary" pickle.
@ -2577,13 +2577,13 @@ highest protocol among opcodes = 0
93: p PUT 6
96: V UNICODE 'value'
103: p PUT 7
106: L LONG 42
111: s SETITEM
112: b BUILD
113: a APPEND
114: g GET 5
117: a APPEND
118: . STOP
106: I INT 42
110: s SETITEM
111: b BUILD
112: a APPEND
113: g GET 5
116: a APPEND
117: . STOP
highest protocol among opcodes = 0
>>> dis(pickle.dumps(x, 1))

View File

@ -1821,7 +1821,7 @@ class AbstractPickleTests(unittest.TestCase):
with self.subTest(proto=proto):
s = self.dumps(x, proto)
if proto < 1:
self.assertIn(b'\nL64206', s) # LONG
self.assertIn(b'\nI64206', s) # INT
else:
self.assertIn(b'M\xce\xfa', s) # BININT2
self.assertEqual(opcode_in_pickle(pickle.NEWOBJ, s),
@ -1837,7 +1837,7 @@ class AbstractPickleTests(unittest.TestCase):
with self.subTest(proto=proto):
s = self.dumps(x, proto)
if proto < 1:
self.assertIn(b'\nL64206', s) # LONG
self.assertIn(b'\nI64206', s) # INT
elif proto < 2:
self.assertIn(b'M\xce\xfa', s) # BININT2
elif proto < 4:
@ -1857,7 +1857,7 @@ class AbstractPickleTests(unittest.TestCase):
with self.subTest(proto=proto):
s = self.dumps(x, proto)
if proto < 1:
self.assertIn(b'\nL64206', s) # LONG
self.assertIn(b'\nI64206', s) # INT
elif proto < 2:
self.assertIn(b'M\xce\xfa', s) # BININT2
elif proto < 4:

View File

@ -0,0 +1,4 @@
Integers that fit in a signed 32-bit integer will now be pickled with
protocol 0 using the INT opcode. This decreases the size of the pickle,
speeds up pickling and unpickling, and causes these integers to be
unpickled as int instances in Python 2.

View File

@ -1858,18 +1858,13 @@ save_long(PicklerObject *self, PyObject *obj)
PyObject *repr = NULL;
Py_ssize_t size;
long val;
int overflow;
int status = 0;
const char long_op = LONG;
val= PyLong_AsLong(obj);
if (val == -1 && PyErr_Occurred()) {
/* out of range for int pickling */
PyErr_Clear();
}
else if (self->bin &&
(sizeof(long) <= 4 ||
(val <= 0x7fffffffL && val >= (-0x7fffffffL - 1)))) {
val= PyLong_AsLongAndOverflow(obj, &overflow);
if (!overflow && (sizeof(long) <= 4 ||
(val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
{
/* result fits in a signed 4-byte integer.
Note: we can't use -0x80000000L in the above condition because some
@ -1882,31 +1877,35 @@ save_long(PicklerObject *self, PyObject *obj)
char pdata[32];
Py_ssize_t len = 0;
pdata[1] = (unsigned char)(val & 0xff);
pdata[2] = (unsigned char)((val >> 8) & 0xff);
pdata[3] = (unsigned char)((val >> 16) & 0xff);
pdata[4] = (unsigned char)((val >> 24) & 0xff);
if (self->bin) {
pdata[1] = (unsigned char)(val & 0xff);
pdata[2] = (unsigned char)((val >> 8) & 0xff);
pdata[3] = (unsigned char)((val >> 16) & 0xff);
pdata[4] = (unsigned char)((val >> 24) & 0xff);
if ((pdata[4] == 0) && (pdata[3] == 0)) {
if (pdata[2] == 0) {
pdata[0] = BININT1;
len = 2;
if ((pdata[4] != 0) || (pdata[3] != 0)) {
pdata[0] = BININT;
len = 5;
}
else {
else if (pdata[2] != 0) {
pdata[0] = BININT2;
len = 3;
}
else {
pdata[0] = BININT1;
len = 2;
}
}
else {
pdata[0] = BININT;
len = 5;
sprintf(pdata, "%c%ld\n", INT, val);
len = strlen(pdata);
}
if (_Pickler_Write(self, pdata, len) < 0)
return -1;
return 0;
}
assert(!PyErr_Occurred());
if (self->proto >= 2) {
/* Linear-time pickling. */
@ -1986,6 +1985,7 @@ save_long(PicklerObject *self, PyObject *obj)
goto error;
}
else {
const char long_op = LONG;
const char *string;
/* proto < 2: write the repr and newline. This is quadratic-time (in