cPickle.c: Full support for the new LONG1 and LONG4. Added comments.
Assorted code cleanups; e.g., sizeof(char) is 1 by definition, so there's no need to do things like multiply by sizeof(char) in hairy malloc arguments. Fixed an undetected-overflow bug in readline_file(). longobject.c: Fixed a really stupid bug in the new _PyLong_NumBits. pickle.py: Fixed stupid bug in save_long(): When proto is 2, it wrote LONG1 or LONG4, but forgot to return then -- it went on to append the proto 1 LONG opcode too. Fixed equally stupid cancelling bugs in load_long1() and load_long4(): they *returned* the unpickled long instead of pushing it on the stack. The return values were ignored. Tests passed before only because save_long() pickled the long twice. Fixed bugs in encode_long(). Noted that decode_long() is quadratic-time despite our hopes, because long(string, 16) is still quadratic-time in len(string). It's hex() that's linear-time. I don't know a way to make decode_long() linear-time in Python, short of maybe transforming the 256's-complement bytes into marshal's funky internal format, and letting marshal decode that. It would be more valuable to make long(string, 16) linear time. pickletester.py: Added a global "protocols" vector so tests can try all the protocols in a sane way. Changed test_ints() and test_unicode() to do so. Added a new test_long(), but the tail end of it is disabled because it "takes forever" under pickle.py (but runs very quickly under cPickle: cPickle proto 2 for longs is linear-time).
This commit is contained in:
parent
5bd2a79b22
commit
ee1a53cbb1
|
@ -553,6 +553,7 @@ class Pickler:
|
|||
self.write(LONG1 + chr(n) + bytes)
|
||||
else:
|
||||
self.write(LONG4 + pack("<i", n) + bytes)
|
||||
return
|
||||
self.write(LONG + `obj` + '\n')
|
||||
dispatch[LongType] = save_long
|
||||
|
||||
|
@ -1042,13 +1043,13 @@ class Unpickler:
|
|||
def load_long1(self):
|
||||
n = ord(self.read(1))
|
||||
bytes = self.read(n)
|
||||
return decode_long(bytes)
|
||||
self.append(decode_long(bytes))
|
||||
dispatch[LONG1] = load_long1
|
||||
|
||||
def load_long4(self):
|
||||
n = mloads('i' + self.read(4))
|
||||
bytes = self.read(n)
|
||||
return decode_long(bytes)
|
||||
self.append(decode_long(bytes))
|
||||
dispatch[LONG4] = load_long4
|
||||
|
||||
def load_float(self):
|
||||
|
@ -1404,24 +1405,31 @@ def encode_long(x):
|
|||
njunkchars = 2 + ashex.endswith('L')
|
||||
nibbles = len(ashex) - njunkchars
|
||||
if nibbles & 1:
|
||||
# need an even # of nibbles for unhexlify
|
||||
# Extend to a full byte.
|
||||
nibbles += 1
|
||||
nbits = nibbles * 4
|
||||
x += 1L << nbits
|
||||
assert x > 0
|
||||
ashex = hex(x)
|
||||
if x >> (nbits - 1) == 0:
|
||||
njunkchars = 2 + ashex.endswith('L')
|
||||
newnibbles = len(ashex) - njunkchars
|
||||
if newnibbles < nibbles:
|
||||
ashex = "0x" + "0" * (nibbles - newnibbles) + ashex[2:]
|
||||
if int(ashex[2], 16) < 8:
|
||||
# "looks positive", so need a byte of sign bits
|
||||
ashex = "0xff" + x[2:]
|
||||
ashex = "0xff" + ashex[2:]
|
||||
|
||||
if ashex.endswith('L'):
|
||||
ashex = ashex[2:-1]
|
||||
else:
|
||||
ashex = ashex[2:]
|
||||
assert len(ashex) & 1 == 0
|
||||
assert len(ashex) & 1 == 0, (x, ashex)
|
||||
binary = _binascii.unhexlify(ashex)
|
||||
return binary[::-1]
|
||||
|
||||
# XXX OOPS! This is still quadratic-time. While hex(n) is linear-time
|
||||
# XXX in the # of digits in n, long(s, 16) is still quadratic-time
|
||||
# XXX in len(s).
|
||||
def decode_long(data):
|
||||
r"""Decode a long from a two's complement little-endian binary string.
|
||||
|
||||
|
@ -1445,7 +1453,7 @@ def decode_long(data):
|
|||
if nbytes == 0:
|
||||
return 0L
|
||||
ashex = _binascii.hexlify(data[::-1])
|
||||
n = long(ashex, 16)
|
||||
n = long(ashex, 16) # quadratic time
|
||||
if data[-1] >= '\x80':
|
||||
n -= 1L << (nbytes * 8)
|
||||
return n
|
||||
|
|
|
@ -1,6 +1,11 @@
|
|||
import unittest
|
||||
from test.test_support import TestFailed, have_unicode, TESTFN
|
||||
|
||||
# Tests that try a number of pickle protocols should have a
|
||||
# for proto in protocols:
|
||||
# kind of outer loop. Bump the 3 to 4 if/when protocol 3 is invented.
|
||||
protocols = range(3)
|
||||
|
||||
class C:
|
||||
def __cmp__(self, other):
|
||||
return cmp(self.__dict__, other.__dict__)
|
||||
|
@ -28,6 +33,9 @@ class metaclass(type):
|
|||
class use_metaclass(object):
|
||||
__metaclass__ = metaclass
|
||||
|
||||
# DATA and BINDATA are the protocol 0 and protocol 1 pickles of the object
|
||||
# returned by create_data().
|
||||
|
||||
# break into multiple strings to avoid confusing font-lock-mode
|
||||
DATA = """(lp1
|
||||
I0
|
||||
|
@ -210,20 +218,22 @@ class AbstractPickleTests(unittest.TestCase):
|
|||
def test_unicode(self):
|
||||
endcases = [unicode(''), unicode('<\\u>'), unicode('<\\\u1234>'),
|
||||
unicode('<\n>'), unicode('<\\>')]
|
||||
for u in endcases:
|
||||
p = self.dumps(u)
|
||||
u2 = self.loads(p)
|
||||
self.assertEqual(u2, u)
|
||||
for proto in protocols:
|
||||
for u in endcases:
|
||||
p = self.dumps(u, proto)
|
||||
u2 = self.loads(p)
|
||||
self.assertEqual(u2, u)
|
||||
|
||||
def test_ints(self):
|
||||
import sys
|
||||
n = sys.maxint
|
||||
while n:
|
||||
for expected in (-n, n):
|
||||
s = self.dumps(expected)
|
||||
n2 = self.loads(s)
|
||||
self.assertEqual(expected, n2)
|
||||
n = n >> 1
|
||||
for proto in protocols:
|
||||
n = sys.maxint
|
||||
while n:
|
||||
for expected in (-n, n):
|
||||
s = self.dumps(expected, proto)
|
||||
n2 = self.loads(s)
|
||||
self.assertEqual(expected, n2)
|
||||
n = n >> 1
|
||||
|
||||
def test_maxint64(self):
|
||||
maxint64 = (1L << 63) - 1
|
||||
|
@ -235,6 +245,34 @@ class AbstractPickleTests(unittest.TestCase):
|
|||
data = 'I' + str(maxint64) + 'JUNK\n.'
|
||||
self.assertRaises(ValueError, self.loads, data)
|
||||
|
||||
def test_long(self):
|
||||
for proto in protocols:
|
||||
# 256 bytes is where LONG4 begins
|
||||
for nbits in 1, 8, 8*254, 8*255, 8*256, 8*257:
|
||||
nbase = 1L << nbits
|
||||
for npos in nbase-1, nbase, nbase+1:
|
||||
for n in npos, -npos:
|
||||
pickle = self.dumps(n, proto)
|
||||
got = self.loads(pickle)
|
||||
self.assertEqual(n, got)
|
||||
# Try a monster. This is quadratic-time in protos 0 & 1, so don't
|
||||
# bother with those.
|
||||
# XXX Damn. pickle.py is still quadratic-time here, due to
|
||||
# XXX long(string, 16). cPickle runs this in an eyeblink, but I
|
||||
# XXX gave up waiting for pickle.py to get beyond "loading". Giving
|
||||
# XXX up for now.
|
||||
return
|
||||
print "building long"
|
||||
nbase = long("deadbeeffeedface", 16)
|
||||
nbase += nbase << 1000000
|
||||
for n in nbase, -nbase:
|
||||
print "dumping"
|
||||
p = self.dumps(n, 2)
|
||||
print "loading"
|
||||
got = self.loads(p)
|
||||
print "checking"
|
||||
self.assertEqual(n, got)
|
||||
|
||||
def test_reduce(self):
|
||||
pass
|
||||
|
||||
|
|
|
@ -464,7 +464,7 @@ write_other(Picklerobject *self, char *s, int n)
|
|||
|
||||
|
||||
static int
|
||||
read_file(Unpicklerobject *self, char **s, int n)
|
||||
read_file(Unpicklerobject *self, char **s, int n)
|
||||
{
|
||||
size_t nbytesread;
|
||||
|
||||
|
@ -472,7 +472,7 @@ read_file(Unpicklerobject *self, char **s, int n)
|
|||
int size;
|
||||
|
||||
size = ((n < 32) ? 32 : n);
|
||||
if (!( self->buf = (char *)malloc(size * sizeof(char)))) {
|
||||
if (!( self->buf = (char *)malloc(size))) {
|
||||
PyErr_NoMemory();
|
||||
return -1;
|
||||
}
|
||||
|
@ -480,12 +480,11 @@ read_file(Unpicklerobject *self, char **s, int n)
|
|||
self->buf_size = size;
|
||||
}
|
||||
else if (n > self->buf_size) {
|
||||
self->buf = (char *)realloc(self->buf, n * sizeof(char));
|
||||
self->buf = (char *)realloc(self->buf, n);
|
||||
if (!self->buf) {
|
||||
PyErr_NoMemory();
|
||||
return -1;
|
||||
}
|
||||
|
||||
self->buf_size = n;
|
||||
}
|
||||
|
||||
|
@ -514,16 +513,16 @@ readline_file(Unpicklerobject *self, char **s)
|
|||
int i;
|
||||
|
||||
if (self->buf_size == 0) {
|
||||
if (!( self->buf = (char *)malloc(40 * sizeof(char)))) {
|
||||
if (!( self->buf = (char *)malloc(40))) {
|
||||
PyErr_NoMemory();
|
||||
return -1;
|
||||
}
|
||||
|
||||
self->buf_size = 40;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
while (1) {
|
||||
int bigger;
|
||||
for (; i < (self->buf_size - 1); i++) {
|
||||
if (feof(self->fp) ||
|
||||
(self->buf[i] = getc(self->fp)) == '\n') {
|
||||
|
@ -532,14 +531,17 @@ readline_file(Unpicklerobject *self, char **s)
|
|||
return i + 1;
|
||||
}
|
||||
}
|
||||
self->buf = (char *)realloc(self->buf,
|
||||
(self->buf_size * 2) * sizeof(char));
|
||||
bigger = self->buf_size << 1;
|
||||
if (bigger <= 0) { /* overflow */
|
||||
PyErr_NoMemory();
|
||||
return -1;
|
||||
}
|
||||
self->buf = (char *)realloc(self->buf, bigger);
|
||||
if (!self->buf) {
|
||||
PyErr_NoMemory();
|
||||
return -1;
|
||||
}
|
||||
|
||||
self->buf_size *= 2;
|
||||
self->buf_size = bigger;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -620,14 +622,18 @@ readline_other(Unpicklerobject *self, char **s)
|
|||
return str_size;
|
||||
}
|
||||
|
||||
|
||||
/* Copy the first n bytes from s into newly malloc'ed memory, plus a
|
||||
* trailing 0 byte. Return a pointer to that, or NULL if out of memory.
|
||||
* The caller is responsible for free()'ing the return value.
|
||||
*/
|
||||
static char *
|
||||
pystrndup(char *s, int l)
|
||||
pystrndup(char *s, int n)
|
||||
{
|
||||
char *r;
|
||||
if (!( r=malloc((l+1)*sizeof(char)))) return (char*)PyErr_NoMemory();
|
||||
memcpy(r,s,l);
|
||||
r[l]=0;
|
||||
char *r = (char *)malloc(n+1);
|
||||
if (r == NULL)
|
||||
return (char*)PyErr_NoMemory();
|
||||
memcpy(r, s, n);
|
||||
r[n] = 0;
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -1012,11 +1018,93 @@ save_int(Picklerobject *self, PyObject *args)
|
|||
static int
|
||||
save_long(Picklerobject *self, PyObject *args)
|
||||
{
|
||||
int size, res = -1;
|
||||
PyObject *repr = 0;
|
||||
int size;
|
||||
int res = -1;
|
||||
PyObject *repr = NULL;
|
||||
|
||||
static char l = LONG;
|
||||
|
||||
if (self->proto >= 2) {
|
||||
/* Linear-time pickling. */
|
||||
size_t nbits;
|
||||
size_t nbytes;
|
||||
unsigned char *pdata;
|
||||
char c_str[5];
|
||||
int i;
|
||||
int sign = _PyLong_Sign(args);
|
||||
|
||||
if (sign == 0) {
|
||||
/* It's 0 -- an empty bytestring. */
|
||||
c_str[0] = LONG1;
|
||||
c_str[1] = 0;
|
||||
i = self->write_func(self, c_str, 2);
|
||||
if (i < 0) goto finally;
|
||||
res = 0;
|
||||
goto finally;
|
||||
}
|
||||
nbits = _PyLong_NumBits(args);
|
||||
if (nbits == (size_t)-1 && PyErr_Occurred())
|
||||
goto finally;
|
||||
/* How many bytes do we need? There are nbits >> 3 full
|
||||
* bytes of data, and nbits & 7 leftover bits. If there
|
||||
* are any leftover bits, then we clearly need another
|
||||
* byte. Wnat's not so obvious is that we *probably*
|
||||
* need another byte even if there aren't any leftovers:
|
||||
* the most-significant bit of the most-significant byte
|
||||
* acts like a sign bit, and it's usually got a sense
|
||||
* opposite of the one we need. The exception is longs
|
||||
* of the form -(2**(8*j-1)) for j > 0. Such a long is
|
||||
* its own 256's-complement, so has the right sign bit
|
||||
* even without the extra byte. That's a pain to check
|
||||
* for in advance, though, so we always grab an extra
|
||||
* byte at the start, and cut it back later if possible.
|
||||
*/
|
||||
nbytes = (nbits >> 3) + 1;
|
||||
if ((int)nbytes < 0 || (size_t)(int)nbytes != nbytes) {
|
||||
PyErr_SetString(PyExc_OverflowError, "long too large "
|
||||
"to pickle");
|
||||
goto finally;
|
||||
}
|
||||
repr = PyString_FromStringAndSize(NULL, (int)nbytes);
|
||||
if (repr == NULL) goto finally;
|
||||
pdata = (unsigned char *)PyString_AS_STRING(repr);
|
||||
i = _PyLong_AsByteArray((PyLongObject *)args,
|
||||
pdata, nbytes,
|
||||
1 /* little endian */, 1 /* signed */);
|
||||
if (i < 0) goto finally;
|
||||
/* If the long is negative, this may be a byte more than
|
||||
* needed. This is so iff the MSB is all redundant sign
|
||||
* bits.
|
||||
*/
|
||||
if (sign < 0 && nbytes > 1 && pdata[nbytes - 1] == 0xff &&
|
||||
(pdata[nbytes - 2] & 0x80) != 0)
|
||||
--nbytes;
|
||||
|
||||
if (nbytes < 256) {
|
||||
c_str[0] = LONG1;
|
||||
c_str[1] = (char)nbytes;
|
||||
size = 2;
|
||||
}
|
||||
else {
|
||||
c_str[0] = LONG4;
|
||||
size = (int)nbytes;
|
||||
for (i = 1; i < 5; i++) {
|
||||
c_str[i] = (char)(size & 0xff);
|
||||
size >>= 8;
|
||||
}
|
||||
size = 5;
|
||||
}
|
||||
i = self->write_func(self, c_str, size);
|
||||
if (i < 0) goto finally;
|
||||
i = self->write_func(self, (char *)pdata, (int)nbytes);
|
||||
if (i < 0) goto finally;
|
||||
res = 0;
|
||||
goto finally;
|
||||
}
|
||||
|
||||
/* proto < 2: write the repr and newline. This is quadratic-time
|
||||
* (in the number of digits), in both directions.
|
||||
*/
|
||||
if (!( repr = PyObject_Repr(args)))
|
||||
goto finally;
|
||||
|
||||
|
@ -1038,7 +1126,6 @@ save_long(Picklerobject *self, PyObject *args)
|
|||
|
||||
finally:
|
||||
Py_XDECREF(repr);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -2687,9 +2774,13 @@ load_int(Unpicklerobject *self)
|
|||
return res;
|
||||
}
|
||||
|
||||
|
||||
/* s contains x bytes of a little-endian integer. Return its value as a
|
||||
* C int. Obscure: when x is 1 or 2, this is an unsigned little-endian
|
||||
* int, but when x is 4 it's a signed one. This is an historical source
|
||||
* of x-platform bugs.
|
||||
*/
|
||||
static long
|
||||
calc_binint(char *s, int x)
|
||||
calc_binint(char *s, int x)
|
||||
{
|
||||
unsigned char c;
|
||||
int i;
|
||||
|
@ -2786,6 +2877,45 @@ load_long(Unpicklerobject *self)
|
|||
return res;
|
||||
}
|
||||
|
||||
/* 'size' bytes contain the # of bytes of little-endian 256's-complement
|
||||
* data following.
|
||||
*/
|
||||
static int
|
||||
load_counted_long(Unpicklerobject *self, int size)
|
||||
{
|
||||
int i;
|
||||
char *nbytes;
|
||||
unsigned char *pdata;
|
||||
PyObject *along;
|
||||
|
||||
assert(size == 1 || size == 4);
|
||||
i = self->read_func(self, &nbytes, size);
|
||||
if (i < 0) return -1;
|
||||
|
||||
size = calc_binint(nbytes, size);
|
||||
if (size < 0) {
|
||||
/* Corrupt or hostile pickle -- we never write one like
|
||||
* this.
|
||||
*/
|
||||
PyErr_SetString(PyExc_ValueError, "LONG pickle has negative "
|
||||
"byte count");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (size == 0)
|
||||
along = PyLong_FromLong(0L);
|
||||
else {
|
||||
/* Read the raw little-endian bytes & convert. */
|
||||
i = self->read_func(self, &(char *)pdata, size);
|
||||
if (i < 0) return -1;
|
||||
along = _PyLong_FromByteArray(pdata, (size_t)size,
|
||||
1 /* little endian */, 1 /* signed */);
|
||||
}
|
||||
if (along == NULL)
|
||||
return -1;
|
||||
PDATA_PUSH(self->stack, along, -1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
load_float(Unpicklerobject *self)
|
||||
|
@ -3784,6 +3914,16 @@ load(Unpicklerobject *self)
|
|||
break;
|
||||
continue;
|
||||
|
||||
case LONG1:
|
||||
if (load_counted_long(self, 1) < 0)
|
||||
break;
|
||||
continue;
|
||||
|
||||
case LONG4:
|
||||
if (load_counted_long(self, 4) < 0)
|
||||
break;
|
||||
continue;
|
||||
|
||||
case FLOAT:
|
||||
if (load_float(self) < 0)
|
||||
break;
|
||||
|
|
|
@ -264,13 +264,13 @@ int
|
|||
_PyLong_Sign(PyObject *vv)
|
||||
{
|
||||
PyLongObject *v = (PyLongObject *)vv;
|
||||
const int ndigits = v->ob_size;
|
||||
const int ndigits = ABS(v->ob_size);
|
||||
|
||||
assert(v != NULL);
|
||||
assert(PyLong_Check(v));
|
||||
assert(ndigits == 0 || v->ob_digit[ndigits - 1] != 0);
|
||||
|
||||
return ndigits == 0 ? 0 : (ndigits < 0 ? -1 : 1);
|
||||
return v->ob_size == 0 ? 0 : (v->ob_size < 0 ? -1 : 1);
|
||||
}
|
||||
|
||||
size_t
|
||||
|
|
Loading…
Reference in New Issue