cPickle support for TUPLE[123]. Incidentally plugged several undetected

overflow holes in Pdata_grow().
This commit is contained in:
Tim Peters 2003-02-02 20:29:39 +00:00
parent 9af6968b90
commit 1d63c9f151
3 changed files with 250 additions and 91 deletions

View File

@ -621,8 +621,11 @@ class Pickler:
proto = self.proto
n = len(obj)
if n == 0 and proto:
if n == 0:
if proto:
write(EMPTY_TUPLE)
else:
write(MARK + TUPLE)
return
save = self.save
@ -639,13 +642,13 @@ class Pickler:
self.memoize(obj)
return
# proto 0, or proto 1 and tuple isn't empty, or proto > 1 and tuple
# proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
# has more than 3 elements.
write(MARK)
for element in obj:
save(element)
if n and id(obj) in memo:
if id(obj) in memo:
# Subtle. d was not in memo when we entered save_tuple(), so
# the process of saving the tuple's elements must have saved
# the tuple itself: the tuple is recursive. The proper action
@ -660,9 +663,8 @@ class Pickler:
write(POP * (n+1) + get)
return
# No recursion (including the empty-tuple case for protocol 0).
# No recursion.
self.write(TUPLE)
if obj: # No need to memoize empty tuple
self.memoize(obj)
dispatch[TupleType] = save_tuple

View File

@ -484,6 +484,27 @@ class AbstractPickleTests(unittest.TestCase):
self.assertEqual(x, y)
def test_short_tuples(self):
# Map (proto, len(tuple)) to expected opcode.
expected_opcode = {(0, 0): pickle.TUPLE,
(0, 1): pickle.TUPLE,
(0, 2): pickle.TUPLE,
(0, 3): pickle.TUPLE,
(0, 4): pickle.TUPLE,
(1, 0): pickle.EMPTY_TUPLE,
(1, 1): pickle.TUPLE,
(1, 2): pickle.TUPLE,
(1, 3): pickle.TUPLE,
(1, 4): pickle.TUPLE,
(2, 0): pickle.EMPTY_TUPLE,
(2, 1): pickle.TUPLE1,
(2, 2): pickle.TUPLE2,
(2, 3): pickle.TUPLE3,
(2, 4): pickle.TUPLE,
}
all_tuple_opcodes = (pickle.TUPLE, pickle.EMPTY_TUPLE,
pickle.TUPLE1, pickle.TUPLE2, pickle.TUPLE3)
a = ()
b = (1,)
c = (1, 2)
@ -495,6 +516,16 @@ class AbstractPickleTests(unittest.TestCase):
y = self.loads(s)
self.assertEqual(x, y, (proto, x, s, y))
# Verify that the protocol-correct tuple-building opcode
# was generated.
expected = expected_opcode[proto, len(x)]
for opcode in s:
if opcode in all_tuple_opcodes:
self.assertEqual(expected, opcode)
break
else:
self.fail("didn't find a tuple-building opcode in pickle")
def test_singletons(self):
for proto in protocols:
for x in None, False, True:

View File

@ -110,7 +110,8 @@ static PyObject *__class___str, *__getinitargs___str, *__dict___str,
typedef struct {
PyObject_HEAD
int length, size;
int length; /* number of initial slots in data currently used */
int size; /* number of slots in data allocated */
PyObject **data;
} Pdata;
@ -120,10 +121,11 @@ Pdata_dealloc(Pdata *self)
int i;
PyObject **p;
for (i=self->length, p=self->data; --i >= 0; p++) Py_DECREF(*p);
if (self->data) free(self->data);
for (i = self->length, p = self->data; --i >= 0; p++) {
Py_DECREF(*p);
}
if (self->data)
free(self->data);
PyObject_Del(self);
}
@ -140,11 +142,13 @@ Pdata_New(void)
{
Pdata *self;
if (!( self = PyObject_New(Pdata, &PdataType))) return NULL;
if (!(self = PyObject_New(Pdata, &PdataType)))
return NULL;
self->size = 8;
self->length = 0;
self->data = malloc(self->size * sizeof(PyObject*));
if (self->data) return (PyObject*)self;
if (self->data)
return (PyObject*)self;
Py_DECREF(self);
return PyErr_NoMemory();
}
@ -156,6 +160,9 @@ stackUnderflow(void)
return -1;
}
/* Retain only the initial clearto items. If clearto >= the current
* number of items, this is a (non-erroneous) NOP.
*/
static int
Pdata_clear(Pdata *self, int clearto)
{
@ -165,40 +172,57 @@ Pdata_clear(Pdata *self, int clearto)
if (clearto < 0) return stackUnderflow();
if (clearto >= self->length) return 0;
for (i=self->length, p=self->data+clearto; --i >= clearto; p++)
for (i = self->length, p = self->data + clearto;
--i >= clearto;
p++) {
Py_DECREF(*p);
}
self->length = clearto;
return 0;
}
static int
Pdata_grow(Pdata *self)
{
if (! self->size) {
PyErr_NoMemory();
return -1;
}
self->size *= 2;
self->data = realloc(self->data, self->size*sizeof(PyObject*));
if (! self->data) {
int bigger;
size_t nbytes;
if (! self->size)
goto nomemory;
bigger = self->size << 1;
if (bigger <= 0)
goto nomemory;
if ((int)(size_t)bigger != bigger)
goto nomemory;
nbytes = (size_t)bigger * sizeof(PyObject *);
if (nbytes / sizeof(PyObject *) != (size_t)bigger)
goto nomemory;
self->data = realloc(self->data, nbytes);
if (self->data == NULL)
goto nomemory;
self->size = bigger;
return 0;
nomemory:
self->size = 0;
PyErr_NoMemory();
return -1;
}
return 0;
}
/* D is a Pdata *. Pop the topmost element and store it into V, which
* must be an lvalue holding PyObject *. On stack underflow, UnpicklingError
* is raised and V is set to NULL. D and V may be evaluated several times.
*/
#define PDATA_POP(D, V) { \
if ((D)->length) V=D->data[--((D)->length)]; \
if ((D)->length) \
(V) = (D)->data[--((D)->length)]; \
else { \
PyErr_SetString(UnpicklingError, "bad pickle data"); \
V=NULL; \
(V) = NULL; \
} \
}
static PyObject *
Pdata_popTuple(Pdata *self, int start)
{
@ -206,7 +230,9 @@ Pdata_popTuple(Pdata *self, int start)
int i, j, l;
l = self->length-start;
if (!( r=PyTuple_New(l))) return NULL;
r = PyTuple_New(l);
if (r == NULL)
return NULL;
for (i = start, j = 0 ; j < l; i++, j++)
PyTuple_SET_ITEM(r, j, self->data[i]);
@ -1444,81 +1470,142 @@ save_unicode(Picklerobject *self, PyObject *args, int doput)
}
#endif
/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
static int
store_tuple_elememts(Picklerobject *self, PyObject *t, int len)
{
int i;
int res = -1; /* guilty until proved innocent */
assert(PyTuple_Size(t) == len);
for (i = 0; i < len; i++) {
PyObject *element = PyTuple_GET_ITEM(t, i);
if (element == NULL)
goto finally;
if (save(self, element, 0) < 0)
goto finally;
}
res = 0;
finally:
return res;
}
/* Tuples are ubiquitous in the pickle protocols, so many techniques are
* used across protocols to minimize the space needed to pickle them.
* Tuples are also the only builtin immuatable type that can be recursive
* (a tuple can be reached from itself), and that requires some subtle
* magic so that it works in all cases. IOW, this is a long routine.
*/
static int
save_tuple(Picklerobject *self, PyObject *args)
{
PyObject *element = 0, *py_tuple_id = 0;
int len, i, res = -1;
PyObject *py_tuple_id = NULL;
int len, i;
int res = -1;
static char tuple = TUPLE;
if (self->write_func(self, &MARKv, 1) < 0)
goto finally;
static char pop = POP;
static char pop_mark = POP_MARK;
static char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
if ((len = PyTuple_Size(args)) < 0)
goto finally;
for (i = 0; i < len; i++) {
if (!( element = PyTuple_GET_ITEM((PyTupleObject *)args, i)))
goto finally;
if (len == 0) {
char c_str[2];
if (save(self, element, 0) < 0)
if (self->proto) {
c_str[0] = EMPTY_TUPLE;
len = 1;
}
else {
c_str[0] = MARK;
c_str[1] = TUPLE;
len = 2;
}
if (self->write_func(self, c_str, len) >= 0)
res = 0;
/* Don't memoize an empty tuple. */
goto finally;
}
if (!( py_tuple_id = PyLong_FromVoidPtr(args)))
/* A non-empty tuple. */
/* id(tuple) isn't in the memo now. If it shows up there after
* saving the tuple elements, the tuple must be recursive, in
* which case we'll pop everything we put on the stack, and fetch
* its value from the memo.
*/
py_tuple_id = PyLong_FromVoidPtr(args);
if (py_tuple_id == NULL)
goto finally;
if (len) {
if (len <= 3 && self->proto >= 2) {
/* Use TUPLE{1,2,3} opcodes. */
if (store_tuple_elememts(self, args, len) < 0)
goto finally;
if (PyDict_GetItem(self->memo, py_tuple_id)) {
if (self->bin) {
static char pop_mark = POP_MARK;
/* pop the len elements */
for (i = 0; i < len; ++i)
if (self->write_func(self, &pop, 1) < 0)
goto finally;
/* fetch from memo */
if (get(self, py_tuple_id) < 0)
goto finally;
res = 0;
goto finally;
}
/* Not recursive. */
if (self->write_func(self, len2opcode + len, 1) < 0)
goto finally;
goto memoize;
}
/* proto < 2 and len > 0, or proto >= 2 and len > 3.
* Generate MARK elt1 elt2 ... TUPLE
*/
if (self->write_func(self, &MARKv, 1) < 0)
goto finally;
if (store_tuple_elememts(self, args, len) < 0)
goto finally;
if (PyDict_GetItem(self->memo, py_tuple_id)) {
/* pop the stack stuff we pushed */
if (self->bin) {
if (self->write_func(self, &pop_mark, 1) < 0)
goto finally;
}
else {
static char pop = POP;
for (i = 0; i <= len; i++) {
/* Note that we pop one more than len, to remove
* the MARK too.
*/
for (i = 0; i <= len; i++)
if (self->write_func(self, &pop, 1) < 0)
goto finally;
}
}
if (get(self, py_tuple_id) < 0)
goto finally;
/* fetch from memo */
if (get(self, py_tuple_id) >= 0)
res = 0;
goto finally;
}
}
if (self->write_func(self, &tuple, 1) < 0) {
goto finally;
}
if (put(self, args) < 0)
/* Not recursive. */
if (self->write_func(self, &tuple, 1) < 0)
goto finally;
memoize:
if (put(self, args) >= 0)
res = 0;
finally:
Py_XDECREF(py_tuple_id);
return res;
}
static int
save_empty_tuple(Picklerobject *self, PyObject *args)
{
static char tuple = EMPTY_TUPLE;
return self->write_func(self, &tuple, 1);
}
static int
save_list(Picklerobject *self, PyObject *args)
{
@ -2029,8 +2116,7 @@ save(Picklerobject *self, PyObject *args, int pers_save)
case 't':
if (type == &PyTuple_Type && PyTuple_Size(args) == 0) {
if (self->bin) res = save_empty_tuple(self, args);
else res = save_tuple(self, args);
res = save_tuple(self, args);
goto finally;
}
break;
@ -3193,11 +3279,21 @@ load_tuple(Unpicklerobject *self)
}
static int
load_empty_tuple(Unpicklerobject *self)
load_counted_tuple(Unpicklerobject *self, int len)
{
PyObject *tup;
PyObject *tup = PyTuple_New(len);
if (!( tup=PyTuple_New(0))) return -1;
if (tup == NULL)
return -1;
while (--len >= 0) {
PyObject *element;
PDATA_POP(self->stack, element);
if (element == NULL)
return -1;
PyTuple_SET_ITEM(tup, len, element);
}
PDATA_PUSH(self->stack, tup, -1);
return 0;
}
@ -4018,7 +4114,22 @@ load(Unpicklerobject *self)
#endif
case EMPTY_TUPLE:
if (load_empty_tuple(self) < 0)
if (load_counted_tuple(self, 0) < 0)
break;
continue;
case TUPLE1:
if (load_counted_tuple(self, 1) < 0)
break;
continue;
case TUPLE2:
if (load_counted_tuple(self, 2) < 0)
break;
continue;
case TUPLE3:
if (load_counted_tuple(self, 3) < 0)
break;
continue;
@ -4346,7 +4457,22 @@ noload(Unpicklerobject *self)
#endif
case EMPTY_TUPLE:
if (load_empty_tuple(self) < 0)
if (load_counted_tuple(self, 0) < 0)
break;
continue;
case TUPLE1:
if (load_counted_tuple(self, 1) < 0)
break;
continue;
case TUPLE2:
if (load_counted_tuple(self, 2) < 0)
break;
continue;
case TUPLE3:
if (load_counted_tuple(self, 3) < 0)
break;
continue;