Another extension to __reduce__(). It can return a 4- or 5-tuple now.

The 4th item can be None or an iterator yielding list items, which are
used to append() or extend() the object.  The 5th item can be None or
an iterator yielding a dict's (key, value) pairs, which are stuffed
into the object using __setitem__.

Also (as a separate, though related, feature) add "batching" for list
and dict items.  If you pickled a dict or list with a million items in
the past, it would push a million items onto the stack.  It now pushes
only 1000 items at a time on the stack, using repeated APPENDS or
SETITEMS opcodes.  (For lists, I hope that using many short extend()
calls doesn't exhibit quadratic behavior.)
This commit is contained in:
Guido van Rossum 2003-01-31 18:53:21 +00:00
parent f9b0cc7df6
commit 25cb7dfb0f
1 changed files with 94 additions and 69 deletions

View File

@ -317,19 +317,14 @@ class Pickler:
if type(rv) is not TupleType: if type(rv) is not TupleType:
raise PicklingError("%s must return string or tuple" % reduce) raise PicklingError("%s must return string or tuple" % reduce)
# Assert that it returned a 2-tuple or 3-tuple, and unpack it # Assert that it returned an appropriately sized tuple
l = len(rv) l = len(rv)
if l == 2: if not (2 <= l <= 5):
func, args = rv
state = None
elif l == 3:
func, args, state = rv
else:
raise PicklingError("Tuple returned by %s must have " raise PicklingError("Tuple returned by %s must have "
"exactly two or three elements" % reduce) "two to five elements" % reduce)
# Save the reduce() output and finally memoize the object # Save the reduce() output and finally memoize the object
self.save_reduce(func, args, state, obj) self.save_reduce(obj=obj, *rv)
def persistent_id(self, obj): def persistent_id(self, obj):
# This exists so a subclass can override it # This exists so a subclass can override it
@ -343,7 +338,8 @@ class Pickler:
else: else:
self.write(PERSID + str(pid) + '\n') self.write(PERSID + str(pid) + '\n')
def save_reduce(self, func, args, state=None, obj=None): def save_reduce(self, func, args, state=None,
listitems=None, dictitems=None, obj=None):
# This API is called by some subclasses # This API is called by some subclasses
# Assert that args is a tuple or None # Assert that args is a tuple or None
@ -411,6 +407,17 @@ class Pickler:
if obj is not None: if obj is not None:
self.memoize(obj) self.memoize(obj)
# More new special cases (that work with older protocols as
# well): when __reduce__ returns a tuple with 4 or 5 items,
# the 4th and 5th item should be iterators that provide list
# items and dict items (as (key, value) tuples), or None.
if listitems is not None:
self._batch_appends(listitems)
if dictitems is not None:
self._batch_setitems(dictitems)
if state is not None: if state is not None:
save(state) save(state)
write(BUILD) write(BUILD)
@ -434,28 +441,9 @@ class Pickler:
self.memoize(obj) self.memoize(obj)
if isinstance(obj, list): if isinstance(obj, list):
n = len(obj) self._batch_appends(iter(obj))
if n > 1:
write(MARK)
for x in obj:
save(x)
write(APPENDS)
elif n == 1:
save(obj[0])
write(APPEND)
elif isinstance(obj, dict): elif isinstance(obj, dict):
n = len(obj) self._batch_setitems(obj.iteritems())
if n > 1:
write(MARK)
for k, v in obj.iteritems():
save(k)
save(v)
write(SETITEMS)
elif n == 1:
k, v = obj.items()[0]
save(k)
save(v)
write(SETITEM)
getstate = getattr(obj, "__getstate__", None) getstate = getattr(obj, "__getstate__", None)
@ -683,63 +671,100 @@ class Pickler:
def save_list(self, obj): def save_list(self, obj):
write = self.write write = self.write
save = self.save
if self.bin: if self.bin:
write(EMPTY_LIST) write(EMPTY_LIST)
self.memoize(obj) else: # proto 0 -- can't use EMPTY_LIST
n = len(obj)
if n > 1:
write(MARK)
for element in obj:
save(element)
write(APPENDS)
elif n:
assert n == 1
save(obj[0])
write(APPEND)
# else the list is empty, and we're already done
else: # proto 0 -- can't use EMPTY_LIST or APPENDS
write(MARK + LIST) write(MARK + LIST)
self.memoize(obj)
for element in obj: self.memoize(obj)
save(element) self._batch_appends(iter(obj))
write(APPEND)
dispatch[ListType] = save_list dispatch[ListType] = save_list
_BATCHSIZE = 1000
def _batch_appends(self, items):
    """Pickle `items` as appends to the object on top of the pickle stack.

    `items` may be any iterable: an iterator (such as the 4th element
    of a __reduce__ tuple) or a plain sequence.

    In protocol 0 (self.bin false) every item is saved and followed by
    its own APPEND.  In the binary protocols the items are written in
    batches of at most self._BATCHSIZE: a batch of two or more items
    is bracketed by MARK ... APPENDS, while a batch holding a single
    item is written with a plain APPEND.  An empty `items` writes
    nothing.
    """
    save = self.save
    write = self.write

    if not self.bin:
        for x in items:
            save(x)
            write(APPEND)
        return

    def flush(batch):
        # Emit one batch; a lone item does not need MARK/APPENDS.
        if len(batch) > 1:
            write(MARK)
            for x in batch:
                save(x)
            write(APPENDS)
        elif batch:
            save(batch[0])
            write(APPEND)
        # else the batch is empty, and there is nothing to write

    # Use a plain for loop rather than items.next() so that sequences
    # are accepted here exactly as they are in the proto 0 branch.
    batchsize = self._BATCHSIZE
    tmp = []
    for x in items:
        tmp.append(x)
        if len(tmp) >= batchsize:
            flush(tmp)
            tmp = []
    flush(tmp)
def save_dict(self, obj): def save_dict(self, obj):
write = self.write write = self.write
save = self.save
items = obj.iteritems()
if self.bin: if self.bin:
write(EMPTY_DICT) write(EMPTY_DICT)
self.memoize(obj) else: # proto 0 -- can't use EMPTY_DICT
if len(obj) > 1:
write(MARK)
for key, value in items:
save(key)
save(value)
write(SETITEMS)
return
# else (dict is empty or a singleton), fall through to the
# SETITEM code at the end
else: # proto 0 -- can't use EMPTY_DICT or SETITEMS
write(MARK + DICT) write(MARK + DICT)
self.memoize(obj)
# proto 0 or len(obj) < 2 self.memoize(obj)
for key, value in items: self._batch_setitems(obj.iteritems())
save(key)
save(value)
write(SETITEM)
dispatch[DictionaryType] = save_dict dispatch[DictionaryType] = save_dict
if not PyStringMap is None: if not PyStringMap is None:
dispatch[PyStringMap] = save_dict dispatch[PyStringMap] = save_dict
def _batch_setitems(self, items):
    """Pickle (key, value) pairs as item assignments on the stack-top object.

    `items` may be any iterable of 2-element (key, value) pairs: an
    iterator (such as dict.iteritems() or the 5th element of a
    __reduce__ tuple) or a plain sequence of pairs.

    Batching applies to the binary protocols only.  In protocol 0
    (self.bin false) every pair is saved and followed by its own
    SETITEM.  Otherwise the pairs are written in batches of at most
    self._BATCHSIZE: a batch of two or more pairs is bracketed by
    MARK ... SETITEMS, while a batch holding a single pair is written
    with a plain SETITEM.  An empty `items` writes nothing.
    """
    save = self.save
    write = self.write

    if not self.bin:
        for k, v in items:
            save(k)
            save(v)
            write(SETITEM)
        return

    def flush(batch):
        # Emit one batch; a lone pair does not need MARK/SETITEMS.
        if len(batch) > 1:
            write(MARK)
            for k, v in batch:
                save(k)
                save(v)
            write(SETITEMS)
        elif batch:
            k, v = batch[0]
            save(k)
            save(v)
            write(SETITEM)
        # else the batch is empty, and there is nothing to write

    # Use a plain for loop rather than items.next() so that sequences
    # of pairs are accepted here exactly as in the proto 0 branch.
    batchsize = self._BATCHSIZE
    tmp = []
    for pair in items:
        tmp.append(pair)
        if len(tmp) >= batchsize:
            flush(tmp)
            tmp = []
    flush(tmp)
def save_inst(self, obj): def save_inst(self, obj):
cls = obj.__class__ cls = obj.__class__