Complete an open todo on pickletools -- add a pickle optimizer.

This commit is contained in:
Raymond Hettinger 2008-02-10 20:35:16 +00:00
parent 900b783526
commit da614dcc4f
3 changed files with 38 additions and 3 deletions

View File

@ -35,3 +35,10 @@ probably won't find the :mod:`pickletools` module relevant.
the opcode's argument; *pos* is the position at which this opcode is located.
*pickle* can be a string or a file-like object.
.. function:: optimize(picklestring)
Returns a new equivalent pickle string after eliminating unused ``PUT``
opcodes. The optimized pickle is shorter, takes less transmission time,
requires less storage space, and unpickles more efficiently.
.. versionadded:: 2.6

View File

@ -10,9 +10,7 @@ dis(pickle, out=None, memo=None, indentlevel=4)
Print a symbolic disassembly of a pickle.
'''
__all__ = ['dis',
'genops',
]
__all__ = ['dis', 'genops', 'optimize']
# Other ideas:
#
@ -1857,6 +1855,33 @@ def genops(pickle):
assert opcode.name == 'STOP'
break
##############################################################################
# A pickle optimizer.
def optimize(p):
    """Optimize a pickle string by removing unused PUT opcodes.

    A PUT opcode stores the object on top of the unpickling stack in the
    memo under some key; it is only useful if a later GET opcode fetches
    that key.  This function scans the pickle once with genops(), records
    every key that is fetched by a GET, and returns a copy of the pickle
    with each PUT whose key is never fetched cut out.

    *p* must be sliceable (a pickle string, not a file-like object),
    because the result is assembled from slices of it.  The returned
    value has the same type as *p*.
    """
    gets = set()        # memo keys referenced by some GET opcode
    puts = []           # (arg, startpos, stoppos) for every PUT opcode
    pending = None      # (arg, startpos) of a PUT whose end is not yet known

    for opcode, arg, pos in genops(p):
        # genops() only reports where an opcode starts, so a PUT's extent
        # is known once the start of the following opcode is seen.
        if pending is not None:
            puts.append(pending + (pos,))
            pending = None
        # Substring tests cover every opcode whose name contains PUT / GET.
        if 'PUT' in opcode.name:
            pending = (arg, pos)
        elif 'GET' in opcode.name:
            gets.add(arg)

    # Copy the pickle string except for PUTs without a corresponding GET.
    chunks = []
    i = 0
    for arg, start, stop in puts:
        # Keep the PUT (copy through its end) only if its key is fetched;
        # otherwise copy up to its start and skip over it.
        j = stop if (arg in gets) else start
        chunks.append(p[i:j])
        i = stop
    chunks.append(p[i:])

    # Join on an empty slice of the input so the result has the same
    # string type as *p* instead of being tied to one literal type.
    return p[:0].join(chunks)
##############################################################################
# A symbolic pickle disassembler.

View File

@ -400,6 +400,9 @@ Core and builtins
Library
-------
- The pickletools module now provides an optimize() function
that eliminates unused PUT opcodes from a pickle string.
- #2021: Allow tempfile.NamedTemporaryFile and SpooledTemporaryFile
to be used in with statements by correctly supporting the context
management protocol.