def optimize(p):
    """Return a copy of pickle string *p* without its unused PUT opcodes.

    An opcode whose name contains 'PUT' is dropped when no opcode whose
    name contains 'GET' carries the same argument anywhere in the pickle.
    Every other byte of *p* is copied through unchanged.
    """
    fetched = set()         # arguments seen on GET-style opcodes
    put_spans = []          # (arg, first_byte, end_byte) per PUT-style opcode
    pending_arg = None      # argument of a PUT whose end is not yet known
    pending_pos = None      # position of that PUT, or None if none is pending

    for opcode, arg, pos in genops(p):
        # The position of the opcode after a PUT is where the PUT ends.
        if pending_pos is not None:
            put_spans.append((pending_arg, pending_pos, pos))
            pending_pos = None
        name = opcode.name
        if 'PUT' in name:
            pending_arg = arg
            pending_pos = pos
        elif 'GET' in name:
            fetched.add(arg)

    # Reassemble the pickle, cutting out each PUT that no GET refers to.
    pieces = []
    cursor = 0
    for arg, begin, end in put_spans:
        if arg in fetched:
            # Still needed: keep everything up to and including this PUT.
            pieces.append(p[cursor:end])
        else:
            # Unused: keep only the bytes that precede this PUT.
            pieces.append(p[cursor:begin])
        cursor = end
    pieces.append(p[cursor:])
    return ''.join(pieces)
diff --git a/Misc/NEWS b/Misc/NEWS index 96ea7d7d62e..c519ba664e2 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -400,6 +400,9 @@ Core and builtins Library ------- +- The pickletools module now provides an optimize() function + that eliminates unused PUT opcodes from a pickle string. + - #2021: Allow tempfile.NamedTemporaryFile and SpooledTemporaryFile to be used in with statements by correctly supporting the context management protocol.