Issue #19858: pickletools.optimize() is now aware of the MEMOIZE opcode, can
produce a more compact result, and no longer produces invalid output if the
input data contains MEMOIZE opcodes together with PUT or BINPUT opcodes.
This commit is contained in:
Serhiy Storchaka 2014-12-16 18:02:49 +02:00
commit f5553bbb0e
3 changed files with 92 additions and 24 deletions

View File

@ -2282,40 +2282,61 @@ def genops(pickle):
def optimize(p):
    """Optimize a pickle string by removing unused PUT opcodes.

    The pickle *p* is scanned once to record every memo store (PUT-family
    opcodes and MEMOIZE) and every memo fetch (GET-family opcodes).  It is
    then re-emitted with:

    * stores that are never fetched dropped,
    * the surviving memo ids renumbered consecutively from 0,
    * existing FRAME opcodes discarded and framing regenerated by the
      pickler's framer when the detected protocol is 4 or higher.

    All other opcodes are copied through byte-for-byte.

    Returns the optimized pickle as a bytes object.
    """
    # Marker objects for memo operations.  They are compared by identity
    # below, which distinguishes them from the integer start offsets used
    # for opcodes that are copied verbatim.
    put = 'PUT'
    get = 'GET'
    oldids = set()          # every memo id stored by the input pickle
    newids = {}             # old memo id -> new memo id, for ids that are fetched
    opcodes = []            # (put|get, old id) or (pos, end_pos)
    proto = 0
    protoheader = b''
    for opcode, arg, pos, end_pos in _genops(p, yield_end_pos=True):
        if 'PUT' in opcode.name:
            oldids.add(arg)
            opcodes.append((put, arg))
        elif opcode.name == 'MEMOIZE':
            # MEMOIZE carries no argument; the id it stores to is taken to
            # be the number of memo ids recorded so far.
            idx = len(oldids)
            oldids.add(idx)
            opcodes.append((put, idx))
        elif 'FRAME' in opcode.name:
            # Old frame boundaries are dropped; new ones are written below.
            pass
        elif 'GET' in opcode.name:
            if opcode.proto > proto:
                proto = opcode.proto
            # Mark the id as used; its new number is assigned when the
            # matching store is re-emitted.
            newids[arg] = None
            opcodes.append((get, arg))
        elif opcode.name == 'PROTO':
            if arg > proto:
                proto = arg
            if pos == 0:
                # A leading PROTO opcode is kept aside so that it can be
                # written ahead of any frame.
                protoheader = p[pos:end_pos]
            else:
                opcodes.append((pos, end_pos))
        else:
            opcodes.append((pos, end_pos))
    del oldids

    # Copy the opcodes except for PUTS without a corresponding GET
    out = io.BytesIO()
    # Write the PROTO header before any framing
    out.write(protoheader)
    pickler = pickle._Pickler(out, proto)
    if proto >= 4:
        pickler.framer.start_framing()
    idx = 0                 # next new memo id to hand out
    for op, arg in opcodes:
        if op is put:
            if arg not in newids:
                # Stored but never fetched: drop the store entirely.
                continue
            data = pickler.put(idx)
            newids[arg] = idx
            idx += 1
        elif op is get:
            data = pickler.get(newids[arg])
        else:
            # Here (op, arg) is (pos, end_pos): copy the opcode unchanged.
            data = p[op:arg]
        # Give the framer a chance to close the current frame before the
        # next opcode is written (see pickle._Framer.commit_frame).
        pickler.framer.commit_frame()
        pickler.write(data)
    pickler.framer.end_framing()
    return out.getvalue()
##############################################################################

View File

@ -1,3 +1,4 @@
import struct
import pickle
import pickletools
from test import support
@ -15,6 +16,48 @@ class OptimizedPickleTests(AbstractPickleTests, AbstractPickleModuleTests):
# Test relies on precise output of dumps()
test_pickle_to_2x = None
def test_optimize_long_binget(self):
    # 257 distinct strings, with only the last one referenced twice.
    # After optimize() the pickle must still round-trip, keep the shared
    # reference, and contain neither LONG_BINGET nor LONG_BINPUT.
    data = [str(i) for i in range(257)]
    data.append(data[-1])
    for proto in range(pickle.HIGHEST_PROTOCOL + 1):
        # subTest makes a failure report name the offending protocol.
        with self.subTest(proto=proto):
            pickled = pickle.dumps(data, proto)
            unpickled = pickle.loads(pickled)
            self.assertEqual(unpickled, data)
            self.assertIs(unpickled[-1], unpickled[-2])

            pickled2 = pickletools.optimize(pickled)
            unpickled2 = pickle.loads(pickled2)
            self.assertEqual(unpickled2, data)
            self.assertIs(unpickled2[-1], unpickled2[-2])
            self.assertNotIn(pickle.LONG_BINGET, pickled2)
            self.assertNotIn(pickle.LONG_BINPUT, pickled2)
def test_optimize_binput_and_memoize(self):
    # A hand-written protocol 4 pickle that mixes MEMOIZE with an
    # explicit BINPUT.  optimize() must keep the shared 'ham' reference
    # intact and drop the BINPUT.
    pickled = (b'\x80\x04\x95\x15\x00\x00\x00\x00\x00\x00\x00'
               b']\x94(\x8c\x04spamq\x01\x8c\x03ham\x94h\x02e.')
    #    0: \x80 PROTO      4
    #    2: \x95 FRAME      21
    #   11: ]    EMPTY_LIST
    #   12: \x94 MEMOIZE
    #   13: (    MARK
    #   14: \x8c     SHORT_BINUNICODE 'spam'
    #   20: q        BINPUT     1
    #   22: \x8c     SHORT_BINUNICODE 'ham'
    #   27: \x94     MEMOIZE
    #   28: h        BINGET     2
    #   30: e        APPENDS    (MARK at 13)
    #   31: .    STOP
    self.assertIn(pickle.BINPUT, pickled)
    unpickled = pickle.loads(pickled)
    self.assertEqual(unpickled, ['spam', 'ham', 'ham'])
    self.assertIs(unpickled[1], unpickled[2])

    pickled2 = pickletools.optimize(pickled)
    unpickled2 = pickle.loads(pickled2)
    self.assertEqual(unpickled2, ['spam', 'ham', 'ham'])
    self.assertIs(unpickled2[1], unpickled2[2])
    self.assertNotIn(pickle.BINPUT, pickled2)
def test_main():
    # Entry point: run the OptimizedPickleTests suite via test.support.
    support.run_unittest(OptimizedPickleTests)

View File

@ -196,6 +196,10 @@ Core and Builtins
Library
-------
- Issue #19858: pickletools.optimize() is now aware of the MEMOIZE opcode, can
  produce a more compact result, and no longer produces invalid output if the
  input data contains MEMOIZE opcodes together with PUT or BINPUT opcodes.
- Issue #22095: Fixed HTTPConnection.set_tunnel with default port. The port
value in the host header was set to "None". Patch by Demian Brecht.