Issue #19858: pickletools.optimize() is now aware of the MEMOIZE opcode, can
produce a more compact result, and no longer produces invalid output if the input data contains MEMOIZE opcodes together with PUT or BINPUT opcodes.
This commit is contained in:
commit
f5553bbb0e
|
@ -2282,40 +2282,61 @@ def genops(pickle):
|
||||||
|
|
||||||
def optimize(p):
|
def optimize(p):
|
||||||
'Optimize a pickle string by removing unused PUT opcodes'
|
'Optimize a pickle string by removing unused PUT opcodes'
|
||||||
not_a_put = object()
|
put = 'PUT'
|
||||||
gets = { not_a_put } # set of args used by a GET opcode
|
get = 'GET'
|
||||||
opcodes = [] # (startpos, stoppos, putid)
|
oldids = set() # set of all PUT ids
|
||||||
|
newids = {} # set of ids used by a GET opcode
|
||||||
|
opcodes = [] # (op, idx) or (pos, end_pos)
|
||||||
proto = 0
|
proto = 0
|
||||||
|
protoheader = b''
|
||||||
for opcode, arg, pos, end_pos in _genops(p, yield_end_pos=True):
|
for opcode, arg, pos, end_pos in _genops(p, yield_end_pos=True):
|
||||||
if 'PUT' in opcode.name:
|
if 'PUT' in opcode.name:
|
||||||
opcodes.append((pos, end_pos, arg))
|
oldids.add(arg)
|
||||||
|
opcodes.append((put, arg))
|
||||||
|
elif opcode.name == 'MEMOIZE':
|
||||||
|
idx = len(oldids)
|
||||||
|
oldids.add(idx)
|
||||||
|
opcodes.append((put, idx))
|
||||||
elif 'FRAME' in opcode.name:
|
elif 'FRAME' in opcode.name:
|
||||||
pass
|
pass
|
||||||
else:
|
elif 'GET' in opcode.name:
|
||||||
if 'GET' in opcode.name:
|
if opcode.proto > proto:
|
||||||
gets.add(arg)
|
proto = opcode.proto
|
||||||
elif opcode.name == 'PROTO':
|
newids[arg] = None
|
||||||
assert pos == 0, pos
|
opcodes.append((get, arg))
|
||||||
|
elif opcode.name == 'PROTO':
|
||||||
|
if arg > proto:
|
||||||
proto = arg
|
proto = arg
|
||||||
opcodes.append((pos, end_pos, not_a_put))
|
if pos == 0:
|
||||||
prevpos, prevarg = pos, None
|
protoheader = p[pos: end_pos]
|
||||||
|
else:
|
||||||
|
opcodes.append((pos, end_pos))
|
||||||
|
else:
|
||||||
|
opcodes.append((pos, end_pos))
|
||||||
|
del oldids
|
||||||
|
|
||||||
# Copy the opcodes except for PUTS without a corresponding GET
|
# Copy the opcodes except for PUTS without a corresponding GET
|
||||||
out = io.BytesIO()
|
out = io.BytesIO()
|
||||||
opcodes = iter(opcodes)
|
# Write the PROTO header before any framing
|
||||||
if proto >= 2:
|
out.write(protoheader)
|
||||||
# Write the PROTO header before any framing
|
pickler = pickle._Pickler(out, proto)
|
||||||
start, stop, _ = next(opcodes)
|
|
||||||
out.write(p[start:stop])
|
|
||||||
buf = pickle._Framer(out.write)
|
|
||||||
if proto >= 4:
|
if proto >= 4:
|
||||||
buf.start_framing()
|
pickler.framer.start_framing()
|
||||||
for start, stop, putid in opcodes:
|
idx = 0
|
||||||
if putid in gets:
|
for op, arg in opcodes:
|
||||||
buf.commit_frame()
|
if op is put:
|
||||||
buf.write(p[start:stop])
|
if arg not in newids:
|
||||||
if proto >= 4:
|
continue
|
||||||
buf.end_framing()
|
data = pickler.put(idx)
|
||||||
|
newids[arg] = idx
|
||||||
|
idx += 1
|
||||||
|
elif op is get:
|
||||||
|
data = pickler.get(newids[arg])
|
||||||
|
else:
|
||||||
|
data = p[op:arg]
|
||||||
|
pickler.framer.commit_frame()
|
||||||
|
pickler.write(data)
|
||||||
|
pickler.framer.end_framing()
|
||||||
return out.getvalue()
|
return out.getvalue()
|
||||||
|
|
||||||
##############################################################################
|
##############################################################################
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import struct
|
||||||
import pickle
|
import pickle
|
||||||
import pickletools
|
import pickletools
|
||||||
from test import support
|
from test import support
|
||||||
|
@ -15,6 +16,48 @@ class OptimizedPickleTests(AbstractPickleTests, AbstractPickleModuleTests):
|
||||||
# Test relies on precise output of dumps()
|
# Test relies on precise output of dumps()
|
||||||
test_pickle_to_2x = None
|
test_pickle_to_2x = None
|
||||||
|
|
||||||
|
def test_optimize_long_binget(self):
|
||||||
|
data = [str(i) for i in range(257)]
|
||||||
|
data.append(data[-1])
|
||||||
|
for proto in range(pickle.HIGHEST_PROTOCOL + 1):
|
||||||
|
pickled = pickle.dumps(data, proto)
|
||||||
|
unpickled = pickle.loads(pickled)
|
||||||
|
self.assertEqual(unpickled, data)
|
||||||
|
self.assertIs(unpickled[-1], unpickled[-2])
|
||||||
|
|
||||||
|
pickled2 = pickletools.optimize(pickled)
|
||||||
|
unpickled2 = pickle.loads(pickled2)
|
||||||
|
self.assertEqual(unpickled2, data)
|
||||||
|
self.assertIs(unpickled2[-1], unpickled2[-2])
|
||||||
|
self.assertNotIn(pickle.LONG_BINGET, pickled2)
|
||||||
|
self.assertNotIn(pickle.LONG_BINPUT, pickled2)
|
||||||
|
|
||||||
|
def test_optimize_binput_and_memoize(self):
|
||||||
|
pickled = (b'\x80\x04\x95\x15\x00\x00\x00\x00\x00\x00\x00'
|
||||||
|
b']\x94(\x8c\x04spamq\x01\x8c\x03ham\x94h\x02e.')
|
||||||
|
# 0: \x80 PROTO 4
|
||||||
|
# 2: \x95 FRAME 21
|
||||||
|
# 11: ] EMPTY_LIST
|
||||||
|
# 12: \x94 MEMOIZE
|
||||||
|
# 13: ( MARK
|
||||||
|
# 14: \x8c SHORT_BINUNICODE 'spam'
|
||||||
|
# 20: q BINPUT 1
|
||||||
|
# 22: \x8c SHORT_BINUNICODE 'ham'
|
||||||
|
# 27: \x94 MEMOIZE
|
||||||
|
# 28: h BINGET 2
|
||||||
|
# 30: e APPENDS (MARK at 13)
|
||||||
|
# 31: . STOP
|
||||||
|
self.assertIn(pickle.BINPUT, pickled)
|
||||||
|
unpickled = pickle.loads(pickled)
|
||||||
|
self.assertEqual(unpickled, ['spam', 'ham', 'ham'])
|
||||||
|
self.assertIs(unpickled[1], unpickled[2])
|
||||||
|
|
||||||
|
pickled2 = pickletools.optimize(pickled)
|
||||||
|
unpickled2 = pickle.loads(pickled2)
|
||||||
|
self.assertEqual(unpickled2, ['spam', 'ham', 'ham'])
|
||||||
|
self.assertIs(unpickled2[1], unpickled2[2])
|
||||||
|
self.assertNotIn(pickle.BINPUT, pickled2)
|
||||||
|
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
support.run_unittest(OptimizedPickleTests)
|
support.run_unittest(OptimizedPickleTests)
|
||||||
|
|
|
@ -196,6 +196,10 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #19858: pickletools.optimize() is now aware of the MEMOIZE opcode, can
|
||||||
|
produce a more compact result and no longer produces invalid output if the input
|
||||||
|
data contains MEMOIZE opcodes together with PUT or BINPUT opcodes.
|
||||||
|
|
||||||
- Issue #22095: Fixed HTTPConnection.set_tunnel with default port. The port
|
- Issue #22095: Fixed HTTPConnection.set_tunnel with default port. The port
|
||||||
value in the host header was set to "None". Patch by Demian Brecht.
|
value in the host header was set to "None". Patch by Demian Brecht.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue