Removed the new LONG2 opcode: it's extravagant. If LONG1 isn't enough,
then the embedded argument consumes at least 256 bytes. The difference between a 3-byte prefix (LONG2 + 2 bytes) and a 5-byte prefix (LONG4 + 4 bytes) is therefore, even in the worst case, less than 1% of the opcode's total size. Note that binary strings and binary Unicode strings also come only in "size is 1 byte" and "size is 4 bytes" flavors, presumably for the same reason. The only place a 2-byte quantity was used was in BININT2, where the 2 bytes make up the *entire* embedded argument (and now EXT2 also does this); that's a large savings over 4 bytes, because the total opcode+argument size is so small in the BININT2/EXT2 case. Removed the TAKEN_FROM_ARGUMENT "number of bytes" code, and split it into TAKEN_FROM_ARGUMENT1 and TAKEN_FROM_ARGUMENT4. Now there's enough info in ArgumentDescriptor objects to deduce the number of bytes consumed by each opcode. Rearranged the order in which proto2 opcodes are listed in pickle.py.
This commit is contained in:
parent
bdbe74183c
commit
fdb8cfab08
|
@ -135,19 +135,18 @@ FALSE = 'I00\n' # not an opcode; see INT docs in pickletools.py
|
||||||
|
|
||||||
# Protocol 2 (not yet implemented) (XXX comments will be added later)
|
# Protocol 2 (not yet implemented) (XXX comments will be added later)
|
||||||
|
|
||||||
NEWOBJ = '\x81'
|
|
||||||
PROTO = '\x80'
|
PROTO = '\x80'
|
||||||
EXT2 = '\x83'
|
NEWOBJ = '\x81'
|
||||||
EXT1 = '\x82'
|
EXT1 = '\x82'
|
||||||
TUPLE1 = '\x85'
|
EXT2 = '\x83'
|
||||||
EXT4 = '\x84'
|
EXT4 = '\x84'
|
||||||
TUPLE3 = '\x87'
|
TUPLE1 = '\x85'
|
||||||
TUPLE2 = '\x86'
|
TUPLE2 = '\x86'
|
||||||
NEWFALSE = '\x89'
|
TUPLE3 = '\x87'
|
||||||
NEWTRUE = '\x88'
|
NEWTRUE = '\x88'
|
||||||
LONG2 = '\x8b'
|
NEWFALSE = '\x89'
|
||||||
LONG1 = '\x8a'
|
LONG1 = '\x8a'
|
||||||
LONG4 = '\x8c'
|
LONG4 = '\x8b'
|
||||||
|
|
||||||
|
|
||||||
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
|
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
|
||||||
|
|
|
@ -125,7 +125,8 @@ UP_TO_NEWLINE = -1
|
||||||
|
|
||||||
# Represents the number of bytes consumed by a two-argument opcode where
|
# Represents the number of bytes consumed by a two-argument opcode where
|
||||||
# the first argument gives the number of bytes in the second argument.
|
# the first argument gives the number of bytes in the second argument.
|
||||||
TAKEN_FROM_ARGUMENT = -2
|
TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int
|
||||||
|
TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int
|
||||||
|
|
||||||
class ArgumentDescriptor(object):
|
class ArgumentDescriptor(object):
|
||||||
__slots__ = (
|
__slots__ = (
|
||||||
|
@ -133,7 +134,8 @@ class ArgumentDescriptor(object):
|
||||||
'name',
|
'name',
|
||||||
|
|
||||||
# length of argument, in bytes; an int; UP_TO_NEWLINE and
|
# length of argument, in bytes; an int; UP_TO_NEWLINE and
|
||||||
# TAKEN_FROM_ARGUMENT are negative values for variable-length cases
|
# TAKEN_FROM_ARGUMENT{1,4} are negative values for variable-length
|
||||||
|
# cases
|
||||||
'n',
|
'n',
|
||||||
|
|
||||||
# a function taking a file-like object, reading this kind of argument
|
# a function taking a file-like object, reading this kind of argument
|
||||||
|
@ -150,8 +152,9 @@ class ArgumentDescriptor(object):
|
||||||
self.name = name
|
self.name = name
|
||||||
|
|
||||||
assert isinstance(n, int) and (n >= 0 or
|
assert isinstance(n, int) and (n >= 0 or
|
||||||
n is UP_TO_NEWLINE or
|
n in (UP_TO_NEWLINE,
|
||||||
n is TAKEN_FROM_ARGUMENT)
|
TAKEN_FROM_ARGUMENT1,
|
||||||
|
TAKEN_FROM_ARGUMENT4))
|
||||||
self.n = n
|
self.n = n
|
||||||
|
|
||||||
self.reader = reader
|
self.reader = reader
|
||||||
|
@ -341,7 +344,7 @@ def read_string4(f):
|
||||||
|
|
||||||
string4 = ArgumentDescriptor(
|
string4 = ArgumentDescriptor(
|
||||||
name="string4",
|
name="string4",
|
||||||
n=TAKEN_FROM_ARGUMENT,
|
n=TAKEN_FROM_ARGUMENT4,
|
||||||
reader=read_string4,
|
reader=read_string4,
|
||||||
doc="""A counted string.
|
doc="""A counted string.
|
||||||
|
|
||||||
|
@ -370,7 +373,7 @@ def read_string1(f):
|
||||||
|
|
||||||
string1 = ArgumentDescriptor(
|
string1 = ArgumentDescriptor(
|
||||||
name="string1",
|
name="string1",
|
||||||
n=TAKEN_FROM_ARGUMENT,
|
n=TAKEN_FROM_ARGUMENT1,
|
||||||
reader=read_string1,
|
reader=read_string1,
|
||||||
doc="""A counted string.
|
doc="""A counted string.
|
||||||
|
|
||||||
|
@ -434,7 +437,7 @@ def read_unicodestring4(f):
|
||||||
|
|
||||||
unicodestring4 = ArgumentDescriptor(
|
unicodestring4 = ArgumentDescriptor(
|
||||||
name="unicodestring4",
|
name="unicodestring4",
|
||||||
n=TAKEN_FROM_ARGUMENT,
|
n=TAKEN_FROM_ARGUMENT4,
|
||||||
reader=read_unicodestring4,
|
reader=read_unicodestring4,
|
||||||
doc="""A counted Unicode string.
|
doc="""A counted Unicode string.
|
||||||
|
|
||||||
|
@ -626,7 +629,7 @@ def read_long1(f):
|
||||||
|
|
||||||
long1 = ArgumentDescriptor(
|
long1 = ArgumentDescriptor(
|
||||||
name="long1",
|
name="long1",
|
||||||
n=TAKEN_FROM_ARGUMENT,
|
n=TAKEN_FROM_ARGUMENT1,
|
||||||
reader=read_long1,
|
reader=read_long1,
|
||||||
doc="""A binary long, little-endian, using 1-byte size.
|
doc="""A binary long, little-endian, using 1-byte size.
|
||||||
|
|
||||||
|
@ -634,36 +637,6 @@ long1 = ArgumentDescriptor(
|
||||||
many bytes and interprets them as a little-endian 2's-complement long.
|
many bytes and interprets them as a little-endian 2's-complement long.
|
||||||
""")
|
""")
|
||||||
|
|
||||||
def read_long2(f):
|
|
||||||
r"""
|
|
||||||
>>> import StringIO
|
|
||||||
>>> read_long2(StringIO.StringIO("\x02\x00\xff\x00"))
|
|
||||||
255L
|
|
||||||
>>> read_long2(StringIO.StringIO("\x02\x00\xff\x7f"))
|
|
||||||
32767L
|
|
||||||
>>> read_long2(StringIO.StringIO("\x02\x00\x00\xff"))
|
|
||||||
-256L
|
|
||||||
>>> read_long2(StringIO.StringIO("\x02\x00\x00\x80"))
|
|
||||||
-32768L
|
|
||||||
>>>
|
|
||||||
"""
|
|
||||||
|
|
||||||
n = read_uint2(f)
|
|
||||||
data = f.read(n)
|
|
||||||
if len(data) != n:
|
|
||||||
raise ValueError("not enough data in stream to read long2")
|
|
||||||
return decode_long(data)
|
|
||||||
|
|
||||||
long2 = ArgumentDescriptor(
|
|
||||||
name="long2",
|
|
||||||
n=TAKEN_FROM_ARGUMENT,
|
|
||||||
reader=read_long2,
|
|
||||||
doc="""A binary long, little-endian, using 2-byte size.
|
|
||||||
|
|
||||||
This first reads two byte as an unsigned size, then reads that
|
|
||||||
many bytes and interprets them as a little-endian 2's-complement long.
|
|
||||||
""")
|
|
||||||
|
|
||||||
def read_long4(f):
|
def read_long4(f):
|
||||||
r"""
|
r"""
|
||||||
>>> import StringIO
|
>>> import StringIO
|
||||||
|
@ -688,7 +661,7 @@ def read_long4(f):
|
||||||
|
|
||||||
long4 = ArgumentDescriptor(
|
long4 = ArgumentDescriptor(
|
||||||
name="long4",
|
name="long4",
|
||||||
n=TAKEN_FROM_ARGUMENT,
|
n=TAKEN_FROM_ARGUMENT4,
|
||||||
reader=read_long4,
|
reader=read_long4,
|
||||||
doc="""A binary representation of a long, little-endian.
|
doc="""A binary representation of a long, little-endian.
|
||||||
|
|
||||||
|
@ -1705,19 +1678,8 @@ opcodes = [
|
||||||
A more efficient encoding of a Python long; the long1 encoding
|
A more efficient encoding of a Python long; the long1 encoding
|
||||||
says it all."""),
|
says it all."""),
|
||||||
|
|
||||||
I(name="LONG2",
|
|
||||||
code='\x8b',
|
|
||||||
arg=long2,
|
|
||||||
stack_before=[],
|
|
||||||
stack_after=[pylong],
|
|
||||||
proto=2,
|
|
||||||
doc="""Long integer using two-byte length.
|
|
||||||
|
|
||||||
A more efficient encoding of a Python long; the long2 encoding
|
|
||||||
says it all."""),
|
|
||||||
|
|
||||||
I(name="LONG4",
|
I(name="LONG4",
|
||||||
code='\x8c',
|
code='\x8b',
|
||||||
arg=long4,
|
arg=long4,
|
||||||
stack_before=[],
|
stack_before=[],
|
||||||
stack_after=[pylong],
|
stack_after=[pylong],
|
||||||
|
|
Loading…
Reference in New Issue