Removed the new LONG2 opcode: it's extravagant.  If LONG1 isn't enough,
then the embedded argument consumes at least 256 bytes.  The difference
between a 3-byte prefix (LONG2 + 2 bytes) and a 5-byte prefix (LONG4 +
4 bytes) is at worst less than 1%.  Note that binary strings and binary
Unicode strings also have only "size is 1 byte, or size is 4 bytes?"
flavors, and I expect for the same reason.  The only place a 2-byte
field was used was in BININT2, where the 2 bytes make up the *entire*
embedded argument (and now EXT2 also does this); that's a large savings
over 4 bytes, because the total opcode+argument size is so small in
the BININT2/EXT2 case.

Removed the TAKEN_FROM_ARGUMENT "number of bytes" code, and bifurcated it
into TAKEN_FROM_ARGUMENT1 and TAKEN_FROM_ARGUMENT4.  Now there's enough
info in ArgumentDescriptor objects to deduce the # of bytes consumed by
each opcode.

Rearranged the order in which proto2 opcodes are listed in pickle.py.
This commit is contained in:
Tim Peters 2003-01-28 00:13:19 +00:00
parent bdbe74183c
commit fdb8cfab08
2 changed files with 19 additions and 58 deletions

View File

@ -135,19 +135,18 @@ FALSE = 'I00\n' # not an opcode; see INT docs in pickletools.py
# Protocol 2 (not yet implemented) (XXX comments will be added later) # Protocol 2 (not yet implemented) (XXX comments will be added later)
NEWOBJ = '\x81'
PROTO = '\x80' PROTO = '\x80'
EXT2 = '\x83' NEWOBJ = '\x81'
EXT1 = '\x82' EXT1 = '\x82'
TUPLE1 = '\x85' EXT2 = '\x83'
EXT4 = '\x84' EXT4 = '\x84'
TUPLE3 = '\x87' TUPLE1 = '\x85'
TUPLE2 = '\x86' TUPLE2 = '\x86'
NEWFALSE = '\x89' TUPLE3 = '\x87'
NEWTRUE = '\x88' NEWTRUE = '\x88'
LONG2 = '\x8b' NEWFALSE = '\x89'
LONG1 = '\x8a' LONG1 = '\x8a'
LONG4 = '\x8c' LONG4 = '\x8b'
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)]) __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])

View File

@ -125,7 +125,8 @@ UP_TO_NEWLINE = -1
# Represents the number of bytes consumed by a two-argument opcode where # Represents the number of bytes consumed by a two-argument opcode where
# the first argument gives the number of bytes in the second argument. # the first argument gives the number of bytes in the second argument.
TAKEN_FROM_ARGUMENT = -2 TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int
TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int
class ArgumentDescriptor(object): class ArgumentDescriptor(object):
__slots__ = ( __slots__ = (
@ -133,7 +134,8 @@ class ArgumentDescriptor(object):
'name', 'name',
# length of argument, in bytes; an int; UP_TO_NEWLINE and # length of argument, in bytes; an int; UP_TO_NEWLINE and
# TAKEN_FROM_ARGUMENT are negative values for variable-length cases # TAKEN_FROM_ARGUMENT{1,4} are negative values for variable-length
# cases
'n', 'n',
# a function taking a file-like object, reading this kind of argument # a function taking a file-like object, reading this kind of argument
@ -150,8 +152,9 @@ class ArgumentDescriptor(object):
self.name = name self.name = name
assert isinstance(n, int) and (n >= 0 or assert isinstance(n, int) and (n >= 0 or
n is UP_TO_NEWLINE or n in (UP_TO_NEWLINE,
n is TAKEN_FROM_ARGUMENT) TAKEN_FROM_ARGUMENT1,
TAKEN_FROM_ARGUMENT4))
self.n = n self.n = n
self.reader = reader self.reader = reader
@ -341,7 +344,7 @@ def read_string4(f):
string4 = ArgumentDescriptor( string4 = ArgumentDescriptor(
name="string4", name="string4",
n=TAKEN_FROM_ARGUMENT, n=TAKEN_FROM_ARGUMENT4,
reader=read_string4, reader=read_string4,
doc="""A counted string. doc="""A counted string.
@ -370,7 +373,7 @@ def read_string1(f):
string1 = ArgumentDescriptor( string1 = ArgumentDescriptor(
name="string1", name="string1",
n=TAKEN_FROM_ARGUMENT, n=TAKEN_FROM_ARGUMENT1,
reader=read_string1, reader=read_string1,
doc="""A counted string. doc="""A counted string.
@ -434,7 +437,7 @@ def read_unicodestring4(f):
unicodestring4 = ArgumentDescriptor( unicodestring4 = ArgumentDescriptor(
name="unicodestring4", name="unicodestring4",
n=TAKEN_FROM_ARGUMENT, n=TAKEN_FROM_ARGUMENT4,
reader=read_unicodestring4, reader=read_unicodestring4,
doc="""A counted Unicode string. doc="""A counted Unicode string.
@ -626,7 +629,7 @@ def read_long1(f):
long1 = ArgumentDescriptor( long1 = ArgumentDescriptor(
name="long1", name="long1",
n=TAKEN_FROM_ARGUMENT, n=TAKEN_FROM_ARGUMENT1,
reader=read_long1, reader=read_long1,
doc="""A binary long, little-endian, using 1-byte size. doc="""A binary long, little-endian, using 1-byte size.
@ -634,36 +637,6 @@ long1 = ArgumentDescriptor(
many bytes and interprets them as a little-endian 2's-complement long. many bytes and interprets them as a little-endian 2's-complement long.
""") """)
def read_long2(f):
r"""
>>> import StringIO
>>> read_long2(StringIO.StringIO("\x02\x00\xff\x00"))
255L
>>> read_long2(StringIO.StringIO("\x02\x00\xff\x7f"))
32767L
>>> read_long2(StringIO.StringIO("\x02\x00\x00\xff"))
-256L
>>> read_long2(StringIO.StringIO("\x02\x00\x00\x80"))
-32768L
>>>
"""
n = read_uint2(f)
data = f.read(n)
if len(data) != n:
raise ValueError("not enough data in stream to read long2")
return decode_long(data)
long2 = ArgumentDescriptor(
name="long2",
n=TAKEN_FROM_ARGUMENT,
reader=read_long2,
doc="""A binary long, little-endian, using 2-byte size.
This first reads two byte as an unsigned size, then reads that
many bytes and interprets them as a little-endian 2's-complement long.
""")
def read_long4(f): def read_long4(f):
r""" r"""
>>> import StringIO >>> import StringIO
@ -688,7 +661,7 @@ def read_long4(f):
long4 = ArgumentDescriptor( long4 = ArgumentDescriptor(
name="long4", name="long4",
n=TAKEN_FROM_ARGUMENT, n=TAKEN_FROM_ARGUMENT4,
reader=read_long4, reader=read_long4,
doc="""A binary representation of a long, little-endian. doc="""A binary representation of a long, little-endian.
@ -1705,19 +1678,8 @@ opcodes = [
A more efficient encoding of a Python long; the long1 encoding A more efficient encoding of a Python long; the long1 encoding
says it all."""), says it all."""),
I(name="LONG2",
code='\x8b',
arg=long2,
stack_before=[],
stack_after=[pylong],
proto=2,
doc="""Long integer using two-byte length.
A more efficient encoding of a Python long; the long2 encoding
says it all."""),
I(name="LONG4", I(name="LONG4",
code='\x8c', code='\x8b',
arg=long4, arg=long4,
stack_before=[], stack_before=[],
stack_after=[pylong], stack_after=[pylong],