Issue #16550: Update the opcode descriptions of pickletools to use unsigned
integers where appropriate.  Initial patch by Serhiy Storchaka.
This commit is contained in:
Alexandre Vassalotti 2013-04-14 03:30:35 -07:00
parent 5c4874f7a2
commit 8db89ca56c
2 changed files with 109 additions and 21 deletions

View File

@ -13,6 +13,7 @@ dis(pickle, out=None, memo=None, indentlevel=4)
import codecs import codecs
import pickle import pickle
import re import re
import sys
__all__ = ['dis', 'genops', 'optimize'] __all__ = ['dis', 'genops', 'optimize']
@ -165,8 +166,9 @@ UP_TO_NEWLINE = -1
# Represents the number of bytes consumed by a two-argument opcode where # Represents the number of bytes consumed by a two-argument opcode where
# the first argument gives the number of bytes in the second argument. # the first argument gives the number of bytes in the second argument.
TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int
TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int
TAKEN_FROM_ARGUMENT4U = -4 # num bytes is 4-byte unsigned little-endian int
class ArgumentDescriptor(object): class ArgumentDescriptor(object):
__slots__ = ( __slots__ = (
@ -194,7 +196,8 @@ class ArgumentDescriptor(object):
assert isinstance(n, int) and (n >= 0 or assert isinstance(n, int) and (n >= 0 or
n in (UP_TO_NEWLINE, n in (UP_TO_NEWLINE,
TAKEN_FROM_ARGUMENT1, TAKEN_FROM_ARGUMENT1,
TAKEN_FROM_ARGUMENT4)) TAKEN_FROM_ARGUMENT4,
TAKEN_FROM_ARGUMENT4U))
self.n = n self.n = n
self.reader = reader self.reader = reader
@ -265,6 +268,27 @@ int4 = ArgumentDescriptor(
doc="Four-byte signed integer, little-endian, 2's complement.") doc="Four-byte signed integer, little-endian, 2's complement.")
def read_uint4(f):
    r"""Read a four-byte unsigned little-endian integer from *f*.

    *f* is a binary file-like object; exactly four bytes are consumed
    from it and decoded with the struct format "<I".

    Raises ValueError if fewer than four bytes are available.

    >>> import io
    >>> read_uint4(io.BytesIO(b'\xff\x00\x00\x00'))
    255
    >>> read_uint4(io.BytesIO(b'\x00\x00\x00\x80')) == 2**31
    True
    """
    data = f.read(4)
    # A short read means the stream ended in the middle of the integer.
    if len(data) != 4:
        raise ValueError("not enough data in stream to read uint4")
    return _unpack("<I", data)[0]
# Argument descriptor for a fixed-width (n=4 bytes) unsigned little-endian
# integer argument; read_uint4 performs the actual decoding.
uint4 = ArgumentDescriptor(
name='uint4',
n=4,
reader=read_uint4,
doc="Four-byte unsigned integer, little-endian.")
def read_stringnl(f, decode=True, stripquotes=True): def read_stringnl(f, decode=True, stripquotes=True):
r""" r"""
>>> import io >>> import io
@ -421,6 +445,67 @@ string1 = ArgumentDescriptor(
""") """)
def read_bytes1(f):
    r"""Read a bytes object prefixed by a one-byte length from *f*.

    The length is obtained with read_uint1(); that many bytes are then
    read from *f* and returned unchanged.

    Raises ValueError if the stream ends before the whole payload has
    been read.

    >>> import io
    >>> read_bytes1(io.BytesIO(b"\x00"))
    b''
    >>> read_bytes1(io.BytesIO(b"\x03abcdef"))
    b'abc'
    """
    n = read_uint1(f)
    # Sanity check only: the count is documented as unsigned.
    assert n >= 0
    data = f.read(n)
    if len(data) != n:
        raise ValueError("expected %d bytes in a bytes1, but only %d remain" %
                         (n, len(data)))
    return data
# Argument descriptor for a length-prefixed bytes argument whose length is
# carried in a leading 1-byte unsigned int (n=TAKEN_FROM_ARGUMENT1);
# read_bytes1 performs the actual decoding.
bytes1 = ArgumentDescriptor(
name="bytes1",
n=TAKEN_FROM_ARGUMENT1,
reader=read_bytes1,
doc="""A counted bytes string.
The first argument is a 1-byte unsigned int giving the number
of bytes, and the second argument is that many bytes.
""")
def read_bytes4(f):
    r"""Read a bytes object prefixed by a four-byte unsigned length from *f*.

    The length is obtained with read_uint4() (little-endian); that many
    bytes are then read from *f* and returned unchanged.

    Raises ValueError if the length is greater than sys.maxsize, or if the
    stream ends before the whole payload has been read.

    >>> import io
    >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x00abc"))
    b''
    >>> read_bytes4(io.BytesIO(b"\x03\x00\x00\x00abcdef"))
    b'abc'
    >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x03abcdef"))
    Traceback (most recent call last):
      ...
    ValueError: expected 50331648 bytes in a bytes4, but only 6 remain
    """
    n = read_uint4(f)
    # Reject counts above sys.maxsize up front with an explicit message
    # instead of handing them to f.read().
    if n > sys.maxsize:
        raise ValueError("bytes4 byte count > sys.maxsize: %d" % n)
    data = f.read(n)
    if len(data) != n:
        raise ValueError("expected %d bytes in a bytes4, but only %d remain" %
                         (n, len(data)))
    return data
# Argument descriptor for a length-prefixed bytes argument whose length is
# carried in a leading 4-byte unsigned little-endian int
# (n=TAKEN_FROM_ARGUMENT4U); read_bytes4 performs the actual decoding.
bytes4 = ArgumentDescriptor(
name="bytes4",
n=TAKEN_FROM_ARGUMENT4U,
reader=read_bytes4,
doc="""A counted bytes string.
The first argument is a 4-byte little-endian unsigned int giving
the number of bytes, and the second argument is that many bytes.
""")
def read_unicodestringnl(f): def read_unicodestringnl(f):
r""" r"""
>>> import io >>> import io
@ -464,9 +549,9 @@ def read_unicodestring4(f):
ValueError: expected 7 bytes in a unicodestring4, but only 6 remain ValueError: expected 7 bytes in a unicodestring4, but only 6 remain
""" """
n = read_int4(f) n = read_uint4(f)
if n < 0: if n > sys.maxsize:
raise ValueError("unicodestring4 byte count < 0: %d" % n) raise ValueError("unicodestring4 byte count > sys.maxsize: %d" % n)
data = f.read(n) data = f.read(n)
if len(data) == n: if len(data) == n:
return str(data, 'utf-8', 'surrogatepass') return str(data, 'utf-8', 'surrogatepass')
@ -475,7 +560,7 @@ def read_unicodestring4(f):
unicodestring4 = ArgumentDescriptor( unicodestring4 = ArgumentDescriptor(
name="unicodestring4", name="unicodestring4",
n=TAKEN_FROM_ARGUMENT4, n=TAKEN_FROM_ARGUMENT4U,
reader=read_unicodestring4, reader=read_unicodestring4,
doc="""A counted Unicode string. doc="""A counted Unicode string.
@ -872,7 +957,7 @@ class OpcodeInfo(object):
assert isinstance(x, StackObject) assert isinstance(x, StackObject)
self.stack_after = stack_after self.stack_after = stack_after
assert isinstance(proto, int) and 0 <= proto <= 3 assert isinstance(proto, int) and 0 <= proto <= pickle.HIGHEST_PROTOCOL
self.proto = proto self.proto = proto
assert isinstance(doc, str) assert isinstance(doc, str)
@ -1038,28 +1123,28 @@ opcodes = [
I(name='BINBYTES', I(name='BINBYTES',
code='B', code='B',
arg=string4, arg=bytes4,
stack_before=[], stack_before=[],
stack_after=[pybytes], stack_after=[pybytes],
proto=3, proto=3,
doc="""Push a Python bytes object. doc="""Push a Python bytes object.
There are two arguments: the first is a 4-byte little-endian signed int There are two arguments: the first is a 4-byte little-endian unsigned int
giving the number of bytes in the string, and the second is that many giving the number of bytes, and the second is that many bytes, which are
bytes, which are taken literally as the bytes content. taken literally as the bytes content.
"""), """),
I(name='SHORT_BINBYTES', I(name='SHORT_BINBYTES',
code='C', code='C',
arg=string1, arg=bytes1,
stack_before=[], stack_before=[],
stack_after=[pybytes], stack_after=[pybytes],
proto=3, proto=3,
doc="""Push a Python string object. doc="""Push a Python bytes object.
There are two arguments: the first is a 1-byte unsigned int giving There are two arguments: the first is a 1-byte unsigned int giving
the number of bytes in the string, and the second is that many bytes, the number of bytes, and the second is that many bytes, which are taken
which are taken literally as the string content. literally as the string content.
"""), """),
# Ways to spell None. # Ways to spell None.
@ -1118,7 +1203,7 @@ opcodes = [
proto=1, proto=1,
doc="""Push a Python Unicode string object. doc="""Push a Python Unicode string object.
There are two arguments: the first is a 4-byte little-endian signed int There are two arguments: the first is a 4-byte little-endian unsigned int
giving the number of bytes in the string. The second is that many giving the number of bytes in the string. The second is that many
bytes, and is the UTF-8 encoding of the Unicode string. bytes, and is the UTF-8 encoding of the Unicode string.
"""), """),
@ -1422,13 +1507,13 @@ opcodes = [
I(name='LONG_BINGET', I(name='LONG_BINGET',
code='j', code='j',
arg=int4, arg=uint4,
stack_before=[], stack_before=[],
stack_after=[anyobject], stack_after=[anyobject],
proto=1, proto=1,
doc="""Read an object from the memo and push it on the stack. doc="""Read an object from the memo and push it on the stack.
The index of the memo object to push is given by the 4-byte signed The index of the memo object to push is given by the 4-byte unsigned
little-endian integer following. little-endian integer following.
"""), """),
@ -1459,14 +1544,14 @@ opcodes = [
I(name='LONG_BINPUT', I(name='LONG_BINPUT',
code='r', code='r',
arg=int4, arg=uint4,
stack_before=[], stack_before=[],
stack_after=[], stack_after=[],
proto=1, proto=1,
doc="""Store the stack top into the memo. The stack is not popped. doc="""Store the stack top into the memo. The stack is not popped.
The index of the memo location to write into is given by the 4-byte The index of the memo location to write into is given by the 4-byte
signed little-endian integer following. unsigned little-endian integer following.
"""), """),
# Access the extension registry (predefined objects). Akin to the GET # Access the extension registry (predefined objects). Akin to the GET

View File

@ -58,6 +58,9 @@ Library
- Issue #17526: fix an IndexError raised while passing code without filename to - Issue #17526: fix an IndexError raised while passing code without filename to
inspect.findsource(). Initial patch by Tyler Doyle. inspect.findsource(). Initial patch by Tyler Doyle.
- Issue #16550: Update the opcode descriptions of pickletools to use unsigned
integers where appropriate. Initial patch by Serhiy Storchaka.
IDLE IDLE
---- ----