Issue #16550: Update the opcode descriptions of pickletools to use unsigned
integers where appropriate. Initial patch by Serhiy Storchaka.
This commit is contained in:
parent
5c4874f7a2
commit
8db89ca56c
|
@ -13,6 +13,7 @@ dis(pickle, out=None, memo=None, indentlevel=4)
|
||||||
import codecs
|
import codecs
|
||||||
import pickle
|
import pickle
|
||||||
import re
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
__all__ = ['dis', 'genops', 'optimize']
|
__all__ = ['dis', 'genops', 'optimize']
|
||||||
|
|
||||||
|
@ -165,8 +166,9 @@ UP_TO_NEWLINE = -1
|
||||||
|
|
||||||
# Represents the number of bytes consumed by a two-argument opcode where
|
# Represents the number of bytes consumed by a two-argument opcode where
|
||||||
# the first argument gives the number of bytes in the second argument.
|
# the first argument gives the number of bytes in the second argument.
|
||||||
TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int
|
TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int
|
||||||
TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int
|
TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int
|
||||||
|
TAKEN_FROM_ARGUMENT4U = -4 # num bytes is 4-byte unsigned little-endian int
|
||||||
|
|
||||||
class ArgumentDescriptor(object):
|
class ArgumentDescriptor(object):
|
||||||
__slots__ = (
|
__slots__ = (
|
||||||
|
@ -194,7 +196,8 @@ class ArgumentDescriptor(object):
|
||||||
assert isinstance(n, int) and (n >= 0 or
|
assert isinstance(n, int) and (n >= 0 or
|
||||||
n in (UP_TO_NEWLINE,
|
n in (UP_TO_NEWLINE,
|
||||||
TAKEN_FROM_ARGUMENT1,
|
TAKEN_FROM_ARGUMENT1,
|
||||||
TAKEN_FROM_ARGUMENT4))
|
TAKEN_FROM_ARGUMENT4,
|
||||||
|
TAKEN_FROM_ARGUMENT4U))
|
||||||
self.n = n
|
self.n = n
|
||||||
|
|
||||||
self.reader = reader
|
self.reader = reader
|
||||||
|
@ -265,6 +268,27 @@ int4 = ArgumentDescriptor(
|
||||||
doc="Four-byte signed integer, little-endian, 2's complement.")
|
doc="Four-byte signed integer, little-endian, 2's complement.")
|
||||||
|
|
||||||
|
|
||||||
|
def read_uint4(f):
    r"""Read a four-byte unsigned little-endian integer from *f*.

    *f* is a binary file-like object supporting ``read(n)``.  Four bytes
    are requested; the decoded value is returned as a non-negative int.

    Raises ValueError if fewer than four bytes could be read.

    >>> import io
    >>> read_uint4(io.BytesIO(b'\xff\x00\x00\x00'))
    255
    >>> read_uint4(io.BytesIO(b'\x00\x00\x00\x80')) == 2**31
    True
    >>> read_uint4(io.BytesIO(b'\x00\x00'))
    Traceback (most recent call last):
    ...
    ValueError: not enough data in stream to read uint4
    """

    data = f.read(4)
    if len(data) == 4:
        # "<I" is struct's little-endian unsigned 32-bit format, so the
        # high bit is part of the magnitude rather than a sign bit.
        return _unpack("<I", data)[0]
    raise ValueError("not enough data in stream to read uint4")
|
||||||
|
|
||||||
|
# Descriptor for a fixed-width opcode argument: always 4 bytes (n=4),
# decoded by read_uint4 as an unsigned little-endian integer.
uint4 = ArgumentDescriptor(
            name='uint4',
            n=4,
            reader=read_uint4,
            doc="Four-byte unsigned integer, little-endian.")
|
||||||
|
|
||||||
|
|
||||||
def read_stringnl(f, decode=True, stripquotes=True):
|
def read_stringnl(f, decode=True, stripquotes=True):
|
||||||
r"""
|
r"""
|
||||||
>>> import io
|
>>> import io
|
||||||
|
@ -421,6 +445,67 @@ string1 = ArgumentDescriptor(
|
||||||
""")
|
""")
|
||||||
|
|
||||||
|
|
||||||
|
def read_bytes1(f):
    r"""Read a bytes string prefixed by a one-byte length from *f*.

    The length is obtained with read_uint1(f); that many bytes are then
    read from *f* and returned unchanged as a bytes object.

    Raises ValueError if fewer bytes remain in *f* than the length
    prefix announces.

    >>> import io
    >>> read_bytes1(io.BytesIO(b"\x00"))
    b''
    >>> read_bytes1(io.BytesIO(b"\x03abcdef"))
    b'abc'
    """

    n = read_uint1(f)
    # Sanity check only: the count is documented as a 1-byte unsigned int,
    # so a negative value would indicate a broken reader, not bad input.
    assert n >= 0
    data = f.read(n)
    if len(data) == n:
        return data
    raise ValueError("expected %d bytes in a bytes1, but only %d remain" %
                     (n, len(data)))
|
||||||
|
|
||||||
|
# Descriptor for a length-prefixed bytes argument.  TAKEN_FROM_ARGUMENT1
# marks the size as variable: the byte count is itself read from a
# 1-byte unsigned prefix rather than being fixed in advance.
bytes1 = ArgumentDescriptor(
              name="bytes1",
              n=TAKEN_FROM_ARGUMENT1,
              reader=read_bytes1,
              doc="""A counted bytes string.

              The first argument is a 1-byte unsigned int giving the number
              of bytes, and the second argument is that many bytes.
              """)
|
||||||
|
|
||||||
|
|
||||||
|
def read_bytes4(f):
    r"""Read a bytes string prefixed by a four-byte length from *f*.

    The length is obtained with read_uint4(f), i.e. it is interpreted as
    an unsigned little-endian integer; that many bytes are then read from
    *f* and returned unchanged as a bytes object.

    Raises ValueError if the announced length exceeds sys.maxsize, or if
    fewer bytes remain in *f* than the length prefix announces.

    >>> import io
    >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x00abc"))
    b''
    >>> read_bytes4(io.BytesIO(b"\x03\x00\x00\x00abcdef"))
    b'abc'
    >>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x03abcdef"))
    Traceback (most recent call last):
    ...
    ValueError: expected 50331648 bytes in a bytes4, but only 6 remain
    """

    n = read_uint4(f)
    # An unsigned 32-bit count can be larger than sys.maxsize on builds
    # where sys.maxsize is 2**31 - 1; reject it before asking f to read.
    if n > sys.maxsize:
        raise ValueError("bytes4 byte count > sys.maxsize: %d" % n)
    data = f.read(n)
    if len(data) == n:
        return data
    raise ValueError("expected %d bytes in a bytes4, but only %d remain" %
                     (n, len(data)))
|
||||||
|
|
||||||
|
# Descriptor for a length-prefixed bytes argument.  TAKEN_FROM_ARGUMENT4U
# marks the size as variable: the byte count is itself read from a
# 4-byte unsigned little-endian prefix rather than being fixed in advance.
bytes4 = ArgumentDescriptor(
              name="bytes4",
              n=TAKEN_FROM_ARGUMENT4U,
              reader=read_bytes4,
              doc="""A counted bytes string.

              The first argument is a 4-byte little-endian unsigned int giving
              the number of bytes, and the second argument is that many bytes.
              """)
|
||||||
|
|
||||||
|
|
||||||
def read_unicodestringnl(f):
|
def read_unicodestringnl(f):
|
||||||
r"""
|
r"""
|
||||||
>>> import io
|
>>> import io
|
||||||
|
@ -464,9 +549,9 @@ def read_unicodestring4(f):
|
||||||
ValueError: expected 7 bytes in a unicodestring4, but only 6 remain
|
ValueError: expected 7 bytes in a unicodestring4, but only 6 remain
|
||||||
"""
|
"""
|
||||||
|
|
||||||
n = read_int4(f)
|
n = read_uint4(f)
|
||||||
if n < 0:
|
if n > sys.maxsize:
|
||||||
raise ValueError("unicodestring4 byte count < 0: %d" % n)
|
raise ValueError("unicodestring4 byte count > sys.maxsize: %d" % n)
|
||||||
data = f.read(n)
|
data = f.read(n)
|
||||||
if len(data) == n:
|
if len(data) == n:
|
||||||
return str(data, 'utf-8', 'surrogatepass')
|
return str(data, 'utf-8', 'surrogatepass')
|
||||||
|
@ -475,7 +560,7 @@ def read_unicodestring4(f):
|
||||||
|
|
||||||
unicodestring4 = ArgumentDescriptor(
|
unicodestring4 = ArgumentDescriptor(
|
||||||
name="unicodestring4",
|
name="unicodestring4",
|
||||||
n=TAKEN_FROM_ARGUMENT4,
|
n=TAKEN_FROM_ARGUMENT4U,
|
||||||
reader=read_unicodestring4,
|
reader=read_unicodestring4,
|
||||||
doc="""A counted Unicode string.
|
doc="""A counted Unicode string.
|
||||||
|
|
||||||
|
@ -872,7 +957,7 @@ class OpcodeInfo(object):
|
||||||
assert isinstance(x, StackObject)
|
assert isinstance(x, StackObject)
|
||||||
self.stack_after = stack_after
|
self.stack_after = stack_after
|
||||||
|
|
||||||
assert isinstance(proto, int) and 0 <= proto <= 3
|
assert isinstance(proto, int) and 0 <= proto <= pickle.HIGHEST_PROTOCOL
|
||||||
self.proto = proto
|
self.proto = proto
|
||||||
|
|
||||||
assert isinstance(doc, str)
|
assert isinstance(doc, str)
|
||||||
|
@ -1038,28 +1123,28 @@ opcodes = [
|
||||||
|
|
||||||
I(name='BINBYTES',
|
I(name='BINBYTES',
|
||||||
code='B',
|
code='B',
|
||||||
arg=string4,
|
arg=bytes4,
|
||||||
stack_before=[],
|
stack_before=[],
|
||||||
stack_after=[pybytes],
|
stack_after=[pybytes],
|
||||||
proto=3,
|
proto=3,
|
||||||
doc="""Push a Python bytes object.
|
doc="""Push a Python bytes object.
|
||||||
|
|
||||||
There are two arguments: the first is a 4-byte little-endian signed int
|
There are two arguments: the first is a 4-byte little-endian unsigned int
|
||||||
giving the number of bytes in the string, and the second is that many
|
giving the number of bytes, and the second is that many bytes, which are
|
||||||
bytes, which are taken literally as the bytes content.
|
taken literally as the bytes content.
|
||||||
"""),
|
"""),
|
||||||
|
|
||||||
I(name='SHORT_BINBYTES',
|
I(name='SHORT_BINBYTES',
|
||||||
code='C',
|
code='C',
|
||||||
arg=string1,
|
arg=bytes1,
|
||||||
stack_before=[],
|
stack_before=[],
|
||||||
stack_after=[pybytes],
|
stack_after=[pybytes],
|
||||||
proto=3,
|
proto=3,
|
||||||
doc="""Push a Python string object.
|
doc="""Push a Python bytes object.
|
||||||
|
|
||||||
There are two arguments: the first is a 1-byte unsigned int giving
|
There are two arguments: the first is a 1-byte unsigned int giving
|
||||||
the number of bytes in the string, and the second is that many bytes,
|
the number of bytes, and the second is that many bytes, which are taken
|
||||||
which are taken literally as the string content.
|
literally as the bytes content.
|
||||||
"""),
|
"""),
|
||||||
|
|
||||||
# Ways to spell None.
|
# Ways to spell None.
|
||||||
|
@ -1118,7 +1203,7 @@ opcodes = [
|
||||||
proto=1,
|
proto=1,
|
||||||
doc="""Push a Python Unicode string object.
|
doc="""Push a Python Unicode string object.
|
||||||
|
|
||||||
There are two arguments: the first is a 4-byte little-endian signed int
|
There are two arguments: the first is a 4-byte little-endian unsigned int
|
||||||
giving the number of bytes in the string. The second is that many
|
giving the number of bytes in the string. The second is that many
|
||||||
bytes, and is the UTF-8 encoding of the Unicode string.
|
bytes, and is the UTF-8 encoding of the Unicode string.
|
||||||
"""),
|
"""),
|
||||||
|
@ -1422,13 +1507,13 @@ opcodes = [
|
||||||
|
|
||||||
I(name='LONG_BINGET',
|
I(name='LONG_BINGET',
|
||||||
code='j',
|
code='j',
|
||||||
arg=int4,
|
arg=uint4,
|
||||||
stack_before=[],
|
stack_before=[],
|
||||||
stack_after=[anyobject],
|
stack_after=[anyobject],
|
||||||
proto=1,
|
proto=1,
|
||||||
doc="""Read an object from the memo and push it on the stack.
|
doc="""Read an object from the memo and push it on the stack.
|
||||||
|
|
||||||
The index of the memo object to push is given by the 4-byte signed
|
The index of the memo object to push is given by the 4-byte unsigned
|
||||||
little-endian integer following.
|
little-endian integer following.
|
||||||
"""),
|
"""),
|
||||||
|
|
||||||
|
@ -1459,14 +1544,14 @@ opcodes = [
|
||||||
|
|
||||||
I(name='LONG_BINPUT',
|
I(name='LONG_BINPUT',
|
||||||
code='r',
|
code='r',
|
||||||
arg=int4,
|
arg=uint4,
|
||||||
stack_before=[],
|
stack_before=[],
|
||||||
stack_after=[],
|
stack_after=[],
|
||||||
proto=1,
|
proto=1,
|
||||||
doc="""Store the stack top into the memo. The stack is not popped.
|
doc="""Store the stack top into the memo. The stack is not popped.
|
||||||
|
|
||||||
The index of the memo location to write into is given by the 4-byte
|
The index of the memo location to write into is given by the 4-byte
|
||||||
signed little-endian integer following.
|
unsigned little-endian integer following.
|
||||||
"""),
|
"""),
|
||||||
|
|
||||||
# Access the extension registry (predefined objects). Akin to the GET
|
# Access the extension registry (predefined objects). Akin to the GET
|
||||||
|
|
|
@ -58,6 +58,9 @@ Library
|
||||||
- Issue #17526: fix an IndexError raised while passing code without filename to
|
- Issue #17526: fix an IndexError raised while passing code without filename to
|
||||||
inspect.findsource(). Initial patch by Tyler Doyle.
|
inspect.findsource(). Initial patch by Tyler Doyle.
|
||||||
|
|
||||||
|
- Issue #16550: Update the opcode descriptions of pickletools to use unsigned
|
||||||
|
integers where appropriate. Initial patch by Serhiy Storchaka.
|
||||||
|
|
||||||
IDLE
|
IDLE
|
||||||
----
|
----
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue