Issue #16550: Update the opcode descriptions of pickletools to use unsigned
integers where appropriate. Initial patch by Serhiy Storchaka.
This commit is contained in:
parent
5c4874f7a2
commit
8db89ca56c
|
@ -13,6 +13,7 @@ dis(pickle, out=None, memo=None, indentlevel=4)
|
|||
import codecs
|
||||
import pickle
|
||||
import re
|
||||
import sys
|
||||
|
||||
__all__ = ['dis', 'genops', 'optimize']
|
||||
|
||||
|
@ -165,8 +166,9 @@ UP_TO_NEWLINE = -1
|
|||
|
||||
# Represents the number of bytes consumed by a two-argument opcode where
|
||||
# the first argument gives the number of bytes in the second argument.
|
||||
TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int
|
||||
TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int
|
||||
TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int
|
||||
TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int
|
||||
TAKEN_FROM_ARGUMENT4U = -4 # num bytes is 4-byte unsigned little-endian int
|
||||
|
||||
class ArgumentDescriptor(object):
|
||||
__slots__ = (
|
||||
|
@ -194,7 +196,8 @@ class ArgumentDescriptor(object):
|
|||
assert isinstance(n, int) and (n >= 0 or
|
||||
n in (UP_TO_NEWLINE,
|
||||
TAKEN_FROM_ARGUMENT1,
|
||||
TAKEN_FROM_ARGUMENT4))
|
||||
TAKEN_FROM_ARGUMENT4,
|
||||
TAKEN_FROM_ARGUMENT4U))
|
||||
self.n = n
|
||||
|
||||
self.reader = reader
|
||||
|
@ -265,6 +268,27 @@ int4 = ArgumentDescriptor(
|
|||
doc="Four-byte signed integer, little-endian, 2's complement.")
|
||||
|
||||
|
||||
def read_uint4(f):
|
||||
r"""
|
||||
>>> import io
|
||||
>>> read_uint4(io.BytesIO(b'\xff\x00\x00\x00'))
|
||||
255
|
||||
>>> read_uint4(io.BytesIO(b'\x00\x00\x00\x80')) == 2**31
|
||||
True
|
||||
"""
|
||||
|
||||
data = f.read(4)
|
||||
if len(data) == 4:
|
||||
return _unpack("<I", data)[0]
|
||||
raise ValueError("not enough data in stream to read uint4")
|
||||
|
||||
uint4 = ArgumentDescriptor(
|
||||
name='uint4',
|
||||
n=4,
|
||||
reader=read_uint4,
|
||||
doc="Four-byte unsigned integer, little-endian.")
|
||||
|
||||
|
||||
def read_stringnl(f, decode=True, stripquotes=True):
|
||||
r"""
|
||||
>>> import io
|
||||
|
@ -421,6 +445,67 @@ string1 = ArgumentDescriptor(
|
|||
""")
|
||||
|
||||
|
||||
def read_bytes1(f):
|
||||
r"""
|
||||
>>> import io
|
||||
>>> read_bytes1(io.BytesIO(b"\x00"))
|
||||
b''
|
||||
>>> read_bytes1(io.BytesIO(b"\x03abcdef"))
|
||||
b'abc'
|
||||
"""
|
||||
|
||||
n = read_uint1(f)
|
||||
assert n >= 0
|
||||
data = f.read(n)
|
||||
if len(data) == n:
|
||||
return data
|
||||
raise ValueError("expected %d bytes in a bytes1, but only %d remain" %
|
||||
(n, len(data)))
|
||||
|
||||
bytes1 = ArgumentDescriptor(
|
||||
name="bytes1",
|
||||
n=TAKEN_FROM_ARGUMENT1,
|
||||
reader=read_bytes1,
|
||||
doc="""A counted bytes string.
|
||||
|
||||
The first argument is a 1-byte unsigned int giving the number
|
||||
of bytes, and the second argument is that many bytes.
|
||||
""")
|
||||
|
||||
|
||||
def read_bytes4(f):
|
||||
r"""
|
||||
>>> import io
|
||||
>>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x00abc"))
|
||||
b''
|
||||
>>> read_bytes4(io.BytesIO(b"\x03\x00\x00\x00abcdef"))
|
||||
b'abc'
|
||||
>>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x03abcdef"))
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
ValueError: expected 50331648 bytes in a bytes4, but only 6 remain
|
||||
"""
|
||||
|
||||
n = read_uint4(f)
|
||||
if n > sys.maxsize:
|
||||
raise ValueError("bytes4 byte count > sys.maxsize: %d" % n)
|
||||
data = f.read(n)
|
||||
if len(data) == n:
|
||||
return data
|
||||
raise ValueError("expected %d bytes in a bytes4, but only %d remain" %
|
||||
(n, len(data)))
|
||||
|
||||
bytes4 = ArgumentDescriptor(
|
||||
name="bytes4",
|
||||
n=TAKEN_FROM_ARGUMENT4U,
|
||||
reader=read_bytes4,
|
||||
doc="""A counted bytes string.
|
||||
|
||||
The first argument is a 4-byte little-endian unsigned int giving
|
||||
the number of bytes, and the second argument is that many bytes.
|
||||
""")
|
||||
|
||||
|
||||
def read_unicodestringnl(f):
|
||||
r"""
|
||||
>>> import io
|
||||
|
@ -464,9 +549,9 @@ def read_unicodestring4(f):
|
|||
ValueError: expected 7 bytes in a unicodestring4, but only 6 remain
|
||||
"""
|
||||
|
||||
n = read_int4(f)
|
||||
if n < 0:
|
||||
raise ValueError("unicodestring4 byte count < 0: %d" % n)
|
||||
n = read_uint4(f)
|
||||
if n > sys.maxsize:
|
||||
raise ValueError("unicodestring4 byte count > sys.maxsize: %d" % n)
|
||||
data = f.read(n)
|
||||
if len(data) == n:
|
||||
return str(data, 'utf-8', 'surrogatepass')
|
||||
|
@ -475,7 +560,7 @@ def read_unicodestring4(f):
|
|||
|
||||
unicodestring4 = ArgumentDescriptor(
|
||||
name="unicodestring4",
|
||||
n=TAKEN_FROM_ARGUMENT4,
|
||||
n=TAKEN_FROM_ARGUMENT4U,
|
||||
reader=read_unicodestring4,
|
||||
doc="""A counted Unicode string.
|
||||
|
||||
|
@ -872,7 +957,7 @@ class OpcodeInfo(object):
|
|||
assert isinstance(x, StackObject)
|
||||
self.stack_after = stack_after
|
||||
|
||||
assert isinstance(proto, int) and 0 <= proto <= 3
|
||||
assert isinstance(proto, int) and 0 <= proto <= pickle.HIGHEST_PROTOCOL
|
||||
self.proto = proto
|
||||
|
||||
assert isinstance(doc, str)
|
||||
|
@ -1038,28 +1123,28 @@ opcodes = [
|
|||
|
||||
I(name='BINBYTES',
|
||||
code='B',
|
||||
arg=string4,
|
||||
arg=bytes4,
|
||||
stack_before=[],
|
||||
stack_after=[pybytes],
|
||||
proto=3,
|
||||
doc="""Push a Python bytes object.
|
||||
|
||||
There are two arguments: the first is a 4-byte little-endian signed int
|
||||
giving the number of bytes in the string, and the second is that many
|
||||
bytes, which are taken literally as the bytes content.
|
||||
There are two arguments: the first is a 4-byte little-endian unsigned int
|
||||
giving the number of bytes, and the second is that many bytes, which are
|
||||
taken literally as the bytes content.
|
||||
"""),
|
||||
|
||||
I(name='SHORT_BINBYTES',
|
||||
code='C',
|
||||
arg=string1,
|
||||
arg=bytes1,
|
||||
stack_before=[],
|
||||
stack_after=[pybytes],
|
||||
proto=3,
|
||||
doc="""Push a Python string object.
|
||||
doc="""Push a Python bytes object.
|
||||
|
||||
There are two arguments: the first is a 1-byte unsigned int giving
|
||||
the number of bytes in the string, and the second is that many bytes,
|
||||
which are taken literally as the string content.
|
||||
the number of bytes, and the second is that many bytes, which are taken
|
||||
literally as the string content.
|
||||
"""),
|
||||
|
||||
# Ways to spell None.
|
||||
|
@ -1118,7 +1203,7 @@ opcodes = [
|
|||
proto=1,
|
||||
doc="""Push a Python Unicode string object.
|
||||
|
||||
There are two arguments: the first is a 4-byte little-endian signed int
|
||||
There are two arguments: the first is a 4-byte little-endian unsigned int
|
||||
giving the number of bytes in the string. The second is that many
|
||||
bytes, and is the UTF-8 encoding of the Unicode string.
|
||||
"""),
|
||||
|
@ -1422,13 +1507,13 @@ opcodes = [
|
|||
|
||||
I(name='LONG_BINGET',
|
||||
code='j',
|
||||
arg=int4,
|
||||
arg=uint4,
|
||||
stack_before=[],
|
||||
stack_after=[anyobject],
|
||||
proto=1,
|
||||
doc="""Read an object from the memo and push it on the stack.
|
||||
|
||||
The index of the memo object to push is given by the 4-byte signed
|
||||
The index of the memo object to push is given by the 4-byte unsigned
|
||||
little-endian integer following.
|
||||
"""),
|
||||
|
||||
|
@ -1459,14 +1544,14 @@ opcodes = [
|
|||
|
||||
I(name='LONG_BINPUT',
|
||||
code='r',
|
||||
arg=int4,
|
||||
arg=uint4,
|
||||
stack_before=[],
|
||||
stack_after=[],
|
||||
proto=1,
|
||||
doc="""Store the stack top into the memo. The stack is not popped.
|
||||
|
||||
The index of the memo location to write into is given by the 4-byte
|
||||
signed little-endian integer following.
|
||||
unsigned little-endian integer following.
|
||||
"""),
|
||||
|
||||
# Access the extension registry (predefined objects). Akin to the GET
|
||||
|
|
|
@ -58,6 +58,9 @@ Library
|
|||
- Issue #17526: fix an IndexError raised while passing code without filename to
|
||||
inspect.findsource(). Initial patch by Tyler Doyle.
|
||||
|
||||
- Issue #16550: Update the opcode descriptions of pickletools to use unsigned
|
||||
integers where appropriate. Initial patch by Serhiy Storchaka.
|
||||
|
||||
IDLE
|
||||
----
|
||||
|
||||
|
|
Loading…
Reference in New Issue