Issue #26881: The modulefinder module now supports extended opcode arguments.

This commit is contained in:
Serhiy Storchaka 2016-05-08 23:44:54 +03:00
commit 3e99fdeed5
4 changed files with 62 additions and 57 deletions

View File

@ -284,31 +284,17 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
""" """
labels = findlabels(code) labels = findlabels(code)
extended_arg = 0
starts_line = None starts_line = None
free = None free = None
# enumerate() is not an option, since we sometimes process for offset, op, arg in _unpack_opargs(code):
# multiple elements on a single pass through the loop
n = len(code)
i = 0
while i < n:
op = code[i]
offset = i
if linestarts is not None: if linestarts is not None:
starts_line = linestarts.get(i, None) starts_line = linestarts.get(offset, None)
if starts_line is not None: if starts_line is not None:
starts_line += line_offset starts_line += line_offset
is_jump_target = i in labels is_jump_target = offset in labels
i = i+1
arg = None
argval = None argval = None
argrepr = '' argrepr = ''
if op >= HAVE_ARGUMENT: if arg is not None:
arg = code[i] + code[i+1]*256 + extended_arg
extended_arg = 0
i = i+2
if op == EXTENDED_ARG:
extended_arg = arg*65536
# Set argval to the dereferenced value of the argument when # Set argval to the dereferenced value of the argument when
# available, and argrepr to the string representation of argval. # available, and argrepr to the string representation of argval.
# _disassemble_bytes needs the string repr of the # _disassemble_bytes needs the string repr of the
@ -319,7 +305,7 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
elif op in hasname: elif op in hasname:
argval, argrepr = _get_name_info(arg, names) argval, argrepr = _get_name_info(arg, names)
elif op in hasjrel: elif op in hasjrel:
argval = i + arg argval = offset + 3 + arg
argrepr = "to " + repr(argval) argrepr = "to " + repr(argval)
elif op in haslocal: elif op in haslocal:
argval, argrepr = _get_name_info(arg, varnames) argval, argrepr = _get_name_info(arg, varnames)
@ -329,7 +315,7 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
elif op in hasfree: elif op in hasfree:
argval, argrepr = _get_name_info(arg, cells) argval, argrepr = _get_name_info(arg, cells)
elif op in hasnargs: elif op in hasnargs:
argrepr = "%d positional, %d keyword pair" % (code[i-2], code[i-1]) argrepr = "%d positional, %d keyword pair" % (arg%256, arg//256)
yield Instruction(opname[op], op, yield Instruction(opname[op], op,
arg, argval, argrepr, arg, argval, argrepr,
offset, starts_line, is_jump_target) offset, starts_line, is_jump_target)
@ -365,6 +351,25 @@ def _disassemble_str(source, *, file=None):
disco = disassemble # XXX For backwards compatibility disco = disassemble # XXX For backwards compatibility
def _unpack_opargs(code):
    """Walk raw bytecode and yield one ``(offset, op, arg)`` triple per opcode.

    *code* is an indexable sequence of byte values that supports ``len()``.

    For each instruction the generator yields:

    * ``offset`` -- index in *code* of the opcode byte,
    * ``op``     -- the opcode value itself,
    * ``arg``    -- ``None`` when ``op < HAVE_ARGUMENT``; otherwise the
      16-bit little-endian argument stored in the two following bytes,
      plus any value carried over from an immediately preceding
      EXTENDED_ARG instruction.

    EXTENDED_ARG instructions are yielded like any other instruction; their
    argument, multiplied by 65536, is added to the argument of the next
    instruction that takes one.

    No bounds check is made for an argument that runs past the end of
    *code*; the indexing error from the underlying sequence propagates.
    """
    extended_arg = 0
    n = len(code)
    i = 0
    # A manual index is used rather than enumerate() because a single pass
    # through the loop consumes either one byte (bare opcode) or three
    # bytes (opcode plus two argument bytes).
    while i < n:
        op = code[i]
        offset = i
        i += 1
        arg = None
        if op >= HAVE_ARGUMENT:
            arg = code[i] + code[i+1]*256 + extended_arg
            # The carried-over high bits apply to exactly one instruction.
            extended_arg = 0
            i += 2
            if op == EXTENDED_ARG:
                # Supplies the high 16 bits for the next argument.
                extended_arg = arg*65536
        yield (offset, op, arg)
def findlabels(code): def findlabels(code):
"""Detect all offsets in a byte code which are jump targets. """Detect all offsets in a byte code which are jump targets.
@ -372,19 +377,11 @@ def findlabels(code):
""" """
labels = [] labels = []
# enumerate() is not an option, since we sometimes process for offset, op, arg in _unpack_opargs(code):
# multiple elements on a single pass through the loop if arg is not None:
n = len(code)
i = 0
while i < n:
op = code[i]
i = i+1
if op >= HAVE_ARGUMENT:
arg = code[i] + code[i+1]*256
i = i+2
label = -1 label = -1
if op in hasjrel: if op in hasjrel:
label = i+arg label = offset + 3 + arg
elif op in hasjabs: elif op in hasjabs:
label = arg label = arg
if label >= 0: if label >= 0:

View File

@ -13,13 +13,12 @@ with warnings.catch_warnings():
warnings.simplefilter('ignore', DeprecationWarning) warnings.simplefilter('ignore', DeprecationWarning)
import imp import imp
# XXX Clean up once str8's cstor matches bytes. LOAD_CONST = dis.opmap['LOAD_CONST']
LOAD_CONST = bytes([dis.opmap['LOAD_CONST']]) IMPORT_NAME = dis.opmap['IMPORT_NAME']
IMPORT_NAME = bytes([dis.opmap['IMPORT_NAME']]) STORE_NAME = dis.opmap['STORE_NAME']
STORE_NAME = bytes([dis.opmap['STORE_NAME']]) STORE_GLOBAL = dis.opmap['STORE_GLOBAL']
STORE_GLOBAL = bytes([dis.opmap['STORE_GLOBAL']])
STORE_OPS = STORE_NAME, STORE_GLOBAL STORE_OPS = STORE_NAME, STORE_GLOBAL
HAVE_ARGUMENT = bytes([dis.HAVE_ARGUMENT]) EXTENDED_ARG = dis.EXTENDED_ARG
# Modulefinder does a good job at simulating Python's, but it can not # Modulefinder does a good job at simulating Python's, but it can not
# handle __path__ modifications packages make at runtime. Therefore there # handle __path__ modifications packages make at runtime. Therefore there
@ -337,38 +336,30 @@ class ModuleFinder:
fullname = name + "." + sub fullname = name + "." + sub
self._add_badmodule(fullname, caller) self._add_badmodule(fullname, caller)
def scan_opcodes_25(self, co, def scan_opcodes(self, co):
unpack = struct.unpack):
# Scan the code, and yield 'interesting' opcode combinations # Scan the code, and yield 'interesting' opcode combinations
# Python 2.5 version (has absolute and relative imports)
code = co.co_code code = co.co_code
names = co.co_names names = co.co_names
consts = co.co_consts consts = co.co_consts
LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME opargs = [(op, arg) for _, op, arg in dis._unpack_opargs(code)
while code: if op != EXTENDED_ARG]
c = bytes([code[0]]) for i, (op, oparg) in enumerate(opargs):
if c in STORE_OPS: if op in STORE_OPS:
oparg, = unpack('<H', code[1:3])
yield "store", (names[oparg],) yield "store", (names[oparg],)
code = code[3:]
continue continue
if code[:9:3] == LOAD_LOAD_AND_IMPORT: if (op == IMPORT_NAME and i >= 2
oparg_1, oparg_2, oparg_3 = unpack('<xHxHxH', code[:9]) and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST):
level = consts[oparg_1] level = consts[opargs[i-2][1]]
fromlist = consts[opargs[i-1][1]]
if level == 0: # absolute import if level == 0: # absolute import
yield "absolute_import", (consts[oparg_2], names[oparg_3]) yield "absolute_import", (fromlist, names[oparg])
else: # relative import else: # relative import
yield "relative_import", (level, consts[oparg_2], names[oparg_3]) yield "relative_import", (level, fromlist, names[oparg])
code = code[9:]
continue continue
if c >= HAVE_ARGUMENT:
code = code[3:]
else:
code = code[1:]
def scan_code(self, co, m): def scan_code(self, co, m):
code = co.co_code code = co.co_code
scanner = self.scan_opcodes_25 scanner = self.scan_opcodes
for what, args in scanner(co): for what, args in scanner(co):
if what == "store": if what == "store":
name, = args name, = args

View File

@ -319,6 +319,19 @@ class ModuleFinderTest(unittest.TestCase):
expected = "co_filename %r changed to %r" % (old_path, new_path) expected = "co_filename %r changed to %r" % (old_path, new_path)
self.assertIn(expected, output) self.assertIn(expected, output)
def test_extended_opargs(self):
# Feed _do_test a generated a.py whose source is the repr of
# list(range(2**16)) followed by "import b", plus an empty b.py.
# The large literal is intended to exercise extended opcode arguments
# (issue #26881).
# NOTE(review): the leading "a", ["a", "b"], [], [] entries are presumably
# the module to import and the expected found / missing / maybe-missing
# module lists -- confirm against _do_test.
extended_opargs_test = [
"a",
["a", "b"],
[], [],
"""\
a.py
%r
import b
b.py
""" % list(range(2**16))] # 2**16 constants
self._do_test(extended_opargs_test)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

View File

@ -268,6 +268,8 @@ Core and Builtins
Library Library
------- -------
- Issue #26881: The modulefinder module now supports extended opcode arguments.
- Issue #23815: Fixed crashes related to directly created instances of types in - Issue #23815: Fixed crashes related to directly created instances of types in
_tkinter and curses.panel modules. _tkinter and curses.panel modules.
@ -277,6 +279,8 @@ Library
- Issue #26873: xmlrpc now raises ResponseError on unsupported type tags - Issue #26873: xmlrpc now raises ResponseError on unsupported type tags
instead of silently returning an incorrect result. instead of silently returning an incorrect result.
- Issue #26881: modulefinder now works with bytecode with extended args.
- Issue #26915: The __contains__ methods in the collections ABCs now check - Issue #26915: The __contains__ methods in the collections ABCs now check
for identity before checking equality. This better matches the behavior for identity before checking equality. This better matches the behavior
of the concrete classes, allows sensible handling of NaNs, and makes it of the concrete classes, allows sensible handling of NaNs, and makes it