Issue #26881: The modulefinder module now supports extended opcode arguments.

This commit is contained in:
Serhiy Storchaka 2016-05-08 23:43:50 +03:00
parent c7cc9850d4
commit 02d9f5e5b2
4 changed files with 63 additions and 58 deletions

View File

@ -275,31 +275,17 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
""" """
labels = findlabels(code) labels = findlabels(code)
extended_arg = 0
starts_line = None starts_line = None
free = None free = None
# enumerate() is not an option, since we sometimes process for offset, op, arg in _unpack_opargs(code):
# multiple elements on a single pass through the loop
n = len(code)
i = 0
while i < n:
op = code[i]
offset = i
if linestarts is not None: if linestarts is not None:
starts_line = linestarts.get(i, None) starts_line = linestarts.get(offset, None)
if starts_line is not None: if starts_line is not None:
starts_line += line_offset starts_line += line_offset
is_jump_target = i in labels is_jump_target = offset in labels
i = i+1
arg = None
argval = None argval = None
argrepr = '' argrepr = ''
if op >= HAVE_ARGUMENT: if arg is not None:
arg = code[i] + code[i+1]*256 + extended_arg
extended_arg = 0
i = i+2
if op == EXTENDED_ARG:
extended_arg = arg*65536
# Set argval to the dereferenced value of the argument when # Set argval to the dereferenced value of the argument when
# available, and argrepr to the string representation of argval. # available, and argrepr to the string representation of argval.
# _disassemble_bytes needs the string repr of the # _disassemble_bytes needs the string repr of the
@ -310,7 +296,7 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
elif op in hasname: elif op in hasname:
argval, argrepr = _get_name_info(arg, names) argval, argrepr = _get_name_info(arg, names)
elif op in hasjrel: elif op in hasjrel:
argval = i + arg argval = offset + 3 + arg
argrepr = "to " + repr(argval) argrepr = "to " + repr(argval)
elif op in haslocal: elif op in haslocal:
argval, argrepr = _get_name_info(arg, varnames) argval, argrepr = _get_name_info(arg, varnames)
@ -320,7 +306,7 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
elif op in hasfree: elif op in hasfree:
argval, argrepr = _get_name_info(arg, cells) argval, argrepr = _get_name_info(arg, cells)
elif op in hasnargs: elif op in hasnargs:
argrepr = "%d positional, %d keyword pair" % (code[i-2], code[i-1]) argrepr = "%d positional, %d keyword pair" % (arg%256, arg//256)
yield Instruction(opname[op], op, yield Instruction(opname[op], op,
arg, argval, argrepr, arg, argval, argrepr,
offset, starts_line, is_jump_target) offset, starts_line, is_jump_target)
@ -356,6 +342,25 @@ def _disassemble_str(source, *, file=None):
disco = disassemble # XXX For backwards compatibility disco = disassemble # XXX For backwards compatibility
def _unpack_opargs(code):
# enumerate() is not an option, since we sometimes process
# multiple elements on a single pass through the loop
extended_arg = 0
n = len(code)
i = 0
while i < n:
op = code[i]
offset = i
i = i+1
arg = None
if op >= HAVE_ARGUMENT:
arg = code[i] + code[i+1]*256 + extended_arg
extended_arg = 0
i = i+2
if op == EXTENDED_ARG:
extended_arg = arg*65536
yield (offset, op, arg)
def findlabels(code): def findlabels(code):
"""Detect all offsets in a byte code which are jump targets. """Detect all offsets in a byte code which are jump targets.
@ -363,19 +368,11 @@ def findlabels(code):
""" """
labels = [] labels = []
# enumerate() is not an option, since we sometimes process for offset, op, arg in _unpack_opargs(code):
# multiple elements on a single pass through the loop if arg is not None:
n = len(code)
i = 0
while i < n:
op = code[i]
i = i+1
if op >= HAVE_ARGUMENT:
arg = code[i] + code[i+1]*256
i = i+2
label = -1 label = -1
if op in hasjrel: if op in hasjrel:
label = i+arg label = offset + 3 + arg
elif op in hasjabs: elif op in hasjabs:
label = arg label = arg
if label >= 0: if label >= 0:

View File

@ -13,13 +13,12 @@ with warnings.catch_warnings():
warnings.simplefilter('ignore', PendingDeprecationWarning) warnings.simplefilter('ignore', PendingDeprecationWarning)
import imp import imp
# XXX Clean up once str8's cstor matches bytes. LOAD_CONST = dis.opmap['LOAD_CONST']
LOAD_CONST = bytes([dis.opname.index('LOAD_CONST')]) IMPORT_NAME = dis.opmap['IMPORT_NAME']
IMPORT_NAME = bytes([dis.opname.index('IMPORT_NAME')]) STORE_NAME = dis.opmap['STORE_NAME']
STORE_NAME = bytes([dis.opname.index('STORE_NAME')]) STORE_GLOBAL = dis.opmap['STORE_GLOBAL']
STORE_GLOBAL = bytes([dis.opname.index('STORE_GLOBAL')]) STORE_OPS = STORE_NAME, STORE_GLOBAL
STORE_OPS = [STORE_NAME, STORE_GLOBAL] EXTENDED_ARG = dis.EXTENDED_ARG
HAVE_ARGUMENT = bytes([dis.HAVE_ARGUMENT])
# Modulefinder does a good job at simulating Python's, but it can not # Modulefinder does a good job at simulating Python's, but it can not
# handle __path__ modifications packages make at runtime. Therefore there # handle __path__ modifications packages make at runtime. Therefore there
@ -337,38 +336,30 @@ class ModuleFinder:
fullname = name + "." + sub fullname = name + "." + sub
self._add_badmodule(fullname, caller) self._add_badmodule(fullname, caller)
def scan_opcodes_25(self, co, def scan_opcodes(self, co):
unpack = struct.unpack):
# Scan the code, and yield 'interesting' opcode combinations # Scan the code, and yield 'interesting' opcode combinations
# Python 2.5 version (has absolute and relative imports)
code = co.co_code code = co.co_code
names = co.co_names names = co.co_names
consts = co.co_consts consts = co.co_consts
LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME opargs = [(op, arg) for _, op, arg in dis._unpack_opargs(code)
while code: if op != EXTENDED_ARG]
c = bytes([code[0]]) for i, (op, oparg) in enumerate(opargs):
if c in STORE_OPS: if op in STORE_OPS:
oparg, = unpack('<H', code[1:3])
yield "store", (names[oparg],) yield "store", (names[oparg],)
code = code[3:]
continue continue
if code[:9:3] == LOAD_LOAD_AND_IMPORT: if (op == IMPORT_NAME and i >= 2
oparg_1, oparg_2, oparg_3 = unpack('<xHxHxH', code[:9]) and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST):
level = consts[oparg_1] level = consts[opargs[i-2][1]]
fromlist = consts[opargs[i-1][1]]
if level == 0: # absolute import if level == 0: # absolute import
yield "absolute_import", (consts[oparg_2], names[oparg_3]) yield "absolute_import", (fromlist, names[oparg])
else: # relative import else: # relative import
yield "relative_import", (level, consts[oparg_2], names[oparg_3]) yield "relative_import", (level, fromlist, names[oparg])
code = code[9:]
continue continue
if c >= HAVE_ARGUMENT:
code = code[3:]
else:
code = code[1:]
def scan_code(self, co, m): def scan_code(self, co, m):
code = co.co_code code = co.co_code
scanner = self.scan_opcodes_25 scanner = self.scan_opcodes
for what, args in scanner(co): for what, args in scanner(co):
if what == "store": if what == "store":
name, = args name, = args

View File

@ -319,6 +319,19 @@ class ModuleFinderTest(unittest.TestCase):
expected = "co_filename %r changed to %r" % (old_path, new_path) expected = "co_filename %r changed to %r" % (old_path, new_path)
self.assertIn(expected, output) self.assertIn(expected, output)
def test_extended_opargs(self):
# Exercises ModuleFinder on a module whose source contains the repr of
# list(range(2**16)) followed by "import b" -- presumably so that an
# operand index used by the import no longer fits in 16 bits and the
# compiler has to emit EXTENDED_ARG (TODO confirm against the compiler).
# The list handed to _do_test looks like [entry module, expected modules,
# missing, maybe-missing, fixture source] -- verify against _do_test.
# NOTE(review): this rendering shows every line at column 0; the leading
# whitespace of the lines inside the triple-quoted fixture may be
# significant to the fixture parser -- confirm against the real file.
extended_opargs_test = [
"a",
["a", "b"],
[], [],
"""\
a.py
%r
import b
b.py
""" % list(range(2**16))] # 2**16 constants
self._do_test(extended_opargs_test)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

View File

@ -116,6 +116,8 @@ Core and Builtins
Library Library
------- -------
- Issue #26881: The modulefinder module now supports extended opcode arguments.
- Issue #23815: Fixed crashes related to directly created instances of types in - Issue #23815: Fixed crashes related to directly created instances of types in
_tkinter and curses.panel modules. _tkinter and curses.panel modules.
@ -125,6 +127,8 @@ Library
- Issue #26873: xmlrpc now raises ResponseError on unsupported type tags - Issue #26873: xmlrpc now raises ResponseError on unsupported type tags
instead of silently returning an incorrect result. instead of silently returning an incorrect result.
- Issue #26881: modulefinder now works with bytecode with extended args.
- Issue #26711: Fixed the comparison of plistlib.Data with other types. - Issue #26711: Fixed the comparison of plistlib.Data with other types.
- Issue #24114: Fix an uninitialized variable in `ctypes.util`. - Issue #24114: Fix an uninitialized variable in `ctypes.util`.