diff --git a/Lib/dis.py b/Lib/dis.py index a25fb2b4176..10e5f7fb08a 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -454,6 +454,7 @@ def findlinestarts(code): """ byte_increments = code.co_lnotab[0::2] line_increments = code.co_lnotab[1::2] + bytecode_len = len(code.co_code) lastlineno = None lineno = code.co_firstlineno @@ -464,6 +465,10 @@ def findlinestarts(code): yield (addr, lineno) lastlineno = lineno addr += byte_incr + if addr >= bytecode_len: + # The rest of the lnotab byte offsets are past the end of + # the bytecode, so the lines were optimized away. + return if line_incr >= 0x80: # line_increments is an array of 8-bit signed integers line_incr -= 0x100 diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 47dee33076c..23cc36c6053 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -40,6 +40,20 @@ class TestTranforms(BytecodeTestCase): self.fail(f'{instr.opname} at {instr.offset} ' f'jumps to {tgt.opname} at {tgt.offset}') + def check_lnotab(self, code): + "Check that the lnotab byte offsets are sensible." + code = dis._get_code_object(code) + lnotab = list(dis.findlinestarts(code)) + # Don't bother checking if the line info is sensible, because + # most of the line info we can get at comes from lnotab. + min_bytecode = min(t[0] for t in lnotab) + max_bytecode = max(t[0] for t in lnotab) + self.assertGreaterEqual(min_bytecode, 0) + self.assertLess(max_bytecode, len(code.co_code)) + # This could conceivably test more (and probably should, as there + # aren't very many tests of lnotab), if peepholer wasn't scheduled + # to be replaced anyway. 
+ def test_unot(self): # UNARY_NOT POP_JUMP_IF_FALSE --> POP_JUMP_IF_TRUE' def unot(x): @@ -48,6 +62,7 @@ class TestTranforms(BytecodeTestCase): self.assertNotInBytecode(unot, 'UNARY_NOT') self.assertNotInBytecode(unot, 'POP_JUMP_IF_FALSE') self.assertInBytecode(unot, 'POP_JUMP_IF_TRUE') + self.check_lnotab(unot) def test_elim_inversion_of_is_or_in(self): for line, cmp_op in ( @@ -58,6 +73,7 @@ class TestTranforms(BytecodeTestCase): ): code = compile(line, '', 'single') self.assertInBytecode(code, 'COMPARE_OP', cmp_op) + self.check_lnotab(code) def test_global_as_constant(self): # LOAD_GLOBAL None/True/False --> LOAD_CONST None/True/False @@ -75,6 +91,7 @@ class TestTranforms(BytecodeTestCase): for func, elem in ((f, None), (g, True), (h, False)): self.assertNotInBytecode(func, 'LOAD_GLOBAL') self.assertInBytecode(func, 'LOAD_CONST', elem) + self.check_lnotab(func) def f(): 'Adding a docstring made this test fail in Py2.5.0' @@ -82,6 +99,7 @@ class TestTranforms(BytecodeTestCase): self.assertNotInBytecode(f, 'LOAD_GLOBAL') self.assertInBytecode(f, 'LOAD_CONST', None) + self.check_lnotab(f) def test_while_one(self): # Skip over: LOAD_CONST trueconst POP_JUMP_IF_FALSE xx @@ -93,6 +111,7 @@ class TestTranforms(BytecodeTestCase): self.assertNotInBytecode(f, elem) for elem in ('JUMP_ABSOLUTE',): self.assertInBytecode(f, elem) + self.check_lnotab(f) def test_pack_unpack(self): for line, elem in ( @@ -104,6 +123,7 @@ class TestTranforms(BytecodeTestCase): self.assertInBytecode(code, elem) self.assertNotInBytecode(code, 'BUILD_TUPLE') self.assertNotInBytecode(code, 'UNPACK_TUPLE') + self.check_lnotab(code) def test_folding_of_tuples_of_constants(self): for line, elem in ( @@ -116,6 +136,7 @@ class TestTranforms(BytecodeTestCase): code = compile(line,'','single') self.assertInBytecode(code, 'LOAD_CONST', elem) self.assertNotInBytecode(code, 'BUILD_TUPLE') + self.check_lnotab(code) # Long tuples should be folded too. 
code = compile(repr(tuple(range(10000))),'','single') @@ -124,6 +145,7 @@ class TestTranforms(BytecodeTestCase): load_consts = [instr for instr in dis.get_instructions(code) if instr.opname == 'LOAD_CONST'] self.assertEqual(len(load_consts), 2) + self.check_lnotab(code) # Bug 1053819: Tuple of constants misidentified when presented with: # . . . opcode_with_arg 100 unary_opcode BUILD_TUPLE 1 . . . @@ -141,6 +163,7 @@ class TestTranforms(BytecodeTestCase): 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ],) + self.check_lnotab(crater) def test_folding_of_lists_of_constants(self): for line, elem in ( @@ -153,6 +176,7 @@ class TestTranforms(BytecodeTestCase): code = compile(line, '', 'single') self.assertInBytecode(code, 'LOAD_CONST', elem) self.assertNotInBytecode(code, 'BUILD_LIST') + self.check_lnotab(code) def test_folding_of_sets_of_constants(self): for line, elem in ( @@ -166,6 +190,7 @@ class TestTranforms(BytecodeTestCase): code = compile(line, '', 'single') self.assertNotInBytecode(code, 'BUILD_SET') self.assertInBytecode(code, 'LOAD_CONST', elem) + self.check_lnotab(code) # Ensure that the resulting code actually works: def f(a): @@ -176,9 +201,11 @@ class TestTranforms(BytecodeTestCase): self.assertTrue(f(3)) self.assertTrue(not f(4)) + self.check_lnotab(f) self.assertTrue(not g(3)) self.assertTrue(g(4)) + self.check_lnotab(g) def test_folding_of_binops_on_constants(self): @@ -203,41 +230,50 @@ class TestTranforms(BytecodeTestCase): self.assertInBytecode(code, 'LOAD_CONST', elem) for instr in dis.get_instructions(code): self.assertFalse(instr.opname.startswith('BINARY_')) + self.check_lnotab(code) # Verify that unfoldables are skipped code = compile('a=2+"b"', '', 'single') self.assertInBytecode(code, 'LOAD_CONST', 2) self.assertInBytecode(code, 'LOAD_CONST', 'b') + self.check_lnotab(code) # Verify that large sequences do not result from folding code = compile('a="x"*10000', '', 'single') self.assertInBytecode(code, 'LOAD_CONST', 10000) 
self.assertNotIn("x"*10000, code.co_consts) + self.check_lnotab(code) code = compile('a=1<<1000', '', 'single') self.assertInBytecode(code, 'LOAD_CONST', 1000) self.assertNotIn(1<<1000, code.co_consts) + self.check_lnotab(code) code = compile('a=2**1000', '', 'single') self.assertInBytecode(code, 'LOAD_CONST', 1000) self.assertNotIn(2**1000, code.co_consts) + self.check_lnotab(code) def test_binary_subscr_on_unicode(self): # valid code get optimized code = compile('"foo"[0]', '', 'single') self.assertInBytecode(code, 'LOAD_CONST', 'f') self.assertNotInBytecode(code, 'BINARY_SUBSCR') + self.check_lnotab(code) code = compile('"\u0061\uffff"[1]', '', 'single') self.assertInBytecode(code, 'LOAD_CONST', '\uffff') self.assertNotInBytecode(code,'BINARY_SUBSCR') + self.check_lnotab(code) # With PEP 393, non-BMP char get optimized code = compile('"\U00012345"[0]', '', 'single') self.assertInBytecode(code, 'LOAD_CONST', '\U00012345') self.assertNotInBytecode(code, 'BINARY_SUBSCR') + self.check_lnotab(code) # invalid code doesn't get optimized # out of range code = compile('"fuu"[10]', '', 'single') self.assertInBytecode(code, 'BINARY_SUBSCR') + self.check_lnotab(code) def test_folding_of_unaryops_on_constants(self): for line, elem in ( @@ -252,13 +288,15 @@ class TestTranforms(BytecodeTestCase): self.assertInBytecode(code, 'LOAD_CONST', elem) for instr in dis.get_instructions(code): self.assertFalse(instr.opname.startswith('UNARY_')) + self.check_lnotab(code) # Check that -0.0 works after marshaling def negzero(): return -(1.0-1.0) - for instr in dis.get_instructions(code): + for instr in dis.get_instructions(negzero): self.assertFalse(instr.opname.startswith('UNARY_')) + self.check_lnotab(negzero) # Verify that unfoldables are skipped for line, elem, opname in ( @@ -268,6 +306,7 @@ class TestTranforms(BytecodeTestCase): code = compile(line, '', 'single') self.assertInBytecode(code, 'LOAD_CONST', elem) self.assertInBytecode(code, opname) + self.check_lnotab(code) def 
test_elim_extra_return(self): # RETURN LOAD_CONST None RETURN --> RETURN @@ -277,6 +316,7 @@ class TestTranforms(BytecodeTestCase): returns = [instr for instr in dis.get_instructions(f) if instr.opname == 'RETURN_VALUE'] self.assertEqual(len(returns), 1) + self.check_lnotab(f) def test_elim_jump_to_return(self): # JUMP_FORWARD to RETURN --> RETURN @@ -290,6 +330,7 @@ class TestTranforms(BytecodeTestCase): returns = [instr for instr in dis.get_instructions(f) if instr.opname == 'RETURN_VALUE'] self.assertEqual(len(returns), 2) + self.check_lnotab(f) def test_elim_jump_to_uncond_jump(self): # POP_JUMP_IF_FALSE to JUMP_FORWARD --> POP_JUMP_IF_FALSE to non-jump @@ -302,6 +343,7 @@ class TestTranforms(BytecodeTestCase): else: baz() self.check_jump_targets(f) + self.check_lnotab(f) def test_elim_jump_to_uncond_jump2(self): # POP_JUMP_IF_FALSE to JUMP_ABSOLUTE --> POP_JUMP_IF_FALSE to non-jump @@ -312,6 +354,7 @@ class TestTranforms(BytecodeTestCase): or d): a = foo() self.check_jump_targets(f) + self.check_lnotab(f) def test_elim_jump_to_uncond_jump3(self): # Intentionally use two-line expressions to test issue37213. 
@@ -320,18 +363,21 @@ class TestTranforms(BytecodeTestCase): return ((a and b) and c) self.check_jump_targets(f) + self.check_lnotab(f) self.assertEqual(count_instr_recursively(f, 'JUMP_IF_FALSE_OR_POP'), 2) # JUMP_IF_TRUE_OR_POP to JUMP_IF_TRUE_OR_POP --> JUMP_IF_TRUE_OR_POP to non-jump def f(a, b, c): return ((a or b) or c) self.check_jump_targets(f) + self.check_lnotab(f) self.assertEqual(count_instr_recursively(f, 'JUMP_IF_TRUE_OR_POP'), 2) # JUMP_IF_FALSE_OR_POP to JUMP_IF_TRUE_OR_POP --> POP_JUMP_IF_FALSE to non-jump def f(a, b, c): return ((a and b) or c) self.check_jump_targets(f) + self.check_lnotab(f) self.assertNotInBytecode(f, 'JUMP_IF_FALSE_OR_POP') self.assertInBytecode(f, 'JUMP_IF_TRUE_OR_POP') self.assertInBytecode(f, 'POP_JUMP_IF_FALSE') @@ -340,6 +386,7 @@ class TestTranforms(BytecodeTestCase): return ((a or b) and c) self.check_jump_targets(f) + self.check_lnotab(f) self.assertNotInBytecode(f, 'JUMP_IF_TRUE_OR_POP') self.assertInBytecode(f, 'JUMP_IF_FALSE_OR_POP') self.assertInBytecode(f, 'POP_JUMP_IF_TRUE') @@ -360,6 +407,7 @@ class TestTranforms(BytecodeTestCase): returns = [instr for instr in dis.get_instructions(f) if instr.opname == 'RETURN_VALUE'] self.assertLessEqual(len(returns), 6) + self.check_lnotab(f) def test_elim_jump_after_return2(self): # Eliminate dead code: jumps immediately after returns can't be reached @@ -374,6 +422,7 @@ class TestTranforms(BytecodeTestCase): returns = [instr for instr in dis.get_instructions(f) if instr.opname == 'RETURN_VALUE'] self.assertLessEqual(len(returns), 2) + self.check_lnotab(f) def test_make_function_doesnt_bail(self): def f(): @@ -381,6 +430,7 @@ class TestTranforms(BytecodeTestCase): pass return g self.assertNotInBytecode(f, 'BINARY_ADD') + self.check_lnotab(f) def test_constant_folding(self): # Issue #11244: aggressive constant folding. 
@@ -401,17 +451,20 @@ class TestTranforms(BytecodeTestCase): self.assertFalse(instr.opname.startswith('UNARY_')) self.assertFalse(instr.opname.startswith('BINARY_')) self.assertFalse(instr.opname.startswith('BUILD_')) + self.check_lnotab(code) def test_in_literal_list(self): def containtest(): return x in [a, b] self.assertEqual(count_instr_recursively(containtest, 'BUILD_LIST'), 0) + self.check_lnotab(containtest) def test_iterate_literal_list(self): def forloop(): for x in [a, b]: pass self.assertEqual(count_instr_recursively(forloop, 'BUILD_LIST'), 0) + self.check_lnotab(forloop) def test_condition_with_binop_with_bools(self): def f(): @@ -419,6 +472,7 @@ class TestTranforms(BytecodeTestCase): return 1 return 0 self.assertEqual(f(), 1) + self.check_lnotab(f) def test_if_with_if_expression(self): # Check bpo-37289 @@ -427,6 +481,19 @@ class TestTranforms(BytecodeTestCase): return True return False self.assertTrue(f(True)) + self.check_lnotab(f) + + def test_trailing_nops(self): + # Check the lnotab of a function that even after trivial + # optimization has trailing nops, which the lnotab adjustment has to + # handle properly (bpo-38115). + def f(x): + while 1: + return 3 + while 1: + return 5 + return 6 + self.check_lnotab(f) class TestBuglets(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2019-09-13-09-24-58.bpo-38115.BOO-Y1.rst b/Misc/NEWS.d/next/Library/2019-09-13-09-24-58.bpo-38115.BOO-Y1.rst new file mode 100644 index 00000000000..5119c0546e3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-09-13-09-24-58.bpo-38115.BOO-Y1.rst @@ -0,0 +1 @@ +Fix a bug in dis.findlinestarts() where it would return invalid bytecode offsets. Document that a code object's co_lnotab can contain invalid bytecode offsets. \ No newline at end of file diff --git a/Objects/lnotab_notes.txt b/Objects/lnotab_notes.txt index 3dab2b98661..71a29797182 100644 --- a/Objects/lnotab_notes.txt +++ b/Objects/lnotab_notes.txt @@ -3,7 +3,9 @@ All about co_lnotab, the line number table. 
Code objects store a field named co_lnotab. This is an array of unsigned bytes disguised as a Python bytes object. It is used to map bytecode offsets to source code line #s for tracebacks and to identify line number boundaries for -line tracing. +line tracing. Because of how the peephole optimizer works, co_lnotab can +contain bytecode offsets that are no longer valid (for example, if the +optimizer removed the last line in a function). The array is conceptually a compressed list of (bytecode offset increment, line number increment)