From 944fffee8916cb94321fa33cd3a43f4108717746 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 21 Apr 2022 16:10:37 +0100 Subject: [PATCH] GH-88116: Use a compact format to represent end line and column offsets. (GH-91666) * Stores all location info in linetable to conform to PEP 626. * Remove column table from code objects. * Remove end-line table from code objects. * Document new location table format --- Include/cpython/code.h | 32 +- Include/internal/pycore_code.h | 49 +- Lib/importlib/_bootstrap_external.py | 2 +- Lib/test/test_code.py | 169 ++++-- Lib/test/test_compile.py | 33 +- Lib/test/test_dis.py | 23 +- Lib/test/test_exceptions.py | 2 +- Lib/test/test_marshal.py | 10 +- Lib/test/test_traceback.py | 1 + .../2022-04-18-15-22-56.bpo-43950.qrTvWL.rst | 2 + Objects/clinic/codeobject.c.h | 70 +-- Objects/codeobject.c | 487 ++++++++++++------ Objects/frameobject.c | 6 +- Objects/locations.md | 69 +++ Programs/test_frozenmain.h | 26 +- Python/compile.c | 319 ++++++------ Python/marshal.c | 16 +- Tools/gdb/libpython.py | 76 ++- Tools/scripts/deepfreeze.py | 4 - Tools/scripts/umarshal.py | 2 - 20 files changed, 859 insertions(+), 539 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-04-18-15-22-56.bpo-43950.qrTvWL.rst create mode 100644 Objects/locations.md diff --git a/Include/cpython/code.h b/Include/cpython/code.h index 6dc2290ffeb..be3b10bba72 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -86,15 +86,7 @@ typedef uint16_t _Py_CODEUNIT; PyObject *co_filename; /* unicode (where it was loaded from) */ \ PyObject *co_name; /* unicode (name, for reference) */ \ PyObject *co_qualname; /* unicode (qualname, for reference) */ \ - PyObject *co_linetable; /* bytes (encoding addr<->lineno mapping) \ - See Objects/lnotab_notes.txt for details. 
\ - */ \ - PyObject *co_endlinetable; /* bytes object that holds end lineno for \ - instructions separated across different \ - lines */ \ - PyObject *co_columntable; /* bytes object that holds start/end column \ - offset each instruction */ \ - \ + PyObject *co_linetable; /* bytes object that holds location info */ \ PyObject *co_weakreflist; /* to support weakrefs to code objects */ \ /* Scratch space for extra data relating to the code object. \ Type is a void* to keep the format private in codeobject.c to force \ @@ -153,13 +145,13 @@ PyAPI_FUNC(PyCodeObject *) PyCode_New( int, int, int, int, int, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *, int, PyObject *, - PyObject *, PyObject *, PyObject *); + PyObject *); PyAPI_FUNC(PyCodeObject *) PyCode_NewWithPosOnlyArgs( int, int, int, int, int, int, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *, int, PyObject *, - PyObject *, PyObject *, PyObject *); + PyObject *); /* same as struct above */ /* Creates a new empty code object with the specified source location. 
*/ @@ -176,8 +168,8 @@ PyAPI_FUNC(int) PyCode_Addr2Location(PyCodeObject *, int, int *, int *, int *, i /* for internal use only */ struct _opaque { int computed_line; - const char *lo_next; - const char *limit; + const uint8_t *lo_next; + const uint8_t *limit; }; typedef struct _line_offsets { @@ -210,6 +202,20 @@ PyAPI_FUNC(int) _PyCode_GetExtra(PyObject *code, Py_ssize_t index, PyAPI_FUNC(int) _PyCode_SetExtra(PyObject *code, Py_ssize_t index, void *extra); + +typedef enum _PyCodeLocationInfoKind { + /* short forms are 0 to 9 */ + PY_CODE_LOCATION_INFO_SHORT0 = 0, + /* one line forms are 10 to 12 */ + PY_CODE_LOCATION_INFO_ONE_LINE0 = 10, + PY_CODE_LOCATION_INFO_ONE_LINE1 = 11, + PY_CODE_LOCATION_INFO_ONE_LINE2 = 12, + + PY_CODE_LOCATION_INFO_NO_COLUMNS = 13, + PY_CODE_LOCATION_INFO_LONG = 14, + PY_CODE_LOCATION_INFO_NONE = 15 +} _PyCodeLocationInfoKind; + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 8c868bcd5b5..3059db465e7 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -176,8 +176,6 @@ struct _PyCodeConstructor { PyObject *code; int firstlineno; PyObject *linetable; - PyObject *endlinetable; - PyObject *columntable; /* used by the code */ PyObject *consts; @@ -221,21 +219,10 @@ extern PyObject* _PyCode_GetCellvars(PyCodeObject *); extern PyObject* _PyCode_GetFreevars(PyCodeObject *); extern PyObject* _PyCode_GetCode(PyCodeObject *); -/* Return the ending source code line number from a bytecode index. */ -extern int _PyCode_Addr2EndLine(PyCodeObject *, int); - -/* Return the ending source code line number from a bytecode index. */ -extern int _PyCode_Addr2EndLine(PyCodeObject *, int); -/* Return the starting source code column offset from a bytecode index. */ -extern int _PyCode_Addr2Offset(PyCodeObject *, int); -/* Return the ending source code column offset from a bytecode index.
*/ -extern int _PyCode_Addr2EndOffset(PyCodeObject *, int); - /** API for initializing the line number tables. */ extern int _PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds); -extern int _PyCode_InitEndAddressRange(PyCodeObject* co, PyCodeAddressRange* bounds); -/** Out of process API for initializing the line number table. */ +/** Out of process API for initializing the location table. */ extern void _PyLineTable_InitAddressRange( const char *linetable, Py_ssize_t length, @@ -445,6 +432,40 @@ read_obj(uint16_t *p) return (PyObject *)val; } +static inline int +write_varint(uint8_t *ptr, unsigned int val) +{ + int written = 1; + while (val >= 64) { + *ptr++ = 64 | (val & 63); + val >>= 6; + written++; + } + *ptr = val; + return written; +} + +static inline int +write_signed_varint(uint8_t *ptr, int val) +{ + if (val < 0) { + val = ((-val)<<1) | 1; + } + else { + val = val << 1; + } + return write_varint(ptr, val); +} + +static inline int +write_location_entry_start(uint8_t *ptr, int code, int length) +{ + assert((code & 15) == code); + *ptr = 128 | (code << 3) | (length - 1); + return 1; +} + + #ifdef __cplusplus } #endif diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index 4eece8de24c..71e1e24b51e 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -402,7 +402,7 @@ _code_type = type(_write_atomic.__code__) # add JUMP_BACKWARD_NO_INTERRUPT, make JUMP_NO_INTERRUPT virtual) # Python 3.11a7 3492 (make POP_JUMP_IF_NONE/NOT_NONE/TRUE/FALSE relative) # Python 3.11a7 3493 (Make JUMP_IF_TRUE_OR_POP/JUMP_IF_FALSE_OR_POP relative) - +# Python 3.11a7 3494 (New location info table) # Python 3.12 will start with magic number 3500 diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index 872f7283fc5..a37ebd27dc3 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -230,9 +230,7 @@ class CodeTest(unittest.TestCase): co.co_name, co.co_qualname, 
co.co_firstlineno, - co.co_lnotab, - co.co_endlinetable, - co.co_columntable, + co.co_linetable, co.co_exceptiontable, co.co_freevars, co.co_cellvars) @@ -273,8 +271,6 @@ class CodeTest(unittest.TestCase): ("co_filename", "newfilename"), ("co_name", "newname"), ("co_linetable", code2.co_linetable), - ("co_endlinetable", code2.co_endlinetable), - ("co_columntable", code2.co_columntable), ): with self.subTest(attr=attr, value=value): new_code = code.replace(**{attr: value}) @@ -311,9 +307,7 @@ class CodeTest(unittest.TestCase): co.co_name, co.co_qualname, co.co_firstlineno, - co.co_lnotab, - co.co_endlinetable, - co.co_columntable, + co.co_linetable, co.co_exceptiontable, co.co_freevars, co.co_cellvars, @@ -391,14 +385,17 @@ class CodeTest(unittest.TestCase): ) def test_endline_and_columntable_none_when_no_debug_ranges(self): - # Make sure that if `-X no_debug_ranges` is used, the endlinetable and - # columntable are None. + # Make sure that if `-X no_debug_ranges` is used, there is + # minimal debug info code = textwrap.dedent(""" def f(): pass - assert f.__code__.co_endlinetable is None - assert f.__code__.co_columntable is None + positions = f.__code__.co_positions() + for line, end_line, column, end_column in positions: + assert line == end_line + assert column is None + assert end_column is None """) assert_python_ok('-X', 'no_debug_ranges', '-c', code) @@ -408,8 +405,11 @@ class CodeTest(unittest.TestCase): def f(): pass - assert f.__code__.co_endlinetable is None - assert f.__code__.co_columntable is None + positions = f.__code__.co_positions() + for line, end_line, column, end_column in positions: + assert line == end_line + assert column is None + assert end_column is None """) assert_python_ok('-c', code, PYTHONNODEBUGRANGES='1') @@ -421,35 +421,10 @@ class CodeTest(unittest.TestCase): x = 1 new_code = func.__code__.replace(co_linetable=b'') positions = new_code.co_positions() - next(positions) # Skip RESUME at start for line, end_line, column, end_column 
in positions: self.assertIsNone(line) self.assertEqual(end_line, new_code.co_firstlineno + 1) - @requires_debug_ranges() - def test_co_positions_empty_endlinetable(self): - def func(): - x = 1 - new_code = func.__code__.replace(co_endlinetable=b'') - positions = new_code.co_positions() - next(positions) # Skip RESUME at start - for line, end_line, column, end_column in positions: - self.assertEqual(line, new_code.co_firstlineno + 1) - self.assertIsNone(end_line) - - @requires_debug_ranges() - def test_co_positions_empty_columntable(self): - def func(): - x = 1 - new_code = func.__code__.replace(co_columntable=b'') - positions = new_code.co_positions() - next(positions) # Skip RESUME at start - for line, end_line, column, end_column in positions: - self.assertEqual(line, new_code.co_firstlineno + 1) - self.assertEqual(end_line, new_code.co_firstlineno + 1) - self.assertIsNone(column) - self.assertIsNone(end_column) - def isinterned(s): return s is sys.intern(('_' + s + '_')[1:-1]) @@ -527,6 +502,122 @@ class CodeWeakRefTest(unittest.TestCase): self.assertFalse(bool(coderef())) self.assertTrue(self.called) +# Python implementation of location table parsing algorithm +def read(it): + return next(it) + +def read_varint(it): + b = read(it) + val = b & 63; + shift = 0; + while b & 64: + b = read(it) + shift += 6 + val |= (b&63) << shift + return val + +def read_signed_varint(it): + uval = read_varint(it) + if uval & 1: + return -(uval >> 1) + else: + return uval >> 1 + +def parse_location_table(code): + line = code.co_firstlineno + it = iter(code.co_linetable) + while True: + try: + first_byte = read(it) + except StopIteration: + return + code = (first_byte >> 3) & 15 + length = (first_byte & 7) + 1 + if code == 15: + yield (code, length, None, None, None, None) + elif code == 14: + line_delta = read_signed_varint(it) + line += line_delta + end_line = line + read_varint(it) + col = read_varint(it) + if col == 0: + col = None + else: + col -= 1 + end_col = read_varint(it) 
+ if end_col == 0: + end_col = None + else: + end_col -= 1 + yield (code, length, line, end_line, col, end_col) + elif code == 13: # No column + line_delta = read_signed_varint(it) + line += line_delta + yield (code, length, line, line, None, None) + elif code in (10, 11, 12): # new line + line_delta = code - 10 + line += line_delta + column = read(it) + end_column = read(it) + yield (code, length, line, line, column, end_column) + else: + assert (0 <= code < 10) + second_byte = read(it) + column = code << 3 | (second_byte >> 4) + yield (code, length, line, line, column, column + (second_byte & 15)) + +def positions_from_location_table(code): + for _, length, line, end_line, col, end_col in parse_location_table(code): + for _ in range(length): + yield (line, end_line, col, end_col) + +def misshappen(): + """ + + + + + + """ + x = ( + + + 4 + + + + + y + + ) + y = ( + a + + + b + + + + d + ) + return q if ( + + x + + ) else p + + +class CodeLocationTest(unittest.TestCase): + + def check_positions(self, func): + pos1 = list(func.__code__.co_positions()) + pos2 = list(positions_from_location_table(func.__code__)) + for l1, l2 in zip(pos1, pos2): + self.assertEqual(l1, l2) + self.assertEqual(len(pos1), len(pos2)) + + + def test_positions(self): + self.check_positions(parse_location_table) + self.check_positions(misshappen) + if check_impl_detail(cpython=True) and ctypes is not None: py = ctypes.pythonapi diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index a4e80805d3e..5a9c618786f 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -158,7 +158,9 @@ if 1: s256 = "".join(["\n"] * 256 + ["spam"]) co = compile(s256, 'fn', 'exec') self.assertEqual(co.co_firstlineno, 1) - self.assertEqual(list(co.co_lines()), [(0, 2, None), (2, 10, 257)]) + lines = list(co.co_lines()) + self.assertEqual(lines[0][2], None) + self.assertEqual(lines[1][2], 257) def test_literals_with_leading_zeroes(self): for arg in ["077787", "0xj", "0x.", "0e", 
"090000000000000", @@ -892,12 +894,19 @@ if 1: with self.subTest(func=func): code = func.__code__ lines = list(code.co_lines()) - self.assertEqual(len(lines), 1) start, end, line = lines[0] self.assertEqual(start, 0) - self.assertEqual(end, len(code.co_code)) self.assertEqual(line, code.co_firstlineno) + def get_code_lines(self, code): + last_line = -2 + res = [] + for _, _, line in code.co_lines(): + if line is not None and line != last_line: + res.append(line - code.co_firstlineno) + last_line = line + return res + def test_lineno_attribute(self): def load_attr(): return ( @@ -939,9 +948,7 @@ if 1: for func, lines in zip(funcs, func_lines, strict=True): with self.subTest(func=func): - code_lines = [ line-func.__code__.co_firstlineno - for (_, _, line) in func.__code__.co_lines() - if line is not None ] + code_lines = self.get_code_lines(func.__code__) self.assertEqual(lines, code_lines) def test_line_number_genexp(self): @@ -952,11 +959,10 @@ if 1: x in y) - genexp_lines = [1, 3, 1] + genexp_lines = [0, 2, 0] genexp_code = return_genexp.__code__.co_consts[1] - code_lines = [ None if line is None else line-return_genexp.__code__.co_firstlineno - for (_, _, line) in genexp_code.co_lines() ] + code_lines = self.get_code_lines(genexp_code) self.assertEqual(genexp_lines, code_lines) def test_line_number_implicit_return_after_async_for(self): @@ -966,8 +972,7 @@ if 1: body expected_lines = [0, 1, 2, 1] - code_lines = [ None if line is None else line-test.__code__.co_firstlineno - for (_, _, line) in test.__code__.co_lines() ] + code_lines = self.get_code_lines(test.__code__) self.assertEqual(expected_lines, code_lines) def test_big_dict_literal(self): @@ -1112,14 +1117,14 @@ f( line=1, end_line=3, column=0, end_column=1) def test_very_long_line_end_offset(self): - # Make sure we get None for when the column offset is too large to - # store in a byte. + # Make sure we get the correct column offset for offsets + # too large to store in a byte. 
long_string = "a" * 1000 snippet = f"g('{long_string}')" compiled_code, _ = self.check_positions_against_ast(snippet) self.assertOpcodeSourcePositionIs(compiled_code, 'CALL', - line=1, end_line=1, column=None, end_column=None) + line=1, end_line=1, column=0, end_column=1005) def test_complex_single_line_expression(self): snippet = "a - b @ (c * x['key'] + 23)" diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index f560a5556c8..0bd589d5cfb 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -196,7 +196,7 @@ def bug42562(): # Set line number for 'pass' to None -bug42562.__code__ = bug42562.__code__.replace(co_linetable=b'\x04\x80') +bug42562.__code__ = bug42562.__code__.replace(co_linetable=b'\xf8') dis_bug42562 = """\ @@ -1560,32 +1560,19 @@ class InstructionTests(InstructionTestCase): @requires_debug_ranges() def test_co_positions_missing_info(self): code = compile('x, y, z', '', 'exec') - code_without_column_table = code.replace(co_columntable=b'') - actual = dis.get_instructions(code_without_column_table) + code_without_location_table = code.replace(co_linetable=b'') + actual = dis.get_instructions(code_without_location_table) for instruction in actual: with self.subTest(instruction=instruction): positions = instruction.positions self.assertEqual(len(positions), 4) if instruction.opname == "RESUME": continue - self.assertEqual(positions.lineno, 1) - self.assertEqual(positions.end_lineno, 1) + self.assertIsNone(positions.lineno) + self.assertIsNone(positions.end_lineno) self.assertIsNone(positions.col_offset) self.assertIsNone(positions.end_col_offset) - code_without_endline_table = code.replace(co_endlinetable=b'') - actual = dis.get_instructions(code_without_endline_table) - for instruction in actual: - with self.subTest(instruction=instruction): - positions = instruction.positions - self.assertEqual(len(positions), 4) - if instruction.opname == "RESUME": - continue - self.assertEqual(positions.lineno, 1) - 
self.assertIsNone(positions.end_lineno) - self.assertIsNotNone(positions.col_offset) - self.assertIsNotNone(positions.end_col_offset) - # get_instructions has its own tests above, so can rely on it to validate # the object oriented API class BytecodeTests(InstructionTestCase, DisTestBase): diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index 2b5b5193456..ff1a02821a5 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -2658,7 +2658,7 @@ class PEP626Tests(unittest.TestCase): def f(): 1/0 self.lineno_after_raise(f, 1) - f.__code__ = f.__code__.replace(co_linetable=b'\x04\x80\xff\x80') + f.__code__ = f.__code__.replace(co_linetable=b'\xf8\xf8\xf8\xf9\xf8\xf8\xf8') self.lineno_after_raise(f, None) def test_lineno_after_raise_in_with_exit(self): diff --git a/Lib/test/test_marshal.py b/Lib/test/test_marshal.py index 8d55382b195..aae86cc257d 100644 --- a/Lib/test/test_marshal.py +++ b/Lib/test/test_marshal.py @@ -129,18 +129,18 @@ class CodeTestCase(unittest.TestCase): self.assertEqual(co2.co_filename, "f2") @requires_debug_ranges() - def test_no_columntable_and_endlinetable_with_no_debug_ranges(self): + def test_minimal_linetable_with_no_debug_ranges(self): # Make sure when demarshalling objects with `-X no_debug_ranges` - # that the columntable and endlinetable are None. + # that the columns are None. 
co = ExceptionTestCase.test_exceptions.__code__ code = textwrap.dedent(""" import sys import marshal with open(sys.argv[1], 'rb') as f: co = marshal.load(f) - - assert co.co_endlinetable is None - assert co.co_columntable is None + positions = list(co.co_positions()) + assert positions[0][2] is None + assert positions[0][3] is None """) try: diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index 962322c89ff..ed5ddf95069 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -615,6 +615,7 @@ class TracebackErrorLocationCaretTests(unittest.TestCase): ' ^^^^^^^^^^\n' f' File "{TESTFN}", line {lineno_f}, in \n' f' {source}\n' + f' {"^"*len(source)}\n' ) self.assertEqual(result_lines, expected_error.splitlines()) diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-04-18-15-22-56.bpo-43950.qrTvWL.rst b/Misc/NEWS.d/next/Core and Builtins/2022-04-18-15-22-56.bpo-43950.qrTvWL.rst new file mode 100644 index 00000000000..c8bfa5914b0 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-04-18-15-22-56.bpo-43950.qrTvWL.rst @@ -0,0 +1,2 @@ +Use a single compact table for line starts, ends and column offsets. Reduces +memory consumption for location info by half. diff --git a/Objects/clinic/codeobject.c.h b/Objects/clinic/codeobject.c.h index 272bcd6ea17..41c5c2e1170 100644 --- a/Objects/clinic/codeobject.c.h +++ b/Objects/clinic/codeobject.c.h @@ -5,8 +5,8 @@ preserve PyDoc_STRVAR(code_new__doc__, "code(argcount, posonlyargcount, kwonlyargcount, nlocals, stacksize,\n" " flags, codestring, constants, names, varnames, filename, name,\n" -" qualname, firstlineno, linetable, endlinetable, columntable,\n" -" exceptiontable, freevars=(), cellvars=(), /)\n" +" qualname, firstlineno, linetable, exceptiontable, freevars=(),\n" +" cellvars=(), /)\n" "--\n" "\n" "Create a code object.
Not for the faint of heart."); @@ -17,7 +17,6 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount, PyObject *code, PyObject *consts, PyObject *names, PyObject *varnames, PyObject *filename, PyObject *name, PyObject *qualname, int firstlineno, PyObject *linetable, - PyObject *endlinetable, PyObject *columntable, PyObject *exceptiontable, PyObject *freevars, PyObject *cellvars); @@ -40,8 +39,6 @@ code_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyObject *qualname; int firstlineno; PyObject *linetable; - PyObject *endlinetable; - PyObject *columntable; PyObject *exceptiontable; PyObject *freevars = NULL; PyObject *cellvars = NULL; @@ -51,7 +48,7 @@ code_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) !_PyArg_NoKeywords("code", kwargs)) { goto exit; } - if (!_PyArg_CheckPositional("code", PyTuple_GET_SIZE(args), 18, 20)) { + if (!_PyArg_CheckPositional("code", PyTuple_GET_SIZE(args), 16, 18)) { goto exit; } argcount = _PyLong_AsInt(PyTuple_GET_ITEM(args, 0)); @@ -131,31 +128,29 @@ code_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) goto exit; } linetable = PyTuple_GET_ITEM(args, 14); - endlinetable = PyTuple_GET_ITEM(args, 15); - columntable = PyTuple_GET_ITEM(args, 16); - if (!PyBytes_Check(PyTuple_GET_ITEM(args, 17))) { - _PyArg_BadArgument("code", "argument 18", "bytes", PyTuple_GET_ITEM(args, 17)); + if (!PyBytes_Check(PyTuple_GET_ITEM(args, 15))) { + _PyArg_BadArgument("code", "argument 16", "bytes", PyTuple_GET_ITEM(args, 15)); goto exit; } - exceptiontable = PyTuple_GET_ITEM(args, 17); - if (PyTuple_GET_SIZE(args) < 19) { + exceptiontable = PyTuple_GET_ITEM(args, 15); + if (PyTuple_GET_SIZE(args) < 17) { goto skip_optional; } - if (!PyTuple_Check(PyTuple_GET_ITEM(args, 18))) { - _PyArg_BadArgument("code", "argument 19", "tuple", PyTuple_GET_ITEM(args, 18)); + if (!PyTuple_Check(PyTuple_GET_ITEM(args, 16))) { + _PyArg_BadArgument("code", "argument 17", "tuple", PyTuple_GET_ITEM(args, 16)); goto exit; } - 
freevars = PyTuple_GET_ITEM(args, 18); - if (PyTuple_GET_SIZE(args) < 20) { + freevars = PyTuple_GET_ITEM(args, 16); + if (PyTuple_GET_SIZE(args) < 18) { goto skip_optional; } - if (!PyTuple_Check(PyTuple_GET_ITEM(args, 19))) { - _PyArg_BadArgument("code", "argument 20", "tuple", PyTuple_GET_ITEM(args, 19)); + if (!PyTuple_Check(PyTuple_GET_ITEM(args, 17))) { + _PyArg_BadArgument("code", "argument 18", "tuple", PyTuple_GET_ITEM(args, 17)); goto exit; } - cellvars = PyTuple_GET_ITEM(args, 19); + cellvars = PyTuple_GET_ITEM(args, 17); skip_optional: - return_value = code_new_impl(type, argcount, posonlyargcount, kwonlyargcount, nlocals, stacksize, flags, code, consts, names, varnames, filename, name, qualname, firstlineno, linetable, endlinetable, columntable, exceptiontable, freevars, cellvars); + return_value = code_new_impl(type, argcount, posonlyargcount, kwonlyargcount, nlocals, stacksize, flags, code, consts, names, varnames, filename, name, qualname, firstlineno, linetable, exceptiontable, freevars, cellvars); exit: return return_value; @@ -167,8 +162,7 @@ PyDoc_STRVAR(code_replace__doc__, " co_flags=-1, co_firstlineno=-1, co_code=None, co_consts=None,\n" " co_names=None, co_varnames=None, co_freevars=None,\n" " co_cellvars=None, co_filename=None, co_name=None,\n" -" co_qualname=None, co_linetable=None, co_endlinetable=None,\n" -" co_columntable=None, co_exceptiontable=None)\n" +" co_qualname=None, co_linetable=None, co_exceptiontable=None)\n" "--\n" "\n" "Return a copy of the code object with new values for the specified fields."); @@ -185,16 +179,16 @@ code_replace_impl(PyCodeObject *self, int co_argcount, PyObject *co_varnames, PyObject *co_freevars, PyObject *co_cellvars, PyObject *co_filename, PyObject *co_name, PyObject *co_qualname, - PyBytesObject *co_linetable, PyObject *co_endlinetable, - PyObject *co_columntable, PyBytesObject *co_exceptiontable); + PyBytesObject *co_linetable, + PyBytesObject *co_exceptiontable); static PyObject * 
code_replace(PyCodeObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; - static const char * const _keywords[] = {"co_argcount", "co_posonlyargcount", "co_kwonlyargcount", "co_nlocals", "co_stacksize", "co_flags", "co_firstlineno", "co_code", "co_consts", "co_names", "co_varnames", "co_freevars", "co_cellvars", "co_filename", "co_name", "co_qualname", "co_linetable", "co_endlinetable", "co_columntable", "co_exceptiontable", NULL}; + static const char * const _keywords[] = {"co_argcount", "co_posonlyargcount", "co_kwonlyargcount", "co_nlocals", "co_stacksize", "co_flags", "co_firstlineno", "co_code", "co_consts", "co_names", "co_varnames", "co_freevars", "co_cellvars", "co_filename", "co_name", "co_qualname", "co_linetable", "co_exceptiontable", NULL}; static _PyArg_Parser _parser = {NULL, _keywords, "replace", 0}; - PyObject *argsbuf[20]; + PyObject *argsbuf[18]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0; int co_argcount = self->co_argcount; int co_posonlyargcount = self->co_posonlyargcount; @@ -213,8 +207,6 @@ code_replace(PyCodeObject *self, PyObject *const *args, Py_ssize_t nargs, PyObje PyObject *co_name = self->co_name; PyObject *co_qualname = self->co_qualname; PyBytesObject *co_linetable = (PyBytesObject *)self->co_linetable; - PyObject *co_endlinetable = self->co_endlinetable; - PyObject *co_columntable = self->co_columntable; PyBytesObject *co_exceptiontable = (PyBytesObject *)self->co_exceptiontable; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 0, 0, argsbuf); @@ -396,25 +388,13 @@ code_replace(PyCodeObject *self, PyObject *const *args, Py_ssize_t nargs, PyObje goto skip_optional_kwonly; } } - if (args[17]) { - co_endlinetable = args[17]; - if (!--noptargs) { - goto skip_optional_kwonly; - } - } - if (args[18]) { - co_columntable = args[18]; - if (!--noptargs) { - goto skip_optional_kwonly; - } - } - if (!PyBytes_Check(args[19])) { - 
_PyArg_BadArgument("replace", "argument 'co_exceptiontable'", "bytes", args[19]); + if (!PyBytes_Check(args[17])) { + _PyArg_BadArgument("replace", "argument 'co_exceptiontable'", "bytes", args[17]); goto exit; } - co_exceptiontable = (PyBytesObject *)args[19]; + co_exceptiontable = (PyBytesObject *)args[17]; skip_optional_kwonly: - return_value = code_replace_impl(self, co_argcount, co_posonlyargcount, co_kwonlyargcount, co_nlocals, co_stacksize, co_flags, co_firstlineno, co_code, co_consts, co_names, co_varnames, co_freevars, co_cellvars, co_filename, co_name, co_qualname, co_linetable, co_endlinetable, co_columntable, co_exceptiontable); + return_value = code_replace_impl(self, co_argcount, co_posonlyargcount, co_kwonlyargcount, co_nlocals, co_stacksize, co_flags, co_firstlineno, co_code, co_consts, co_names, co_varnames, co_freevars, co_cellvars, co_filename, co_name, co_qualname, co_linetable, co_exceptiontable); exit: return return_value; @@ -456,4 +436,4 @@ code__varname_from_oparg(PyCodeObject *self, PyObject *const *args, Py_ssize_t n exit: return return_value; } -/*[clinic end generated code: output=b1b83a70ffc5b7cd input=a9049054013a1b77]*/ +/*[clinic end generated code: output=ebfeec29d2cff674 input=a9049054013a1b77]*/ diff --git a/Objects/codeobject.c b/Objects/codeobject.c index e872b398e08..9a578158827 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -244,10 +244,6 @@ _PyCode_Validate(struct _PyCodeConstructor *con) con->qualname == NULL || !PyUnicode_Check(con->qualname) || con->filename == NULL || !PyUnicode_Check(con->filename) || con->linetable == NULL || !PyBytes_Check(con->linetable) || - con->endlinetable == NULL || - (con->endlinetable != Py_None && !PyBytes_Check(con->endlinetable)) || - con->columntable == NULL || - (con->columntable != Py_None && !PyBytes_Check(con->columntable)) || con->exceptiontable == NULL || !PyBytes_Check(con->exceptiontable) ) { PyErr_BadInternalCall(); @@ -307,10 +303,6 @@ init_code(PyCodeObject *co, 
struct _PyCodeConstructor *con) co->co_firstlineno = con->firstlineno; Py_INCREF(con->linetable); co->co_linetable = con->linetable; - Py_INCREF(con->endlinetable); - co->co_endlinetable = con->endlinetable; - Py_INCREF(con->columntable); - co->co_columntable = con->columntable; Py_INCREF(con->consts); co->co_consts = con->consts; @@ -347,6 +339,97 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) PyBytes_GET_SIZE(con->code)); } +static int +scan_varint(const uint8_t *ptr) +{ + int read = *ptr++; + int val = read & 63; + int shift = 0; + while (read & 64) { + read = *ptr++; + shift += 6; + val |= (read & 63) << shift; + } + return val; +} + +static int +scan_signed_varint(const uint8_t *ptr) +{ + int uval = scan_varint(ptr); + if (uval & 1) { + return -(int)(uval >> 1); + } + else { + return uval >> 1; + } +} + +static int +get_line_delta(const uint8_t *ptr) +{ + int code = ((*ptr) >> 3) & 15; + switch (code) { + case PY_CODE_LOCATION_INFO_NONE: + return 0; + case PY_CODE_LOCATION_INFO_NO_COLUMNS: + case PY_CODE_LOCATION_INFO_LONG: + return scan_signed_varint(ptr+1); + case PY_CODE_LOCATION_INFO_ONE_LINE0: + return 0; + case PY_CODE_LOCATION_INFO_ONE_LINE1: + return 1; + case PY_CODE_LOCATION_INFO_ONE_LINE2: + return 2; + default: + /* Same line */ + return 0; + } +} + +static PyObject * +remove_column_info(PyObject *locations) +{ + int offset = 0; + const uint8_t *data = (const uint8_t *)PyBytes_AS_STRING(locations); + PyObject *res = PyBytes_FromStringAndSize(NULL, 32); + if (res == NULL) { + PyErr_NoMemory(); + return NULL; + } + uint8_t *output = (uint8_t *)PyBytes_AS_STRING(res); + while (offset < PyBytes_GET_SIZE(locations)) { + Py_ssize_t write_offset = output - (uint8_t *)PyBytes_AS_STRING(res); + if (write_offset + 16 >= PyBytes_GET_SIZE(res)) { + if (_PyBytes_Resize(&res, PyBytes_GET_SIZE(res) * 2) < 0) { + return NULL; + } + output = (uint8_t *)PyBytes_AS_STRING(res) + write_offset; + } + int code = (data[offset] >> 3) & 15; + if (code == 
PY_CODE_LOCATION_INFO_NONE) { + *output++ = data[offset]; + } + else { + int blength = (data[offset] & 7)+1; + output += write_location_entry_start( + output, PY_CODE_LOCATION_INFO_NO_COLUMNS, blength); + int ldelta = get_line_delta(&data[offset]); + output += write_signed_varint(output, ldelta); + } + offset++; + while (offset < PyBytes_GET_SIZE(locations) && + (data[offset] & 128) == 0) { + offset++; + } + } + Py_ssize_t write_offset = output - (uint8_t *)PyBytes_AS_STRING(res); + if (_PyBytes_Resize(&res, write_offset)) { + return NULL; + } + return res; +} + /* The caller is responsible for ensuring that the given data is valid. */ PyCodeObject * @@ -373,21 +456,26 @@ _PyCode_New(struct _PyCodeConstructor *con) return NULL; } - // Discard the endlinetable and columntable if we are opted out of debug + PyObject *replacement_locations = NULL; + // Compact the linetable if we are opted out of debug // ranges. if (!_Py_GetConfig()->code_debug_ranges) { - con->endlinetable = Py_None; - con->columntable = Py_None; + replacement_locations = remove_column_info(con->linetable); + if (replacement_locations == NULL) { + return NULL; + } + con->linetable = replacement_locations; } Py_ssize_t size = PyBytes_GET_SIZE(con->code) / sizeof(_Py_CODEUNIT); PyCodeObject *co = PyObject_NewVar(PyCodeObject, &PyCode_Type, size); if (co == NULL) { + Py_XDECREF(replacement_locations); PyErr_NoMemory(); return NULL; } init_code(co, con); - + Py_XDECREF(replacement_locations); return co; } @@ -403,8 +491,8 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount, PyObject *varnames, PyObject *freevars, PyObject *cellvars, PyObject *filename, PyObject *name, PyObject *qualname, int firstlineno, - PyObject *linetable, PyObject *endlinetable, - PyObject *columntable, PyObject *exceptiontable) + PyObject *linetable, + PyObject *exceptiontable) { PyCodeObject *co = NULL; PyObject *localsplusnames = NULL; @@ -482,8 +570,6 @@ PyCode_NewWithPosOnlyArgs(int argcount, int 
posonlyargcount, int kwonlyargcount, .code = code, .firstlineno = firstlineno, .linetable = linetable, - .endlinetable = endlinetable, - .columntable = columntable, .consts = consts, .names = names, @@ -528,14 +614,16 @@ PyCode_New(int argcount, int kwonlyargcount, PyObject *code, PyObject *consts, PyObject *names, PyObject *varnames, PyObject *freevars, PyObject *cellvars, PyObject *filename, PyObject *name, PyObject *qualname, - int firstlineno, PyObject *linetable, PyObject *endlinetable, - PyObject *columntable, PyObject *exceptiontable) + int firstlineno, + PyObject *linetable, + PyObject *exceptiontable) { return PyCode_NewWithPosOnlyArgs(argcount, 0, kwonlyargcount, nlocals, stacksize, flags, code, consts, names, varnames, freevars, cellvars, filename, - name, qualname, firstlineno, linetable, - endlinetable, columntable, exceptiontable); + name, qualname, firstlineno, + linetable, + exceptiontable); } PyCodeObject * @@ -567,8 +655,6 @@ PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno) .code = emptystring, .firstlineno = firstlineno, .linetable = emptystring, - .endlinetable = emptystring, - .columntable = emptystring, .consts = nulltuple, .names = nulltuple, .localsplusnames = nulltuple, @@ -605,68 +691,10 @@ PyCode_Addr2Line(PyCodeObject *co, int addrq) return _PyCode_CheckLineNumber(addrq, &bounds); } -int -PyCode_Addr2Location(PyCodeObject *co, int addrq, - int *start_line, int *start_column, - int *end_line, int *end_column) -{ - *start_line = PyCode_Addr2Line(co, addrq); - *start_column = _PyCode_Addr2Offset(co, addrq); - *end_line = _PyCode_Addr2EndLine(co, addrq); - *end_column = _PyCode_Addr2EndOffset(co, addrq); - return 1; -} - -int -_PyCode_Addr2EndLine(PyCodeObject* co, int addrq) -{ - if (addrq < 0) { - return co->co_firstlineno; - } - else if (co->co_endlinetable == Py_None) { - return -1; - } - - assert(addrq >= 0 && addrq < _PyCode_NBYTES(co)); - PyCodeAddressRange bounds; - _PyCode_InitEndAddressRange(co, 
&bounds); - return _PyCode_CheckLineNumber(addrq, &bounds); -} - -int -_PyCode_Addr2Offset(PyCodeObject* co, int addrq) -{ - if (co->co_columntable == Py_None || addrq < 0) { - return -1; - } - addrq /= sizeof(_Py_CODEUNIT); - if (addrq*2 >= PyBytes_GET_SIZE(co->co_columntable)) { - return -1; - } - - unsigned char* bytes = (unsigned char*)PyBytes_AS_STRING(co->co_columntable); - return bytes[addrq*2] - 1; -} - -int -_PyCode_Addr2EndOffset(PyCodeObject* co, int addrq) -{ - if (co->co_columntable == Py_None || addrq < 0) { - return -1; - } - addrq /= sizeof(_Py_CODEUNIT); - if (addrq*2+1 >= PyBytes_GET_SIZE(co->co_columntable)) { - return -1; - } - - unsigned char* bytes = (unsigned char*)PyBytes_AS_STRING(co->co_columntable); - return bytes[addrq*2+1] - 1; -} - void _PyLineTable_InitAddressRange(const char *linetable, Py_ssize_t length, int firstlineno, PyCodeAddressRange *range) { - range->opaque.lo_next = linetable; + range->opaque.lo_next = (const uint8_t *)linetable; range->opaque.limit = range->opaque.lo_next + length; range->ar_start = -1; range->ar_end = 0; @@ -677,21 +705,13 @@ _PyLineTable_InitAddressRange(const char *linetable, Py_ssize_t length, int firs int _PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds) { + assert(co->co_linetable != NULL); const char *linetable = PyBytes_AS_STRING(co->co_linetable); Py_ssize_t length = PyBytes_GET_SIZE(co->co_linetable); _PyLineTable_InitAddressRange(linetable, length, co->co_firstlineno, bounds); return bounds->ar_line; } -int -_PyCode_InitEndAddressRange(PyCodeObject* co, PyCodeAddressRange* bounds) -{ - char* linetable = PyBytes_AS_STRING(co->co_endlinetable); - Py_ssize_t length = PyBytes_GET_SIZE(co->co_endlinetable); - _PyLineTable_InitAddressRange(linetable, length, co->co_firstlineno, bounds); - return bounds->ar_line; -} - /* Update *bounds to describe the first and one-past-the-last instructions in the same line as lasti. Return the number of that line, or -1 if lasti is out of bounds. 
*/ int @@ -710,43 +730,182 @@ _PyCode_CheckLineNumber(int lasti, PyCodeAddressRange *bounds) return bounds->ar_line; } +static int +is_no_line_marker(uint8_t b) +{ + return (b >> 3) == 0x1f; +} + + +#define ASSERT_VALID_BOUNDS(bounds) \ + assert(bounds->opaque.lo_next <= bounds->opaque.limit && \ + (bounds->ar_line == -1 || bounds->ar_line == bounds->opaque.computed_line) && \ + (bounds->opaque.lo_next == bounds->opaque.limit || \ + (*bounds->opaque.lo_next) & 128)) + +static int +next_code_delta(PyCodeAddressRange *bounds) +{ + assert((*bounds->opaque.lo_next) & 128); + return (((*bounds->opaque.lo_next) & 7) + 1) * sizeof(_Py_CODEUNIT); +} + +static int +previous_code_delta(PyCodeAddressRange *bounds) +{ + const uint8_t *ptr = bounds->opaque.lo_next-1; + while (((*ptr) & 128) == 0) { + ptr--; + } + return (((*ptr) & 7) + 1) * sizeof(_Py_CODEUNIT); +} + +static int +read_byte(PyCodeAddressRange *bounds) +{ + return *bounds->opaque.lo_next++; +} + +static int +read_varint(PyCodeAddressRange *bounds) +{ + int read = read_byte(bounds); + int val = read & 63; + int shift = 0; + while (read & 64) { + read = read_byte(bounds); + shift += 6; + val |= (read & 63) << shift; + } + return val; +} + +static int +read_signed_varint(PyCodeAddressRange *bounds) +{ + int uval = read_varint(bounds); + if (uval & 1) { + return -(int)(uval >> 1); + } + else { + return uval >> 1; + } +} + static void retreat(PyCodeAddressRange *bounds) { - int ldelta = ((signed char *)bounds->opaque.lo_next)[-1]; - if (ldelta == -128) { - ldelta = 0; - } - bounds->opaque.computed_line -= ldelta; - bounds->opaque.lo_next -= 2; + ASSERT_VALID_BOUNDS(bounds); + assert(bounds->ar_start > 0); + do { + bounds->opaque.lo_next--; + } while (((*bounds->opaque.lo_next) & 128) == 0); + bounds->opaque.computed_line -= get_line_delta(bounds->opaque.lo_next); bounds->ar_end = bounds->ar_start; - bounds->ar_start -= ((unsigned char *)bounds->opaque.lo_next)[-2]; - ldelta = ((signed char 
*)bounds->opaque.lo_next)[-1]; - if (ldelta == -128) { + bounds->ar_start -= previous_code_delta(bounds); + if (is_no_line_marker(bounds->opaque.lo_next[-1])) { bounds->ar_line = -1; } else { bounds->ar_line = bounds->opaque.computed_line; } + ASSERT_VALID_BOUNDS(bounds); } static void advance(PyCodeAddressRange *bounds) { - bounds->ar_start = bounds->ar_end; - int delta = ((unsigned char *)bounds->opaque.lo_next)[0]; - bounds->ar_end += delta; - int ldelta = ((signed char *)bounds->opaque.lo_next)[1]; - bounds->opaque.lo_next += 2; - if (ldelta == -128) { + ASSERT_VALID_BOUNDS(bounds); + bounds->opaque.computed_line += get_line_delta(bounds->opaque.lo_next); + if (is_no_line_marker(*bounds->opaque.lo_next)) { bounds->ar_line = -1; } else { - bounds->opaque.computed_line += ldelta; bounds->ar_line = bounds->opaque.computed_line; } + bounds->ar_start = bounds->ar_end; + bounds->ar_end += next_code_delta(bounds); + do { + bounds->opaque.lo_next++; + } while (bounds->opaque.lo_next < bounds->opaque.limit && + ((*bounds->opaque.lo_next) & 128) == 0); + ASSERT_VALID_BOUNDS(bounds); } +static void +advance_with_locations(PyCodeAddressRange *bounds, int *endline, int *column, int *endcolumn) +{ + ASSERT_VALID_BOUNDS(bounds); + int first_byte = read_byte(bounds); + int code = (first_byte >> 3) & 15; + bounds->ar_start = bounds->ar_end; + bounds->ar_end = bounds->ar_start + ((first_byte & 7) + 1) * sizeof(_Py_CODEUNIT); + switch(code) { + case PY_CODE_LOCATION_INFO_NONE: + bounds->ar_line = *endline = -1; + *column = *endcolumn = -1; + break; + case PY_CODE_LOCATION_INFO_LONG: + { + bounds->opaque.computed_line += read_signed_varint(bounds); + bounds->ar_line = bounds->opaque.computed_line; + *endline = bounds->ar_line + read_varint(bounds); + *column = read_varint(bounds)-1; + *endcolumn = read_varint(bounds)-1; + break; + } + case PY_CODE_LOCATION_INFO_NO_COLUMNS: + { + /* No column */ + bounds->opaque.computed_line += read_signed_varint(bounds); + *endline = 
bounds->ar_line = bounds->opaque.computed_line; + *column = *endcolumn = -1; + break; + } + case PY_CODE_LOCATION_INFO_ONE_LINE0: + case PY_CODE_LOCATION_INFO_ONE_LINE1: + case PY_CODE_LOCATION_INFO_ONE_LINE2: + { + /* one line form */ + int line_delta = code - 10; + bounds->opaque.computed_line += line_delta; + *endline = bounds->ar_line = bounds->opaque.computed_line; + *column = read_byte(bounds); + *endcolumn = read_byte(bounds); + break; + } + default: + { + /* Short forms */ + int second_byte = read_byte(bounds); + assert((second_byte & 128) == 0); + *endline = bounds->ar_line = bounds->opaque.computed_line; + *column = code << 3 | (second_byte >> 4); + *endcolumn = *column + (second_byte & 15); + } + } + ASSERT_VALID_BOUNDS(bounds); +} +int +PyCode_Addr2Location(PyCodeObject *co, int addrq, + int *start_line, int *start_column, + int *end_line, int *end_column) +{ + if (addrq < 0) { + *start_line = *end_line = co->co_firstlineno; + *start_column = *end_column = 0; + } + assert(addrq >= 0 && addrq < _PyCode_NBYTES(co)); + PyCodeAddressRange bounds; + _PyCode_InitAddressRange(co, &bounds); + _PyCode_CheckLineNumber(addrq, &bounds); + retreat(&bounds); + advance_with_locations(&bounds, end_line, start_column, end_column); + *start_line = bounds.ar_line; + return 1; +} + + static inline int at_end(PyCodeAddressRange *bounds) { return bounds->opaque.lo_next >= bounds->opaque.limit; @@ -759,10 +918,7 @@ _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range) return 0; } retreat(range); - while (range->ar_start == range->ar_end) { - assert(range->ar_start > 0); - retreat(range); - } + assert(range->ar_end > range->ar_start); return 1; } @@ -773,13 +929,37 @@ _PyLineTable_NextAddressRange(PyCodeAddressRange *range) return 0; } advance(range); - while (range->ar_start == range->ar_end) { - assert(!at_end(range)); - advance(range); - } + assert(range->ar_end > range->ar_start); return 1; } +int +_PyLineTable_StartsLine(PyCodeAddressRange *range) +{ + if 
(range->ar_start <= 0) { + return 0; + } + const uint8_t *ptr = range->opaque.lo_next; + do { + ptr--; + } while (((*ptr) & 128) == 0); + int code = ((*ptr)>> 3) & 15; + switch(code) { + case PY_CODE_LOCATION_INFO_LONG: + return 0; + case PY_CODE_LOCATION_INFO_NO_COLUMNS: + case PY_CODE_LOCATION_INFO_NONE: + return ptr[1] != 0; + case PY_CODE_LOCATION_INFO_ONE_LINE0: + return 0; + case PY_CODE_LOCATION_INFO_ONE_LINE1: + case PY_CODE_LOCATION_INFO_ONE_LINE2: + return 1; + default: + return 0; + } +} + static int emit_pair(PyObject **bytes, int *offset, int a, int b) { @@ -856,7 +1036,6 @@ typedef struct { PyObject_HEAD PyCodeObject *li_code; PyCodeAddressRange li_line; - char *li_end; } lineiterator; @@ -962,7 +1141,11 @@ new_linesiterator(PyCodeObject *code) typedef struct { PyObject_HEAD PyCodeObject* pi_code; + PyCodeAddressRange pi_range; int pi_offset; + int pi_endline; + int pi_column; + int pi_endcolumn; } positionsiterator; static void @@ -983,22 +1166,19 @@ _source_offset_converter(int* value) { static PyObject* positionsiter_next(positionsiterator* pi) { - if (pi->pi_offset >= _PyCode_NBYTES(pi->pi_code)) { - return NULL; + if (pi->pi_offset >= pi->pi_range.ar_end) { + assert(pi->pi_offset == pi->pi_range.ar_end); + if (at_end(&pi->pi_range)) { + return NULL; + } + advance_with_locations(&pi->pi_range, &pi->pi_endline, &pi->pi_column, &pi->pi_endcolumn); } - - int start_line, start_col, end_line, end_col; - if (!PyCode_Addr2Location(pi->pi_code, pi->pi_offset, &start_line, - &start_col, &end_line, &end_col)) { - return NULL; - } - pi->pi_offset += 2; return Py_BuildValue("(O&O&O&O&)", - _source_offset_converter, &start_line, - _source_offset_converter, &end_line, - _source_offset_converter, &start_col, - _source_offset_converter, &end_col); + _source_offset_converter, &pi->pi_range.ar_line, + _source_offset_converter, &pi->pi_endline, + _source_offset_converter, &pi->pi_column, + _source_offset_converter, &pi->pi_endcolumn); } static PyTypeObject 
PositionsIterator = { @@ -1053,7 +1233,8 @@ code_positionsiterator(PyCodeObject* code, PyObject* Py_UNUSED(args)) } Py_INCREF(code); pi->pi_code = code; - pi->pi_offset = 0; + _PyCode_InitAddressRange(code, &pi->pi_range); + pi->pi_offset = pi->pi_range.ar_end; return (PyObject*)pi; } @@ -1203,8 +1384,6 @@ code.__new__ as code_new qualname: unicode firstlineno: int linetable: object(subclass_of="&PyBytes_Type") - endlinetable: object - columntable: object exceptiontable: object(subclass_of="&PyBytes_Type") freevars: object(subclass_of="&PyTuple_Type", c_default="NULL") = () cellvars: object(subclass_of="&PyTuple_Type", c_default="NULL") = () @@ -1219,10 +1398,9 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount, PyObject *code, PyObject *consts, PyObject *names, PyObject *varnames, PyObject *filename, PyObject *name, PyObject *qualname, int firstlineno, PyObject *linetable, - PyObject *endlinetable, PyObject *columntable, PyObject *exceptiontable, PyObject *freevars, PyObject *cellvars) -/*[clinic end generated code: output=e1d2086aa8da7c08 input=a06cd92369134063]*/ +/*[clinic end generated code: output=069fa20d299f9dda input=e31da3c41ad8064a]*/ { PyObject *co = NULL; PyObject *ournames = NULL; @@ -1263,17 +1441,6 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount, goto cleanup; } - if (!Py_IsNone(endlinetable) && !PyBytes_Check(endlinetable)) { - PyErr_SetString(PyExc_ValueError, - "code: endlinetable must be None or bytes"); - goto cleanup; - } - if (!Py_IsNone(columntable) && !PyBytes_Check(columntable)) { - PyErr_SetString(PyExc_ValueError, - "code: columntable must be None or bytes"); - goto cleanup; - } - ournames = validate_and_copy_tuple(names); if (ournames == NULL) goto cleanup; @@ -1300,8 +1467,8 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount, ourvarnames, ourfreevars, ourcellvars, filename, name, qualname, firstlineno, - linetable, endlinetable, - columntable, exceptiontable + linetable, + 
exceptiontable ); cleanup: Py_XDECREF(ournames); @@ -1337,8 +1504,6 @@ code_dealloc(PyCodeObject *co) Py_XDECREF(co->co_name); Py_XDECREF(co->co_qualname); Py_XDECREF(co->co_linetable); - Py_XDECREF(co->co_endlinetable); - Py_XDECREF(co->co_columntable); Py_XDECREF(co->co_exceptiontable); if (co->co_weakreflist != NULL) { PyObject_ClearWeakRefs((PyObject*)co); @@ -1488,8 +1653,6 @@ static PyMemberDef code_memberlist[] = { {"co_qualname", T_OBJECT, OFF(co_qualname), READONLY}, {"co_firstlineno", T_INT, OFF(co_firstlineno), READONLY}, {"co_linetable", T_OBJECT, OFF(co_linetable), READONLY}, - {"co_endlinetable", T_OBJECT, OFF(co_endlinetable), READONLY}, - {"co_columntable", T_OBJECT, OFF(co_columntable), READONLY}, {"co_exceptiontable", T_OBJECT, OFF(co_exceptiontable), READONLY}, {NULL} /* Sentinel */ }; @@ -1585,8 +1748,6 @@ code.replace co_name: unicode(c_default="self->co_name") = None co_qualname: unicode(c_default="self->co_qualname") = None co_linetable: PyBytesObject(c_default="(PyBytesObject *)self->co_linetable") = None - co_endlinetable: object(c_default="self->co_endlinetable") = None - co_columntable: object(c_default="self->co_columntable") = None co_exceptiontable: PyBytesObject(c_default="(PyBytesObject *)self->co_exceptiontable") = None Return a copy of the code object with new values for the specified fields. 
@@ -1601,9 +1762,9 @@ code_replace_impl(PyCodeObject *self, int co_argcount, PyObject *co_varnames, PyObject *co_freevars, PyObject *co_cellvars, PyObject *co_filename, PyObject *co_name, PyObject *co_qualname, - PyBytesObject *co_linetable, PyObject *co_endlinetable, - PyObject *co_columntable, PyBytesObject *co_exceptiontable) -/*[clinic end generated code: output=f046bf0be3bab91f input=78dbe204dbd06c2f]*/ + PyBytesObject *co_linetable, + PyBytesObject *co_exceptiontable) +/*[clinic end generated code: output=b6cd9988391d5711 input=f6f68e03571f8d7c]*/ { #define CHECK_INT_ARG(ARG) \ if (ARG < 0) { \ @@ -1664,24 +1825,12 @@ code_replace_impl(PyCodeObject *self, int co_argcount, co_freevars = freevars; } - if (!Py_IsNone(co_endlinetable) && !PyBytes_Check(co_endlinetable)) { - PyErr_SetString(PyExc_ValueError, - "co_endlinetable must be None or bytes"); - goto error; - } - if (!Py_IsNone(co_columntable) && !PyBytes_Check(co_columntable)) { - PyErr_SetString(PyExc_ValueError, - "co_columntable must be None or bytes"); - goto error; - } - co = PyCode_NewWithPosOnlyArgs( co_argcount, co_posonlyargcount, co_kwonlyargcount, co_nlocals, co_stacksize, co_flags, (PyObject*)co_code, co_consts, co_names, co_varnames, co_freevars, co_cellvars, co_filename, co_name, - co_qualname, co_firstlineno, (PyObject*)co_linetable, - (PyObject*)co_endlinetable, (PyObject*)co_columntable, - (PyObject*)co_exceptiontable); + co_qualname, co_firstlineno, + (PyObject*)co_linetable, (PyObject*)co_exceptiontable); error: Py_XDECREF(code); diff --git a/Objects/frameobject.c b/Objects/frameobject.c index e65395ee5f2..7278ca14749 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -378,6 +378,7 @@ marklines(PyCodeObject *code, int len) PyCodeAddressRange bounds; _PyCode_InitAddressRange(code, &bounds); assert (bounds.ar_end == 0); + int last_line = -1; int *linestarts = PyMem_New(int, len); if (linestarts == NULL) { @@ -389,7 +390,10 @@ marklines(PyCodeObject *code, int len) while 
(_PyLineTable_NextAddressRange(&bounds)) { assert(bounds.ar_start / (int)sizeof(_Py_CODEUNIT) < len); - linestarts[bounds.ar_start / sizeof(_Py_CODEUNIT)] = bounds.ar_line; + if (bounds.ar_line != last_line && bounds.ar_line != -1) { + linestarts[bounds.ar_start / sizeof(_Py_CODEUNIT)] = bounds.ar_line; + last_line = bounds.ar_line; + } } return linestarts; } diff --git a/Objects/locations.md b/Objects/locations.md new file mode 100644 index 00000000000..18a338a9597 --- /dev/null +++ b/Objects/locations.md @@ -0,0 +1,69 @@ +# Locations table + +For versions up to 3.10 see ./lnotab_notes.txt + +In version 3.11 the `co_linetable` bytes object of code objects contains a compact representation of the positions returned by the `co_positions()` iterator. + +The `co_linetable` consists of a sequence of location entries. +Each entry starts with a byte with the most significant bit set, followed by zero or more bytes with most significant bit unset. + +Each entry contains the following information: +* The number of code units covered by this entry (length) +* The start line +* The end line +* The start column +* The end column + +The first byte has the following format: + +Bit 7 | Bits 3-6 | Bits 0-2 + ---- | ---- | ---- + 1 | Code | Length (in code units) - 1 + +The codes are enumerated in the `_PyCodeLocationInfoKind` enum. + +## Variable length integer encodings + +Integers are often encoded using a variable length integer encoding + +### Unsigned integers (varint) + +Unsigned integers are encoded in 6 bit chunks, least significant first. +Each chunk but the last has bit 6 set. 
+For example: + +* 63 is encoded as `0x3f` +* 200 is encoded as `0x48`, `0x03` + +### Signed integers (svarint) + +Signed integers are encoded by converting them to unsigned integers, using the following function: +```Python +def convert(s): + if s < 0: + return ((-s)<<1) | 1 + else: + return (s<<1) +``` + +## Location entries + +The meaning of the codes and the following bytes are as follows: + +Code | Meaning | Start line | End line | Start column | End column + ---- | ---- | ---- | ---- | ---- | ---- + 0-9 | Short form | Δ 0 | Δ 0 | See below | See below + 10-12 | One line form | Δ (code - 10) | Δ 0 | unsigned byte | unsigned byte + 13 | No column info | Δ svarint | Δ 0 | None | None + 14 | Long form | Δ svarint | Δ varint | varint | varint + 15 | No location | None | None | None | None + +The Δ means the value is encoded as a delta from another value: +* Start line: Delta from the previous start line, or `co_firstlineno` for the first entry. +* End line: Delta from the start line + +### The short forms + +Codes 0-9 are the short forms. The short form consists of two bytes, the second byte holding additional column information. The code is the start column divided by 8 (and rounded down). 
+* Start column: `(code*8) + ((second_byte>>4)&7)` +* End column: `start_column + (second_byte&15)` diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index a3c09529116..3034927d7a1 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -28,19 +28,15 @@ unsigned char M_test_frozenmain[] = { 107,101,121,169,0,243,0,0,0,0,250,18,116,101,115,116, 95,102,114,111,122,101,110,109,97,105,110,46,112,121,250,8, 60,109,111,100,117,108,101,62,114,11,0,0,0,1,0,0, - 0,115,18,0,0,0,2,128,8,3,8,1,22,2,34,1, - 42,1,8,1,48,7,4,249,115,20,0,0,0,2,128,8, - 3,8,1,22,2,34,1,42,1,2,7,4,1,2,249,52, - 7,115,176,0,0,0,0,0,1,11,1,11,1,11,1,11, - 1,25,1,25,1,25,1,25,1,6,1,6,7,27,1,28, - 1,28,1,28,1,28,1,28,1,28,1,28,1,28,1,6, - 1,6,7,17,19,22,19,27,19,27,19,27,19,27,19,27, - 1,28,1,28,1,28,1,28,1,28,1,28,1,28,1,28, - 10,39,10,27,10,39,10,39,10,39,10,39,10,39,10,41, - 10,41,10,41,10,41,10,41,10,41,10,41,42,50,10,51, - 10,51,10,51,10,51,10,51,1,7,12,2,1,42,1,42, - 5,8,5,10,5,10,11,41,21,24,11,41,11,41,28,34, - 35,38,28,39,28,39,28,39,28,39,28,39,11,41,11,41, - 5,42,5,42,5,42,5,42,5,42,5,42,5,42,5,42, - 5,42,1,42,1,42,114,9,0,0,0, + 0,115,152,0,0,0,248,240,6,0,1,11,128,10,128,10, + 128,10,216,0,24,208,0,24,208,0,24,208,0,24,224,0, + 5,128,5,208,6,26,209,0,27,212,0,27,208,0,27,216, + 0,5,128,5,128,106,144,35,148,40,209,0,27,212,0,27, + 208,0,27,216,9,38,208,9,26,212,9,38,209,9,40,212, + 9,40,168,24,212,9,50,128,6,240,2,6,12,2,240,0, + 7,1,42,240,0,7,1,42,128,67,240,14,0,5,10,128, + 69,208,10,40,144,67,208,10,40,208,10,40,152,54,160,35, + 156,59,208,10,40,208,10,40,209,4,41,212,4,41,208,4, + 41,208,4,41,240,15,7,1,42,240,0,7,1,42,114,9, + 0,0,0, }; diff --git a/Python/compile.c b/Python/compile.c index ceaf85298d7..d66ee17293a 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -7053,25 +7053,23 @@ compiler_match(struct compiler *c, stmt_ty s) XXX must handle implicit jumps from one block to next */ + struct assembler { PyObject *a_bytecode; /* bytes 
containing bytecode */ - PyObject *a_lnotab; /* bytes containing lnotab */ - PyObject *a_enotab; /* bytes containing enotab */ - PyObject *a_cnotab; /* bytes containing cnotab */ PyObject *a_except_table; /* bytes containing exception table */ basicblock *a_entry; int a_offset; /* offset into bytecode */ int a_nblocks; /* number of reachable blocks */ int a_except_table_off; /* offset into exception table */ - int a_lnotab_off; /* offset into lnotab */ - int a_enotab_off; /* offset into enotab */ - int a_cnotab_off; /* offset into cnotab */ int a_prevlineno; /* lineno of last emitted line in line table */ int a_prev_end_lineno; /* end_lineno of last emitted line in line table */ int a_lineno; /* lineno of last emitted instruction */ int a_end_lineno; /* end_lineno of last emitted instruction */ int a_lineno_start; /* bytecode start offset of current lineno */ int a_end_lineno_start; /* bytecode start offset of current end_lineno */ + /* Location Info */ + PyObject* a_linetable; /* bytes containing location info */ + int a_location_off; /* offset of last written location info frame */ }; Py_LOCAL_INLINE(void) @@ -7169,25 +7167,15 @@ assemble_init(struct assembler *a, int nblocks, int firstlineno) memset(a, 0, sizeof(struct assembler)); a->a_prevlineno = a->a_lineno = firstlineno; a->a_prev_end_lineno = a->a_end_lineno = firstlineno; - a->a_lnotab = NULL; - a->a_enotab = NULL; - a->a_cnotab = NULL; - a->a_cnotab_off = 0; + a->a_linetable = NULL; + a->a_location_off = 0; a->a_except_table = NULL; a->a_bytecode = PyBytes_FromStringAndSize(NULL, DEFAULT_CODE_SIZE); if (a->a_bytecode == NULL) { goto error; } - a->a_lnotab = PyBytes_FromStringAndSize(NULL, DEFAULT_LNOTAB_SIZE); - if (a->a_lnotab == NULL) { - goto error; - } - a->a_enotab = PyBytes_FromStringAndSize(NULL, DEFAULT_LNOTAB_SIZE); - if (a->a_enotab == NULL) { - goto error; - } - a->a_cnotab = PyBytes_FromStringAndSize(NULL, DEFAULT_CNOTAB_SIZE); - if (a->a_cnotab == NULL) { + a->a_linetable = 
PyBytes_FromStringAndSize(NULL, DEFAULT_CNOTAB_SIZE); + if (a->a_linetable == NULL) { goto error; } a->a_except_table = PyBytes_FromStringAndSize(NULL, DEFAULT_LNOTAB_SIZE); @@ -7201,9 +7189,7 @@ assemble_init(struct assembler *a, int nblocks, int firstlineno) return 1; error: Py_XDECREF(a->a_bytecode); - Py_XDECREF(a->a_lnotab); - Py_XDECREF(a->a_enotab); - Py_XDECREF(a->a_cnotab); + Py_XDECREF(a->a_linetable); Py_XDECREF(a->a_except_table); return 0; } @@ -7212,9 +7198,7 @@ static void assemble_free(struct assembler *a) { Py_XDECREF(a->a_bytecode); - Py_XDECREF(a->a_lnotab); - Py_XDECREF(a->a_enotab); - Py_XDECREF(a->a_cnotab); + Py_XDECREF(a->a_linetable); Py_XDECREF(a->a_except_table); } @@ -7230,25 +7214,6 @@ blocksize(basicblock *b) return size; } -static int -assemble_emit_table_pair(struct assembler* a, PyObject** table, int* offset, - int left, int right) -{ - Py_ssize_t len = PyBytes_GET_SIZE(*table); - if (*offset + 2 >= len) { - if (_PyBytes_Resize(table, len * 2) < 0) - return 0; - } - unsigned char* table_entry = (unsigned char*)PyBytes_AS_STRING(*table); - - table_entry += *offset; - *offset += 2; - - *table_entry++ = left; - *table_entry++ = right; - return 1; -} - static basicblock * push_except_block(ExceptStack *stack, struct instr *setup) { assert(is_block_push(setup)); @@ -7492,118 +7457,153 @@ assemble_exception_table(struct assembler *a) return 1; } -/* Appends a range to the end of the line number table. See - * Objects/lnotab_notes.txt for the description of the line number table. */ +/* Code location emitting code. See locations.md for a description of the format. 
*/ + +#define MSB 0x80 + +static void +write_location_byte(struct assembler* a, int val) +{ + PyBytes_AS_STRING(a->a_linetable)[a->a_location_off] = val&255; + a->a_location_off++; +} + + +static uint8_t * +location_pointer(struct assembler* a) +{ + return (uint8_t *)PyBytes_AS_STRING(a->a_linetable) + + a->a_location_off; +} + +static void +write_location_first_byte(struct assembler* a, int code, int length) +{ + a->a_location_off += write_location_entry_start( + location_pointer(a), code, length); +} + +static void +write_location_varint(struct assembler* a, unsigned int val) +{ + uint8_t *ptr = location_pointer(a); + a->a_location_off += write_varint(ptr, val); +} + + +static void +write_location_signed_varint(struct assembler* a, int val) +{ + uint8_t *ptr = location_pointer(a); + a->a_location_off += write_signed_varint(ptr, val); +} + +static void +write_location_info_short_form(struct assembler* a, int length, int column, int end_column) +{ + assert(length > 0 && length <= 8); + int column_low_bits = column & 7; + int column_group = column >> 3; + assert(column < 80); + assert(end_column - column < 16); + write_location_first_byte(a, PY_CODE_LOCATION_INFO_SHORT0 + column_group, length); + write_location_byte(a, (column_low_bits << 4) | (end_column - column)); +} + +static void +write_location_info_oneline_form(struct assembler* a, int length, int line_delta, int column, int end_column) +{ + assert(length > 0 && length <= 8); + assert(line_delta >= 0 && line_delta < 3); + assert(column < 128); + assert(end_column < 128); + write_location_first_byte(a, PY_CODE_LOCATION_INFO_ONE_LINE0 + line_delta, length); + write_location_byte(a, column); + write_location_byte(a, end_column); +} + +static void +write_location_info_long_form(struct assembler* a, struct instr* i, int length) +{ + assert(length > 0 && length <= 8); + write_location_first_byte(a, PY_CODE_LOCATION_INFO_LONG, length); + write_location_signed_varint(a, i->i_lineno - a->a_lineno); + 
assert(i->i_end_lineno >= i->i_lineno); + write_location_varint(a, i->i_end_lineno - i->i_lineno); + write_location_varint(a, i->i_col_offset+1); + write_location_varint(a, i->i_end_col_offset+1); +} + +static void +write_location_info_none(struct assembler* a, int length) +{ + write_location_first_byte(a, PY_CODE_LOCATION_INFO_NONE, length); +} + +static void +write_location_info_no_column(struct assembler* a, int length, int line_delta) +{ + write_location_first_byte(a, PY_CODE_LOCATION_INFO_NO_COLUMNS, length); + write_location_signed_varint(a, line_delta); +} + +#define THEORETICAL_MAX_ENTRY_SIZE 25 /* 1 + 6 + 6 + 6 + 6 */ static int -assemble_line_range(struct assembler* a, int current, PyObject** table, - int* prev, int* start, int* offset) +write_location_info_entry(struct assembler* a, struct instr* i, int isize) { - int ldelta, bdelta; - bdelta = (a->a_offset - *start) * sizeof(_Py_CODEUNIT); - if (bdelta == 0) { - return 1; - } - if (current < 0) { - ldelta = -128; - } - else { - ldelta = current - *prev; - *prev = current; - while (ldelta > 127) { - if (!assemble_emit_table_pair(a, table, offset, 0, 127)) { - return 0; - } - ldelta -= 127; - } - while (ldelta < -127) { - if (!assemble_emit_table_pair(a, table, offset, 0, -127)) { - return 0; - } - ldelta += 127; - } - } - assert(-128 <= ldelta && ldelta < 128); - while (bdelta > 254) { - if (!assemble_emit_table_pair(a, table, offset, 254, ldelta)) { + Py_ssize_t len = PyBytes_GET_SIZE(a->a_linetable); + if (a->a_location_off + THEORETICAL_MAX_ENTRY_SIZE >= len) { + assert(len > THEORETICAL_MAX_ENTRY_SIZE); + if (_PyBytes_Resize(&a->a_linetable, len*2) < 0) { return 0; } - ldelta = current < 0 ? 
-128 : 0; - bdelta -= 254; } - if (!assemble_emit_table_pair(a, table, offset, bdelta, ldelta)) { - return 0; - } - *start = a->a_offset; - return 1; -} - -static int -assemble_start_line_range(struct assembler* a) { - return assemble_line_range(a, a->a_lineno, &a->a_lnotab, - &a->a_prevlineno, &a->a_lineno_start, &a->a_lnotab_off); -} - -static int -assemble_end_line_range(struct assembler* a) { - return assemble_line_range(a, a->a_end_lineno, &a->a_enotab, - &a->a_prev_end_lineno, &a->a_end_lineno_start, &a->a_enotab_off); -} - -static int -assemble_lnotab(struct assembler* a, struct instr* i) -{ - if (i->i_lineno == a->a_lineno) { + if (i->i_lineno < 0) { + write_location_info_none(a, isize); return 1; } - if (!assemble_start_line_range(a)) { - return 0; + int line_delta = i->i_lineno - a->a_lineno; + int column = i->i_col_offset; + int end_column = i->i_end_col_offset; + assert(column >= -1); + assert(end_column >= -1); + if (column < 0 || end_column < 0) { + if (i->i_end_lineno == i->i_lineno || i->i_end_lineno == -1) { + write_location_info_no_column(a, isize, line_delta); + a->a_lineno = i->i_lineno; + return 1; + } } + else if (i->i_end_lineno == i->i_lineno) { + if (line_delta == 0 && column < 80 && end_column - column < 16) { + write_location_info_short_form(a, isize, column, end_column); + return 1; + } + if (line_delta >= 0 && line_delta < 3 && column < 128 && end_column < 128) { + write_location_info_oneline_form(a, isize, line_delta, column, end_column); + a->a_lineno = i->i_lineno; + return 1; + } + } + write_location_info_long_form(a, i, isize); a->a_lineno = i->i_lineno; return 1; } static int -assemble_enotab(struct assembler* a, struct instr* i) +assemble_emit_location(struct assembler* a, struct instr* i) { - if (i->i_end_lineno == a->a_end_lineno) { - return 1; - } - if (!assemble_end_line_range(a)) { - return 0; - } - a->a_end_lineno = i->i_end_lineno; - return 1; -} - -static int -assemble_cnotab(struct assembler* a, struct instr* i, int 
instr_size) -{ - Py_ssize_t len = PyBytes_GET_SIZE(a->a_cnotab); - int difference = instr_size * 2; - if (a->a_cnotab_off + difference >= len) { - if (_PyBytes_Resize(&a->a_cnotab, difference + (len * 2)) < 0) { + int isize = instr_size(i); + while (isize > 8) { + if (!write_location_info_entry(a, i, 8)) { return 0; } + isize -= 8; } - - unsigned char* cnotab = (unsigned char*)PyBytes_AS_STRING(a->a_cnotab); - cnotab += a->a_cnotab_off; - a->a_cnotab_off += difference; - - for (int j = 0; j < instr_size; j++) { - if (i->i_col_offset > 255 || i->i_end_col_offset > 255) { - *cnotab++ = 0; - *cnotab++ = 0; - continue; - } - *cnotab++ = i->i_col_offset + 1; - *cnotab++ = i->i_end_col_offset + 1; - } - return 1; + return write_location_info_entry(a, i, isize); } - /* assemble_emit() Extend the bytecode with a new instruction. Update lnotab if necessary. @@ -7616,15 +7616,6 @@ assemble_emit(struct assembler *a, struct instr *i) _Py_CODEUNIT *code; int size = instr_size(i); - if (i->i_lineno && !assemble_lnotab(a, i)) { - return 0; - } - if (!assemble_enotab(a, i)) { - return 0; - } - if (!assemble_cnotab(a, i, size)) { - return 0; - } if (a->a_offset + size >= len / (int)sizeof(_Py_CODEUNIT)) { if (len > PY_SSIZE_T_MAX / 2) return 0; @@ -7976,9 +7967,7 @@ makecode(struct compiler *c, struct assembler *a, PyObject *constslist, .code = a->a_bytecode, .firstlineno = c->u->u_firstlineno, - .linetable = a->a_lnotab, - .endlinetable = a->a_enotab, - .columntable = a->a_cnotab, + .linetable = a->a_linetable, .consts = consts, .names = names, @@ -8413,6 +8402,14 @@ assemble(struct compiler *c, int addNone) goto error; } + /* Emit location info */ + a.a_lineno = c->u->u_firstlineno; + for(b = entryblock; b != NULL; b = b->b_next) { + for (j = 0; j < b->b_iused; j++) + if (!assemble_emit_location(&a, &b->b_instr[j])) + goto error; + } + if (!assemble_exception_table(&a)) { goto error; } @@ -8422,30 +8419,14 @@ assemble(struct compiler *c, int addNone) if (!merge_const_one(c, 
&a.a_except_table)) { goto error; } - if (!assemble_start_line_range(&a)) { - return 0; - } - if (_PyBytes_Resize(&a.a_lnotab, a.a_lnotab_off) < 0) { + + if (_PyBytes_Resize(&a.a_linetable, a.a_location_off) < 0) { goto error; } - if (!merge_const_one(c, &a.a_lnotab)) { - goto error; - } - if (!assemble_end_line_range(&a)) { - return 0; - } - if (_PyBytes_Resize(&a.a_enotab, a.a_enotab_off) < 0) { - goto error; - } - if (!merge_const_one(c, &a.a_enotab)) { - goto error; - } - if (_PyBytes_Resize(&a.a_cnotab, a.a_cnotab_off) < 0) { - goto error; - } - if (!merge_const_one(c, &a.a_cnotab)) { + if (!merge_const_one(c, &a.a_linetable)) { goto error; } + if (_PyBytes_Resize(&a.a_bytecode, a.a_offset * sizeof(_Py_CODEUNIT)) < 0) { goto error; } diff --git a/Python/marshal.c b/Python/marshal.c index 19abcc8ffe4..bbe67e3379f 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -564,8 +564,6 @@ w_complex_object(PyObject *v, char flag, WFILE *p) w_object(co->co_qualname, p); w_long(co->co_firstlineno, p); w_object(co->co_linetable, p); - w_object(co->co_endlinetable, p); - w_object(co->co_columntable, p); w_object(co->co_exceptiontable, p); Py_DECREF(co_code); } @@ -1357,9 +1355,7 @@ r_object(RFILE *p) PyObject *name = NULL; PyObject *qualname = NULL; int firstlineno; - PyObject *linetable = NULL; - PyObject* endlinetable = NULL; - PyObject* columntable = NULL; + PyObject* linetable = NULL; PyObject *exceptiontable = NULL; idx = r_ref_reserve(flag, p); @@ -1415,12 +1411,6 @@ r_object(RFILE *p) linetable = r_object(p); if (linetable == NULL) goto code_error; - endlinetable = r_object(p); - if (endlinetable == NULL) - goto code_error; - columntable = r_object(p); - if (columntable == NULL) - goto code_error; exceptiontable = r_object(p); if (exceptiontable == NULL) goto code_error; @@ -1434,8 +1424,6 @@ r_object(RFILE *p) .code = code, .firstlineno = firstlineno, .linetable = linetable, - .endlinetable = endlinetable, - .columntable = columntable, .consts = consts, .names = 
names, @@ -1473,8 +1461,6 @@ r_object(RFILE *p) Py_XDECREF(name); Py_XDECREF(qualname); Py_XDECREF(linetable); - Py_XDECREF(endlinetable); - Py_XDECREF(columntable); Py_XDECREF(exceptiontable); } retval = v; diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py index 610d1309943..857e52f00a0 100755 --- a/Tools/gdb/libpython.py +++ b/Tools/gdb/libpython.py @@ -634,6 +634,63 @@ class PyCFunctionObjectPtr(PyObjectPtr): else: return BuiltInMethodProxy(ml_name, pyop_m_self) +# Python implementation of location table parsing algorithm +def read(it): + return ord(next(it)) + +def read_varint(it): + b = read(it) + val = b & 63; + shift = 0; + while b & 64: + b = read(it) + shift += 6 + val |= (b&63) << shift + return val + +def read_signed_varint(it): + uval = read_varint(it) + if uval & 1: + return -(uval >> 1) + else: + return uval >> 1 + +def parse_location_table(firstlineno, linetable): + line = firstlineno + addr = 0 + it = iter(linetable) + while True: + try: + first_byte = read(it) + except StopIteration: + return + code = (first_byte >> 3) & 15 + length = (first_byte & 7) + 1 + end_addr = addr + length + if code == 15: + yield addr, end_addr, None + addr = end_addr + continue + elif code == 14: # Long form + line_delta = read_signed_varint(it) + line += line_delta + end_line = line + read_varint(it) + col = read_varint(it) + end_col = read_varint(it) + elif code == 13: # No column + line_delta = read_signed_varint(it) + line += line_delta + elif code in (10, 11, 12): # new line + line_delta = code - 10 + line += line_delta + column = read(it) + end_column = read(it) + else: + assert (0 <= code < 10) + second_byte = read(it) + column = code << 3 | (second_byte >> 4) + yield addr, end_addr, line + addr = end_addr class PyCodeObjectPtr(PyObjectPtr): """ @@ -658,18 +715,9 @@ class PyCodeObjectPtr(PyObjectPtr): if addrq < 0: return lineno addr = 0 - for addr_incr, line_incr in zip(co_linetable[::2], co_linetable[1::2]): - if addr_incr == 255: - break - addr += 
ord(addr_incr) - line_delta = ord(line_incr) - if line_delta == 128: - line_delta = 0 - elif line_delta > 128: - line_delta -= 256 - lineno += line_delta - if addr > addrq: - return lineno + for addr, end_addr, line in parse_location_table(lineno, co_linetable): + if addr <= addrq and end_addr > addrq: + return line assert False, "Unreachable" @@ -1082,8 +1130,8 @@ class PyFramePtr: if self.is_optimized_out(): return None try: - return self.co.addr2line(self.f_lasti*2) - except Exception: + return self.co.addr2line(self.f_lasti) + except Exception as ex: # bpo-34989: addr2line() is a complex function, it can fail in many # ways. For example, it fails with a TypeError on "FakeRepr" if # gdb fails to load debug symbols. Use a catch-all "except diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py index 3c48bac2648..5ee6c2f58e5 100644 --- a/Tools/scripts/deepfreeze.py +++ b/Tools/scripts/deepfreeze.py @@ -240,8 +240,6 @@ class Printer: co_name = self.generate(name + "_name", code.co_name) co_qualname = self.generate(name + "_qualname", code.co_qualname) co_linetable = self.generate(name + "_linetable", code.co_linetable) - co_endlinetable = self.generate(name + "_endlinetable", code.co_endlinetable) - co_columntable = self.generate(name + "_columntable", code.co_columntable) co_exceptiontable = self.generate(name + "_exceptiontable", code.co_exceptiontable) # These fields are not directly accessible localsplusnames, localspluskinds = get_localsplus(code) @@ -280,8 +278,6 @@ class Printer: self.write(f".co_name = {co_name},") self.write(f".co_qualname = {co_qualname},") self.write(f".co_linetable = {co_linetable},") - self.write(f".co_endlinetable = {co_endlinetable},") - self.write(f".co_columntable = {co_columntable},") self.write(f".co_code_adaptive = {co_code_adaptive},") name_as_code = f"(PyCodeObject *)&{name}" self.deallocs.append(f"_PyStaticCode_Dealloc({name_as_code});") diff --git a/Tools/scripts/umarshal.py b/Tools/scripts/umarshal.py index 
2eaaa7ce2d9..f61570cbaff 100644 --- a/Tools/scripts/umarshal.py +++ b/Tools/scripts/umarshal.py @@ -289,8 +289,6 @@ class Reader: retval.co_qualname = self.r_object() retval.co_firstlineno = self.r_long() retval.co_linetable = self.r_object() - retval.co_endlinetable = self.r_object() - retval.co_columntable = self.r_object() retval.co_exceptiontable = self.r_object() return retval elif type == Type.REF: