From 75a06f067bd0a2687312e5f8e78f9075be76ad3a Mon Sep 17 00:00:00 2001 From: Matthew Suozzo Date: Sat, 10 Apr 2021 16:56:28 -0400 Subject: [PATCH] bpo-43798: Add source location attributes to alias (GH-25324) * Add source location attributes to alias. * Move alias star construction to pegen helper. Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Pablo Galindo --- Grammar/python.gram | 6 +- Include/internal/pycore_ast.h | 8 +- Lib/test/test_asdl_parser.py | 4 +- Lib/test/test_ast.py | 28 ++++- Lib/test/test_peg_generator/test_c_parser.py | 2 +- .../2021-04-10-00-01-43.bpo-43798.p_nJFM.rst | 1 + Parser/Python.asdl | 1 + Parser/parser.c | 60 ++++++++++- Parser/pegen.c | 6 +- Parser/pegen.h | 2 +- Python/Python-ast.c | 100 +++++++++++++++++- 11 files changed, 199 insertions(+), 19 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-04-10-00-01-43.bpo-43798.p_nJFM.rst diff --git a/Grammar/python.gram b/Grammar/python.gram index ebf028fa819..eb10fc21105 100644 --- a/Grammar/python.gram +++ b/Grammar/python.gram @@ -144,20 +144,20 @@ import_from[stmt_ty]: import_from_targets[asdl_alias_seq*]: | '(' a=import_from_as_names [','] ')' { a } | import_from_as_names !',' - | '*' { (asdl_alias_seq*)_PyPegen_singleton_seq(p, CHECK(alias_ty, _PyPegen_alias_for_star(p))) } + | '*' { (asdl_alias_seq*)_PyPegen_singleton_seq(p, CHECK(alias_ty, _PyPegen_alias_for_star(p, EXTRA))) } | invalid_import_from_targets import_from_as_names[asdl_alias_seq*]: | a[asdl_alias_seq*]=','.import_from_as_name+ { a } import_from_as_name[alias_ty]: | a=NAME b=['as' z=NAME { z }] { _PyAST_alias(a->v.Name.id, (b) ? ((expr_ty) b)->v.Name.id : NULL, - p->arena) } + EXTRA) } dotted_as_names[asdl_alias_seq*]: | a[asdl_alias_seq*]=','.dotted_as_name+ { a } dotted_as_name[alias_ty]: | a=dotted_name b=['as' z=NAME { z }] { _PyAST_alias(a->v.Name.id, (b) ? ((expr_ty) b)->v.Name.id : NULL, - p->arena) } + EXTRA) } dotted_name[expr_ty]: | a=dotted_name '.' b=NAME { _PyPegen_join_names_with_dot(p, a, b) } | NAME diff --git a/Include/internal/pycore_ast.h b/Include/internal/pycore_ast.h index 38c8013d134..64d68b2b32d 100644 --- a/Include/internal/pycore_ast.h +++ b/Include/internal/pycore_ast.h @@ -543,6 +543,10 @@ struct _keyword { struct _alias { identifier name; identifier asname; + int lineno; + int col_offset; + int end_lineno; + int end_col_offset; }; struct _withitem { @@ -751,7 +755,9 @@ arg_ty _PyAST_arg(identifier arg, expr_ty annotation, string type_comment, int keyword_ty _PyAST_keyword(identifier arg, expr_ty value, int lineno, int col_offset, int end_lineno, int end_col_offset, PyArena *arena); -alias_ty _PyAST_alias(identifier name, identifier asname, PyArena *arena); +alias_ty _PyAST_alias(identifier name, identifier asname, int lineno, int + col_offset, int end_lineno, int end_col_offset, PyArena + *arena); withitem_ty _PyAST_withitem(expr_ty context_expr, expr_ty optional_vars, PyArena *arena); match_case_ty _PyAST_match_case(expr_ty pattern, expr_ty guard, asdl_stmt_seq * diff --git a/Lib/test/test_asdl_parser.py b/Lib/test/test_asdl_parser.py index d2c2b513347..2c198a6b8b2 100644 --- a/Lib/test/test_asdl_parser.py +++ b/Lib/test/test_asdl_parser.py @@ -62,7 +62,9 @@ class TestAsdlParser(unittest.TestCase): alias = self.types['alias'] self.assertEqual( str(alias), - 'Product([Field(identifier, name), Field(identifier, asname, opt=True)])') + 'Product([Field(identifier, name), Field(identifier, asname, opt=True)], ' + '[Field(int, lineno), Field(int, col_offset), ' + 'Field(int, end_lineno, opt=True), Field(int, end_col_offset, opt=True)])') def test_attributes(self): stmt = self.types['stmt'] diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py index bddb3de2f09..68249583996 100644 --- a/Lib/test/test_ast.py +++ b/Lib/test/test_ast.py @@ -335,6 +335,26 @@ class AST_Tests(unittest.TestCase): mod.body[0].module = " __future__ ".strip() compile(mod, "", "exec") + def test_alias(self): + im = ast.parse("from bar import y").body[0] + self.assertEqual(len(im.names), 1) + alias = im.names[0] + self.assertEqual(alias.name, 'y') + self.assertIsNone(alias.asname) + self.assertEqual(alias.lineno, 1) + self.assertEqual(alias.end_lineno, 1) + self.assertEqual(alias.col_offset, 16) + self.assertEqual(alias.end_col_offset, 17) + + im = ast.parse("from bar import *").body[0] + alias = im.names[0] + self.assertEqual(alias.name, '*') + self.assertIsNone(alias.asname) + self.assertEqual(alias.lineno, 1) + self.assertEqual(alias.end_lineno, 1) + self.assertEqual(alias.col_offset, 16) + self.assertEqual(alias.end_col_offset, 17) + def test_base_classes(self): self.assertTrue(issubclass(ast.For, ast.stmt)) self.assertTrue(issubclass(ast.Name, ast.expr)) @@ -1037,7 +1057,8 @@ Module( def test_level_as_none(self): body = [ast.ImportFrom(module='time', - names=[ast.alias(name='sleep')], + names=[ast.alias(name='sleep', + lineno=0, col_offset=0)], level=None, lineno=0, col_offset=0)] mod = ast.Module(body, []) @@ -1735,6 +1756,7 @@ class EndPositionTests(unittest.TestCase): ''').strip() imp = ast.parse(s).body[0] self._check_end_pos(imp, 3, 1) + self._check_end_pos(imp.names[2], 2, 16) def test_slices(self): s1 = 'f()[1, 2] [0]' @@ -2095,8 +2117,8 @@ exec_results = [ ('Module', [('Try', (1, 0, 4, 6), [('Pass', (2, 2, 2, 6))], [('ExceptHandler', (3, 0, 4, 6), ('Name', (3, 7, 3, 16), 'Exception', ('Load',)), None, [('Pass', (4, 2, 4, 6))])], [], [])], []), ('Module', [('Try', (1, 0, 4, 6), [('Pass', (2, 2, 2, 6))], [], [], [('Pass', (4, 2, 4, 6))])], []), ('Module', [('Assert', (1, 0, 1, 8), ('Name', (1, 7, 1, 8), 'v', ('Load',)), None)], []), -('Module', [('Import', (1, 0, 1, 10), [('alias', 'sys', None)])], []), -('Module', [('ImportFrom', (1, 0, 1, 17), 'sys', [('alias', 'v', None)], 0)], []), +('Module', [('Import', (1, 0, 1, 10), [('alias', (1, 7, 1, 10), 'sys', None)])], []), +('Module', [('ImportFrom', (1, 0, 1, 17), 'sys', [('alias', (1, 16, 1, 17), 'v', None)], 0)], []), ('Module', [('Global', (1, 0, 1, 8), ['v'])], []), ('Module', [('Expr', (1, 0, 1, 1), ('Constant', (1, 0, 1, 1), 1, None))], []), ('Module', [('Pass', (1, 0, 1, 4))], []), diff --git a/Lib/test/test_peg_generator/test_c_parser.py b/Lib/test/test_peg_generator/test_c_parser.py index 5d8f54399e7..b7e80f0d351 100644 --- a/Lib/test/test_peg_generator/test_c_parser.py +++ b/Lib/test/test_peg_generator/test_c_parser.py @@ -314,7 +314,7 @@ class TestCParser(TempdirManager, unittest.TestCase): ) simple_name[expr_ty]: NAME import_as_names_from[asdl_alias_seq*]: a[asdl_alias_seq*]=','.import_as_name_from+ { a } - import_as_name_from[alias_ty]: a=NAME 'as' b=NAME { _PyAST_alias(((expr_ty) a)->v.Name.id, ((expr_ty) b)->v.Name.id, p->arena) } + import_as_name_from[alias_ty]: a=NAME 'as' b=NAME { _PyAST_alias(((expr_ty) a)->v.Name.id, ((expr_ty) b)->v.Name.id, EXTRA) } """ test_source = """ for stmt in ("from a import b as c", "from . import a as b"): diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-04-10-00-01-43.bpo-43798.p_nJFM.rst b/Misc/NEWS.d/next/Core and Builtins/2021-04-10-00-01-43.bpo-43798.p_nJFM.rst new file mode 100644 index 00000000000..f6ab5dd410f --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-04-10-00-01-43.bpo-43798.p_nJFM.rst @@ -0,0 +1 @@ +:class:`ast.alias` nodes now include source location metadata attributes e.g. lineno, col_offset. diff --git a/Parser/Python.asdl b/Parser/Python.asdl index ddc019da48e..e224f5f4848 100644 --- a/Parser/Python.asdl +++ b/Parser/Python.asdl @@ -124,6 +124,7 @@ module Python -- import name with optional 'as' alias. alias = (identifier name, identifier? asname) + attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset) withitem = (expr context_expr, expr? optional_vars) diff --git a/Parser/parser.c b/Parser/parser.c index cf8b624ce7d..af0c088694c 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -3310,6 +3310,15 @@ import_from_targets_rule(Parser *p) } asdl_alias_seq* _res = NULL; int _mark = p->mark; + if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) { + p->error_indicator = 1; + D(p->level--); + return NULL; + } + int _start_lineno = p->tokens[_mark]->lineno; + UNUSED(_start_lineno); // Only used by EXTRA macro + int _start_col_offset = p->tokens[_mark]->col_offset; + UNUSED(_start_col_offset); // Only used by EXTRA macro { // '(' import_from_as_names ','? ')' if (p->error_indicator) { D(p->level--); @@ -3377,7 +3386,16 @@ import_from_targets_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ import_from_targets[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'*'")); - _res = ( asdl_alias_seq * ) _PyPegen_singleton_seq ( p , CHECK ( alias_ty , _PyPegen_alias_for_star ( p ) ) ); + Token *_token = _PyPegen_get_last_nonnwhitespace_token(p); + if (_token == NULL) { + D(p->level--); + return NULL; + } + int _end_lineno = _token->end_lineno; + UNUSED(_end_lineno); // Only used by EXTRA macro + int _end_col_offset = _token->end_col_offset; + UNUSED(_end_col_offset); // Only used by EXTRA macro + _res = ( asdl_alias_seq * ) _PyPegen_singleton_seq ( p , CHECK ( alias_ty , _PyPegen_alias_for_star ( p , EXTRA ) ) ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -3466,6 +3484,15 @@ import_from_as_name_rule(Parser *p) } alias_ty _res = NULL; int _mark = p->mark; + if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) { + p->error_indicator = 1; + D(p->level--); + return NULL; + } + int _start_lineno = p->tokens[_mark]->lineno; + UNUSED(_start_lineno); // Only used by EXTRA macro + int _start_col_offset = p->tokens[_mark]->col_offset; + UNUSED(_start_col_offset); // Only used by EXTRA macro { // NAME ['as' NAME] if (p->error_indicator) { D(p->level--); @@ -3481,7 +3508,16 @@ import_from_as_name_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ import_from_as_name[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "NAME ['as' NAME]")); - _res = _PyAST_alias ( a -> v . Name . id , ( b ) ? ( ( expr_ty ) b ) -> v . Name . id : NULL , p -> arena ); + Token *_token = _PyPegen_get_last_nonnwhitespace_token(p); + if (_token == NULL) { + D(p->level--); + return NULL; + } + int _end_lineno = _token->end_lineno; + UNUSED(_end_lineno); // Only used by EXTRA macro + int _end_col_offset = _token->end_col_offset; + UNUSED(_end_col_offset); // Only used by EXTRA macro + _res = _PyAST_alias ( a -> v . Name . id , ( b ) ? ( ( expr_ty ) b ) -> v . Name . id : NULL , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); @@ -3551,6 +3587,15 @@ dotted_as_name_rule(Parser *p) } alias_ty _res = NULL; int _mark = p->mark; + if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) { + p->error_indicator = 1; + D(p->level--); + return NULL; + } + int _start_lineno = p->tokens[_mark]->lineno; + UNUSED(_start_lineno); // Only used by EXTRA macro + int _start_col_offset = p->tokens[_mark]->col_offset; + UNUSED(_start_col_offset); // Only used by EXTRA macro { // dotted_name ['as' NAME] if (p->error_indicator) { D(p->level--); @@ -3566,7 +3611,16 @@ dotted_as_name_rule(Parser *p) ) { D(fprintf(stderr, "%*c+ dotted_as_name[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "dotted_name ['as' NAME]")); - _res = _PyAST_alias ( a -> v . Name . id , ( b ) ? ( ( expr_ty ) b ) -> v . Name . id : NULL , p -> arena ); + Token *_token = _PyPegen_get_last_nonnwhitespace_token(p); + if (_token == NULL) { + D(p->level--); + return NULL; + } + int _end_lineno = _token->end_lineno; + UNUSED(_end_lineno); // Only used by EXTRA macro + int _end_col_offset = _token->end_col_offset; + UNUSED(_end_col_offset); // Only used by EXTRA macro + _res = _PyAST_alias ( a -> v . Name . id , ( b ) ? ( ( expr_ty ) b ) -> v . Name . id : NULL , EXTRA ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; D(p->level--); diff --git a/Parser/pegen.c b/Parser/pegen.c index 57759f75125..f841ace28e7 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -1555,8 +1555,8 @@ _PyPegen_seq_count_dots(asdl_seq *seq) /* Creates an alias with '*' as the identifier name */ alias_ty -_PyPegen_alias_for_star(Parser *p) -{ +_PyPegen_alias_for_star(Parser *p, int lineno, int col_offset, int end_lineno, + int end_col_offset, PyArena *arena) { PyObject *str = PyUnicode_InternFromString("*"); if (!str) { return NULL; @@ -1565,7 +1565,7 @@ _PyPegen_alias_for_star(Parser *p) Py_DECREF(str); return NULL; } - return _PyAST_alias(str, NULL, p->arena); + return _PyAST_alias(str, NULL, lineno, col_offset, end_lineno, end_col_offset, arena); } /* Creates a new asdl_seq* with the identifiers of all the names in seq */ diff --git a/Parser/pegen.h b/Parser/pegen.h index 53d8e5221bf..f3e0876afba 100644 --- a/Parser/pegen.h +++ b/Parser/pegen.h @@ -237,7 +237,7 @@ asdl_seq *_PyPegen_seq_append_to_end(Parser *, asdl_seq *, void *); asdl_seq *_PyPegen_seq_flatten(Parser *, asdl_seq *); expr_ty _PyPegen_join_names_with_dot(Parser *, expr_ty, expr_ty); int _PyPegen_seq_count_dots(asdl_seq *); -alias_ty _PyPegen_alias_for_star(Parser *); +alias_ty _PyPegen_alias_for_star(Parser *, int, int, int, int, PyArena *); asdl_identifier_seq *_PyPegen_map_names_to_ids(Parser *, asdl_expr_seq *); CmpopExprPair *_PyPegen_cmpop_expr_pair(Parser *, cmpop_ty, expr_ty); asdl_int_seq *_PyPegen_get_cmpops(Parser *p, asdl_seq *); diff --git a/Python/Python-ast.c b/Python/Python-ast.c index 2105729ea34..779ee3e4894 100644 --- a/Python/Python-ast.c +++ b/Python/Python-ast.c @@ -675,6 +675,12 @@ static const char * const keyword_fields[]={ "value", }; static PyObject* ast2obj_alias(struct ast_state *state, void*); +static const char * const alias_attributes[] = { + "lineno", + "col_offset", + "end_lineno", + "end_col_offset", +}; static const char * const alias_fields[]={ "name", "asname", @@ -1707,9 +1713,15 @@ init_types(struct ast_state *state) alias_fields, 2, "alias(identifier name, identifier? asname)"); if (!state->alias_type) return 0; - if (!add_attributes(state, state->alias_type, NULL, 0)) return 0; + if (!add_attributes(state, state->alias_type, alias_attributes, 4)) return + 0; if (PyObject_SetAttr(state->alias_type, state->asname, Py_None) == -1) return 0; + if (PyObject_SetAttr(state->alias_type, state->end_lineno, Py_None) == -1) + return 0; + if (PyObject_SetAttr(state->alias_type, state->end_col_offset, Py_None) == + -1) + return 0; state->withitem_type = make_type(state, "withitem", state->AST_type, withitem_fields, 2, "withitem(expr context_expr, expr? optional_vars)"); @@ -3271,7 +3283,8 @@ _PyAST_keyword(identifier arg, expr_ty value, int lineno, int col_offset, int } alias_ty -_PyAST_alias(identifier name, identifier asname, PyArena *arena) +_PyAST_alias(identifier name, identifier asname, int lineno, int col_offset, + int end_lineno, int end_col_offset, PyArena *arena) { alias_ty p; if (!name) { @@ -3284,6 +3297,10 @@ _PyAST_alias(identifier name, identifier asname, PyArena *arena) return NULL; p->name = name; p->asname = asname; + p->lineno = lineno; + p->col_offset = col_offset; + p->end_lineno = end_lineno; + p->end_col_offset = end_col_offset; return p; } @@ -4850,6 +4867,26 @@ ast2obj_alias(struct ast_state *state, void* _o) if (PyObject_SetAttr(result, state->asname, value) == -1) goto failed; Py_DECREF(value); + value = ast2obj_int(state, o->lineno); + if (!value) goto failed; + if (PyObject_SetAttr(result, state->lineno, value) < 0) + goto failed; + Py_DECREF(value); + value = ast2obj_int(state, o->col_offset); + if (!value) goto failed; + if (PyObject_SetAttr(result, state->col_offset, value) < 0) + goto failed; + Py_DECREF(value); + value = ast2obj_int(state, o->end_lineno); + if (!value) goto failed; + if (PyObject_SetAttr(result, state->end_lineno, value) < 0) + goto failed; + Py_DECREF(value); + value = ast2obj_int(state, o->end_col_offset); + if (!value) goto failed; + if (PyObject_SetAttr(result, state->end_col_offset, value) < 0) + goto failed; + Py_DECREF(value); return result; failed: Py_XDECREF(value); @@ -9723,6 +9760,10 @@ obj2ast_alias(struct ast_state *state, PyObject* obj, alias_ty* out, PyArena* PyObject* tmp = NULL; identifier name; identifier asname; + int lineno; + int col_offset; + int end_lineno; + int end_col_offset; if (_PyObject_LookupAttr(obj, state->name, &tmp) < 0) { return 1; @@ -9750,7 +9791,60 @@ obj2ast_alias(struct ast_state *state, PyObject* obj, alias_ty* out, PyArena* if (res != 0) goto failed; Py_CLEAR(tmp); } - *out = _PyAST_alias(name, asname, arena); + if (_PyObject_LookupAttr(obj, state->lineno, &tmp) < 0) { + return 1; + } + if (tmp == NULL) { + PyErr_SetString(PyExc_TypeError, "required field \"lineno\" missing from alias"); + return 1; + } + else { + int res; + res = obj2ast_int(state, tmp, &lineno, arena); + if (res != 0) goto failed; + Py_CLEAR(tmp); + } + if (_PyObject_LookupAttr(obj, state->col_offset, &tmp) < 0) { + return 1; + } + if (tmp == NULL) { + PyErr_SetString(PyExc_TypeError, "required field \"col_offset\" missing from alias"); + return 1; + } + else { + int res; + res = obj2ast_int(state, tmp, &col_offset, arena); + if (res != 0) goto failed; + Py_CLEAR(tmp); + } + if (_PyObject_LookupAttr(obj, state->end_lineno, &tmp) < 0) { + return 1; + } + if (tmp == NULL || tmp == Py_None) { + Py_CLEAR(tmp); + end_lineno = 0; + } + else { + int res; + res = obj2ast_int(state, tmp, &end_lineno, arena); + if (res != 0) goto failed; + Py_CLEAR(tmp); + } + if (_PyObject_LookupAttr(obj, state->end_col_offset, &tmp) < 0) { + return 1; + } + if (tmp == NULL || tmp == Py_None) { + Py_CLEAR(tmp); + end_col_offset = 0; + } + else { + int res; + res = obj2ast_int(state, tmp, &end_col_offset, arena); + if (res != 0) goto failed; + Py_CLEAR(tmp); + } + *out = _PyAST_alias(name, asname, lineno, col_offset, end_lineno, + end_col_offset, arena); return 0; failed: Py_XDECREF(tmp);