From aa1afc72c1ee1f090e6302198d9a0295f1ce1c05 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Wed, 6 Sep 2017 19:43:04 -0700 Subject: [PATCH] bpo-30465: Fix lineno and col_offset in fstring AST nodes (GH-1800) (gh-3409) For f-string ast nodes, fix the line and columns so that tools such as flake8 can identify them correctly. (cherry picked from commit e7c566caf177afe43b57f0b2723e723d880368e8) --- Lib/test/test_fstring.py | 248 ++++++++++++++++++ .../2017-09-06-10-47-29.bpo-30465.oe-3GD.rst | 3 + Python/ast.c | 72 ++++- 3 files changed, 318 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2017-09-06-10-47-29.bpo-30465.oe-3GD.rst diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index 3d762b5aa9b..5e7efe25e39 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -70,6 +70,253 @@ f'{a * x()}'""" # Make sure x was called. self.assertTrue(x.called) + def test_ast_line_numbers(self): + expr = """ +a = 10 +f'{a * x()}'""" + t = ast.parse(expr) + self.assertEqual(type(t), ast.Module) + self.assertEqual(len(t.body), 2) + # check `a = 10` + self.assertEqual(type(t.body[0]), ast.Assign) + self.assertEqual(t.body[0].lineno, 2) + # check `f'...'` + self.assertEqual(type(t.body[1]), ast.Expr) + self.assertEqual(type(t.body[1].value), ast.JoinedStr) + self.assertEqual(len(t.body[1].value.values), 1) + self.assertEqual(type(t.body[1].value.values[0]), ast.FormattedValue) + self.assertEqual(t.body[1].lineno, 3) + self.assertEqual(t.body[1].value.lineno, 3) + self.assertEqual(t.body[1].value.values[0].lineno, 3) + # check the binop location + binop = t.body[1].value.values[0].value + self.assertEqual(type(binop), ast.BinOp) + self.assertEqual(type(binop.left), ast.Name) + self.assertEqual(type(binop.op), ast.Mult) + self.assertEqual(type(binop.right), ast.Call) + self.assertEqual(binop.lineno, 3) + self.assertEqual(binop.left.lineno, 3) + self.assertEqual(binop.right.lineno, 3) + self.assertEqual(binop.col_offset, 3) + self.assertEqual(binop.left.col_offset, 3) + self.assertEqual(binop.right.col_offset, 7) + + def test_ast_line_numbers_multiple_formattedvalues(self): + expr = """ +f'no formatted values' +f'eggs {a * x()} spam {b + y()}'""" + t = ast.parse(expr) + self.assertEqual(type(t), ast.Module) + self.assertEqual(len(t.body), 2) + # check `f'no formatted value'` + self.assertEqual(type(t.body[0]), ast.Expr) + self.assertEqual(type(t.body[0].value), ast.JoinedStr) + self.assertEqual(t.body[0].lineno, 2) + # check `f'...'` + self.assertEqual(type(t.body[1]), ast.Expr) + self.assertEqual(type(t.body[1].value), ast.JoinedStr) + self.assertEqual(len(t.body[1].value.values), 4) + self.assertEqual(type(t.body[1].value.values[0]), ast.Str) + self.assertEqual(type(t.body[1].value.values[1]), ast.FormattedValue) + self.assertEqual(type(t.body[1].value.values[2]), ast.Str) + self.assertEqual(type(t.body[1].value.values[3]), ast.FormattedValue) + self.assertEqual(t.body[1].lineno, 3) + self.assertEqual(t.body[1].value.lineno, 3) + self.assertEqual(t.body[1].value.values[0].lineno, 3) + self.assertEqual(t.body[1].value.values[1].lineno, 3) + self.assertEqual(t.body[1].value.values[2].lineno, 3) + self.assertEqual(t.body[1].value.values[3].lineno, 3) + # check the first binop location + binop1 = t.body[1].value.values[1].value + self.assertEqual(type(binop1), ast.BinOp) + self.assertEqual(type(binop1.left), ast.Name) + self.assertEqual(type(binop1.op), ast.Mult) + self.assertEqual(type(binop1.right), ast.Call) + self.assertEqual(binop1.lineno, 3) + self.assertEqual(binop1.left.lineno, 3) + self.assertEqual(binop1.right.lineno, 3) + self.assertEqual(binop1.col_offset, 8) + self.assertEqual(binop1.left.col_offset, 8) + self.assertEqual(binop1.right.col_offset, 12) + # check the second binop location + binop2 = t.body[1].value.values[3].value + self.assertEqual(type(binop2), ast.BinOp) + self.assertEqual(type(binop2.left), ast.Name) + self.assertEqual(type(binop2.op), ast.Add) + self.assertEqual(type(binop2.right), ast.Call) + self.assertEqual(binop2.lineno, 3) + self.assertEqual(binop2.left.lineno, 3) + self.assertEqual(binop2.right.lineno, 3) + self.assertEqual(binop2.col_offset, 23) + self.assertEqual(binop2.left.col_offset, 23) + self.assertEqual(binop2.right.col_offset, 27) + + def test_ast_line_numbers_nested(self): + expr = """ +a = 10 +f'{a * f"-{x()}-"}'""" + t = ast.parse(expr) + self.assertEqual(type(t), ast.Module) + self.assertEqual(len(t.body), 2) + # check `a = 10` + self.assertEqual(type(t.body[0]), ast.Assign) + self.assertEqual(t.body[0].lineno, 2) + # check `f'...'` + self.assertEqual(type(t.body[1]), ast.Expr) + self.assertEqual(type(t.body[1].value), ast.JoinedStr) + self.assertEqual(len(t.body[1].value.values), 1) + self.assertEqual(type(t.body[1].value.values[0]), ast.FormattedValue) + self.assertEqual(t.body[1].lineno, 3) + self.assertEqual(t.body[1].value.lineno, 3) + self.assertEqual(t.body[1].value.values[0].lineno, 3) + # check the binop location + binop = t.body[1].value.values[0].value + self.assertEqual(type(binop), ast.BinOp) + self.assertEqual(type(binop.left), ast.Name) + self.assertEqual(type(binop.op), ast.Mult) + self.assertEqual(type(binop.right), ast.JoinedStr) + self.assertEqual(binop.lineno, 3) + self.assertEqual(binop.left.lineno, 3) + self.assertEqual(binop.right.lineno, 3) + self.assertEqual(binop.col_offset, 3) + self.assertEqual(binop.left.col_offset, 3) + self.assertEqual(binop.right.col_offset, 7) + # check the nested call location + self.assertEqual(len(binop.right.values), 3) + self.assertEqual(type(binop.right.values[0]), ast.Str) + self.assertEqual(type(binop.right.values[1]), ast.FormattedValue) + self.assertEqual(type(binop.right.values[2]), ast.Str) + self.assertEqual(binop.right.values[0].lineno, 3) + self.assertEqual(binop.right.values[1].lineno, 3) + self.assertEqual(binop.right.values[2].lineno, 3) + call = binop.right.values[1].value + self.assertEqual(type(call), ast.Call) + self.assertEqual(call.lineno, 3) + self.assertEqual(call.col_offset, 11) + + def test_ast_line_numbers_duplicate_expression(self): + """Duplicate expression + + NOTE: this is currently broken, always sets location of the first + expression. + """ + expr = """ +a = 10 +f'{a * x()} {a * x()} {a * x()}' +""" + t = ast.parse(expr) + self.assertEqual(type(t), ast.Module) + self.assertEqual(len(t.body), 2) + # check `a = 10` + self.assertEqual(type(t.body[0]), ast.Assign) + self.assertEqual(t.body[0].lineno, 2) + # check `f'...'` + self.assertEqual(type(t.body[1]), ast.Expr) + self.assertEqual(type(t.body[1].value), ast.JoinedStr) + self.assertEqual(len(t.body[1].value.values), 5) + self.assertEqual(type(t.body[1].value.values[0]), ast.FormattedValue) + self.assertEqual(type(t.body[1].value.values[1]), ast.Str) + self.assertEqual(type(t.body[1].value.values[2]), ast.FormattedValue) + self.assertEqual(type(t.body[1].value.values[3]), ast.Str) + self.assertEqual(type(t.body[1].value.values[4]), ast.FormattedValue) + self.assertEqual(t.body[1].lineno, 3) + self.assertEqual(t.body[1].value.lineno, 3) + self.assertEqual(t.body[1].value.values[0].lineno, 3) + self.assertEqual(t.body[1].value.values[1].lineno, 3) + self.assertEqual(t.body[1].value.values[2].lineno, 3) + self.assertEqual(t.body[1].value.values[3].lineno, 3) + self.assertEqual(t.body[1].value.values[4].lineno, 3) + # check the first binop location + binop = t.body[1].value.values[0].value + self.assertEqual(type(binop), ast.BinOp) + self.assertEqual(type(binop.left), ast.Name) + self.assertEqual(type(binop.op), ast.Mult) + self.assertEqual(type(binop.right), ast.Call) + self.assertEqual(binop.lineno, 3) + self.assertEqual(binop.left.lineno, 3) + self.assertEqual(binop.right.lineno, 3) + self.assertEqual(binop.col_offset, 3) + self.assertEqual(binop.left.col_offset, 3) + self.assertEqual(binop.right.col_offset, 7) + # check the second binop location + binop = t.body[1].value.values[2].value + self.assertEqual(type(binop), ast.BinOp) + self.assertEqual(type(binop.left), ast.Name) + self.assertEqual(type(binop.op), ast.Mult) + self.assertEqual(type(binop.right), ast.Call) + self.assertEqual(binop.lineno, 3) + self.assertEqual(binop.left.lineno, 3) + self.assertEqual(binop.right.lineno, 3) + self.assertEqual(binop.col_offset, 3) # FIXME: this is wrong + self.assertEqual(binop.left.col_offset, 3) # FIXME: this is wrong + self.assertEqual(binop.right.col_offset, 7) # FIXME: this is wrong + # check the third binop location + binop = t.body[1].value.values[4].value + self.assertEqual(type(binop), ast.BinOp) + self.assertEqual(type(binop.left), ast.Name) + self.assertEqual(type(binop.op), ast.Mult) + self.assertEqual(type(binop.right), ast.Call) + self.assertEqual(binop.lineno, 3) + self.assertEqual(binop.left.lineno, 3) + self.assertEqual(binop.right.lineno, 3) + self.assertEqual(binop.col_offset, 3) # FIXME: this is wrong + self.assertEqual(binop.left.col_offset, 3) # FIXME: this is wrong + self.assertEqual(binop.right.col_offset, 7) # FIXME: this is wrong + + def test_ast_line_numbers_multiline_fstring(self): + # FIXME: This test demonstrates invalid behavior due to JoinedStr's + # immediate child nodes containing the wrong lineno. The enclosed + # expressions have valid line information and column offsets. + # See bpo-16806 and bpo-30465 for details. + expr = """ +a = 10 +f''' + {a + * + x()} +non-important content +''' +""" + t = ast.parse(expr) + self.assertEqual(type(t), ast.Module) + self.assertEqual(len(t.body), 2) + # check `a = 10` + self.assertEqual(type(t.body[0]), ast.Assign) + self.assertEqual(t.body[0].lineno, 2) + # check `f'...'` + self.assertEqual(type(t.body[1]), ast.Expr) + self.assertEqual(type(t.body[1].value), ast.JoinedStr) + self.assertEqual(len(t.body[1].value.values), 3) + self.assertEqual(type(t.body[1].value.values[0]), ast.Str) + self.assertEqual(type(t.body[1].value.values[1]), ast.FormattedValue) + self.assertEqual(type(t.body[1].value.values[2]), ast.Str) + # NOTE: the following invalid behavior is described in bpo-16806. + # - line number should be the *first* line (3), not the *last* (8) + # - column offset should not be -1 + self.assertEqual(t.body[1].lineno, 8) + self.assertEqual(t.body[1].value.lineno, 8) + self.assertEqual(t.body[1].value.values[0].lineno, 8) + self.assertEqual(t.body[1].value.values[1].lineno, 8) + self.assertEqual(t.body[1].value.values[2].lineno, 8) + self.assertEqual(t.body[1].col_offset, -1) + self.assertEqual(t.body[1].value.col_offset, -1) + self.assertEqual(t.body[1].value.values[0].col_offset, -1) + self.assertEqual(t.body[1].value.values[1].col_offset, -1) + self.assertEqual(t.body[1].value.values[2].col_offset, -1) + # NOTE: the following lineno information and col_offset is correct for + # expressions within FormattedValues. + binop = t.body[1].value.values[1].value + self.assertEqual(type(binop), ast.BinOp) + self.assertEqual(type(binop.left), ast.Name) + self.assertEqual(type(binop.op), ast.Mult) + self.assertEqual(type(binop.right), ast.Call) + self.assertEqual(binop.lineno, 4) + self.assertEqual(binop.left.lineno, 4) + self.assertEqual(binop.right.lineno, 6) + self.assertEqual(binop.col_offset, 3) + self.assertEqual(binop.left.col_offset, 3) + self.assertEqual(binop.right.col_offset, 7) + def test_docstring(self): def f(): f'''Not a docstring''' @@ -786,5 +1033,6 @@ f'{a * x()}'""" self.assertEqual(eval('f"\\\n"'), '') self.assertEqual(eval('f"\\\r"'), '') + if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Core and Builtins/2017-09-06-10-47-29.bpo-30465.oe-3GD.rst b/Misc/NEWS.d/next/Core and Builtins/2017-09-06-10-47-29.bpo-30465.oe-3GD.rst new file mode 100644 index 00000000000..0d9138ed82d --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2017-09-06-10-47-29.bpo-30465.oe-3GD.rst @@ -0,0 +1,3 @@ +Location information (``lineno`` and ``col_offset``) in f-strings is now +(mostly) correct. This fixes tools like flake8 from showing warnings on the +wrong line (typically the first line of the file). diff --git a/Python/ast.c b/Python/ast.c index 8d53736f081..4fa68a371d4 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -8,6 +8,7 @@ #include "node.h" #include "ast.h" #include "token.h" +#include "pythonrun.h" #include @@ -4240,6 +4241,55 @@ decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s, return result; } +/* Shift locations for the given node and all its children by adding `lineno` + and `col_offset` to existing locations. */ +static void fstring_shift_node_locations(node *n, int lineno, int col_offset) +{ + n->n_col_offset = n->n_col_offset + col_offset; + for (int i = 0; i < NCH(n); ++i) { + if (n->n_lineno && n->n_lineno < CHILD(n, i)->n_lineno) { + /* Shifting column offsets unnecessary if there's been newlines. */ + col_offset = 0; + } + fstring_shift_node_locations(CHILD(n, i), lineno, col_offset); + } + n->n_lineno = n->n_lineno + lineno; +} + +/* Fix locations for the given node and its children. + + `parent` is the enclosing node. + `n` is the node which locations are going to be fixed relative to parent. + `expr_str` is the child node's string representation, incuding braces. +*/ +static void +fstring_fix_node_location(const node *parent, node *n, char *expr_str) +{ + char *substr = NULL; + char *start; + int lines = LINENO(parent) - 1; + int cols = parent->n_col_offset; + /* Find the full fstring to fix location information in `n`. */ + while (parent && parent->n_type != STRING) + parent = parent->n_child; + if (parent && parent->n_str) { + substr = strstr(parent->n_str, expr_str); + if (substr) { + start = substr; + while (start > parent->n_str) { + if (start[0] == '\n') + break; + start--; + } + cols += substr - start; + /* Fix lineno in mulitline strings. */ + while ((substr = strchr(substr + 1, '\n'))) + lines--; + } + } + fstring_shift_node_locations(n, lines, cols); +} + /* Compile this expression in to an expr_ty. Add parens around the expression, in order to allow leading spaces in the expression. */ static expr_ty @@ -4248,6 +4298,7 @@ fstring_compile_expr(const char *expr_start, const char *expr_end, { PyCompilerFlags cf; + node *mod_n; mod_ty mod; char *str; Py_ssize_t len; @@ -4257,9 +4308,10 @@ fstring_compile_expr(const char *expr_start, const char *expr_end, assert(*(expr_start-1) == '{'); assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':'); - /* If the substring is all whitespace, it's an error. We need to catch - this here, and not when we call PyParser_ASTFromString, because turning - the expression '' in to '()' would go from being invalid to valid. */ + /* If the substring is all whitespace, it's an error. We need to catch this + here, and not when we call PyParser_SimpleParseStringFlagsFilename, + because turning the expression '' in to '()' would go from being invalid + to valid. */ for (s = expr_start; s != expr_end; s++) { char c = *s; /* The Python parser ignores only the following whitespace @@ -4285,9 +4337,19 @@ fstring_compile_expr(const char *expr_start, const char *expr_end, str[len+2] = 0; cf.cf_flags = PyCF_ONLY_AST; - mod = PyParser_ASTFromString(str, "", - Py_eval_input, &cf, c->c_arena); + mod_n = PyParser_SimpleParseStringFlagsFilename(str, "", + Py_eval_input, 0); + if (!mod_n) { + PyMem_RawFree(str); + return NULL; + } + /* Reuse str to find the correct column offset. */ + str[0] = '{'; + str[len+1] = '}'; + fstring_fix_node_location(n, mod_n, str); + mod = PyAST_FromNode(mod_n, &cf, "", c->c_arena); PyMem_RawFree(str); + PyNode_Free(mod_n); if (!mod) return NULL; return mod->v.Expression.body;