bpo-30465: Fix lineno and col_offset in fstring AST nodes (#1800)
For f-string ast nodes, fix the line and columns so that tools such as flake8 can identify them correctly.
This commit is contained in:
parent
36d644d37a
commit
e7c566caf1
|
@ -70,6 +70,253 @@ f'{a * x()}'"""
|
||||||
# Make sure x was called.
|
# Make sure x was called.
|
||||||
self.assertTrue(x.called)
|
self.assertTrue(x.called)
|
||||||
|
|
||||||
|
def test_ast_line_numbers(self):
|
||||||
|
expr = """
|
||||||
|
a = 10
|
||||||
|
f'{a * x()}'"""
|
||||||
|
t = ast.parse(expr)
|
||||||
|
self.assertEqual(type(t), ast.Module)
|
||||||
|
self.assertEqual(len(t.body), 2)
|
||||||
|
# check `a = 10`
|
||||||
|
self.assertEqual(type(t.body[0]), ast.Assign)
|
||||||
|
self.assertEqual(t.body[0].lineno, 2)
|
||||||
|
# check `f'...'`
|
||||||
|
self.assertEqual(type(t.body[1]), ast.Expr)
|
||||||
|
self.assertEqual(type(t.body[1].value), ast.JoinedStr)
|
||||||
|
self.assertEqual(len(t.body[1].value.values), 1)
|
||||||
|
self.assertEqual(type(t.body[1].value.values[0]), ast.FormattedValue)
|
||||||
|
self.assertEqual(t.body[1].lineno, 3)
|
||||||
|
self.assertEqual(t.body[1].value.lineno, 3)
|
||||||
|
self.assertEqual(t.body[1].value.values[0].lineno, 3)
|
||||||
|
# check the binop location
|
||||||
|
binop = t.body[1].value.values[0].value
|
||||||
|
self.assertEqual(type(binop), ast.BinOp)
|
||||||
|
self.assertEqual(type(binop.left), ast.Name)
|
||||||
|
self.assertEqual(type(binop.op), ast.Mult)
|
||||||
|
self.assertEqual(type(binop.right), ast.Call)
|
||||||
|
self.assertEqual(binop.lineno, 3)
|
||||||
|
self.assertEqual(binop.left.lineno, 3)
|
||||||
|
self.assertEqual(binop.right.lineno, 3)
|
||||||
|
self.assertEqual(binop.col_offset, 3)
|
||||||
|
self.assertEqual(binop.left.col_offset, 3)
|
||||||
|
self.assertEqual(binop.right.col_offset, 7)
|
||||||
|
|
||||||
|
def test_ast_line_numbers_multiple_formattedvalues(self):
|
||||||
|
expr = """
|
||||||
|
f'no formatted values'
|
||||||
|
f'eggs {a * x()} spam {b + y()}'"""
|
||||||
|
t = ast.parse(expr)
|
||||||
|
self.assertEqual(type(t), ast.Module)
|
||||||
|
self.assertEqual(len(t.body), 2)
|
||||||
|
# check `f'no formatted value'`
|
||||||
|
self.assertEqual(type(t.body[0]), ast.Expr)
|
||||||
|
self.assertEqual(type(t.body[0].value), ast.JoinedStr)
|
||||||
|
self.assertEqual(t.body[0].lineno, 2)
|
||||||
|
# check `f'...'`
|
||||||
|
self.assertEqual(type(t.body[1]), ast.Expr)
|
||||||
|
self.assertEqual(type(t.body[1].value), ast.JoinedStr)
|
||||||
|
self.assertEqual(len(t.body[1].value.values), 4)
|
||||||
|
self.assertEqual(type(t.body[1].value.values[0]), ast.Str)
|
||||||
|
self.assertEqual(type(t.body[1].value.values[1]), ast.FormattedValue)
|
||||||
|
self.assertEqual(type(t.body[1].value.values[2]), ast.Str)
|
||||||
|
self.assertEqual(type(t.body[1].value.values[3]), ast.FormattedValue)
|
||||||
|
self.assertEqual(t.body[1].lineno, 3)
|
||||||
|
self.assertEqual(t.body[1].value.lineno, 3)
|
||||||
|
self.assertEqual(t.body[1].value.values[0].lineno, 3)
|
||||||
|
self.assertEqual(t.body[1].value.values[1].lineno, 3)
|
||||||
|
self.assertEqual(t.body[1].value.values[2].lineno, 3)
|
||||||
|
self.assertEqual(t.body[1].value.values[3].lineno, 3)
|
||||||
|
# check the first binop location
|
||||||
|
binop1 = t.body[1].value.values[1].value
|
||||||
|
self.assertEqual(type(binop1), ast.BinOp)
|
||||||
|
self.assertEqual(type(binop1.left), ast.Name)
|
||||||
|
self.assertEqual(type(binop1.op), ast.Mult)
|
||||||
|
self.assertEqual(type(binop1.right), ast.Call)
|
||||||
|
self.assertEqual(binop1.lineno, 3)
|
||||||
|
self.assertEqual(binop1.left.lineno, 3)
|
||||||
|
self.assertEqual(binop1.right.lineno, 3)
|
||||||
|
self.assertEqual(binop1.col_offset, 8)
|
||||||
|
self.assertEqual(binop1.left.col_offset, 8)
|
||||||
|
self.assertEqual(binop1.right.col_offset, 12)
|
||||||
|
# check the second binop location
|
||||||
|
binop2 = t.body[1].value.values[3].value
|
||||||
|
self.assertEqual(type(binop2), ast.BinOp)
|
||||||
|
self.assertEqual(type(binop2.left), ast.Name)
|
||||||
|
self.assertEqual(type(binop2.op), ast.Add)
|
||||||
|
self.assertEqual(type(binop2.right), ast.Call)
|
||||||
|
self.assertEqual(binop2.lineno, 3)
|
||||||
|
self.assertEqual(binop2.left.lineno, 3)
|
||||||
|
self.assertEqual(binop2.right.lineno, 3)
|
||||||
|
self.assertEqual(binop2.col_offset, 23)
|
||||||
|
self.assertEqual(binop2.left.col_offset, 23)
|
||||||
|
self.assertEqual(binop2.right.col_offset, 27)
|
||||||
|
|
||||||
|
def test_ast_line_numbers_nested(self):
|
||||||
|
expr = """
|
||||||
|
a = 10
|
||||||
|
f'{a * f"-{x()}-"}'"""
|
||||||
|
t = ast.parse(expr)
|
||||||
|
self.assertEqual(type(t), ast.Module)
|
||||||
|
self.assertEqual(len(t.body), 2)
|
||||||
|
# check `a = 10`
|
||||||
|
self.assertEqual(type(t.body[0]), ast.Assign)
|
||||||
|
self.assertEqual(t.body[0].lineno, 2)
|
||||||
|
# check `f'...'`
|
||||||
|
self.assertEqual(type(t.body[1]), ast.Expr)
|
||||||
|
self.assertEqual(type(t.body[1].value), ast.JoinedStr)
|
||||||
|
self.assertEqual(len(t.body[1].value.values), 1)
|
||||||
|
self.assertEqual(type(t.body[1].value.values[0]), ast.FormattedValue)
|
||||||
|
self.assertEqual(t.body[1].lineno, 3)
|
||||||
|
self.assertEqual(t.body[1].value.lineno, 3)
|
||||||
|
self.assertEqual(t.body[1].value.values[0].lineno, 3)
|
||||||
|
# check the binop location
|
||||||
|
binop = t.body[1].value.values[0].value
|
||||||
|
self.assertEqual(type(binop), ast.BinOp)
|
||||||
|
self.assertEqual(type(binop.left), ast.Name)
|
||||||
|
self.assertEqual(type(binop.op), ast.Mult)
|
||||||
|
self.assertEqual(type(binop.right), ast.JoinedStr)
|
||||||
|
self.assertEqual(binop.lineno, 3)
|
||||||
|
self.assertEqual(binop.left.lineno, 3)
|
||||||
|
self.assertEqual(binop.right.lineno, 3)
|
||||||
|
self.assertEqual(binop.col_offset, 3)
|
||||||
|
self.assertEqual(binop.left.col_offset, 3)
|
||||||
|
self.assertEqual(binop.right.col_offset, 7)
|
||||||
|
# check the nested call location
|
||||||
|
self.assertEqual(len(binop.right.values), 3)
|
||||||
|
self.assertEqual(type(binop.right.values[0]), ast.Str)
|
||||||
|
self.assertEqual(type(binop.right.values[1]), ast.FormattedValue)
|
||||||
|
self.assertEqual(type(binop.right.values[2]), ast.Str)
|
||||||
|
self.assertEqual(binop.right.values[0].lineno, 3)
|
||||||
|
self.assertEqual(binop.right.values[1].lineno, 3)
|
||||||
|
self.assertEqual(binop.right.values[2].lineno, 3)
|
||||||
|
call = binop.right.values[1].value
|
||||||
|
self.assertEqual(type(call), ast.Call)
|
||||||
|
self.assertEqual(call.lineno, 3)
|
||||||
|
self.assertEqual(call.col_offset, 11)
|
||||||
|
|
||||||
|
def test_ast_line_numbers_duplicate_expression(self):
|
||||||
|
"""Duplicate expression
|
||||||
|
|
||||||
|
NOTE: this is currently broken, always sets location of the first
|
||||||
|
expression.
|
||||||
|
"""
|
||||||
|
expr = """
|
||||||
|
a = 10
|
||||||
|
f'{a * x()} {a * x()} {a * x()}'
|
||||||
|
"""
|
||||||
|
t = ast.parse(expr)
|
||||||
|
self.assertEqual(type(t), ast.Module)
|
||||||
|
self.assertEqual(len(t.body), 2)
|
||||||
|
# check `a = 10`
|
||||||
|
self.assertEqual(type(t.body[0]), ast.Assign)
|
||||||
|
self.assertEqual(t.body[0].lineno, 2)
|
||||||
|
# check `f'...'`
|
||||||
|
self.assertEqual(type(t.body[1]), ast.Expr)
|
||||||
|
self.assertEqual(type(t.body[1].value), ast.JoinedStr)
|
||||||
|
self.assertEqual(len(t.body[1].value.values), 5)
|
||||||
|
self.assertEqual(type(t.body[1].value.values[0]), ast.FormattedValue)
|
||||||
|
self.assertEqual(type(t.body[1].value.values[1]), ast.Str)
|
||||||
|
self.assertEqual(type(t.body[1].value.values[2]), ast.FormattedValue)
|
||||||
|
self.assertEqual(type(t.body[1].value.values[3]), ast.Str)
|
||||||
|
self.assertEqual(type(t.body[1].value.values[4]), ast.FormattedValue)
|
||||||
|
self.assertEqual(t.body[1].lineno, 3)
|
||||||
|
self.assertEqual(t.body[1].value.lineno, 3)
|
||||||
|
self.assertEqual(t.body[1].value.values[0].lineno, 3)
|
||||||
|
self.assertEqual(t.body[1].value.values[1].lineno, 3)
|
||||||
|
self.assertEqual(t.body[1].value.values[2].lineno, 3)
|
||||||
|
self.assertEqual(t.body[1].value.values[3].lineno, 3)
|
||||||
|
self.assertEqual(t.body[1].value.values[4].lineno, 3)
|
||||||
|
# check the first binop location
|
||||||
|
binop = t.body[1].value.values[0].value
|
||||||
|
self.assertEqual(type(binop), ast.BinOp)
|
||||||
|
self.assertEqual(type(binop.left), ast.Name)
|
||||||
|
self.assertEqual(type(binop.op), ast.Mult)
|
||||||
|
self.assertEqual(type(binop.right), ast.Call)
|
||||||
|
self.assertEqual(binop.lineno, 3)
|
||||||
|
self.assertEqual(binop.left.lineno, 3)
|
||||||
|
self.assertEqual(binop.right.lineno, 3)
|
||||||
|
self.assertEqual(binop.col_offset, 3)
|
||||||
|
self.assertEqual(binop.left.col_offset, 3)
|
||||||
|
self.assertEqual(binop.right.col_offset, 7)
|
||||||
|
# check the second binop location
|
||||||
|
binop = t.body[1].value.values[2].value
|
||||||
|
self.assertEqual(type(binop), ast.BinOp)
|
||||||
|
self.assertEqual(type(binop.left), ast.Name)
|
||||||
|
self.assertEqual(type(binop.op), ast.Mult)
|
||||||
|
self.assertEqual(type(binop.right), ast.Call)
|
||||||
|
self.assertEqual(binop.lineno, 3)
|
||||||
|
self.assertEqual(binop.left.lineno, 3)
|
||||||
|
self.assertEqual(binop.right.lineno, 3)
|
||||||
|
self.assertEqual(binop.col_offset, 3) # FIXME: this is wrong
|
||||||
|
self.assertEqual(binop.left.col_offset, 3) # FIXME: this is wrong
|
||||||
|
self.assertEqual(binop.right.col_offset, 7) # FIXME: this is wrong
|
||||||
|
# check the third binop location
|
||||||
|
binop = t.body[1].value.values[4].value
|
||||||
|
self.assertEqual(type(binop), ast.BinOp)
|
||||||
|
self.assertEqual(type(binop.left), ast.Name)
|
||||||
|
self.assertEqual(type(binop.op), ast.Mult)
|
||||||
|
self.assertEqual(type(binop.right), ast.Call)
|
||||||
|
self.assertEqual(binop.lineno, 3)
|
||||||
|
self.assertEqual(binop.left.lineno, 3)
|
||||||
|
self.assertEqual(binop.right.lineno, 3)
|
||||||
|
self.assertEqual(binop.col_offset, 3) # FIXME: this is wrong
|
||||||
|
self.assertEqual(binop.left.col_offset, 3) # FIXME: this is wrong
|
||||||
|
self.assertEqual(binop.right.col_offset, 7) # FIXME: this is wrong
|
||||||
|
|
||||||
|
def test_ast_line_numbers_multiline_fstring(self):
|
||||||
|
# FIXME: This test demonstrates invalid behavior due to JoinedStr's
|
||||||
|
# immediate child nodes containing the wrong lineno. The enclosed
|
||||||
|
# expressions have valid line information and column offsets.
|
||||||
|
# See bpo-16806 and bpo-30465 for details.
|
||||||
|
expr = """
|
||||||
|
a = 10
|
||||||
|
f'''
|
||||||
|
{a
|
||||||
|
*
|
||||||
|
x()}
|
||||||
|
non-important content
|
||||||
|
'''
|
||||||
|
"""
|
||||||
|
t = ast.parse(expr)
|
||||||
|
self.assertEqual(type(t), ast.Module)
|
||||||
|
self.assertEqual(len(t.body), 2)
|
||||||
|
# check `a = 10`
|
||||||
|
self.assertEqual(type(t.body[0]), ast.Assign)
|
||||||
|
self.assertEqual(t.body[0].lineno, 2)
|
||||||
|
# check `f'...'`
|
||||||
|
self.assertEqual(type(t.body[1]), ast.Expr)
|
||||||
|
self.assertEqual(type(t.body[1].value), ast.JoinedStr)
|
||||||
|
self.assertEqual(len(t.body[1].value.values), 3)
|
||||||
|
self.assertEqual(type(t.body[1].value.values[0]), ast.Str)
|
||||||
|
self.assertEqual(type(t.body[1].value.values[1]), ast.FormattedValue)
|
||||||
|
self.assertEqual(type(t.body[1].value.values[2]), ast.Str)
|
||||||
|
# NOTE: the following invalid behavior is described in bpo-16806.
|
||||||
|
# - line number should be the *first* line (3), not the *last* (8)
|
||||||
|
# - column offset should not be -1
|
||||||
|
self.assertEqual(t.body[1].lineno, 8)
|
||||||
|
self.assertEqual(t.body[1].value.lineno, 8)
|
||||||
|
self.assertEqual(t.body[1].value.values[0].lineno, 8)
|
||||||
|
self.assertEqual(t.body[1].value.values[1].lineno, 8)
|
||||||
|
self.assertEqual(t.body[1].value.values[2].lineno, 8)
|
||||||
|
self.assertEqual(t.body[1].col_offset, -1)
|
||||||
|
self.assertEqual(t.body[1].value.col_offset, -1)
|
||||||
|
self.assertEqual(t.body[1].value.values[0].col_offset, -1)
|
||||||
|
self.assertEqual(t.body[1].value.values[1].col_offset, -1)
|
||||||
|
self.assertEqual(t.body[1].value.values[2].col_offset, -1)
|
||||||
|
# NOTE: the following lineno information and col_offset is correct for
|
||||||
|
# expressions within FormattedValues.
|
||||||
|
binop = t.body[1].value.values[1].value
|
||||||
|
self.assertEqual(type(binop), ast.BinOp)
|
||||||
|
self.assertEqual(type(binop.left), ast.Name)
|
||||||
|
self.assertEqual(type(binop.op), ast.Mult)
|
||||||
|
self.assertEqual(type(binop.right), ast.Call)
|
||||||
|
self.assertEqual(binop.lineno, 4)
|
||||||
|
self.assertEqual(binop.left.lineno, 4)
|
||||||
|
self.assertEqual(binop.right.lineno, 6)
|
||||||
|
self.assertEqual(binop.col_offset, 3)
|
||||||
|
self.assertEqual(binop.left.col_offset, 3)
|
||||||
|
self.assertEqual(binop.right.col_offset, 7)
|
||||||
|
|
||||||
def test_docstring(self):
|
def test_docstring(self):
|
||||||
def f():
|
def f():
|
||||||
f'''Not a docstring'''
|
f'''Not a docstring'''
|
||||||
|
@ -786,5 +1033,6 @@ f'{a * x()}'"""
|
||||||
self.assertEqual(eval('f"\\\n"'), '')
|
self.assertEqual(eval('f"\\\n"'), '')
|
||||||
self.assertEqual(eval('f"\\\r"'), '')
|
self.assertEqual(eval('f"\\\r"'), '')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
Location information (``lineno`` and ``col_offset``) in f-strings is now
|
||||||
|
(mostly) correct. This fixes tools like flake8 from showing warnings on the
|
||||||
|
wrong line (typically the first line of the file).
|
72
Python/ast.c
72
Python/ast.c
|
@ -8,6 +8,7 @@
|
||||||
#include "node.h"
|
#include "node.h"
|
||||||
#include "ast.h"
|
#include "ast.h"
|
||||||
#include "token.h"
|
#include "token.h"
|
||||||
|
#include "pythonrun.h"
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
|
@ -4270,6 +4271,55 @@ decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Shift locations for the given node and all its children by adding `lineno`
|
||||||
|
and `col_offset` to existing locations. */
|
||||||
|
static void fstring_shift_node_locations(node *n, int lineno, int col_offset)
|
||||||
|
{
|
||||||
|
n->n_col_offset = n->n_col_offset + col_offset;
|
||||||
|
for (int i = 0; i < NCH(n); ++i) {
|
||||||
|
if (n->n_lineno && n->n_lineno < CHILD(n, i)->n_lineno) {
|
||||||
|
/* Shifting column offsets unnecessary if there's been newlines. */
|
||||||
|
col_offset = 0;
|
||||||
|
}
|
||||||
|
fstring_shift_node_locations(CHILD(n, i), lineno, col_offset);
|
||||||
|
}
|
||||||
|
n->n_lineno = n->n_lineno + lineno;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Fix locations for the given node and its children.
|
||||||
|
|
||||||
|
`parent` is the enclosing node.
|
||||||
|
`n` is the node which locations are going to be fixed relative to parent.
|
||||||
|
`expr_str` is the child node's string representation, incuding braces.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
fstring_fix_node_location(const node *parent, node *n, char *expr_str)
|
||||||
|
{
|
||||||
|
char *substr = NULL;
|
||||||
|
char *start;
|
||||||
|
int lines = LINENO(parent) - 1;
|
||||||
|
int cols = parent->n_col_offset;
|
||||||
|
/* Find the full fstring to fix location information in `n`. */
|
||||||
|
while (parent && parent->n_type != STRING)
|
||||||
|
parent = parent->n_child;
|
||||||
|
if (parent && parent->n_str) {
|
||||||
|
substr = strstr(parent->n_str, expr_str);
|
||||||
|
if (substr) {
|
||||||
|
start = substr;
|
||||||
|
while (start > parent->n_str) {
|
||||||
|
if (start[0] == '\n')
|
||||||
|
break;
|
||||||
|
start--;
|
||||||
|
}
|
||||||
|
cols += substr - start;
|
||||||
|
/* Fix lineno in mulitline strings. */
|
||||||
|
while ((substr = strchr(substr + 1, '\n')))
|
||||||
|
lines--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fstring_shift_node_locations(n, lines, cols);
|
||||||
|
}
|
||||||
|
|
||||||
/* Compile this expression in to an expr_ty. Add parens around the
|
/* Compile this expression in to an expr_ty. Add parens around the
|
||||||
expression, in order to allow leading spaces in the expression. */
|
expression, in order to allow leading spaces in the expression. */
|
||||||
static expr_ty
|
static expr_ty
|
||||||
|
@ -4278,6 +4328,7 @@ fstring_compile_expr(const char *expr_start, const char *expr_end,
|
||||||
|
|
||||||
{
|
{
|
||||||
PyCompilerFlags cf;
|
PyCompilerFlags cf;
|
||||||
|
node *mod_n;
|
||||||
mod_ty mod;
|
mod_ty mod;
|
||||||
char *str;
|
char *str;
|
||||||
Py_ssize_t len;
|
Py_ssize_t len;
|
||||||
|
@ -4287,9 +4338,10 @@ fstring_compile_expr(const char *expr_start, const char *expr_end,
|
||||||
assert(*(expr_start-1) == '{');
|
assert(*(expr_start-1) == '{');
|
||||||
assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':');
|
assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':');
|
||||||
|
|
||||||
/* If the substring is all whitespace, it's an error. We need to catch
|
/* If the substring is all whitespace, it's an error. We need to catch this
|
||||||
this here, and not when we call PyParser_ASTFromString, because turning
|
here, and not when we call PyParser_SimpleParseStringFlagsFilename,
|
||||||
the expression '' in to '()' would go from being invalid to valid. */
|
because turning the expression '' in to '()' would go from being invalid
|
||||||
|
to valid. */
|
||||||
for (s = expr_start; s != expr_end; s++) {
|
for (s = expr_start; s != expr_end; s++) {
|
||||||
char c = *s;
|
char c = *s;
|
||||||
/* The Python parser ignores only the following whitespace
|
/* The Python parser ignores only the following whitespace
|
||||||
|
@ -4315,9 +4367,19 @@ fstring_compile_expr(const char *expr_start, const char *expr_end,
|
||||||
str[len+2] = 0;
|
str[len+2] = 0;
|
||||||
|
|
||||||
cf.cf_flags = PyCF_ONLY_AST;
|
cf.cf_flags = PyCF_ONLY_AST;
|
||||||
mod = PyParser_ASTFromString(str, "<fstring>",
|
mod_n = PyParser_SimpleParseStringFlagsFilename(str, "<fstring>",
|
||||||
Py_eval_input, &cf, c->c_arena);
|
Py_eval_input, 0);
|
||||||
|
if (!mod_n) {
|
||||||
PyMem_RawFree(str);
|
PyMem_RawFree(str);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
/* Reuse str to find the correct column offset. */
|
||||||
|
str[0] = '{';
|
||||||
|
str[len+1] = '}';
|
||||||
|
fstring_fix_node_location(n, mod_n, str);
|
||||||
|
mod = PyAST_FromNode(mod_n, &cf, "<fstring>", c->c_arena);
|
||||||
|
PyMem_RawFree(str);
|
||||||
|
PyNode_Free(mod_n);
|
||||||
if (!mod)
|
if (!mod)
|
||||||
return NULL;
|
return NULL;
|
||||||
return mod->v.Expression.body;
|
return mod->v.Expression.body;
|
||||||
|
|
Loading…
Reference in New Issue