mirror of https://github.com/python/cpython
bpo-44180: Fix edge cases in invalid assignment rules in the parser (GH-26283)
The invalid assignment rules are very delicate, since the parser can easily raise an invalid-assignment error when a keyword argument is provided. As these rules sit very deep in the grammar tree, it is very difficult to specify in which contexts they can be used and in which they cannot. For that reason, we need to use a different version of the rule that doesn't do error checking in those situations where we don't want the rule to raise (keyword arguments and generator expressions). We also need to check whether we are in a left-recursive rule, as those can try to eagerly advance the parser even if the parse will fail at the end of the expression. Failing to do this allows the parser to start parsing a call as a tuple and incorrectly identify a keyword argument as an invalid assignment before it realizes that it was not a tuple after all.
This commit is contained in:
parent
615069eb08
commit
c878a97968
|
@ -509,13 +509,13 @@ star_named_expression[expr_ty]:
|
||||||
| '*' a=bitwise_or { _PyAST_Starred(a, Load, EXTRA) }
|
| '*' a=bitwise_or { _PyAST_Starred(a, Load, EXTRA) }
|
||||||
| named_expression
|
| named_expression
|
||||||
|
|
||||||
named_expression[expr_ty]:
|
|
||||||
| a=NAME ':=' ~ b=expression { _PyAST_NamedExpr(CHECK(expr_ty, _PyPegen_set_expr_context(p, a, Store)), b, EXTRA) }
|
|
||||||
| invalid_named_expression
|
|
||||||
| expression !':='
|
|
||||||
|
|
||||||
direct_named_expression[expr_ty]:
|
assigment_expression[expr_ty]:
|
||||||
| a=NAME ':=' ~ b=expression { _PyAST_NamedExpr(CHECK(expr_ty, _PyPegen_set_expr_context(p, a, Store)), b, EXTRA) }
|
| a=NAME ':=' ~ b=expression { _PyAST_NamedExpr(CHECK(expr_ty, _PyPegen_set_expr_context(p, a, Store)), b, EXTRA) }
|
||||||
|
|
||||||
|
named_expression[expr_ty]:
|
||||||
|
| assigment_expression
|
||||||
|
| invalid_named_expression
|
||||||
| expression !':='
|
| expression !':='
|
||||||
|
|
||||||
annotated_rhs[expr_ty]: yield_expr | star_expressions
|
annotated_rhs[expr_ty]: yield_expr | star_expressions
|
||||||
|
@ -706,7 +706,7 @@ group[expr_ty]:
|
||||||
| '(' a=(yield_expr | named_expression) ')' { a }
|
| '(' a=(yield_expr | named_expression) ')' { a }
|
||||||
| invalid_group
|
| invalid_group
|
||||||
genexp[expr_ty]:
|
genexp[expr_ty]:
|
||||||
| '(' a=direct_named_expression b=for_if_clauses ')' { _PyAST_GeneratorExp(a, b, EXTRA) }
|
| '(' a=( assigment_expression | expression !':=') b=for_if_clauses ')' { _PyAST_GeneratorExp(a, b, EXTRA) }
|
||||||
| invalid_comprehension
|
| invalid_comprehension
|
||||||
set[expr_ty]: '{' a=star_named_expressions '}' { _PyAST_Set(a, EXTRA) }
|
set[expr_ty]: '{' a=star_named_expressions '}' { _PyAST_Set(a, EXTRA) }
|
||||||
setcomp[expr_ty]:
|
setcomp[expr_ty]:
|
||||||
|
@ -745,11 +745,13 @@ arguments[expr_ty] (memo):
|
||||||
| a=args [','] &')' { a }
|
| a=args [','] &')' { a }
|
||||||
| invalid_arguments
|
| invalid_arguments
|
||||||
args[expr_ty]:
|
args[expr_ty]:
|
||||||
| a[asdl_expr_seq*]=','.(starred_expression | direct_named_expression !'=')+ b=[',' k=kwargs {k}] { _PyPegen_collect_call_seqs(p, a, b, EXTRA) }
|
| a[asdl_expr_seq*]=','.(starred_expression | ( assigment_expression | expression !':=') !'=')+ b=[',' k=kwargs {k}] {
|
||||||
|
_PyPegen_collect_call_seqs(p, a, b, EXTRA) }
|
||||||
| a=kwargs { _PyAST_Call(_PyPegen_dummy_name(p),
|
| a=kwargs { _PyAST_Call(_PyPegen_dummy_name(p),
|
||||||
CHECK_NULL_ALLOWED(asdl_expr_seq*, _PyPegen_seq_extract_starred_exprs(p, a)),
|
CHECK_NULL_ALLOWED(asdl_expr_seq*, _PyPegen_seq_extract_starred_exprs(p, a)),
|
||||||
CHECK_NULL_ALLOWED(asdl_keyword_seq*, _PyPegen_seq_delete_starred_exprs(p, a)),
|
CHECK_NULL_ALLOWED(asdl_keyword_seq*, _PyPegen_seq_delete_starred_exprs(p, a)),
|
||||||
EXTRA) }
|
EXTRA) }
|
||||||
|
|
||||||
kwargs[asdl_seq*]:
|
kwargs[asdl_seq*]:
|
||||||
| a=','.kwarg_or_starred+ ',' b=','.kwarg_or_double_starred+ { _PyPegen_join_sequences(p, a, b) }
|
| a=','.kwarg_or_starred+ ',' b=','.kwarg_or_double_starred+ { _PyPegen_join_sequences(p, a, b) }
|
||||||
| ','.kwarg_or_starred+
|
| ','.kwarg_or_starred+
|
||||||
|
@ -757,15 +759,15 @@ kwargs[asdl_seq*]:
|
||||||
starred_expression[expr_ty]:
|
starred_expression[expr_ty]:
|
||||||
| '*' a=expression { _PyAST_Starred(a, Load, EXTRA) }
|
| '*' a=expression { _PyAST_Starred(a, Load, EXTRA) }
|
||||||
kwarg_or_starred[KeywordOrStarred*]:
|
kwarg_or_starred[KeywordOrStarred*]:
|
||||||
|
| invalid_kwarg
|
||||||
| a=NAME '=' b=expression {
|
| a=NAME '=' b=expression {
|
||||||
_PyPegen_keyword_or_starred(p, CHECK(keyword_ty, _PyAST_keyword(a->v.Name.id, b, EXTRA)), 1) }
|
_PyPegen_keyword_or_starred(p, CHECK(keyword_ty, _PyAST_keyword(a->v.Name.id, b, EXTRA)), 1) }
|
||||||
| a=starred_expression { _PyPegen_keyword_or_starred(p, a, 0) }
|
| a=starred_expression { _PyPegen_keyword_or_starred(p, a, 0) }
|
||||||
| invalid_kwarg
|
|
||||||
kwarg_or_double_starred[KeywordOrStarred*]:
|
kwarg_or_double_starred[KeywordOrStarred*]:
|
||||||
|
| invalid_kwarg
|
||||||
| a=NAME '=' b=expression {
|
| a=NAME '=' b=expression {
|
||||||
_PyPegen_keyword_or_starred(p, CHECK(keyword_ty, _PyAST_keyword(a->v.Name.id, b, EXTRA)), 1) }
|
_PyPegen_keyword_or_starred(p, CHECK(keyword_ty, _PyAST_keyword(a->v.Name.id, b, EXTRA)), 1) }
|
||||||
| '**' a=expression { _PyPegen_keyword_or_starred(p, CHECK(keyword_ty, _PyAST_keyword(NULL, a, EXTRA)), 1) }
|
| '**' a=expression { _PyPegen_keyword_or_starred(p, CHECK(keyword_ty, _PyAST_keyword(NULL, a, EXTRA)), 1) }
|
||||||
| invalid_kwarg
|
|
||||||
|
|
||||||
# NOTE: star_targets may contain *bitwise_or, targets may not.
|
# NOTE: star_targets may contain *bitwise_or, targets may not.
|
||||||
star_targets[expr_ty]:
|
star_targets[expr_ty]:
|
||||||
|
@ -838,29 +840,37 @@ invalid_arguments:
|
||||||
| a=args ',' '*' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "iterable argument unpacking follows keyword argument unpacking") }
|
| a=args ',' '*' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "iterable argument unpacking follows keyword argument unpacking") }
|
||||||
| a=expression b=for_if_clauses ',' [args | expression for_if_clauses] {
|
| a=expression b=for_if_clauses ',' [args | expression for_if_clauses] {
|
||||||
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, PyPegen_last_item(b, comprehension_ty)->target, "Generator expression must be parenthesized") }
|
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, PyPegen_last_item(b, comprehension_ty)->target, "Generator expression must be parenthesized") }
|
||||||
|
| a=NAME b='=' expression for_if_clauses {
|
||||||
|
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Maybe you meant '==' or ':=' instead of '='?")}
|
||||||
| a=args for_if_clauses { _PyPegen_nonparen_genexp_in_call(p, a) }
|
| a=args for_if_clauses { _PyPegen_nonparen_genexp_in_call(p, a) }
|
||||||
| args ',' a=expression b=for_if_clauses {
|
| args ',' a=expression b=for_if_clauses {
|
||||||
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, asdl_seq_GET(b, b->size-1)->target, "Generator expression must be parenthesized") }
|
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, asdl_seq_GET(b, b->size-1)->target, "Generator expression must be parenthesized") }
|
||||||
| a=args ',' args { _PyPegen_arguments_parsing_error(p, a) }
|
| a=args ',' args { _PyPegen_arguments_parsing_error(p, a) }
|
||||||
invalid_kwarg:
|
invalid_kwarg:
|
||||||
|
| a=NAME b='=' expression for_if_clauses {
|
||||||
|
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Maybe you meant '==' or ':=' instead of '='?")}
|
||||||
| !(NAME '=') a=expression b='=' {
|
| !(NAME '=') a=expression b='=' {
|
||||||
RAISE_SYNTAX_ERROR_KNOWN_RANGE(
|
RAISE_SYNTAX_ERROR_KNOWN_RANGE(
|
||||||
a, b, "expression cannot contain assignment, perhaps you meant \"==\"?") }
|
a, b, "expression cannot contain assignment, perhaps you meant \"==\"?") }
|
||||||
|
|
||||||
|
expression_without_invalid[expr_ty]:
|
||||||
|
| a=disjunction 'if' b=disjunction 'else' c=expression { _PyAST_IfExp(b, a, c, EXTRA) }
|
||||||
|
| disjunction
|
||||||
|
| lambdef
|
||||||
invalid_expression:
|
invalid_expression:
|
||||||
# !(NAME STRING) is not matched so we don't show this error with some invalid string prefixes like: kf"dsfsdf"
|
# !(NAME STRING) is not matched so we don't show this error with some invalid string prefixes like: kf"dsfsdf"
|
||||||
# Soft keywords need to also be ignored because they can be parsed as NAME NAME
|
# Soft keywords need to also be ignored because they can be parsed as NAME NAME
|
||||||
| !(NAME STRING | SOFT_KEYWORD) a=disjunction b=expression {
|
| !(NAME STRING | SOFT_KEYWORD) a=disjunction b=expression_without_invalid {
|
||||||
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Perhaps you forgot a comma?") }
|
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Perhaps you forgot a comma?") }
|
||||||
|
|
||||||
invalid_named_expression:
|
invalid_named_expression:
|
||||||
| a=expression ':=' expression {
|
| a=expression ':=' expression {
|
||||||
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
|
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
|
||||||
a, "cannot use assignment expressions with %s", _PyPegen_get_expr_name(a)) }
|
a, "cannot use assignment expressions with %s", _PyPegen_get_expr_name(a)) }
|
||||||
| a=NAME '=' b=bitwise_or !('='|':='|',') {
|
| a=NAME '=' b=bitwise_or !('='|':=') {
|
||||||
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Maybe you meant '==' or ':=' instead of '='?") }
|
p->in_raw_rule ? NULL : RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Maybe you meant '==' or ':=' instead of '='?") }
|
||||||
| !(list|tuple|genexp|'True'|'None'|'False') a=bitwise_or b='=' bitwise_or !('='|':='|',') {
|
| !(list|tuple|genexp|'True'|'None'|'False') a=bitwise_or b='=' bitwise_or !('='|':=') {
|
||||||
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot assign to %s here. Maybe you meant '==' instead of '='?",
|
p->in_raw_rule ? NULL : RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot assign to %s here. Maybe you meant '==' instead of '='?",
|
||||||
_PyPegen_get_expr_name(a)) }
|
_PyPegen_get_expr_name(a)) }
|
||||||
|
|
||||||
invalid_assignment:
|
invalid_assignment:
|
||||||
|
|
|
@ -1128,6 +1128,26 @@ SyntaxError: cannot assign to f-string expression here. Maybe you meant '==' ins
|
||||||
Traceback (most recent call last):
|
Traceback (most recent call last):
|
||||||
SyntaxError: cannot assign to f-string expression here. Maybe you meant '==' instead of '='?
|
SyntaxError: cannot assign to f-string expression here. Maybe you meant '==' instead of '='?
|
||||||
|
|
||||||
|
>>> (x, y, z=3, d, e)
|
||||||
|
Traceback (most recent call last):
|
||||||
|
SyntaxError: invalid syntax. Maybe you meant '==' or ':=' instead of '='?
|
||||||
|
|
||||||
|
>>> [x, y, z=3, d, e]
|
||||||
|
Traceback (most recent call last):
|
||||||
|
SyntaxError: invalid syntax. Maybe you meant '==' or ':=' instead of '='?
|
||||||
|
|
||||||
|
>>> [z=3]
|
||||||
|
Traceback (most recent call last):
|
||||||
|
SyntaxError: invalid syntax. Maybe you meant '==' or ':=' instead of '='?
|
||||||
|
|
||||||
|
>>> {x, y, z=3, d, e}
|
||||||
|
Traceback (most recent call last):
|
||||||
|
SyntaxError: invalid syntax. Maybe you meant '==' or ':=' instead of '='?
|
||||||
|
|
||||||
|
>>> {z=3}
|
||||||
|
Traceback (most recent call last):
|
||||||
|
SyntaxError: invalid syntax. Maybe you meant '==' or ':=' instead of '='?
|
||||||
|
|
||||||
>>> from t import x,
|
>>> from t import x,
|
||||||
Traceback (most recent call last):
|
Traceback (most recent call last):
|
||||||
SyntaxError: trailing comma not allowed without surrounding parentheses
|
SyntaxError: trailing comma not allowed without surrounding parentheses
|
||||||
|
|
4352
Parser/parser.c
4352
Parser/parser.c
File diff suppressed because it is too large
Load Diff
|
@ -1222,7 +1222,7 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
|
||||||
p->known_err_token = NULL;
|
p->known_err_token = NULL;
|
||||||
p->level = 0;
|
p->level = 0;
|
||||||
p->call_invalid_rules = 0;
|
p->call_invalid_rules = 0;
|
||||||
|
p->in_raw_rule = 0;
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -74,6 +74,7 @@ typedef struct {
|
||||||
Token *known_err_token;
|
Token *known_err_token;
|
||||||
int level;
|
int level;
|
||||||
int call_invalid_rules;
|
int call_invalid_rules;
|
||||||
|
int in_raw_rule;
|
||||||
} Parser;
|
} Parser;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|
|
@ -531,7 +531,9 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||||
f"_PyPegen_update_memo(p, _mark, {node.name}_type, _res)", "_res"
|
f"_PyPegen_update_memo(p, _mark, {node.name}_type, _res)", "_res"
|
||||||
)
|
)
|
||||||
self.print("p->mark = _mark;")
|
self.print("p->mark = _mark;")
|
||||||
|
self.print("p->in_raw_rule++;")
|
||||||
self.print(f"void *_raw = {node.name}_raw(p);")
|
self.print(f"void *_raw = {node.name}_raw(p);")
|
||||||
|
self.print("p->in_raw_rule--;")
|
||||||
self.print("if (p->error_indicator)")
|
self.print("if (p->error_indicator)")
|
||||||
with self.indent():
|
with self.indent():
|
||||||
self.print("return NULL;")
|
self.print("return NULL;")
|
||||||
|
|
Loading…
Reference in New Issue