From bca701403253379409dece03053dbd739c0bd059 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Tue, 27 Oct 2020 00:42:04 +0200 Subject: [PATCH] bpo-42123: Run the parser two times and only enable invalid rules on the second run (GH-22111) * Implement running the parser a second time for the errors messages The first parser run is only responsible for detecting whether there is a `SyntaxError` or not. If there isn't the AST gets returned. Otherwise, the parser is run a second time with all the `invalid_*` rules enabled so that all the customized error messages get produced. --- Grammar/python.gram | 4 +- .../2020-10-23-02-43-24.bpo-42123.64gJWC.rst | 3 + Parser/parser.c | 94 +++++++++---------- Parser/pegen.c | 13 +++ Parser/pegen.h | 1 + Tools/peg_generator/pegen/c_generator.py | 5 +- 6 files changed, 70 insertions(+), 50 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2020-10-23-02-43-24.bpo-42123.64gJWC.rst diff --git a/Grammar/python.gram b/Grammar/python.gram index d2d6fc0d339..19c85accf8d 100644 --- a/Grammar/python.gram +++ b/Grammar/python.gram @@ -551,7 +551,7 @@ yield_expr[expr_ty]: arguments[expr_ty] (memo): | a=args [','] &')' { a } - | incorrect_arguments + | invalid_arguments args[expr_ty]: | a[asdl_expr_seq*]=','.(starred_expression | named_expression !'=')+ b=[',' k=kwargs {k}] { _PyPegen_collect_call_seqs(p, a, b, EXTRA) } | a=kwargs { _Py_Call(_PyPegen_dummy_name(p), @@ -637,7 +637,7 @@ t_atom[expr_ty]: # From here on, there are rules for invalid syntax with specialised error messages -incorrect_arguments: +invalid_arguments: | args ',' '*' { RAISE_SYNTAX_ERROR("iterable argument unpacking follows keyword argument unpacking") } | a=expression for_if_clauses ',' [args | expression for_if_clauses] { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "Generator expression must be parenthesized") } diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-10-23-02-43-24.bpo-42123.64gJWC.rst b/Misc/NEWS.d/next/Core and Builtins/2020-10-23-02-43-24.bpo-42123.64gJWC.rst new file mode 100644 index 00000000000..6461efd76f0 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-10-23-02-43-24.bpo-42123.64gJWC.rst @@ -0,0 +1,3 @@ +Run the parser two times. On the first run, disable all the rules that only +generate better error messages to gain performance. If there's a parse +failure, run the parser a second time with those enabled. diff --git a/Parser/parser.c b/Parser/parser.c index 09be03c633a..e438f06c9be 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -209,7 +209,7 @@ static KeywordToken *reserved_keywords[] = { #define t_primary_type 1140 // Left-recursive #define t_lookahead_type 1141 #define t_atom_type 1142 -#define incorrect_arguments_type 1143 +#define invalid_arguments_type 1143 #define invalid_kwarg_type 1144 #define invalid_named_expression_type 1145 #define invalid_assignment_type 1146 @@ -527,7 +527,7 @@ static expr_ty target_rule(Parser *p); static expr_ty t_primary_rule(Parser *p); static void *t_lookahead_rule(Parser *p); static expr_ty t_atom_rule(Parser *p); -static void *incorrect_arguments_rule(Parser *p); +static void *invalid_arguments_rule(Parser *p); static void *invalid_kwarg_rule(Parser *p); static void *invalid_named_expression_rule(Parser *p); static void *invalid_assignment_rule(Parser *p); @@ -2218,7 +2218,7 @@ assignment_rule(Parser *p) return NULL; } } - { // invalid_assignment + if (p->call_invalid_rules) { // invalid_assignment if (p->error_indicator) { D(p->level--); return NULL; @@ -2891,7 +2891,7 @@ del_stmt_rule(Parser *p) D(fprintf(stderr, "%*c%s del_stmt[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'del' del_targets &(';' | NEWLINE)")); } - { // invalid_del_stmt + if (p->call_invalid_rules) { // invalid_del_stmt if (p->error_indicator) { D(p->level--); return NULL; @@ -3242,7 +3242,7 @@ import_from_targets_rule(Parser *p) D(fprintf(stderr, "%*c%s import_from_targets[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'*'")); } - { // invalid_import_from_targets + if (p->call_invalid_rules) { // invalid_import_from_targets if (p->error_indicator) { D(p->level--); return NULL; @@ -4035,7 +4035,7 @@ for_stmt_rule(Parser *p) return NULL; } } - { // invalid_for_target + if (p->call_invalid_rules) { // invalid_for_target if (p->error_indicator) { D(p->level--); return NULL; @@ -4336,7 +4336,7 @@ with_item_rule(Parser *p) D(fprintf(stderr, "%*c%s with_item[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "expression 'as' star_target &(',' | ')' | ':')")); } - { // invalid_with_item + if (p->call_invalid_rules) { // invalid_with_item if (p->error_indicator) { D(p->level--); return NULL; @@ -5071,7 +5071,7 @@ func_type_comment_rule(Parser *p) D(fprintf(stderr, "%*c%s func_type_comment[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "NEWLINE TYPE_COMMENT &(NEWLINE INDENT)")); } - { // invalid_double_type_comments + if (p->call_invalid_rules) { // invalid_double_type_comments if (p->error_indicator) { D(p->level--); return NULL; @@ -5126,7 +5126,7 @@ params_rule(Parser *p) } arguments_ty _res = NULL; int _mark = p->mark; - { // invalid_parameters + if (p->call_invalid_rules) { // invalid_parameters if (p->error_indicator) { D(p->level--); return NULL; @@ -5601,7 +5601,7 @@ star_etc_rule(Parser *p) D(fprintf(stderr, "%*c%s star_etc[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "kwds")); } - { // invalid_star_etc + if (p->call_invalid_rules) { // invalid_star_etc if (p->error_indicator) { D(p->level--); return NULL; @@ -6304,7 +6304,7 @@ block_rule(Parser *p) D(fprintf(stderr, "%*c%s block[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "simple_stmt")); } - { // invalid_block + if (p->call_invalid_rules) { // invalid_block if (p->error_indicator) { D(p->level--); return NULL; @@ -6798,7 +6798,7 @@ named_expression_rule(Parser *p) D(fprintf(stderr, "%*c%s named_expression[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "expression !':='")); } - { // invalid_named_expression + if (p->call_invalid_rules) { // invalid_named_expression if (p->error_indicator) { D(p->level--); return NULL; @@ -7192,7 +7192,7 @@ lambda_params_rule(Parser *p) } arguments_ty _res = NULL; int _mark = p->mark; - { // invalid_lambda_parameters + if (p->call_invalid_rules) { // invalid_lambda_parameters if (p->error_indicator) { D(p->level--); return NULL; @@ -7669,7 +7669,7 @@ lambda_star_etc_rule(Parser *p) D(fprintf(stderr, "%*c%s lambda_star_etc[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "lambda_kwds")); } - { // invalid_lambda_star_etc + if (p->call_invalid_rules) { // invalid_lambda_star_etc if (p->error_indicator) { D(p->level--); return NULL; @@ -11163,7 +11163,7 @@ listcomp_rule(Parser *p) return NULL; } } - { // invalid_comprehension + if (p->call_invalid_rules) { // invalid_comprehension if (p->error_indicator) { D(p->level--); return NULL; @@ -11294,7 +11294,7 @@ group_rule(Parser *p) D(fprintf(stderr, "%*c%s group[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'(' (yield_expr | named_expression) ')'")); } - { // invalid_group + if (p->call_invalid_rules) { // invalid_group if (p->error_indicator) { D(p->level--); return NULL; @@ -11388,7 +11388,7 @@ genexp_rule(Parser *p) return NULL; } } - { // invalid_comprehension + if (p->call_invalid_rules) { // invalid_comprehension if (p->error_indicator) { D(p->level--); return NULL; @@ -11547,7 +11547,7 @@ setcomp_rule(Parser *p) return NULL; } } - { // invalid_comprehension + if (p->call_invalid_rules) { // invalid_comprehension if (p->error_indicator) { D(p->level--); return NULL; @@ -11699,7 +11699,7 @@ dictcomp_rule(Parser *p) D(fprintf(stderr, "%*c%s dictcomp[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'{' kvpair for_if_clauses '}'")); } - { // invalid_dict_comprehension + if (p->call_invalid_rules) { // invalid_dict_comprehension if (p->error_indicator) { D(p->level--); return NULL; @@ -12023,7 +12023,7 @@ for_if_clause_rule(Parser *p) return NULL; } } - { // invalid_for_target + if (p->call_invalid_rules) { // invalid_for_target if (p->error_indicator) { D(p->level--); return NULL; @@ -12149,7 +12149,7 @@ yield_expr_rule(Parser *p) return _res; } -// arguments: args ','? &')' | incorrect_arguments +// arguments: args ','? &')' | invalid_arguments static expr_ty arguments_rule(Parser *p) { @@ -12194,24 +12194,24 @@ arguments_rule(Parser *p) D(fprintf(stderr, "%*c%s arguments[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "args ','? &')'")); } - { // incorrect_arguments + if (p->call_invalid_rules) { // invalid_arguments if (p->error_indicator) { D(p->level--); return NULL; } - D(fprintf(stderr, "%*c> arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "incorrect_arguments")); - void *incorrect_arguments_var; + D(fprintf(stderr, "%*c> arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "invalid_arguments")); + void *invalid_arguments_var; if ( - (incorrect_arguments_var = incorrect_arguments_rule(p)) // incorrect_arguments + (invalid_arguments_var = invalid_arguments_rule(p)) // invalid_arguments ) { - D(fprintf(stderr, "%*c+ arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "incorrect_arguments")); - _res = incorrect_arguments_var; + D(fprintf(stderr, "%*c+ arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "invalid_arguments")); + _res = invalid_arguments_var; goto done; } p->mark = _mark; D(fprintf(stderr, "%*c%s arguments[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "incorrect_arguments")); + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "invalid_arguments")); } _res = NULL; done: @@ -12548,7 +12548,7 @@ kwarg_or_starred_rule(Parser *p) D(fprintf(stderr, "%*c%s kwarg_or_starred[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "starred_expression")); } - { // invalid_kwarg + if (p->call_invalid_rules) { // invalid_kwarg if (p->error_indicator) { D(p->level--); return NULL; @@ -12668,7 +12668,7 @@ kwarg_or_double_starred_rule(Parser *p) D(fprintf(stderr, "%*c%s kwarg_or_double_starred[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'**' expression")); } - { // invalid_kwarg + if (p->call_invalid_rules) { // invalid_kwarg if (p->error_indicator) { D(p->level--); return NULL; @@ -14380,14 +14380,14 @@ t_atom_rule(Parser *p) return _res; } -// incorrect_arguments: +// invalid_arguments: // | args ',' '*' // | expression for_if_clauses ',' [args | expression for_if_clauses] // | args for_if_clauses // | args ',' expression for_if_clauses // | args ',' args static void * -incorrect_arguments_rule(Parser *p) +invalid_arguments_rule(Parser *p) { D(p->level++); if (p->error_indicator) { @@ -14401,7 +14401,7 @@ incorrect_arguments_rule(Parser *p) D(p->level--); return NULL; } - D(fprintf(stderr, "%*c> incorrect_arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "args ',' '*'")); + D(fprintf(stderr, "%*c> invalid_arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "args ',' '*'")); Token * _literal; Token * _literal_1; expr_ty args_var; @@ -14413,7 +14413,7 @@ incorrect_arguments_rule(Parser *p) (_literal_1 = _PyPegen_expect_token(p, 16)) // token='*' ) { - D(fprintf(stderr, "%*c+ incorrect_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "args ',' '*'")); + D(fprintf(stderr, "%*c+ invalid_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "args ',' '*'")); _res = RAISE_SYNTAX_ERROR ( "iterable argument unpacking follows keyword argument unpacking" ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -14423,7 +14423,7 @@ incorrect_arguments_rule(Parser *p) goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s incorrect_arguments[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s invalid_arguments[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "args ',' '*'")); } { // expression for_if_clauses ',' [args | expression for_if_clauses] @@ -14431,7 +14431,7 @@ incorrect_arguments_rule(Parser *p) D(p->level--); return NULL; } - D(fprintf(stderr, "%*c> incorrect_arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "expression for_if_clauses ',' [args | expression for_if_clauses]")); + D(fprintf(stderr, "%*c> invalid_arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "expression for_if_clauses ',' [args | expression for_if_clauses]")); Token * _literal; void *_opt_var; UNUSED(_opt_var); // Silence compiler warnings @@ -14447,7 +14447,7 @@ incorrect_arguments_rule(Parser *p) (_opt_var = _tmp_127_rule(p), 1) // [args | expression for_if_clauses] ) { - D(fprintf(stderr, "%*c+ incorrect_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression for_if_clauses ',' [args | expression for_if_clauses]")); + D(fprintf(stderr, "%*c+ invalid_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression for_if_clauses ',' [args | expression for_if_clauses]")); _res = RAISE_SYNTAX_ERROR_KNOWN_LOCATION ( a , "Generator expression must be parenthesized" ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -14457,7 +14457,7 @@ incorrect_arguments_rule(Parser *p) goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s incorrect_arguments[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s invalid_arguments[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "expression for_if_clauses ',' [args | expression for_if_clauses]")); } { // args for_if_clauses @@ -14465,7 +14465,7 @@ incorrect_arguments_rule(Parser *p) D(p->level--); return NULL; } - D(fprintf(stderr, "%*c> incorrect_arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "args for_if_clauses")); + D(fprintf(stderr, "%*c> invalid_arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "args for_if_clauses")); expr_ty a; asdl_comprehension_seq* for_if_clauses_var; if ( @@ -14474,7 +14474,7 @@ incorrect_arguments_rule(Parser *p) (for_if_clauses_var = for_if_clauses_rule(p)) // for_if_clauses ) { - D(fprintf(stderr, "%*c+ incorrect_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "args for_if_clauses")); + D(fprintf(stderr, "%*c+ invalid_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "args for_if_clauses")); _res = _PyPegen_nonparen_genexp_in_call ( p , a ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -14484,7 +14484,7 @@ incorrect_arguments_rule(Parser *p) goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s incorrect_arguments[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s invalid_arguments[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "args for_if_clauses")); } { // args ',' expression for_if_clauses @@ -14492,7 +14492,7 @@ incorrect_arguments_rule(Parser *p) D(p->level--); return NULL; } - D(fprintf(stderr, "%*c> incorrect_arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "args ',' expression for_if_clauses")); + D(fprintf(stderr, "%*c> invalid_arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "args ',' expression for_if_clauses")); Token * _literal; expr_ty a; expr_ty args_var; @@ -14507,7 +14507,7 @@ incorrect_arguments_rule(Parser *p) (for_if_clauses_var = for_if_clauses_rule(p)) // for_if_clauses ) { - D(fprintf(stderr, "%*c+ incorrect_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "args ',' expression for_if_clauses")); + D(fprintf(stderr, "%*c+ invalid_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "args ',' expression for_if_clauses")); _res = RAISE_SYNTAX_ERROR_KNOWN_LOCATION ( a , "Generator expression must be parenthesized" ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -14517,7 +14517,7 @@ incorrect_arguments_rule(Parser *p) goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s incorrect_arguments[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s invalid_arguments[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "args ',' expression for_if_clauses")); } { // args ',' args @@ -14525,7 +14525,7 @@ incorrect_arguments_rule(Parser *p) D(p->level--); return NULL; } - D(fprintf(stderr, "%*c> incorrect_arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "args ',' args")); + D(fprintf(stderr, "%*c> invalid_arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "args ',' args")); Token * _literal; expr_ty a; expr_ty args_var; @@ -14537,7 +14537,7 @@ incorrect_arguments_rule(Parser *p) (args_var = args_rule(p)) // args ) { - D(fprintf(stderr, "%*c+ incorrect_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "args ',' args")); + D(fprintf(stderr, "%*c+ invalid_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "args ',' args")); _res = _PyPegen_arguments_parsing_error ( p , a ); if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; @@ -14547,7 +14547,7 @@ incorrect_arguments_rule(Parser *p) goto done; } p->mark = _mark; - D(fprintf(stderr, "%*c%s incorrect_arguments[%d-%d]: %s failed!\n", p->level, ' ', + D(fprintf(stderr, "%*c%s invalid_arguments[%d-%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "args ',' args")); } _res = NULL; diff --git a/Parser/pegen.c b/Parser/pegen.c index c7343f7f047..827d4dace1f 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -1102,15 +1102,28 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags, p->feature_version = feature_version; p->known_err_token = NULL; p->level = 0; + p->call_invalid_rules = 0; return p; } +static void +reset_parser_state(Parser *p) +{ + for (int i = 0; i < p->fill; i++) { + p->tokens[i]->memo = NULL; + } + p->mark = 0; + p->call_invalid_rules = 1; +} + void * _PyPegen_run_parser(Parser *p) { void *res = _PyPegen_parse(p); if (res == NULL) { + reset_parser_state(p); + _PyPegen_parse(p); if (PyErr_Occurred()) { return NULL; } diff --git a/Parser/pegen.h b/Parser/pegen.h index 9a280ae240a..841f1e5eb43 100644 --- a/Parser/pegen.h +++ b/Parser/pegen.h @@ -73,6 +73,7 @@ typedef struct { growable_comment_array type_ignore_comments; Token *known_err_token; int level; + int call_invalid_rules; } Parser; typedef struct { diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py index 1a814aad11c..52bdb844e6b 100644 --- a/Tools/peg_generator/pegen/c_generator.py +++ b/Tools/peg_generator/pegen/c_generator.py @@ -736,7 +736,10 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): def visit_Alt( self, node: Alt, is_loop: bool, is_gather: bool, rulename: Optional[str] ) -> None: - self.print(f"{{ // {node}") + if len(node.items) == 1 and str(node.items[0]).startswith('invalid_'): + self.print(f"if (p->call_invalid_rules) {{ // {node}") + else: + self.print(f"{{ // {node}") with self.indent(): self._check_for_errors() node_str = str(node).replace('"', '\\"')