[3.9] bpo-42123: Run the parser two times and only enable invalid rules on the second run (GH-22111) (GH-23011)

* Implement running the parser a second time for the errors messages

The first parser run is only responsible for detecting whether
there is a `SyntaxError` or not. If there isn't the AST gets returned.
Otherwise, the parser is run a second time with all the `invalid_*`
rules enabled so that all the customized error messages get produced.

(cherry picked from commit bca7014032)
This commit is contained in:
Lysandros Nikolaou 2020-10-28 02:14:15 +02:00 committed by GitHub
parent c4b58cea47
commit 24a7c298d4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 71 additions and 51 deletions

View File

@ -535,7 +535,7 @@ yield_expr[expr_ty]:
arguments[expr_ty] (memo):
| a=args [','] &')' { a }
| incorrect_arguments
| invalid_arguments
args[expr_ty]:
| a=','.(starred_expression | named_expression !'=')+ b=[',' k=kwargs {k}] { _PyPegen_collect_call_seqs(p, a, b, EXTRA) }
| a=kwargs { _Py_Call(_PyPegen_dummy_name(p),
@ -620,7 +620,7 @@ t_atom[expr_ty]:
# From here on, there are rules for invalid syntax with specialised error messages
incorrect_arguments:
invalid_arguments:
| args ',' '*' { RAISE_SYNTAX_ERROR("iterable argument unpacking follows keyword argument unpacking") }
| a=expression for_if_clauses ',' [args | expression for_if_clauses] {
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "Generator expression must be parenthesized") }

View File

@ -0,0 +1,3 @@
Run the parser two times. On the first run, disable all the rules that only
generate better error messages to gain performance. If there's a parse
failure, run the parser a second time with those enabled.

View File

@ -218,7 +218,7 @@ static KeywordToken *reserved_keywords[] = {
#define t_primary_type 1140 // Left-recursive
#define t_lookahead_type 1141
#define t_atom_type 1142
#define incorrect_arguments_type 1143
#define invalid_arguments_type 1143
#define invalid_kwarg_type 1144
#define invalid_named_expression_type 1145
#define invalid_assignment_type 1146
@ -537,7 +537,7 @@ static expr_ty target_rule(Parser *p);
static expr_ty t_primary_rule(Parser *p);
static void *t_lookahead_rule(Parser *p);
static expr_ty t_atom_rule(Parser *p);
static void *incorrect_arguments_rule(Parser *p);
static void *invalid_arguments_rule(Parser *p);
static void *invalid_kwarg_rule(Parser *p);
static void *invalid_named_expression_rule(Parser *p);
static void *invalid_assignment_rule(Parser *p);
@ -2219,7 +2219,7 @@ assignment_rule(Parser *p)
return NULL;
}
}
{ // invalid_assignment
if (p->call_invalid_rules) { // invalid_assignment
if (p->error_indicator) {
D(p->level--);
return NULL;
@ -2892,7 +2892,7 @@ del_stmt_rule(Parser *p)
D(fprintf(stderr, "%*c%s del_stmt[%d-%d]: %s failed!\n", p->level, ' ',
p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'del' del_targets &(';' | NEWLINE)"));
}
{ // invalid_del_stmt
if (p->call_invalid_rules) { // invalid_del_stmt
if (p->error_indicator) {
D(p->level--);
return NULL;
@ -3243,7 +3243,7 @@ import_from_targets_rule(Parser *p)
D(fprintf(stderr, "%*c%s import_from_targets[%d-%d]: %s failed!\n", p->level, ' ',
p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'*'"));
}
{ // invalid_import_from_targets
if (p->call_invalid_rules) { // invalid_import_from_targets
if (p->error_indicator) {
D(p->level--);
return NULL;
@ -4036,7 +4036,7 @@ for_stmt_rule(Parser *p)
return NULL;
}
}
{ // invalid_for_target
if (p->call_invalid_rules) { // invalid_for_target
if (p->error_indicator) {
D(p->level--);
return NULL;
@ -4337,7 +4337,7 @@ with_item_rule(Parser *p)
D(fprintf(stderr, "%*c%s with_item[%d-%d]: %s failed!\n", p->level, ' ',
p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "expression 'as' star_target &(',' | ')' | ':')"));
}
{ // invalid_with_item
if (p->call_invalid_rules) { // invalid_with_item
if (p->error_indicator) {
D(p->level--);
return NULL;
@ -5072,7 +5072,7 @@ func_type_comment_rule(Parser *p)
D(fprintf(stderr, "%*c%s func_type_comment[%d-%d]: %s failed!\n", p->level, ' ',
p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "NEWLINE TYPE_COMMENT &(NEWLINE INDENT)"));
}
{ // invalid_double_type_comments
if (p->call_invalid_rules) { // invalid_double_type_comments
if (p->error_indicator) {
D(p->level--);
return NULL;
@ -5127,7 +5127,7 @@ params_rule(Parser *p)
}
arguments_ty _res = NULL;
int _mark = p->mark;
{ // invalid_parameters
if (p->call_invalid_rules) { // invalid_parameters
if (p->error_indicator) {
D(p->level--);
return NULL;
@ -5602,7 +5602,7 @@ star_etc_rule(Parser *p)
D(fprintf(stderr, "%*c%s star_etc[%d-%d]: %s failed!\n", p->level, ' ',
p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "kwds"));
}
{ // invalid_star_etc
if (p->call_invalid_rules) { // invalid_star_etc
if (p->error_indicator) {
D(p->level--);
return NULL;
@ -6305,7 +6305,7 @@ block_rule(Parser *p)
D(fprintf(stderr, "%*c%s block[%d-%d]: %s failed!\n", p->level, ' ',
p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "simple_stmt"));
}
{ // invalid_block
if (p->call_invalid_rules) { // invalid_block
if (p->error_indicator) {
D(p->level--);
return NULL;
@ -6799,7 +6799,7 @@ named_expression_rule(Parser *p)
D(fprintf(stderr, "%*c%s named_expression[%d-%d]: %s failed!\n", p->level, ' ',
p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "expression !':='"));
}
{ // invalid_named_expression
if (p->call_invalid_rules) { // invalid_named_expression
if (p->error_indicator) {
D(p->level--);
return NULL;
@ -7193,7 +7193,7 @@ lambda_params_rule(Parser *p)
}
arguments_ty _res = NULL;
int _mark = p->mark;
{ // invalid_lambda_parameters
if (p->call_invalid_rules) { // invalid_lambda_parameters
if (p->error_indicator) {
D(p->level--);
return NULL;
@ -7670,7 +7670,7 @@ lambda_star_etc_rule(Parser *p)
D(fprintf(stderr, "%*c%s lambda_star_etc[%d-%d]: %s failed!\n", p->level, ' ',
p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "lambda_kwds"));
}
{ // invalid_lambda_star_etc
if (p->call_invalid_rules) { // invalid_lambda_star_etc
if (p->error_indicator) {
D(p->level--);
return NULL;
@ -10330,7 +10330,7 @@ primary_raw(Parser *p)
UNUSED(_start_lineno); // Only used by EXTRA macro
int _start_col_offset = p->tokens[_mark]->col_offset;
UNUSED(_start_col_offset); // Only used by EXTRA macro
{ // invalid_primary
if (p->call_invalid_rules) { // invalid_primary
if (p->error_indicator) {
D(p->level--);
return NULL;
@ -11209,7 +11209,7 @@ listcomp_rule(Parser *p)
return NULL;
}
}
{ // invalid_comprehension
if (p->call_invalid_rules) { // invalid_comprehension
if (p->error_indicator) {
D(p->level--);
return NULL;
@ -11340,7 +11340,7 @@ group_rule(Parser *p)
D(fprintf(stderr, "%*c%s group[%d-%d]: %s failed!\n", p->level, ' ',
p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'(' (yield_expr | named_expression) ')'"));
}
{ // invalid_group
if (p->call_invalid_rules) { // invalid_group
if (p->error_indicator) {
D(p->level--);
return NULL;
@ -11434,7 +11434,7 @@ genexp_rule(Parser *p)
return NULL;
}
}
{ // invalid_comprehension
if (p->call_invalid_rules) { // invalid_comprehension
if (p->error_indicator) {
D(p->level--);
return NULL;
@ -11593,7 +11593,7 @@ setcomp_rule(Parser *p)
return NULL;
}
}
{ // invalid_comprehension
if (p->call_invalid_rules) { // invalid_comprehension
if (p->error_indicator) {
D(p->level--);
return NULL;
@ -11745,7 +11745,7 @@ dictcomp_rule(Parser *p)
D(fprintf(stderr, "%*c%s dictcomp[%d-%d]: %s failed!\n", p->level, ' ',
p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'{' kvpair for_if_clauses '}'"));
}
{ // invalid_dict_comprehension
if (p->call_invalid_rules) { // invalid_dict_comprehension
if (p->error_indicator) {
D(p->level--);
return NULL;
@ -12064,7 +12064,7 @@ for_if_clause_rule(Parser *p)
return NULL;
}
}
{ // invalid_for_target
if (p->call_invalid_rules) { // invalid_for_target
if (p->error_indicator) {
D(p->level--);
return NULL;
@ -12190,7 +12190,7 @@ yield_expr_rule(Parser *p)
return _res;
}
// arguments: args ','? &')' | incorrect_arguments
// arguments: args ','? &')' | invalid_arguments
static expr_ty
arguments_rule(Parser *p)
{
@ -12235,24 +12235,24 @@ arguments_rule(Parser *p)
D(fprintf(stderr, "%*c%s arguments[%d-%d]: %s failed!\n", p->level, ' ',
p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "args ','? &')'"));
}
{ // incorrect_arguments
if (p->call_invalid_rules) { // invalid_arguments
if (p->error_indicator) {
D(p->level--);
return NULL;
}
D(fprintf(stderr, "%*c> arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "incorrect_arguments"));
void *incorrect_arguments_var;
D(fprintf(stderr, "%*c> arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "invalid_arguments"));
void *invalid_arguments_var;
if (
(incorrect_arguments_var = incorrect_arguments_rule(p)) // incorrect_arguments
(invalid_arguments_var = invalid_arguments_rule(p)) // invalid_arguments
)
{
D(fprintf(stderr, "%*c+ arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "incorrect_arguments"));
_res = incorrect_arguments_var;
D(fprintf(stderr, "%*c+ arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "invalid_arguments"));
_res = invalid_arguments_var;
goto done;
}
p->mark = _mark;
D(fprintf(stderr, "%*c%s arguments[%d-%d]: %s failed!\n", p->level, ' ',
p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "incorrect_arguments"));
p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "invalid_arguments"));
}
_res = NULL;
done:
@ -12589,7 +12589,7 @@ kwarg_or_starred_rule(Parser *p)
D(fprintf(stderr, "%*c%s kwarg_or_starred[%d-%d]: %s failed!\n", p->level, ' ',
p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "starred_expression"));
}
{ // invalid_kwarg
if (p->call_invalid_rules) { // invalid_kwarg
if (p->error_indicator) {
D(p->level--);
return NULL;
@ -12709,7 +12709,7 @@ kwarg_or_double_starred_rule(Parser *p)
D(fprintf(stderr, "%*c%s kwarg_or_double_starred[%d-%d]: %s failed!\n", p->level, ' ',
p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'**' expression"));
}
{ // invalid_kwarg
if (p->call_invalid_rules) { // invalid_kwarg
if (p->error_indicator) {
D(p->level--);
return NULL;
@ -14421,14 +14421,14 @@ t_atom_rule(Parser *p)
return _res;
}
// incorrect_arguments:
// invalid_arguments:
// | args ',' '*'
// | expression for_if_clauses ',' [args | expression for_if_clauses]
// | args for_if_clauses
// | args ',' expression for_if_clauses
// | args ',' args
static void *
incorrect_arguments_rule(Parser *p)
invalid_arguments_rule(Parser *p)
{
D(p->level++);
if (p->error_indicator) {
@ -14442,7 +14442,7 @@ incorrect_arguments_rule(Parser *p)
D(p->level--);
return NULL;
}
D(fprintf(stderr, "%*c> incorrect_arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "args ',' '*'"));
D(fprintf(stderr, "%*c> invalid_arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "args ',' '*'"));
Token * _literal;
Token * _literal_1;
expr_ty args_var;
@ -14454,7 +14454,7 @@ incorrect_arguments_rule(Parser *p)
(_literal_1 = _PyPegen_expect_token(p, 16)) // token='*'
)
{
D(fprintf(stderr, "%*c+ incorrect_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "args ',' '*'"));
D(fprintf(stderr, "%*c+ invalid_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "args ',' '*'"));
_res = RAISE_SYNTAX_ERROR ( "iterable argument unpacking follows keyword argument unpacking" );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
@ -14464,7 +14464,7 @@ incorrect_arguments_rule(Parser *p)
goto done;
}
p->mark = _mark;
D(fprintf(stderr, "%*c%s incorrect_arguments[%d-%d]: %s failed!\n", p->level, ' ',
D(fprintf(stderr, "%*c%s invalid_arguments[%d-%d]: %s failed!\n", p->level, ' ',
p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "args ',' '*'"));
}
{ // expression for_if_clauses ',' [args | expression for_if_clauses]
@ -14472,7 +14472,7 @@ incorrect_arguments_rule(Parser *p)
D(p->level--);
return NULL;
}
D(fprintf(stderr, "%*c> incorrect_arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "expression for_if_clauses ',' [args | expression for_if_clauses]"));
D(fprintf(stderr, "%*c> invalid_arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "expression for_if_clauses ',' [args | expression for_if_clauses]"));
Token * _literal;
void *_opt_var;
UNUSED(_opt_var); // Silence compiler warnings
@ -14488,7 +14488,7 @@ incorrect_arguments_rule(Parser *p)
(_opt_var = _tmp_127_rule(p), 1) // [args | expression for_if_clauses]
)
{
D(fprintf(stderr, "%*c+ incorrect_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression for_if_clauses ',' [args | expression for_if_clauses]"));
D(fprintf(stderr, "%*c+ invalid_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "expression for_if_clauses ',' [args | expression for_if_clauses]"));
_res = RAISE_SYNTAX_ERROR_KNOWN_LOCATION ( a , "Generator expression must be parenthesized" );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
@ -14498,7 +14498,7 @@ incorrect_arguments_rule(Parser *p)
goto done;
}
p->mark = _mark;
D(fprintf(stderr, "%*c%s incorrect_arguments[%d-%d]: %s failed!\n", p->level, ' ',
D(fprintf(stderr, "%*c%s invalid_arguments[%d-%d]: %s failed!\n", p->level, ' ',
p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "expression for_if_clauses ',' [args | expression for_if_clauses]"));
}
{ // args for_if_clauses
@ -14506,7 +14506,7 @@ incorrect_arguments_rule(Parser *p)
D(p->level--);
return NULL;
}
D(fprintf(stderr, "%*c> incorrect_arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "args for_if_clauses"));
D(fprintf(stderr, "%*c> invalid_arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "args for_if_clauses"));
expr_ty a;
asdl_seq* for_if_clauses_var;
if (
@ -14515,7 +14515,7 @@ incorrect_arguments_rule(Parser *p)
(for_if_clauses_var = for_if_clauses_rule(p)) // for_if_clauses
)
{
D(fprintf(stderr, "%*c+ incorrect_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "args for_if_clauses"));
D(fprintf(stderr, "%*c+ invalid_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "args for_if_clauses"));
_res = _PyPegen_nonparen_genexp_in_call ( p , a );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
@ -14525,7 +14525,7 @@ incorrect_arguments_rule(Parser *p)
goto done;
}
p->mark = _mark;
D(fprintf(stderr, "%*c%s incorrect_arguments[%d-%d]: %s failed!\n", p->level, ' ',
D(fprintf(stderr, "%*c%s invalid_arguments[%d-%d]: %s failed!\n", p->level, ' ',
p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "args for_if_clauses"));
}
{ // args ',' expression for_if_clauses
@ -14533,7 +14533,7 @@ incorrect_arguments_rule(Parser *p)
D(p->level--);
return NULL;
}
D(fprintf(stderr, "%*c> incorrect_arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "args ',' expression for_if_clauses"));
D(fprintf(stderr, "%*c> invalid_arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "args ',' expression for_if_clauses"));
Token * _literal;
expr_ty a;
expr_ty args_var;
@ -14548,7 +14548,7 @@ incorrect_arguments_rule(Parser *p)
(for_if_clauses_var = for_if_clauses_rule(p)) // for_if_clauses
)
{
D(fprintf(stderr, "%*c+ incorrect_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "args ',' expression for_if_clauses"));
D(fprintf(stderr, "%*c+ invalid_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "args ',' expression for_if_clauses"));
_res = RAISE_SYNTAX_ERROR_KNOWN_LOCATION ( a , "Generator expression must be parenthesized" );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
@ -14558,7 +14558,7 @@ incorrect_arguments_rule(Parser *p)
goto done;
}
p->mark = _mark;
D(fprintf(stderr, "%*c%s incorrect_arguments[%d-%d]: %s failed!\n", p->level, ' ',
D(fprintf(stderr, "%*c%s invalid_arguments[%d-%d]: %s failed!\n", p->level, ' ',
p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "args ',' expression for_if_clauses"));
}
{ // args ',' args
@ -14566,7 +14566,7 @@ incorrect_arguments_rule(Parser *p)
D(p->level--);
return NULL;
}
D(fprintf(stderr, "%*c> incorrect_arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "args ',' args"));
D(fprintf(stderr, "%*c> invalid_arguments[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "args ',' args"));
Token * _literal;
expr_ty a;
expr_ty args_var;
@ -14578,7 +14578,7 @@ incorrect_arguments_rule(Parser *p)
(args_var = args_rule(p)) // args
)
{
D(fprintf(stderr, "%*c+ incorrect_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "args ',' args"));
D(fprintf(stderr, "%*c+ invalid_arguments[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "args ',' args"));
_res = _PyPegen_arguments_parsing_error ( p , a );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
@ -14588,7 +14588,7 @@ incorrect_arguments_rule(Parser *p)
goto done;
}
p->mark = _mark;
D(fprintf(stderr, "%*c%s incorrect_arguments[%d-%d]: %s failed!\n", p->level, ' ',
D(fprintf(stderr, "%*c%s invalid_arguments[%d-%d]: %s failed!\n", p->level, ' ',
p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "args ',' args"));
}
_res = NULL;

View File

@ -1101,15 +1101,28 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
p->feature_version = feature_version;
p->known_err_token = NULL;
p->level = 0;
p->call_invalid_rules = 0;
return p;
}
static void
reset_parser_state(Parser *p)
{
for (int i = 0; i < p->fill; i++) {
p->tokens[i]->memo = NULL;
}
p->mark = 0;
p->call_invalid_rules = 1;
}
void *
_PyPegen_run_parser(Parser *p)
{
void *res = _PyPegen_parse(p);
if (res == NULL) {
reset_parser_state(p);
_PyPegen_parse(p);
if (PyErr_Occurred()) {
return NULL;
}

View File

@ -73,6 +73,7 @@ typedef struct {
growable_comment_array type_ignore_comments;
Token *known_err_token;
int level;
int call_invalid_rules;
} Parser;
typedef struct {

View File

@ -730,7 +730,10 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
def visit_Alt(
self, node: Alt, is_loop: bool, is_gather: bool, rulename: Optional[str]
) -> None:
self.print(f"{{ // {node}")
if len(node.items) == 1 and str(node.items[0]).startswith('invalid_'):
self.print(f"if (p->call_invalid_rules) {{ // {node}")
else:
self.print(f"{{ // {node}")
with self.indent():
self._check_for_errors()
node_str = str(node).replace('"', '\\"')