Allow the parser to avoid nested processing of invalid rules (GH-31252)

2022-02-10 13:12:14 +00:00 · 2022-02-10 13:12:14 +00:00 · 390459de6d
parent 2cea8c29cf
commit 390459de6d
6 changed files with 1998 additions and 1997 deletions
--- a/Grammar/python.gram
+++ b/Grammar/python.gram
@@ -1078,6 +1078,7 @@ invalid_kwarg:
        RAISE_SYNTAX_ERROR_KNOWN_RANGE(
            a, b, "expression cannot contain assignment, perhaps you meant \"==\"?") }

+# IMPORTANT: Note that the "_without_invalid" suffix causes the rule to not call invalid rules under it
 expression_without_invalid[expr_ty]:
    | a=disjunction 'if' b=disjunction 'else' c=expression { _PyAST_IfExp(b, a, c, EXTRA) }
    | disjunction
@@ -1095,16 +1096,14 @@ invalid_expression:
        RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Perhaps you forgot a comma?") }
   | a=disjunction 'if' b=disjunction !('else'|':') { RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "expected 'else' after 'if' expression") }

-invalid_left_assignment_prefixes(memo): list|tuple|genexp|'True'|'None'|'False'
-
-invalid_named_expression:
+invalid_named_expression(memo):
    | a=expression ':=' expression {
        RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
            a, "cannot use assignment expressions with %s", _PyPegen_get_expr_name(a)) }
    | a=NAME '=' b=bitwise_or !('='|':=') {
-        p->in_raw_rule ? NULL : RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Maybe you meant '==' or ':=' instead of '='?") }
-    | !invalid_left_assignment_prefixes a=bitwise_or b='=' bitwise_or !('='|':=') {
-        p->in_raw_rule ? NULL : RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot assign to %s here. Maybe you meant '==' instead of '='?",
+        RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Maybe you meant '==' or ':=' instead of '='?") }
+    | !(list|tuple|genexp|'True'|'None'|'False') a=bitwise_or b='=' bitwise_or !('='|':=') {
+        RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot assign to %s here. Maybe you meant '==' instead of '='?",
                                          _PyPegen_get_expr_name(a)) }

 invalid_assignment:
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -231,7 +231,7 @@ class ExceptionTests(unittest.TestCase):
        check('a = « hello » « world »', 1, 5)
        check('[\nfile\nfor str(file)\nin\n[]\n]', 3, 5)
        check('[file for\n str(file) in []]', 2, 2)
-        check("ages = {'Alice'=22, 'Bob'=23}", 1, 16)
+        check("ages = {'Alice'=22, 'Bob'=23}", 1, 9)
        check('match ...:\n    case {**rest, "key": value}:\n        ...', 2, 19)
        check("[a b c d e f]", 1, 2)
        check("for x yfff:", 1, 7)
--- a/Parser/parser.c
+++ b/Parser/parser.c
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -381,6 +381,7 @@ _PyPegen_expect_token(Parser *p, int type)
    }
    Token *t = p->tokens[p->mark];
    if (t->type != type) {
+        if (Py_DebugFlag) fprintf(stderr, "Token = %s\n", PyBytes_AsString(t->bytes));
        return NULL;
    }
    p->mark += 1;
@@ -785,7 +786,6 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
    p->known_err_token = NULL;
    p->level = 0;
    p->call_invalid_rules = 0;
-    p->in_raw_rule = 0;
    return p;
 }

--- a/Parser/pegen.h
+++ b/Parser/pegen.h
@@ -78,7 +78,6 @@ typedef struct {
    Token *known_err_token;
    int level;
    int call_invalid_rules;
-    int in_raw_rule;
 } Parser;

 typedef struct {
--- a/Tools/peg_generator/pegen/c_generator.py
+++ b/Tools/peg_generator/pegen/c_generator.py
@@ -122,6 +122,7 @@ class CCallMakerVisitor(GrammarVisitor):
        self.exact_tokens = exact_tokens
        self.non_exact_tokens = non_exact_tokens
        self.cache: Dict[Any, FunctionCall] = {}
+        self.cleanup_statements: List[str] = []

    def keyword_helper(self, keyword: str) -> FunctionCall:
        return FunctionCall(
@@ -364,6 +365,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
        self._varname_counter = 0
        self.debug = debug
        self.skip_actions = skip_actions
+        self.cleanup_statements: List[str] = []

    def add_level(self) -> None:
        self.print("if (p->level++ == MAXSTACK) {")
@@ -376,6 +378,8 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
        self.print("p->level--;")

    def add_return(self, ret_val: str) -> None:
+        for stmt in self.cleanup_statements:
+            self.print(stmt)
        self.remove_level()
        self.print(f"return {ret_val};")

@@ -547,9 +551,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
                    f"_PyPegen_update_memo(p, _mark, {node.name}_type, _res)", "_res"
                )
                self.print("p->mark = _mark;")
-                self.print("p->in_raw_rule++;")
                self.print(f"void *_raw = {node.name}_raw(p);")
-                self.print("p->in_raw_rule--;")
                self.print("if (p->error_indicator) {")
                with self.indent():
                    self.add_return("NULL")
@@ -663,10 +665,21 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
            self._set_up_rule_memoization(node, result_type)

        self.print("{")
+
+        if node.name.endswith("without_invalid"):
+            with self.indent():
+                self.print("int _prev_call_invalid = p->call_invalid_rules;")
+                self.print("p->call_invalid_rules = 0;")
+                self.cleanup_statements.append("p->call_invalid_rules = _prev_call_invalid;")
+
        if is_loop:
            self._handle_loop_rule_body(node, rhs)
        else:
            self._handle_default_rule_body(node, rhs, result_type)
+
+        if node.name.endswith("without_invalid"):
+            self.cleanup_statements.pop()
+
        self.print("}")

    def visit_NamedItem(self, node: NamedItem) -> None: