Allow the parser to avoid nested processing of invalid rules (GH-31252)

This commit is contained in:
Pablo Galindo Salgado 2022-02-10 13:12:14 +00:00 committed by GitHub
parent 2cea8c29cf
commit 390459de6d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 1998 additions and 1997 deletions

View File

@ -1078,6 +1078,7 @@ invalid_kwarg:
RAISE_SYNTAX_ERROR_KNOWN_RANGE(
a, b, "expression cannot contain assignment, perhaps you meant \"==\"?") }
# IMPORTANT: The "_without_invalid" suffix is significant: it makes the generated parser skip invalid rules while parsing this rule and everything nested under it
expression_without_invalid[expr_ty]:
| a=disjunction 'if' b=disjunction 'else' c=expression { _PyAST_IfExp(b, a, c, EXTRA) }
| disjunction
@ -1095,16 +1096,14 @@ invalid_expression:
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Perhaps you forgot a comma?") }
| a=disjunction 'if' b=disjunction !('else'|':') { RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "expected 'else' after 'if' expression") }
invalid_left_assignment_prefixes(memo): list|tuple|genexp|'True'|'None'|'False'
invalid_named_expression:
invalid_named_expression(memo):
| a=expression ':=' expression {
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
a, "cannot use assignment expressions with %s", _PyPegen_get_expr_name(a)) }
| a=NAME '=' b=bitwise_or !('='|':=') {
p->in_raw_rule ? NULL : RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Maybe you meant '==' or ':=' instead of '='?") }
| !invalid_left_assignment_prefixes a=bitwise_or b='=' bitwise_or !('='|':=') {
p->in_raw_rule ? NULL : RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot assign to %s here. Maybe you meant '==' instead of '='?",
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Maybe you meant '==' or ':=' instead of '='?") }
| !(list|tuple|genexp|'True'|'None'|'False') a=bitwise_or b='=' bitwise_or !('='|':=') {
RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot assign to %s here. Maybe you meant '==' instead of '='?",
_PyPegen_get_expr_name(a)) }
invalid_assignment:

View File

@ -231,7 +231,7 @@ class ExceptionTests(unittest.TestCase):
check('a = « hello » « world »', 1, 5)
check('[\nfile\nfor str(file)\nin\n[]\n]', 3, 5)
check('[file for\n str(file) in []]', 2, 2)
check("ages = {'Alice'=22, 'Bob'=23}", 1, 16)
check("ages = {'Alice'=22, 'Bob'=23}", 1, 9)
check('match ...:\n case {**rest, "key": value}:\n ...', 2, 19)
check("[a b c d e f]", 1, 2)
check("for x yfff:", 1, 7)

3962
Parser/parser.c generated

File diff suppressed because it is too large Load Diff

View File

@ -381,6 +381,7 @@ _PyPegen_expect_token(Parser *p, int type)
}
Token *t = p->tokens[p->mark];
if (t->type != type) {
if (Py_DebugFlag) fprintf(stderr, "Token = %s\n", PyBytes_AsString(t->bytes));
return NULL;
}
p->mark += 1;
@ -785,7 +786,6 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
p->known_err_token = NULL;
p->level = 0;
p->call_invalid_rules = 0;
p->in_raw_rule = 0;
return p;
}

View File

@ -78,7 +78,6 @@ typedef struct {
Token *known_err_token;
int level;
int call_invalid_rules;
int in_raw_rule;
} Parser;
typedef struct {

View File

@ -122,6 +122,7 @@ class CCallMakerVisitor(GrammarVisitor):
self.exact_tokens = exact_tokens
self.non_exact_tokens = non_exact_tokens
self.cache: Dict[Any, FunctionCall] = {}
self.cleanup_statements: List[str] = []
def keyword_helper(self, keyword: str) -> FunctionCall:
return FunctionCall(
@ -364,6 +365,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self._varname_counter = 0
self.debug = debug
self.skip_actions = skip_actions
self.cleanup_statements: List[str] = []
def add_level(self) -> None:
self.print("if (p->level++ == MAXSTACK) {")
@ -376,6 +378,8 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.print("p->level--;")
def add_return(self, ret_val: str) -> None:
    """Emit the epilogue that precedes every generated ``return``.

    The output is, in order:

    1. every statement currently queued in ``self.cleanup_statements``,
       in list order;
    2. whatever ``self.remove_level()`` prints;
    3. the line ``return <ret_val>;``.

    Args:
        ret_val: Text of the C expression to return, inserted verbatim.
    """
    # Cleanup goes first so that state saved on rule entry is restored
    # on every exit path that is emitted through this helper.
    for pending_cleanup in self.cleanup_statements:
        self.print(pending_cleanup)

    self.remove_level()

    return_line = f"return {ret_val};"
    self.print(return_line)
@ -547,9 +551,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
f"_PyPegen_update_memo(p, _mark, {node.name}_type, _res)", "_res"
)
self.print("p->mark = _mark;")
self.print("p->in_raw_rule++;")
self.print(f"void *_raw = {node.name}_raw(p);")
self.print("p->in_raw_rule--;")
self.print("if (p->error_indicator) {")
with self.indent():
self.add_return("NULL")
@ -663,10 +665,21 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self._set_up_rule_memoization(node, result_type)
self.print("{")
if node.name.endswith("without_invalid"):
with self.indent():
self.print("int _prev_call_invalid = p->call_invalid_rules;")
self.print("p->call_invalid_rules = 0;")
self.cleanup_statements.append("p->call_invalid_rules = _prev_call_invalid;")
if is_loop:
self._handle_loop_rule_body(node, rhs)
else:
self._handle_default_rule_body(node, rhs, result_type)
if node.name.endswith("without_invalid"):
self.cleanup_statements.pop()
self.print("}")
def visit_NamedItem(self, node: NamedItem) -> None: