gh-118235: Move RAISE_SYNTAX_ERROR actions to invalid rules and make sure they stay there (GH-119731)

The Full Grammar specification in the docs omits rule actions, so grammar rules that raise a syntax error looked like valid syntax.
This was solved in ef940de by hiding those rules in the custom syntax highlighter.

This moves all syntax-error alternatives to invalid rules, adds a validator that ensures that actions containing RAISE_SYNTAX_ERROR are in invalid rules, and reverts the syntax highlighter hack.
This commit is contained in:
Petr Viktorin 2024-05-30 09:27:32 +02:00 committed by GitHub
parent a5fef800d3
commit 48f21b3631
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 2021 additions and 1871 deletions

View File

@ -16,7 +16,6 @@ class PEGLexer(RegexLexer):
- Rule types
- Rule options
- Rules named `invalid_*` or `incorrect_*`
- Rules with `RAISE_SYNTAX_ERROR`
"""
name = "PEG"
@ -60,7 +59,6 @@ class PEGLexer(RegexLexer):
(r"^(\s+\|\s+.*invalid_\w+.*\n)", bygroups(None)),
(r"^(\s+\|\s+.*incorrect_\w+.*\n)", bygroups(None)),
(r"^(#.*invalid syntax.*(?:.|\n)*)", bygroups(None),),
(r"^(\s+\|\s+.*\{[^}]*RAISE_SYNTAX_ERROR[^}]*\})\n", bygroups(None)),
],
"root": [
include("invalids"),

View File

@ -650,17 +650,8 @@ type_param_seq[asdl_type_param_seq*]: a[asdl_type_param_seq*]=','.type_param+ ['
type_param[type_param_ty] (memo):
| a=NAME b=[type_param_bound] c=[type_param_default] { _PyAST_TypeVar(a->v.Name.id, b, c, EXTRA) }
| '*' a=NAME colon=':' e=expression {
RAISE_SYNTAX_ERROR_STARTING_FROM(colon, e->kind == Tuple_kind
? "cannot use constraints with TypeVarTuple"
: "cannot use bound with TypeVarTuple")
}
| invalid_type_param
| '*' a=NAME b=[type_param_starred_default] { _PyAST_TypeVarTuple(a->v.Name.id, b, EXTRA) }
| '**' a=NAME colon=':' e=expression {
RAISE_SYNTAX_ERROR_STARTING_FROM(colon, e->kind == Tuple_kind
? "cannot use constraints with ParamSpec"
: "cannot use bound with ParamSpec")
}
| '**' a=NAME b=[type_param_default] { _PyAST_ParamSpec(a->v.Name.id, b, EXTRA) }
type_param_bound[expr_ty]: ':' e=expression { e }
@ -979,8 +970,7 @@ for_if_clause[comprehension_ty]:
CHECK_VERSION(comprehension_ty, 6, "Async comprehensions are", _PyAST_comprehension(a, b, c, 1, p->arena)) }
| 'for' a=star_targets 'in' ~ b=disjunction c[asdl_expr_seq*]=('if' z=disjunction { z })* {
_PyAST_comprehension(a, b, c, 0, p->arena) }
| 'async'? 'for' (bitwise_or (',' bitwise_or)* [',']) !'in' {
RAISE_SYNTAX_ERROR("'in' expected after for-loop variables") }
| invalid_for_if_clause
| invalid_for_target
listcomp[expr_ty]:
@ -1020,9 +1010,9 @@ kwargs[asdl_seq*]:
| ','.kwarg_or_double_starred+
starred_expression[expr_ty]:
| invalid_starred_expression
| invalid_starred_expression_unpacking
| '*' a=expression { _PyAST_Starred(a, Load, EXTRA) }
| '*' { RAISE_SYNTAX_ERROR("Invalid star expression") }
| invalid_starred_expression
kwarg_or_starred[KeywordOrStarred*]:
| invalid_kwarg
@ -1176,6 +1166,18 @@ invalid_legacy_expression:
_PyPegen_check_legacy_stmt(p, a) ? RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b,
"Missing parentheses in call to '%U'. Did you mean %U(...)?", a->v.Name.id, a->v.Name.id) : NULL}
invalid_type_param:
| '*' a=NAME colon=':' e=expression {
RAISE_SYNTAX_ERROR_STARTING_FROM(colon, e->kind == Tuple_kind
? "cannot use constraints with TypeVarTuple"
: "cannot use bound with TypeVarTuple")
}
| '**' a=NAME colon=':' e=expression {
RAISE_SYNTAX_ERROR_STARTING_FROM(colon, e->kind == Tuple_kind
? "cannot use constraints with ParamSpec"
: "cannot use bound with ParamSpec")
}
invalid_expression:
# !(NAME STRING) is not matched so we don't show this error with some invalid string prefixes like: kf"dsfsdf"
# Soft keywords need to also be ignored because they can be parsed as NAME NAME
@ -1296,6 +1298,10 @@ invalid_with_item:
| expression 'as' a=expression &(',' | ')' | ':') {
RAISE_SYNTAX_ERROR_INVALID_TARGET(STAR_TARGETS, a) }
invalid_for_if_clause:
| 'async'? 'for' (bitwise_or (',' bitwise_or)* [',']) !'in' {
RAISE_SYNTAX_ERROR("'in' expected after for-loop variables") }
invalid_for_target:
| 'async'? 'for' a=star_expressions {
RAISE_SYNTAX_ERROR_INVALID_TARGET(FOR_TARGETS, a) }
@ -1409,8 +1415,10 @@ invalid_kvpair:
RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, a->lineno, a->end_col_offset - 1, a->end_lineno, -1, "':' expected after dictionary key") }
| expression ':' a='*' bitwise_or { RAISE_SYNTAX_ERROR_STARTING_FROM(a, "cannot use a starred expression in a dictionary value") }
| expression a=':' &('}'|',') {RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "expression expected after dictionary key and ':'") }
invalid_starred_expression:
invalid_starred_expression_unpacking:
| a='*' expression '=' b=expression { RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "cannot assign to iterable argument unpacking") }
invalid_starred_expression:
| '*' { RAISE_SYNTAX_ERROR("Invalid star expression") }
invalid_replacement_field:
| '{' a='=' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "f-string: valid expression required before '='") }

View File

@ -4,7 +4,7 @@ from test import test_tools
test_tools.skip_if_missing("peg_generator")
with test_tools.imports_under_tool("peg_generator"):
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.validator import SubRuleValidator, ValidationError
from pegen.validator import SubRuleValidator, ValidationError, RaiseRuleValidator
from pegen.testutil import parse_string
from pegen.grammar import Grammar
@ -49,3 +49,13 @@ class TestPegen(unittest.TestCase):
with self.assertRaises(ValidationError):
for rule_name, rule in grammar.rules.items():
validator.validate_rule(rule_name, rule)
def test_raising_valid_rule(self) -> None:
grammar_source = """
start: NAME { RAISE_SYNTAX_ERROR("this is not allowed") }
"""
grammar: Grammar = parse_string(grammar_source, GrammarParser)
validator = RaiseRuleValidator(grammar)
with self.assertRaises(ValidationError):
for rule_name, rule in grammar.rules.items():
validator.validate_rule(rule_name, rule)

3828
Parser/parser.c generated

File diff suppressed because it is too large Load Diff

View File

@ -34,6 +34,18 @@ class SubRuleValidator(GrammarValidator):
)
class RaiseRuleValidator(GrammarValidator):
def visit_Alt(self, node: Alt) -> None:
if self.rulename and self.rulename.startswith('invalid'):
# raising is allowed in invalid rules
return
if node.action and 'RAISE_SYNTAX_ERROR' in node.action:
raise ValidationError(
f"In {self.rulename!r} there is an alternative that contains "
f"RAISE_SYNTAX_ERROR; this is only allowed in invalid_ rules"
)
def validate_grammar(the_grammar: grammar.Grammar) -> None:
for validator_cls in GrammarValidator.__subclasses__():
validator = validator_cls(the_grammar)