gh-109596: Ensure repeated rules in the grammar are not allowed and fix incorrect soft keywords (#109606)

This commit is contained in:
Pablo Galindo Salgado 2023-09-22 19:03:23 +01:00 committed by GitHub
parent 7c55399172
commit b28ffaa193
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 1781 additions and 1797 deletions

View File

@ -19,8 +19,6 @@ _PyPegen_parse(Parser *p)
result = eval_rule(p);
} else if (p->start_rule == Py_func_type_input) {
result = func_type_rule(p);
} else if (p->start_rule == Py_fstring_input) {
result = fstring_rule(p);
}
return result;
@ -89,7 +87,6 @@ file[mod_ty]: a=[statements] ENDMARKER { _PyPegen_make_module(p, a) }
interactive[mod_ty]: a=statement_newline { _PyAST_Interactive(a, p->arena) }
eval[mod_ty]: a=expressions NEWLINE* ENDMARKER { _PyAST_Expression(a, p->arena) }
func_type[mod_ty]: '(' a=[type_expressions] ')' '->' b=expression NEWLINE* ENDMARKER { _PyAST_FunctionType(a, b, p->arena) }
fstring[expr_ty]: star_expressions
# GENERAL STATEMENTS
# ==================
@ -647,20 +644,20 @@ type_param_seq[asdl_type_param_seq*]: a[asdl_type_param_seq*]=','.type_param+ ['
type_param[type_param_ty] (memo):
| a=NAME b=[type_param_bound] { _PyAST_TypeVar(a->v.Name.id, b, EXTRA) }
| '*' a=NAME colon=":" e=expression {
| '*' a=NAME colon=':' e=expression {
RAISE_SYNTAX_ERROR_STARTING_FROM(colon, e->kind == Tuple_kind
? "cannot use constraints with TypeVarTuple"
: "cannot use bound with TypeVarTuple")
}
| '*' a=NAME { _PyAST_TypeVarTuple(a->v.Name.id, EXTRA) }
| '**' a=NAME colon=":" e=expression {
| '**' a=NAME colon=':' e=expression {
RAISE_SYNTAX_ERROR_STARTING_FROM(colon, e->kind == Tuple_kind
? "cannot use constraints with ParamSpec"
: "cannot use bound with ParamSpec")
}
| '**' a=NAME { _PyAST_ParamSpec(a->v.Name.id, EXTRA) }
type_param_bound[expr_ty]: ":" e=expression { e }
type_param_bound[expr_ty]: ':' e=expression { e }
# EXPRESSIONS
# -----------
@ -915,7 +912,7 @@ fstring_middle[expr_ty]:
| fstring_replacement_field
| t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
fstring_replacement_field[expr_ty]:
| '{' a=(yield_expr | star_expressions) debug_expr="="? conversion=[fstring_conversion] format=[fstring_full_format_spec] rbrace='}' {
| '{' a=(yield_expr | star_expressions) debug_expr='='? conversion=[fstring_conversion] format=[fstring_full_format_spec] rbrace='}' {
_PyPegen_formatted_value(p, a, debug_expr, conversion, format, rbrace, EXTRA) }
| invalid_replacement_field
fstring_conversion[ResultTokenWithMetadata*]:

View File

@ -10,9 +10,6 @@ extern "C" {
#define Py_eval_input 258
#define Py_func_type_input 345
/* This doesn't need to match anything */
#define Py_fstring_input 800
#ifndef Py_LIMITED_API
# define Py_CPYTHON_COMPILE_H
# include "cpython/compile.h"

View File

@ -42,6 +42,15 @@ class TestPegen(unittest.TestCase):
)
self.assertEqual(repr(rules["term"]), expected_repr)
def test_repeated_rules(self) -> None:
    """A grammar that defines the same rule name twice must be rejected."""
    # ``the_rule`` is deliberately defined twice with different bodies.
    duplicated_grammar = """
start: the_rule NEWLINE
the_rule: 'b' NEWLINE
the_rule: 'a' NEWLINE
"""
    expected_message = "Repeated rule 'the_rule'"
    with self.assertRaisesRegex(GrammarError, expected_message):
        parse_string(duplicated_grammar, GrammarParser)
def test_long_rule_str(self) -> None:
grammar_source = """
start: zero | one | one zero | one one | one zero zero | one zero one | one one zero | one one one

View File

@ -0,0 +1,3 @@
Fix some tokens in the grammar that were incorrectly marked as soft
keywords. Also fix some repeated rule names and ensure that repeated rules
are not allowed. Patch by Pablo Galindo.

3518
Parser/parser.c generated

File diff suppressed because it is too large Load Diff

View File

@ -310,21 +310,6 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
end_col_offset = p->tok->cur - p->tok->line_start;
}
if (p->start_rule == Py_fstring_input) {
const char *fstring_msg = "f-string: ";
Py_ssize_t len = strlen(fstring_msg) + strlen(errmsg);
char *new_errmsg = PyMem_Malloc(len + 1); // Lengths of both strings plus NULL character
if (!new_errmsg) {
return (void *) PyErr_NoMemory();
}
// Copy both strings into new buffer
memcpy(new_errmsg, fstring_msg, strlen(fstring_msg));
memcpy(new_errmsg + strlen(fstring_msg), errmsg, strlen(errmsg));
new_errmsg[len] = 0;
errmsg = new_errmsg;
}
errstr = PyUnicode_FromFormatV(errmsg, va);
if (!errstr) {
goto error;
@ -363,11 +348,6 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
}
}
if (p->start_rule == Py_fstring_input) {
col_offset -= p->starting_col_offset;
end_col_offset -= p->starting_col_offset;
}
Py_ssize_t col_number = col_offset;
Py_ssize_t end_col_number = end_col_offset;
@ -398,17 +378,11 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
Py_DECREF(errstr);
Py_DECREF(value);
if (p->start_rule == Py_fstring_input) {
PyMem_Free((void *)errmsg);
}
return NULL;
error:
Py_XDECREF(errstr);
Py_XDECREF(error_line);
if (p->start_rule == Py_fstring_input) {
PyMem_Free((void *)errmsg);
}
return NULL;
}

View File

@ -35,7 +35,13 @@ class GrammarVisitor:
class Grammar:
def __init__(self, rules: Iterable[Rule], metas: Iterable[Tuple[str, Optional[str]]]):
    """Build the grammar from its rules and meta declarations.

    Args:
        rules: The grammar's rules; each one is stored under its ``name``.
        metas: ``(name, value)`` pairs, stored as a dict.

    Raises:
        GrammarError: If two rules share the same name.
    """
    # Walk ``rules`` exactly once. A plain dict comprehension would silently
    # keep only the last rule of a duplicated name, and iterating twice would
    # find a one-shot iterable (e.g. a generator) already exhausted, which
    # would skip the duplicate check and leave the grammar without rules.
    all_rules = {}
    for rule in rules:
        if rule.name in all_rules:
            raise GrammarError(f"Repeated rule {rule.name!r}")
        all_rules[rule.name] = rule
    self.rules = all_rules
    self.metas = dict(metas)
def __str__(self) -> str: