mirror of https://github.com/python/cpython
gh-109596: Ensure repeated rules in the grammar are not allowed and fix incorrect soft keywords (#109606)
This commit is contained in:
parent
7c55399172
commit
b28ffaa193
|
@ -19,8 +19,6 @@ _PyPegen_parse(Parser *p)
|
|||
result = eval_rule(p);
|
||||
} else if (p->start_rule == Py_func_type_input) {
|
||||
result = func_type_rule(p);
|
||||
} else if (p->start_rule == Py_fstring_input) {
|
||||
result = fstring_rule(p);
|
||||
}
|
||||
|
||||
return result;
|
||||
|
@ -89,7 +87,6 @@ file[mod_ty]: a=[statements] ENDMARKER { _PyPegen_make_module(p, a) }
|
|||
interactive[mod_ty]: a=statement_newline { _PyAST_Interactive(a, p->arena) }
|
||||
eval[mod_ty]: a=expressions NEWLINE* ENDMARKER { _PyAST_Expression(a, p->arena) }
|
||||
func_type[mod_ty]: '(' a=[type_expressions] ')' '->' b=expression NEWLINE* ENDMARKER { _PyAST_FunctionType(a, b, p->arena) }
|
||||
fstring[expr_ty]: star_expressions
|
||||
|
||||
# GENERAL STATEMENTS
|
||||
# ==================
|
||||
|
@ -647,20 +644,20 @@ type_param_seq[asdl_type_param_seq*]: a[asdl_type_param_seq*]=','.type_param+ ['
|
|||
|
||||
type_param[type_param_ty] (memo):
|
||||
| a=NAME b=[type_param_bound] { _PyAST_TypeVar(a->v.Name.id, b, EXTRA) }
|
||||
| '*' a=NAME colon=":" e=expression {
|
||||
| '*' a=NAME colon=':' e=expression {
|
||||
RAISE_SYNTAX_ERROR_STARTING_FROM(colon, e->kind == Tuple_kind
|
||||
? "cannot use constraints with TypeVarTuple"
|
||||
: "cannot use bound with TypeVarTuple")
|
||||
}
|
||||
| '*' a=NAME { _PyAST_TypeVarTuple(a->v.Name.id, EXTRA) }
|
||||
| '**' a=NAME colon=":" e=expression {
|
||||
| '**' a=NAME colon=':' e=expression {
|
||||
RAISE_SYNTAX_ERROR_STARTING_FROM(colon, e->kind == Tuple_kind
|
||||
? "cannot use constraints with ParamSpec"
|
||||
: "cannot use bound with ParamSpec")
|
||||
}
|
||||
| '**' a=NAME { _PyAST_ParamSpec(a->v.Name.id, EXTRA) }
|
||||
|
||||
type_param_bound[expr_ty]: ":" e=expression { e }
|
||||
type_param_bound[expr_ty]: ':' e=expression { e }
|
||||
|
||||
# EXPRESSIONS
|
||||
# -----------
|
||||
|
@ -915,7 +912,7 @@ fstring_middle[expr_ty]:
|
|||
| fstring_replacement_field
|
||||
| t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
|
||||
fstring_replacement_field[expr_ty]:
|
||||
| '{' a=(yield_expr | star_expressions) debug_expr="="? conversion=[fstring_conversion] format=[fstring_full_format_spec] rbrace='}' {
|
||||
| '{' a=(yield_expr | star_expressions) debug_expr='='? conversion=[fstring_conversion] format=[fstring_full_format_spec] rbrace='}' {
|
||||
_PyPegen_formatted_value(p, a, debug_expr, conversion, format, rbrace, EXTRA) }
|
||||
| invalid_replacement_field
|
||||
fstring_conversion[ResultTokenWithMetadata*]:
|
||||
|
|
|
@ -10,9 +10,6 @@ extern "C" {
|
|||
#define Py_eval_input 258
|
||||
#define Py_func_type_input 345
|
||||
|
||||
/* This doesn't need to match anything */
|
||||
#define Py_fstring_input 800
|
||||
|
||||
#ifndef Py_LIMITED_API
|
||||
# define Py_CPYTHON_COMPILE_H
|
||||
# include "cpython/compile.h"
|
||||
|
|
|
@ -42,6 +42,15 @@ class TestPegen(unittest.TestCase):
|
|||
)
|
||||
self.assertEqual(repr(rules["term"]), expected_repr)
|
||||
|
||||
def test_repeated_rules(self) -> None:
|
||||
grammar_source = """
|
||||
start: the_rule NEWLINE
|
||||
the_rule: 'b' NEWLINE
|
||||
the_rule: 'a' NEWLINE
|
||||
"""
|
||||
with self.assertRaisesRegex(GrammarError, "Repeated rule 'the_rule'"):
|
||||
parse_string(grammar_source, GrammarParser)
|
||||
|
||||
def test_long_rule_str(self) -> None:
|
||||
grammar_source = """
|
||||
start: zero | one | one zero | one one | one zero zero | one zero one | one one zero | one one one
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
Fix some tokens in the grammar that were incorrectly marked as soft
|
||||
keywords. Also fix some repeated rule names and ensure that repeated rules
|
||||
are not allowed. Patch by Pablo Galindo.
|
File diff suppressed because it is too large
Load Diff
|
@ -310,21 +310,6 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
|
|||
end_col_offset = p->tok->cur - p->tok->line_start;
|
||||
}
|
||||
|
||||
if (p->start_rule == Py_fstring_input) {
|
||||
const char *fstring_msg = "f-string: ";
|
||||
Py_ssize_t len = strlen(fstring_msg) + strlen(errmsg);
|
||||
|
||||
char *new_errmsg = PyMem_Malloc(len + 1); // Lengths of both strings plus NULL character
|
||||
if (!new_errmsg) {
|
||||
return (void *) PyErr_NoMemory();
|
||||
}
|
||||
|
||||
// Copy both strings into new buffer
|
||||
memcpy(new_errmsg, fstring_msg, strlen(fstring_msg));
|
||||
memcpy(new_errmsg + strlen(fstring_msg), errmsg, strlen(errmsg));
|
||||
new_errmsg[len] = 0;
|
||||
errmsg = new_errmsg;
|
||||
}
|
||||
errstr = PyUnicode_FromFormatV(errmsg, va);
|
||||
if (!errstr) {
|
||||
goto error;
|
||||
|
@ -363,11 +348,6 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
|
|||
}
|
||||
}
|
||||
|
||||
if (p->start_rule == Py_fstring_input) {
|
||||
col_offset -= p->starting_col_offset;
|
||||
end_col_offset -= p->starting_col_offset;
|
||||
}
|
||||
|
||||
Py_ssize_t col_number = col_offset;
|
||||
Py_ssize_t end_col_number = end_col_offset;
|
||||
|
||||
|
@ -398,17 +378,11 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
|
|||
|
||||
Py_DECREF(errstr);
|
||||
Py_DECREF(value);
|
||||
if (p->start_rule == Py_fstring_input) {
|
||||
PyMem_Free((void *)errmsg);
|
||||
}
|
||||
return NULL;
|
||||
|
||||
error:
|
||||
Py_XDECREF(errstr);
|
||||
Py_XDECREF(error_line);
|
||||
if (p->start_rule == Py_fstring_input) {
|
||||
PyMem_Free((void *)errmsg);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
|
@ -35,7 +35,13 @@ class GrammarVisitor:
|
|||
|
||||
class Grammar:
|
||||
def __init__(self, rules: Iterable[Rule], metas: Iterable[Tuple[str, Optional[str]]]):
|
||||
self.rules = {rule.name: rule for rule in rules}
|
||||
# Check if there are repeated rules in "rules"
|
||||
all_rules = {}
|
||||
for rule in rules:
|
||||
if rule.name in all_rules:
|
||||
raise GrammarError(f"Repeated rule {rule.name!r}")
|
||||
all_rules[rule.name] = rule
|
||||
self.rules = all_rules
|
||||
self.metas = dict(metas)
|
||||
|
||||
def __str__(self) -> str:
|
||||
|
|
Loading…
Reference in New Issue