mirror of https://github.com/python/cpython
gh-109596: Ensure repeated rules in the grammar are not allowed and fix incorrect soft keywords (#109606)
This commit is contained in:
parent
7c55399172
commit
b28ffaa193
|
@ -19,8 +19,6 @@ _PyPegen_parse(Parser *p)
|
||||||
result = eval_rule(p);
|
result = eval_rule(p);
|
||||||
} else if (p->start_rule == Py_func_type_input) {
|
} else if (p->start_rule == Py_func_type_input) {
|
||||||
result = func_type_rule(p);
|
result = func_type_rule(p);
|
||||||
} else if (p->start_rule == Py_fstring_input) {
|
|
||||||
result = fstring_rule(p);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
@ -89,7 +87,6 @@ file[mod_ty]: a=[statements] ENDMARKER { _PyPegen_make_module(p, a) }
|
||||||
interactive[mod_ty]: a=statement_newline { _PyAST_Interactive(a, p->arena) }
|
interactive[mod_ty]: a=statement_newline { _PyAST_Interactive(a, p->arena) }
|
||||||
eval[mod_ty]: a=expressions NEWLINE* ENDMARKER { _PyAST_Expression(a, p->arena) }
|
eval[mod_ty]: a=expressions NEWLINE* ENDMARKER { _PyAST_Expression(a, p->arena) }
|
||||||
func_type[mod_ty]: '(' a=[type_expressions] ')' '->' b=expression NEWLINE* ENDMARKER { _PyAST_FunctionType(a, b, p->arena) }
|
func_type[mod_ty]: '(' a=[type_expressions] ')' '->' b=expression NEWLINE* ENDMARKER { _PyAST_FunctionType(a, b, p->arena) }
|
||||||
fstring[expr_ty]: star_expressions
|
|
||||||
|
|
||||||
# GENERAL STATEMENTS
|
# GENERAL STATEMENTS
|
||||||
# ==================
|
# ==================
|
||||||
|
@ -647,20 +644,20 @@ type_param_seq[asdl_type_param_seq*]: a[asdl_type_param_seq*]=','.type_param+ ['
|
||||||
|
|
||||||
type_param[type_param_ty] (memo):
|
type_param[type_param_ty] (memo):
|
||||||
| a=NAME b=[type_param_bound] { _PyAST_TypeVar(a->v.Name.id, b, EXTRA) }
|
| a=NAME b=[type_param_bound] { _PyAST_TypeVar(a->v.Name.id, b, EXTRA) }
|
||||||
| '*' a=NAME colon=":" e=expression {
|
| '*' a=NAME colon=':' e=expression {
|
||||||
RAISE_SYNTAX_ERROR_STARTING_FROM(colon, e->kind == Tuple_kind
|
RAISE_SYNTAX_ERROR_STARTING_FROM(colon, e->kind == Tuple_kind
|
||||||
? "cannot use constraints with TypeVarTuple"
|
? "cannot use constraints with TypeVarTuple"
|
||||||
: "cannot use bound with TypeVarTuple")
|
: "cannot use bound with TypeVarTuple")
|
||||||
}
|
}
|
||||||
| '*' a=NAME { _PyAST_TypeVarTuple(a->v.Name.id, EXTRA) }
|
| '*' a=NAME { _PyAST_TypeVarTuple(a->v.Name.id, EXTRA) }
|
||||||
| '**' a=NAME colon=":" e=expression {
|
| '**' a=NAME colon=':' e=expression {
|
||||||
RAISE_SYNTAX_ERROR_STARTING_FROM(colon, e->kind == Tuple_kind
|
RAISE_SYNTAX_ERROR_STARTING_FROM(colon, e->kind == Tuple_kind
|
||||||
? "cannot use constraints with ParamSpec"
|
? "cannot use constraints with ParamSpec"
|
||||||
: "cannot use bound with ParamSpec")
|
: "cannot use bound with ParamSpec")
|
||||||
}
|
}
|
||||||
| '**' a=NAME { _PyAST_ParamSpec(a->v.Name.id, EXTRA) }
|
| '**' a=NAME { _PyAST_ParamSpec(a->v.Name.id, EXTRA) }
|
||||||
|
|
||||||
type_param_bound[expr_ty]: ":" e=expression { e }
|
type_param_bound[expr_ty]: ':' e=expression { e }
|
||||||
|
|
||||||
# EXPRESSIONS
|
# EXPRESSIONS
|
||||||
# -----------
|
# -----------
|
||||||
|
@ -915,7 +912,7 @@ fstring_middle[expr_ty]:
|
||||||
| fstring_replacement_field
|
| fstring_replacement_field
|
||||||
| t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
|
| t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
|
||||||
fstring_replacement_field[expr_ty]:
|
fstring_replacement_field[expr_ty]:
|
||||||
| '{' a=(yield_expr | star_expressions) debug_expr="="? conversion=[fstring_conversion] format=[fstring_full_format_spec] rbrace='}' {
|
| '{' a=(yield_expr | star_expressions) debug_expr='='? conversion=[fstring_conversion] format=[fstring_full_format_spec] rbrace='}' {
|
||||||
_PyPegen_formatted_value(p, a, debug_expr, conversion, format, rbrace, EXTRA) }
|
_PyPegen_formatted_value(p, a, debug_expr, conversion, format, rbrace, EXTRA) }
|
||||||
| invalid_replacement_field
|
| invalid_replacement_field
|
||||||
fstring_conversion[ResultTokenWithMetadata*]:
|
fstring_conversion[ResultTokenWithMetadata*]:
|
||||||
|
|
|
@ -10,9 +10,6 @@ extern "C" {
|
||||||
#define Py_eval_input 258
|
#define Py_eval_input 258
|
||||||
#define Py_func_type_input 345
|
#define Py_func_type_input 345
|
||||||
|
|
||||||
/* This doesn't need to match anything */
|
|
||||||
#define Py_fstring_input 800
|
|
||||||
|
|
||||||
#ifndef Py_LIMITED_API
|
#ifndef Py_LIMITED_API
|
||||||
# define Py_CPYTHON_COMPILE_H
|
# define Py_CPYTHON_COMPILE_H
|
||||||
# include "cpython/compile.h"
|
# include "cpython/compile.h"
|
||||||
|
|
|
@ -42,6 +42,15 @@ class TestPegen(unittest.TestCase):
|
||||||
)
|
)
|
||||||
self.assertEqual(repr(rules["term"]), expected_repr)
|
self.assertEqual(repr(rules["term"]), expected_repr)
|
||||||
|
|
||||||
|
def test_repeated_rules(self) -> None:
|
||||||
|
grammar_source = """
|
||||||
|
start: the_rule NEWLINE
|
||||||
|
the_rule: 'b' NEWLINE
|
||||||
|
the_rule: 'a' NEWLINE
|
||||||
|
"""
|
||||||
|
with self.assertRaisesRegex(GrammarError, "Repeated rule 'the_rule'"):
|
||||||
|
parse_string(grammar_source, GrammarParser)
|
||||||
|
|
||||||
def test_long_rule_str(self) -> None:
|
def test_long_rule_str(self) -> None:
|
||||||
grammar_source = """
|
grammar_source = """
|
||||||
start: zero | one | one zero | one one | one zero zero | one zero one | one one zero | one one one
|
start: zero | one | one zero | one one | one zero zero | one zero one | one one zero | one one one
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
Fix some tokens in the grammar that were incorrectly marked as soft
|
||||||
|
keywords. Also fix some repeated rule names and ensure that repeated rules
|
||||||
|
are not allowed. Patch by Pablo Galindo.
|
File diff suppressed because it is too large
Load Diff
|
@ -310,21 +310,6 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
|
||||||
end_col_offset = p->tok->cur - p->tok->line_start;
|
end_col_offset = p->tok->cur - p->tok->line_start;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (p->start_rule == Py_fstring_input) {
|
|
||||||
const char *fstring_msg = "f-string: ";
|
|
||||||
Py_ssize_t len = strlen(fstring_msg) + strlen(errmsg);
|
|
||||||
|
|
||||||
char *new_errmsg = PyMem_Malloc(len + 1); // Lengths of both strings plus NULL character
|
|
||||||
if (!new_errmsg) {
|
|
||||||
return (void *) PyErr_NoMemory();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy both strings into new buffer
|
|
||||||
memcpy(new_errmsg, fstring_msg, strlen(fstring_msg));
|
|
||||||
memcpy(new_errmsg + strlen(fstring_msg), errmsg, strlen(errmsg));
|
|
||||||
new_errmsg[len] = 0;
|
|
||||||
errmsg = new_errmsg;
|
|
||||||
}
|
|
||||||
errstr = PyUnicode_FromFormatV(errmsg, va);
|
errstr = PyUnicode_FromFormatV(errmsg, va);
|
||||||
if (!errstr) {
|
if (!errstr) {
|
||||||
goto error;
|
goto error;
|
||||||
|
@ -363,11 +348,6 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (p->start_rule == Py_fstring_input) {
|
|
||||||
col_offset -= p->starting_col_offset;
|
|
||||||
end_col_offset -= p->starting_col_offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
Py_ssize_t col_number = col_offset;
|
Py_ssize_t col_number = col_offset;
|
||||||
Py_ssize_t end_col_number = end_col_offset;
|
Py_ssize_t end_col_number = end_col_offset;
|
||||||
|
|
||||||
|
@ -398,17 +378,11 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
|
||||||
|
|
||||||
Py_DECREF(errstr);
|
Py_DECREF(errstr);
|
||||||
Py_DECREF(value);
|
Py_DECREF(value);
|
||||||
if (p->start_rule == Py_fstring_input) {
|
|
||||||
PyMem_Free((void *)errmsg);
|
|
||||||
}
|
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
error:
|
error:
|
||||||
Py_XDECREF(errstr);
|
Py_XDECREF(errstr);
|
||||||
Py_XDECREF(error_line);
|
Py_XDECREF(error_line);
|
||||||
if (p->start_rule == Py_fstring_input) {
|
|
||||||
PyMem_Free((void *)errmsg);
|
|
||||||
}
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -35,7 +35,13 @@ class GrammarVisitor:
|
||||||
|
|
||||||
class Grammar:
|
class Grammar:
|
||||||
def __init__(self, rules: Iterable[Rule], metas: Iterable[Tuple[str, Optional[str]]]):
|
def __init__(self, rules: Iterable[Rule], metas: Iterable[Tuple[str, Optional[str]]]):
|
||||||
self.rules = {rule.name: rule for rule in rules}
|
# Check if there are repeated rules in "rules"
|
||||||
|
all_rules = {}
|
||||||
|
for rule in rules:
|
||||||
|
if rule.name in all_rules:
|
||||||
|
raise GrammarError(f"Repeated rule {rule.name!r}")
|
||||||
|
all_rules[rule.name] = rule
|
||||||
|
self.rules = all_rules
|
||||||
self.metas = dict(metas)
|
self.metas = dict(metas)
|
||||||
|
|
||||||
def __str__(self) -> str:
|
def __str__(self) -> str:
|
||||||
|
|
Loading…
Reference in New Issue