gh-109596: Ensure repeated rules in the grammar are not allowed and fix incorrect soft keywords (#109606)

2023-09-22 19:03:23 +01:00 · 2023-09-22 19:03:23 +01:00 · b28ffaa193
parent 7c55399172
commit b28ffaa193
7 changed files with 1781 additions and 1797 deletions
--- a/Grammar/python.gram
+++ b/Grammar/python.gram
@ -19,8 +19,6 @@ _PyPegen_parse(Parser *p)
        result = eval_rule(p);
    } else if (p->start_rule == Py_func_type_input) {
        result = func_type_rule(p);
    } else if (p->start_rule == Py_fstring_input) {
        result = fstring_rule(p);
    }
    return result;
@ -89,7 +87,6 @@ file[mod_ty]: a=[statements] ENDMARKER { _PyPegen_make_module(p, a) }
 interactive[mod_ty]: a=statement_newline { _PyAST_Interactive(a, p->arena) }
 eval[mod_ty]: a=expressions NEWLINE* ENDMARKER { _PyAST_Expression(a, p->arena) }
 func_type[mod_ty]: '(' a=[type_expressions] ')' '->' b=expression NEWLINE* ENDMARKER { _PyAST_FunctionType(a, b, p->arena) }
 fstring[expr_ty]: star_expressions
 # GENERAL STATEMENTS
 # ==================
@ -647,20 +644,20 @@ type_param_seq[asdl_type_param_seq*]: a[asdl_type_param_seq*]=','.type_param+ ['
 type_param[type_param_ty] (memo):
    | a=NAME b=[type_param_bound] { _PyAST_TypeVar(a->v.Name.id, b, EXTRA) }
-    | '*' a=NAME colon=":" e=expression {
+    | '*' a=NAME colon=':' e=expression {
            RAISE_SYNTAX_ERROR_STARTING_FROM(colon, e->kind == Tuple_kind
                ? "cannot use constraints with TypeVarTuple"
                : "cannot use bound with TypeVarTuple")
        }
    | '*' a=NAME { _PyAST_TypeVarTuple(a->v.Name.id, EXTRA) }
-    | '**' a=NAME colon=":" e=expression {
+    | '**' a=NAME colon=':' e=expression {
            RAISE_SYNTAX_ERROR_STARTING_FROM(colon, e->kind == Tuple_kind
                ? "cannot use constraints with ParamSpec"
                : "cannot use bound with ParamSpec")
        }
    | '**' a=NAME { _PyAST_ParamSpec(a->v.Name.id, EXTRA) }
-type_param_bound[expr_ty]: ":" e=expression { e }
+type_param_bound[expr_ty]: ':' e=expression { e }
 # EXPRESSIONS
 # -----------
@ -915,7 +912,7 @@ fstring_middle[expr_ty]:
    | fstring_replacement_field
    | t=FSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) }
 fstring_replacement_field[expr_ty]:
-    | '{' a=(yield_expr | star_expressions) debug_expr="="? conversion=[fstring_conversion] format=[fstring_full_format_spec] rbrace='}' {
+    | '{' a=(yield_expr | star_expressions) debug_expr='='? conversion=[fstring_conversion] format=[fstring_full_format_spec] rbrace='}' {
        _PyPegen_formatted_value(p, a, debug_expr, conversion, format, rbrace, EXTRA) }
    | invalid_replacement_field
 fstring_conversion[ResultTokenWithMetadata*]:
--- a/Include/compile.h
+++ b/Include/compile.h
@ -10,9 +10,6 @@ extern "C" {
 #define Py_eval_input 258
 #define Py_func_type_input 345
 /* This doesn't need to match anything */
 #define Py_fstring_input 800
 #ifndef Py_LIMITED_API
 #  define Py_CPYTHON_COMPILE_H
 #  include "cpython/compile.h"
--- a/Lib/test/test_peg_generator/test_pegen.py
+++ b/Lib/test/test_peg_generator/test_pegen.py
@ -42,6 +42,15 @@ class TestPegen(unittest.TestCase):
        )
        self.assertEqual(repr(rules["term"]), expected_repr)
    def test_repeated_rules(self) -> None:
        # A grammar that defines "the_rule" twice must be rejected while the
        # grammar is being parsed, rather than letting the later definition
        # silently replace the earlier one.
        duplicated_grammar = """
        start: the_rule NEWLINE
        the_rule: 'b' NEWLINE
        the_rule: 'a' NEWLINE
        """
        self.assertRaisesRegex(
            GrammarError,
            "Repeated rule 'the_rule'",
            parse_string,
            duplicated_grammar,
            GrammarParser,
        )
    def test_long_rule_str(self) -> None:
        grammar_source = """
        start: zero | one | one zero | one one | one zero zero | one zero one | one one zero | one one one
--- a/Builtins/2023-09-20-13-18-08.gh-issue-109596.RG0K2G.rst
+++ b/Builtins/2023-09-20-13-18-08.gh-issue-109596.RG0K2G.rst
@ -0,0 +1,3 @@
 Fix some tokens in the grammar that were incorrectly marked as soft
 keywords. Also fix some repeated rule names and ensure that repeated rules
 are not allowed. Patch by Pablo Galindo.
--- a/Parser/parser.c
+++ b/Parser/parser.c
--- a/Parser/pegen_errors.c
+++ b/Parser/pegen_errors.c
@ -310,21 +310,6 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
        end_col_offset = p->tok->cur - p->tok->line_start;
    }
    if (p->start_rule == Py_fstring_input) {
        const char *fstring_msg = "f-string: ";
        Py_ssize_t len = strlen(fstring_msg) + strlen(errmsg);
        char *new_errmsg = PyMem_Malloc(len + 1); // Lengths of both strings plus NULL character
        if (!new_errmsg) {
            return (void *) PyErr_NoMemory();
        }
        // Copy both strings into new buffer
        memcpy(new_errmsg, fstring_msg, strlen(fstring_msg));
        memcpy(new_errmsg + strlen(fstring_msg), errmsg, strlen(errmsg));
        new_errmsg[len] = 0;
        errmsg = new_errmsg;
    }
    errstr = PyUnicode_FromFormatV(errmsg, va);
    if (!errstr) {
        goto error;
@ -363,11 +348,6 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
        }
    }
    if (p->start_rule == Py_fstring_input) {
        col_offset -= p->starting_col_offset;
        end_col_offset -= p->starting_col_offset;
    }
    Py_ssize_t col_number = col_offset;
    Py_ssize_t end_col_number = end_col_offset;
@ -398,17 +378,11 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
    Py_DECREF(errstr);
    Py_DECREF(value);
    if (p->start_rule == Py_fstring_input) {
        PyMem_Free((void *)errmsg);
    }
    return NULL;
 error:
    Py_XDECREF(errstr);
    Py_XDECREF(error_line);
    if (p->start_rule == Py_fstring_input) {
        PyMem_Free((void *)errmsg);
    }
    return NULL;
 }
--- a/Tools/peg_generator/pegen/grammar.py
+++ b/Tools/peg_generator/pegen/grammar.py
@ -35,7 +35,13 @@ class GrammarVisitor:
 class Grammar:
    def __init__(self, rules: Iterable[Rule], metas: Iterable[Tuple[str, Optional[str]]]):
-        self.rules = {rule.name: rule for rule in rules}
+        # Check if there are repeated rules in "rules"
        all_rules = {}
        for rule in rules:
            if rule.name in all_rules:
                raise GrammarError(f"Repeated rule {rule.name!r}")
            all_rules[rule.name] = rule
        self.rules = all_rules
        self.metas = dict(metas)
    def __str__(self) -> str: