diff --git a/Lib/test/test_peg_generator/test_pegen.py b/Lib/test/test_peg_generator/test_pegen.py index 71b0fdc5646..99c75f09aa1 100644 --- a/Lib/test/test_peg_generator/test_pegen.py +++ b/Lib/test/test_peg_generator/test_pegen.py @@ -15,6 +15,7 @@ with test_tools.imports_under_tool("peg_generator"): from pegen.grammar import GrammarVisitor, GrammarError, Grammar from pegen.grammar_visualizer import ASTGrammarPrinter from pegen.parser import Parser + from pegen.parser_generator import compute_nullables, compute_left_recursives from pegen.python_generator import PythonParserGenerator @@ -502,11 +503,10 @@ class TestPegen(unittest.TestCase): sign: ['-' | '+'] """ grammar: Grammar = parse_string(grammar_source, GrammarParser) - out = io.StringIO() - genr = PythonParserGenerator(grammar, out) rules = grammar.rules - self.assertFalse(rules["start"].nullable) # Not None! - self.assertTrue(rules["sign"].nullable) + nullables = compute_nullables(rules) + self.assertNotIn(rules["start"], nullables) # Not None! + self.assertIn(rules["sign"], nullables) def test_advanced_left_recursive(self) -> None: grammar_source = """ @@ -514,11 +514,11 @@ class TestPegen(unittest.TestCase): sign: ['-'] """ grammar: Grammar = parse_string(grammar_source, GrammarParser) - out = io.StringIO() - genr = PythonParserGenerator(grammar, out) rules = grammar.rules - self.assertFalse(rules["start"].nullable) # Not None! - self.assertTrue(rules["sign"].nullable) + nullables = compute_nullables(rules) + compute_left_recursives(rules) + self.assertNotIn(rules["start"], nullables) # Not None! + self.assertIn(rules["sign"], nullables) self.assertTrue(rules["start"].left_recursive) self.assertFalse(rules["sign"].left_recursive) diff --git a/Parser/parser.c b/Parser/parser.c index 87227b7f2f7..3cea370c5ad 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -11,57 +11,57 @@ static KeywordToken *reserved_keywords[] = { (KeywordToken[]) {{NULL, -1}}, (KeywordToken[]) {{NULL, -1}}, (KeywordToken[]) { - {"if", 510}, - {"in", 520}, - {"as", 522}, - {"is", 529}, - {"or", 531}, + {"if", 624}, + {"as", 622}, + {"in", 631}, + {"or", 571}, + {"is", 579}, {NULL, -1}, }, (KeywordToken[]) { - {"del", 503}, - {"try", 511}, - {"def", 516}, - {"for", 519}, - {"not", 528}, - {"and", 532}, + {"del", 597}, + {"def", 632}, + {"for", 630}, + {"try", 609}, + {"and", 572}, + {"not", 578}, {NULL, -1}, }, (KeywordToken[]) { - {"pass", 502}, - {"from", 514}, - {"elif", 517}, - {"else", 518}, - {"with", 521}, - {"None", 525}, - {"True", 526}, + {"from", 569}, + {"pass", 504}, + {"with", 606}, + {"elif", 626}, + {"else", 627}, + {"None", 595}, + {"True", 594}, {NULL, -1}, }, (KeywordToken[]) { - {"raise", 501}, - {"yield", 504}, - {"break", 506}, - {"while", 512}, - {"class", 515}, - {"False", 527}, + {"raise", 522}, + {"yield", 570}, + {"break", 508}, + {"class", 633}, + {"while", 629}, + {"False", 596}, {NULL, -1}, }, (KeywordToken[]) { - {"return", 500}, - {"assert", 505}, - {"global", 508}, - {"import", 513}, - {"except", 523}, - {"lambda", 530}, + {"return", 519}, + {"import", 531}, + {"assert", 526}, + {"global", 523}, + {"except", 620}, + {"lambda", 583}, {NULL, -1}, }, (KeywordToken[]) { - {"finally", 524}, + {"finally", 617}, {NULL, -1}, }, (KeywordToken[]) { - {"continue", 507}, - {"nonlocal", 509}, + {"continue", 509}, + {"nonlocal", 524}, {NULL, -1}, }, }; @@ -1562,7 +1562,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'return' return_stmt")); stmt_ty return_stmt_var; if ( 
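The test_pegen.py changes above exercise the refactored API: nullability is now computed by the module-level helper `compute_nullables`, which returns a set of nullable nodes instead of mutating a `Rule.nullable` flag, while `compute_left_recursives` still annotates rules in place. A minimal sketch of driving the new helpers outside the test suite (the `pegen.testutil` import location is an assumption, and the grammar text is illustrative, not taken from the patch):

```python
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.parser_generator import compute_left_recursives, compute_nullables
from pegen.testutil import parse_string  # assumed helper location

grammar = parse_string(
    "start: attr | NAME\n"
    "attr: start '.' NAME\n"
    "sign: ['-' | '+']\n",
    GrammarParser,
)
rules = grammar.rules

# Nullability is now a returned set of nullable nodes, not a flag on Rule.
nullables = compute_nullables(rules)
assert rules["sign"] in nullables       # ['-' | '+'] can match the empty string
assert rules["start"] not in nullables  # left-recursive rules stay non-nullable

# Left-recursion detection still writes rule.left_recursive in place.
compute_left_recursives(rules)
assert rules["start"].left_recursive
assert not rules["sign"].left_recursive
```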
- _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 500) // token='return' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 519) // token='return' && (return_stmt_var = return_stmt_rule(p)) // return_stmt ) @@ -1604,7 +1604,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'raise' raise_stmt")); stmt_ty raise_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 501) // token='raise' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 522) // token='raise' && (raise_stmt_var = raise_stmt_rule(p)) // raise_stmt ) @@ -1625,7 +1625,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'pass'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 502)) // token='pass' + (_keyword = _PyPegen_expect_token(p, 504)) // token='pass' ) { D(fprintf(stderr, "%*c+ simple_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'pass'")); @@ -1658,7 +1658,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'del' del_stmt")); stmt_ty del_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 503) // token='del' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 597) // token='del' && (del_stmt_var = del_stmt_rule(p)) // del_stmt ) @@ -1679,7 +1679,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'yield' yield_stmt")); stmt_ty yield_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 504) // token='yield' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 570) // token='yield' && (yield_stmt_var = yield_stmt_rule(p)) // yield_stmt ) @@ -1700,7 +1700,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'assert' assert_stmt")); stmt_ty assert_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 505) // token='assert' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 526) // token='assert' && (assert_stmt_var = assert_stmt_rule(p)) // assert_stmt ) @@ -1721,7 +1721,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'break'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 506)) // token='break' + (_keyword = _PyPegen_expect_token(p, 508)) // token='break' ) { D(fprintf(stderr, "%*c+ simple_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'break'")); @@ -1754,7 +1754,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'continue'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 507)) // token='continue' + (_keyword = _PyPegen_expect_token(p, 509)) // token='continue' ) { D(fprintf(stderr, "%*c+ simple_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'continue'")); @@ -1787,7 +1787,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'global' global_stmt")); stmt_ty global_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 508) // token='global' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 523) // token='global' && (global_stmt_var = global_stmt_rule(p)) // global_stmt ) @@ -1808,7 +1808,7 @@ simple_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> 
simple_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'nonlocal' nonlocal_stmt")); stmt_ty nonlocal_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 509) // token='nonlocal' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 524) // token='nonlocal' && (nonlocal_stmt_var = nonlocal_stmt_rule(p)) // nonlocal_stmt ) @@ -1876,7 +1876,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'if' if_stmt")); stmt_ty if_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 510) // token='if' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 624) // token='if' && (if_stmt_var = if_stmt_rule(p)) // if_stmt ) @@ -1960,7 +1960,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'try' try_stmt")); stmt_ty try_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 511) // token='try' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 609) // token='try' && (try_stmt_var = try_stmt_rule(p)) // try_stmt ) @@ -1981,7 +1981,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'while' while_stmt")); stmt_ty while_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 512) // token='while' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 629) // token='while' && (while_stmt_var = while_stmt_rule(p)) // while_stmt ) @@ -2666,7 +2666,7 @@ return_stmt_rule(Parser *p) Token * _keyword; void *a; if ( - (_keyword = _PyPegen_expect_token(p, 500)) // token='return' + (_keyword = _PyPegen_expect_token(p, 519)) // token='return' && (a = star_expressions_rule(p), 1) // star_expressions? 
) @@ -2729,7 +2729,7 @@ raise_stmt_rule(Parser *p) expr_ty a; void *b; if ( - (_keyword = _PyPegen_expect_token(p, 501)) // token='raise' + (_keyword = _PyPegen_expect_token(p, 522)) // token='raise' && (a = expression_rule(p)) // expression && @@ -2766,7 +2766,7 @@ raise_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> raise_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'raise'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 501)) // token='raise' + (_keyword = _PyPegen_expect_token(p, 522)) // token='raise' ) { D(fprintf(stderr, "%*c+ raise_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'raise'")); @@ -2826,7 +2826,7 @@ global_stmt_rule(Parser *p) Token * _keyword; asdl_expr_seq* a; if ( - (_keyword = _PyPegen_expect_token(p, 508)) // token='global' + (_keyword = _PyPegen_expect_token(p, 523)) // token='global' && (a = (asdl_expr_seq*)_gather_18_rule(p)) // ','.NAME+ ) @@ -2888,7 +2888,7 @@ nonlocal_stmt_rule(Parser *p) Token * _keyword; asdl_expr_seq* a; if ( - (_keyword = _PyPegen_expect_token(p, 509)) // token='nonlocal' + (_keyword = _PyPegen_expect_token(p, 524)) // token='nonlocal' && (a = (asdl_expr_seq*)_gather_20_rule(p)) // ','.NAME+ ) @@ -2950,7 +2950,7 @@ del_stmt_rule(Parser *p) Token * _keyword; asdl_expr_seq* a; if ( - (_keyword = _PyPegen_expect_token(p, 503)) // token='del' + (_keyword = _PyPegen_expect_token(p, 597)) // token='del' && (a = del_targets_rule(p)) // del_targets && @@ -3093,7 +3093,7 @@ assert_stmt_rule(Parser *p) expr_ty a; void *b; if ( - (_keyword = _PyPegen_expect_token(p, 505)) // token='assert' + (_keyword = _PyPegen_expect_token(p, 526)) // token='assert' && (a = expression_rule(p)) // expression && @@ -3212,7 +3212,7 @@ import_name_rule(Parser *p) Token * _keyword; asdl_alias_seq* a; if ( - (_keyword = _PyPegen_expect_token(p, 513)) // token='import' + (_keyword = _PyPegen_expect_token(p, 531)) // token='import' && (a = dotted_as_names_rule(p)) // dotted_as_names ) @@ -3279,13 +3279,13 @@ import_from_rule(Parser *p) expr_ty b; asdl_alias_seq* c; if ( - (_keyword = _PyPegen_expect_token(p, 514)) // token='from' + (_keyword = _PyPegen_expect_token(p, 569)) // token='from' && (a = _loop0_24_rule(p)) // (('.' | '...'))* && (b = dotted_name_rule(p)) // dotted_name && - (_keyword_1 = _PyPegen_expect_token(p, 513)) // token='import' + (_keyword_1 = _PyPegen_expect_token(p, 531)) // token='import' && (c = import_from_targets_rule(p)) // import_from_targets ) @@ -3323,11 +3323,11 @@ import_from_rule(Parser *p) asdl_seq * a; asdl_alias_seq* b; if ( - (_keyword = _PyPegen_expect_token(p, 514)) // token='from' + (_keyword = _PyPegen_expect_token(p, 569)) // token='from' && (a = _loop1_25_rule(p)) // (('.' 
| '...'))+ && - (_keyword_1 = _PyPegen_expect_token(p, 513)) // token='import' + (_keyword_1 = _PyPegen_expect_token(p, 531)) // token='import' && (b = import_from_targets_rule(p)) // import_from_targets ) @@ -4051,7 +4051,7 @@ class_def_raw_rule(Parser *p) void *b; asdl_stmt_seq* c; if ( - (_keyword = _PyPegen_expect_token(p, 515)) // token='class' + (_keyword = _PyPegen_expect_token(p, 633)) // token='class' && (a = _PyPegen_name_token(p)) // NAME && @@ -4211,7 +4211,7 @@ function_def_raw_rule(Parser *p) void *params; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 516)) // token='def' + (_keyword = _PyPegen_expect_token(p, 632)) // token='def' && (n = _PyPegen_name_token(p)) // NAME && @@ -4271,7 +4271,7 @@ function_def_raw_rule(Parser *p) if ( (async_var = _PyPegen_expect_token(p, ASYNC)) // token='ASYNC' && - (_keyword = _PyPegen_expect_token(p, 516)) // token='def' + (_keyword = _PyPegen_expect_token(p, 632)) // token='def' && (n = _PyPegen_name_token(p)) // NAME && @@ -5319,7 +5319,7 @@ if_stmt_rule(Parser *p) asdl_stmt_seq* b; stmt_ty c; if ( - (_keyword = _PyPegen_expect_token(p, 510)) // token='if' + (_keyword = _PyPegen_expect_token(p, 624)) // token='if' && (a = named_expression_rule(p)) // named_expression && @@ -5364,7 +5364,7 @@ if_stmt_rule(Parser *p) asdl_stmt_seq* b; void *c; if ( - (_keyword = _PyPegen_expect_token(p, 510)) // token='if' + (_keyword = _PyPegen_expect_token(p, 624)) // token='if' && (a = named_expression_rule(p)) // named_expression && @@ -5457,7 +5457,7 @@ elif_stmt_rule(Parser *p) asdl_stmt_seq* b; stmt_ty c; if ( - (_keyword = _PyPegen_expect_token(p, 517)) // token='elif' + (_keyword = _PyPegen_expect_token(p, 626)) // token='elif' && (a = named_expression_rule(p)) // named_expression && @@ -5502,7 +5502,7 @@ elif_stmt_rule(Parser *p) asdl_stmt_seq* b; void *c; if ( - (_keyword = _PyPegen_expect_token(p, 517)) // token='elif' + (_keyword = _PyPegen_expect_token(p, 626)) // token='elif' && (a = named_expression_rule(p)) // named_expression && @@ -5581,7 +5581,7 @@ else_block_rule(Parser *p) Token * _literal; asdl_stmt_seq* b; if ( - (_keyword = _PyPegen_expect_token(p, 518)) // token='else' + (_keyword = _PyPegen_expect_token(p, 627)) // token='else' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -5658,7 +5658,7 @@ while_stmt_rule(Parser *p) asdl_stmt_seq* b; void *c; if ( - (_keyword = _PyPegen_expect_token(p, 512)) // token='while' + (_keyword = _PyPegen_expect_token(p, 629)) // token='while' && (a = named_expression_rule(p)) // named_expression && @@ -5756,11 +5756,11 @@ for_stmt_rule(Parser *p) expr_ty t; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 519)) // token='for' + (_keyword = _PyPegen_expect_token(p, 630)) // token='for' && (t = star_targets_rule(p)) // star_targets && - (_keyword_1 = _PyPegen_expect_token(p, 520)) // token='in' + (_keyword_1 = _PyPegen_expect_token(p, 631)) // token='in' && (_cut_var = 1) && @@ -5820,11 +5820,11 @@ for_stmt_rule(Parser *p) if ( (async_var = _PyPegen_expect_token(p, ASYNC)) // token='ASYNC' && - (_keyword = _PyPegen_expect_token(p, 519)) // token='for' + (_keyword = _PyPegen_expect_token(p, 630)) // token='for' && (t = star_targets_rule(p)) // star_targets && - (_keyword_1 = _PyPegen_expect_token(p, 520)) // token='in' + (_keyword_1 = _PyPegen_expect_token(p, 631)) // token='in' && (_cut_var = 1) && @@ -5950,7 +5950,7 @@ with_stmt_rule(Parser *p) asdl_withitem_seq* a; asdl_stmt_seq* b; if ( - (_keyword = _PyPegen_expect_token(p, 521)) // token='with' + 
(_keyword = _PyPegen_expect_token(p, 606)) // token='with' && (_literal = _PyPegen_expect_token(p, 7)) // token='(' && @@ -5999,7 +5999,7 @@ with_stmt_rule(Parser *p) asdl_stmt_seq* b; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 521)) // token='with' + (_keyword = _PyPegen_expect_token(p, 606)) // token='with' && (a = (asdl_withitem_seq*)_gather_52_rule(p)) // ','.with_item+ && @@ -6050,7 +6050,7 @@ with_stmt_rule(Parser *p) if ( (async_var = _PyPegen_expect_token(p, ASYNC)) // token='ASYNC' && - (_keyword = _PyPegen_expect_token(p, 521)) // token='with' + (_keyword = _PyPegen_expect_token(p, 606)) // token='with' && (_literal = _PyPegen_expect_token(p, 7)) // token='(' && @@ -6102,7 +6102,7 @@ with_stmt_rule(Parser *p) if ( (async_var = _PyPegen_expect_token(p, ASYNC)) // token='ASYNC' && - (_keyword = _PyPegen_expect_token(p, 521)) // token='with' + (_keyword = _PyPegen_expect_token(p, 606)) // token='with' && (a = (asdl_withitem_seq*)_gather_56_rule(p)) // ','.with_item+ && @@ -6186,7 +6186,7 @@ with_item_rule(Parser *p) if ( (e = expression_rule(p)) // expression && - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (t = star_target_rule(p)) // star_target && @@ -6308,7 +6308,7 @@ try_stmt_rule(Parser *p) asdl_stmt_seq* b; asdl_stmt_seq* f; if ( - (_keyword = _PyPegen_expect_token(p, 511)) // token='try' + (_keyword = _PyPegen_expect_token(p, 609)) // token='try' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -6352,7 +6352,7 @@ try_stmt_rule(Parser *p) asdl_excepthandler_seq* ex; void *f; if ( - (_keyword = _PyPegen_expect_token(p, 511)) // token='try' + (_keyword = _PyPegen_expect_token(p, 609)) // token='try' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -6448,7 +6448,7 @@ except_block_rule(Parser *p) expr_ty e; void *t; if ( - (_keyword = _PyPegen_expect_token(p, 523)) // token='except' + (_keyword = _PyPegen_expect_token(p, 620)) // token='except' && (e = expression_rule(p)) // expression && @@ -6491,7 +6491,7 @@ except_block_rule(Parser *p) Token * _literal; asdl_stmt_seq* b; if ( - (_keyword = _PyPegen_expect_token(p, 523)) // token='except' + (_keyword = _PyPegen_expect_token(p, 620)) // token='except' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -6585,7 +6585,7 @@ finally_block_rule(Parser *p) Token * _literal; asdl_stmt_seq* a; if ( - (_keyword = _PyPegen_expect_token(p, 524)) // token='finally' + (_keyword = _PyPegen_expect_token(p, 617)) // token='finally' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -6885,7 +6885,7 @@ guard_rule(Parser *p) Token * _keyword; expr_ty guard; if ( - (_keyword = _PyPegen_expect_token(p, 510)) // token='if' + (_keyword = _PyPegen_expect_token(p, 624)) // token='if' && (guard = named_expression_rule(p)) // named_expression ) @@ -7074,7 +7074,7 @@ as_pattern_rule(Parser *p) if ( (pattern = or_pattern_rule(p)) // or_pattern && - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (target = pattern_capture_target_rule(p)) // pattern_capture_target ) @@ -7497,7 +7497,7 @@ literal_pattern_rule(Parser *p) D(fprintf(stderr, "%*c> literal_pattern[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'None'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 525)) // token='None' + (_keyword = _PyPegen_expect_token(p, 595)) // token='None' ) { D(fprintf(stderr, "%*c+ 
literal_pattern[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'None'")); @@ -7530,7 +7530,7 @@ literal_pattern_rule(Parser *p) D(fprintf(stderr, "%*c> literal_pattern[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'True'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 526)) // token='True' + (_keyword = _PyPegen_expect_token(p, 594)) // token='True' ) { D(fprintf(stderr, "%*c+ literal_pattern[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'True'")); @@ -7563,7 +7563,7 @@ literal_pattern_rule(Parser *p) D(fprintf(stderr, "%*c> literal_pattern[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'False'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 527)) // token='False' + (_keyword = _PyPegen_expect_token(p, 596)) // token='False' ) { D(fprintf(stderr, "%*c+ literal_pattern[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'False'")); @@ -7687,7 +7687,7 @@ literal_expr_rule(Parser *p) D(fprintf(stderr, "%*c> literal_expr[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'None'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 525)) // token='None' + (_keyword = _PyPegen_expect_token(p, 595)) // token='None' ) { D(fprintf(stderr, "%*c+ literal_expr[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'None'")); @@ -7720,7 +7720,7 @@ literal_expr_rule(Parser *p) D(fprintf(stderr, "%*c> literal_expr[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'True'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 526)) // token='True' + (_keyword = _PyPegen_expect_token(p, 594)) // token='True' ) { D(fprintf(stderr, "%*c+ literal_expr[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'True'")); @@ -7753,7 +7753,7 @@ literal_expr_rule(Parser *p) D(fprintf(stderr, "%*c> literal_expr[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'False'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 527)) // token='False' + (_keyword = _PyPegen_expect_token(p, 596)) // token='False' ) { D(fprintf(stderr, "%*c+ literal_expr[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'False'")); @@ -9776,11 +9776,11 @@ expression_rule(Parser *p) if ( (a = disjunction_rule(p)) // disjunction && - (_keyword = _PyPegen_expect_token(p, 510)) // token='if' + (_keyword = _PyPegen_expect_token(p, 624)) // token='if' && (b = disjunction_rule(p)) // disjunction && - (_keyword_1 = _PyPegen_expect_token(p, 518)) // token='else' + (_keyword_1 = _PyPegen_expect_token(p, 627)) // token='else' && (c = expression_rule(p)) // expression ) @@ -9882,9 +9882,9 @@ yield_expr_rule(Parser *p) Token * _keyword_1; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 504)) // token='yield' + (_keyword = _PyPegen_expect_token(p, 570)) // token='yield' && - (_keyword_1 = _PyPegen_expect_token(p, 514)) // token='from' + (_keyword_1 = _PyPegen_expect_token(p, 569)) // token='from' && (a = expression_rule(p)) // expression ) @@ -9920,7 +9920,7 @@ yield_expr_rule(Parser *p) Token * _keyword; void *a; if ( - (_keyword = _PyPegen_expect_token(p, 504)) // token='yield' + (_keyword = _PyPegen_expect_token(p, 570)) // token='yield' && (a = star_expressions_rule(p), 1) // star_expressions? 
) @@ -10642,7 +10642,7 @@ inversion_rule(Parser *p) Token * _keyword; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 528)) // token='not' + (_keyword = _PyPegen_expect_token(p, 578)) // token='not' && (a = inversion_rule(p)) // inversion ) @@ -11278,9 +11278,9 @@ notin_bitwise_or_rule(Parser *p) Token * _keyword_1; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 528)) // token='not' + (_keyword = _PyPegen_expect_token(p, 578)) // token='not' && - (_keyword_1 = _PyPegen_expect_token(p, 520)) // token='in' + (_keyword_1 = _PyPegen_expect_token(p, 631)) // token='in' && (a = bitwise_or_rule(p)) // bitwise_or ) @@ -11324,7 +11324,7 @@ in_bitwise_or_rule(Parser *p) Token * _keyword; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 520)) // token='in' + (_keyword = _PyPegen_expect_token(p, 631)) // token='in' && (a = bitwise_or_rule(p)) // bitwise_or ) @@ -11369,9 +11369,9 @@ isnot_bitwise_or_rule(Parser *p) Token * _keyword_1; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 529)) // token='is' + (_keyword = _PyPegen_expect_token(p, 579)) // token='is' && - (_keyword_1 = _PyPegen_expect_token(p, 528)) // token='not' + (_keyword_1 = _PyPegen_expect_token(p, 578)) // token='not' && (a = bitwise_or_rule(p)) // bitwise_or ) @@ -11415,7 +11415,7 @@ is_bitwise_or_rule(Parser *p) Token * _keyword; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 529)) // token='is' + (_keyword = _PyPegen_expect_token(p, 579)) // token='is' && (a = bitwise_or_rule(p)) // bitwise_or ) @@ -13196,7 +13196,7 @@ atom_rule(Parser *p) D(fprintf(stderr, "%*c> atom[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'True'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 526)) // token='True' + (_keyword = _PyPegen_expect_token(p, 594)) // token='True' ) { D(fprintf(stderr, "%*c+ atom[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'True'")); @@ -13229,7 +13229,7 @@ atom_rule(Parser *p) D(fprintf(stderr, "%*c> atom[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'False'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 527)) // token='False' + (_keyword = _PyPegen_expect_token(p, 596)) // token='False' ) { D(fprintf(stderr, "%*c+ atom[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'False'")); @@ -13262,7 +13262,7 @@ atom_rule(Parser *p) D(fprintf(stderr, "%*c> atom[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'None'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 525)) // token='None' + (_keyword = _PyPegen_expect_token(p, 595)) // token='None' ) { D(fprintf(stderr, "%*c+ atom[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'None'")); @@ -13526,7 +13526,7 @@ lambdef_rule(Parser *p) void *a; expr_ty b; if ( - (_keyword = _PyPegen_expect_token(p, 530)) // token='lambda' + (_keyword = _PyPegen_expect_token(p, 583)) // token='lambda' && (a = lambda_params_rule(p), 1) // lambda_params? 
&& @@ -14958,11 +14958,11 @@ for_if_clause_rule(Parser *p) if ( (async_var = _PyPegen_expect_token(p, ASYNC)) // token='ASYNC' && - (_keyword = _PyPegen_expect_token(p, 519)) // token='for' + (_keyword = _PyPegen_expect_token(p, 630)) // token='for' && (a = star_targets_rule(p)) // star_targets && - (_keyword_1 = _PyPegen_expect_token(p, 520)) // token='in' + (_keyword_1 = _PyPegen_expect_token(p, 631)) // token='in' && (_cut_var = 1) && @@ -15001,11 +15001,11 @@ for_if_clause_rule(Parser *p) expr_ty b; asdl_expr_seq* c; if ( - (_keyword = _PyPegen_expect_token(p, 519)) // token='for' + (_keyword = _PyPegen_expect_token(p, 630)) // token='for' && (a = star_targets_rule(p)) // star_targets && - (_keyword_1 = _PyPegen_expect_token(p, 520)) // token='in' + (_keyword_1 = _PyPegen_expect_token(p, 631)) // token='in' && (_cut_var = 1) && @@ -18085,11 +18085,11 @@ expression_without_invalid_rule(Parser *p) if ( (a = disjunction_rule(p)) // disjunction && - (_keyword = _PyPegen_expect_token(p, 510)) // token='if' + (_keyword = _PyPegen_expect_token(p, 624)) // token='if' && (b = disjunction_rule(p)) // disjunction && - (_keyword_1 = _PyPegen_expect_token(p, 518)) // token='else' + (_keyword_1 = _PyPegen_expect_token(p, 627)) // token='else' && (c = expression_rule(p)) // expression ) @@ -18280,7 +18280,7 @@ invalid_expression_rule(Parser *p) if ( (a = disjunction_rule(p)) // disjunction && - (_keyword = _PyPegen_expect_token(p, 510)) // token='if' + (_keyword = _PyPegen_expect_token(p, 624)) // token='if' && (b = disjunction_rule(p)) // disjunction && @@ -18736,7 +18736,7 @@ invalid_del_stmt_rule(Parser *p) Token * _keyword; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 503)) // token='del' + (_keyword = _PyPegen_expect_token(p, 597)) // token='del' && (a = star_expressions_rule(p)) // star_expressions ) @@ -19386,7 +19386,7 @@ invalid_with_item_rule(Parser *p) if ( (expression_var = expression_rule(p)) // expression && - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (a = expression_rule(p)) // expression && @@ -19436,7 +19436,7 @@ invalid_for_target_rule(Parser *p) if ( (_opt_var = _PyPegen_expect_token(p, ASYNC), 1) // ASYNC? && - (_keyword = _PyPegen_expect_token(p, 519)) // token='for' + (_keyword = _PyPegen_expect_token(p, 630)) // token='for' && (a = star_expressions_rule(p)) // star_expressions ) @@ -19614,7 +19614,7 @@ invalid_with_stmt_rule(Parser *p) if ( (_opt_var = _PyPegen_expect_token(p, ASYNC), 1) // ASYNC? && - (_keyword = _PyPegen_expect_token(p, 521)) // token='with' + (_keyword = _PyPegen_expect_token(p, 606)) // token='with' && (_gather_162_var = _gather_162_rule(p)) // ','.(expression ['as' star_target])+ && @@ -19647,7 +19647,7 @@ invalid_with_stmt_rule(Parser *p) if ( (_opt_var = _PyPegen_expect_token(p, ASYNC), 1) // ASYNC? && - (_keyword = _PyPegen_expect_token(p, 521)) // token='with' + (_keyword = _PyPegen_expect_token(p, 606)) // token='with' && (_literal = _PyPegen_expect_token(p, 7)) // token='(' && @@ -19702,7 +19702,7 @@ invalid_with_stmt_indent_rule(Parser *p) if ( (_opt_var = _PyPegen_expect_token(p, ASYNC), 1) // ASYNC? && - (a = _PyPegen_expect_token(p, 521)) // token='with' + (a = _PyPegen_expect_token(p, 606)) // token='with' && (_gather_166_var = _gather_166_rule(p)) // ','.(expression ['as' star_target])+ && @@ -19745,7 +19745,7 @@ invalid_with_stmt_indent_rule(Parser *p) if ( (_opt_var = _PyPegen_expect_token(p, ASYNC), 1) // ASYNC? 
&& - (a = _PyPegen_expect_token(p, 521)) // token='with' + (a = _PyPegen_expect_token(p, 606)) // token='with' && (_literal = _PyPegen_expect_token(p, 7)) // token='(' && @@ -19802,7 +19802,7 @@ invalid_try_stmt_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 511)) // token='try' + (a = _PyPegen_expect_token(p, 609)) // token='try' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -19834,7 +19834,7 @@ invalid_try_stmt_rule(Parser *p) Token * _literal; asdl_stmt_seq* block_var; if ( - (_keyword = _PyPegen_expect_token(p, 511)) // token='try' + (_keyword = _PyPegen_expect_token(p, 609)) // token='try' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -19890,7 +19890,7 @@ invalid_except_stmt_rule(Parser *p) expr_ty a; expr_ty expressions_var; if ( - (_keyword = _PyPegen_expect_token(p, 523)) // token='except' + (_keyword = _PyPegen_expect_token(p, 620)) // token='except' && (a = expression_rule(p)) // expression && @@ -19928,7 +19928,7 @@ invalid_except_stmt_rule(Parser *p) expr_ty expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 523)) // token='except' + (a = _PyPegen_expect_token(p, 620)) // token='except' && (expression_var = expression_rule(p)) // expression && @@ -19959,7 +19959,7 @@ invalid_except_stmt_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 523)) // token='except' + (a = _PyPegen_expect_token(p, 620)) // token='except' && (newline_var = _PyPegen_expect_token(p, NEWLINE)) // token='NEWLINE' ) @@ -20004,7 +20004,7 @@ invalid_finally_stmt_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 524)) // token='finally' + (a = _PyPegen_expect_token(p, 617)) // token='finally' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -20058,7 +20058,7 @@ invalid_except_stmt_indent_rule(Parser *p) expr_ty expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 523)) // token='except' + (a = _PyPegen_expect_token(p, 620)) // token='except' && (expression_var = expression_rule(p)) // expression && @@ -20094,7 +20094,7 @@ invalid_except_stmt_indent_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 523)) // token='except' + (a = _PyPegen_expect_token(p, 620)) // token='except' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -20319,7 +20319,7 @@ invalid_as_pattern_rule(Parser *p) if ( (or_pattern_var = or_pattern_rule(p)) // or_pattern && - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (a = _PyPegen_expect_soft_keyword(p, "_")) // soft_keyword='"_"' ) @@ -20349,7 +20349,7 @@ invalid_as_pattern_rule(Parser *p) if ( (or_pattern_var = or_pattern_rule(p)) // or_pattern && - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && _PyPegen_lookahead_with_name(0, _PyPegen_name_token, p) && @@ -20497,7 +20497,7 @@ invalid_if_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (_keyword = _PyPegen_expect_token(p, 510)) // token='if' + (_keyword = _PyPegen_expect_token(p, 624)) // token='if' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -20528,7 +20528,7 @@ invalid_if_stmt_rule(Parser *p) expr_ty a_1; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 510)) // token='if' + (a = _PyPegen_expect_token(p, 624)) // token='if' && (a_1 = named_expression_rule(p)) // 
named_expression && @@ -20581,7 +20581,7 @@ invalid_elif_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (_keyword = _PyPegen_expect_token(p, 517)) // token='elif' + (_keyword = _PyPegen_expect_token(p, 626)) // token='elif' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -20612,7 +20612,7 @@ invalid_elif_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 517)) // token='elif' + (a = _PyPegen_expect_token(p, 626)) // token='elif' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -20663,7 +20663,7 @@ invalid_else_stmt_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 518)) // token='else' + (a = _PyPegen_expect_token(p, 627)) // token='else' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -20714,7 +20714,7 @@ invalid_while_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (_keyword = _PyPegen_expect_token(p, 512)) // token='while' + (_keyword = _PyPegen_expect_token(p, 629)) // token='while' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -20745,7 +20745,7 @@ invalid_while_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 512)) // token='while' + (a = _PyPegen_expect_token(p, 629)) // token='while' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -20803,11 +20803,11 @@ invalid_for_stmt_rule(Parser *p) if ( (_opt_var = _PyPegen_expect_token(p, ASYNC), 1) // ASYNC? && - (a = _PyPegen_expect_token(p, 519)) // token='for' + (a = _PyPegen_expect_token(p, 630)) // token='for' && (star_targets_var = star_targets_rule(p)) // star_targets && - (_keyword = _PyPegen_expect_token(p, 520)) // token='in' + (_keyword = _PyPegen_expect_token(p, 631)) // token='in' && (star_expressions_var = star_expressions_rule(p)) // star_expressions && @@ -20870,7 +20870,7 @@ invalid_def_raw_rule(Parser *p) if ( (_opt_var = _PyPegen_expect_token(p, ASYNC), 1) // ASYNC? 
&& - (a = _PyPegen_expect_token(p, 516)) // token='def' + (a = _PyPegen_expect_token(p, 632)) // token='def' && (name_var = _PyPegen_name_token(p)) // NAME && @@ -20932,7 +20932,7 @@ invalid_class_def_raw_rule(Parser *p) expr_ty name_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 515)) // token='class' + (a = _PyPegen_expect_token(p, 633)) // token='class' && (name_var = _PyPegen_name_token(p)) // NAME && @@ -21510,7 +21510,7 @@ _tmp_6_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_6[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'import'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 513)) // token='import' + (_keyword = _PyPegen_expect_token(p, 531)) // token='import' ) { D(fprintf(stderr, "%*c+ _tmp_6[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'import'")); @@ -21529,7 +21529,7 @@ _tmp_6_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_6[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'from'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 514)) // token='from' + (_keyword = _PyPegen_expect_token(p, 569)) // token='from' ) { D(fprintf(stderr, "%*c+ _tmp_6[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'from'")); @@ -21565,7 +21565,7 @@ _tmp_7_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_7[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'def'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 516)) // token='def' + (_keyword = _PyPegen_expect_token(p, 632)) // token='def' ) { D(fprintf(stderr, "%*c+ _tmp_7[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'def'")); @@ -21639,7 +21639,7 @@ _tmp_8_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_8[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'class'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 515)) // token='class' + (_keyword = _PyPegen_expect_token(p, 633)) // token='class' ) { D(fprintf(stderr, "%*c+ _tmp_8[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'class'")); @@ -21694,7 +21694,7 @@ _tmp_9_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_9[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'with'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 521)) // token='with' + (_keyword = _PyPegen_expect_token(p, 606)) // token='with' ) { D(fprintf(stderr, "%*c+ _tmp_9[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'with'")); @@ -21749,7 +21749,7 @@ _tmp_10_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_10[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'for'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 519)) // token='for' + (_keyword = _PyPegen_expect_token(p, 630)) // token='for' ) { D(fprintf(stderr, "%*c+ _tmp_10[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'for'")); @@ -22140,7 +22140,7 @@ _tmp_17_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 514)) // token='from' + (_keyword = _PyPegen_expect_token(p, 569)) // token='from' && (z = expression_rule(p)) // expression ) @@ -22762,7 +22762,7 @@ _tmp_28_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (z = _PyPegen_name_token(p)) // NAME ) @@ -22920,7 +22920,7 @@ _tmp_31_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (z = _PyPegen_name_token(p)) // NAME ) @@ -24730,7 +24730,7 @@ _tmp_60_rule(Parser *p) Token * _keyword; expr_ty z; if 
( - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (z = _PyPegen_name_token(p)) // NAME ) @@ -29791,7 +29791,7 @@ _tmp_144_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_144[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'else'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 518)) // token='else' + (_keyword = _PyPegen_expect_token(p, 627)) // token='else' ) { D(fprintf(stderr, "%*c+ _tmp_144[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'else'")); @@ -29958,7 +29958,7 @@ _tmp_146_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_146[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'True'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 526)) // token='True' + (_keyword = _PyPegen_expect_token(p, 594)) // token='True' ) { D(fprintf(stderr, "%*c+ _tmp_146[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'True'")); @@ -29977,7 +29977,7 @@ _tmp_146_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_146[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'None'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 525)) // token='None' + (_keyword = _PyPegen_expect_token(p, 595)) // token='None' ) { D(fprintf(stderr, "%*c+ _tmp_146[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'None'")); @@ -29996,7 +29996,7 @@ _tmp_146_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_146[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'False'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 527)) // token='False' + (_keyword = _PyPegen_expect_token(p, 596)) // token='False' ) { D(fprintf(stderr, "%*c+ _tmp_146[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'False'")); @@ -31444,7 +31444,7 @@ _tmp_170_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_170[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'except'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 523)) // token='except' + (_keyword = _PyPegen_expect_token(p, 620)) // token='except' ) { D(fprintf(stderr, "%*c+ _tmp_170[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'except'")); @@ -31463,7 +31463,7 @@ _tmp_170_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_170[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'finally'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 524)) // token='finally' + (_keyword = _PyPegen_expect_token(p, 617)) // token='finally' ) { D(fprintf(stderr, "%*c+ _tmp_170[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'finally'")); @@ -31500,7 +31500,7 @@ _tmp_171_rule(Parser *p) Token * _keyword; expr_ty name_var; if ( - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (name_var = _PyPegen_name_token(p)) // NAME ) @@ -31539,7 +31539,7 @@ _tmp_172_rule(Parser *p) Token * _keyword; expr_ty name_var; if ( - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (name_var = _PyPegen_name_token(p)) // NAME ) @@ -31578,7 +31578,7 @@ _tmp_173_rule(Parser *p) Token * _keyword; expr_ty name_var; if ( - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (name_var = _PyPegen_name_token(p)) // NAME ) @@ -32196,7 +32196,7 @@ _tmp_186_rule(Parser *p) Token * _keyword; expr_ty c; if ( - (_keyword = _PyPegen_expect_token(p, 531)) // token='or' + (_keyword = _PyPegen_expect_token(p, 571)) // token='or' && (c = conjunction_rule(p)) // conjunction ) @@ 
-32240,7 +32240,7 @@ _tmp_187_rule(Parser *p) Token * _keyword; expr_ty c; if ( - (_keyword = _PyPegen_expect_token(p, 532)) // token='and' + (_keyword = _PyPegen_expect_token(p, 572)) // token='and' && (c = inversion_rule(p)) // inversion ) @@ -32284,7 +32284,7 @@ _tmp_188_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 510)) // token='if' + (_keyword = _PyPegen_expect_token(p, 624)) // token='if' && (z = disjunction_rule(p)) // disjunction ) @@ -32328,7 +32328,7 @@ _tmp_189_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 510)) // token='if' + (_keyword = _PyPegen_expect_token(p, 624)) // token='if' && (z = disjunction_rule(p)) // disjunction ) @@ -32922,7 +32922,7 @@ _tmp_202_rule(Parser *p) Token * _keyword; expr_ty star_target_var; if ( - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (star_target_var = star_target_rule(p)) // star_target ) @@ -32961,7 +32961,7 @@ _tmp_203_rule(Parser *p) Token * _keyword; expr_ty star_target_var; if ( - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (star_target_var = star_target_rule(p)) // star_target ) @@ -33000,7 +33000,7 @@ _tmp_204_rule(Parser *p) Token * _keyword; expr_ty star_target_var; if ( - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (star_target_var = star_target_rule(p)) // star_target ) @@ -33039,7 +33039,7 @@ _tmp_205_rule(Parser *p) Token * _keyword; expr_ty star_target_var; if ( - (_keyword = _PyPegen_expect_token(p, 522)) // token='as' + (_keyword = _PyPegen_expect_token(p, 622)) // token='as' && (star_target_var = star_target_rule(p)) // star_target ) diff --git a/Tools/peg_generator/pegen/__main__.py b/Tools/peg_generator/pegen/__main__.py index a12fe787f42..2910d6ccf1c 100755 --- a/Tools/peg_generator/pegen/__main__.py +++ b/Tools/peg_generator/pegen/__main__.py @@ -10,10 +10,9 @@ import sys import time import token import traceback - from typing import Tuple -from pegen.build import Grammar, Parser, Tokenizer, ParserGenerator +from pegen.build import Grammar, Parser, ParserGenerator, Tokenizer from pegen.validator import validate_grammar diff --git a/Tools/peg_generator/pegen/build.py b/Tools/peg_generator/pegen/build.py index 6f0a091ff47..bf01078ff0b 100644 --- a/Tools/peg_generator/pegen/build.py +++ b/Tools/peg_generator/pegen/build.py @@ -1,11 +1,10 @@ +import itertools import pathlib import shutil -import tokenize import sysconfig import tempfile -import itertools - -from typing import Optional, Tuple, List, IO, Set, Dict +import tokenize +from typing import IO, Dict, List, Optional, Set, Tuple from pegen.c_generator import CParserGenerator from pegen.grammar import Grammar @@ -45,9 +44,9 @@ def compile_c_extension( of distutils (this is useful in case you want to use a temporary directory). 
""" import distutils.log - from distutils.core import Distribution, Extension - from distutils.command.clean import clean # type: ignore from distutils.command.build_ext import build_ext # type: ignore + from distutils.command.clean import clean # type: ignore + from distutils.core import Distribution, Extension from distutils.tests.support import fixup_build_ext # type: ignore if verbose: diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py index e928fd3de17..d15e91098df 100644 --- a/Tools/peg_generator/pegen/c_generator.py +++ b/Tools/peg_generator/pegen/c_generator.py @@ -1,8 +1,8 @@ import ast -from dataclasses import field, dataclass import re -from typing import Any, Dict, IO, Optional, List, Text, Tuple, Set +from dataclasses import dataclass, field from enum import Enum +from typing import IO, Any, Dict, List, Optional, Set, Text, Tuple from pegen import grammar from pegen.grammar import ( @@ -27,7 +27,6 @@ from pegen.grammar import ( ) from pegen.parser_generator import ParserGenerator - EXTENSION_PREFIX = """\ #include "pegen.h" @@ -120,23 +119,18 @@ class CCallMakerVisitor(GrammarVisitor): self.exact_tokens = exact_tokens self.non_exact_tokens = non_exact_tokens self.cache: Dict[Any, FunctionCall] = {} - self.keyword_cache: Dict[str, int] = {} - self.soft_keywords: Set[str] = set() def keyword_helper(self, keyword: str) -> FunctionCall: - if keyword not in self.keyword_cache: - self.keyword_cache[keyword] = self.gen.keyword_type() return FunctionCall( assigned_variable="_keyword", function="_PyPegen_expect_token", - arguments=["p", self.keyword_cache[keyword]], + arguments=["p", self.gen.keywords[keyword]], return_type="Token *", nodetype=NodeTypes.KEYWORD, comment=f"token='{keyword}'", ) def soft_keyword_helper(self, value: str) -> FunctionCall: - self.soft_keywords.add(value.replace('"', "")) return FunctionCall( assigned_variable="_keyword", function="_PyPegen_expect_soft_keyword", @@ -200,20 +194,12 @@ class CCallMakerVisitor(GrammarVisitor): ) def visit_Rhs(self, node: Rhs) -> FunctionCall: - def can_we_inline(node: Rhs) -> int: - if len(node.alts) != 1 or len(node.alts[0].items) != 1: - return False - # If the alternative has an action we cannot inline - if getattr(node.alts[0], "action", None) is not None: - return False - return True - if node in self.cache: return self.cache[node] - if can_we_inline(node): + if node.can_be_inlined: self.cache[node] = self.generate_call(node.alts[0].items[0]) else: - name = self.gen.name_node(node) + name = self.gen.artifical_rule_from_rhs(node) self.cache[node] = FunctionCall( assigned_variable=f"{name}_var", function=f"{name}_rule", @@ -306,7 +292,7 @@ class CCallMakerVisitor(GrammarVisitor): def visit_Repeat0(self, node: Repeat0) -> FunctionCall: if node in self.cache: return self.cache[node] - name = self.gen.name_loop(node.node, False) + name = self.gen.artificial_rule_from_repeat(node.node, False) self.cache[node] = FunctionCall( assigned_variable=f"{name}_var", function=f"{name}_rule", @@ -319,7 +305,7 @@ class CCallMakerVisitor(GrammarVisitor): def visit_Repeat1(self, node: Repeat1) -> FunctionCall: if node in self.cache: return self.cache[node] - name = self.gen.name_loop(node.node, True) + name = self.gen.artificial_rule_from_repeat(node.node, True) self.cache[node] = FunctionCall( assigned_variable=f"{name}_var", function=f"{name}_rule", @@ -332,7 +318,7 @@ class CCallMakerVisitor(GrammarVisitor): def visit_Gather(self, node: Gather) -> FunctionCall: if node in self.cache: return 
self.cache[node] - name = self.gen.name_gather(node) + name = self.gen.artifical_rule_from_gather(node) self.cache[node] = FunctionCall( assigned_variable=f"{name}_var", function=f"{name}_rule", @@ -429,7 +415,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): self.print(f"}}") def generate(self, filename: str) -> None: - self.collect_todo() + self.collect_rules() self.print(f"// @generated by pegen from {filename}") header = self.grammar.metas.get("header", EXTENSION_PREFIX) if header: @@ -439,11 +425,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): self.print(subheader) self._setup_keywords() self._setup_soft_keywords() - for i, (rulename, rule) in enumerate(self.todo.items(), 1000): + for i, (rulename, rule) in enumerate(self.all_rules.items(), 1000): comment = " // Left-recursive" if rule.left_recursive else "" self.print(f"#define {rulename}_type {i}{comment}") self.print() - for rulename, rule in self.todo.items(): + for rulename, rule in self.all_rules.items(): if rule.is_loop() or rule.is_gather(): type = "asdl_seq *" elif rule.type: @@ -452,13 +438,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): type = "void *" self.print(f"static {type}{rulename}_rule(Parser *p);") self.print() - while self.todo: - for rulename, rule in list(self.todo.items()): - del self.todo[rulename] - self.print() - if rule.left_recursive: - self.print("// Left-recursive") - self.visit(rule) + for rulename, rule in list(self.all_rules.items()): + self.print() + if rule.left_recursive: + self.print("// Left-recursive") + self.visit(rule) if self.skip_actions: mode = 0 else: @@ -472,7 +456,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): def _group_keywords_by_length(self) -> Dict[int, List[Tuple[str, int]]]: groups: Dict[int, List[Tuple[str, int]]] = {} - for keyword_str, keyword_type in self.callmakervisitor.keyword_cache.items(): + for keyword_str, keyword_type in self.keywords.items(): length = len(keyword_str) if length in groups: groups[length].append((keyword_str, keyword_type)) @@ -481,9 +465,8 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): return groups def _setup_keywords(self) -> None: - keyword_cache = self.callmakervisitor.keyword_cache n_keyword_lists = ( - len(max(keyword_cache.keys(), key=len)) + 1 if len(keyword_cache) > 0 else 0 + len(max(self.keywords.keys(), key=len)) + 1 if len(self.keywords) > 0 else 0 ) self.print(f"static const int n_keyword_lists = {n_keyword_lists};") groups = self._group_keywords_by_length() @@ -503,7 +486,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor): self.print("};") def _setup_soft_keywords(self) -> None: - soft_keywords = sorted(self.callmakervisitor.soft_keywords) + soft_keywords = sorted(self.soft_keywords) self.print("static char *soft_keywords[] = {") with self.indent(): for keyword in soft_keywords: diff --git a/Tools/peg_generator/pegen/first_sets.py b/Tools/peg_generator/pegen/first_sets.py index 50ced22c2a5..611ef514d09 100755 --- a/Tools/peg_generator/pegen/first_sets.py +++ b/Tools/peg_generator/pegen/first_sets.py @@ -3,30 +3,27 @@ import argparse import pprint import sys -from typing import Set, Dict +from typing import Dict, Set from pegen.build import build_parser from pegen.grammar import ( Alt, Cut, Gather, - Grammar, GrammarVisitor, Group, - Leaf, Lookahead, NamedItem, NameLeaf, NegativeLookahead, Opt, - Repeat, Repeat0, Repeat1, Rhs, Rule, StringLeaf, - PositiveLookahead, ) +from pegen.parser_generator import compute_nullables argparser = argparse.ArgumentParser( 
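The `_setup_keywords` / `_group_keywords_by_length` hunks above emit the `reserved_keywords` table seen at the top of Parser/parser.c, bucketed by keyword length so the C parser can resolve a NAME token with an index by length plus a short linear scan. A sketch of the grouping, where `group_keywords_by_length` mirrors `_group_keywords_by_length`; the lookup helper is illustrative (the actual C-side lookup is not in this diff), and the sample keyword-to-type values are taken from the regenerated table:

```python
from typing import Dict, List, Tuple

# Sample mapping; in the generator these values come from keyword_type()
# during collect_rules(), hence the non-contiguous numbering in parser.c.
keywords: Dict[str, int] = {"if": 624, "del": 597, "pass": 504, "while": 629}

def group_keywords_by_length(kw: Dict[str, int]) -> Dict[int, List[Tuple[str, int]]]:
    groups: Dict[int, List[Tuple[str, int]]] = {}
    for keyword_str, keyword_type in kw.items():
        groups.setdefault(len(keyword_str), []).append((keyword_str, keyword_type))
    return groups

def keyword_token_type(name: str, groups: Dict[int, List[Tuple[str, int]]]) -> int:
    # Length lookup plus a short linear scan, like reserved_keywords[] in C.
    return next((t for k, t in groups.get(len(name), []) if k == name), -1)

groups = group_keywords_by_length(keywords)
assert keyword_token_type("while", groups) == 629
assert keyword_token_type("spam", groups) == -1
```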
prog="calculate_first_sets", @@ -38,8 +35,7 @@ argparser.add_argument("grammar_file", help="The grammar file") class FirstSetCalculator(GrammarVisitor): def __init__(self, rules: Dict[str, Rule]) -> None: self.rules = rules - for rule in rules.values(): - rule.nullable_visit(rules) + self.nullables = compute_nullables(rules) self.first_sets: Dict[str, Set[str]] = dict() self.in_process: Set[str] = set() @@ -129,7 +125,7 @@ class FirstSetCalculator(GrammarVisitor): elif item.name not in self.first_sets: self.in_process.add(item.name) terminals = self.visit(item.rhs) - if item.nullable: + if item in self.nullables: terminals.add("") self.first_sets[item.name] = terminals self.in_process.remove(item.name) diff --git a/Tools/peg_generator/pegen/grammar.py b/Tools/peg_generator/pegen/grammar.py index 66fd5b329a5..fa47b98201c 100644 --- a/Tools/peg_generator/pegen/grammar.py +++ b/Tools/peg_generator/pegen/grammar.py @@ -2,6 +2,7 @@ from __future__ import annotations from abc import abstractmethod from typing import ( + TYPE_CHECKING, AbstractSet, Any, Dict, @@ -11,11 +12,9 @@ from typing import ( Optional, Set, Tuple, - TYPE_CHECKING, Union, ) - if TYPE_CHECKING: from pegen.parser_generator import ParserGenerator @@ -31,7 +30,7 @@ class GrammarVisitor: visitor = getattr(self, method, self.generic_visit) return visitor(node, *args, **kwargs) - def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> None: + def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Any: """Called if no explicit visitor function exists for a node.""" for value in node: if isinstance(value, list): @@ -73,8 +72,6 @@ class Rule: self.type = type self.rhs = rhs self.memo = bool(memo) - self.visited = False - self.nullable = False self.left_recursive = False self.leader = False @@ -101,17 +98,6 @@ class Rule: def __iter__(self) -> Iterator[Rhs]: yield self.rhs - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - if self.visited: - # A left-recursive rule is considered non-nullable. - return False - self.visited = True - self.nullable = self.rhs.nullable_visit(rules) - return self.nullable - - def initial_names(self) -> AbstractSet[str]: - return self.rhs.initial_names() - def flatten(self) -> Rhs: # If it's a single parenthesized group, flatten it. rhs = self.rhs @@ -124,10 +110,6 @@ class Rule: rhs = rhs.alts[0].items[0].item.rhs return rhs - def collect_todo(self, gen: ParserGenerator) -> None: - rhs = self.flatten() - rhs.collect_todo(gen) - class Leaf: def __init__(self, value: str): @@ -140,14 +122,6 @@ class Leaf: if False: yield - @abstractmethod - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - raise NotImplementedError - - @abstractmethod - def initial_names(self) -> AbstractSet[str]: - raise NotImplementedError - class NameLeaf(Leaf): """The value is the name.""" @@ -160,15 +134,6 @@ class NameLeaf(Leaf): def __repr__(self) -> str: return f"NameLeaf({self.value!r})" - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - if self.value in rules: - return rules[self.value].nullable_visit(rules) - # Token or unknown; never empty. - return False - - def initial_names(self) -> AbstractSet[str]: - return {self.value} - class StringLeaf(Leaf): """The value is a string literal, including quotes.""" @@ -176,13 +141,6 @@ class StringLeaf(Leaf): def __repr__(self) -> str: return f"StringLeaf({self.value!r})" - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - # The string token '' is considered empty. 
- return not self.value - - def initial_names(self) -> AbstractSet[str]: - return set() - class Rhs: def __init__(self, alts: List[Alt]): @@ -198,21 +156,14 @@ class Rhs: def __iter__(self) -> Iterator[List[Alt]]: yield self.alts - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - for alt in self.alts: - if alt.nullable_visit(rules): - return True - return False - - def initial_names(self) -> AbstractSet[str]: - names: Set[str] = set() - for alt in self.alts: - names |= alt.initial_names() - return names - - def collect_todo(self, gen: ParserGenerator) -> None: - for alt in self.alts: - alt.collect_todo(gen) + @property + def can_be_inlined(self) -> bool: + if len(self.alts) != 1 or len(self.alts[0].items) != 1: + return False + # If the alternative has an action we cannot inline + if getattr(self.alts[0], "action", None) is not None: + return False + return True class Alt: @@ -239,31 +190,12 @@ class Alt: def __iter__(self) -> Iterator[List[NamedItem]]: yield self.items - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - for item in self.items: - if not item.nullable_visit(rules): - return False - return True - - def initial_names(self) -> AbstractSet[str]: - names: Set[str] = set() - for item in self.items: - names |= item.initial_names() - if not item.nullable: - break - return names - - def collect_todo(self, gen: ParserGenerator) -> None: - for item in self.items: - item.collect_todo(gen) - class NamedItem: def __init__(self, name: Optional[str], item: Item, type: Optional[str] = None): self.name = name self.item = item self.type = type - self.nullable = False def __str__(self) -> str: if not SIMPLE_STR and self.name: @@ -277,16 +209,6 @@ class NamedItem: def __iter__(self) -> Iterator[Item]: yield self.item - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - self.nullable = self.item.nullable_visit(rules) - return self.nullable - - def initial_names(self) -> AbstractSet[str]: - return self.item.initial_names() - - def collect_todo(self, gen: ParserGenerator) -> None: - gen.callmakervisitor.visit(self.item) - class Forced: def __init__(self, node: Plain): @@ -298,12 +220,6 @@ class Forced: def __iter__(self) -> Iterator[Plain]: yield self.node - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - return True - - def initial_names(self) -> AbstractSet[str]: - return set() - class Lookahead: def __init__(self, node: Plain, sign: str): @@ -316,12 +232,6 @@ class Lookahead: def __iter__(self) -> Iterator[Plain]: yield self.node - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - return True - - def initial_names(self) -> AbstractSet[str]: - return set() - class PositiveLookahead(Lookahead): def __init__(self, node: Plain): @@ -357,12 +267,6 @@ class Opt: def __iter__(self) -> Iterator[Item]: yield self.node - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - return True - - def initial_names(self) -> AbstractSet[str]: - return self.node.initial_names() - class Repeat: """Shared base class for x* and x+.""" @@ -371,16 +275,9 @@ class Repeat: self.node = node self.memo: Optional[Tuple[Optional[str], str]] = None - @abstractmethod - def nullable_visit(self, rules: Dict[str, Rule]) -> bool: - raise NotImplementedError - def __iter__(self) -> Iterator[Plain]: yield self.node - def initial_names(self) -> AbstractSet[str]: - return self.node.initial_names() - class Repeat0(Repeat): def __str__(self) -> str: @@ -394,9 +291,6 @@ class Repeat0(Repeat): def __repr__(self) -> str: return f"Repeat0({self.node!r})" - def nullable_visit(self, 
+    @property
+    def can_be_inlined(self) -> bool:
+        if len(self.alts) != 1 or len(self.alts[0].items) != 1:
+            return False
+        # If the alternative has an action, we cannot inline it
+        if getattr(self.alts[0], "action", None) is not None:
+            return False
+        return True
 
 
 class Alt:
@@ -239,31 +190,12 @@ class Alt:
     def __iter__(self) -> Iterator[List[NamedItem]]:
         yield self.items
 
-    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
-        for item in self.items:
-            if not item.nullable_visit(rules):
-                return False
-        return True
-
-    def initial_names(self) -> AbstractSet[str]:
-        names: Set[str] = set()
-        for item in self.items:
-            names |= item.initial_names()
-            if not item.nullable:
-                break
-        return names
-
-    def collect_todo(self, gen: ParserGenerator) -> None:
-        for item in self.items:
-            item.collect_todo(gen)
-
 
 class NamedItem:
     def __init__(self, name: Optional[str], item: Item, type: Optional[str] = None):
         self.name = name
         self.item = item
         self.type = type
-        self.nullable = False
 
     def __str__(self) -> str:
         if not SIMPLE_STR and self.name:
@@ -277,16 +209,6 @@ class NamedItem:
     def __iter__(self) -> Iterator[Item]:
         yield self.item
 
-    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
-        self.nullable = self.item.nullable_visit(rules)
-        return self.nullable
-
-    def initial_names(self) -> AbstractSet[str]:
-        return self.item.initial_names()
-
-    def collect_todo(self, gen: ParserGenerator) -> None:
-        gen.callmakervisitor.visit(self.item)
-
 
 class Forced:
     def __init__(self, node: Plain):
@@ -298,12 +220,6 @@ class Forced:
     def __iter__(self) -> Iterator[Plain]:
         yield self.node
 
-    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
-        return True
-
-    def initial_names(self) -> AbstractSet[str]:
-        return set()
-
 
 class Lookahead:
     def __init__(self, node: Plain, sign: str):
@@ -316,12 +232,6 @@ class Lookahead:
     def __iter__(self) -> Iterator[Plain]:
         yield self.node
 
-    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
-        return True
-
-    def initial_names(self) -> AbstractSet[str]:
-        return set()
-
 
 class PositiveLookahead(Lookahead):
     def __init__(self, node: Plain):
@@ -357,12 +267,6 @@ class Opt:
     def __iter__(self) -> Iterator[Item]:
         yield self.node
 
-    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
-        return True
-
-    def initial_names(self) -> AbstractSet[str]:
-        return self.node.initial_names()
-
 
 class Repeat:
     """Shared base class for x* and x+."""
@@ -371,16 +275,9 @@ class Repeat:
         self.node = node
         self.memo: Optional[Tuple[Optional[str], str]] = None
 
-    @abstractmethod
-    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
-        raise NotImplementedError
-
     def __iter__(self) -> Iterator[Plain]:
         yield self.node
 
-    def initial_names(self) -> AbstractSet[str]:
-        return self.node.initial_names()
-
 
 class Repeat0(Repeat):
     def __str__(self) -> str:
@@ -394,9 +291,6 @@ class Repeat0(Repeat):
     def __repr__(self) -> str:
         return f"Repeat0({self.node!r})"
 
-    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
-        return True
-
 
 class Repeat1(Repeat):
     def __str__(self) -> str:
@@ -410,9 +304,6 @@ class Repeat1(Repeat):
     def __repr__(self) -> str:
         return f"Repeat1({self.node!r})"
 
-    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
-        return False
-
 
 class Gather(Repeat):
     def __init__(self, separator: Plain, node: Plain):
@@ -425,9 +316,6 @@ class Gather(Repeat):
     def __repr__(self) -> str:
         return f"Gather({self.separator!r}, {self.node!r})"
 
-    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
-        return False
-
 
 class Group:
     def __init__(self, rhs: Rhs):
@@ -442,12 +330,6 @@ class Group:
     def __iter__(self) -> Iterator[Rhs]:
         yield self.rhs
 
-    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
-        return self.rhs.nullable_visit(rules)
-
-    def initial_names(self) -> AbstractSet[str]:
-        return self.rhs.initial_names()
-
 
 class Cut:
     def __init__(self) -> None:
@@ -468,9 +350,6 @@ class Cut:
             return NotImplemented
         return True
 
-    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
-        return True
-
     def initial_names(self) -> AbstractSet[str]:
         return set()
diff --git a/Tools/peg_generator/pegen/grammar_visualizer.py b/Tools/peg_generator/pegen/grammar_visualizer.py
index 7362ec5fa0f..ab5c6364f63 100644
--- a/Tools/peg_generator/pegen/grammar_visualizer.py
+++ b/Tools/peg_generator/pegen/grammar_visualizer.py
@@ -1,7 +1,6 @@
 import argparse
 import sys
-
-from typing import Any, Iterator, Callable
+from typing import Any, Callable, Iterator
 
 from pegen.build import build_parser
 from pegen.grammar import Grammar, Rule
diff --git a/Tools/peg_generator/pegen/keywordgen.py b/Tools/peg_generator/pegen/keywordgen.py
index 6a07f6e8b7b..35a5e1a229c 100644
--- a/Tools/peg_generator/pegen/keywordgen.py
+++ b/Tools/peg_generator/pegen/keywordgen.py
@@ -59,11 +59,11 @@ def main() -> None:
     with args.tokens_file as tok_file:
         all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
     gen = CParserGenerator(grammar, all_tokens, exact_tok, non_exact_tok, file=None)
-    gen.collect_todo()
+    gen.collect_rules()
 
     with args.keyword_file as thefile:
-        all_keywords = sorted(list(gen.callmakervisitor.keyword_cache.keys()) + EXTRA_KEYWORDS)
-        all_soft_keywords = sorted(gen.callmakervisitor.soft_keywords)
+        all_keywords = sorted(list(gen.keywords.keys()) + EXTRA_KEYWORDS)
+        all_soft_keywords = sorted(gen.soft_keywords)
 
         keywords = "" if not all_keywords else "    " + ",\n    ".join(map(repr, all_keywords))
         soft_keywords = (
diff --git a/Tools/peg_generator/pegen/parser_generator.py b/Tools/peg_generator/pegen/parser_generator.py
index 33ecee1ed44..f2105d8faa2 100644
--- a/Tools/peg_generator/pegen/parser_generator.py
+++ b/Tools/peg_generator/pegen/parser_generator.py
@@ -1,22 +1,76 @@
+import ast
 import contextlib
+import re
 from abc import abstractmethod
-from typing import IO, AbstractSet, Dict, Iterator, List, Optional, Set, Text, Tuple
+from typing import (
+    IO,
+    AbstractSet,
+    Any,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
+    Set,
+    Text,
+    Tuple,
+    Union,
+)
 
 from pegen import sccutils
 from pegen.grammar import (
     Alt,
+    Cut,
+    Forced,
     Gather,
     Grammar,
     GrammarError,
     GrammarVisitor,
+    Group,
+    Lookahead,
     NamedItem,
     NameLeaf,
+    Opt,
     Plain,
+    Repeat0,
+    Repeat1,
     Rhs,
     Rule,
+    StringLeaf,
 )
 
 
+class RuleCollectorVisitor(GrammarVisitor):
+    """Visitor that invokes a provided callmaker visitor with just the NamedItem nodes"""
+
+    def __init__(self, rules: Dict[str, Rule], callmakervisitor: GrammarVisitor) -> None:
+        self.rules = rules
+        self.callmaker = callmakervisitor
+
+    def visit_Rule(self, rule: Rule) -> None:
+        self.visit(rule.flatten())
+
+    def visit_NamedItem(self, item: NamedItem) -> None:
+        self.callmaker.visit(item)
+
+
+class KeywordCollectorVisitor(GrammarVisitor):
+    """Visitor that collects all the keywords and soft keywords in the Grammar"""
+
+    def __init__(self, gen: "ParserGenerator", keywords: Dict[str, int], soft_keywords: Set[str]):
+        self.generator = gen
+        self.keywords = keywords
+        self.soft_keywords = soft_keywords
+
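+    # Quoting convention in pegen grammars: a single-quoted literal name
+    # ('if') declares a hard keyword, while a double-quoted one ("match")
+    # declares a soft keyword; the endswith("'") check below relies on it.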
+    def visit_StringLeaf(self, node: StringLeaf) -> None:
+        val = ast.literal_eval(node.value)
+        if re.match(r"[a-zA-Z_]\w*\Z", val):  # This is a keyword
+            if node.value.endswith("'") and node.value not in self.keywords:
+                self.keywords[val] = self.generator.keyword_type()
+            else:
+                self.soft_keywords.add(node.value.replace('"', ""))
+
+
 class RuleCheckingVisitor(GrammarVisitor):
     def __init__(self, rules: Dict[str, Rule], tokens: Set[str]):
         self.rules = rules
@@ -39,6 +93,8 @@ class ParserGenerator:
     def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]):
         self.grammar = grammar
         self.tokens = tokens
+        self.keywords: Dict[str, int] = {}
+        self.soft_keywords: Set[str] = set()
         self.rules = grammar.rules
         self.validate_rule_names()
         if "trailer" not in grammar.metas and "start" not in self.rules:
@@ -48,12 +104,10 @@ class ParserGenerator:
             checker.visit(rule)
         self.file = file
         self.level = 0
-        compute_nullables(self.rules)
         self.first_graph, self.first_sccs = compute_left_recursives(self.rules)
-        self.todo = self.rules.copy()  # Rules to generate
         self.counter = 0  # For name_rule()/name_loop()
         self.keyword_counter = 499  # For keyword_type()
-        self.all_rules: Dict[str, Rule] = {}  # Rules + temporal rules
+        self.all_rules: Dict[str, Rule] = self.rules.copy()  # Rules + temporary rules
         self._local_variable_stack: List[List[str]] = []
 
     def validate_rule_names(self) -> None:
@@ -94,39 +148,43 @@ class ParserGenerator:
         for line in lines.splitlines():
             self.print(line)
 
-    def collect_todo(self) -> None:
+    def collect_rules(self) -> None:
+        keyword_collector = KeywordCollectorVisitor(self, self.keywords, self.soft_keywords)
+        for rule in self.all_rules.values():
+            keyword_collector.visit(rule)
+
+        rule_collector = RuleCollectorVisitor(self.rules, self.callmakervisitor)
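+        # Visiting a rule can synthesize new artificial rules (loops,
+        # gathers, temporary rhs rules) and append them to all_rules, so
+        # iterate to a fixed point: stop when a pass finds nothing new.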
         done: Set[str] = set()
         while True:
-            alltodo = list(self.todo)
-            self.all_rules.update(self.todo)
-            todo = [i for i in alltodo if i not in done]
+            computed_rules = list(self.all_rules)
+            todo = [i for i in computed_rules if i not in done]
             if not todo:
                 break
+            done = set(self.all_rules)
             for rulename in todo:
-                self.todo[rulename].collect_todo(self)
-            done = set(alltodo)
+                rule_collector.visit(self.all_rules[rulename])
 
     def keyword_type(self) -> int:
         self.keyword_counter += 1
         return self.keyword_counter
 
-    def name_node(self, rhs: Rhs) -> str:
+    def artifical_rule_from_rhs(self, rhs: Rhs) -> str:
         self.counter += 1
         name = f"_tmp_{self.counter}"  # TODO: Pick a nicer name.
-        self.todo[name] = Rule(name, None, rhs)
+        self.all_rules[name] = Rule(name, None, rhs)
         return name
 
-    def name_loop(self, node: Plain, is_repeat1: bool) -> str:
+    def artificial_rule_from_repeat(self, node: Plain, is_repeat1: bool) -> str:
         self.counter += 1
         if is_repeat1:
             prefix = "_loop1_"
         else:
             prefix = "_loop0_"
-        name = f"{prefix}{self.counter}"  # TODO: It's ugly to signal via the name.
-        self.todo[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
+        name = f"{prefix}{self.counter}"
+        self.all_rules[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
         return name
 
-    def name_gather(self, node: Gather) -> str:
+    def artifical_rule_from_gather(self, node: Gather) -> str:
         self.counter += 1
         name = f"_gather_{self.counter}"
         self.counter += 1
@@ -135,7 +193,7 @@ class ParserGenerator:
             [NamedItem(None, node.separator), NamedItem("elem", node.node)],
             action="elem",
         )
-        self.todo[extra_function_name] = Rule(
+        self.all_rules[extra_function_name] = Rule(
             extra_function_name,
             None,
             Rhs([extra_function_alt]),
@@ -143,7 +201,7 @@ class ParserGenerator:
         alt = Alt(
             [NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],
         )
-        self.todo[name] = Rule(
+        self.all_rules[name] = Rule(
             name,
             None,
             Rhs([alt]),
@@ -160,13 +218,120 @@ class ParserGenerator:
         return name
 
 
-def compute_nullables(rules: Dict[str, Rule]) -> None:
+class NullableVisitor(GrammarVisitor):
+    def __init__(self, rules: Dict[str, Rule]) -> None:
+        self.rules = rules
+        self.visited: Set[Any] = set()
+        self.nullables: Set[Union[Rule, NamedItem]] = set()
+
+    def visit_Rule(self, rule: Rule) -> bool:
+        if rule in self.visited:
+            return False
+        self.visited.add(rule)
+        if self.visit(rule.rhs):
+            self.nullables.add(rule)
+        return rule in self.nullables
+
+    def visit_Rhs(self, rhs: Rhs) -> bool:
+        for alt in rhs.alts:
+            if self.visit(alt):
+                return True
+        return False
+
+    def visit_Alt(self, alt: Alt) -> bool:
+        for item in alt.items:
+            if not self.visit(item):
+                return False
+        return True
+
+    def visit_Forced(self, force: Forced) -> bool:
+        return True
+
+    def visit_LookAhead(self, lookahead: Lookahead) -> bool:
+        return True
+
+    def visit_Opt(self, opt: Opt) -> bool:
+        return True
+
+    def visit_Repeat0(self, repeat: Repeat0) -> bool:
+        return True
+
+    def visit_Repeat1(self, repeat: Repeat1) -> bool:
+        return False
+
+    def visit_Gather(self, gather: Gather) -> bool:
+        return False
+
+    def visit_Cut(self, cut: Cut) -> bool:
+        return False
+
+    def visit_Group(self, group: Group) -> bool:
+        return self.visit(group.rhs)
+
+    def visit_NamedItem(self, item: NamedItem) -> bool:
+        if self.visit(item.item):
+            self.nullables.add(item)
+        return item in self.nullables
+
+    def visit_NameLeaf(self, node: NameLeaf) -> bool:
+        if node.value in self.rules:
+            return self.visit(self.rules[node.value])
+        # Token or unknown; never empty.
+        return False
+
+    def visit_StringLeaf(self, node: StringLeaf) -> bool:
+        # The string token '' is considered empty.
+        return not node.value
+
+
+def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]:
     """Compute which rules in a grammar are nullable.
 
     Thanks to TatSu (tatsu/leftrec.py) for inspiration.
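+
+    A minimal usage sketch (hypothetical grammar; parse_string and
+    GrammarParser are the helpers used by pegen's own tests):
+
+        grammar = parse_string("start: NAME*", GrammarParser)
+        nullables = compute_nullables(grammar.rules)
+        assert grammar.rules["start"] in nullables  # NAME* can match ""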
""" + nullable_visitor = NullableVisitor(rules) for rule in rules.values(): - rule.nullable_visit(rules) + nullable_visitor.visit(rule) + return nullable_visitor.nullables + + +class InitialNamesVisitor(GrammarVisitor): + def __init__(self, rules: Dict[str, Rule]) -> None: + self.rules = rules + self.nullables = compute_nullables(rules) + + def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[Any]: + names: Set[str] = set() + for value in node: + if isinstance(value, list): + for item in value: + names |= self.visit(item, *args, **kwargs) + else: + names |= self.visit(value, *args, **kwargs) + return names + + def visit_Alt(self, alt: Alt) -> Set[Any]: + names: Set[str] = set() + for item in alt.items: + names |= self.visit(item) + if item not in self.nullables: + break + return names + + def visit_Forced(self, force: Forced) -> Set[Any]: + return set() + + def visit_LookAhead(self, lookahead: Lookahead) -> Set[Any]: + return set() + + def visit_Cut(self, cut: Cut) -> Set[Any]: + return set() + + def visit_NameLeaf(self, node: NameLeaf) -> Set[Any]: + return {node.value} + + def visit_StringLeaf(self, node: StringLeaf) -> Set[Any]: + return set() def compute_left_recursives( @@ -207,10 +372,11 @@ def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]: Note that this requires the nullable flags to have been computed. """ + initial_name_visitor = InitialNamesVisitor(rules) graph = {} vertices: Set[str] = set() for rulename, rhs in rules.items(): - graph[rulename] = names = rhs.initial_names() + graph[rulename] = names = initial_name_visitor.visit(rhs) vertices |= names for vertex in vertices: graph.setdefault(vertex, set()) diff --git a/Tools/peg_generator/pegen/python_generator.py b/Tools/peg_generator/pegen/python_generator.py index 201bf2baa80..7aa730ae1c9 100644 --- a/Tools/peg_generator/pegen/python_generator.py +++ b/Tools/peg_generator/pegen/python_generator.py @@ -95,8 +95,6 @@ class PythonCallMakerVisitor(GrammarVisitor): def __init__(self, parser_generator: ParserGenerator): self.gen = parser_generator self.cache: Dict[Any, Any] = {} - self.keywords: Set[str] = set() - self.soft_keywords: Set[str] = set() def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]: name = node.value @@ -111,12 +109,6 @@ class PythonCallMakerVisitor(GrammarVisitor): return name, f"self.{name}()" def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]: - val = ast.literal_eval(node.value) - if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword - if node.value.endswith("'"): - self.keywords.add(val) - else: - self.soft_keywords.add(val) return "literal", f"self.expect({node.value})" def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]: @@ -125,7 +117,7 @@ class PythonCallMakerVisitor(GrammarVisitor): if len(node.alts) == 1 and len(node.alts[0].items) == 1: self.cache[node] = self.visit(node.alts[0].items[0]) else: - name = self.gen.name_node(node) + name = self.gen.artifical_rule_from_rhs(node) self.cache[node] = name, f"self.{name}()" return self.cache[node] @@ -163,21 +155,21 @@ class PythonCallMakerVisitor(GrammarVisitor): def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]: if node in self.cache: return self.cache[node] - name = self.gen.name_loop(node.node, False) + name = self.gen.artificial_rule_from_repeat(node.node, False) self.cache[node] = name, f"self.{name}()," # Also a trailing comma! 
+    def visit_Alt(self, alt: Alt) -> Set[Any]:
+        names: Set[str] = set()
+        for item in alt.items:
+            names |= self.visit(item)
+            if item not in self.nullables:
+                break
+        return names
+
+    def visit_Forced(self, force: Forced) -> Set[Any]:
+        return set()
+
+    def visit_LookAhead(self, lookahead: Lookahead) -> Set[Any]:
+        return set()
+
+    def visit_Cut(self, cut: Cut) -> Set[Any]:
+        return set()
+
+    def visit_NameLeaf(self, node: NameLeaf) -> Set[Any]:
+        return {node.value}
+
+    def visit_StringLeaf(self, node: StringLeaf) -> Set[Any]:
+        return set()
 
 
 def compute_left_recursives(
@@ -207,10 +372,11 @@ def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
 
     Note that this requires the nullable flags to have been computed.
     """
+    initial_name_visitor = InitialNamesVisitor(rules)
     graph = {}
     vertices: Set[str] = set()
     for rulename, rhs in rules.items():
-        graph[rulename] = names = rhs.initial_names()
+        graph[rulename] = names = initial_name_visitor.visit(rhs)
         vertices |= names
     for vertex in vertices:
         graph.setdefault(vertex, set())
diff --git a/Tools/peg_generator/pegen/python_generator.py b/Tools/peg_generator/pegen/python_generator.py
index 201bf2baa80..7aa730ae1c9 100644
--- a/Tools/peg_generator/pegen/python_generator.py
+++ b/Tools/peg_generator/pegen/python_generator.py
@@ -95,8 +95,6 @@ class PythonCallMakerVisitor(GrammarVisitor):
     def __init__(self, parser_generator: ParserGenerator):
         self.gen = parser_generator
         self.cache: Dict[Any, Any] = {}
-        self.keywords: Set[str] = set()
-        self.soft_keywords: Set[str] = set()
 
     def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
         name = node.value
@@ -111,12 +109,6 @@ class PythonCallMakerVisitor(GrammarVisitor):
         return name, f"self.{name}()"
 
     def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
-        val = ast.literal_eval(node.value)
-        if re.match(r"[a-zA-Z_]\w*\Z", val):  # This is a keyword
-            if node.value.endswith("'"):
-                self.keywords.add(val)
-            else:
-                self.soft_keywords.add(val)
         return "literal", f"self.expect({node.value})"
 
     def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
@@ -125,7 +117,7 @@ class PythonCallMakerVisitor(GrammarVisitor):
         if len(node.alts) == 1 and len(node.alts[0].items) == 1:
             self.cache[node] = self.visit(node.alts[0].items[0])
         else:
-            name = self.gen.name_node(node)
+            name = self.gen.artifical_rule_from_rhs(node)
             self.cache[node] = name, f"self.{name}()"
         return self.cache[node]
 
@@ -163,21 +155,21 @@ class PythonCallMakerVisitor(GrammarVisitor):
     def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
         if node in self.cache:
             return self.cache[node]
-        name = self.gen.name_loop(node.node, False)
+        name = self.gen.artificial_rule_from_repeat(node.node, False)
         self.cache[node] = name, f"self.{name}(),"  # Also a trailing comma!
         return self.cache[node]
 
     def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
         if node in self.cache:
             return self.cache[node]
-        name = self.gen.name_loop(node.node, True)
+        name = self.gen.artificial_rule_from_repeat(node.node, True)
         self.cache[node] = name, f"self.{name}()"  # But no trailing comma here!
         return self.cache[node]
 
     def visit_Gather(self, node: Gather) -> Tuple[str, str]:
         if node in self.cache:
             return self.cache[node]
-        name = self.gen.name_gather(node)
+        name = self.gen.artifical_rule_from_gather(node)
         self.cache[node] = name, f"self.{name}()"  # No trailing comma here either!
         return self.cache[node]
 
@@ -219,6 +211,7 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
         )
 
     def generate(self, filename: str) -> None:
+        self.collect_rules()
         header = self.grammar.metas.get("header", MODULE_PREFIX)
         if header is not None:
             self.print(header.rstrip("\n").format(filename=filename))
@@ -228,17 +221,15 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
         cls_name = self.grammar.metas.get("class", "GeneratedParser")
         self.print("# Keywords and soft keywords are listed at the end of the parser definition.")
         self.print(f"class {cls_name}(Parser):")
-        while self.todo:
-            for rulename, rule in list(self.todo.items()):
-                del self.todo[rulename]
-                self.print()
-                with self.indent():
-                    self.visit(rule)
+        for rule in self.all_rules.values():
+            self.print()
+            with self.indent():
+                self.visit(rule)
 
         self.print()
         with self.indent():
-            self.print(f"KEYWORDS = {tuple(self.callmakervisitor.keywords)}")
-            self.print(f"SOFT_KEYWORDS = {tuple(self.callmakervisitor.soft_keywords)}")
+            self.print(f"KEYWORDS = {tuple(self.keywords)}")
+            self.print(f"SOFT_KEYWORDS = {tuple(self.soft_keywords)}")
 
         trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX.format(class_name=cls_name))
         if trailer is not None:
@@ -270,8 +261,6 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
         self.print(f"def {node.name}(self) -> Optional[{node_type}]:")
         with self.indent():
             self.print(f"# {node.name}: {rhs}")
-            if node.nullable:
-                self.print(f"# nullable={node.nullable}")
             self.print("mark = self._mark()")
             if self.alts_uses_locations(node.rhs.alts):
                 self.print("tok = self._tokenizer.peek()")
diff --git a/Tools/peg_generator/pegen/testutil.py b/Tools/peg_generator/pegen/testutil.py
index e0928a4af70..8e5dbc5cdbb 100644
--- a/Tools/peg_generator/pegen/testutil.py
+++ b/Tools/peg_generator/pegen/testutil.py
@@ -4,10 +4,9 @@ import os
 import pathlib
 import sys
 import textwrap
-import tokenize
 import token
-
-from typing import Any, cast, Dict, IO, Type, Final
+import tokenize
+from typing import IO, Any, Dict, Final, Type, cast
 
 from pegen.build import compile_c_extension
 from pegen.c_generator import CParserGenerator
diff --git a/Tools/peg_generator/pegen/validator.py b/Tools/peg_generator/pegen/validator.py
index e7d6980d8b2..c48a01eedf5 100644
--- a/Tools/peg_generator/pegen/validator.py
+++ b/Tools/peg_generator/pegen/validator.py
@@ -1,12 +1,7 @@
 from typing import Optional
 
 from pegen import grammar
-from pegen.grammar import (
-    Alt,
-    GrammarVisitor,
-    Rule,
-    Rhs,
-)
+from pegen.grammar import Alt, GrammarVisitor, Rhs, Rule
 
 
 class ValidationError(Exception):