diff --git a/Lib/test/test_peg_generator/test_grammar_validator.py b/Lib/test/test_peg_generator/test_grammar_validator.py new file mode 100644 index 00000000000..2e72ff8df28 --- /dev/null +++ b/Lib/test/test_peg_generator/test_grammar_validator.py @@ -0,0 +1,51 @@ +import unittest +from test import test_tools + +test_tools.skip_if_missing('peg_generator') +with test_tools.imports_under_tool('peg_generator'): + from pegen.grammar_parser import GeneratedParser as GrammarParser + from pegen.validator import SubRuleValidator, ValidationError + from pegen.testutil import parse_string + from pegen.grammar import Grammar + + +class TestPegen(unittest.TestCase): + def test_rule_with_no_collision(self) -> None: + grammar_source = """ + start: bad_rule + sum: + | NAME '-' NAME + | NAME '+' NAME + """ + grammar: Grammar = parse_string(grammar_source, GrammarParser) + validator = SubRuleValidator(grammar) + for rule_name, rule in grammar.rules.items(): + validator.validate_rule(rule_name, rule) + + def test_rule_with_simple_collision(self) -> None: + grammar_source = """ + start: bad_rule + sum: + | NAME '+' NAME + | NAME '+' NAME ';' + """ + grammar: Grammar = parse_string(grammar_source, GrammarParser) + validator = SubRuleValidator(grammar) + with self.assertRaises(ValidationError): + for rule_name, rule in grammar.rules.items(): + validator.validate_rule(rule_name, rule) + + def test_rule_with_collision_after_some_other_rules(self) -> None: + grammar_source = """ + start: bad_rule + sum: + | NAME '+' NAME + | NAME '*' NAME ';' + | NAME '-' NAME + | NAME '+' NAME ';' + """ + grammar: Grammar = parse_string(grammar_source, GrammarParser) + validator = SubRuleValidator(grammar) + with self.assertRaises(ValidationError): + for rule_name, rule in grammar.rules.items(): + validator.validate_rule(rule_name, rule) diff --git a/Tools/peg_generator/pegen/__main__.py b/Tools/peg_generator/pegen/__main__.py index 1dcbaad1c38..c0f3b687587 100755 --- a/Tools/peg_generator/pegen/__main__.py +++ b/Tools/peg_generator/pegen/__main__.py @@ -14,6 +14,7 @@ import traceback from typing import Tuple from pegen.build import Grammar, Parser, Tokenizer, ParserGenerator +from pegen.validator import validate_grammar def generate_c_code( @@ -128,6 +129,8 @@ def main() -> None: grammar, parser, tokenizer, gen = args.func(args) t1 = time.time() + validate_grammar(grammar) + if not args.quiet: if args.verbose: print("Raw Grammar:") diff --git a/Tools/peg_generator/pegen/validator.py b/Tools/peg_generator/pegen/validator.py new file mode 100644 index 00000000000..0e3dd41cca4 --- /dev/null +++ b/Tools/peg_generator/pegen/validator.py @@ -0,0 +1,52 @@ +from pegen import grammar +from pegen.grammar import ( + Alt, + Cut, + Gather, + GrammarVisitor, + Group, + Lookahead, + NamedItem, + NameLeaf, + NegativeLookahead, + Opt, + PositiveLookahead, + Repeat0, + Repeat1, + Rhs, + Rule, + StringLeaf, +) + +class ValidationError(Exception): + pass + +class GrammarValidator(GrammarVisitor): + def __init__(self, grammar: grammar.Grammar): + self.grammar = grammar + self.rulename = None + + def validate_rule(self, rulename: str, node: Rule): + self.rulename = rulename + self.visit(node) + self.rulename = None + + +class SubRuleValidator(GrammarValidator): + def visit_Rhs(self, node: Rule): + for index, alt in enumerate(node.alts): + alts_to_consider = node.alts[index+1:] + for other_alt in alts_to_consider: + self.check_intersection(alt, other_alt) + + def check_intersection(self, first_alt: Alt, second_alt: Alt) -> bool: + if str(second_alt).startswith(str(first_alt)): + raise ValidationError( + f"In {self.rulename} there is an alternative that will " + f"never be visited:\n{second_alt}") + +def validate_grammar(the_grammar: grammar.Grammar): + for validator_cls in GrammarValidator.__subclasses__(): + validator = validator_cls(the_grammar) + for rule_name, rule in the_grammar.rules.items(): + validator.validate_rule(rule_name, rule)