Add small validator utility for PEG grammars (GH-23519)
This commit is contained in:
parent
7865f516f3
commit
3bcc4ead3f
|
@ -0,0 +1,51 @@
|
|||
import unittest
|
||||
from test import test_tools
|
||||
|
||||
test_tools.skip_if_missing('peg_generator')
|
||||
with test_tools.imports_under_tool('peg_generator'):
|
||||
from pegen.grammar_parser import GeneratedParser as GrammarParser
|
||||
from pegen.validator import SubRuleValidator, ValidationError
|
||||
from pegen.testutil import parse_string
|
||||
from pegen.grammar import Grammar
|
||||
|
||||
|
||||
class TestPegen(unittest.TestCase):
|
||||
def test_rule_with_no_collision(self) -> None:
|
||||
grammar_source = """
|
||||
start: bad_rule
|
||||
sum:
|
||||
| NAME '-' NAME
|
||||
| NAME '+' NAME
|
||||
"""
|
||||
grammar: Grammar = parse_string(grammar_source, GrammarParser)
|
||||
validator = SubRuleValidator(grammar)
|
||||
for rule_name, rule in grammar.rules.items():
|
||||
validator.validate_rule(rule_name, rule)
|
||||
|
||||
def test_rule_with_simple_collision(self) -> None:
|
||||
grammar_source = """
|
||||
start: bad_rule
|
||||
sum:
|
||||
| NAME '+' NAME
|
||||
| NAME '+' NAME ';'
|
||||
"""
|
||||
grammar: Grammar = parse_string(grammar_source, GrammarParser)
|
||||
validator = SubRuleValidator(grammar)
|
||||
with self.assertRaises(ValidationError):
|
||||
for rule_name, rule in grammar.rules.items():
|
||||
validator.validate_rule(rule_name, rule)
|
||||
|
||||
def test_rule_with_collision_after_some_other_rules(self) -> None:
|
||||
grammar_source = """
|
||||
start: bad_rule
|
||||
sum:
|
||||
| NAME '+' NAME
|
||||
| NAME '*' NAME ';'
|
||||
| NAME '-' NAME
|
||||
| NAME '+' NAME ';'
|
||||
"""
|
||||
grammar: Grammar = parse_string(grammar_source, GrammarParser)
|
||||
validator = SubRuleValidator(grammar)
|
||||
with self.assertRaises(ValidationError):
|
||||
for rule_name, rule in grammar.rules.items():
|
||||
validator.validate_rule(rule_name, rule)
|
|
@ -14,6 +14,7 @@ import traceback
|
|||
from typing import Tuple
|
||||
|
||||
from pegen.build import Grammar, Parser, Tokenizer, ParserGenerator
|
||||
from pegen.validator import validate_grammar
|
||||
|
||||
|
||||
def generate_c_code(
|
||||
|
@ -128,6 +129,8 @@ def main() -> None:
|
|||
grammar, parser, tokenizer, gen = args.func(args)
|
||||
t1 = time.time()
|
||||
|
||||
validate_grammar(grammar)
|
||||
|
||||
if not args.quiet:
|
||||
if args.verbose:
|
||||
print("Raw Grammar:")
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
from pegen import grammar
|
||||
from pegen.grammar import (
|
||||
Alt,
|
||||
Cut,
|
||||
Gather,
|
||||
GrammarVisitor,
|
||||
Group,
|
||||
Lookahead,
|
||||
NamedItem,
|
||||
NameLeaf,
|
||||
NegativeLookahead,
|
||||
Opt,
|
||||
PositiveLookahead,
|
||||
Repeat0,
|
||||
Repeat1,
|
||||
Rhs,
|
||||
Rule,
|
||||
StringLeaf,
|
||||
)
|
||||
|
||||
class ValidationError(Exception):
|
||||
pass
|
||||
|
||||
class GrammarValidator(GrammarVisitor):
|
||||
def __init__(self, grammar: grammar.Grammar):
|
||||
self.grammar = grammar
|
||||
self.rulename = None
|
||||
|
||||
def validate_rule(self, rulename: str, node: Rule):
|
||||
self.rulename = rulename
|
||||
self.visit(node)
|
||||
self.rulename = None
|
||||
|
||||
|
||||
class SubRuleValidator(GrammarValidator):
|
||||
def visit_Rhs(self, node: Rule):
|
||||
for index, alt in enumerate(node.alts):
|
||||
alts_to_consider = node.alts[index+1:]
|
||||
for other_alt in alts_to_consider:
|
||||
self.check_intersection(alt, other_alt)
|
||||
|
||||
def check_intersection(self, first_alt: Alt, second_alt: Alt) -> bool:
|
||||
if str(second_alt).startswith(str(first_alt)):
|
||||
raise ValidationError(
|
||||
f"In {self.rulename} there is an alternative that will "
|
||||
f"never be visited:\n{second_alt}")
|
||||
|
||||
def validate_grammar(the_grammar: grammar.Grammar):
|
||||
for validator_cls in GrammarValidator.__subclasses__():
|
||||
validator = validator_cls(the_grammar)
|
||||
for rule_name, rule in the_grammar.rules.items():
|
||||
validator.validate_rule(rule_name, rule)
|
Loading…
Reference in New Issue