Add small validator utility for PEG grammars (GH-23519)
This commit is contained in:
parent
7865f516f3
commit
3bcc4ead3f
|
@ -0,0 +1,51 @@
|
||||||
|
import unittest
|
||||||
|
from test import test_tools
|
||||||
|
|
||||||
|
test_tools.skip_if_missing('peg_generator')
|
||||||
|
with test_tools.imports_under_tool('peg_generator'):
|
||||||
|
from pegen.grammar_parser import GeneratedParser as GrammarParser
|
||||||
|
from pegen.validator import SubRuleValidator, ValidationError
|
||||||
|
from pegen.testutil import parse_string
|
||||||
|
from pegen.grammar import Grammar
|
||||||
|
|
||||||
|
|
||||||
|
class TestPegen(unittest.TestCase):
    """Run ``SubRuleValidator`` over small hand-written grammars."""

    @staticmethod
    def _prepare(source):
        """Parse *source* and return ``(grammar, validator)`` for it."""
        parsed: Grammar = parse_string(source, GrammarParser)
        return parsed, SubRuleValidator(parsed)

    @staticmethod
    def _check_all_rules(parsed, checker):
        """Feed every rule of *parsed* to *checker*, in grammar order."""
        for name, rule in parsed.rules.items():
            checker.validate_rule(name, rule)

    def test_rule_with_no_collision(self) -> None:
        # Neither alternative's text is a prefix of the other, so
        # validation is expected to finish without raising.
        source = """
        start: bad_rule
        sum:
            | NAME '-' NAME
            | NAME '+' NAME
        """
        parsed, checker = self._prepare(source)
        self._check_all_rules(parsed, checker)

    def test_rule_with_simple_collision(self) -> None:
        # The second alternative begins with the full text of the first.
        source = """
        start: bad_rule
        sum:
            | NAME '+' NAME
            | NAME '+' NAME ';'
        """
        parsed, checker = self._prepare(source)
        with self.assertRaises(ValidationError):
            self._check_all_rules(parsed, checker)

    def test_rule_with_collision_after_some_other_rules(self) -> None:
        # The colliding pair (first and last alternative) is separated by
        # unrelated alternatives.
        source = """
        start: bad_rule
        sum:
            | NAME '+' NAME
            | NAME '*' NAME ';'
            | NAME '-' NAME
            | NAME '+' NAME ';'
        """
        parsed, checker = self._prepare(source)
        with self.assertRaises(ValidationError):
            self._check_all_rules(parsed, checker)
|
|
@ -14,6 +14,7 @@ import traceback
|
||||||
from typing import Tuple
|
from typing import Tuple
|
||||||
|
|
||||||
from pegen.build import Grammar, Parser, Tokenizer, ParserGenerator
|
from pegen.build import Grammar, Parser, Tokenizer, ParserGenerator
|
||||||
|
from pegen.validator import validate_grammar
|
||||||
|
|
||||||
|
|
||||||
def generate_c_code(
|
def generate_c_code(
|
||||||
|
@ -128,6 +129,8 @@ def main() -> None:
|
||||||
grammar, parser, tokenizer, gen = args.func(args)
|
grammar, parser, tokenizer, gen = args.func(args)
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
|
|
||||||
|
validate_grammar(grammar)
|
||||||
|
|
||||||
if not args.quiet:
|
if not args.quiet:
|
||||||
if args.verbose:
|
if args.verbose:
|
||||||
print("Raw Grammar:")
|
print("Raw Grammar:")
|
||||||
|
|
|
@ -0,0 +1,52 @@
|
||||||
|
from pegen import grammar
|
||||||
|
from pegen.grammar import (
|
||||||
|
Alt,
|
||||||
|
Cut,
|
||||||
|
Gather,
|
||||||
|
GrammarVisitor,
|
||||||
|
Group,
|
||||||
|
Lookahead,
|
||||||
|
NamedItem,
|
||||||
|
NameLeaf,
|
||||||
|
NegativeLookahead,
|
||||||
|
Opt,
|
||||||
|
PositiveLookahead,
|
||||||
|
Repeat0,
|
||||||
|
Repeat1,
|
||||||
|
Rhs,
|
||||||
|
Rule,
|
||||||
|
StringLeaf,
|
||||||
|
)
|
||||||
|
|
||||||
|
class ValidationError(Exception):
    """Raised by a grammar validator when a rule fails one of its checks."""
|
||||||
|
|
||||||
|
class GrammarValidator(GrammarVisitor):
    """Base class for grammar checks that are run one rule at a time.

    Subclasses supply ``visit_*`` methods; ``validate_rule`` records the
    name of the rule being checked in ``self.rulename`` while the visit
    runs so that those methods can mention it.
    """

    def __init__(self, grammar: grammar.Grammar):
        """Remember *grammar*; no rule is being validated yet."""
        # None means "not currently inside validate_rule".
        self.rulename = None
        self.grammar = grammar

    def validate_rule(self, rulename: str, node: Rule):
        """Visit *node* with ``self.rulename`` set to *rulename*.

        ``self.rulename`` is set back to None once the visit returns.
        If the visit raises, the exception propagates and the name is
        left as it was set.
        """
        self.rulename = rulename
        self.visit(node)
        self.rulename = None
|
||||||
|
|
||||||
|
|
||||||
|
class SubRuleValidator(GrammarValidator):
    """Reject a rule in which an earlier alternative shadows a later one.

    Two alternatives collide when the string form of the later one starts
    with the string form of the earlier one.

    NOTE(review): the comparison is purely textual (``str.startswith``), so
    a later alternative whose text merely begins with the same characters
    (for example ``a`` followed by ``ab``) is also reported -- confirm that
    this is acceptable for the grammars being validated.
    """

    def visit_Rhs(self, node: Rhs) -> None:
        """Compare every alternative of *node* with each one after it.

        Raises:
            ValidationError: propagated from ``check_intersection`` for the
                first colliding pair found.
        """
        for index, alt in enumerate(node.alts):
            # Only later alternatives can be shadowed by ``alt``.
            for other_alt in node.alts[index + 1 :]:
                self.check_intersection(alt, other_alt)

    def check_intersection(self, first_alt: Alt, second_alt: Alt) -> None:
        """Raise if the text of *second_alt* starts with that of *first_alt*.

        Returns nothing; the only outcome besides returning is the raise.

        Raises:
            ValidationError: naming the current rule (``self.rulename``)
                and the alternative that would never be visited.
        """
        if str(second_alt).startswith(str(first_alt)):
            raise ValidationError(
                f"In {self.rulename} there is an alternative that will "
                f"never be visited:\n{second_alt}"
            )
|
||||||
|
|
||||||
|
def validate_grammar(the_grammar: grammar.Grammar):
    """Run every direct ``GrammarValidator`` subclass over *the_grammar*.

    A fresh validator is created per subclass and handed each
    ``(name, rule)`` pair from ``the_grammar.rules`` in turn.  Nothing is
    caught here, so an exception raised by a validator propagates to the
    caller.
    """
    # __subclasses__() lists immediate subclasses only.
    for checker_type in GrammarValidator.__subclasses__():
        checker = checker_type(the_grammar)
        for name, rule in the_grammar.rules.items():
            checker.validate_rule(name, rule)
|
Loading…
Reference in New Issue