Add small validator utility for PEG grammars (GH-23519)

This commit is contained in:
Pablo Galindo 2020-12-26 19:11:29 +00:00 committed by GitHub
parent 7865f516f3
commit 3bcc4ead3f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 106 additions and 0 deletions

View File

@ -0,0 +1,51 @@
import unittest
from test import test_tools
test_tools.skip_if_missing('peg_generator')
with test_tools.imports_under_tool('peg_generator'):
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.validator import SubRuleValidator, ValidationError
from pegen.testutil import parse_string
from pegen.grammar import Grammar
class TestPegen(unittest.TestCase):
    """Exercise ``SubRuleValidator`` on small hand-written grammars."""

    @staticmethod
    def _prepare(source: str):
        """Parse *source* and return ``(grammar, validator)`` built from it."""
        parsed: Grammar = parse_string(source, GrammarParser)
        return parsed, SubRuleValidator(parsed)

    @staticmethod
    def _check_every_rule(parsed, checker) -> None:
        """Run *checker* over each rule of *parsed*, in definition order."""
        for name, definition in parsed.rules.items():
            checker.validate_rule(name, definition)

    def test_rule_with_no_collision(self) -> None:
        # Neither alternative's text is a prefix of the other: no error expected.
        grammar_source = """
        start: bad_rule
        sum:
            | NAME '-' NAME
            | NAME '+' NAME
        """
        parsed, checker = self._prepare(grammar_source)
        self._check_every_rule(parsed, checker)

    def test_rule_with_simple_collision(self) -> None:
        # The second alternative begins with the whole first alternative.
        grammar_source = """
        start: bad_rule
        sum:
            | NAME '+' NAME
            | NAME '+' NAME ';'
        """
        parsed, checker = self._prepare(grammar_source)
        with self.assertRaises(ValidationError):
            self._check_every_rule(parsed, checker)

    def test_rule_with_collision_after_some_other_rules(self) -> None:
        # The colliding pair (first and last alternative) is not adjacent.
        grammar_source = """
        start: bad_rule
        sum:
            | NAME '+' NAME
            | NAME '*' NAME ';'
            | NAME '-' NAME
            | NAME '+' NAME ';'
        """
        parsed, checker = self._prepare(grammar_source)
        with self.assertRaises(ValidationError):
            self._check_every_rule(parsed, checker)

View File

@ -14,6 +14,7 @@ import traceback
from typing import Tuple
from pegen.build import Grammar, Parser, Tokenizer, ParserGenerator
from pegen.validator import validate_grammar
def generate_c_code(
@ -128,6 +129,8 @@ def main() -> None:
grammar, parser, tokenizer, gen = args.func(args)
t1 = time.time()
validate_grammar(grammar)
if not args.quiet:
if args.verbose:
print("Raw Grammar:")

View File

@ -0,0 +1,52 @@
from pegen import grammar
from pegen.grammar import (
Alt,
Cut,
Gather,
GrammarVisitor,
Group,
Lookahead,
NamedItem,
NameLeaf,
NegativeLookahead,
Opt,
PositiveLookahead,
Repeat0,
Repeat1,
Rhs,
Rule,
StringLeaf,
)
class ValidationError(Exception):
    """Error raised by a grammar validator to report an invalid rule."""
class GrammarValidator(GrammarVisitor):
    """Base class for grammar checks that are applied one rule at a time.

    Subclasses implement ``visit_*`` hooks; ``validate_rule`` records the
    name of the rule being checked in ``self.rulename`` so those hooks can
    mention it in their error messages.
    """

    def __init__(self, grammar: grammar.Grammar) -> None:
        self.grammar = grammar
        # Name of the rule currently being validated; None when idle.
        self.rulename = None

    def validate_rule(self, rulename: str, node: Rule) -> None:
        """Visit *node* with ``self.rulename`` set to *rulename*.

        Any exception raised while visiting propagates to the caller.
        """
        self.rulename = rulename
        try:
            self.visit(node)
        finally:
            # Reset even when a check raises, so a validator that is reused
            # after a failure does not keep reporting a stale rule name.
            self.rulename = None
class SubRuleValidator(GrammarValidator):
    """Reject rules in which a later alternative starts with an earlier one."""

    def visit_Rhs(self, node: Rhs) -> None:
        """Compare every alternative of *node* with each one that follows it."""
        for index, alt in enumerate(node.alts):
            for other_alt in node.alts[index + 1:]:
                self.check_intersection(alt, other_alt)

    def check_intersection(self, first_alt: Alt, second_alt: Alt) -> None:
        """Raise if the text of *second_alt* begins with the text of *first_alt*.

        Returns nothing; the only signal is the exception.

        Raises:
            ValidationError: when ``str(second_alt)`` starts with
                ``str(first_alt)``.
        """
        # NOTE(review): this is a plain string-prefix comparison, so an
        # alternative whose next item merely shares leading characters with
        # the end of the earlier one would presumably be flagged as well --
        # confirm against how Alt renders itself.
        if str(second_alt).startswith(str(first_alt)):
            raise ValidationError(
                f"In {self.rulename} there is an alternative that will "
                f"never be visited:\n{second_alt}"
            )
def validate_grammar(the_grammar: grammar.Grammar):
    """Run every direct ``GrammarValidator`` subclass over all grammar rules.

    One validator instance is created per subclass and applied to each
    ``(name, rule)`` pair of ``the_grammar.rules``. Exceptions raised by a
    validator are not caught here.
    """
    for checker_type in GrammarValidator.__subclasses__():
        checker = checker_type(the_grammar)
        for name, definition in the_grammar.rules.items():
            checker.validate_rule(name, definition)