226 lines
6.9 KiB
Python
226 lines
6.9 KiB
Python
import unittest
|
|
|
|
from test import test_tools
|
|
from typing import Dict, Set
|
|
|
|
test_tools.skip_if_missing('peg_generator')
|
|
with test_tools.imports_under_tool('peg_generator'):
|
|
from pegen.grammar_parser import GeneratedParser as GrammarParser
|
|
from pegen.testutil import parse_string
|
|
from pegen.first_sets import FirstSetCalculator
|
|
from pegen.grammar import Grammar
|
|
|
|
|
|
class TestFirstSets(unittest.TestCase):
|
|
def calculate_first_sets(self, grammar_source: str) -> Dict[str, Set[str]]:
|
|
grammar: Grammar = parse_string(grammar_source, GrammarParser)
|
|
return FirstSetCalculator(grammar.rules).calculate()
|
|
|
|
def test_alternatives(self) -> None:
|
|
grammar = """
|
|
start: expr NEWLINE? ENDMARKER
|
|
expr: A | B
|
|
A: 'a' | '-'
|
|
B: 'b' | '+'
|
|
"""
|
|
self.assertEqual(self.calculate_first_sets(grammar), {
|
|
"A": {"'a'", "'-'"},
|
|
"B": {"'+'", "'b'"},
|
|
"expr": {"'+'", "'a'", "'b'", "'-'"},
|
|
"start": {"'+'", "'a'", "'b'", "'-'"},
|
|
})
|
|
|
|
def test_optionals(self) -> None:
|
|
grammar = """
|
|
start: expr NEWLINE
|
|
expr: ['a'] ['b'] 'c'
|
|
"""
|
|
self.assertEqual(self.calculate_first_sets(grammar), {
|
|
"expr": {"'c'", "'a'", "'b'"},
|
|
"start": {"'c'", "'a'", "'b'"},
|
|
})
|
|
|
|
def test_repeat_with_separator(self) -> None:
|
|
grammar = """
|
|
start: ','.thing+ NEWLINE
|
|
thing: NUMBER
|
|
"""
|
|
self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
|
|
|
|
def test_optional_operator(self) -> None:
|
|
grammar = """
|
|
start: sum NEWLINE
|
|
sum: (term)? 'b'
|
|
term: NUMBER
|
|
"""
|
|
self.assertEqual(self.calculate_first_sets(grammar), {
|
|
"term": {"NUMBER"},
|
|
"sum": {"NUMBER", "'b'"},
|
|
"start": {"'b'", "NUMBER"},
|
|
})
|
|
|
|
def test_optional_literal(self) -> None:
|
|
grammar = """
|
|
start: sum NEWLINE
|
|
sum: '+' ? term
|
|
term: NUMBER
|
|
"""
|
|
self.assertEqual(self.calculate_first_sets(grammar), {
|
|
"term": {"NUMBER"},
|
|
"sum": {"'+'", "NUMBER"},
|
|
"start": {"'+'", "NUMBER"},
|
|
})
|
|
|
|
def test_optional_after(self) -> None:
|
|
grammar = """
|
|
start: term NEWLINE
|
|
term: NUMBER ['+']
|
|
"""
|
|
self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"NUMBER"}})
|
|
|
|
def test_optional_before(self) -> None:
|
|
grammar = """
|
|
start: term NEWLINE
|
|
term: ['+'] NUMBER
|
|
"""
|
|
self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}})
|
|
|
|
def test_repeat_0(self) -> None:
|
|
grammar = """
|
|
start: thing* "+" NEWLINE
|
|
thing: NUMBER
|
|
"""
|
|
self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}})
|
|
|
|
def test_repeat_0_with_group(self) -> None:
|
|
grammar = """
|
|
start: ('+' '-')* term NEWLINE
|
|
term: NUMBER
|
|
"""
|
|
self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}})
|
|
|
|
def test_repeat_1(self) -> None:
|
|
grammar = """
|
|
start: thing+ '-' NEWLINE
|
|
thing: NUMBER
|
|
"""
|
|
self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
|
|
|
|
def test_repeat_1_with_group(self) -> None:
|
|
grammar = """
|
|
start: ('+' term)+ term NEWLINE
|
|
term: NUMBER
|
|
"""
|
|
self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}})
|
|
|
|
def test_gather(self) -> None:
|
|
grammar = """
|
|
start: ','.thing+ NEWLINE
|
|
thing: NUMBER
|
|
"""
|
|
self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
|
|
|
|
def test_positive_lookahead(self) -> None:
|
|
grammar = """
|
|
start: expr NEWLINE
|
|
expr: &'a' opt
|
|
opt: 'a' | 'b' | 'c'
|
|
"""
|
|
self.assertEqual(self.calculate_first_sets(grammar), {
|
|
"expr": {"'a'"},
|
|
"start": {"'a'"},
|
|
"opt": {"'b'", "'c'", "'a'"},
|
|
})
|
|
|
|
def test_negative_lookahead(self) -> None:
|
|
grammar = """
|
|
start: expr NEWLINE
|
|
expr: !'a' opt
|
|
opt: 'a' | 'b' | 'c'
|
|
"""
|
|
self.assertEqual(self.calculate_first_sets(grammar), {
|
|
"opt": {"'b'", "'a'", "'c'"},
|
|
"expr": {"'b'", "'c'"},
|
|
"start": {"'b'", "'c'"},
|
|
})
|
|
|
|
def test_left_recursion(self) -> None:
|
|
grammar = """
|
|
start: expr NEWLINE
|
|
expr: ('-' term | expr '+' term | term)
|
|
term: NUMBER
|
|
foo: 'foo'
|
|
bar: 'bar'
|
|
baz: 'baz'
|
|
"""
|
|
self.assertEqual(self.calculate_first_sets(grammar), {
|
|
"expr": {"NUMBER", "'-'"},
|
|
"term": {"NUMBER"},
|
|
"start": {"NUMBER", "'-'"},
|
|
"foo": {"'foo'"},
|
|
"bar": {"'bar'"},
|
|
"baz": {"'baz'"},
|
|
})
|
|
|
|
def test_advance_left_recursion(self) -> None:
|
|
grammar = """
|
|
start: NUMBER | sign start
|
|
sign: ['-']
|
|
"""
|
|
self.assertEqual(self.calculate_first_sets(grammar), {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}})
|
|
|
|
def test_mutual_left_recursion(self) -> None:
|
|
grammar = """
|
|
start: foo 'E'
|
|
foo: bar 'A' | 'B'
|
|
bar: foo 'C' | 'D'
|
|
"""
|
|
self.assertEqual(self.calculate_first_sets(grammar), {
|
|
"foo": {"'D'", "'B'"},
|
|
"bar": {"'D'"},
|
|
"start": {"'D'", "'B'"},
|
|
})
|
|
|
|
def test_nasty_left_recursion(self) -> None:
|
|
# TODO: Validate this
|
|
grammar = """
|
|
start: target '='
|
|
target: maybe '+' | NAME
|
|
maybe: maybe '-' | target
|
|
"""
|
|
self.assertEqual(self.calculate_first_sets(grammar), {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}})
|
|
|
|
def test_nullable_rule(self) -> None:
|
|
grammar = """
|
|
start: sign thing $
|
|
sign: ['-']
|
|
thing: NUMBER
|
|
"""
|
|
self.assertEqual(self.calculate_first_sets(grammar), {
|
|
"sign": {"", "'-'"},
|
|
"thing": {"NUMBER"},
|
|
"start": {"NUMBER", "'-'"},
|
|
})
|
|
|
|
def test_epsilon_production_in_start_rule(self) -> None:
|
|
grammar = """
|
|
start: ['-'] $
|
|
"""
|
|
self.assertEqual(self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}})
|
|
|
|
def test_multiple_nullable_rules(self) -> None:
|
|
grammar = """
|
|
start: sign thing other another $
|
|
sign: ['-']
|
|
thing: ['+']
|
|
other: '*'
|
|
another: '/'
|
|
"""
|
|
self.assertEqual(self.calculate_first_sets(grammar), {
|
|
"sign": {"", "'-'"},
|
|
"thing": {"'+'", ""},
|
|
"start": {"'+'", "'-'", "'*'"},
|
|
"other": {"'*'"},
|
|
"another": {"'/'"},
|
|
})
|