2021-09-05 10:58:52 -03:00
|
|
|
import ast
|
2020-04-22 19:29:27 -03:00
|
|
|
import contextlib
|
2021-09-05 10:58:52 -03:00
|
|
|
import re
|
2020-04-22 19:29:27 -03:00
|
|
|
from abc import abstractmethod
|
2021-09-05 10:58:52 -03:00
|
|
|
from typing import (
|
|
|
|
IO,
|
|
|
|
AbstractSet,
|
|
|
|
Any,
|
|
|
|
Dict,
|
|
|
|
Iterable,
|
|
|
|
Iterator,
|
|
|
|
List,
|
|
|
|
Optional,
|
|
|
|
Set,
|
|
|
|
Text,
|
|
|
|
Tuple,
|
|
|
|
Union,
|
|
|
|
)
|
2020-04-22 19:29:27 -03:00
|
|
|
|
|
|
|
from pegen import sccutils
|
|
|
|
from pegen.grammar import (
|
|
|
|
Alt,
|
2021-09-05 10:58:52 -03:00
|
|
|
Cut,
|
|
|
|
Forced,
|
2021-08-12 13:37:30 -03:00
|
|
|
Gather,
|
|
|
|
Grammar,
|
|
|
|
GrammarError,
|
|
|
|
GrammarVisitor,
|
2021-09-05 10:58:52 -03:00
|
|
|
Group,
|
|
|
|
Lookahead,
|
2020-04-22 19:29:27 -03:00
|
|
|
NamedItem,
|
|
|
|
NameLeaf,
|
2021-09-05 10:58:52 -03:00
|
|
|
Opt,
|
2021-08-12 13:37:30 -03:00
|
|
|
Plain,
|
2021-09-05 10:58:52 -03:00
|
|
|
Repeat0,
|
|
|
|
Repeat1,
|
2021-08-12 13:37:30 -03:00
|
|
|
Rhs,
|
|
|
|
Rule,
|
2021-09-05 10:58:52 -03:00
|
|
|
StringLeaf,
|
2020-04-22 19:29:27 -03:00
|
|
|
)
|
|
|
|
|
|
|
|
|
2021-09-05 10:58:52 -03:00
|
|
|
class RuleCollectorVisitor(GrammarVisitor):
    """Visitor that invokes a provided call-maker visitor with just the NamedItem nodes."""

    def __init__(self, rules: Dict[str, Rule], callmakervisitor: GrammarVisitor) -> None:
        """Remember the rule table and the visitor that NamedItem nodes are forwarded to.

        Args:
            rules: Mapping from rule name to rule.
            callmakervisitor: Visitor whose ``visit`` is called for each NamedItem.
        """
        self.rules = rules
        # The attribute used to be spelled "rulses"; keep the old spelling as an
        # alias so that any external reader of it keeps working.
        self.rulses = rules
        self.callmaker = callmakervisitor

    def visit_Rule(self, rule: Rule) -> None:
        """Traverse whatever ``rule.flatten()`` returns instead of the rule itself."""
        self.visit(rule.flatten())

    def visit_NamedItem(self, item: NamedItem) -> None:
        """Hand the item to the call-maker visitor; this visitor does not descend itself."""
        self.callmaker.visit(item)
|
|
|
|
|
|
|
|
|
|
|
|
class KeywordCollectorVisitor(GrammarVisitor):
    """Visitor that collects all the keywords and soft keywords in the Grammar."""

    def __init__(self, gen: "ParserGenerator", keywords: Dict[str, int], soft_keywords: Set[str]):
        """Store the generator and the two collections that are filled in place.

        Args:
            gen: Generator whose ``keyword_type()`` supplies a number for each new keyword.
            keywords: Mapping updated with ``unquoted keyword -> type number``.
            soft_keywords: Set updated with the soft keywords that are found.
        """
        self.generator = gen
        self.keywords = keywords
        self.soft_keywords = soft_keywords

    def visit_StringLeaf(self, node: StringLeaf) -> None:
        """Record the string as a keyword or soft keyword if it looks like an identifier.

        A literal whose source text ends with a single quote is a keyword;
        any other identifier-like literal is a soft keyword.
        """
        val = ast.literal_eval(node.value)
        if not re.match(r"[a-zA-Z_]\w*\Z", val):
            return  # Not identifier-like, so not a keyword of either kind.
        if node.value.endswith("'"):
            # The mapping is keyed by the unquoted text, so the membership test
            # must use ``val``.  Testing the quoted ``node.value`` never matches,
            # which hands out a new type number on every repeated occurrence.
            if val not in self.keywords:
                self.keywords[val] = self.generator.keyword_type()
        else:
            self.soft_keywords.add(node.value.replace('"', ""))
|
|
|
|
|
|
|
|
|
2020-04-22 19:29:27 -03:00
|
|
|
class RuleCheckingVisitor(GrammarVisitor):
    """Visitor that rejects unknown name references and underscore-prefixed variables."""

    def __init__(self, rules: Dict[str, Rule], tokens: Set[str]):
        """Keep the known rule table and token names used for reference checks."""
        self.rules = rules
        self.tokens = tokens

    def visit_NameLeaf(self, node: NameLeaf) -> None:
        """Raise GrammarError when the name is neither a known rule nor a known token."""
        referenced = node.value
        if referenced in self.rules or referenced in self.tokens:
            return
        raise GrammarError(f"Dangling reference to rule {node.value!r}")

    def visit_NamedItem(self, node: NamedItem) -> None:
        """Raise GrammarError for a variable name starting with "_", then check the item."""
        variable = node.name
        if variable and variable.startswith("_"):
            raise GrammarError(f"Variable names cannot start with underscore: '{node.name}'")
        self.visit(node.item)
|
|
|
|
|
2020-04-22 19:29:27 -03:00
|
|
|
|
|
|
|
class ParserGenerator:
    """Shared state and helpers for generating a parser from a grammar.

    The class validates the grammar on construction, tracks output
    indentation, collects keywords, and creates artificial helper rules.
    ``generate`` raises NotImplementedError here, and ``callmakervisitor`` is
    only declared, so both are expected to be supplied by a subclass.
    """

    callmakervisitor: GrammarVisitor

    def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]):
        """Validate the grammar and set up the generator state.

        Args:
            grammar: Grammar to generate a parser for.
            tokens: Names that rules may reference in addition to other rules.
            file: Stream that ``print`` writes to (passed to the builtin ``print``).

        Raises:
            GrammarError: If a rule or variable name starts with an underscore,
                a rule references an unknown name, or the grammar has neither a
                "trailer" meta nor a "start" rule.
        """
        self.grammar = grammar
        self.tokens = tokens
        self.keywords: Dict[str, int] = {}
        self.soft_keywords: Set[str] = set()
        self.rules = grammar.rules
        self.validate_rule_names()
        if "trailer" not in grammar.metas and "start" not in self.rules:
            raise GrammarError("Grammar without a trailer must have a 'start' rule")
        checker = RuleCheckingVisitor(self.rules, self.tokens)
        for rule in self.rules.values():
            checker.visit(rule)
        self.file = file
        self.level = 0
        self.first_graph, self.first_sccs = compute_left_recursives(self.rules)
        self.counter = 0  # For the artificial_rule_from_*() helpers
        self.keyword_counter = 499  # For keyword_type()
        # Grammar rules plus the artificial rules added by the helpers below.
        self.all_rules: Dict[str, Rule] = self.rules.copy()
        self._local_variable_stack: List[List[str]] = []

    def validate_rule_names(self) -> None:
        """Raise GrammarError if any rule name starts with an underscore."""
        for rule in self.rules:
            if rule.startswith("_"):
                raise GrammarError(f"Rule names cannot start with underscore: '{rule}'")

    @contextlib.contextmanager
    def local_variable_context(self) -> Iterator[None]:
        """Open a fresh scope of local variable names for the duration of the block."""
        self._local_variable_stack.append([])
        try:
            yield
        finally:
            # Pop even when the body raises, so a failure cannot leave a stale
            # scope on the stack (mirrors the cleanup done by indent()).
            self._local_variable_stack.pop()

    @property
    def local_variable_names(self) -> List[str]:
        """Names recorded in the innermost scope; IndexError if no scope is open."""
        return self._local_variable_stack[-1]

    @abstractmethod
    def generate(self, filename: str) -> None:
        """Produce the parser; not implemented in this base class."""
        raise NotImplementedError

    @contextlib.contextmanager
    def indent(self) -> Iterator[None]:
        """Increase the output indentation level by one for the duration of the block."""
        self.level += 1
        try:
            yield
        finally:
            self.level -= 1

    def print(self, *args: object) -> None:
        """Write one line to ``self.file``.

        With no arguments only a newline is written; otherwise the arguments
        are printed after one copy of the indent unit per indentation level.
        """
        if not args:
            print(file=self.file)
        else:
            print(" " * self.level, end="", file=self.file)
            print(*args, file=self.file)

    def printblock(self, lines: str) -> None:
        """Print each line of a multi-line string at the current indentation."""
        for line in lines.splitlines():
            self.print(line)

    def collect_rules(self) -> None:
        """Collect keywords from all rules, then run the call-maker over every rule."""
        keyword_collector = KeywordCollectorVisitor(self, self.keywords, self.soft_keywords)
        for rule in self.all_rules.values():
            keyword_collector.visit(rule)

        rule_collector = RuleCollectorVisitor(self.rules, self.callmakervisitor)
        done: Set[str] = set()
        # Repeat until a pass finds no rule name that has not been handled yet.
        # Presumably visiting can add artificial rules to all_rules, which is
        # why the names are snapshotted before each pass.
        while True:
            computed_rules = list(self.all_rules)
            todo = [i for i in computed_rules if i not in done]
            if not todo:
                break
            done = set(self.all_rules)
            for rulename in todo:
                rule_collector.visit(self.all_rules[rulename])

    def keyword_type(self) -> int:
        """Return the next unused keyword type number (the first one is 500)."""
        self.keyword_counter += 1
        return self.keyword_counter

    def artificial_rule_from_rhs(self, rhs: Rhs) -> str:
        """Register a new ``_tmp_<n>`` rule with the given right-hand side; return its name."""
        self.counter += 1
        name = f"_tmp_{self.counter}"  # TODO: Pick a nicer name.
        self.all_rules[name] = Rule(name, None, rhs)
        return name

    def artificial_rule_from_repeat(self, node: Plain, is_repeat1: bool) -> str:
        """Register a ``_loop1_<n>`` or ``_loop0_<n>`` rule wrapping ``node``; return its name.

        Args:
            node: Item that becomes the single unnamed item of the new rule.
            is_repeat1: Selects the "_loop1_" prefix when true, "_loop0_" otherwise.
        """
        self.counter += 1
        prefix = "_loop1_" if is_repeat1 else "_loop0_"
        name = f"{prefix}{self.counter}"
        self.all_rules[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
        return name

    def artificial_rule_from_gather(self, node: Gather) -> str:
        """Register the two rules that implement a gather; return the ``_gather_<n>`` name.

        The first rule, ``_loop0_<n+1>``, matches ``separator elem`` with the
        action "elem".  The second, ``_gather_<n>``, matches ``elem`` followed
        by a reference to the first rule, bound to "seq".
        """
        self.counter += 1
        name = f"_gather_{self.counter}"
        self.counter += 1
        extra_function_name = f"_loop0_{self.counter}"
        extra_function_alt = Alt(
            [NamedItem(None, node.separator), NamedItem("elem", node.node)],
            action="elem",
        )
        self.all_rules[extra_function_name] = Rule(
            extra_function_name,
            None,
            Rhs([extra_function_alt]),
        )
        alt = Alt(
            [NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],
        )
        self.all_rules[name] = Rule(
            name,
            None,
            Rhs([alt]),
        )
        return name

    def dedupe(self, name: str) -> str:
        """Return ``name``, or ``name_<k>`` for the first free k, and record it.

        The chosen name is appended to the innermost local-variable scope, so
        asking for the same name again yields a different result.
        """
        origname = name
        counter = 0
        while name in self.local_variable_names:
            counter += 1
            name = f"{origname}_{counter}"
        self.local_variable_names.append(name)
        return name
|
2020-04-22 19:29:27 -03:00
|
|
|
|
|
|
|
|
2021-09-05 10:58:52 -03:00
|
|
|
class NullableVisitor(GrammarVisitor):
    """Visitor that records which rules and named items can match the empty string.

    Each ``visit_*`` method returns whether the visited node is nullable;
    nullable Rule and NamedItem nodes are also accumulated in ``nullables``.
    """

    def __init__(self, rules: Dict[str, Rule]) -> None:
        """Start with empty ``visited`` and ``nullables`` sets over the given rule table."""
        self.rules = rules
        self.visited: Set[Any] = set()
        self.nullables: Set[Union[Rule, NamedItem]] = set()

    def visit_Rule(self, rule: Rule) -> bool:
        """Visit a rule once; a rule that was already seen reports False."""
        # The rule is marked as visited before its right-hand side is
        # explored, so meeting it again during that exploration ends here.
        already_seen = rule in self.visited
        if already_seen:
            return False
        self.visited.add(rule)
        rhs_is_nullable = self.visit(rule.rhs)
        if rhs_is_nullable:
            self.nullables.add(rule)
        return rule in self.nullables

    def visit_Rhs(self, rhs: Rhs) -> bool:
        """Nullable when any alternative is; stops at the first nullable one."""
        return any(self.visit(alternative) for alternative in rhs.alts)

    def visit_Alt(self, alt: Alt) -> bool:
        """Nullable when every item is; stops at the first item that is not."""
        return all(self.visit(element) for element in alt.items)

    def visit_Forced(self, force: Forced) -> bool:
        """Forced items count as nullable."""
        return True

    # NOTE(review): the imported node class is spelled "Lookahead"; if the base
    # visitor dispatches on the node's class name this method may never be
    # selected -- confirm against GrammarVisitor.
    def visit_LookAhead(self, lookahead: Lookahead) -> bool:
        """Lookaheads count as nullable."""
        return True

    def visit_Opt(self, opt: Opt) -> bool:
        """Optional items count as nullable."""
        return True

    def visit_Repeat0(self, repeat: Repeat0) -> bool:
        """Zero-or-more repetitions count as nullable."""
        return True

    def visit_Repeat1(self, repeat: Repeat1) -> bool:
        """One-or-more repetitions are reported as not nullable."""
        return False

    def visit_Gather(self, gather: Gather) -> bool:
        """Gathers are reported as not nullable."""
        return False

    def visit_Cut(self, cut: Cut) -> bool:
        """Cuts are reported as not nullable."""
        return False

    def visit_Group(self, group: Group) -> bool:
        """A group is as nullable as its right-hand side."""
        return self.visit(group.rhs)

    def visit_NamedItem(self, item: NamedItem) -> bool:
        """Record the named item as nullable when the item it wraps is."""
        wrapped_is_nullable = self.visit(item.item)
        if wrapped_is_nullable:
            self.nullables.add(item)
        return item in self.nullables

    def visit_NameLeaf(self, node: NameLeaf) -> bool:
        """Follow a reference to a known rule; anything else is not nullable."""
        target = node.value
        if target not in self.rules:
            # Token or unknown; never empty.
            return False
        return self.visit(self.rules[node.value])

    def visit_StringLeaf(self, node: StringLeaf) -> bool:
        """Report a string leaf as nullable only when its value is falsy."""
        # The string token '' is considered empty.
        # NOTE(review): KeywordCollectorVisitor in this file treats node.value as
        # still carrying its quotes; if so this test never succeeds -- confirm.
        return not node.value
|
|
|
|
|
|
|
|
|
|
|
|
def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]:
    """Return the nullable nodes found by running a NullableVisitor over every rule.

    The result is the visitor's ``nullables`` set, which holds the Rule and
    NamedItem objects that the visitor judged able to match nothing.

    Thanks to TatSu (tatsu/leftrec.py) for inspiration.
    """
    visitor = NullableVisitor(rules)
    for rule_name in rules:
        visitor.visit(rules[rule_name])
    return visitor.nullables
|
|
|
|
|
|
|
|
|
|
|
|
class InitialNamesVisitor(GrammarVisitor):
    """Visitor returning the set of names a node may reference at its initial position."""

    def __init__(self, rules: Dict[str, Rule]) -> None:
        """Keep the rule table and compute its nullable nodes up front."""
        self.rules = rules
        self.nullables = compute_nullables(rules)

    def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[Any]:
        """Union the results of visiting every child, looking one level into lists."""
        names: Set[str] = set()
        for value in node:
            # A list child contributes each of its elements; anything else is
            # visited directly.
            children = value if isinstance(value, list) else [value]
            for child in children:
                names |= self.visit(child, *args, **kwargs)
        return names

    def visit_Alt(self, alt: Alt) -> Set[Any]:
        """Collect names from the leading items, up to and including the first non-nullable one."""
        collected: Set[str] = set()
        for element in alt.items:
            collected |= self.visit(element)
            if element in self.nullables:
                # A nullable item may match nothing, so the next item can
                # also sit at the initial position.
                continue
            break
        return collected

    def visit_Forced(self, force: Forced) -> Set[Any]:
        """Forced items contribute no names."""
        return set()

    # NOTE(review): the imported node class is spelled "Lookahead"; if the base
    # visitor dispatches on the node's class name this method may never be
    # selected -- confirm against GrammarVisitor.
    def visit_LookAhead(self, lookahead: Lookahead) -> Set[Any]:
        """Lookaheads contribute no names."""
        return set()

    def visit_Cut(self, cut: Cut) -> Set[Any]:
        """Cuts contribute no names."""
        return set()

    def visit_NameLeaf(self, node: NameLeaf) -> Set[Any]:
        """A name reference contributes exactly its own name."""
        return {node.value}

    def visit_StringLeaf(self, node: StringLeaf) -> Set[Any]:
        """String literals contribute no names."""
        return set()
|
2020-04-22 19:29:27 -03:00
|
|
|
|
|
|
|
|
|
|
|
def compute_left_recursives(
    rules: Dict[str, Rule]
) -> Tuple[Dict[str, AbstractSet[str]], List[AbstractSet[str]]]:
    """Flag left-recursive rules and their leaders; return the first-graph and its SCCs.

    Every rule in a strongly connected component (SCC) with more than one
    member, and every single rule that invokes itself at its initial position,
    gets ``left_recursive = True``.  One rule per such component also gets
    ``leader = True``.

    Raises:
        ValueError: If a multi-rule SCC has no rule that lies on all its cycles.
    """
    graph = make_first_graph(rules)
    sccs = list(sccutils.strongly_connected_components(graph.keys(), graph))
    for component in sccs:
        if len(component) <= 1:
            only = min(component)  # The only element.
            if only in graph[only]:
                rules[only].left_recursive = True
                rules[only].leader = True
            continue

        for member in component:
            rules[member].left_recursive = True
        # Try to find a leader such that all cycles go through it.
        candidates = set(component)
        for start in component:
            for cycle in sccutils.find_cycles_in_scc(graph, component, start):
                # Drop every candidate that this cycle does not pass through.
                candidates -= component - set(cycle)
                if not candidates:
                    raise ValueError(
                        f"SCC {component} has no leadership candidate (no element is included in all cycles)"
                    )
        chosen = min(candidates)  # Pick an arbitrary leader from the candidates.
        rules[chosen].leader = True
    return graph, sccs
|
|
|
|
|
|
|
|
|
|
|
|
def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
    """Compute the graph of left-invocations.

    There's an edge from A to B if A may invoke B at its initial
    position.

    Every name that appears as an edge target also gets a vertex of its own,
    with an empty edge set unless it is itself a rule.  Nullability is
    computed by the InitialNamesVisitor created here.
    """
    visitor = InitialNamesVisitor(rules)
    graph = {}
    targets: Set[str] = set()
    for rulename, rule in rules.items():
        initial_names = visitor.visit(rule)
        graph[rulename] = initial_names
        targets |= initial_names
    for target in targets:
        graph.setdefault(target, set())
    return graph
|