cpython/Tools/peg_generator/pegen/parser_generator.py

210 lines
7.0 KiB
Python

import contextlib
from abc import abstractmethod
from typing import AbstractSet, Dict, IO, Iterator, List, Optional, Set, Text, Tuple
from pegen import sccutils
from pegen.grammar import (
Grammar,
Rule,
Rhs,
Alt,
NamedItem,
Plain,
NameLeaf,
Gather,
)
from pegen.grammar import GrammarError, GrammarVisitor
class RuleCheckingVisitor(GrammarVisitor):
def __init__(self, rules: Dict[str, Rule], tokens: Dict[int, str]):
self.rules = rules
self.tokens = tokens
def visit_NameLeaf(self, node: NameLeaf) -> None:
if node.value not in self.rules and node.value not in self.tokens.values():
# TODO: Add line/col info to (leaf) nodes
raise GrammarError(f"Dangling reference to rule {node.value!r}")
def visit_NamedItem(self, node: NamedItem) -> None:
if node.name and node.name.startswith("_"):
raise GrammarError(f"Variable names cannot start with underscore: '{node.name}'")
self.visit(node.item)
class ParserGenerator:
callmakervisitor: GrammarVisitor
def __init__(self, grammar: Grammar, tokens: Dict[int, str], file: Optional[IO[Text]]):
self.grammar = grammar
self.tokens = tokens
self.rules = grammar.rules
self.validate_rule_names()
if "trailer" not in grammar.metas and "start" not in self.rules:
raise GrammarError("Grammar without a trailer must have a 'start' rule")
checker = RuleCheckingVisitor(self.rules, self.tokens)
for rule in self.rules.values():
checker.visit(rule)
self.file = file
self.level = 0
compute_nullables(self.rules)
self.first_graph, self.first_sccs = compute_left_recursives(self.rules)
self.todo = self.rules.copy() # Rules to generate
self.counter = 0 # For name_rule()/name_loop()
self.keyword_counter = 499 # For keyword_type()
self.all_rules: Dict[str, Rule] = {} # Rules + temporal rules
self._local_variable_stack: List[List[str]] = []
def validate_rule_names(self) -> None:
for rule in self.rules:
if rule.startswith("_"):
raise GrammarError(f"Rule names cannot start with underscore: '{rule}'")
@contextlib.contextmanager
def local_variable_context(self) -> Iterator[None]:
self._local_variable_stack.append([])
yield
self._local_variable_stack.pop()
@property
def local_variable_names(self) -> List[str]:
return self._local_variable_stack[-1]
@abstractmethod
def generate(self, filename: str) -> None:
raise NotImplementedError
@contextlib.contextmanager
def indent(self) -> Iterator[None]:
self.level += 1
try:
yield
finally:
self.level -= 1
def print(self, *args: object) -> None:
if not args:
print(file=self.file)
else:
print(" " * self.level, end="", file=self.file)
print(*args, file=self.file)
def printblock(self, lines: str) -> None:
for line in lines.splitlines():
self.print(line)
def collect_todo(self) -> None:
done: Set[str] = set()
while True:
alltodo = list(self.todo)
self.all_rules.update(self.todo)
todo = [i for i in alltodo if i not in done]
if not todo:
break
for rulename in todo:
self.todo[rulename].collect_todo(self)
done = set(alltodo)
def keyword_type(self) -> int:
self.keyword_counter += 1
return self.keyword_counter
def name_node(self, rhs: Rhs) -> str:
self.counter += 1
name = f"_tmp_{self.counter}" # TODO: Pick a nicer name.
self.todo[name] = Rule(name, None, rhs)
return name
def name_loop(self, node: Plain, is_repeat1: bool) -> str:
self.counter += 1
if is_repeat1:
prefix = "_loop1_"
else:
prefix = "_loop0_"
name = f"{prefix}{self.counter}" # TODO: It's ugly to signal via the name.
self.todo[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
return name
def name_gather(self, node: Gather) -> str:
self.counter += 1
name = f"_gather_{self.counter}"
self.counter += 1
extra_function_name = f"_loop0_{self.counter}"
extra_function_alt = Alt(
[NamedItem(None, node.separator), NamedItem("elem", node.node)], action="elem",
)
self.todo[extra_function_name] = Rule(
extra_function_name, None, Rhs([extra_function_alt]),
)
alt = Alt([NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],)
self.todo[name] = Rule(name, None, Rhs([alt]),)
return name
def dedupe(self, name: str) -> str:
origname = name
counter = 0
while name in self.local_variable_names:
counter += 1
name = f"{origname}_{counter}"
self.local_variable_names.append(name)
return name
def compute_nullables(rules: Dict[str, Rule]) -> None:
"""Compute which rules in a grammar are nullable.
Thanks to TatSu (tatsu/leftrec.py) for inspiration.
"""
for rule in rules.values():
rule.nullable_visit(rules)
def compute_left_recursives(
rules: Dict[str, Rule]
) -> Tuple[Dict[str, AbstractSet[str]], List[AbstractSet[str]]]:
graph = make_first_graph(rules)
sccs = list(sccutils.strongly_connected_components(graph.keys(), graph))
for scc in sccs:
if len(scc) > 1:
for name in scc:
rules[name].left_recursive = True
# Try to find a leader such that all cycles go through it.
leaders = set(scc)
for start in scc:
for cycle in sccutils.find_cycles_in_scc(graph, scc, start):
# print("Cycle:", " -> ".join(cycle))
leaders -= scc - set(cycle)
if not leaders:
raise ValueError(
f"SCC {scc} has no leadership candidate (no element is included in all cycles)"
)
# print("Leaders:", leaders)
leader = min(leaders) # Pick an arbitrary leader from the candidates.
rules[leader].leader = True
else:
name = min(scc) # The only element.
if name in graph[name]:
rules[name].left_recursive = True
rules[name].leader = True
return graph, sccs
def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
"""Compute the graph of left-invocations.
There's an edge from A to B if A may invoke B at its initial
position.
Note that this requires the nullable flags to have been computed.
"""
graph = {}
vertices: Set[str] = set()
for rulename, rhs in rules.items():
graph[rulename] = names = rhs.initial_names()
vertices |= names
for vertex in vertices:
graph.setdefault(vertex, set())
return graph