mirror of https://github.com/python/cpython
471 lines
12 KiB
Python
471 lines
12 KiB
Python
from __future__ import annotations
|
|
|
|
from abc import abstractmethod
|
|
from typing import (
|
|
AbstractSet,
|
|
Any,
|
|
Callable,
|
|
Dict,
|
|
Iterable,
|
|
Iterator,
|
|
List,
|
|
Optional,
|
|
Set,
|
|
Tuple,
|
|
TYPE_CHECKING,
|
|
TypeVar,
|
|
Union,
|
|
)
|
|
|
|
from pegen.parser import memoize, Parser
|
|
|
|
if TYPE_CHECKING:
|
|
from pegen.parser_generator import ParserGenerator
|
|
|
|
|
|
class GrammarError(Exception):
    """Exception type for grammar errors; adds nothing to ``Exception``."""
|
|
|
|
|
|
class GrammarVisitor:
    """Visitor that dispatches on the class name of each node."""

    def visit(self, node: Any, *args: Any, **kwargs: Any) -> Any:
        """Visit a node.

        Looks up ``visit_<ClassName>`` on this visitor and calls it with the
        node and any extra arguments; ``generic_visit`` is used when no such
        method exists.  The handler's return value is passed through.
        """
        class_name = node.__class__.__name__
        handler = getattr(self, f"visit_{class_name}", self.generic_visit)
        return handler(node, *args, **kwargs)

    def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> None:
        """Called if no explicit visitor function exists for a node.

        Iterates over the node; each yielded value is visited directly, except
        that a list has its elements visited one by one.
        """
        for child in node:
            # Treat a single child as a one-element batch so both shapes share
            # the same visiting loop.
            batch = child if isinstance(child, list) else [child]
            for member in batch:
                self.visit(member, *args, **kwargs)
|
|
|
|
|
|
class Grammar:
    """A set of rules indexed by rule name, plus the grammar's meta entries.

    Attributes:
        rules: dict mapping ``rule.name`` to the rule, in the order given.
        metas: dict built from the ``(key, value)`` meta pairs.
    """

    def __init__(self, rules: Iterable[Rule], metas: Iterable[Tuple[str, Optional[str]]]):
        self.rules = {rule.name: rule for rule in rules}
        self.metas = dict(metas)

    def __str__(self) -> str:
        """Return the ``str()`` of every rule, one per line."""
        return "\n".join(str(rule) for rule in self.rules.values())

    def __repr__(self) -> str:
        """Return a multi-line ``Grammar(...)`` listing the rules and the metas."""
        lines = ["Grammar("]
        lines.append(" [")
        lines.extend(f" {rule!r}," for rule in self.rules.values())
        lines.append(" ],")
        # This line must be an f-string; without the prefix the placeholder
        # text itself ended up in the output instead of the meta list.
        lines.append(f" {list(self.metas.items())!r}")
        lines.append(")")
        return "\n".join(lines)

    def __iter__(self) -> Iterator[Rule]:
        """Iterate over the rules (the dict values), in insertion order."""
        yield from self.rules.values()
|
|
|
|
|
|
# Global flag: when True, __str__() output is simplified (rule types, item names and actions are omitted), so actions are off by default.
|
|
SIMPLE_STR = True  # Read by the __str__() methods of Rule, Alt and NamedItem.
|
|
|
|
|
|
class Rule:
    """A single grammar rule: a name, an optional type, and a right-hand side.

    Attributes:
        name: the rule name.
        type: optional type string, shown as ``name[type]`` in the full form.
        rhs: the right-hand side object.
        memo: truthiness of the ``memo`` constructor argument.
        visited: set to True by ``nullable_visit`` the first time it runs.
        nullable: result of the last ``nullable_visit`` computation.
        left_recursive, leader: initialised to False here and not modified by
            this class.
    """

    def __init__(self, name: str, type: Optional[str], rhs: Rhs, memo: Optional[object] = None):
        self.name = name
        self.type = type
        self.rhs = rhs
        self.memo = bool(memo)
        self.visited = False
        self.nullable = False
        self.left_recursive = False
        self.leader = False

    def is_loop(self) -> bool:
        """Return True if the rule name starts with ``_loop``."""
        return self.name.startswith("_loop")

    def is_gather(self) -> bool:
        """Return True if the rule name starts with ``_gather``."""
        return self.name.startswith("_gather")

    def __str__(self) -> str:
        """Render the rule on one line, or one alternative per line if long.

        The type is only shown when ``SIMPLE_STR`` is false and a type is set.
        Results shorter than 88 characters are returned as a single line.
        """
        if SIMPLE_STR or self.type is None:
            head = f"{self.name}:"
        else:
            head = f"{self.name}[{self.type}]:"
        res = f"{head} {self.rhs}"
        if len(res) < 88:
            return res
        # Reuse the header built above rather than splitting ``res`` on ":";
        # a type containing a colon (e.g. "ns::Type") would be cut short.
        lines = [head]
        lines += [f" | {alt}" for alt in self.rhs.alts]
        return "\n".join(lines)

    def __repr__(self) -> str:
        return f"Rule({self.name!r}, {self.type!r}, {self.rhs!r})"

    def __iter__(self) -> Iterator[Rhs]:
        """Yield the right-hand side as the rule's only child."""
        yield self.rhs

    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
        """Compute, record and return whether the right-hand side is nullable.

        A rule that was already visited returns False without recomputing,
        which also stops recursion through left-recursive rules.
        """
        if self.visited:
            # A left-recursive rule is considered non-nullable.
            return False
        self.visited = True
        self.nullable = self.rhs.nullable_visit(rules)
        return self.nullable

    def initial_names(self) -> AbstractSet[str]:
        """Return the initial names of the right-hand side."""
        return self.rhs.initial_names()

    def flatten(self) -> Rhs:
        """Return the rhs, unwrapping a lone parenthesized group.

        The unwrapping applies only to non-loop rules whose rhs has exactly
        one alternative with exactly one item, that item being a ``Group``.
        """
        # If it's a single parenthesized group, flatten it.
        rhs = self.rhs
        if (
            not self.is_loop()
            and len(rhs.alts) == 1
            and len(rhs.alts[0].items) == 1
            and isinstance(rhs.alts[0].items[0].item, Group)
        ):
            rhs = rhs.alts[0].items[0].item.rhs
        return rhs

    def collect_todo(self, gen: ParserGenerator) -> None:
        """Forward ``collect_todo`` to the flattened right-hand side."""
        rhs = self.flatten()
        rhs.collect_todo(gen)
|
|
|
|
|
|
class Leaf:
    """Base class for leaf nodes: wraps one string ``value`` and has no children."""

    def __init__(self, value: str):
        self.value = value

    def __str__(self) -> str:
        return self.value

    def __iter__(self) -> Iterable[str]:
        # Always-empty generator: a leaf has nothing to iterate over.
        yield from ()

    @abstractmethod
    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
        """Subclasses report whether the leaf is nullable; the base raises."""
        raise NotImplementedError

    @abstractmethod
    def initial_names(self) -> AbstractSet[str]:
        """Subclasses return the leaf's initial names; the base raises."""
        raise NotImplementedError
|
|
|
|
|
|
class NameLeaf(Leaf):
    """The value is the name."""

    def __str__(self) -> str:
        # ENDMARKER is displayed as "$"; every other name is shown as-is.
        return "$" if self.value == "ENDMARKER" else super().__str__()

    def __repr__(self) -> str:
        return "NameLeaf({!r})".format(self.value)

    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
        """Delegate to the named rule if ``rules`` has one, else return False."""
        if self.value not in rules:
            # Token or unknown; never empty.
            return False
        return rules[self.value].nullable_visit(rules)

    def initial_names(self) -> AbstractSet[str]:
        """Return a one-element set holding this leaf's name."""
        return {self.value}
|
|
|
|
|
|
class StringLeaf(Leaf):
    """The value is a string literal, including quotes."""

    def __repr__(self) -> str:
        return f"StringLeaf({self.value!r})"

    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
        """Return True only for the empty string literal.

        ``rules`` is unused; it is accepted for signature parity with the
        other node classes.
        """
        # The string token '' is considered empty.  Because ``value`` keeps
        # its surrounding quotes, the empty literal arrives as '' or "" (two
        # quote characters), not as a zero-length string; a bare truthiness
        # test would never match it.  A zero-length value is still accepted.
        return self.value in ("", "''", '""')

    def initial_names(self) -> AbstractSet[str]:
        """A string literal contributes no names: always the empty set."""
        return set()
|
|
|
|
|
|
class Rhs:
    """Right-hand side of a rule: a list of alternatives joined by ``|``."""

    def __init__(self, alts: List[Alt]):
        self.alts = alts
        # Initialised to None; nothing in this class assigns it afterwards.
        self.memo: Optional[Tuple[Optional[str], str]] = None

    def __str__(self) -> str:
        return " | ".join(map(str, self.alts))

    def __repr__(self) -> str:
        return "Rhs({!r})".format(self.alts)

    def __iter__(self) -> Iterator[List[Alt]]:
        """Yield the alternatives list itself as a single value."""
        yield self.alts

    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
        """Return True as soon as one alternative is nullable.

        Alternatives after the first nullable one are not visited.
        """
        return any(alt.nullable_visit(rules) for alt in self.alts)

    def initial_names(self) -> AbstractSet[str]:
        """Return the union of the initial names of all alternatives."""
        collected: Set[str] = set()
        collected.update(*(alt.initial_names() for alt in self.alts))
        return collected

    def collect_todo(self, gen: ParserGenerator) -> None:
        """Forward ``collect_todo`` to every alternative, in order."""
        for alternative in self.alts:
            alternative.collect_todo(gen)
|
|
|
|
|
|
class Alt:
    """One alternative: a sequence of items, an optional cut index and action."""

    def __init__(self, items: List[NamedItem], *, icut: int = -1, action: Optional[str] = None):
        self.items = items
        self.icut = icut
        self.action = action

    def __str__(self) -> str:
        """Join the items with spaces; append ``{ action }`` in the full form.

        The action is only shown when ``SIMPLE_STR`` is false and an action
        is set.
        """
        core = " ".join(map(str, self.items))
        if SIMPLE_STR or not self.action:
            return core
        return f"{core} {{ {self.action} }}"

    def __repr__(self) -> str:
        """Show the items, plus ``icut`` when non-negative and a truthy action."""
        pieces = [repr(self.items)]
        if self.icut >= 0:
            pieces.append(f"icut={self.icut}")
        if self.action:
            pieces.append(f"action={self.action!r}")
        joined = ", ".join(pieces)
        return f"Alt({joined})"

    def __iter__(self) -> Iterator[List[NamedItem]]:
        """Yield the items list itself as a single value."""
        yield self.items

    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
        """Return True if every item is nullable.

        Visiting stops at the first non-nullable item; an empty alternative
        is nullable.
        """
        return all(item.nullable_visit(rules) for item in self.items)

    def initial_names(self) -> AbstractSet[str]:
        """Collect initial names up to and including the first non-nullable item.

        Relies on each item's ``nullable`` attribute as currently stored.
        """
        found: Set[str] = set()
        for element in self.items:
            found |= element.initial_names()
            if not element.nullable:
                break
        return found

    def collect_todo(self, gen: ParserGenerator) -> None:
        """Forward ``collect_todo`` to every item, in order."""
        for element in self.items:
            element.collect_todo(gen)
|
|
|
|
|
|
class NamedItem:
    """An item with an optional name, rendered as ``name=item`` in the full form."""

    def __init__(self, name: Optional[str], item: Item):
        self.name = name
        self.item = item
        # Updated by nullable_visit(); False until then.
        self.nullable = False

    def __str__(self) -> str:
        """Show just the item unless ``SIMPLE_STR`` is false and a name is set."""
        if SIMPLE_STR or not self.name:
            return str(self.item)
        return f"{self.name}={self.item}"

    def __repr__(self) -> str:
        return "NamedItem({!r}, {!r})".format(self.name, self.item)

    def __iter__(self) -> Iterator[Item]:
        """Yield the wrapped item as the only child."""
        yield self.item

    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
        """Store the wrapped item's nullability on ``self.nullable`` and return it."""
        result = self.item.nullable_visit(rules)
        self.nullable = result
        return self.nullable

    def initial_names(self) -> AbstractSet[str]:
        """Return the wrapped item's initial names."""
        return self.item.initial_names()

    def collect_todo(self, gen: ParserGenerator) -> None:
        """Hand the wrapped item to ``gen.callmakervisitor.visit``."""
        gen.callmakervisitor.visit(self.item)
|
|
|
|
|
|
class Lookahead:
    """Base for lookahead nodes: a wrapped node plus the sign shown before it."""

    def __init__(self, node: Plain, sign: str):
        self.node = node
        self.sign = sign

    def __str__(self) -> str:
        return "{}{}".format(self.sign, self.node)

    def __iter__(self) -> Iterator[Plain]:
        """Yield the wrapped node as the only child."""
        yield self.node

    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
        """A lookahead always reports itself as nullable."""
        return True

    def initial_names(self) -> AbstractSet[str]:
        """A lookahead contributes no initial names: always the empty set."""
        return set()
|
|
|
|
|
|
class PositiveLookahead(Lookahead):
    """Lookahead whose sign is ``&``."""

    def __init__(self, node: Plain):
        super().__init__(node, sign="&")

    def __repr__(self) -> str:
        return "PositiveLookahead({!r})".format(self.node)
|
|
|
|
|
|
class NegativeLookahead(Lookahead):
    """Lookahead whose sign is ``!``."""

    def __init__(self, node: Plain):
        super().__init__(node, sign="!")

    def __repr__(self) -> str:
        return "NegativeLookahead({!r})".format(self.node)
|
|
|
|
|
|
class Opt:
    """Optional node, rendered as ``[X]`` or ``X?``."""

    def __init__(self, node: Item):
        self.node = node

    def __str__(self) -> str:
        """Use brackets when the node's text contains a space, else a ``?`` suffix."""
        text = str(self.node)
        # TODO: Decide whether to use [X] or X? based on type of X
        return f"[{text}]" if " " in text else f"{text}?"

    def __repr__(self) -> str:
        return "Opt({!r})".format(self.node)

    def __iter__(self) -> Iterator[Item]:
        """Yield the wrapped node as the only child."""
        yield self.node

    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
        """An optional node always reports itself as nullable."""
        return True

    def initial_names(self) -> AbstractSet[str]:
        """Return the wrapped node's initial names."""
        return self.node.initial_names()
|
|
|
|
|
|
class Repeat:
    """Shared base class for x* and x+."""

    def __init__(self, node: Plain):
        self.node = node
        # Initialised to None; nothing in this class assigns it afterwards.
        self.memo: Optional[Tuple[Optional[str], str]] = None

    @abstractmethod
    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
        """Subclasses report whether the repetition is nullable; the base raises."""
        raise NotImplementedError

    def __iter__(self) -> Iterator[Plain]:
        """Yield the repeated node as the only child."""
        yield self.node

    def initial_names(self) -> AbstractSet[str]:
        """Return the repeated node's initial names."""
        inner = self.node
        return inner.initial_names()
|
|
|
|
|
|
class Repeat0(Repeat):
    """Repetition rendered with a ``*`` suffix."""

    def __str__(self) -> str:
        """Parenthesize the node's text when it contains a space."""
        text = str(self.node)
        # TODO: Decide whether to use (X)* or X* based on type of X
        return f"({text})*" if " " in text else f"{text}*"

    def __repr__(self) -> str:
        return "Repeat0({!r})".format(self.node)

    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
        """Always nullable."""
        return True
|
|
|
|
|
|
class Repeat1(Repeat):
    """Repetition rendered with a ``+`` suffix."""

    def __str__(self) -> str:
        """Parenthesize the node's text when it contains a space."""
        text = str(self.node)
        # TODO: Decide whether to use (X)+ or X+ based on type of X
        return f"({text})+" if " " in text else f"{text}+"

    def __repr__(self) -> str:
        return "Repeat1({!r})".format(self.node)

    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
        """Never nullable."""
        return False
|
|
|
|
|
|
class Gather(Repeat):
    """Separated repetition, rendered as ``separator.node+``."""

    def __init__(self, separator: Plain, node: Plain):
        # Let the base class set ``node`` and ``memo`` so a Gather carries the
        # same attributes as every other Repeat; previously ``memo`` was
        # missing on Gather instances.
        super().__init__(node)
        self.separator = separator

    def __str__(self) -> str:
        return f"{self.separator!s}.{self.node!s}+"

    def __repr__(self) -> str:
        return f"Gather({self.separator!r}, {self.node!r})"

    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
        """Never nullable."""
        return False
|
|
|
|
|
|
class Group:
    """Parenthesized right-hand side; delegates analysis to the wrapped rhs."""

    def __init__(self, rhs: Rhs):
        self.rhs = rhs

    def __str__(self) -> str:
        return "({})".format(self.rhs)

    def __repr__(self) -> str:
        return "Group({!r})".format(self.rhs)

    def __iter__(self) -> Iterator[Rhs]:
        """Yield the wrapped rhs as the only child."""
        yield self.rhs

    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
        """Return the wrapped rhs's nullability."""
        inner = self.rhs
        return inner.nullable_visit(rules)

    def initial_names(self) -> AbstractSet[str]:
        """Return the wrapped rhs's initial names."""
        inner = self.rhs
        return inner.initial_names()
|
|
|
|
|
|
class Cut:
    """Cut marker, rendered as ``~``.  Carries no state; all cuts are equal."""

    def __init__(self) -> None:
        pass

    def __repr__(self) -> str:
        return "Cut()"

    def __str__(self) -> str:
        return "~"

    def __iter__(self) -> Iterator[Tuple[str, str]]:
        # Always-empty generator: a cut has no children.
        if False:
            yield

    def __eq__(self, other: object) -> bool:
        """Any two ``Cut`` instances compare equal; other types defer."""
        if not isinstance(other, Cut):
            return NotImplemented
        return True

    def __hash__(self) -> int:
        # Defining __eq__ alone sets __hash__ to None and makes instances
        # unhashable.  Since all cuts are equal, they share one constant hash.
        return hash(Cut)

    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
        """A cut always reports itself as nullable."""
        return True

    def initial_names(self) -> AbstractSet[str]:
        """A cut contributes no initial names: always the empty set."""
        return set()
|
|
|
|
|
|
Plain = Union[Leaf, Group]  # A leaf or a parenthesized group.
|
|
Item = Union[Plain, Opt, Repeat, Lookahead, Rhs, Cut]  # Type of NamedItem.item and Opt.node.
|
|
RuleName = Tuple[str, str]  # NOTE(review): presumably (name, type); confirm against the users of this alias.
|
|
MetaTuple = Tuple[str, Optional[str]]  # Same shape as one element of Grammar's ``metas`` argument.
|
|
MetaList = List[MetaTuple]  # List of meta entries.
|
|
RuleList = List[Rule]  # List of rules.
|
|
NamedItemList = List[NamedItem]  # Same type as Alt.items.
|
|
LookaheadOrCut = Union[Lookahead, Cut]  # Either node kind whose nullable_visit() always returns True.
|