Extract visitors from the grammar nodes and call makers in the peg generator (GH-28172)

Simplify the peg generator logic by extracting as much visitors as possible to disentangle the flow and separate concerns.
This commit is contained in:
Pablo Galindo Salgado 2021-09-05 14:58:52 +01:00 committed by GitHub
parent 28264269de
commit b01fd533fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 419 additions and 415 deletions

View File

@ -15,6 +15,7 @@ with test_tools.imports_under_tool("peg_generator"):
from pegen.grammar import GrammarVisitor, GrammarError, Grammar
from pegen.grammar_visualizer import ASTGrammarPrinter
from pegen.parser import Parser
from pegen.parser_generator import compute_nullables, compute_left_recursives
from pegen.python_generator import PythonParserGenerator
@ -502,11 +503,10 @@ class TestPegen(unittest.TestCase):
sign: ['-' | '+']
"""
grammar: Grammar = parse_string(grammar_source, GrammarParser)
out = io.StringIO()
genr = PythonParserGenerator(grammar, out)
rules = grammar.rules
self.assertFalse(rules["start"].nullable) # Not None!
self.assertTrue(rules["sign"].nullable)
nullables = compute_nullables(rules)
self.assertNotIn(rules["start"], nullables) # Not None!
self.assertIn(rules["sign"], nullables)
def test_advanced_left_recursive(self) -> None:
grammar_source = """
@ -514,11 +514,11 @@ class TestPegen(unittest.TestCase):
sign: ['-']
"""
grammar: Grammar = parse_string(grammar_source, GrammarParser)
out = io.StringIO()
genr = PythonParserGenerator(grammar, out)
rules = grammar.rules
self.assertFalse(rules["start"].nullable) # Not None!
self.assertTrue(rules["sign"].nullable)
nullables = compute_nullables(rules)
compute_left_recursives(rules)
self.assertNotIn(rules["start"], nullables) # Not None!
self.assertIn(rules["sign"], nullables)
self.assertTrue(rules["start"].left_recursive)
self.assertFalse(rules["sign"].left_recursive)

File diff suppressed because it is too large Load Diff

View File

@ -10,10 +10,9 @@ import sys
import time
import token
import traceback
from typing import Tuple
from pegen.build import Grammar, Parser, Tokenizer, ParserGenerator
from pegen.build import Grammar, Parser, ParserGenerator, Tokenizer
from pegen.validator import validate_grammar

View File

@ -1,11 +1,10 @@
import itertools
import pathlib
import shutil
import tokenize
import sysconfig
import tempfile
import itertools
from typing import Optional, Tuple, List, IO, Set, Dict
import tokenize
from typing import IO, Dict, List, Optional, Set, Tuple
from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
@ -45,9 +44,9 @@ def compile_c_extension(
of distutils (this is useful in case you want to use a temporary directory).
"""
import distutils.log
from distutils.core import Distribution, Extension
from distutils.command.clean import clean # type: ignore
from distutils.command.build_ext import build_ext # type: ignore
from distutils.command.clean import clean # type: ignore
from distutils.core import Distribution, Extension
from distutils.tests.support import fixup_build_ext # type: ignore
if verbose:

View File

@ -1,8 +1,8 @@
import ast
from dataclasses import field, dataclass
import re
from typing import Any, Dict, IO, Optional, List, Text, Tuple, Set
from dataclasses import dataclass, field
from enum import Enum
from typing import IO, Any, Dict, List, Optional, Set, Text, Tuple
from pegen import grammar
from pegen.grammar import (
@ -27,7 +27,6 @@ from pegen.grammar import (
)
from pegen.parser_generator import ParserGenerator
EXTENSION_PREFIX = """\
#include "pegen.h"
@ -120,23 +119,18 @@ class CCallMakerVisitor(GrammarVisitor):
self.exact_tokens = exact_tokens
self.non_exact_tokens = non_exact_tokens
self.cache: Dict[Any, FunctionCall] = {}
self.keyword_cache: Dict[str, int] = {}
self.soft_keywords: Set[str] = set()
def keyword_helper(self, keyword: str) -> FunctionCall:
if keyword not in self.keyword_cache:
self.keyword_cache[keyword] = self.gen.keyword_type()
return FunctionCall(
assigned_variable="_keyword",
function="_PyPegen_expect_token",
arguments=["p", self.keyword_cache[keyword]],
arguments=["p", self.gen.keywords[keyword]],
return_type="Token *",
nodetype=NodeTypes.KEYWORD,
comment=f"token='{keyword}'",
)
def soft_keyword_helper(self, value: str) -> FunctionCall:
self.soft_keywords.add(value.replace('"', ""))
return FunctionCall(
assigned_variable="_keyword",
function="_PyPegen_expect_soft_keyword",
@ -200,20 +194,12 @@ class CCallMakerVisitor(GrammarVisitor):
)
def visit_Rhs(self, node: Rhs) -> FunctionCall:
def can_we_inline(node: Rhs) -> int:
if len(node.alts) != 1 or len(node.alts[0].items) != 1:
return False
# If the alternative has an action we cannot inline
if getattr(node.alts[0], "action", None) is not None:
return False
return True
if node in self.cache:
return self.cache[node]
if can_we_inline(node):
if node.can_be_inlined:
self.cache[node] = self.generate_call(node.alts[0].items[0])
else:
name = self.gen.name_node(node)
name = self.gen.artifical_rule_from_rhs(node)
self.cache[node] = FunctionCall(
assigned_variable=f"{name}_var",
function=f"{name}_rule",
@ -306,7 +292,7 @@ class CCallMakerVisitor(GrammarVisitor):
def visit_Repeat0(self, node: Repeat0) -> FunctionCall:
if node in self.cache:
return self.cache[node]
name = self.gen.name_loop(node.node, False)
name = self.gen.artificial_rule_from_repeat(node.node, False)
self.cache[node] = FunctionCall(
assigned_variable=f"{name}_var",
function=f"{name}_rule",
@ -319,7 +305,7 @@ class CCallMakerVisitor(GrammarVisitor):
def visit_Repeat1(self, node: Repeat1) -> FunctionCall:
if node in self.cache:
return self.cache[node]
name = self.gen.name_loop(node.node, True)
name = self.gen.artificial_rule_from_repeat(node.node, True)
self.cache[node] = FunctionCall(
assigned_variable=f"{name}_var",
function=f"{name}_rule",
@ -332,7 +318,7 @@ class CCallMakerVisitor(GrammarVisitor):
def visit_Gather(self, node: Gather) -> FunctionCall:
if node in self.cache:
return self.cache[node]
name = self.gen.name_gather(node)
name = self.gen.artifical_rule_from_gather(node)
self.cache[node] = FunctionCall(
assigned_variable=f"{name}_var",
function=f"{name}_rule",
@ -429,7 +415,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.print(f"}}")
def generate(self, filename: str) -> None:
self.collect_todo()
self.collect_rules()
self.print(f"// @generated by pegen from {filename}")
header = self.grammar.metas.get("header", EXTENSION_PREFIX)
if header:
@ -439,11 +425,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.print(subheader)
self._setup_keywords()
self._setup_soft_keywords()
for i, (rulename, rule) in enumerate(self.todo.items(), 1000):
for i, (rulename, rule) in enumerate(self.all_rules.items(), 1000):
comment = " // Left-recursive" if rule.left_recursive else ""
self.print(f"#define {rulename}_type {i}{comment}")
self.print()
for rulename, rule in self.todo.items():
for rulename, rule in self.all_rules.items():
if rule.is_loop() or rule.is_gather():
type = "asdl_seq *"
elif rule.type:
@ -452,13 +438,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
type = "void *"
self.print(f"static {type}{rulename}_rule(Parser *p);")
self.print()
while self.todo:
for rulename, rule in list(self.todo.items()):
del self.todo[rulename]
self.print()
if rule.left_recursive:
self.print("// Left-recursive")
self.visit(rule)
for rulename, rule in list(self.all_rules.items()):
self.print()
if rule.left_recursive:
self.print("// Left-recursive")
self.visit(rule)
if self.skip_actions:
mode = 0
else:
@ -472,7 +456,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
def _group_keywords_by_length(self) -> Dict[int, List[Tuple[str, int]]]:
groups: Dict[int, List[Tuple[str, int]]] = {}
for keyword_str, keyword_type in self.callmakervisitor.keyword_cache.items():
for keyword_str, keyword_type in self.keywords.items():
length = len(keyword_str)
if length in groups:
groups[length].append((keyword_str, keyword_type))
@ -481,9 +465,8 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
return groups
def _setup_keywords(self) -> None:
keyword_cache = self.callmakervisitor.keyword_cache
n_keyword_lists = (
len(max(keyword_cache.keys(), key=len)) + 1 if len(keyword_cache) > 0 else 0
len(max(self.keywords.keys(), key=len)) + 1 if len(self.keywords) > 0 else 0
)
self.print(f"static const int n_keyword_lists = {n_keyword_lists};")
groups = self._group_keywords_by_length()
@ -503,7 +486,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.print("};")
def _setup_soft_keywords(self) -> None:
soft_keywords = sorted(self.callmakervisitor.soft_keywords)
soft_keywords = sorted(self.soft_keywords)
self.print("static char *soft_keywords[] = {")
with self.indent():
for keyword in soft_keywords:

View File

@ -3,30 +3,27 @@
import argparse
import pprint
import sys
from typing import Set, Dict
from typing import Dict, Set
from pegen.build import build_parser
from pegen.grammar import (
Alt,
Cut,
Gather,
Grammar,
GrammarVisitor,
Group,
Leaf,
Lookahead,
NamedItem,
NameLeaf,
NegativeLookahead,
Opt,
Repeat,
Repeat0,
Repeat1,
Rhs,
Rule,
StringLeaf,
PositiveLookahead,
)
from pegen.parser_generator import compute_nullables
argparser = argparse.ArgumentParser(
prog="calculate_first_sets",
@ -38,8 +35,7 @@ argparser.add_argument("grammar_file", help="The grammar file")
class FirstSetCalculator(GrammarVisitor):
def __init__(self, rules: Dict[str, Rule]) -> None:
self.rules = rules
for rule in rules.values():
rule.nullable_visit(rules)
self.nullables = compute_nullables(rules)
self.first_sets: Dict[str, Set[str]] = dict()
self.in_process: Set[str] = set()
@ -129,7 +125,7 @@ class FirstSetCalculator(GrammarVisitor):
elif item.name not in self.first_sets:
self.in_process.add(item.name)
terminals = self.visit(item.rhs)
if item.nullable:
if item in self.nullables:
terminals.add("")
self.first_sets[item.name] = terminals
self.in_process.remove(item.name)

View File

@ -2,6 +2,7 @@ from __future__ import annotations
from abc import abstractmethod
from typing import (
TYPE_CHECKING,
AbstractSet,
Any,
Dict,
@ -11,11 +12,9 @@ from typing import (
Optional,
Set,
Tuple,
TYPE_CHECKING,
Union,
)
if TYPE_CHECKING:
from pegen.parser_generator import ParserGenerator
@ -31,7 +30,7 @@ class GrammarVisitor:
visitor = getattr(self, method, self.generic_visit)
return visitor(node, *args, **kwargs)
def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> None:
def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Any:
"""Called if no explicit visitor function exists for a node."""
for value in node:
if isinstance(value, list):
@ -73,8 +72,6 @@ class Rule:
self.type = type
self.rhs = rhs
self.memo = bool(memo)
self.visited = False
self.nullable = False
self.left_recursive = False
self.leader = False
@ -101,17 +98,6 @@ class Rule:
def __iter__(self) -> Iterator[Rhs]:
yield self.rhs
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
if self.visited:
# A left-recursive rule is considered non-nullable.
return False
self.visited = True
self.nullable = self.rhs.nullable_visit(rules)
return self.nullable
def initial_names(self) -> AbstractSet[str]:
return self.rhs.initial_names()
def flatten(self) -> Rhs:
# If it's a single parenthesized group, flatten it.
rhs = self.rhs
@ -124,10 +110,6 @@ class Rule:
rhs = rhs.alts[0].items[0].item.rhs
return rhs
def collect_todo(self, gen: ParserGenerator) -> None:
rhs = self.flatten()
rhs.collect_todo(gen)
class Leaf:
def __init__(self, value: str):
@ -140,14 +122,6 @@ class Leaf:
if False:
yield
@abstractmethod
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
raise NotImplementedError
@abstractmethod
def initial_names(self) -> AbstractSet[str]:
raise NotImplementedError
class NameLeaf(Leaf):
"""The value is the name."""
@ -160,15 +134,6 @@ class NameLeaf(Leaf):
def __repr__(self) -> str:
return f"NameLeaf({self.value!r})"
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
if self.value in rules:
return rules[self.value].nullable_visit(rules)
# Token or unknown; never empty.
return False
def initial_names(self) -> AbstractSet[str]:
return {self.value}
class StringLeaf(Leaf):
"""The value is a string literal, including quotes."""
@ -176,13 +141,6 @@ class StringLeaf(Leaf):
def __repr__(self) -> str:
return f"StringLeaf({self.value!r})"
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
# The string token '' is considered empty.
return not self.value
def initial_names(self) -> AbstractSet[str]:
return set()
class Rhs:
def __init__(self, alts: List[Alt]):
@ -198,21 +156,14 @@ class Rhs:
def __iter__(self) -> Iterator[List[Alt]]:
yield self.alts
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
for alt in self.alts:
if alt.nullable_visit(rules):
return True
return False
def initial_names(self) -> AbstractSet[str]:
names: Set[str] = set()
for alt in self.alts:
names |= alt.initial_names()
return names
def collect_todo(self, gen: ParserGenerator) -> None:
for alt in self.alts:
alt.collect_todo(gen)
@property
def can_be_inlined(self) -> bool:
if len(self.alts) != 1 or len(self.alts[0].items) != 1:
return False
# If the alternative has an action we cannot inline
if getattr(self.alts[0], "action", None) is not None:
return False
return True
class Alt:
@ -239,31 +190,12 @@ class Alt:
def __iter__(self) -> Iterator[List[NamedItem]]:
yield self.items
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
for item in self.items:
if not item.nullable_visit(rules):
return False
return True
def initial_names(self) -> AbstractSet[str]:
names: Set[str] = set()
for item in self.items:
names |= item.initial_names()
if not item.nullable:
break
return names
def collect_todo(self, gen: ParserGenerator) -> None:
for item in self.items:
item.collect_todo(gen)
class NamedItem:
def __init__(self, name: Optional[str], item: Item, type: Optional[str] = None):
self.name = name
self.item = item
self.type = type
self.nullable = False
def __str__(self) -> str:
if not SIMPLE_STR and self.name:
@ -277,16 +209,6 @@ class NamedItem:
def __iter__(self) -> Iterator[Item]:
yield self.item
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
self.nullable = self.item.nullable_visit(rules)
return self.nullable
def initial_names(self) -> AbstractSet[str]:
return self.item.initial_names()
def collect_todo(self, gen: ParserGenerator) -> None:
gen.callmakervisitor.visit(self.item)
class Forced:
def __init__(self, node: Plain):
@ -298,12 +220,6 @@ class Forced:
def __iter__(self) -> Iterator[Plain]:
yield self.node
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
return True
def initial_names(self) -> AbstractSet[str]:
return set()
class Lookahead:
def __init__(self, node: Plain, sign: str):
@ -316,12 +232,6 @@ class Lookahead:
def __iter__(self) -> Iterator[Plain]:
yield self.node
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
return True
def initial_names(self) -> AbstractSet[str]:
return set()
class PositiveLookahead(Lookahead):
def __init__(self, node: Plain):
@ -357,12 +267,6 @@ class Opt:
def __iter__(self) -> Iterator[Item]:
yield self.node
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
return True
def initial_names(self) -> AbstractSet[str]:
return self.node.initial_names()
class Repeat:
"""Shared base class for x* and x+."""
@ -371,16 +275,9 @@ class Repeat:
self.node = node
self.memo: Optional[Tuple[Optional[str], str]] = None
@abstractmethod
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
raise NotImplementedError
def __iter__(self) -> Iterator[Plain]:
yield self.node
def initial_names(self) -> AbstractSet[str]:
return self.node.initial_names()
class Repeat0(Repeat):
def __str__(self) -> str:
@ -394,9 +291,6 @@ class Repeat0(Repeat):
def __repr__(self) -> str:
return f"Repeat0({self.node!r})"
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
return True
class Repeat1(Repeat):
def __str__(self) -> str:
@ -410,9 +304,6 @@ class Repeat1(Repeat):
def __repr__(self) -> str:
return f"Repeat1({self.node!r})"
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
return False
class Gather(Repeat):
def __init__(self, separator: Plain, node: Plain):
@ -425,9 +316,6 @@ class Gather(Repeat):
def __repr__(self) -> str:
return f"Gather({self.separator!r}, {self.node!r})"
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
return False
class Group:
def __init__(self, rhs: Rhs):
@ -442,12 +330,6 @@ class Group:
def __iter__(self) -> Iterator[Rhs]:
yield self.rhs
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
return self.rhs.nullable_visit(rules)
def initial_names(self) -> AbstractSet[str]:
return self.rhs.initial_names()
class Cut:
def __init__(self) -> None:
@ -468,9 +350,6 @@ class Cut:
return NotImplemented
return True
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
return True
def initial_names(self) -> AbstractSet[str]:
return set()

View File

@ -1,7 +1,6 @@
import argparse
import sys
from typing import Any, Iterator, Callable
from typing import Any, Callable, Iterator
from pegen.build import build_parser
from pegen.grammar import Grammar, Rule

View File

@ -59,11 +59,11 @@ def main() -> None:
with args.tokens_file as tok_file:
all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
gen = CParserGenerator(grammar, all_tokens, exact_tok, non_exact_tok, file=None)
gen.collect_todo()
gen.collect_rules()
with args.keyword_file as thefile:
all_keywords = sorted(list(gen.callmakervisitor.keyword_cache.keys()) + EXTRA_KEYWORDS)
all_soft_keywords = sorted(gen.callmakervisitor.soft_keywords)
all_keywords = sorted(list(gen.keywords.keys()) + EXTRA_KEYWORDS)
all_soft_keywords = sorted(gen.soft_keywords)
keywords = "" if not all_keywords else " " + ",\n ".join(map(repr, all_keywords))
soft_keywords = (

View File

@ -1,22 +1,76 @@
import ast
import contextlib
import re
from abc import abstractmethod
from typing import IO, AbstractSet, Dict, Iterator, List, Optional, Set, Text, Tuple
from typing import (
IO,
AbstractSet,
Any,
Dict,
Iterable,
Iterator,
List,
Optional,
Set,
Text,
Tuple,
Union,
)
from pegen import sccutils
from pegen.grammar import (
Alt,
Cut,
Forced,
Gather,
Grammar,
GrammarError,
GrammarVisitor,
Group,
Lookahead,
NamedItem,
NameLeaf,
Opt,
Plain,
Repeat0,
Repeat1,
Rhs,
Rule,
StringLeaf,
)
class RuleCollectorVisitor(GrammarVisitor):
"""Visitor that invokes a provieded callmaker visitor with just the NamedItem nodes"""
def __init__(self, rules: Dict[str, Rule], callmakervisitor: GrammarVisitor) -> None:
self.rulses = rules
self.callmaker = callmakervisitor
def visit_Rule(self, rule: Rule) -> None:
self.visit(rule.flatten())
def visit_NamedItem(self, item: NamedItem) -> None:
self.callmaker.visit(item)
class KeywordCollectorVisitor(GrammarVisitor):
"""Visitor that collects all the keywods and soft keywords in the Grammar"""
def __init__(self, gen: "ParserGenerator", keywords: Dict[str, int], soft_keywords: Set[str]):
self.generator = gen
self.keywords = keywords
self.soft_keywords = soft_keywords
def visit_StringLeaf(self, node: StringLeaf) -> None:
val = ast.literal_eval(node.value)
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
if node.value.endswith("'") and node.value not in self.keywords:
self.keywords[val] = self.generator.keyword_type()
else:
return self.soft_keywords.add(node.value.replace('"', ""))
class RuleCheckingVisitor(GrammarVisitor):
def __init__(self, rules: Dict[str, Rule], tokens: Set[str]):
self.rules = rules
@ -39,6 +93,8 @@ class ParserGenerator:
def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]):
self.grammar = grammar
self.tokens = tokens
self.keywords: Dict[str, int] = {}
self.soft_keywords: Set[str] = set()
self.rules = grammar.rules
self.validate_rule_names()
if "trailer" not in grammar.metas and "start" not in self.rules:
@ -48,12 +104,10 @@ class ParserGenerator:
checker.visit(rule)
self.file = file
self.level = 0
compute_nullables(self.rules)
self.first_graph, self.first_sccs = compute_left_recursives(self.rules)
self.todo = self.rules.copy() # Rules to generate
self.counter = 0 # For name_rule()/name_loop()
self.keyword_counter = 499 # For keyword_type()
self.all_rules: Dict[str, Rule] = {} # Rules + temporal rules
self.all_rules: Dict[str, Rule] = self.rules.copy() # Rules + temporal rules
self._local_variable_stack: List[List[str]] = []
def validate_rule_names(self) -> None:
@ -94,39 +148,43 @@ class ParserGenerator:
for line in lines.splitlines():
self.print(line)
def collect_todo(self) -> None:
def collect_rules(self) -> None:
keyword_collector = KeywordCollectorVisitor(self, self.keywords, self.soft_keywords)
for rule in self.all_rules.values():
keyword_collector.visit(rule)
rule_collector = RuleCollectorVisitor(self.rules, self.callmakervisitor)
done: Set[str] = set()
while True:
alltodo = list(self.todo)
self.all_rules.update(self.todo)
todo = [i for i in alltodo if i not in done]
computed_rules = list(self.all_rules)
todo = [i for i in computed_rules if i not in done]
if not todo:
break
done = set(self.all_rules)
for rulename in todo:
self.todo[rulename].collect_todo(self)
done = set(alltodo)
rule_collector.visit(self.all_rules[rulename])
def keyword_type(self) -> int:
self.keyword_counter += 1
return self.keyword_counter
def name_node(self, rhs: Rhs) -> str:
def artifical_rule_from_rhs(self, rhs: Rhs) -> str:
self.counter += 1
name = f"_tmp_{self.counter}" # TODO: Pick a nicer name.
self.todo[name] = Rule(name, None, rhs)
self.all_rules[name] = Rule(name, None, rhs)
return name
def name_loop(self, node: Plain, is_repeat1: bool) -> str:
def artificial_rule_from_repeat(self, node: Plain, is_repeat1: bool) -> str:
self.counter += 1
if is_repeat1:
prefix = "_loop1_"
else:
prefix = "_loop0_"
name = f"{prefix}{self.counter}" # TODO: It's ugly to signal via the name.
self.todo[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
name = f"{prefix}{self.counter}"
self.all_rules[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
return name
def name_gather(self, node: Gather) -> str:
def artifical_rule_from_gather(self, node: Gather) -> str:
self.counter += 1
name = f"_gather_{self.counter}"
self.counter += 1
@ -135,7 +193,7 @@ class ParserGenerator:
[NamedItem(None, node.separator), NamedItem("elem", node.node)],
action="elem",
)
self.todo[extra_function_name] = Rule(
self.all_rules[extra_function_name] = Rule(
extra_function_name,
None,
Rhs([extra_function_alt]),
@ -143,7 +201,7 @@ class ParserGenerator:
alt = Alt(
[NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],
)
self.todo[name] = Rule(
self.all_rules[name] = Rule(
name,
None,
Rhs([alt]),
@ -160,13 +218,120 @@ class ParserGenerator:
return name
def compute_nullables(rules: Dict[str, Rule]) -> None:
class NullableVisitor(GrammarVisitor):
def __init__(self, rules: Dict[str, Rule]) -> None:
self.rules = rules
self.visited: Set[Any] = set()
self.nullables: Set[Union[Rule, NamedItem]] = set()
def visit_Rule(self, rule: Rule) -> bool:
if rule in self.visited:
return False
self.visited.add(rule)
if self.visit(rule.rhs):
self.nullables.add(rule)
return rule in self.nullables
def visit_Rhs(self, rhs: Rhs) -> bool:
for alt in rhs.alts:
if self.visit(alt):
return True
return False
def visit_Alt(self, alt: Alt) -> bool:
for item in alt.items:
if not self.visit(item):
return False
return True
def visit_Forced(self, force: Forced) -> bool:
return True
def visit_LookAhead(self, lookahead: Lookahead) -> bool:
return True
def visit_Opt(self, opt: Opt) -> bool:
return True
def visit_Repeat0(self, repeat: Repeat0) -> bool:
return True
def visit_Repeat1(self, repeat: Repeat1) -> bool:
return False
def visit_Gather(self, gather: Gather) -> bool:
return False
def visit_Cut(self, cut: Cut) -> bool:
return False
def visit_Group(self, group: Group) -> bool:
return self.visit(group.rhs)
def visit_NamedItem(self, item: NamedItem) -> bool:
if self.visit(item.item):
self.nullables.add(item)
return item in self.nullables
def visit_NameLeaf(self, node: NameLeaf) -> bool:
if node.value in self.rules:
return self.visit(self.rules[node.value])
# Token or unknown; never empty.
return False
def visit_StringLeaf(self, node: StringLeaf) -> bool:
# The string token '' is considered empty.
return not node.value
def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]:
"""Compute which rules in a grammar are nullable.
Thanks to TatSu (tatsu/leftrec.py) for inspiration.
"""
nullable_visitor = NullableVisitor(rules)
for rule in rules.values():
rule.nullable_visit(rules)
nullable_visitor.visit(rule)
return nullable_visitor.nullables
class InitialNamesVisitor(GrammarVisitor):
def __init__(self, rules: Dict[str, Rule]) -> None:
self.rules = rules
self.nullables = compute_nullables(rules)
def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[Any]:
names: Set[str] = set()
for value in node:
if isinstance(value, list):
for item in value:
names |= self.visit(item, *args, **kwargs)
else:
names |= self.visit(value, *args, **kwargs)
return names
def visit_Alt(self, alt: Alt) -> Set[Any]:
names: Set[str] = set()
for item in alt.items:
names |= self.visit(item)
if item not in self.nullables:
break
return names
def visit_Forced(self, force: Forced) -> Set[Any]:
return set()
def visit_LookAhead(self, lookahead: Lookahead) -> Set[Any]:
return set()
def visit_Cut(self, cut: Cut) -> Set[Any]:
return set()
def visit_NameLeaf(self, node: NameLeaf) -> Set[Any]:
return {node.value}
def visit_StringLeaf(self, node: StringLeaf) -> Set[Any]:
return set()
def compute_left_recursives(
@ -207,10 +372,11 @@ def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
Note that this requires the nullable flags to have been computed.
"""
initial_name_visitor = InitialNamesVisitor(rules)
graph = {}
vertices: Set[str] = set()
for rulename, rhs in rules.items():
graph[rulename] = names = rhs.initial_names()
graph[rulename] = names = initial_name_visitor.visit(rhs)
vertices |= names
for vertex in vertices:
graph.setdefault(vertex, set())

View File

@ -95,8 +95,6 @@ class PythonCallMakerVisitor(GrammarVisitor):
def __init__(self, parser_generator: ParserGenerator):
self.gen = parser_generator
self.cache: Dict[Any, Any] = {}
self.keywords: Set[str] = set()
self.soft_keywords: Set[str] = set()
def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
name = node.value
@ -111,12 +109,6 @@ class PythonCallMakerVisitor(GrammarVisitor):
return name, f"self.{name}()"
def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
val = ast.literal_eval(node.value)
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
if node.value.endswith("'"):
self.keywords.add(val)
else:
self.soft_keywords.add(val)
return "literal", f"self.expect({node.value})"
def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
@ -125,7 +117,7 @@ class PythonCallMakerVisitor(GrammarVisitor):
if len(node.alts) == 1 and len(node.alts[0].items) == 1:
self.cache[node] = self.visit(node.alts[0].items[0])
else:
name = self.gen.name_node(node)
name = self.gen.artifical_rule_from_rhs(node)
self.cache[node] = name, f"self.{name}()"
return self.cache[node]
@ -163,21 +155,21 @@ class PythonCallMakerVisitor(GrammarVisitor):
def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
if node in self.cache:
return self.cache[node]
name = self.gen.name_loop(node.node, False)
name = self.gen.artificial_rule_from_repeat(node.node, False)
self.cache[node] = name, f"self.{name}()," # Also a trailing comma!
return self.cache[node]
def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
if node in self.cache:
return self.cache[node]
name = self.gen.name_loop(node.node, True)
name = self.gen.artificial_rule_from_repeat(node.node, True)
self.cache[node] = name, f"self.{name}()" # But no trailing comma here!
return self.cache[node]
def visit_Gather(self, node: Gather) -> Tuple[str, str]:
if node in self.cache:
return self.cache[node]
name = self.gen.name_gather(node)
name = self.gen.artifical_rule_from_gather(node)
self.cache[node] = name, f"self.{name}()" # No trailing comma here either!
return self.cache[node]
@ -219,6 +211,7 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
)
def generate(self, filename: str) -> None:
self.collect_rules()
header = self.grammar.metas.get("header", MODULE_PREFIX)
if header is not None:
self.print(header.rstrip("\n").format(filename=filename))
@ -228,17 +221,15 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
cls_name = self.grammar.metas.get("class", "GeneratedParser")
self.print("# Keywords and soft keywords are listed at the end of the parser definition.")
self.print(f"class {cls_name}(Parser):")
while self.todo:
for rulename, rule in list(self.todo.items()):
del self.todo[rulename]
self.print()
with self.indent():
self.visit(rule)
for rule in self.all_rules.values():
self.print()
with self.indent():
self.visit(rule)
self.print()
with self.indent():
self.print(f"KEYWORDS = {tuple(self.callmakervisitor.keywords)}")
self.print(f"SOFT_KEYWORDS = {tuple(self.callmakervisitor.soft_keywords)}")
self.print(f"KEYWORDS = {tuple(self.keywords)}")
self.print(f"SOFT_KEYWORDS = {tuple(self.soft_keywords)}")
trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX.format(class_name=cls_name))
if trailer is not None:
@ -270,8 +261,6 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
self.print(f"def {node.name}(self) -> Optional[{node_type}]:")
with self.indent():
self.print(f"# {node.name}: {rhs}")
if node.nullable:
self.print(f"# nullable={node.nullable}")
self.print("mark = self._mark()")
if self.alts_uses_locations(node.rhs.alts):
self.print("tok = self._tokenizer.peek()")

View File

@ -4,10 +4,9 @@ import os
import pathlib
import sys
import textwrap
import tokenize
import token
from typing import Any, cast, Dict, IO, Type, Final
import tokenize
from typing import IO, Any, Dict, Final, Type, cast
from pegen.build import compile_c_extension
from pegen.c_generator import CParserGenerator

View File

@ -1,12 +1,7 @@
from typing import Optional
from pegen import grammar
from pegen.grammar import (
Alt,
GrammarVisitor,
Rule,
Rhs,
)
from pegen.grammar import Alt, GrammarVisitor, Rhs, Rule
class ValidationError(Exception):