mirror of https://github.com/python/cpython
Extract visitors from the grammar nodes and call makers in the peg generator (GH-28172)
Simplify the peg generator logic by extracting as much visitors as possible to disentangle the flow and separate concerns.
This commit is contained in:
parent
28264269de
commit
b01fd533fe
|
@ -15,6 +15,7 @@ with test_tools.imports_under_tool("peg_generator"):
|
|||
from pegen.grammar import GrammarVisitor, GrammarError, Grammar
|
||||
from pegen.grammar_visualizer import ASTGrammarPrinter
|
||||
from pegen.parser import Parser
|
||||
from pegen.parser_generator import compute_nullables, compute_left_recursives
|
||||
from pegen.python_generator import PythonParserGenerator
|
||||
|
||||
|
||||
|
@ -502,11 +503,10 @@ class TestPegen(unittest.TestCase):
|
|||
sign: ['-' | '+']
|
||||
"""
|
||||
grammar: Grammar = parse_string(grammar_source, GrammarParser)
|
||||
out = io.StringIO()
|
||||
genr = PythonParserGenerator(grammar, out)
|
||||
rules = grammar.rules
|
||||
self.assertFalse(rules["start"].nullable) # Not None!
|
||||
self.assertTrue(rules["sign"].nullable)
|
||||
nullables = compute_nullables(rules)
|
||||
self.assertNotIn(rules["start"], nullables) # Not None!
|
||||
self.assertIn(rules["sign"], nullables)
|
||||
|
||||
def test_advanced_left_recursive(self) -> None:
|
||||
grammar_source = """
|
||||
|
@ -514,11 +514,11 @@ class TestPegen(unittest.TestCase):
|
|||
sign: ['-']
|
||||
"""
|
||||
grammar: Grammar = parse_string(grammar_source, GrammarParser)
|
||||
out = io.StringIO()
|
||||
genr = PythonParserGenerator(grammar, out)
|
||||
rules = grammar.rules
|
||||
self.assertFalse(rules["start"].nullable) # Not None!
|
||||
self.assertTrue(rules["sign"].nullable)
|
||||
nullables = compute_nullables(rules)
|
||||
compute_left_recursives(rules)
|
||||
self.assertNotIn(rules["start"], nullables) # Not None!
|
||||
self.assertIn(rules["sign"], nullables)
|
||||
self.assertTrue(rules["start"].left_recursive)
|
||||
self.assertFalse(rules["sign"].left_recursive)
|
||||
|
||||
|
|
334
Parser/parser.c
334
Parser/parser.c
File diff suppressed because it is too large
Load Diff
|
@ -10,10 +10,9 @@ import sys
|
|||
import time
|
||||
import token
|
||||
import traceback
|
||||
|
||||
from typing import Tuple
|
||||
|
||||
from pegen.build import Grammar, Parser, Tokenizer, ParserGenerator
|
||||
from pegen.build import Grammar, Parser, ParserGenerator, Tokenizer
|
||||
from pegen.validator import validate_grammar
|
||||
|
||||
|
||||
|
|
|
@ -1,11 +1,10 @@
|
|||
import itertools
|
||||
import pathlib
|
||||
import shutil
|
||||
import tokenize
|
||||
import sysconfig
|
||||
import tempfile
|
||||
import itertools
|
||||
|
||||
from typing import Optional, Tuple, List, IO, Set, Dict
|
||||
import tokenize
|
||||
from typing import IO, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from pegen.c_generator import CParserGenerator
|
||||
from pegen.grammar import Grammar
|
||||
|
@ -45,9 +44,9 @@ def compile_c_extension(
|
|||
of distutils (this is useful in case you want to use a temporary directory).
|
||||
"""
|
||||
import distutils.log
|
||||
from distutils.core import Distribution, Extension
|
||||
from distutils.command.clean import clean # type: ignore
|
||||
from distutils.command.build_ext import build_ext # type: ignore
|
||||
from distutils.command.clean import clean # type: ignore
|
||||
from distutils.core import Distribution, Extension
|
||||
from distutils.tests.support import fixup_build_ext # type: ignore
|
||||
|
||||
if verbose:
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
import ast
|
||||
from dataclasses import field, dataclass
|
||||
import re
|
||||
from typing import Any, Dict, IO, Optional, List, Text, Tuple, Set
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import IO, Any, Dict, List, Optional, Set, Text, Tuple
|
||||
|
||||
from pegen import grammar
|
||||
from pegen.grammar import (
|
||||
|
@ -27,7 +27,6 @@ from pegen.grammar import (
|
|||
)
|
||||
from pegen.parser_generator import ParserGenerator
|
||||
|
||||
|
||||
EXTENSION_PREFIX = """\
|
||||
#include "pegen.h"
|
||||
|
||||
|
@ -120,23 +119,18 @@ class CCallMakerVisitor(GrammarVisitor):
|
|||
self.exact_tokens = exact_tokens
|
||||
self.non_exact_tokens = non_exact_tokens
|
||||
self.cache: Dict[Any, FunctionCall] = {}
|
||||
self.keyword_cache: Dict[str, int] = {}
|
||||
self.soft_keywords: Set[str] = set()
|
||||
|
||||
def keyword_helper(self, keyword: str) -> FunctionCall:
|
||||
if keyword not in self.keyword_cache:
|
||||
self.keyword_cache[keyword] = self.gen.keyword_type()
|
||||
return FunctionCall(
|
||||
assigned_variable="_keyword",
|
||||
function="_PyPegen_expect_token",
|
||||
arguments=["p", self.keyword_cache[keyword]],
|
||||
arguments=["p", self.gen.keywords[keyword]],
|
||||
return_type="Token *",
|
||||
nodetype=NodeTypes.KEYWORD,
|
||||
comment=f"token='{keyword}'",
|
||||
)
|
||||
|
||||
def soft_keyword_helper(self, value: str) -> FunctionCall:
|
||||
self.soft_keywords.add(value.replace('"', ""))
|
||||
return FunctionCall(
|
||||
assigned_variable="_keyword",
|
||||
function="_PyPegen_expect_soft_keyword",
|
||||
|
@ -200,20 +194,12 @@ class CCallMakerVisitor(GrammarVisitor):
|
|||
)
|
||||
|
||||
def visit_Rhs(self, node: Rhs) -> FunctionCall:
|
||||
def can_we_inline(node: Rhs) -> int:
|
||||
if len(node.alts) != 1 or len(node.alts[0].items) != 1:
|
||||
return False
|
||||
# If the alternative has an action we cannot inline
|
||||
if getattr(node.alts[0], "action", None) is not None:
|
||||
return False
|
||||
return True
|
||||
|
||||
if node in self.cache:
|
||||
return self.cache[node]
|
||||
if can_we_inline(node):
|
||||
if node.can_be_inlined:
|
||||
self.cache[node] = self.generate_call(node.alts[0].items[0])
|
||||
else:
|
||||
name = self.gen.name_node(node)
|
||||
name = self.gen.artifical_rule_from_rhs(node)
|
||||
self.cache[node] = FunctionCall(
|
||||
assigned_variable=f"{name}_var",
|
||||
function=f"{name}_rule",
|
||||
|
@ -306,7 +292,7 @@ class CCallMakerVisitor(GrammarVisitor):
|
|||
def visit_Repeat0(self, node: Repeat0) -> FunctionCall:
|
||||
if node in self.cache:
|
||||
return self.cache[node]
|
||||
name = self.gen.name_loop(node.node, False)
|
||||
name = self.gen.artificial_rule_from_repeat(node.node, False)
|
||||
self.cache[node] = FunctionCall(
|
||||
assigned_variable=f"{name}_var",
|
||||
function=f"{name}_rule",
|
||||
|
@ -319,7 +305,7 @@ class CCallMakerVisitor(GrammarVisitor):
|
|||
def visit_Repeat1(self, node: Repeat1) -> FunctionCall:
|
||||
if node in self.cache:
|
||||
return self.cache[node]
|
||||
name = self.gen.name_loop(node.node, True)
|
||||
name = self.gen.artificial_rule_from_repeat(node.node, True)
|
||||
self.cache[node] = FunctionCall(
|
||||
assigned_variable=f"{name}_var",
|
||||
function=f"{name}_rule",
|
||||
|
@ -332,7 +318,7 @@ class CCallMakerVisitor(GrammarVisitor):
|
|||
def visit_Gather(self, node: Gather) -> FunctionCall:
|
||||
if node in self.cache:
|
||||
return self.cache[node]
|
||||
name = self.gen.name_gather(node)
|
||||
name = self.gen.artifical_rule_from_gather(node)
|
||||
self.cache[node] = FunctionCall(
|
||||
assigned_variable=f"{name}_var",
|
||||
function=f"{name}_rule",
|
||||
|
@ -429,7 +415,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
self.print(f"}}")
|
||||
|
||||
def generate(self, filename: str) -> None:
|
||||
self.collect_todo()
|
||||
self.collect_rules()
|
||||
self.print(f"// @generated by pegen from {filename}")
|
||||
header = self.grammar.metas.get("header", EXTENSION_PREFIX)
|
||||
if header:
|
||||
|
@ -439,11 +425,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
self.print(subheader)
|
||||
self._setup_keywords()
|
||||
self._setup_soft_keywords()
|
||||
for i, (rulename, rule) in enumerate(self.todo.items(), 1000):
|
||||
for i, (rulename, rule) in enumerate(self.all_rules.items(), 1000):
|
||||
comment = " // Left-recursive" if rule.left_recursive else ""
|
||||
self.print(f"#define {rulename}_type {i}{comment}")
|
||||
self.print()
|
||||
for rulename, rule in self.todo.items():
|
||||
for rulename, rule in self.all_rules.items():
|
||||
if rule.is_loop() or rule.is_gather():
|
||||
type = "asdl_seq *"
|
||||
elif rule.type:
|
||||
|
@ -452,13 +438,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
type = "void *"
|
||||
self.print(f"static {type}{rulename}_rule(Parser *p);")
|
||||
self.print()
|
||||
while self.todo:
|
||||
for rulename, rule in list(self.todo.items()):
|
||||
del self.todo[rulename]
|
||||
self.print()
|
||||
if rule.left_recursive:
|
||||
self.print("// Left-recursive")
|
||||
self.visit(rule)
|
||||
for rulename, rule in list(self.all_rules.items()):
|
||||
self.print()
|
||||
if rule.left_recursive:
|
||||
self.print("// Left-recursive")
|
||||
self.visit(rule)
|
||||
if self.skip_actions:
|
||||
mode = 0
|
||||
else:
|
||||
|
@ -472,7 +456,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
|
||||
def _group_keywords_by_length(self) -> Dict[int, List[Tuple[str, int]]]:
|
||||
groups: Dict[int, List[Tuple[str, int]]] = {}
|
||||
for keyword_str, keyword_type in self.callmakervisitor.keyword_cache.items():
|
||||
for keyword_str, keyword_type in self.keywords.items():
|
||||
length = len(keyword_str)
|
||||
if length in groups:
|
||||
groups[length].append((keyword_str, keyword_type))
|
||||
|
@ -481,9 +465,8 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
return groups
|
||||
|
||||
def _setup_keywords(self) -> None:
|
||||
keyword_cache = self.callmakervisitor.keyword_cache
|
||||
n_keyword_lists = (
|
||||
len(max(keyword_cache.keys(), key=len)) + 1 if len(keyword_cache) > 0 else 0
|
||||
len(max(self.keywords.keys(), key=len)) + 1 if len(self.keywords) > 0 else 0
|
||||
)
|
||||
self.print(f"static const int n_keyword_lists = {n_keyword_lists};")
|
||||
groups = self._group_keywords_by_length()
|
||||
|
@ -503,7 +486,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
self.print("};")
|
||||
|
||||
def _setup_soft_keywords(self) -> None:
|
||||
soft_keywords = sorted(self.callmakervisitor.soft_keywords)
|
||||
soft_keywords = sorted(self.soft_keywords)
|
||||
self.print("static char *soft_keywords[] = {")
|
||||
with self.indent():
|
||||
for keyword in soft_keywords:
|
||||
|
|
|
@ -3,30 +3,27 @@
|
|||
import argparse
|
||||
import pprint
|
||||
import sys
|
||||
from typing import Set, Dict
|
||||
from typing import Dict, Set
|
||||
|
||||
from pegen.build import build_parser
|
||||
from pegen.grammar import (
|
||||
Alt,
|
||||
Cut,
|
||||
Gather,
|
||||
Grammar,
|
||||
GrammarVisitor,
|
||||
Group,
|
||||
Leaf,
|
||||
Lookahead,
|
||||
NamedItem,
|
||||
NameLeaf,
|
||||
NegativeLookahead,
|
||||
Opt,
|
||||
Repeat,
|
||||
Repeat0,
|
||||
Repeat1,
|
||||
Rhs,
|
||||
Rule,
|
||||
StringLeaf,
|
||||
PositiveLookahead,
|
||||
)
|
||||
from pegen.parser_generator import compute_nullables
|
||||
|
||||
argparser = argparse.ArgumentParser(
|
||||
prog="calculate_first_sets",
|
||||
|
@ -38,8 +35,7 @@ argparser.add_argument("grammar_file", help="The grammar file")
|
|||
class FirstSetCalculator(GrammarVisitor):
|
||||
def __init__(self, rules: Dict[str, Rule]) -> None:
|
||||
self.rules = rules
|
||||
for rule in rules.values():
|
||||
rule.nullable_visit(rules)
|
||||
self.nullables = compute_nullables(rules)
|
||||
self.first_sets: Dict[str, Set[str]] = dict()
|
||||
self.in_process: Set[str] = set()
|
||||
|
||||
|
@ -129,7 +125,7 @@ class FirstSetCalculator(GrammarVisitor):
|
|||
elif item.name not in self.first_sets:
|
||||
self.in_process.add(item.name)
|
||||
terminals = self.visit(item.rhs)
|
||||
if item.nullable:
|
||||
if item in self.nullables:
|
||||
terminals.add("")
|
||||
self.first_sets[item.name] = terminals
|
||||
self.in_process.remove(item.name)
|
||||
|
|
|
@ -2,6 +2,7 @@ from __future__ import annotations
|
|||
|
||||
from abc import abstractmethod
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
AbstractSet,
|
||||
Any,
|
||||
Dict,
|
||||
|
@ -11,11 +12,9 @@ from typing import (
|
|||
Optional,
|
||||
Set,
|
||||
Tuple,
|
||||
TYPE_CHECKING,
|
||||
Union,
|
||||
)
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pegen.parser_generator import ParserGenerator
|
||||
|
||||
|
@ -31,7 +30,7 @@ class GrammarVisitor:
|
|||
visitor = getattr(self, method, self.generic_visit)
|
||||
return visitor(node, *args, **kwargs)
|
||||
|
||||
def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> None:
|
||||
def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Any:
|
||||
"""Called if no explicit visitor function exists for a node."""
|
||||
for value in node:
|
||||
if isinstance(value, list):
|
||||
|
@ -73,8 +72,6 @@ class Rule:
|
|||
self.type = type
|
||||
self.rhs = rhs
|
||||
self.memo = bool(memo)
|
||||
self.visited = False
|
||||
self.nullable = False
|
||||
self.left_recursive = False
|
||||
self.leader = False
|
||||
|
||||
|
@ -101,17 +98,6 @@ class Rule:
|
|||
def __iter__(self) -> Iterator[Rhs]:
|
||||
yield self.rhs
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
if self.visited:
|
||||
# A left-recursive rule is considered non-nullable.
|
||||
return False
|
||||
self.visited = True
|
||||
self.nullable = self.rhs.nullable_visit(rules)
|
||||
return self.nullable
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
return self.rhs.initial_names()
|
||||
|
||||
def flatten(self) -> Rhs:
|
||||
# If it's a single parenthesized group, flatten it.
|
||||
rhs = self.rhs
|
||||
|
@ -124,10 +110,6 @@ class Rule:
|
|||
rhs = rhs.alts[0].items[0].item.rhs
|
||||
return rhs
|
||||
|
||||
def collect_todo(self, gen: ParserGenerator) -> None:
|
||||
rhs = self.flatten()
|
||||
rhs.collect_todo(gen)
|
||||
|
||||
|
||||
class Leaf:
|
||||
def __init__(self, value: str):
|
||||
|
@ -140,14 +122,6 @@ class Leaf:
|
|||
if False:
|
||||
yield
|
||||
|
||||
@abstractmethod
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
raise NotImplementedError
|
||||
|
||||
@abstractmethod
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class NameLeaf(Leaf):
|
||||
"""The value is the name."""
|
||||
|
@ -160,15 +134,6 @@ class NameLeaf(Leaf):
|
|||
def __repr__(self) -> str:
|
||||
return f"NameLeaf({self.value!r})"
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
if self.value in rules:
|
||||
return rules[self.value].nullable_visit(rules)
|
||||
# Token or unknown; never empty.
|
||||
return False
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
return {self.value}
|
||||
|
||||
|
||||
class StringLeaf(Leaf):
|
||||
"""The value is a string literal, including quotes."""
|
||||
|
@ -176,13 +141,6 @@ class StringLeaf(Leaf):
|
|||
def __repr__(self) -> str:
|
||||
return f"StringLeaf({self.value!r})"
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
# The string token '' is considered empty.
|
||||
return not self.value
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
return set()
|
||||
|
||||
|
||||
class Rhs:
|
||||
def __init__(self, alts: List[Alt]):
|
||||
|
@ -198,21 +156,14 @@ class Rhs:
|
|||
def __iter__(self) -> Iterator[List[Alt]]:
|
||||
yield self.alts
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
for alt in self.alts:
|
||||
if alt.nullable_visit(rules):
|
||||
return True
|
||||
return False
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
names: Set[str] = set()
|
||||
for alt in self.alts:
|
||||
names |= alt.initial_names()
|
||||
return names
|
||||
|
||||
def collect_todo(self, gen: ParserGenerator) -> None:
|
||||
for alt in self.alts:
|
||||
alt.collect_todo(gen)
|
||||
@property
|
||||
def can_be_inlined(self) -> bool:
|
||||
if len(self.alts) != 1 or len(self.alts[0].items) != 1:
|
||||
return False
|
||||
# If the alternative has an action we cannot inline
|
||||
if getattr(self.alts[0], "action", None) is not None:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
class Alt:
|
||||
|
@ -239,31 +190,12 @@ class Alt:
|
|||
def __iter__(self) -> Iterator[List[NamedItem]]:
|
||||
yield self.items
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
for item in self.items:
|
||||
if not item.nullable_visit(rules):
|
||||
return False
|
||||
return True
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
names: Set[str] = set()
|
||||
for item in self.items:
|
||||
names |= item.initial_names()
|
||||
if not item.nullable:
|
||||
break
|
||||
return names
|
||||
|
||||
def collect_todo(self, gen: ParserGenerator) -> None:
|
||||
for item in self.items:
|
||||
item.collect_todo(gen)
|
||||
|
||||
|
||||
class NamedItem:
|
||||
def __init__(self, name: Optional[str], item: Item, type: Optional[str] = None):
|
||||
self.name = name
|
||||
self.item = item
|
||||
self.type = type
|
||||
self.nullable = False
|
||||
|
||||
def __str__(self) -> str:
|
||||
if not SIMPLE_STR and self.name:
|
||||
|
@ -277,16 +209,6 @@ class NamedItem:
|
|||
def __iter__(self) -> Iterator[Item]:
|
||||
yield self.item
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
self.nullable = self.item.nullable_visit(rules)
|
||||
return self.nullable
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
return self.item.initial_names()
|
||||
|
||||
def collect_todo(self, gen: ParserGenerator) -> None:
|
||||
gen.callmakervisitor.visit(self.item)
|
||||
|
||||
|
||||
class Forced:
|
||||
def __init__(self, node: Plain):
|
||||
|
@ -298,12 +220,6 @@ class Forced:
|
|||
def __iter__(self) -> Iterator[Plain]:
|
||||
yield self.node
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
return True
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
return set()
|
||||
|
||||
|
||||
class Lookahead:
|
||||
def __init__(self, node: Plain, sign: str):
|
||||
|
@ -316,12 +232,6 @@ class Lookahead:
|
|||
def __iter__(self) -> Iterator[Plain]:
|
||||
yield self.node
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
return True
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
return set()
|
||||
|
||||
|
||||
class PositiveLookahead(Lookahead):
|
||||
def __init__(self, node: Plain):
|
||||
|
@ -357,12 +267,6 @@ class Opt:
|
|||
def __iter__(self) -> Iterator[Item]:
|
||||
yield self.node
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
return True
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
return self.node.initial_names()
|
||||
|
||||
|
||||
class Repeat:
|
||||
"""Shared base class for x* and x+."""
|
||||
|
@ -371,16 +275,9 @@ class Repeat:
|
|||
self.node = node
|
||||
self.memo: Optional[Tuple[Optional[str], str]] = None
|
||||
|
||||
@abstractmethod
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
raise NotImplementedError
|
||||
|
||||
def __iter__(self) -> Iterator[Plain]:
|
||||
yield self.node
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
return self.node.initial_names()
|
||||
|
||||
|
||||
class Repeat0(Repeat):
|
||||
def __str__(self) -> str:
|
||||
|
@ -394,9 +291,6 @@ class Repeat0(Repeat):
|
|||
def __repr__(self) -> str:
|
||||
return f"Repeat0({self.node!r})"
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
class Repeat1(Repeat):
|
||||
def __str__(self) -> str:
|
||||
|
@ -410,9 +304,6 @@ class Repeat1(Repeat):
|
|||
def __repr__(self) -> str:
|
||||
return f"Repeat1({self.node!r})"
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
class Gather(Repeat):
|
||||
def __init__(self, separator: Plain, node: Plain):
|
||||
|
@ -425,9 +316,6 @@ class Gather(Repeat):
|
|||
def __repr__(self) -> str:
|
||||
return f"Gather({self.separator!r}, {self.node!r})"
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
class Group:
|
||||
def __init__(self, rhs: Rhs):
|
||||
|
@ -442,12 +330,6 @@ class Group:
|
|||
def __iter__(self) -> Iterator[Rhs]:
|
||||
yield self.rhs
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
return self.rhs.nullable_visit(rules)
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
return self.rhs.initial_names()
|
||||
|
||||
|
||||
class Cut:
|
||||
def __init__(self) -> None:
|
||||
|
@ -468,9 +350,6 @@ class Cut:
|
|||
return NotImplemented
|
||||
return True
|
||||
|
||||
def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
|
||||
return True
|
||||
|
||||
def initial_names(self) -> AbstractSet[str]:
|
||||
return set()
|
||||
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
import argparse
|
||||
import sys
|
||||
|
||||
from typing import Any, Iterator, Callable
|
||||
from typing import Any, Callable, Iterator
|
||||
|
||||
from pegen.build import build_parser
|
||||
from pegen.grammar import Grammar, Rule
|
||||
|
|
|
@ -59,11 +59,11 @@ def main() -> None:
|
|||
with args.tokens_file as tok_file:
|
||||
all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
|
||||
gen = CParserGenerator(grammar, all_tokens, exact_tok, non_exact_tok, file=None)
|
||||
gen.collect_todo()
|
||||
gen.collect_rules()
|
||||
|
||||
with args.keyword_file as thefile:
|
||||
all_keywords = sorted(list(gen.callmakervisitor.keyword_cache.keys()) + EXTRA_KEYWORDS)
|
||||
all_soft_keywords = sorted(gen.callmakervisitor.soft_keywords)
|
||||
all_keywords = sorted(list(gen.keywords.keys()) + EXTRA_KEYWORDS)
|
||||
all_soft_keywords = sorted(gen.soft_keywords)
|
||||
|
||||
keywords = "" if not all_keywords else " " + ",\n ".join(map(repr, all_keywords))
|
||||
soft_keywords = (
|
||||
|
|
|
@ -1,22 +1,76 @@
|
|||
import ast
|
||||
import contextlib
|
||||
import re
|
||||
from abc import abstractmethod
|
||||
from typing import IO, AbstractSet, Dict, Iterator, List, Optional, Set, Text, Tuple
|
||||
from typing import (
|
||||
IO,
|
||||
AbstractSet,
|
||||
Any,
|
||||
Dict,
|
||||
Iterable,
|
||||
Iterator,
|
||||
List,
|
||||
Optional,
|
||||
Set,
|
||||
Text,
|
||||
Tuple,
|
||||
Union,
|
||||
)
|
||||
|
||||
from pegen import sccutils
|
||||
from pegen.grammar import (
|
||||
Alt,
|
||||
Cut,
|
||||
Forced,
|
||||
Gather,
|
||||
Grammar,
|
||||
GrammarError,
|
||||
GrammarVisitor,
|
||||
Group,
|
||||
Lookahead,
|
||||
NamedItem,
|
||||
NameLeaf,
|
||||
Opt,
|
||||
Plain,
|
||||
Repeat0,
|
||||
Repeat1,
|
||||
Rhs,
|
||||
Rule,
|
||||
StringLeaf,
|
||||
)
|
||||
|
||||
|
||||
class RuleCollectorVisitor(GrammarVisitor):
|
||||
"""Visitor that invokes a provieded callmaker visitor with just the NamedItem nodes"""
|
||||
|
||||
def __init__(self, rules: Dict[str, Rule], callmakervisitor: GrammarVisitor) -> None:
|
||||
self.rulses = rules
|
||||
self.callmaker = callmakervisitor
|
||||
|
||||
def visit_Rule(self, rule: Rule) -> None:
|
||||
self.visit(rule.flatten())
|
||||
|
||||
def visit_NamedItem(self, item: NamedItem) -> None:
|
||||
self.callmaker.visit(item)
|
||||
|
||||
|
||||
class KeywordCollectorVisitor(GrammarVisitor):
|
||||
"""Visitor that collects all the keywods and soft keywords in the Grammar"""
|
||||
|
||||
def __init__(self, gen: "ParserGenerator", keywords: Dict[str, int], soft_keywords: Set[str]):
|
||||
self.generator = gen
|
||||
self.keywords = keywords
|
||||
self.soft_keywords = soft_keywords
|
||||
|
||||
def visit_StringLeaf(self, node: StringLeaf) -> None:
|
||||
val = ast.literal_eval(node.value)
|
||||
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
|
||||
if node.value.endswith("'") and node.value not in self.keywords:
|
||||
self.keywords[val] = self.generator.keyword_type()
|
||||
else:
|
||||
return self.soft_keywords.add(node.value.replace('"', ""))
|
||||
|
||||
|
||||
class RuleCheckingVisitor(GrammarVisitor):
|
||||
def __init__(self, rules: Dict[str, Rule], tokens: Set[str]):
|
||||
self.rules = rules
|
||||
|
@ -39,6 +93,8 @@ class ParserGenerator:
|
|||
def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]):
|
||||
self.grammar = grammar
|
||||
self.tokens = tokens
|
||||
self.keywords: Dict[str, int] = {}
|
||||
self.soft_keywords: Set[str] = set()
|
||||
self.rules = grammar.rules
|
||||
self.validate_rule_names()
|
||||
if "trailer" not in grammar.metas and "start" not in self.rules:
|
||||
|
@ -48,12 +104,10 @@ class ParserGenerator:
|
|||
checker.visit(rule)
|
||||
self.file = file
|
||||
self.level = 0
|
||||
compute_nullables(self.rules)
|
||||
self.first_graph, self.first_sccs = compute_left_recursives(self.rules)
|
||||
self.todo = self.rules.copy() # Rules to generate
|
||||
self.counter = 0 # For name_rule()/name_loop()
|
||||
self.keyword_counter = 499 # For keyword_type()
|
||||
self.all_rules: Dict[str, Rule] = {} # Rules + temporal rules
|
||||
self.all_rules: Dict[str, Rule] = self.rules.copy() # Rules + temporal rules
|
||||
self._local_variable_stack: List[List[str]] = []
|
||||
|
||||
def validate_rule_names(self) -> None:
|
||||
|
@ -94,39 +148,43 @@ class ParserGenerator:
|
|||
for line in lines.splitlines():
|
||||
self.print(line)
|
||||
|
||||
def collect_todo(self) -> None:
|
||||
def collect_rules(self) -> None:
|
||||
keyword_collector = KeywordCollectorVisitor(self, self.keywords, self.soft_keywords)
|
||||
for rule in self.all_rules.values():
|
||||
keyword_collector.visit(rule)
|
||||
|
||||
rule_collector = RuleCollectorVisitor(self.rules, self.callmakervisitor)
|
||||
done: Set[str] = set()
|
||||
while True:
|
||||
alltodo = list(self.todo)
|
||||
self.all_rules.update(self.todo)
|
||||
todo = [i for i in alltodo if i not in done]
|
||||
computed_rules = list(self.all_rules)
|
||||
todo = [i for i in computed_rules if i not in done]
|
||||
if not todo:
|
||||
break
|
||||
done = set(self.all_rules)
|
||||
for rulename in todo:
|
||||
self.todo[rulename].collect_todo(self)
|
||||
done = set(alltodo)
|
||||
rule_collector.visit(self.all_rules[rulename])
|
||||
|
||||
def keyword_type(self) -> int:
|
||||
self.keyword_counter += 1
|
||||
return self.keyword_counter
|
||||
|
||||
def name_node(self, rhs: Rhs) -> str:
|
||||
def artifical_rule_from_rhs(self, rhs: Rhs) -> str:
|
||||
self.counter += 1
|
||||
name = f"_tmp_{self.counter}" # TODO: Pick a nicer name.
|
||||
self.todo[name] = Rule(name, None, rhs)
|
||||
self.all_rules[name] = Rule(name, None, rhs)
|
||||
return name
|
||||
|
||||
def name_loop(self, node: Plain, is_repeat1: bool) -> str:
|
||||
def artificial_rule_from_repeat(self, node: Plain, is_repeat1: bool) -> str:
|
||||
self.counter += 1
|
||||
if is_repeat1:
|
||||
prefix = "_loop1_"
|
||||
else:
|
||||
prefix = "_loop0_"
|
||||
name = f"{prefix}{self.counter}" # TODO: It's ugly to signal via the name.
|
||||
self.todo[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
|
||||
name = f"{prefix}{self.counter}"
|
||||
self.all_rules[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
|
||||
return name
|
||||
|
||||
def name_gather(self, node: Gather) -> str:
|
||||
def artifical_rule_from_gather(self, node: Gather) -> str:
|
||||
self.counter += 1
|
||||
name = f"_gather_{self.counter}"
|
||||
self.counter += 1
|
||||
|
@ -135,7 +193,7 @@ class ParserGenerator:
|
|||
[NamedItem(None, node.separator), NamedItem("elem", node.node)],
|
||||
action="elem",
|
||||
)
|
||||
self.todo[extra_function_name] = Rule(
|
||||
self.all_rules[extra_function_name] = Rule(
|
||||
extra_function_name,
|
||||
None,
|
||||
Rhs([extra_function_alt]),
|
||||
|
@ -143,7 +201,7 @@ class ParserGenerator:
|
|||
alt = Alt(
|
||||
[NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],
|
||||
)
|
||||
self.todo[name] = Rule(
|
||||
self.all_rules[name] = Rule(
|
||||
name,
|
||||
None,
|
||||
Rhs([alt]),
|
||||
|
@ -160,13 +218,120 @@ class ParserGenerator:
|
|||
return name
|
||||
|
||||
|
||||
def compute_nullables(rules: Dict[str, Rule]) -> None:
|
||||
class NullableVisitor(GrammarVisitor):
|
||||
def __init__(self, rules: Dict[str, Rule]) -> None:
|
||||
self.rules = rules
|
||||
self.visited: Set[Any] = set()
|
||||
self.nullables: Set[Union[Rule, NamedItem]] = set()
|
||||
|
||||
def visit_Rule(self, rule: Rule) -> bool:
|
||||
if rule in self.visited:
|
||||
return False
|
||||
self.visited.add(rule)
|
||||
if self.visit(rule.rhs):
|
||||
self.nullables.add(rule)
|
||||
return rule in self.nullables
|
||||
|
||||
def visit_Rhs(self, rhs: Rhs) -> bool:
|
||||
for alt in rhs.alts:
|
||||
if self.visit(alt):
|
||||
return True
|
||||
return False
|
||||
|
||||
def visit_Alt(self, alt: Alt) -> bool:
|
||||
for item in alt.items:
|
||||
if not self.visit(item):
|
||||
return False
|
||||
return True
|
||||
|
||||
def visit_Forced(self, force: Forced) -> bool:
|
||||
return True
|
||||
|
||||
def visit_LookAhead(self, lookahead: Lookahead) -> bool:
|
||||
return True
|
||||
|
||||
def visit_Opt(self, opt: Opt) -> bool:
|
||||
return True
|
||||
|
||||
def visit_Repeat0(self, repeat: Repeat0) -> bool:
|
||||
return True
|
||||
|
||||
def visit_Repeat1(self, repeat: Repeat1) -> bool:
|
||||
return False
|
||||
|
||||
def visit_Gather(self, gather: Gather) -> bool:
|
||||
return False
|
||||
|
||||
def visit_Cut(self, cut: Cut) -> bool:
|
||||
return False
|
||||
|
||||
def visit_Group(self, group: Group) -> bool:
|
||||
return self.visit(group.rhs)
|
||||
|
||||
def visit_NamedItem(self, item: NamedItem) -> bool:
|
||||
if self.visit(item.item):
|
||||
self.nullables.add(item)
|
||||
return item in self.nullables
|
||||
|
||||
def visit_NameLeaf(self, node: NameLeaf) -> bool:
|
||||
if node.value in self.rules:
|
||||
return self.visit(self.rules[node.value])
|
||||
# Token or unknown; never empty.
|
||||
return False
|
||||
|
||||
def visit_StringLeaf(self, node: StringLeaf) -> bool:
|
||||
# The string token '' is considered empty.
|
||||
return not node.value
|
||||
|
||||
|
||||
def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]:
|
||||
"""Compute which rules in a grammar are nullable.
|
||||
|
||||
Thanks to TatSu (tatsu/leftrec.py) for inspiration.
|
||||
"""
|
||||
nullable_visitor = NullableVisitor(rules)
|
||||
for rule in rules.values():
|
||||
rule.nullable_visit(rules)
|
||||
nullable_visitor.visit(rule)
|
||||
return nullable_visitor.nullables
|
||||
|
||||
|
||||
class InitialNamesVisitor(GrammarVisitor):
|
||||
def __init__(self, rules: Dict[str, Rule]) -> None:
|
||||
self.rules = rules
|
||||
self.nullables = compute_nullables(rules)
|
||||
|
||||
def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[Any]:
|
||||
names: Set[str] = set()
|
||||
for value in node:
|
||||
if isinstance(value, list):
|
||||
for item in value:
|
||||
names |= self.visit(item, *args, **kwargs)
|
||||
else:
|
||||
names |= self.visit(value, *args, **kwargs)
|
||||
return names
|
||||
|
||||
def visit_Alt(self, alt: Alt) -> Set[Any]:
|
||||
names: Set[str] = set()
|
||||
for item in alt.items:
|
||||
names |= self.visit(item)
|
||||
if item not in self.nullables:
|
||||
break
|
||||
return names
|
||||
|
||||
def visit_Forced(self, force: Forced) -> Set[Any]:
|
||||
return set()
|
||||
|
||||
def visit_LookAhead(self, lookahead: Lookahead) -> Set[Any]:
|
||||
return set()
|
||||
|
||||
def visit_Cut(self, cut: Cut) -> Set[Any]:
|
||||
return set()
|
||||
|
||||
def visit_NameLeaf(self, node: NameLeaf) -> Set[Any]:
|
||||
return {node.value}
|
||||
|
||||
def visit_StringLeaf(self, node: StringLeaf) -> Set[Any]:
|
||||
return set()
|
||||
|
||||
|
||||
def compute_left_recursives(
|
||||
|
@ -207,10 +372,11 @@ def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
|
|||
|
||||
Note that this requires the nullable flags to have been computed.
|
||||
"""
|
||||
initial_name_visitor = InitialNamesVisitor(rules)
|
||||
graph = {}
|
||||
vertices: Set[str] = set()
|
||||
for rulename, rhs in rules.items():
|
||||
graph[rulename] = names = rhs.initial_names()
|
||||
graph[rulename] = names = initial_name_visitor.visit(rhs)
|
||||
vertices |= names
|
||||
for vertex in vertices:
|
||||
graph.setdefault(vertex, set())
|
||||
|
|
|
@ -95,8 +95,6 @@ class PythonCallMakerVisitor(GrammarVisitor):
|
|||
def __init__(self, parser_generator: ParserGenerator):
|
||||
self.gen = parser_generator
|
||||
self.cache: Dict[Any, Any] = {}
|
||||
self.keywords: Set[str] = set()
|
||||
self.soft_keywords: Set[str] = set()
|
||||
|
||||
def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
|
||||
name = node.value
|
||||
|
@ -111,12 +109,6 @@ class PythonCallMakerVisitor(GrammarVisitor):
|
|||
return name, f"self.{name}()"
|
||||
|
||||
def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
|
||||
val = ast.literal_eval(node.value)
|
||||
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
|
||||
if node.value.endswith("'"):
|
||||
self.keywords.add(val)
|
||||
else:
|
||||
self.soft_keywords.add(val)
|
||||
return "literal", f"self.expect({node.value})"
|
||||
|
||||
def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
|
||||
|
@ -125,7 +117,7 @@ class PythonCallMakerVisitor(GrammarVisitor):
|
|||
if len(node.alts) == 1 and len(node.alts[0].items) == 1:
|
||||
self.cache[node] = self.visit(node.alts[0].items[0])
|
||||
else:
|
||||
name = self.gen.name_node(node)
|
||||
name = self.gen.artifical_rule_from_rhs(node)
|
||||
self.cache[node] = name, f"self.{name}()"
|
||||
return self.cache[node]
|
||||
|
||||
|
@ -163,21 +155,21 @@ class PythonCallMakerVisitor(GrammarVisitor):
|
|||
def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
|
||||
if node in self.cache:
|
||||
return self.cache[node]
|
||||
name = self.gen.name_loop(node.node, False)
|
||||
name = self.gen.artificial_rule_from_repeat(node.node, False)
|
||||
self.cache[node] = name, f"self.{name}()," # Also a trailing comma!
|
||||
return self.cache[node]
|
||||
|
||||
def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
|
||||
if node in self.cache:
|
||||
return self.cache[node]
|
||||
name = self.gen.name_loop(node.node, True)
|
||||
name = self.gen.artificial_rule_from_repeat(node.node, True)
|
||||
self.cache[node] = name, f"self.{name}()" # But no trailing comma here!
|
||||
return self.cache[node]
|
||||
|
||||
def visit_Gather(self, node: Gather) -> Tuple[str, str]:
|
||||
if node in self.cache:
|
||||
return self.cache[node]
|
||||
name = self.gen.name_gather(node)
|
||||
name = self.gen.artifical_rule_from_gather(node)
|
||||
self.cache[node] = name, f"self.{name}()" # No trailing comma here either!
|
||||
return self.cache[node]
|
||||
|
||||
|
@ -219,6 +211,7 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
)
|
||||
|
||||
def generate(self, filename: str) -> None:
|
||||
self.collect_rules()
|
||||
header = self.grammar.metas.get("header", MODULE_PREFIX)
|
||||
if header is not None:
|
||||
self.print(header.rstrip("\n").format(filename=filename))
|
||||
|
@ -228,17 +221,15 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
cls_name = self.grammar.metas.get("class", "GeneratedParser")
|
||||
self.print("# Keywords and soft keywords are listed at the end of the parser definition.")
|
||||
self.print(f"class {cls_name}(Parser):")
|
||||
while self.todo:
|
||||
for rulename, rule in list(self.todo.items()):
|
||||
del self.todo[rulename]
|
||||
self.print()
|
||||
with self.indent():
|
||||
self.visit(rule)
|
||||
for rule in self.all_rules.values():
|
||||
self.print()
|
||||
with self.indent():
|
||||
self.visit(rule)
|
||||
|
||||
self.print()
|
||||
with self.indent():
|
||||
self.print(f"KEYWORDS = {tuple(self.callmakervisitor.keywords)}")
|
||||
self.print(f"SOFT_KEYWORDS = {tuple(self.callmakervisitor.soft_keywords)}")
|
||||
self.print(f"KEYWORDS = {tuple(self.keywords)}")
|
||||
self.print(f"SOFT_KEYWORDS = {tuple(self.soft_keywords)}")
|
||||
|
||||
trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX.format(class_name=cls_name))
|
||||
if trailer is not None:
|
||||
|
@ -270,8 +261,6 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
self.print(f"def {node.name}(self) -> Optional[{node_type}]:")
|
||||
with self.indent():
|
||||
self.print(f"# {node.name}: {rhs}")
|
||||
if node.nullable:
|
||||
self.print(f"# nullable={node.nullable}")
|
||||
self.print("mark = self._mark()")
|
||||
if self.alts_uses_locations(node.rhs.alts):
|
||||
self.print("tok = self._tokenizer.peek()")
|
||||
|
|
|
@ -4,10 +4,9 @@ import os
|
|||
import pathlib
|
||||
import sys
|
||||
import textwrap
|
||||
import tokenize
|
||||
import token
|
||||
|
||||
from typing import Any, cast, Dict, IO, Type, Final
|
||||
import tokenize
|
||||
from typing import IO, Any, Dict, Final, Type, cast
|
||||
|
||||
from pegen.build import compile_c_extension
|
||||
from pegen.c_generator import CParserGenerator
|
||||
|
|
|
@ -1,12 +1,7 @@
|
|||
from typing import Optional
|
||||
|
||||
from pegen import grammar
|
||||
from pegen.grammar import (
|
||||
Alt,
|
||||
GrammarVisitor,
|
||||
Rule,
|
||||
Rhs,
|
||||
)
|
||||
from pegen.grammar import Alt, GrammarVisitor, Rhs, Rule
|
||||
|
||||
|
||||
class ValidationError(Exception):
|
||||
|
|
Loading…
Reference in New Issue