gh-124889: Rework Python generator cache (#125816)

This commit is contained in:
Mikhail Efimov 2024-10-22 11:42:56 +03:00 committed by GitHub
parent 4efe64aa56
commit c1bdbe84c8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 48 additions and 27 deletions

View File

@ -1,6 +1,6 @@
import os.path
import token
from typing import IO, Any, Dict, Optional, Sequence, Set, Text, Tuple
from typing import IO, Any, Callable, Dict, Optional, Sequence, Set, Text, Tuple
from pegen import grammar
from pegen.grammar import (
@ -93,7 +93,7 @@ class InvalidNodeVisitor(GrammarVisitor):
class PythonCallMakerVisitor(GrammarVisitor):
def __init__(self, parser_generator: ParserGenerator):
self.gen = parser_generator
self.cache: Dict[Any, Any] = {}
self.cache: Dict[str, Tuple[str, str]] = {}
def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
name = node.value
@ -110,16 +110,6 @@ class PythonCallMakerVisitor(GrammarVisitor):
def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
    """Return the ``"literal"`` tag paired with a ``self.expect(...)`` call for *node*."""
    # node.value is interpolated unchanged into the generated call text.
    literal_text = node.value
    expect_call = f"self.expect({literal_text})"
    return "literal", expect_call
def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
if node in self.cache:
return self.cache[node]
if len(node.alts) == 1 and len(node.alts[0].items) == 1:
self.cache[node] = self.visit(node.alts[0].items[0])
else:
name = self.gen.artificial_rule_from_rhs(node)
self.cache[node] = name, f"self.{name}()"
return self.cache[node]
def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]:
name, call = self.visit(node.item)
if node.name:
@ -151,26 +141,57 @@ class PythonCallMakerVisitor(GrammarVisitor):
else:
return "opt", f"{call},"
def _generate_artificial_rule_call(
self,
node: Any,
prefix: str,
call_by_name_func: Callable[[str], str],
rule_generation_func: Callable[[], str],
) -> Tuple[str, str]:
node_str = f"{node}"
key = f"{prefix}_{node_str}"
if key in self.cache:
return self.cache[key]
name = rule_generation_func()
call = call_by_name_func(name)
self.cache[key] = name, call
return self.cache[key]
def visit_Rhs(self, node: Rhs) -> Tuple[str, str]:
if len(node.alts) == 1 and len(node.alts[0].items) == 1:
return self.visit(node.alts[0].items[0])
return self._generate_artificial_rule_call(
node,
"rhs",
lambda name: f"self.{name}()",
lambda: self.gen.artificial_rule_from_rhs(node),
)
def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
if node in self.cache:
return self.cache[node]
name = self.gen.artificial_rule_from_repeat(node.node, False)
self.cache[node] = name, f"self.{name}()," # Also a trailing comma!
return self.cache[node]
return self._generate_artificial_rule_call(
node,
"repeat0",
lambda name: f"self.{name}(),", # Also a trailing comma!
lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=False),
)
def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
if node in self.cache:
return self.cache[node]
name = self.gen.artificial_rule_from_repeat(node.node, True)
self.cache[node] = name, f"self.{name}()" # But no trailing comma here!
return self.cache[node]
return self._generate_artificial_rule_call(
node,
"repeat1",
lambda name: f"self.{name}()", # But no trailing comma here!
lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=True),
)
def visit_Gather(self, node: Gather) -> Tuple[str, str]:
if node in self.cache:
return self.cache[node]
name = self.gen.artificial_rule_from_gather(node)
self.cache[node] = name, f"self.{name}()" # No trailing comma here either!
return self.cache[node]
return self._generate_artificial_rule_call(
node,
"gather",
lambda name: f"self.{name}()", # No trailing comma here either!
lambda: self.gen.artificial_rule_from_gather(node),
)
def visit_Group(self, node: Group) -> Tuple[Optional[str], str]:
return self.visit(node.rhs)