gh-124889: Remove redundant artificial rules in PEG parser (#124893)

The cache in the C PEG generator is reworked:
artificial rules are now cached using the Node's string representation as the key instead of the Node object itself.
As a result, the total count of artificial rules in parser.c is lowered from 283 to 170.
A more natural number ordering is used for the names of artificial rules.

An auxiliary method, CCallMakerVisitor._generate_artificial_rule_call, is added.
Its purpose is to abstract the handling of the artificial rules cache.

Explicit use of the "is_repeat1" kwarg is added in the visit_Repeat0 and visit_Repeat1 methods.
This slightly improves code readability.
This commit is contained in:
efimov-mikhail 2024-10-03 15:58:56 +03:00 committed by GitHub
parent e6dd71da3a
commit 1f9025a4e7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 4635 additions and 11473 deletions

16011
Parser/parser.c generated

File diff suppressed because it is too large Load Diff

View File

@ -3,7 +3,7 @@ import os.path
import re import re
from dataclasses import dataclass, field from dataclasses import dataclass, field
from enum import Enum from enum import Enum
from typing import IO, Any, Dict, List, Optional, Set, Text, Tuple from typing import IO, Any, Callable, Dict, List, Optional, Set, Text, Tuple
from pegen import grammar from pegen import grammar
from pegen.grammar import ( from pegen.grammar import (
@ -130,7 +130,7 @@ class CCallMakerVisitor(GrammarVisitor):
self.gen = parser_generator self.gen = parser_generator
self.exact_tokens = exact_tokens self.exact_tokens = exact_tokens
self.non_exact_tokens = non_exact_tokens self.non_exact_tokens = non_exact_tokens
self.cache: Dict[Any, FunctionCall] = {} self.cache: Dict[str, str] = {}
self.cleanup_statements: List[str] = [] self.cleanup_statements: List[str] = []
def keyword_helper(self, keyword: str) -> FunctionCall: def keyword_helper(self, keyword: str) -> FunctionCall:
@ -206,21 +206,6 @@ class CCallMakerVisitor(GrammarVisitor):
comment=f"token='{val}'", comment=f"token='{val}'",
) )
def visit_Rhs(self, node: Rhs) -> FunctionCall:
if node in self.cache:
return self.cache[node]
if node.can_be_inlined:
self.cache[node] = self.generate_call(node.alts[0].items[0])
else:
name = self.gen.artificial_rule_from_rhs(node)
self.cache[node] = FunctionCall(
assigned_variable=f"{name}_var",
function=f"{name}_rule",
arguments=["p"],
comment=f"{node}",
)
return self.cache[node]
def visit_NamedItem(self, node: NamedItem) -> FunctionCall: def visit_NamedItem(self, node: NamedItem) -> FunctionCall:
call = self.generate_call(node.item) call = self.generate_call(node.item)
if node.name: if node.name:
@ -302,44 +287,62 @@ class CCallMakerVisitor(GrammarVisitor):
comment=f"{node}", comment=f"{node}",
) )
def visit_Repeat0(self, node: Repeat0) -> FunctionCall: def _generate_artificial_rule_call(
if node in self.cache: self,
return self.cache[node] node: Any,
name = self.gen.artificial_rule_from_repeat(node.node, False) prefix: str,
self.cache[node] = FunctionCall( rule_generation_func: Callable[[], str],
return_type: Optional[str] = None,
) -> FunctionCall:
node_str = f"{node}"
key = f"{prefix}_{node_str}"
if key in self.cache:
name = self.cache[key]
else:
name = rule_generation_func()
self.cache[key] = name
return FunctionCall(
assigned_variable=f"{name}_var", assigned_variable=f"{name}_var",
function=f"{name}_rule", function=f"{name}_rule",
arguments=["p"], arguments=["p"],
return_type="asdl_seq *", return_type=return_type,
comment=f"{node}", comment=node_str,
)
def visit_Rhs(self, node: Rhs) -> FunctionCall:
if node.can_be_inlined:
return self.generate_call(node.alts[0].items[0])
return self._generate_artificial_rule_call(
node,
"rhs",
lambda: self.gen.artificial_rule_from_rhs(node),
)
def visit_Repeat0(self, node: Repeat0) -> FunctionCall:
return self._generate_artificial_rule_call(
node,
"repeat0",
lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=False),
"asdl_seq *",
) )
return self.cache[node]
def visit_Repeat1(self, node: Repeat1) -> FunctionCall: def visit_Repeat1(self, node: Repeat1) -> FunctionCall:
if node in self.cache: return self._generate_artificial_rule_call(
return self.cache[node] node,
name = self.gen.artificial_rule_from_repeat(node.node, True) "repeat1",
self.cache[node] = FunctionCall( lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=True),
assigned_variable=f"{name}_var", "asdl_seq *",
function=f"{name}_rule",
arguments=["p"],
return_type="asdl_seq *",
comment=f"{node}",
) )
return self.cache[node]
def visit_Gather(self, node: Gather) -> FunctionCall: def visit_Gather(self, node: Gather) -> FunctionCall:
if node in self.cache: return self._generate_artificial_rule_call(
return self.cache[node] node,
name = self.gen.artificial_rule_from_gather(node) "gather",
self.cache[node] = FunctionCall( lambda: self.gen.artificial_rule_from_gather(node),
assigned_variable=f"{name}_var", "asdl_seq *",
function=f"{name}_rule",
arguments=["p"],
return_type="asdl_seq *",
comment=f"{node}",
) )
return self.cache[node]
def visit_Group(self, node: Group) -> FunctionCall: def visit_Group(self, node: Group) -> FunctionCall:
return self.generate_call(node.rhs) return self.generate_call(node.rhs)

View File

@ -184,8 +184,6 @@ class ParserGenerator:
return name return name
def artificial_rule_from_gather(self, node: Gather) -> str: def artificial_rule_from_gather(self, node: Gather) -> str:
self.counter += 1
name = f"_gather_{self.counter}"
self.counter += 1 self.counter += 1
extra_function_name = f"_loop0_{self.counter}" extra_function_name = f"_loop0_{self.counter}"
extra_function_alt = Alt( extra_function_alt = Alt(
@ -197,6 +195,8 @@ class ParserGenerator:
None, None,
Rhs([extra_function_alt]), Rhs([extra_function_alt]),
) )
self.counter += 1
name = f"_gather_{self.counter}"
alt = Alt( alt = Alt(
[NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))], [NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],
) )