bpo-40334: refactor and cleanup for the PEG generators (GH-19775)

This commit is contained in:
Pablo Galindo 2020-04-29 10:42:21 +01:00 committed by GitHub
parent 9b64ef3ac7
commit 4db245ee9d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 292 additions and 230 deletions

View File

@ -648,7 +648,7 @@ file_rule(Parser *p)
if (
(a = statements_rule(p), 1)
&&
(endmarker_var = _PyPegen_endmarker_token(p))
(endmarker_var = _PyPegen_expect_token(p, ENDMARKER))
)
{
res = Module ( a , NULL , p -> arena );
@ -712,7 +712,7 @@ eval_rule(Parser *p)
&&
(_loop0_1_var = _loop0_1_rule(p))
&&
(endmarker_var = _PyPegen_endmarker_token(p))
(endmarker_var = _PyPegen_expect_token(p, ENDMARKER))
)
{
res = Expression ( a , p -> arena );
@ -846,7 +846,7 @@ statement_newline_rule(Parser *p)
if (
(a = compound_stmt_rule(p))
&&
(newline_var = _PyPegen_newline_token(p))
(newline_var = _PyPegen_expect_token(p, NEWLINE))
)
{
res = _PyPegen_singleton_seq ( p , a );
@ -872,7 +872,7 @@ statement_newline_rule(Parser *p)
{ // NEWLINE
void *newline_var;
if (
(newline_var = _PyPegen_newline_token(p))
(newline_var = _PyPegen_expect_token(p, NEWLINE))
)
{
Token *token = _PyPegen_get_last_nonnwhitespace_token(p);
@ -895,7 +895,7 @@ statement_newline_rule(Parser *p)
{ // $
void *endmarker_var;
if (
(endmarker_var = _PyPegen_endmarker_token(p))
(endmarker_var = _PyPegen_expect_token(p, ENDMARKER))
)
{
res = _PyPegen_interactive_exit ( p );
@ -929,7 +929,7 @@ simple_stmt_rule(Parser *p)
&&
_PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, 13)
&&
(newline_var = _PyPegen_newline_token(p))
(newline_var = _PyPegen_expect_token(p, NEWLINE))
)
{
res = _PyPegen_singleton_seq ( p , a );
@ -951,7 +951,7 @@ simple_stmt_rule(Parser *p)
&&
(opt_var = _PyPegen_expect_token(p, 13), 1)
&&
(newline_var = _PyPegen_newline_token(p))
(newline_var = _PyPegen_expect_token(p, NEWLINE))
)
{
res = a;
@ -2684,7 +2684,7 @@ for_stmt_rule(Parser *p)
void *literal;
expr_ty t;
if (
(is_async = _PyPegen_async_token(p), 1)
(is_async = _PyPegen_expect_token(p, ASYNC), 1)
&&
(keyword = _PyPegen_expect_token(p, 517))
&&
@ -2751,7 +2751,7 @@ with_stmt_rule(Parser *p)
void *literal_1;
void *literal_2;
if (
(is_async = _PyPegen_async_token(p), 1)
(is_async = _PyPegen_expect_token(p, ASYNC), 1)
&&
(keyword = _PyPegen_expect_token(p, 519))
&&
@ -2790,7 +2790,7 @@ with_stmt_rule(Parser *p)
void *keyword;
void *literal;
if (
(is_async = _PyPegen_async_token(p), 1)
(is_async = _PyPegen_expect_token(p, ASYNC), 1)
&&
(keyword = _PyPegen_expect_token(p, 519))
&&
@ -3263,7 +3263,7 @@ function_def_raw_rule(Parser *p)
expr_ty n;
void *params;
if (
(is_async = _PyPegen_async_token(p), 1)
(is_async = _PyPegen_expect_token(p, ASYNC), 1)
&&
(keyword = _PyPegen_expect_token(p, 522))
&&
@ -4002,13 +4002,13 @@ block_rule(Parser *p)
void *indent_var;
void *newline_var;
if (
(newline_var = _PyPegen_newline_token(p))
(newline_var = _PyPegen_expect_token(p, NEWLINE))
&&
(indent_var = _PyPegen_indent_token(p))
(indent_var = _PyPegen_expect_token(p, INDENT))
&&
(a = statements_rule(p))
&&
(dedent_var = _PyPegen_dedent_token(p))
(dedent_var = _PyPegen_expect_token(p, DEDENT))
)
{
res = a;
@ -6754,7 +6754,7 @@ await_primary_rule(Parser *p)
expr_ty a;
void *await_var;
if (
(await_var = _PyPegen_await_token(p))
(await_var = _PyPegen_expect_token(p, AWAIT))
&&
(a = primary_rule(p))
)
@ -9919,9 +9919,9 @@ invalid_block_rule(Parser *p)
{ // NEWLINE !INDENT
void *newline_var;
if (
(newline_var = _PyPegen_newline_token(p))
(newline_var = _PyPegen_expect_token(p, NEWLINE))
&&
_PyPegen_lookahead(0, _PyPegen_indent_token, p)
_PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, INDENT)
)
{
res = RAISE_INDENTATION_ERROR ( "expected an indented block" );
@ -10036,7 +10036,7 @@ _loop0_1_rule(Parser *p)
{ // NEWLINE
void *newline_var;
while (
(newline_var = _PyPegen_newline_token(p))
(newline_var = _PyPegen_expect_token(p, NEWLINE))
)
{
res = newline_var;
@ -10273,7 +10273,7 @@ _tmp_6_rule(Parser *p)
{ // ASYNC
void *async_var;
if (
(async_var = _PyPegen_async_token(p))
(async_var = _PyPegen_expect_token(p, ASYNC))
)
{
res = async_var;
@ -10345,7 +10345,7 @@ _tmp_8_rule(Parser *p)
{ // ASYNC
void *async_var;
if (
(async_var = _PyPegen_async_token(p))
(async_var = _PyPegen_expect_token(p, ASYNC))
)
{
res = async_var;
@ -10381,7 +10381,7 @@ _tmp_9_rule(Parser *p)
{ // ASYNC
void *async_var;
if (
(async_var = _PyPegen_async_token(p))
(async_var = _PyPegen_expect_token(p, ASYNC))
)
{
res = async_var;
@ -15068,7 +15068,7 @@ _tmp_128_rule(Parser *p)
&&
(f = named_expression_rule(p))
&&
(newline_var = _PyPegen_newline_token(p))
(newline_var = _PyPegen_expect_token(p, NEWLINE))
)
{
res = f;
@ -15257,7 +15257,7 @@ _tmp_134_rule(Parser *p)
void *keyword_1;
void *y;
if (
(y = _PyPegen_async_token(p), 1)
(y = _PyPegen_expect_token(p, ASYNC), 1)
&&
(keyword = _PyPegen_expect_token(p, 517))
&&

View File

@ -692,16 +692,6 @@ _PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
return (res != NULL) == positive;
}
// Lookahead helper for parse functions that take a string argument:
// run `func(p, arg)`, then rewind the parser to the saved mark so no
// input is actually consumed, and report whether the outcome matched
// the requested polarity (`positive` non-zero => expect a match).
int
_PyPegen_lookahead_with_string(int positive, void *(func)(Parser *, const char *), Parser *p,
const char *arg)
{
int mark = p->mark;
void *res = func(p, arg);
// Restore the input position: lookaheads never consume tokens.
p->mark = mark;
return (res != NULL) == positive;
}
int
_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
{
@ -751,24 +741,6 @@ _PyPegen_get_last_nonnwhitespace_token(Parser *p)
return token;
}
// Thin wrapper: consume the next token iff it is an ASYNC token.
// Delegates to _PyPegen_expect_token(p, ASYNC).
void *
_PyPegen_async_token(Parser *p)
{
return _PyPegen_expect_token(p, ASYNC);
}
// Thin wrapper: consume the next token iff it is an AWAIT token.
// Delegates to _PyPegen_expect_token(p, AWAIT).
void *
_PyPegen_await_token(Parser *p)
{
return _PyPegen_expect_token(p, AWAIT);
}
// Thin wrapper: consume the next token iff it is the ENDMARKER token
// (end of input). Delegates to _PyPegen_expect_token(p, ENDMARKER).
void *
_PyPegen_endmarker_token(Parser *p)
{
return _PyPegen_expect_token(p, ENDMARKER);
}
expr_ty
_PyPegen_name_token(Parser *p)
{
@ -794,24 +766,6 @@ _PyPegen_string_token(Parser *p)
return _PyPegen_expect_token(p, STRING);
}
// Thin wrapper: consume the next token iff it is a NEWLINE token.
// Delegates to _PyPegen_expect_token(p, NEWLINE).
void *
_PyPegen_newline_token(Parser *p)
{
return _PyPegen_expect_token(p, NEWLINE);
}
// Thin wrapper: consume the next token iff it is an INDENT token.
// Delegates to _PyPegen_expect_token(p, INDENT).
void *
_PyPegen_indent_token(Parser *p)
{
return _PyPegen_expect_token(p, INDENT);
}
// Thin wrapper: consume the next token iff it is a DEDENT token.
// Delegates to _PyPegen_expect_token(p, DEDENT).
void *
_PyPegen_dedent_token(Parser *p)
{
return _PyPegen_expect_token(p, DEDENT);
}
static PyObject *
parsenumber_raw(const char *s)
{

View File

@ -104,7 +104,6 @@ int _PyPegen_update_memo(Parser *p, int mark, int type, void *node);
int _PyPegen_is_memoized(Parser *p, int type, void *pres);
int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
int _PyPegen_lookahead_with_string(int, void *(func)(Parser *, const char *), Parser *, const char *);
int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);

View File

@ -33,7 +33,7 @@ dump: peg_extension/parse.c
$(PYTHON) -c "from peg_extension import parse; import ast; t = parse.parse_file('$(TESTFILE)', mode=1); print(ast.dump(t))"
regen-metaparser: pegen/metagrammar.gram pegen/*.py
$(PYTHON) -m pegen -q -c pegen/metagrammar.gram -o pegen/grammar_parser.py
$(PYTHON) -m pegen -q python pegen/metagrammar.gram -o pegen/grammar_parser.py
# Note: These targets really depend on the generated shared object in peg_extension/parse.*.so but
# this has different names in different systems so we are abusing the implicit dependency on

View File

@ -1,33 +1,36 @@
import ast
from dataclasses import dataclass, field
import re
from typing import Any, cast, Dict, IO, Optional, List, Text, Tuple, Set
from typing import IO, Any, Dict, List, Optional, Set, Text, Tuple
from enum import Enum
from pegen import grammar
from pegen.grammar import (
Cut,
GrammarVisitor,
Rhs,
Alt,
Cut,
Gather,
GrammarVisitor,
Group,
Lookahead,
NamedItem,
NameLeaf,
StringLeaf,
Lookahead,
PositiveLookahead,
NegativeLookahead,
Opt,
PositiveLookahead,
Repeat0,
Repeat1,
Gather,
Group,
Rhs,
Rule,
StringLeaf,
)
from pegen import grammar
from pegen.parser_generator import dedupe, ParserGenerator
from pegen.parser_generator import ParserGenerator
EXTENSION_PREFIX = """\
#include "pegen.h"
"""
EXTENSION_SUFFIX = """
void *
_PyPegen_parse(Parser *p)
@ -41,6 +44,43 @@ _PyPegen_parse(Parser *p)
"""
class NodeTypes(Enum):
    """Kinds of grammar-call targets the C parser generator distinguishes.

    The nodetype drives which lookahead helper / variable type the
    generator emits for a given ``FunctionCall``.
    """

    NAME_TOKEN = 0
    NUMBER_TOKEN = 1
    STRING_TOKEN = 2
    GENERIC_TOKEN = 3
    KEYWORD = 4
    CUT_OPERATOR = 5


# Token names whose parse helpers produce a node (rather than a plain
# generic token); used when classifying NameLeaf calls.
BASE_NODETYPES = {
    "NAME": NodeTypes.NAME_TOKEN,
    "NUMBER": NodeTypes.NUMBER_TOKEN,
    "STRING": NodeTypes.STRING_TOKEN,
}
@dataclass
class FunctionCall:
    """One parsing-function invocation emitted into the generated C code.

    ``__str__`` renders the call as ``function(arguments)``, optionally
    followed by ``, 1`` (C comma operator, so optional items always
    evaluate truthy) and/or wrapped in ``(assigned_variable = ...)``.
    """

    function: str
    arguments: Optional[List[Any]] = None
    assigned_variable: Optional[str] = None
    # Forward reference: NodeTypes is declared alongside this class.
    nodetype: Optional["NodeTypes"] = None
    # When True, append ", 1" so the whole expression is always truthy
    # (used for optional grammar items).
    force_true: bool = False
    # Extra generator bookkeeping, e.g. {"rulename": ...}.
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __str__(self) -> str:
        parts = []
        parts.append(self.function)
        if self.arguments:
            parts.append(f"({', '.join(map(str, self.arguments))})")
        if self.force_true:
            parts.append(", 1")
        if self.assigned_variable:
            parts = ["(", self.assigned_variable, " = ", *parts, ")"]
        return "".join(parts)
class CCallMakerVisitor(GrammarVisitor):
def __init__(
self,
@ -54,28 +94,57 @@ class CCallMakerVisitor(GrammarVisitor):
self.cache: Dict[Any, Any] = {}
self.keyword_cache: Dict[str, int] = {}
def keyword_helper(self, keyword: str) -> Tuple[str, str]:
def keyword_helper(self, keyword: str) -> FunctionCall:
if keyword not in self.keyword_cache:
self.keyword_cache[keyword] = self.gen.keyword_type()
return "keyword", f"_PyPegen_expect_token(p, {self.keyword_cache[keyword]})"
return FunctionCall(
assigned_variable="keyword",
function="_PyPegen_expect_token",
arguments=["p", self.keyword_cache[keyword]],
nodetype=NodeTypes.KEYWORD,
)
def visit_NameLeaf(self, node: NameLeaf) -> Tuple[str, str]:
def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall:
name = node.value
if name in self.non_exact_tokens:
name = name.lower()
return f"{name}_var", f"_PyPegen_{name}_token(p)"
return f"{name}_var", f"{name}_rule(p)"
if name in BASE_NODETYPES:
return FunctionCall(
assigned_variable=f"{name.lower()}_var",
function=f"_PyPegen_{name.lower()}_token",
arguments=["p"],
nodetype=BASE_NODETYPES[name],
metadata={"rulename": name.lower()},
)
return FunctionCall(
assigned_variable=f"{name.lower()}_var",
function=f"_PyPegen_expect_token",
arguments=["p", name],
nodetype=NodeTypes.GENERIC_TOKEN,
metadata={"rulename": name.lower()},
)
def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
return FunctionCall(
assigned_variable=f"{name}_var",
function=f"{name}_rule",
arguments=["p"],
metadata={"rulename": name.lower()},
)
def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall:
val = ast.literal_eval(node.value)
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
return self.keyword_helper(val)
else:
assert val in self.exact_tokens, f"{node.value} is not a known literal"
type = self.exact_tokens[val]
return "literal", f"_PyPegen_expect_token(p, {type})"
return FunctionCall(
assigned_variable="literal",
function=f"_PyPegen_expect_token",
arguments=["p", type],
nodetype=NodeTypes.GENERIC_TOKEN,
)
def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
def visit_Rhs(self, node: Rhs) -> FunctionCall:
def can_we_inline(node: Rhs) -> int:
if len(node.alts) != 1 or len(node.alts[0].items) != 1:
return False
@ -90,65 +159,96 @@ class CCallMakerVisitor(GrammarVisitor):
self.cache[node] = self.visit(node.alts[0].items[0])
else:
name = self.gen.name_node(node)
self.cache[node] = f"{name}_var", f"{name}_rule(p)"
self.cache[node] = FunctionCall(
assigned_variable=f"{name}_var",
function=f"{name}_rule",
arguments=["p"],
metadata={"rulename": name},
)
return self.cache[node]
def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]:
name, call = self.visit(node.item)
def visit_NamedItem(self, node: NamedItem) -> FunctionCall:
call = self.visit(node.item)
if node.name:
name = node.name
return name, call
call.assigned_variable = node.name
return call
def lookahead_call_helper(self, node: Lookahead, positive: int) -> Tuple[None, str]:
name, call = self.visit(node.node)
func, args = call.split("(", 1)
assert args[-1] == ")"
args = args[:-1]
if "name_token" in call:
return None, f"_PyPegen_lookahead_with_name({positive}, {func}, {args})"
elif not args.startswith("p,"):
return None, f"_PyPegen_lookahead({positive}, {func}, {args})"
elif args[2:].strip().isalnum():
return None, f"_PyPegen_lookahead_with_int({positive}, {func}, {args})"
def lookahead_call_helper(self, node: Lookahead, positive: int) -> FunctionCall:
call = self.visit(node.node)
if call.nodetype == NodeTypes.NAME_TOKEN:
return FunctionCall(
function=f"_PyPegen_lookahead_with_name",
arguments=[positive, call.function, *call.arguments],
)
elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}:
return FunctionCall(
function=f"_PyPegen_lookahead_with_int",
arguments=[positive, call.function, *call.arguments],
)
else:
return None, f"_PyPegen_lookahead_with_string({positive}, {func}, {args})"
return FunctionCall(
function=f"_PyPegen_lookahead",
arguments=[positive, call.function, *call.arguments],
)
def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]:
def visit_PositiveLookahead(self, node: PositiveLookahead) -> FunctionCall:
return self.lookahead_call_helper(node, 1)
def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]:
def visit_NegativeLookahead(self, node: NegativeLookahead) -> FunctionCall:
return self.lookahead_call_helper(node, 0)
def visit_Opt(self, node: Opt) -> Tuple[str, str]:
name, call = self.visit(node.node)
return "opt_var", f"{call}, 1" # Using comma operator!
def visit_Opt(self, node: Opt) -> FunctionCall:
call = self.visit(node.node)
return FunctionCall(
assigned_variable="opt_var",
function=call.function,
arguments=call.arguments,
force_true=True,
)
def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
def visit_Repeat0(self, node: Repeat0) -> FunctionCall:
if node in self.cache:
return self.cache[node]
name = self.gen.name_loop(node.node, False)
self.cache[node] = f"{name}_var", f"{name}_rule(p)"
self.cache[node] = FunctionCall(
assigned_variable=f"{name}_var",
function=f"{name}_rule",
arguments=["p"],
metadata={"rulename": name},
)
return self.cache[node]
def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
def visit_Repeat1(self, node: Repeat1) -> FunctionCall:
if node in self.cache:
return self.cache[node]
name = self.gen.name_loop(node.node, True)
self.cache[node] = f"{name}_var", f"{name}_rule(p)"
self.cache[node] = FunctionCall(
assigned_variable=f"{name}_var",
function=f"{name}_rule",
arguments=["p"],
metadata={"rulename": name},
)
return self.cache[node]
def visit_Gather(self, node: Gather) -> Tuple[str, str]:
def visit_Gather(self, node: Gather) -> FunctionCall:
if node in self.cache:
return self.cache[node]
name = self.gen.name_gather(node)
self.cache[node] = f"{name}_var", f"{name}_rule(p)"
self.cache[node] = FunctionCall(
assigned_variable=f"{name}_var",
function=f"{name}_rule",
arguments=["p"],
metadata={"rulename": name},
)
return self.cache[node]
def visit_Group(self, node: Group) -> Tuple[Optional[str], str]:
def visit_Group(self, node: Group) -> FunctionCall:
return self.visit(node.rhs)
def visit_Cut(self, node: Cut) -> Tuple[str, str]:
return "cut_var", "1"
def visit_Cut(self, node: Cut) -> FunctionCall:
return FunctionCall(
assigned_variable="cut_var", function="1", nodetype=NodeTypes.CUT_OPERATOR
)
class CParserGenerator(ParserGenerator, GrammarVisitor):
@ -252,7 +352,6 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
mode += 1
modulename = self.grammar.metas.get("modulename", "parse")
trailer = self.grammar.metas.get("trailer", EXTENSION_SUFFIX)
keyword_cache = self.callmakervisitor.keyword_cache
if trailer:
self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename))
@ -448,13 +547,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self._handle_default_rule_body(node, rhs, result_type)
self.print("}")
def visit_NamedItem(self, node: NamedItem, names: List[str]) -> None:
name, call = self.callmakervisitor.visit(node)
if not name:
def visit_NamedItem(self, node: NamedItem) -> None:
call = self.callmakervisitor.visit(node)
if call.assigned_variable:
call.assigned_variable = self.dedupe(call.assigned_variable)
self.print(call)
else:
name = dedupe(name, names)
self.print(f"({name} = {call})")
def visit_Rhs(
self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str]
@ -464,7 +561,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
for alt in node.alts:
self.visit(alt, is_loop=is_loop, is_gather=is_gather, rulename=rulename)
def join_conditions(self, keyword: str, node: Any, names: List[str]) -> None:
def join_conditions(self, keyword: str, node: Any) -> None:
self.print(f"{keyword} (")
with self.indent():
first = True
@ -473,7 +570,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
first = False
else:
self.print("&&")
self.visit(item, names=names)
self.visit(item)
self.print(")")
def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None:
@ -492,29 +589,34 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
f'fprintf(stderr, "Hit with action [%d-%d]: %s\\n", mark, p->mark, "{node}");'
)
def emit_default_action(self, is_gather: bool, names: List[str], node: Alt) -> None:
if len(names) > 1:
def emit_default_action(self, is_gather: bool, node: Alt) -> None:
if len(self.local_variable_names) > 1:
if is_gather:
assert len(names) == 2
self.print(f"res = _PyPegen_seq_insert_in_front(p, {names[0]}, {names[1]});")
assert len(self.local_variable_names) == 2
self.print(
f"res = _PyPegen_seq_insert_in_front(p, "
f"{self.local_variable_names[0]}, {self.local_variable_names[1]});"
)
else:
if self.debug:
self.print(
f'fprintf(stderr, "Hit without action [%d:%d]: %s\\n", mark, p->mark, "{node}");'
)
self.print(f"res = _PyPegen_dummy_name(p, {', '.join(names)});")
self.print(
f"res = _PyPegen_dummy_name(p, {', '.join(self.local_variable_names)});"
)
else:
if self.debug:
self.print(
f'fprintf(stderr, "Hit with default action [%d:%d]: %s\\n", mark, p->mark, "{node}");'
)
self.print(f"res = {names[0]};")
self.print(f"res = {self.local_variable_names[0]};")
def emit_dummy_action(self) -> None:
self.print(f"res = _PyPegen_dummy_name(p);")
def handle_alt_normal(self, node: Alt, is_gather: bool, names: List[str]) -> None:
self.join_conditions(keyword="if", node=node, names=names)
def handle_alt_normal(self, node: Alt, is_gather: bool) -> None:
self.join_conditions(keyword="if", node=node)
self.print("{")
# We have parsed successfully all the conditions for the option.
with self.indent():
@ -526,17 +628,15 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
elif node.action:
self.emit_action(node)
else:
self.emit_default_action(is_gather, names, node)
self.emit_default_action(is_gather, node)
# As the current option has parsed correctly, do not continue with the rest.
self.print(f"goto done;")
self.print("}")
def handle_alt_loop(
self, node: Alt, is_gather: bool, rulename: Optional[str], names: List[str]
) -> None:
def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
# Condition of the main body of the alternative
self.join_conditions(keyword="while", node=node, names=names)
self.join_conditions(keyword="while", node=node)
self.print("{")
# We have parsed successfully one item!
with self.indent():
@ -548,7 +648,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
elif node.action:
self.emit_action(node, cleanup_code="PyMem_Free(children);")
else:
self.emit_default_action(is_gather, names, node)
self.emit_default_action(is_gather, node)
# Add the result of rule to the temporary buffer of children. This buffer
# will populate later an asdl_seq with all elements to return.
@ -580,47 +680,45 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
if v == "opt_var":
self.print("UNUSED(opt_var); // Silence compiler warnings")
names: List[str] = []
with self.local_variable_context():
if is_loop:
self.handle_alt_loop(node, is_gather, rulename, names)
self.handle_alt_loop(node, is_gather, rulename)
else:
self.handle_alt_normal(node, is_gather, names)
self.handle_alt_normal(node, is_gather)
self.print("p->mark = mark;")
if "cut_var" in names:
if "cut_var" in vars:
self.print("if (cut_var) return NULL;")
self.print("}")
def collect_vars(self, node: Alt) -> Dict[str, Optional[str]]:
names: List[str] = []
def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]:
types = {}
with self.local_variable_context():
for item in node.items:
name, type = self.add_var(item, names)
name, type = self.add_var(item)
types[name] = type
return types
def add_var(self, node: NamedItem, names: List[str]) -> Tuple[str, Optional[str]]:
name: str
call: str
name, call = self.callmakervisitor.visit(node.item)
type = None
if not name:
return name, type
if name.startswith("cut"):
return name, "int"
if name.endswith("_var"):
rulename = name[:-4]
rule = self.rules.get(rulename)
if rule is not None:
def add_var(self, node: NamedItem) -> Tuple[Optional[str], Optional[str]]:
call = self.callmakervisitor.visit(node.item)
if not call.assigned_variable:
return None, None
if call.nodetype == NodeTypes.CUT_OPERATOR:
return call.assigned_variable, "int"
name = call.assigned_variable
rulename = call.metadata.get("rulename")
type: Optional[str] = None
assert self.all_rules is not None
if rulename and rulename in self.all_rules:
rule = self.all_rules.get(rulename)
if rule.is_loop() or rule.is_gather():
type = "asdl_seq *"
else:
type = rule.type
elif name.startswith("_loop") or name.startswith("_gather"):
type = "asdl_seq *"
elif name in ("name_var", "string_var", "number_var"):
elif call.nodetype in BASE_NODETYPES.values():
type = "expr_ty"
if node.name:
name = node.name
name = dedupe(name, names)
return name, type
return self.dedupe(node.name if node.name else call.assigned_variable), type

View File

@ -13,7 +13,6 @@ from pegen.grammar import (
NamedItem,
Plain,
NameLeaf,
StringLeaf,
Gather,
)
from pegen.grammar import GrammarError, GrammarVisitor
@ -48,6 +47,18 @@ class ParserGenerator:
self.todo = self.rules.copy() # Rules to generate
self.counter = 0 # For name_rule()/name_loop()
self.keyword_counter = 499 # For keyword_type()
self.all_rules: Optional[Dict[str, Rule]] = None # Rules + temporal rules
self._local_variable_stack: List[List[str]] = []
@contextlib.contextmanager
def local_variable_context(self) -> Iterator[None]:
    """Push a fresh local-variable name scope for the duration of the block.

    The generator visits nested alternatives; each gets its own list of
    local variable names on ``self._local_variable_stack``.
    """
    self._local_variable_stack.append([])
    try:
        yield
    finally:
        # Pop even if the visitor raises, so the stack never leaks a frame.
        self._local_variable_stack.pop()
@property
def local_variable_names(self) -> List[str]:
    """Names collected in the innermost (current) local-variable scope."""
    return self._local_variable_stack[-1]
@abstractmethod
def generate(self, filename: str) -> None:
@ -82,6 +93,7 @@ class ParserGenerator:
for rulename in todo:
self.todo[rulename].collect_todo(self)
done = set(alltodo)
self.all_rules = self.todo.copy()
def keyword_type(self) -> int:
self.keyword_counter += 1
@ -109,25 +121,22 @@ class ParserGenerator:
self.counter += 1
extra_function_name = f"_loop0_{self.counter}"
extra_function_alt = Alt(
[NamedItem(None, node.separator), NamedItem("elem", node.node),], action="elem",
[NamedItem(None, node.separator), NamedItem("elem", node.node)], action="elem",
)
self.todo[extra_function_name] = Rule(
extra_function_name, None, Rhs([extra_function_alt]),
)
alt = Alt(
[NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name)),],
)
alt = Alt([NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],)
self.todo[name] = Rule(name, None, Rhs([alt]),)
return name
def dedupe(name: str, names: List[str]) -> str:
def dedupe(self, name: str) -> str:
origname = name
counter = 0
while name in names:
while name in self.local_variable_names:
counter += 1
name = f"{origname}_{counter}"
names.append(name)
self.local_variable_names.append(name)
return name
@ -153,13 +162,13 @@ def compute_left_recursives(
leaders = set(scc)
for start in scc:
for cycle in sccutils.find_cycles_in_scc(graph, scc, start):
## print("Cycle:", " -> ".join(cycle))
# print("Cycle:", " -> ".join(cycle))
leaders -= scc - set(cycle)
if not leaders:
raise ValueError(
f"SCC {scc} has no leadership candidate (no element is included in all cycles)"
)
## print("Leaders:", leaders)
# print("Leaders:", leaders)
leader = min(leaders) # Pick an arbitrary leader from the candidates.
rules[leader].leader = True
else:

View File

@ -1,4 +1,4 @@
from typing import Any, Dict, List, Optional, IO, Text, Tuple
from typing import Any, Dict, Optional, IO, Text, Tuple
from pegen.grammar import (
Cut,
@ -19,7 +19,7 @@ from pegen.grammar import (
Alt,
)
from pegen import grammar
from pegen.parser_generator import dedupe, ParserGenerator
from pegen.parser_generator import ParserGenerator
MODULE_PREFIX = """\
#!/usr/bin/env python3.8
@ -173,7 +173,7 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
else:
self.print("return None")
def visit_NamedItem(self, node: NamedItem, names: List[str]) -> None:
def visit_NamedItem(self, node: NamedItem) -> None:
name, call = self.callmakervisitor.visit(node.item)
if node.name:
name = node.name
@ -181,7 +181,7 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
self.print(call)
else:
if name != "cut":
name = dedupe(name, names)
name = self.dedupe(name)
self.print(f"({name} := {call})")
def visit_Rhs(self, node: Rhs, is_loop: bool = False, is_gather: bool = False) -> None:
@ -191,7 +191,7 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
self.visit(alt, is_loop=is_loop, is_gather=is_gather)
def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None:
names: List[str] = []
with self.local_variable_context():
self.print("cut = False") # TODO: Only if needed.
if is_loop:
self.print("while (")
@ -204,16 +204,18 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
first = False
else:
self.print("and")
self.visit(item, names=names)
self.visit(item)
self.print("):")
with self.indent():
action = node.action
if not action:
if is_gather:
assert len(names) == 2
action = f"[{names[0]}] + {names[1]}"
assert len(self.local_variable_names) == 2
action = (
f"[{self.local_variable_names[0]}] + {self.local_variable_names[1]}"
)
else:
action = f"[{', '.join(names)}]"
action = f"[{', '.join(self.local_variable_names)}]"
if is_loop:
self.print(f"children.append({action})")
self.print(f"mark = self.mark()")