bpo-40334: refactor and cleanup for the PEG generators (GH-19775)
This commit is contained in:
parent
9b64ef3ac7
commit
4db245ee9d
|
@ -648,7 +648,7 @@ file_rule(Parser *p)
|
|||
if (
|
||||
(a = statements_rule(p), 1)
|
||||
&&
|
||||
(endmarker_var = _PyPegen_endmarker_token(p))
|
||||
(endmarker_var = _PyPegen_expect_token(p, ENDMARKER))
|
||||
)
|
||||
{
|
||||
res = Module ( a , NULL , p -> arena );
|
||||
|
@ -712,7 +712,7 @@ eval_rule(Parser *p)
|
|||
&&
|
||||
(_loop0_1_var = _loop0_1_rule(p))
|
||||
&&
|
||||
(endmarker_var = _PyPegen_endmarker_token(p))
|
||||
(endmarker_var = _PyPegen_expect_token(p, ENDMARKER))
|
||||
)
|
||||
{
|
||||
res = Expression ( a , p -> arena );
|
||||
|
@ -846,7 +846,7 @@ statement_newline_rule(Parser *p)
|
|||
if (
|
||||
(a = compound_stmt_rule(p))
|
||||
&&
|
||||
(newline_var = _PyPegen_newline_token(p))
|
||||
(newline_var = _PyPegen_expect_token(p, NEWLINE))
|
||||
)
|
||||
{
|
||||
res = _PyPegen_singleton_seq ( p , a );
|
||||
|
@ -872,7 +872,7 @@ statement_newline_rule(Parser *p)
|
|||
{ // NEWLINE
|
||||
void *newline_var;
|
||||
if (
|
||||
(newline_var = _PyPegen_newline_token(p))
|
||||
(newline_var = _PyPegen_expect_token(p, NEWLINE))
|
||||
)
|
||||
{
|
||||
Token *token = _PyPegen_get_last_nonnwhitespace_token(p);
|
||||
|
@ -895,7 +895,7 @@ statement_newline_rule(Parser *p)
|
|||
{ // $
|
||||
void *endmarker_var;
|
||||
if (
|
||||
(endmarker_var = _PyPegen_endmarker_token(p))
|
||||
(endmarker_var = _PyPegen_expect_token(p, ENDMARKER))
|
||||
)
|
||||
{
|
||||
res = _PyPegen_interactive_exit ( p );
|
||||
|
@ -929,7 +929,7 @@ simple_stmt_rule(Parser *p)
|
|||
&&
|
||||
_PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, 13)
|
||||
&&
|
||||
(newline_var = _PyPegen_newline_token(p))
|
||||
(newline_var = _PyPegen_expect_token(p, NEWLINE))
|
||||
)
|
||||
{
|
||||
res = _PyPegen_singleton_seq ( p , a );
|
||||
|
@ -951,7 +951,7 @@ simple_stmt_rule(Parser *p)
|
|||
&&
|
||||
(opt_var = _PyPegen_expect_token(p, 13), 1)
|
||||
&&
|
||||
(newline_var = _PyPegen_newline_token(p))
|
||||
(newline_var = _PyPegen_expect_token(p, NEWLINE))
|
||||
)
|
||||
{
|
||||
res = a;
|
||||
|
@ -2684,7 +2684,7 @@ for_stmt_rule(Parser *p)
|
|||
void *literal;
|
||||
expr_ty t;
|
||||
if (
|
||||
(is_async = _PyPegen_async_token(p), 1)
|
||||
(is_async = _PyPegen_expect_token(p, ASYNC), 1)
|
||||
&&
|
||||
(keyword = _PyPegen_expect_token(p, 517))
|
||||
&&
|
||||
|
@ -2751,7 +2751,7 @@ with_stmt_rule(Parser *p)
|
|||
void *literal_1;
|
||||
void *literal_2;
|
||||
if (
|
||||
(is_async = _PyPegen_async_token(p), 1)
|
||||
(is_async = _PyPegen_expect_token(p, ASYNC), 1)
|
||||
&&
|
||||
(keyword = _PyPegen_expect_token(p, 519))
|
||||
&&
|
||||
|
@ -2790,7 +2790,7 @@ with_stmt_rule(Parser *p)
|
|||
void *keyword;
|
||||
void *literal;
|
||||
if (
|
||||
(is_async = _PyPegen_async_token(p), 1)
|
||||
(is_async = _PyPegen_expect_token(p, ASYNC), 1)
|
||||
&&
|
||||
(keyword = _PyPegen_expect_token(p, 519))
|
||||
&&
|
||||
|
@ -3263,7 +3263,7 @@ function_def_raw_rule(Parser *p)
|
|||
expr_ty n;
|
||||
void *params;
|
||||
if (
|
||||
(is_async = _PyPegen_async_token(p), 1)
|
||||
(is_async = _PyPegen_expect_token(p, ASYNC), 1)
|
||||
&&
|
||||
(keyword = _PyPegen_expect_token(p, 522))
|
||||
&&
|
||||
|
@ -4002,13 +4002,13 @@ block_rule(Parser *p)
|
|||
void *indent_var;
|
||||
void *newline_var;
|
||||
if (
|
||||
(newline_var = _PyPegen_newline_token(p))
|
||||
(newline_var = _PyPegen_expect_token(p, NEWLINE))
|
||||
&&
|
||||
(indent_var = _PyPegen_indent_token(p))
|
||||
(indent_var = _PyPegen_expect_token(p, INDENT))
|
||||
&&
|
||||
(a = statements_rule(p))
|
||||
&&
|
||||
(dedent_var = _PyPegen_dedent_token(p))
|
||||
(dedent_var = _PyPegen_expect_token(p, DEDENT))
|
||||
)
|
||||
{
|
||||
res = a;
|
||||
|
@ -6754,7 +6754,7 @@ await_primary_rule(Parser *p)
|
|||
expr_ty a;
|
||||
void *await_var;
|
||||
if (
|
||||
(await_var = _PyPegen_await_token(p))
|
||||
(await_var = _PyPegen_expect_token(p, AWAIT))
|
||||
&&
|
||||
(a = primary_rule(p))
|
||||
)
|
||||
|
@ -9919,9 +9919,9 @@ invalid_block_rule(Parser *p)
|
|||
{ // NEWLINE !INDENT
|
||||
void *newline_var;
|
||||
if (
|
||||
(newline_var = _PyPegen_newline_token(p))
|
||||
(newline_var = _PyPegen_expect_token(p, NEWLINE))
|
||||
&&
|
||||
_PyPegen_lookahead(0, _PyPegen_indent_token, p)
|
||||
_PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, INDENT)
|
||||
)
|
||||
{
|
||||
res = RAISE_INDENTATION_ERROR ( "expected an indented block" );
|
||||
|
@ -10036,7 +10036,7 @@ _loop0_1_rule(Parser *p)
|
|||
{ // NEWLINE
|
||||
void *newline_var;
|
||||
while (
|
||||
(newline_var = _PyPegen_newline_token(p))
|
||||
(newline_var = _PyPegen_expect_token(p, NEWLINE))
|
||||
)
|
||||
{
|
||||
res = newline_var;
|
||||
|
@ -10273,7 +10273,7 @@ _tmp_6_rule(Parser *p)
|
|||
{ // ASYNC
|
||||
void *async_var;
|
||||
if (
|
||||
(async_var = _PyPegen_async_token(p))
|
||||
(async_var = _PyPegen_expect_token(p, ASYNC))
|
||||
)
|
||||
{
|
||||
res = async_var;
|
||||
|
@ -10345,7 +10345,7 @@ _tmp_8_rule(Parser *p)
|
|||
{ // ASYNC
|
||||
void *async_var;
|
||||
if (
|
||||
(async_var = _PyPegen_async_token(p))
|
||||
(async_var = _PyPegen_expect_token(p, ASYNC))
|
||||
)
|
||||
{
|
||||
res = async_var;
|
||||
|
@ -10381,7 +10381,7 @@ _tmp_9_rule(Parser *p)
|
|||
{ // ASYNC
|
||||
void *async_var;
|
||||
if (
|
||||
(async_var = _PyPegen_async_token(p))
|
||||
(async_var = _PyPegen_expect_token(p, ASYNC))
|
||||
)
|
||||
{
|
||||
res = async_var;
|
||||
|
@ -15068,7 +15068,7 @@ _tmp_128_rule(Parser *p)
|
|||
&&
|
||||
(f = named_expression_rule(p))
|
||||
&&
|
||||
(newline_var = _PyPegen_newline_token(p))
|
||||
(newline_var = _PyPegen_expect_token(p, NEWLINE))
|
||||
)
|
||||
{
|
||||
res = f;
|
||||
|
@ -15257,7 +15257,7 @@ _tmp_134_rule(Parser *p)
|
|||
void *keyword_1;
|
||||
void *y;
|
||||
if (
|
||||
(y = _PyPegen_async_token(p), 1)
|
||||
(y = _PyPegen_expect_token(p, ASYNC), 1)
|
||||
&&
|
||||
(keyword = _PyPegen_expect_token(p, 517))
|
||||
&&
|
||||
|
|
|
@ -692,16 +692,6 @@ _PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
|
|||
return (res != NULL) == positive;
|
||||
}
|
||||
|
||||
int
|
||||
_PyPegen_lookahead_with_string(int positive, void *(func)(Parser *, const char *), Parser *p,
|
||||
const char *arg)
|
||||
{
|
||||
int mark = p->mark;
|
||||
void *res = func(p, arg);
|
||||
p->mark = mark;
|
||||
return (res != NULL) == positive;
|
||||
}
|
||||
|
||||
int
|
||||
_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
|
||||
{
|
||||
|
@ -751,24 +741,6 @@ _PyPegen_get_last_nonnwhitespace_token(Parser *p)
|
|||
return token;
|
||||
}
|
||||
|
||||
void *
|
||||
_PyPegen_async_token(Parser *p)
|
||||
{
|
||||
return _PyPegen_expect_token(p, ASYNC);
|
||||
}
|
||||
|
||||
void *
|
||||
_PyPegen_await_token(Parser *p)
|
||||
{
|
||||
return _PyPegen_expect_token(p, AWAIT);
|
||||
}
|
||||
|
||||
void *
|
||||
_PyPegen_endmarker_token(Parser *p)
|
||||
{
|
||||
return _PyPegen_expect_token(p, ENDMARKER);
|
||||
}
|
||||
|
||||
expr_ty
|
||||
_PyPegen_name_token(Parser *p)
|
||||
{
|
||||
|
@ -794,24 +766,6 @@ _PyPegen_string_token(Parser *p)
|
|||
return _PyPegen_expect_token(p, STRING);
|
||||
}
|
||||
|
||||
void *
|
||||
_PyPegen_newline_token(Parser *p)
|
||||
{
|
||||
return _PyPegen_expect_token(p, NEWLINE);
|
||||
}
|
||||
|
||||
void *
|
||||
_PyPegen_indent_token(Parser *p)
|
||||
{
|
||||
return _PyPegen_expect_token(p, INDENT);
|
||||
}
|
||||
|
||||
void *
|
||||
_PyPegen_dedent_token(Parser *p)
|
||||
{
|
||||
return _PyPegen_expect_token(p, DEDENT);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
parsenumber_raw(const char *s)
|
||||
{
|
||||
|
|
|
@ -104,7 +104,6 @@ int _PyPegen_update_memo(Parser *p, int mark, int type, void *node);
|
|||
int _PyPegen_is_memoized(Parser *p, int type, void *pres);
|
||||
|
||||
int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
|
||||
int _PyPegen_lookahead_with_string(int, void *(func)(Parser *, const char *), Parser *, const char *);
|
||||
int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
|
||||
int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@ dump: peg_extension/parse.c
|
|||
$(PYTHON) -c "from peg_extension import parse; import ast; t = parse.parse_file('$(TESTFILE)', mode=1); print(ast.dump(t))"
|
||||
|
||||
regen-metaparser: pegen/metagrammar.gram pegen/*.py
|
||||
$(PYTHON) -m pegen -q -c pegen/metagrammar.gram -o pegen/grammar_parser.py
|
||||
$(PYTHON) -m pegen -q python pegen/metagrammar.gram -o pegen/grammar_parser.py
|
||||
|
||||
# Note: These targets really depend on the generated shared object in peg_extension/parse.*.so but
|
||||
# this has different names in different systems so we are abusing the implicit dependency on
|
||||
|
|
|
@ -1,33 +1,36 @@
|
|||
import ast
|
||||
from dataclasses import dataclass, field
|
||||
import re
|
||||
from typing import Any, cast, Dict, IO, Optional, List, Text, Tuple, Set
|
||||
from typing import IO, Any, Dict, List, Optional, Set, Text, Tuple
|
||||
from enum import Enum
|
||||
|
||||
from pegen import grammar
|
||||
from pegen.grammar import (
|
||||
Cut,
|
||||
GrammarVisitor,
|
||||
Rhs,
|
||||
Alt,
|
||||
Cut,
|
||||
Gather,
|
||||
GrammarVisitor,
|
||||
Group,
|
||||
Lookahead,
|
||||
NamedItem,
|
||||
NameLeaf,
|
||||
StringLeaf,
|
||||
Lookahead,
|
||||
PositiveLookahead,
|
||||
NegativeLookahead,
|
||||
Opt,
|
||||
PositiveLookahead,
|
||||
Repeat0,
|
||||
Repeat1,
|
||||
Gather,
|
||||
Group,
|
||||
Rhs,
|
||||
Rule,
|
||||
StringLeaf,
|
||||
)
|
||||
from pegen import grammar
|
||||
from pegen.parser_generator import dedupe, ParserGenerator
|
||||
from pegen.parser_generator import ParserGenerator
|
||||
|
||||
EXTENSION_PREFIX = """\
|
||||
#include "pegen.h"
|
||||
|
||||
"""
|
||||
|
||||
|
||||
EXTENSION_SUFFIX = """
|
||||
void *
|
||||
_PyPegen_parse(Parser *p)
|
||||
|
@ -41,6 +44,43 @@ _PyPegen_parse(Parser *p)
|
|||
"""
|
||||
|
||||
|
||||
class NodeTypes(Enum):
    """Kinds of grammar nodes that the C call maker tags on a FunctionCall."""

    # Tokens matched through a dedicated _PyPegen_<name>_token helper.
    NAME_TOKEN = 0
    NUMBER_TOKEN = 1
    STRING_TOKEN = 2

    # Any other token or literal, matched through _PyPegen_expect_token.
    GENERIC_TOKEN = 3

    # A keyword, matched through _PyPegen_expect_token with its keyword type.
    KEYWORD = 4

    # The cut operator.
    CUT_OPERATOR = 5
|
||||
|
||||
|
||||
# Grammar names of the tokens that have a dedicated helper, mapped to the
# node type recorded on the resulting FunctionCall.
BASE_NODETYPES = {
    token_name: NodeTypes[f"{token_name}_TOKEN"]
    for token_name in ("NAME", "NUMBER", "STRING")
}
|
||||
|
||||
|
||||
@dataclass
class FunctionCall:
    """Structured description of one call emitted into the generated C code.

    str() renders the call as C source text:
      * ``function`` alone when there are no arguments,
      * ``function(arg, ...)`` when ``arguments`` is non-empty,
      * followed by ``, 1`` when ``force_true`` is set (comma operator, so the
        expression is always true),
      * wrapped as ``(assigned_variable = ...)`` when a variable is assigned.
    """

    function: str
    arguments: Optional[List[Any]] = None
    assigned_variable: Optional[str] = None
    nodetype: Optional[NodeTypes] = None
    force_true: bool = False
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __str__(self) -> str:
        rendered = self.function
        if self.arguments:
            rendered += "(" + ", ".join(str(arg) for arg in self.arguments) + ")"
        if self.force_true:
            rendered += ", 1"
        if self.assigned_variable:
            rendered = "(" + self.assigned_variable + " = " + rendered + ")"
        return rendered
|
||||
|
||||
|
||||
class CCallMakerVisitor(GrammarVisitor):
|
||||
def __init__(
|
||||
self,
|
||||
|
@ -54,28 +94,57 @@ class CCallMakerVisitor(GrammarVisitor):
|
|||
self.cache: Dict[Any, Any] = {}
|
||||
self.keyword_cache: Dict[str, int] = {}
|
||||
|
||||
def keyword_helper(self, keyword: str) -> Tuple[str, str]:
|
||||
def keyword_helper(self, keyword: str) -> FunctionCall:
|
||||
if keyword not in self.keyword_cache:
|
||||
self.keyword_cache[keyword] = self.gen.keyword_type()
|
||||
return "keyword", f"_PyPegen_expect_token(p, {self.keyword_cache[keyword]})"
|
||||
return FunctionCall(
|
||||
assigned_variable="keyword",
|
||||
function="_PyPegen_expect_token",
|
||||
arguments=["p", self.keyword_cache[keyword]],
|
||||
nodetype=NodeTypes.KEYWORD,
|
||||
)
|
||||
|
||||
def visit_NameLeaf(self, node: NameLeaf) -> Tuple[str, str]:
|
||||
def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall:
|
||||
name = node.value
|
||||
if name in self.non_exact_tokens:
|
||||
name = name.lower()
|
||||
return f"{name}_var", f"_PyPegen_{name}_token(p)"
|
||||
return f"{name}_var", f"{name}_rule(p)"
|
||||
if name in BASE_NODETYPES:
|
||||
return FunctionCall(
|
||||
assigned_variable=f"{name.lower()}_var",
|
||||
function=f"_PyPegen_{name.lower()}_token",
|
||||
arguments=["p"],
|
||||
nodetype=BASE_NODETYPES[name],
|
||||
metadata={"rulename": name.lower()},
|
||||
)
|
||||
return FunctionCall(
|
||||
assigned_variable=f"{name.lower()}_var",
|
||||
function=f"_PyPegen_expect_token",
|
||||
arguments=["p", name],
|
||||
nodetype=NodeTypes.GENERIC_TOKEN,
|
||||
metadata={"rulename": name.lower()},
|
||||
)
|
||||
|
||||
def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
|
||||
return FunctionCall(
|
||||
assigned_variable=f"{name}_var",
|
||||
function=f"{name}_rule",
|
||||
arguments=["p"],
|
||||
metadata={"rulename": name.lower()},
|
||||
)
|
||||
|
||||
def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall:
|
||||
val = ast.literal_eval(node.value)
|
||||
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
|
||||
return self.keyword_helper(val)
|
||||
else:
|
||||
assert val in self.exact_tokens, f"{node.value} is not a known literal"
|
||||
type = self.exact_tokens[val]
|
||||
return "literal", f"_PyPegen_expect_token(p, {type})"
|
||||
return FunctionCall(
|
||||
assigned_variable="literal",
|
||||
function=f"_PyPegen_expect_token",
|
||||
arguments=["p", type],
|
||||
nodetype=NodeTypes.GENERIC_TOKEN,
|
||||
)
|
||||
|
||||
def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
|
||||
def visit_Rhs(self, node: Rhs) -> FunctionCall:
|
||||
def can_we_inline(node: Rhs) -> int:
|
||||
if len(node.alts) != 1 or len(node.alts[0].items) != 1:
|
||||
return False
|
||||
|
@ -90,65 +159,96 @@ class CCallMakerVisitor(GrammarVisitor):
|
|||
self.cache[node] = self.visit(node.alts[0].items[0])
|
||||
else:
|
||||
name = self.gen.name_node(node)
|
||||
self.cache[node] = f"{name}_var", f"{name}_rule(p)"
|
||||
self.cache[node] = FunctionCall(
|
||||
assigned_variable=f"{name}_var",
|
||||
function=f"{name}_rule",
|
||||
arguments=["p"],
|
||||
metadata={"rulename": name},
|
||||
)
|
||||
return self.cache[node]
|
||||
|
||||
def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]:
|
||||
name, call = self.visit(node.item)
|
||||
def visit_NamedItem(self, node: NamedItem) -> FunctionCall:
|
||||
call = self.visit(node.item)
|
||||
if node.name:
|
||||
name = node.name
|
||||
return name, call
|
||||
call.assigned_variable = node.name
|
||||
return call
|
||||
|
||||
def lookahead_call_helper(self, node: Lookahead, positive: int) -> Tuple[None, str]:
|
||||
name, call = self.visit(node.node)
|
||||
func, args = call.split("(", 1)
|
||||
assert args[-1] == ")"
|
||||
args = args[:-1]
|
||||
if "name_token" in call:
|
||||
return None, f"_PyPegen_lookahead_with_name({positive}, {func}, {args})"
|
||||
elif not args.startswith("p,"):
|
||||
return None, f"_PyPegen_lookahead({positive}, {func}, {args})"
|
||||
elif args[2:].strip().isalnum():
|
||||
return None, f"_PyPegen_lookahead_with_int({positive}, {func}, {args})"
|
||||
def lookahead_call_helper(self, node: Lookahead, positive: int) -> FunctionCall:
|
||||
call = self.visit(node.node)
|
||||
if call.nodetype == NodeTypes.NAME_TOKEN:
|
||||
return FunctionCall(
|
||||
function=f"_PyPegen_lookahead_with_name",
|
||||
arguments=[positive, call.function, *call.arguments],
|
||||
)
|
||||
elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}:
|
||||
return FunctionCall(
|
||||
function=f"_PyPegen_lookahead_with_int",
|
||||
arguments=[positive, call.function, *call.arguments],
|
||||
)
|
||||
else:
|
||||
return None, f"_PyPegen_lookahead_with_string({positive}, {func}, {args})"
|
||||
return FunctionCall(
|
||||
function=f"_PyPegen_lookahead",
|
||||
arguments=[positive, call.function, *call.arguments],
|
||||
)
|
||||
|
||||
def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]:
|
||||
def visit_PositiveLookahead(self, node: PositiveLookahead) -> FunctionCall:
|
||||
return self.lookahead_call_helper(node, 1)
|
||||
|
||||
def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]:
|
||||
def visit_NegativeLookahead(self, node: NegativeLookahead) -> FunctionCall:
|
||||
return self.lookahead_call_helper(node, 0)
|
||||
|
||||
def visit_Opt(self, node: Opt) -> Tuple[str, str]:
|
||||
name, call = self.visit(node.node)
|
||||
return "opt_var", f"{call}, 1" # Using comma operator!
|
||||
def visit_Opt(self, node: Opt) -> FunctionCall:
|
||||
call = self.visit(node.node)
|
||||
return FunctionCall(
|
||||
assigned_variable="opt_var",
|
||||
function=call.function,
|
||||
arguments=call.arguments,
|
||||
force_true=True,
|
||||
)
|
||||
|
||||
def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
|
||||
def visit_Repeat0(self, node: Repeat0) -> FunctionCall:
|
||||
if node in self.cache:
|
||||
return self.cache[node]
|
||||
name = self.gen.name_loop(node.node, False)
|
||||
self.cache[node] = f"{name}_var", f"{name}_rule(p)"
|
||||
self.cache[node] = FunctionCall(
|
||||
assigned_variable=f"{name}_var",
|
||||
function=f"{name}_rule",
|
||||
arguments=["p"],
|
||||
metadata={"rulename": name},
|
||||
)
|
||||
return self.cache[node]
|
||||
|
||||
def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
|
||||
def visit_Repeat1(self, node: Repeat1) -> FunctionCall:
|
||||
if node in self.cache:
|
||||
return self.cache[node]
|
||||
name = self.gen.name_loop(node.node, True)
|
||||
self.cache[node] = f"{name}_var", f"{name}_rule(p)"
|
||||
self.cache[node] = FunctionCall(
|
||||
assigned_variable=f"{name}_var",
|
||||
function=f"{name}_rule",
|
||||
arguments=["p"],
|
||||
metadata={"rulename": name},
|
||||
)
|
||||
return self.cache[node]
|
||||
|
||||
def visit_Gather(self, node: Gather) -> Tuple[str, str]:
|
||||
def visit_Gather(self, node: Gather) -> FunctionCall:
|
||||
if node in self.cache:
|
||||
return self.cache[node]
|
||||
name = self.gen.name_gather(node)
|
||||
self.cache[node] = f"{name}_var", f"{name}_rule(p)"
|
||||
self.cache[node] = FunctionCall(
|
||||
assigned_variable=f"{name}_var",
|
||||
function=f"{name}_rule",
|
||||
arguments=["p"],
|
||||
metadata={"rulename": name},
|
||||
)
|
||||
return self.cache[node]
|
||||
|
||||
def visit_Group(self, node: Group) -> Tuple[Optional[str], str]:
|
||||
def visit_Group(self, node: Group) -> FunctionCall:
|
||||
return self.visit(node.rhs)
|
||||
|
||||
def visit_Cut(self, node: Cut) -> Tuple[str, str]:
|
||||
return "cut_var", "1"
|
||||
def visit_Cut(self, node: Cut) -> FunctionCall:
|
||||
return FunctionCall(
|
||||
assigned_variable="cut_var", function="1", nodetype=NodeTypes.CUT_OPERATOR
|
||||
)
|
||||
|
||||
|
||||
class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||
|
@ -252,7 +352,6 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
mode += 1
|
||||
modulename = self.grammar.metas.get("modulename", "parse")
|
||||
trailer = self.grammar.metas.get("trailer", EXTENSION_SUFFIX)
|
||||
keyword_cache = self.callmakervisitor.keyword_cache
|
||||
if trailer:
|
||||
self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename))
|
||||
|
||||
|
@ -448,13 +547,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
self._handle_default_rule_body(node, rhs, result_type)
|
||||
self.print("}")
|
||||
|
||||
def visit_NamedItem(self, node: NamedItem, names: List[str]) -> None:
|
||||
name, call = self.callmakervisitor.visit(node)
|
||||
if not name:
|
||||
self.print(call)
|
||||
else:
|
||||
name = dedupe(name, names)
|
||||
self.print(f"({name} = {call})")
|
||||
def visit_NamedItem(self, node: NamedItem) -> None:
|
||||
call = self.callmakervisitor.visit(node)
|
||||
if call.assigned_variable:
|
||||
call.assigned_variable = self.dedupe(call.assigned_variable)
|
||||
self.print(call)
|
||||
|
||||
def visit_Rhs(
|
||||
self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str]
|
||||
|
@ -464,7 +561,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
for alt in node.alts:
|
||||
self.visit(alt, is_loop=is_loop, is_gather=is_gather, rulename=rulename)
|
||||
|
||||
def join_conditions(self, keyword: str, node: Any, names: List[str]) -> None:
|
||||
def join_conditions(self, keyword: str, node: Any) -> None:
|
||||
self.print(f"{keyword} (")
|
||||
with self.indent():
|
||||
first = True
|
||||
|
@ -473,7 +570,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
first = False
|
||||
else:
|
||||
self.print("&&")
|
||||
self.visit(item, names=names)
|
||||
self.visit(item)
|
||||
self.print(")")
|
||||
|
||||
def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None:
|
||||
|
@ -492,29 +589,34 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
f'fprintf(stderr, "Hit with action [%d-%d]: %s\\n", mark, p->mark, "{node}");'
|
||||
)
|
||||
|
||||
def emit_default_action(self, is_gather: bool, names: List[str], node: Alt) -> None:
|
||||
if len(names) > 1:
|
||||
def emit_default_action(self, is_gather: bool, node: Alt) -> None:
|
||||
if len(self.local_variable_names) > 1:
|
||||
if is_gather:
|
||||
assert len(names) == 2
|
||||
self.print(f"res = _PyPegen_seq_insert_in_front(p, {names[0]}, {names[1]});")
|
||||
assert len(self.local_variable_names) == 2
|
||||
self.print(
|
||||
f"res = _PyPegen_seq_insert_in_front(p, "
|
||||
f"{self.local_variable_names[0]}, {self.local_variable_names[1]});"
|
||||
)
|
||||
else:
|
||||
if self.debug:
|
||||
self.print(
|
||||
f'fprintf(stderr, "Hit without action [%d:%d]: %s\\n", mark, p->mark, "{node}");'
|
||||
)
|
||||
self.print(f"res = _PyPegen_dummy_name(p, {', '.join(names)});")
|
||||
self.print(
|
||||
f"res = _PyPegen_dummy_name(p, {', '.join(self.local_variable_names)});"
|
||||
)
|
||||
else:
|
||||
if self.debug:
|
||||
self.print(
|
||||
f'fprintf(stderr, "Hit with default action [%d:%d]: %s\\n", mark, p->mark, "{node}");'
|
||||
)
|
||||
self.print(f"res = {names[0]};")
|
||||
self.print(f"res = {self.local_variable_names[0]};")
|
||||
|
||||
def emit_dummy_action(self) -> None:
|
||||
self.print(f"res = _PyPegen_dummy_name(p);")
|
||||
|
||||
def handle_alt_normal(self, node: Alt, is_gather: bool, names: List[str]) -> None:
|
||||
self.join_conditions(keyword="if", node=node, names=names)
|
||||
def handle_alt_normal(self, node: Alt, is_gather: bool) -> None:
|
||||
self.join_conditions(keyword="if", node=node)
|
||||
self.print("{")
|
||||
# We have parsed successfully all the conditions for the option.
|
||||
with self.indent():
|
||||
|
@ -526,17 +628,15 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
elif node.action:
|
||||
self.emit_action(node)
|
||||
else:
|
||||
self.emit_default_action(is_gather, names, node)
|
||||
self.emit_default_action(is_gather, node)
|
||||
|
||||
# As the current option has parsed correctly, do not continue with the rest.
|
||||
self.print(f"goto done;")
|
||||
self.print("}")
|
||||
|
||||
def handle_alt_loop(
|
||||
self, node: Alt, is_gather: bool, rulename: Optional[str], names: List[str]
|
||||
) -> None:
|
||||
def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
|
||||
# Condition of the main body of the alternative
|
||||
self.join_conditions(keyword="while", node=node, names=names)
|
||||
self.join_conditions(keyword="while", node=node)
|
||||
self.print("{")
|
||||
# We have parsed successfully one item!
|
||||
with self.indent():
|
||||
|
@ -548,7 +648,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
elif node.action:
|
||||
self.emit_action(node, cleanup_code="PyMem_Free(children);")
|
||||
else:
|
||||
self.emit_default_action(is_gather, names, node)
|
||||
self.emit_default_action(is_gather, node)
|
||||
|
||||
# Add the result of rule to the temporary buffer of children. This buffer
|
||||
# will populate later an asdl_seq with all elements to return.
|
||||
|
@ -580,47 +680,45 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
if v == "opt_var":
|
||||
self.print("UNUSED(opt_var); // Silence compiler warnings")
|
||||
|
||||
names: List[str] = []
|
||||
if is_loop:
|
||||
self.handle_alt_loop(node, is_gather, rulename, names)
|
||||
else:
|
||||
self.handle_alt_normal(node, is_gather, names)
|
||||
with self.local_variable_context():
|
||||
if is_loop:
|
||||
self.handle_alt_loop(node, is_gather, rulename)
|
||||
else:
|
||||
self.handle_alt_normal(node, is_gather)
|
||||
|
||||
self.print("p->mark = mark;")
|
||||
if "cut_var" in names:
|
||||
if "cut_var" in vars:
|
||||
self.print("if (cut_var) return NULL;")
|
||||
self.print("}")
|
||||
|
||||
def collect_vars(self, node: Alt) -> Dict[str, Optional[str]]:
|
||||
names: List[str] = []
|
||||
def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]:
|
||||
types = {}
|
||||
for item in node.items:
|
||||
name, type = self.add_var(item, names)
|
||||
types[name] = type
|
||||
with self.local_variable_context():
|
||||
for item in node.items:
|
||||
name, type = self.add_var(item)
|
||||
types[name] = type
|
||||
return types
|
||||
|
||||
def add_var(self, node: NamedItem, names: List[str]) -> Tuple[str, Optional[str]]:
|
||||
name: str
|
||||
call: str
|
||||
name, call = self.callmakervisitor.visit(node.item)
|
||||
type = None
|
||||
if not name:
|
||||
return name, type
|
||||
if name.startswith("cut"):
|
||||
return name, "int"
|
||||
if name.endswith("_var"):
|
||||
rulename = name[:-4]
|
||||
rule = self.rules.get(rulename)
|
||||
if rule is not None:
|
||||
if rule.is_loop() or rule.is_gather():
|
||||
type = "asdl_seq *"
|
||||
else:
|
||||
type = rule.type
|
||||
elif name.startswith("_loop") or name.startswith("_gather"):
|
||||
def add_var(self, node: NamedItem) -> Tuple[Optional[str], Optional[str]]:
|
||||
call = self.callmakervisitor.visit(node.item)
|
||||
if not call.assigned_variable:
|
||||
return None, None
|
||||
if call.nodetype == NodeTypes.CUT_OPERATOR:
|
||||
return call.assigned_variable, "int"
|
||||
|
||||
name = call.assigned_variable
|
||||
rulename = call.metadata.get("rulename")
|
||||
|
||||
type: Optional[str] = None
|
||||
|
||||
assert self.all_rules is not None
|
||||
if rulename and rulename in self.all_rules:
|
||||
rule = self.all_rules.get(rulename)
|
||||
if rule.is_loop() or rule.is_gather():
|
||||
type = "asdl_seq *"
|
||||
elif name in ("name_var", "string_var", "number_var"):
|
||||
type = "expr_ty"
|
||||
if node.name:
|
||||
name = node.name
|
||||
name = dedupe(name, names)
|
||||
return name, type
|
||||
else:
|
||||
type = rule.type
|
||||
elif call.nodetype in BASE_NODETYPES.values():
|
||||
type = "expr_ty"
|
||||
|
||||
return self.dedupe(node.name if node.name else call.assigned_variable), type
|
||||
|
|
|
@ -13,7 +13,6 @@ from pegen.grammar import (
|
|||
NamedItem,
|
||||
Plain,
|
||||
NameLeaf,
|
||||
StringLeaf,
|
||||
Gather,
|
||||
)
|
||||
from pegen.grammar import GrammarError, GrammarVisitor
|
||||
|
@ -48,6 +47,18 @@ class ParserGenerator:
|
|||
self.todo = self.rules.copy() # Rules to generate
|
||||
self.counter = 0 # For name_rule()/name_loop()
|
||||
self.keyword_counter = 499 # For keyword_type()
|
||||
self.all_rules: Optional[Dict[str, Rule]] = None # Rules + temporal rules
|
||||
self._local_variable_stack: List[List[str]] = []
|
||||
|
||||
@contextlib.contextmanager
def local_variable_context(self) -> Iterator[None]:
    """Open a fresh scope of local variable names for the duration of a ``with`` block.

    A new empty list is pushed onto ``self._local_variable_stack`` on entry
    and popped again on exit.  The pop happens in a ``finally`` clause so the
    stack stays balanced even when the body of the ``with`` block raises.
    """
    self._local_variable_stack.append([])
    try:
        yield
    finally:
        self._local_variable_stack.pop()
|
||||
|
||||
@property
def local_variable_names(self) -> List[str]:
    """Return the list of names for the innermost open local variable scope.

    This is the last list on ``self._local_variable_stack``; the list itself
    is returned, not a copy.
    """
    innermost_scope = self._local_variable_stack[-1]
    return innermost_scope
|
||||
|
||||
@abstractmethod
|
||||
def generate(self, filename: str) -> None:
|
||||
|
@ -82,6 +93,7 @@ class ParserGenerator:
|
|||
for rulename in todo:
|
||||
self.todo[rulename].collect_todo(self)
|
||||
done = set(alltodo)
|
||||
self.all_rules = self.todo.copy()
|
||||
|
||||
def keyword_type(self) -> int:
|
||||
self.keyword_counter += 1
|
||||
|
@ -109,26 +121,23 @@ class ParserGenerator:
|
|||
self.counter += 1
|
||||
extra_function_name = f"_loop0_{self.counter}"
|
||||
extra_function_alt = Alt(
|
||||
[NamedItem(None, node.separator), NamedItem("elem", node.node),], action="elem",
|
||||
[NamedItem(None, node.separator), NamedItem("elem", node.node)], action="elem",
|
||||
)
|
||||
self.todo[extra_function_name] = Rule(
|
||||
extra_function_name, None, Rhs([extra_function_alt]),
|
||||
)
|
||||
alt = Alt(
|
||||
[NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name)),],
|
||||
)
|
||||
alt = Alt([NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],)
|
||||
self.todo[name] = Rule(name, None, Rhs([alt]),)
|
||||
return name
|
||||
|
||||
|
||||
def dedupe(name: str, names: List[str]) -> str:
|
||||
origname = name
|
||||
counter = 0
|
||||
while name in names:
|
||||
counter += 1
|
||||
name = f"{origname}_{counter}"
|
||||
names.append(name)
|
||||
return name
|
||||
def dedupe(self, name: str) -> str:
    """Return a variable name that is unused in the current local scope.

    If ``name`` is already present in ``self.local_variable_names``, the
    suffixes ``_1``, ``_2``, ... are tried in order until an unused name is
    found.  The chosen name is appended to ``self.local_variable_names``
    before it is returned.
    """
    taken = self.local_variable_names
    candidate = name
    suffix = 0
    while candidate in taken:
        suffix += 1
        candidate = f"{name}_{suffix}"
    taken.append(candidate)
    return candidate
|
||||
|
||||
|
||||
def compute_nullables(rules: Dict[str, Rule]) -> None:
|
||||
|
@ -153,13 +162,13 @@ def compute_left_recursives(
|
|||
leaders = set(scc)
|
||||
for start in scc:
|
||||
for cycle in sccutils.find_cycles_in_scc(graph, scc, start):
|
||||
## print("Cycle:", " -> ".join(cycle))
|
||||
# print("Cycle:", " -> ".join(cycle))
|
||||
leaders -= scc - set(cycle)
|
||||
if not leaders:
|
||||
raise ValueError(
|
||||
f"SCC {scc} has no leadership candidate (no element is included in all cycles)"
|
||||
)
|
||||
## print("Leaders:", leaders)
|
||||
# print("Leaders:", leaders)
|
||||
leader = min(leaders) # Pick an arbitrary leader from the candidates.
|
||||
rules[leader].leader = True
|
||||
else:
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from typing import Any, Dict, List, Optional, IO, Text, Tuple
|
||||
from typing import Any, Dict, Optional, IO, Text, Tuple
|
||||
|
||||
from pegen.grammar import (
|
||||
Cut,
|
||||
|
@ -19,7 +19,7 @@ from pegen.grammar import (
|
|||
Alt,
|
||||
)
|
||||
from pegen import grammar
|
||||
from pegen.parser_generator import dedupe, ParserGenerator
|
||||
from pegen.parser_generator import ParserGenerator
|
||||
|
||||
MODULE_PREFIX = """\
|
||||
#!/usr/bin/env python3.8
|
||||
|
@ -173,7 +173,7 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
else:
|
||||
self.print("return None")
|
||||
|
||||
def visit_NamedItem(self, node: NamedItem, names: List[str]) -> None:
|
||||
def visit_NamedItem(self, node: NamedItem) -> None:
|
||||
name, call = self.callmakervisitor.visit(node.item)
|
||||
if node.name:
|
||||
name = node.name
|
||||
|
@ -181,7 +181,7 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
self.print(call)
|
||||
else:
|
||||
if name != "cut":
|
||||
name = dedupe(name, names)
|
||||
name = self.dedupe(name)
|
||||
self.print(f"({name} := {call})")
|
||||
|
||||
def visit_Rhs(self, node: Rhs, is_loop: bool = False, is_gather: bool = False) -> None:
|
||||
|
@ -191,34 +191,36 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
self.visit(alt, is_loop=is_loop, is_gather=is_gather)
|
||||
|
||||
def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None:
|
||||
names: List[str] = []
|
||||
self.print("cut = False") # TODO: Only if needed.
|
||||
if is_loop:
|
||||
self.print("while (")
|
||||
else:
|
||||
self.print("if (")
|
||||
with self.indent():
|
||||
first = True
|
||||
for item in node.items:
|
||||
if first:
|
||||
first = False
|
||||
else:
|
||||
self.print("and")
|
||||
self.visit(item, names=names)
|
||||
self.print("):")
|
||||
with self.indent():
|
||||
action = node.action
|
||||
if not action:
|
||||
if is_gather:
|
||||
assert len(names) == 2
|
||||
action = f"[{names[0]}] + {names[1]}"
|
||||
else:
|
||||
action = f"[{', '.join(names)}]"
|
||||
with self.local_variable_context():
|
||||
self.print("cut = False") # TODO: Only if needed.
|
||||
if is_loop:
|
||||
self.print(f"children.append({action})")
|
||||
self.print(f"mark = self.mark()")
|
||||
self.print("while (")
|
||||
else:
|
||||
self.print(f"return {action}")
|
||||
self.print("self.reset(mark)")
|
||||
# Skip remaining alternatives if a cut was reached.
|
||||
self.print("if cut: return None") # TODO: Only if needed.
|
||||
self.print("if (")
|
||||
with self.indent():
|
||||
first = True
|
||||
for item in node.items:
|
||||
if first:
|
||||
first = False
|
||||
else:
|
||||
self.print("and")
|
||||
self.visit(item)
|
||||
self.print("):")
|
||||
with self.indent():
|
||||
action = node.action
|
||||
if not action:
|
||||
if is_gather:
|
||||
assert len(self.local_variable_names) == 2
|
||||
action = (
|
||||
f"[{self.local_variable_names[0]}] + {self.local_variable_names[1]}"
|
||||
)
|
||||
else:
|
||||
action = f"[{', '.join(self.local_variable_names)}]"
|
||||
if is_loop:
|
||||
self.print(f"children.append({action})")
|
||||
self.print(f"mark = self.mark()")
|
||||
else:
|
||||
self.print(f"return {action}")
|
||||
self.print("self.reset(mark)")
|
||||
# Skip remaining alternatives if a cut was reached.
|
||||
self.print("if cut: return None") # TODO: Only if needed.
|
||||
|
|
Loading…
Reference in New Issue