bpo-40334: refactor and cleanup for the PEG generators (GH-19775)

This commit is contained in:
Pablo Galindo 2020-04-29 10:42:21 +01:00 committed by GitHub
parent 9b64ef3ac7
commit 4db245ee9d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 292 additions and 230 deletions

View File

@ -648,7 +648,7 @@ file_rule(Parser *p)
if ( if (
(a = statements_rule(p), 1) (a = statements_rule(p), 1)
&& &&
(endmarker_var = _PyPegen_endmarker_token(p)) (endmarker_var = _PyPegen_expect_token(p, ENDMARKER))
) )
{ {
res = Module ( a , NULL , p -> arena ); res = Module ( a , NULL , p -> arena );
@ -712,7 +712,7 @@ eval_rule(Parser *p)
&& &&
(_loop0_1_var = _loop0_1_rule(p)) (_loop0_1_var = _loop0_1_rule(p))
&& &&
(endmarker_var = _PyPegen_endmarker_token(p)) (endmarker_var = _PyPegen_expect_token(p, ENDMARKER))
) )
{ {
res = Expression ( a , p -> arena ); res = Expression ( a , p -> arena );
@ -846,7 +846,7 @@ statement_newline_rule(Parser *p)
if ( if (
(a = compound_stmt_rule(p)) (a = compound_stmt_rule(p))
&& &&
(newline_var = _PyPegen_newline_token(p)) (newline_var = _PyPegen_expect_token(p, NEWLINE))
) )
{ {
res = _PyPegen_singleton_seq ( p , a ); res = _PyPegen_singleton_seq ( p , a );
@ -872,7 +872,7 @@ statement_newline_rule(Parser *p)
{ // NEWLINE { // NEWLINE
void *newline_var; void *newline_var;
if ( if (
(newline_var = _PyPegen_newline_token(p)) (newline_var = _PyPegen_expect_token(p, NEWLINE))
) )
{ {
Token *token = _PyPegen_get_last_nonnwhitespace_token(p); Token *token = _PyPegen_get_last_nonnwhitespace_token(p);
@ -895,7 +895,7 @@ statement_newline_rule(Parser *p)
{ // $ { // $
void *endmarker_var; void *endmarker_var;
if ( if (
(endmarker_var = _PyPegen_endmarker_token(p)) (endmarker_var = _PyPegen_expect_token(p, ENDMARKER))
) )
{ {
res = _PyPegen_interactive_exit ( p ); res = _PyPegen_interactive_exit ( p );
@ -929,7 +929,7 @@ simple_stmt_rule(Parser *p)
&& &&
_PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, 13) _PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, 13)
&& &&
(newline_var = _PyPegen_newline_token(p)) (newline_var = _PyPegen_expect_token(p, NEWLINE))
) )
{ {
res = _PyPegen_singleton_seq ( p , a ); res = _PyPegen_singleton_seq ( p , a );
@ -951,7 +951,7 @@ simple_stmt_rule(Parser *p)
&& &&
(opt_var = _PyPegen_expect_token(p, 13), 1) (opt_var = _PyPegen_expect_token(p, 13), 1)
&& &&
(newline_var = _PyPegen_newline_token(p)) (newline_var = _PyPegen_expect_token(p, NEWLINE))
) )
{ {
res = a; res = a;
@ -2684,7 +2684,7 @@ for_stmt_rule(Parser *p)
void *literal; void *literal;
expr_ty t; expr_ty t;
if ( if (
(is_async = _PyPegen_async_token(p), 1) (is_async = _PyPegen_expect_token(p, ASYNC), 1)
&& &&
(keyword = _PyPegen_expect_token(p, 517)) (keyword = _PyPegen_expect_token(p, 517))
&& &&
@ -2751,7 +2751,7 @@ with_stmt_rule(Parser *p)
void *literal_1; void *literal_1;
void *literal_2; void *literal_2;
if ( if (
(is_async = _PyPegen_async_token(p), 1) (is_async = _PyPegen_expect_token(p, ASYNC), 1)
&& &&
(keyword = _PyPegen_expect_token(p, 519)) (keyword = _PyPegen_expect_token(p, 519))
&& &&
@ -2790,7 +2790,7 @@ with_stmt_rule(Parser *p)
void *keyword; void *keyword;
void *literal; void *literal;
if ( if (
(is_async = _PyPegen_async_token(p), 1) (is_async = _PyPegen_expect_token(p, ASYNC), 1)
&& &&
(keyword = _PyPegen_expect_token(p, 519)) (keyword = _PyPegen_expect_token(p, 519))
&& &&
@ -3263,7 +3263,7 @@ function_def_raw_rule(Parser *p)
expr_ty n; expr_ty n;
void *params; void *params;
if ( if (
(is_async = _PyPegen_async_token(p), 1) (is_async = _PyPegen_expect_token(p, ASYNC), 1)
&& &&
(keyword = _PyPegen_expect_token(p, 522)) (keyword = _PyPegen_expect_token(p, 522))
&& &&
@ -4002,13 +4002,13 @@ block_rule(Parser *p)
void *indent_var; void *indent_var;
void *newline_var; void *newline_var;
if ( if (
(newline_var = _PyPegen_newline_token(p)) (newline_var = _PyPegen_expect_token(p, NEWLINE))
&& &&
(indent_var = _PyPegen_indent_token(p)) (indent_var = _PyPegen_expect_token(p, INDENT))
&& &&
(a = statements_rule(p)) (a = statements_rule(p))
&& &&
(dedent_var = _PyPegen_dedent_token(p)) (dedent_var = _PyPegen_expect_token(p, DEDENT))
) )
{ {
res = a; res = a;
@ -6754,7 +6754,7 @@ await_primary_rule(Parser *p)
expr_ty a; expr_ty a;
void *await_var; void *await_var;
if ( if (
(await_var = _PyPegen_await_token(p)) (await_var = _PyPegen_expect_token(p, AWAIT))
&& &&
(a = primary_rule(p)) (a = primary_rule(p))
) )
@ -9919,9 +9919,9 @@ invalid_block_rule(Parser *p)
{ // NEWLINE !INDENT { // NEWLINE !INDENT
void *newline_var; void *newline_var;
if ( if (
(newline_var = _PyPegen_newline_token(p)) (newline_var = _PyPegen_expect_token(p, NEWLINE))
&& &&
_PyPegen_lookahead(0, _PyPegen_indent_token, p) _PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, INDENT)
) )
{ {
res = RAISE_INDENTATION_ERROR ( "expected an indented block" ); res = RAISE_INDENTATION_ERROR ( "expected an indented block" );
@ -10036,7 +10036,7 @@ _loop0_1_rule(Parser *p)
{ // NEWLINE { // NEWLINE
void *newline_var; void *newline_var;
while ( while (
(newline_var = _PyPegen_newline_token(p)) (newline_var = _PyPegen_expect_token(p, NEWLINE))
) )
{ {
res = newline_var; res = newline_var;
@ -10273,7 +10273,7 @@ _tmp_6_rule(Parser *p)
{ // ASYNC { // ASYNC
void *async_var; void *async_var;
if ( if (
(async_var = _PyPegen_async_token(p)) (async_var = _PyPegen_expect_token(p, ASYNC))
) )
{ {
res = async_var; res = async_var;
@ -10345,7 +10345,7 @@ _tmp_8_rule(Parser *p)
{ // ASYNC { // ASYNC
void *async_var; void *async_var;
if ( if (
(async_var = _PyPegen_async_token(p)) (async_var = _PyPegen_expect_token(p, ASYNC))
) )
{ {
res = async_var; res = async_var;
@ -10381,7 +10381,7 @@ _tmp_9_rule(Parser *p)
{ // ASYNC { // ASYNC
void *async_var; void *async_var;
if ( if (
(async_var = _PyPegen_async_token(p)) (async_var = _PyPegen_expect_token(p, ASYNC))
) )
{ {
res = async_var; res = async_var;
@ -15068,7 +15068,7 @@ _tmp_128_rule(Parser *p)
&& &&
(f = named_expression_rule(p)) (f = named_expression_rule(p))
&& &&
(newline_var = _PyPegen_newline_token(p)) (newline_var = _PyPegen_expect_token(p, NEWLINE))
) )
{ {
res = f; res = f;
@ -15257,7 +15257,7 @@ _tmp_134_rule(Parser *p)
void *keyword_1; void *keyword_1;
void *y; void *y;
if ( if (
(y = _PyPegen_async_token(p), 1) (y = _PyPegen_expect_token(p, ASYNC), 1)
&& &&
(keyword = _PyPegen_expect_token(p, 517)) (keyword = _PyPegen_expect_token(p, 517))
&& &&

View File

@ -692,16 +692,6 @@ _PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
return (res != NULL) == positive; return (res != NULL) == positive;
} }
int
_PyPegen_lookahead_with_string(int positive, void *(func)(Parser *, const char *), Parser *p,
const char *arg)
{
int mark = p->mark;
void *res = func(p, arg);
p->mark = mark;
return (res != NULL) == positive;
}
int int
_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg) _PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
{ {
@ -751,24 +741,6 @@ _PyPegen_get_last_nonnwhitespace_token(Parser *p)
return token; return token;
} }
void *
_PyPegen_async_token(Parser *p)
{
return _PyPegen_expect_token(p, ASYNC);
}
void *
_PyPegen_await_token(Parser *p)
{
return _PyPegen_expect_token(p, AWAIT);
}
void *
_PyPegen_endmarker_token(Parser *p)
{
return _PyPegen_expect_token(p, ENDMARKER);
}
expr_ty expr_ty
_PyPegen_name_token(Parser *p) _PyPegen_name_token(Parser *p)
{ {
@ -794,24 +766,6 @@ _PyPegen_string_token(Parser *p)
return _PyPegen_expect_token(p, STRING); return _PyPegen_expect_token(p, STRING);
} }
void *
_PyPegen_newline_token(Parser *p)
{
return _PyPegen_expect_token(p, NEWLINE);
}
void *
_PyPegen_indent_token(Parser *p)
{
return _PyPegen_expect_token(p, INDENT);
}
void *
_PyPegen_dedent_token(Parser *p)
{
return _PyPegen_expect_token(p, DEDENT);
}
static PyObject * static PyObject *
parsenumber_raw(const char *s) parsenumber_raw(const char *s)
{ {

View File

@ -104,7 +104,6 @@ int _PyPegen_update_memo(Parser *p, int mark, int type, void *node);
int _PyPegen_is_memoized(Parser *p, int type, void *pres); int _PyPegen_is_memoized(Parser *p, int type, void *pres);
int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *); int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
int _PyPegen_lookahead_with_string(int, void *(func)(Parser *, const char *), Parser *, const char *);
int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int); int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *); int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);

View File

@ -33,7 +33,7 @@ dump: peg_extension/parse.c
$(PYTHON) -c "from peg_extension import parse; import ast; t = parse.parse_file('$(TESTFILE)', mode=1); print(ast.dump(t))" $(PYTHON) -c "from peg_extension import parse; import ast; t = parse.parse_file('$(TESTFILE)', mode=1); print(ast.dump(t))"
regen-metaparser: pegen/metagrammar.gram pegen/*.py regen-metaparser: pegen/metagrammar.gram pegen/*.py
$(PYTHON) -m pegen -q -c pegen/metagrammar.gram -o pegen/grammar_parser.py $(PYTHON) -m pegen -q python pegen/metagrammar.gram -o pegen/grammar_parser.py
# Note: These targets really depend on the generated shared object in peg_extension/parse.*.so but # Note: These targets really depend on the generated shared object in peg_extension/parse.*.so but
# this has different names in different systems so we are abusing the implicit dependency on # this has different names in different systems so we are abusing the implicit dependency on

View File

@ -1,33 +1,36 @@
import ast import ast
from dataclasses import dataclass, field
import re import re
from typing import Any, cast, Dict, IO, Optional, List, Text, Tuple, Set from typing import IO, Any, Dict, List, Optional, Set, Text, Tuple
from enum import Enum
from pegen import grammar
from pegen.grammar import ( from pegen.grammar import (
Cut,
GrammarVisitor,
Rhs,
Alt, Alt,
Cut,
Gather,
GrammarVisitor,
Group,
Lookahead,
NamedItem, NamedItem,
NameLeaf, NameLeaf,
StringLeaf,
Lookahead,
PositiveLookahead,
NegativeLookahead, NegativeLookahead,
Opt, Opt,
PositiveLookahead,
Repeat0, Repeat0,
Repeat1, Repeat1,
Gather, Rhs,
Group,
Rule, Rule,
StringLeaf,
) )
from pegen import grammar from pegen.parser_generator import ParserGenerator
from pegen.parser_generator import dedupe, ParserGenerator
EXTENSION_PREFIX = """\ EXTENSION_PREFIX = """\
#include "pegen.h" #include "pegen.h"
""" """
EXTENSION_SUFFIX = """ EXTENSION_SUFFIX = """
void * void *
_PyPegen_parse(Parser *p) _PyPegen_parse(Parser *p)
@ -41,6 +44,43 @@ _PyPegen_parse(Parser *p)
""" """
class NodeTypes(Enum):
    """Kinds of grammar item that a generated call can stand for."""

    NAME_TOKEN = 0
    NUMBER_TOKEN = 1
    STRING_TOKEN = 2
    GENERIC_TOKEN = 3
    KEYWORD = 4
    CUT_OPERATOR = 5


# Token names that map to a dedicated node type instead of GENERIC_TOKEN.
BASE_NODETYPES = {
    "NAME": NodeTypes.NAME_TOKEN,
    "NUMBER": NodeTypes.NUMBER_TOKEN,
    "STRING": NodeTypes.STRING_TOKEN,
}


@dataclass
class FunctionCall:
    """Structured description of one expression emitted into the C parser.

    ``str()`` renders the C source text.  Examples:

    * ``FunctionCall("f", ["p"])``                         -> ``f(p)``
    * ``FunctionCall("f", ["p"], assigned_variable="a")``  -> ``(a = f(p))``
    * ``... force_true=True, assigned_variable="opt_var"`` -> ``(opt_var = f(p), 1)``
    * ``FunctionCall("1", assigned_variable="cut_var")``   -> ``(cut_var = 1)``
    """

    # Callee name, or a literal expression such as "1" when `arguments` is None.
    function: str
    # Call arguments; None means "not a call" (no parentheses are emitted),
    # while an empty list means a call with no arguments: ``f()``.
    arguments: Optional[List[Any]] = None
    # Local variable that receives the result, if any.
    assigned_variable: Optional[str] = None
    nodetype: Optional[NodeTypes] = None
    # Append ``, 1`` (C comma operator) so the expression is always truthy.
    force_true: bool = False
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __str__(self) -> str:
        parts = [self.function]
        # `is not None` rather than truthiness: an empty argument list must
        # still render as a call, otherwise the bare function name is emitted.
        if self.arguments is not None:
            parts.append(f"({', '.join(map(str, self.arguments))})")
        if self.force_true:
            parts.append(", 1")
        if self.assigned_variable:
            parts = ["(", self.assigned_variable, " = ", *parts, ")"]
        elif self.force_true:
            # The comma operator binds looser than `&&`; keep the forced-true
            # expression self-contained when no assignment wraps it.
            parts = ["(", *parts, ")"]
        return "".join(parts)
class CCallMakerVisitor(GrammarVisitor): class CCallMakerVisitor(GrammarVisitor):
def __init__( def __init__(
self, self,
@ -54,28 +94,57 @@ class CCallMakerVisitor(GrammarVisitor):
self.cache: Dict[Any, Any] = {} self.cache: Dict[Any, Any] = {}
self.keyword_cache: Dict[str, int] = {} self.keyword_cache: Dict[str, int] = {}
def keyword_helper(self, keyword: str) -> Tuple[str, str]: def keyword_helper(self, keyword: str) -> FunctionCall:
if keyword not in self.keyword_cache: if keyword not in self.keyword_cache:
self.keyword_cache[keyword] = self.gen.keyword_type() self.keyword_cache[keyword] = self.gen.keyword_type()
return "keyword", f"_PyPegen_expect_token(p, {self.keyword_cache[keyword]})" return FunctionCall(
assigned_variable="keyword",
function="_PyPegen_expect_token",
arguments=["p", self.keyword_cache[keyword]],
nodetype=NodeTypes.KEYWORD,
)
def visit_NameLeaf(self, node: NameLeaf) -> Tuple[str, str]: def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall:
name = node.value name = node.value
if name in self.non_exact_tokens: if name in self.non_exact_tokens:
name = name.lower() if name in BASE_NODETYPES:
return f"{name}_var", f"_PyPegen_{name}_token(p)" return FunctionCall(
return f"{name}_var", f"{name}_rule(p)" assigned_variable=f"{name.lower()}_var",
function=f"_PyPegen_{name.lower()}_token",
arguments=["p"],
nodetype=BASE_NODETYPES[name],
metadata={"rulename": name.lower()},
)
return FunctionCall(
assigned_variable=f"{name.lower()}_var",
function=f"_PyPegen_expect_token",
arguments=["p", name],
nodetype=NodeTypes.GENERIC_TOKEN,
metadata={"rulename": name.lower()},
)
def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]: return FunctionCall(
assigned_variable=f"{name}_var",
function=f"{name}_rule",
arguments=["p"],
metadata={"rulename": name.lower()},
)
def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall:
val = ast.literal_eval(node.value) val = ast.literal_eval(node.value)
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
return self.keyword_helper(val) return self.keyword_helper(val)
else: else:
assert val in self.exact_tokens, f"{node.value} is not a known literal" assert val in self.exact_tokens, f"{node.value} is not a known literal"
type = self.exact_tokens[val] type = self.exact_tokens[val]
return "literal", f"_PyPegen_expect_token(p, {type})" return FunctionCall(
assigned_variable="literal",
function=f"_PyPegen_expect_token",
arguments=["p", type],
nodetype=NodeTypes.GENERIC_TOKEN,
)
def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]: def visit_Rhs(self, node: Rhs) -> FunctionCall:
def can_we_inline(node: Rhs) -> int: def can_we_inline(node: Rhs) -> int:
if len(node.alts) != 1 or len(node.alts[0].items) != 1: if len(node.alts) != 1 or len(node.alts[0].items) != 1:
return False return False
@ -90,65 +159,96 @@ class CCallMakerVisitor(GrammarVisitor):
self.cache[node] = self.visit(node.alts[0].items[0]) self.cache[node] = self.visit(node.alts[0].items[0])
else: else:
name = self.gen.name_node(node) name = self.gen.name_node(node)
self.cache[node] = f"{name}_var", f"{name}_rule(p)" self.cache[node] = FunctionCall(
assigned_variable=f"{name}_var",
function=f"{name}_rule",
arguments=["p"],
metadata={"rulename": name},
)
return self.cache[node] return self.cache[node]
def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]: def visit_NamedItem(self, node: NamedItem) -> FunctionCall:
name, call = self.visit(node.item) call = self.visit(node.item)
if node.name: if node.name:
name = node.name call.assigned_variable = node.name
return name, call return call
def lookahead_call_helper(self, node: Lookahead, positive: int) -> Tuple[None, str]: def lookahead_call_helper(self, node: Lookahead, positive: int) -> FunctionCall:
name, call = self.visit(node.node) call = self.visit(node.node)
func, args = call.split("(", 1) if call.nodetype == NodeTypes.NAME_TOKEN:
assert args[-1] == ")" return FunctionCall(
args = args[:-1] function=f"_PyPegen_lookahead_with_name",
if "name_token" in call: arguments=[positive, call.function, *call.arguments],
return None, f"_PyPegen_lookahead_with_name({positive}, {func}, {args})" )
elif not args.startswith("p,"): elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}:
return None, f"_PyPegen_lookahead({positive}, {func}, {args})" return FunctionCall(
elif args[2:].strip().isalnum(): function=f"_PyPegen_lookahead_with_int",
return None, f"_PyPegen_lookahead_with_int({positive}, {func}, {args})" arguments=[positive, call.function, *call.arguments],
)
else: else:
return None, f"_PyPegen_lookahead_with_string({positive}, {func}, {args})" return FunctionCall(
function=f"_PyPegen_lookahead",
arguments=[positive, call.function, *call.arguments],
)
def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]: def visit_PositiveLookahead(self, node: PositiveLookahead) -> FunctionCall:
return self.lookahead_call_helper(node, 1) return self.lookahead_call_helper(node, 1)
def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]: def visit_NegativeLookahead(self, node: NegativeLookahead) -> FunctionCall:
return self.lookahead_call_helper(node, 0) return self.lookahead_call_helper(node, 0)
def visit_Opt(self, node: Opt) -> Tuple[str, str]: def visit_Opt(self, node: Opt) -> FunctionCall:
name, call = self.visit(node.node) call = self.visit(node.node)
return "opt_var", f"{call}, 1" # Using comma operator! return FunctionCall(
assigned_variable="opt_var",
function=call.function,
arguments=call.arguments,
force_true=True,
)
def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]: def visit_Repeat0(self, node: Repeat0) -> FunctionCall:
if node in self.cache: if node in self.cache:
return self.cache[node] return self.cache[node]
name = self.gen.name_loop(node.node, False) name = self.gen.name_loop(node.node, False)
self.cache[node] = f"{name}_var", f"{name}_rule(p)" self.cache[node] = FunctionCall(
assigned_variable=f"{name}_var",
function=f"{name}_rule",
arguments=["p"],
metadata={"rulename": name},
)
return self.cache[node] return self.cache[node]
def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]: def visit_Repeat1(self, node: Repeat1) -> FunctionCall:
if node in self.cache: if node in self.cache:
return self.cache[node] return self.cache[node]
name = self.gen.name_loop(node.node, True) name = self.gen.name_loop(node.node, True)
self.cache[node] = f"{name}_var", f"{name}_rule(p)" self.cache[node] = FunctionCall(
assigned_variable=f"{name}_var",
function=f"{name}_rule",
arguments=["p"],
metadata={"rulename": name},
)
return self.cache[node] return self.cache[node]
def visit_Gather(self, node: Gather) -> Tuple[str, str]: def visit_Gather(self, node: Gather) -> FunctionCall:
if node in self.cache: if node in self.cache:
return self.cache[node] return self.cache[node]
name = self.gen.name_gather(node) name = self.gen.name_gather(node)
self.cache[node] = f"{name}_var", f"{name}_rule(p)" self.cache[node] = FunctionCall(
assigned_variable=f"{name}_var",
function=f"{name}_rule",
arguments=["p"],
metadata={"rulename": name},
)
return self.cache[node] return self.cache[node]
def visit_Group(self, node: Group) -> Tuple[Optional[str], str]: def visit_Group(self, node: Group) -> FunctionCall:
return self.visit(node.rhs) return self.visit(node.rhs)
def visit_Cut(self, node: Cut) -> Tuple[str, str]: def visit_Cut(self, node: Cut) -> FunctionCall:
return "cut_var", "1" return FunctionCall(
assigned_variable="cut_var", function="1", nodetype=NodeTypes.CUT_OPERATOR
)
class CParserGenerator(ParserGenerator, GrammarVisitor): class CParserGenerator(ParserGenerator, GrammarVisitor):
@ -252,7 +352,6 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
mode += 1 mode += 1
modulename = self.grammar.metas.get("modulename", "parse") modulename = self.grammar.metas.get("modulename", "parse")
trailer = self.grammar.metas.get("trailer", EXTENSION_SUFFIX) trailer = self.grammar.metas.get("trailer", EXTENSION_SUFFIX)
keyword_cache = self.callmakervisitor.keyword_cache
if trailer: if trailer:
self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename)) self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename))
@ -448,13 +547,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self._handle_default_rule_body(node, rhs, result_type) self._handle_default_rule_body(node, rhs, result_type)
self.print("}") self.print("}")
def visit_NamedItem(self, node: NamedItem, names: List[str]) -> None: def visit_NamedItem(self, node: NamedItem) -> None:
name, call = self.callmakervisitor.visit(node) call = self.callmakervisitor.visit(node)
if not name: if call.assigned_variable:
self.print(call) call.assigned_variable = self.dedupe(call.assigned_variable)
else: self.print(call)
name = dedupe(name, names)
self.print(f"({name} = {call})")
def visit_Rhs( def visit_Rhs(
self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str] self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str]
@ -464,7 +561,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
for alt in node.alts: for alt in node.alts:
self.visit(alt, is_loop=is_loop, is_gather=is_gather, rulename=rulename) self.visit(alt, is_loop=is_loop, is_gather=is_gather, rulename=rulename)
def join_conditions(self, keyword: str, node: Any, names: List[str]) -> None: def join_conditions(self, keyword: str, node: Any) -> None:
self.print(f"{keyword} (") self.print(f"{keyword} (")
with self.indent(): with self.indent():
first = True first = True
@ -473,7 +570,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
first = False first = False
else: else:
self.print("&&") self.print("&&")
self.visit(item, names=names) self.visit(item)
self.print(")") self.print(")")
def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None: def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None:
@ -492,29 +589,34 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
f'fprintf(stderr, "Hit with action [%d-%d]: %s\\n", mark, p->mark, "{node}");' f'fprintf(stderr, "Hit with action [%d-%d]: %s\\n", mark, p->mark, "{node}");'
) )
def emit_default_action(self, is_gather: bool, names: List[str], node: Alt) -> None: def emit_default_action(self, is_gather: bool, node: Alt) -> None:
if len(names) > 1: if len(self.local_variable_names) > 1:
if is_gather: if is_gather:
assert len(names) == 2 assert len(self.local_variable_names) == 2
self.print(f"res = _PyPegen_seq_insert_in_front(p, {names[0]}, {names[1]});") self.print(
f"res = _PyPegen_seq_insert_in_front(p, "
f"{self.local_variable_names[0]}, {self.local_variable_names[1]});"
)
else: else:
if self.debug: if self.debug:
self.print( self.print(
f'fprintf(stderr, "Hit without action [%d:%d]: %s\\n", mark, p->mark, "{node}");' f'fprintf(stderr, "Hit without action [%d:%d]: %s\\n", mark, p->mark, "{node}");'
) )
self.print(f"res = _PyPegen_dummy_name(p, {', '.join(names)});") self.print(
f"res = _PyPegen_dummy_name(p, {', '.join(self.local_variable_names)});"
)
else: else:
if self.debug: if self.debug:
self.print( self.print(
f'fprintf(stderr, "Hit with default action [%d:%d]: %s\\n", mark, p->mark, "{node}");' f'fprintf(stderr, "Hit with default action [%d:%d]: %s\\n", mark, p->mark, "{node}");'
) )
self.print(f"res = {names[0]};") self.print(f"res = {self.local_variable_names[0]};")
def emit_dummy_action(self) -> None: def emit_dummy_action(self) -> None:
self.print(f"res = _PyPegen_dummy_name(p);") self.print(f"res = _PyPegen_dummy_name(p);")
def handle_alt_normal(self, node: Alt, is_gather: bool, names: List[str]) -> None: def handle_alt_normal(self, node: Alt, is_gather: bool) -> None:
self.join_conditions(keyword="if", node=node, names=names) self.join_conditions(keyword="if", node=node)
self.print("{") self.print("{")
# We have parsed successfully all the conditions for the option. # We have parsed successfully all the conditions for the option.
with self.indent(): with self.indent():
@ -526,17 +628,15 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
elif node.action: elif node.action:
self.emit_action(node) self.emit_action(node)
else: else:
self.emit_default_action(is_gather, names, node) self.emit_default_action(is_gather, node)
# As the current option has parsed correctly, do not continue with the rest. # As the current option has parsed correctly, do not continue with the rest.
self.print(f"goto done;") self.print(f"goto done;")
self.print("}") self.print("}")
def handle_alt_loop( def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
self, node: Alt, is_gather: bool, rulename: Optional[str], names: List[str]
) -> None:
# Condition of the main body of the alternative # Condition of the main body of the alternative
self.join_conditions(keyword="while", node=node, names=names) self.join_conditions(keyword="while", node=node)
self.print("{") self.print("{")
# We have parsed successfully one item! # We have parsed successfully one item!
with self.indent(): with self.indent():
@ -548,7 +648,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
elif node.action: elif node.action:
self.emit_action(node, cleanup_code="PyMem_Free(children);") self.emit_action(node, cleanup_code="PyMem_Free(children);")
else: else:
self.emit_default_action(is_gather, names, node) self.emit_default_action(is_gather, node)
# Add the result of rule to the temporary buffer of children. This buffer # Add the result of rule to the temporary buffer of children. This buffer
# will populate later an asdl_seq with all elements to return. # will populate later an asdl_seq with all elements to return.
@ -580,47 +680,45 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
if v == "opt_var": if v == "opt_var":
self.print("UNUSED(opt_var); // Silence compiler warnings") self.print("UNUSED(opt_var); // Silence compiler warnings")
names: List[str] = [] with self.local_variable_context():
if is_loop: if is_loop:
self.handle_alt_loop(node, is_gather, rulename, names) self.handle_alt_loop(node, is_gather, rulename)
else: else:
self.handle_alt_normal(node, is_gather, names) self.handle_alt_normal(node, is_gather)
self.print("p->mark = mark;") self.print("p->mark = mark;")
if "cut_var" in names: if "cut_var" in vars:
self.print("if (cut_var) return NULL;") self.print("if (cut_var) return NULL;")
self.print("}") self.print("}")
def collect_vars(self, node: Alt) -> Dict[str, Optional[str]]: def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]:
names: List[str] = []
types = {} types = {}
for item in node.items: with self.local_variable_context():
name, type = self.add_var(item, names) for item in node.items:
types[name] = type name, type = self.add_var(item)
types[name] = type
return types return types
def add_var(self, node: NamedItem, names: List[str]) -> Tuple[str, Optional[str]]: def add_var(self, node: NamedItem) -> Tuple[Optional[str], Optional[str]]:
name: str call = self.callmakervisitor.visit(node.item)
call: str if not call.assigned_variable:
name, call = self.callmakervisitor.visit(node.item) return None, None
type = None if call.nodetype == NodeTypes.CUT_OPERATOR:
if not name: return call.assigned_variable, "int"
return name, type
if name.startswith("cut"): name = call.assigned_variable
return name, "int" rulename = call.metadata.get("rulename")
if name.endswith("_var"):
rulename = name[:-4] type: Optional[str] = None
rule = self.rules.get(rulename)
if rule is not None: assert self.all_rules is not None
if rule.is_loop() or rule.is_gather(): if rulename and rulename in self.all_rules:
type = "asdl_seq *" rule = self.all_rules.get(rulename)
else: if rule.is_loop() or rule.is_gather():
type = rule.type
elif name.startswith("_loop") or name.startswith("_gather"):
type = "asdl_seq *" type = "asdl_seq *"
elif name in ("name_var", "string_var", "number_var"): else:
type = "expr_ty" type = rule.type
if node.name: elif call.nodetype in BASE_NODETYPES.values():
name = node.name type = "expr_ty"
name = dedupe(name, names)
return name, type return self.dedupe(node.name if node.name else call.assigned_variable), type

View File

@ -13,7 +13,6 @@ from pegen.grammar import (
NamedItem, NamedItem,
Plain, Plain,
NameLeaf, NameLeaf,
StringLeaf,
Gather, Gather,
) )
from pegen.grammar import GrammarError, GrammarVisitor from pegen.grammar import GrammarError, GrammarVisitor
@ -48,6 +47,18 @@ class ParserGenerator:
self.todo = self.rules.copy() # Rules to generate self.todo = self.rules.copy() # Rules to generate
self.counter = 0 # For name_rule()/name_loop() self.counter = 0 # For name_rule()/name_loop()
self.keyword_counter = 499 # For keyword_type() self.keyword_counter = 499 # For keyword_type()
self.all_rules: Optional[Dict[str, Rule]] = None # Rules + temporal rules
self._local_variable_stack: List[List[str]] = []
@contextlib.contextmanager
def local_variable_context(self) -> Iterator[None]:
    """Open a fresh scope of local variable names for the ``with`` body.

    A new empty list is pushed onto ``self._local_variable_stack`` on entry
    and popped on exit.  The pop happens in a ``finally`` clause so the stack
    stays balanced even when the body raises.
    """
    self._local_variable_stack.append([])
    try:
        yield
    finally:
        self._local_variable_stack.pop()
@property
def local_variable_names(self) -> List[str]:
    """Names recorded in the innermost (most recently opened) variable scope."""
    innermost_scope = self._local_variable_stack[-1]
    return innermost_scope
@abstractmethod @abstractmethod
def generate(self, filename: str) -> None: def generate(self, filename: str) -> None:
@ -82,6 +93,7 @@ class ParserGenerator:
for rulename in todo: for rulename in todo:
self.todo[rulename].collect_todo(self) self.todo[rulename].collect_todo(self)
done = set(alltodo) done = set(alltodo)
self.all_rules = self.todo.copy()
def keyword_type(self) -> int: def keyword_type(self) -> int:
self.keyword_counter += 1 self.keyword_counter += 1
@ -109,26 +121,23 @@ class ParserGenerator:
self.counter += 1 self.counter += 1
extra_function_name = f"_loop0_{self.counter}" extra_function_name = f"_loop0_{self.counter}"
extra_function_alt = Alt( extra_function_alt = Alt(
[NamedItem(None, node.separator), NamedItem("elem", node.node),], action="elem", [NamedItem(None, node.separator), NamedItem("elem", node.node)], action="elem",
) )
self.todo[extra_function_name] = Rule( self.todo[extra_function_name] = Rule(
extra_function_name, None, Rhs([extra_function_alt]), extra_function_name, None, Rhs([extra_function_alt]),
) )
alt = Alt( alt = Alt([NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],)
[NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name)),],
)
self.todo[name] = Rule(name, None, Rhs([alt]),) self.todo[name] = Rule(name, None, Rhs([alt]),)
return name return name
def dedupe(self, name: str) -> str:
def dedupe(name: str, names: List[str]) -> str: origname = name
origname = name counter = 0
counter = 0 while name in self.local_variable_names:
while name in names: counter += 1
counter += 1 name = f"{origname}_{counter}"
name = f"{origname}_{counter}" self.local_variable_names.append(name)
names.append(name) return name
return name
def compute_nullables(rules: Dict[str, Rule]) -> None: def compute_nullables(rules: Dict[str, Rule]) -> None:
@ -153,13 +162,13 @@ def compute_left_recursives(
leaders = set(scc) leaders = set(scc)
for start in scc: for start in scc:
for cycle in sccutils.find_cycles_in_scc(graph, scc, start): for cycle in sccutils.find_cycles_in_scc(graph, scc, start):
## print("Cycle:", " -> ".join(cycle)) # print("Cycle:", " -> ".join(cycle))
leaders -= scc - set(cycle) leaders -= scc - set(cycle)
if not leaders: if not leaders:
raise ValueError( raise ValueError(
f"SCC {scc} has no leadership candidate (no element is included in all cycles)" f"SCC {scc} has no leadership candidate (no element is included in all cycles)"
) )
## print("Leaders:", leaders) # print("Leaders:", leaders)
leader = min(leaders) # Pick an arbitrary leader from the candidates. leader = min(leaders) # Pick an arbitrary leader from the candidates.
rules[leader].leader = True rules[leader].leader = True
else: else:

View File

@ -1,4 +1,4 @@
from typing import Any, Dict, List, Optional, IO, Text, Tuple from typing import Any, Dict, Optional, IO, Text, Tuple
from pegen.grammar import ( from pegen.grammar import (
Cut, Cut,
@ -19,7 +19,7 @@ from pegen.grammar import (
Alt, Alt,
) )
from pegen import grammar from pegen import grammar
from pegen.parser_generator import dedupe, ParserGenerator from pegen.parser_generator import ParserGenerator
MODULE_PREFIX = """\ MODULE_PREFIX = """\
#!/usr/bin/env python3.8 #!/usr/bin/env python3.8
@ -173,7 +173,7 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
else: else:
self.print("return None") self.print("return None")
def visit_NamedItem(self, node: NamedItem, names: List[str]) -> None: def visit_NamedItem(self, node: NamedItem) -> None:
name, call = self.callmakervisitor.visit(node.item) name, call = self.callmakervisitor.visit(node.item)
if node.name: if node.name:
name = node.name name = node.name
@ -181,7 +181,7 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
self.print(call) self.print(call)
else: else:
if name != "cut": if name != "cut":
name = dedupe(name, names) name = self.dedupe(name)
self.print(f"({name} := {call})") self.print(f"({name} := {call})")
def visit_Rhs(self, node: Rhs, is_loop: bool = False, is_gather: bool = False) -> None: def visit_Rhs(self, node: Rhs, is_loop: bool = False, is_gather: bool = False) -> None:
@ -191,34 +191,36 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
self.visit(alt, is_loop=is_loop, is_gather=is_gather) self.visit(alt, is_loop=is_loop, is_gather=is_gather)
def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None: def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None:
names: List[str] = [] with self.local_variable_context():
self.print("cut = False") # TODO: Only if needed. self.print("cut = False") # TODO: Only if needed.
if is_loop:
self.print("while (")
else:
self.print("if (")
with self.indent():
first = True
for item in node.items:
if first:
first = False
else:
self.print("and")
self.visit(item, names=names)
self.print("):")
with self.indent():
action = node.action
if not action:
if is_gather:
assert len(names) == 2
action = f"[{names[0]}] + {names[1]}"
else:
action = f"[{', '.join(names)}]"
if is_loop: if is_loop:
self.print(f"children.append({action})") self.print("while (")
self.print(f"mark = self.mark()")
else: else:
self.print(f"return {action}") self.print("if (")
self.print("self.reset(mark)") with self.indent():
# Skip remaining alternatives if a cut was reached. first = True
self.print("if cut: return None") # TODO: Only if needed. for item in node.items:
if first:
first = False
else:
self.print("and")
self.visit(item)
self.print("):")
with self.indent():
action = node.action
if not action:
if is_gather:
assert len(self.local_variable_names) == 2
action = (
f"[{self.local_variable_names[0]}] + {self.local_variable_names[1]}"
)
else:
action = f"[{', '.join(self.local_variable_names)}]"
if is_loop:
self.print(f"children.append({action})")
self.print(f"mark = self.mark()")
else:
self.print(f"return {action}")
self.print("self.reset(mark)")
# Skip remaining alternatives if a cut was reached.
self.print("if cut: return None") # TODO: Only if needed.