bpo-40334: refactor and cleanup for the PEG generators (GH-19775)
This commit is contained in:
parent
9b64ef3ac7
commit
4db245ee9d
|
@ -648,7 +648,7 @@ file_rule(Parser *p)
|
||||||
if (
|
if (
|
||||||
(a = statements_rule(p), 1)
|
(a = statements_rule(p), 1)
|
||||||
&&
|
&&
|
||||||
(endmarker_var = _PyPegen_endmarker_token(p))
|
(endmarker_var = _PyPegen_expect_token(p, ENDMARKER))
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
res = Module ( a , NULL , p -> arena );
|
res = Module ( a , NULL , p -> arena );
|
||||||
|
@ -712,7 +712,7 @@ eval_rule(Parser *p)
|
||||||
&&
|
&&
|
||||||
(_loop0_1_var = _loop0_1_rule(p))
|
(_loop0_1_var = _loop0_1_rule(p))
|
||||||
&&
|
&&
|
||||||
(endmarker_var = _PyPegen_endmarker_token(p))
|
(endmarker_var = _PyPegen_expect_token(p, ENDMARKER))
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
res = Expression ( a , p -> arena );
|
res = Expression ( a , p -> arena );
|
||||||
|
@ -846,7 +846,7 @@ statement_newline_rule(Parser *p)
|
||||||
if (
|
if (
|
||||||
(a = compound_stmt_rule(p))
|
(a = compound_stmt_rule(p))
|
||||||
&&
|
&&
|
||||||
(newline_var = _PyPegen_newline_token(p))
|
(newline_var = _PyPegen_expect_token(p, NEWLINE))
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
res = _PyPegen_singleton_seq ( p , a );
|
res = _PyPegen_singleton_seq ( p , a );
|
||||||
|
@ -872,7 +872,7 @@ statement_newline_rule(Parser *p)
|
||||||
{ // NEWLINE
|
{ // NEWLINE
|
||||||
void *newline_var;
|
void *newline_var;
|
||||||
if (
|
if (
|
||||||
(newline_var = _PyPegen_newline_token(p))
|
(newline_var = _PyPegen_expect_token(p, NEWLINE))
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
Token *token = _PyPegen_get_last_nonnwhitespace_token(p);
|
Token *token = _PyPegen_get_last_nonnwhitespace_token(p);
|
||||||
|
@ -895,7 +895,7 @@ statement_newline_rule(Parser *p)
|
||||||
{ // $
|
{ // $
|
||||||
void *endmarker_var;
|
void *endmarker_var;
|
||||||
if (
|
if (
|
||||||
(endmarker_var = _PyPegen_endmarker_token(p))
|
(endmarker_var = _PyPegen_expect_token(p, ENDMARKER))
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
res = _PyPegen_interactive_exit ( p );
|
res = _PyPegen_interactive_exit ( p );
|
||||||
|
@ -929,7 +929,7 @@ simple_stmt_rule(Parser *p)
|
||||||
&&
|
&&
|
||||||
_PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, 13)
|
_PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, 13)
|
||||||
&&
|
&&
|
||||||
(newline_var = _PyPegen_newline_token(p))
|
(newline_var = _PyPegen_expect_token(p, NEWLINE))
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
res = _PyPegen_singleton_seq ( p , a );
|
res = _PyPegen_singleton_seq ( p , a );
|
||||||
|
@ -951,7 +951,7 @@ simple_stmt_rule(Parser *p)
|
||||||
&&
|
&&
|
||||||
(opt_var = _PyPegen_expect_token(p, 13), 1)
|
(opt_var = _PyPegen_expect_token(p, 13), 1)
|
||||||
&&
|
&&
|
||||||
(newline_var = _PyPegen_newline_token(p))
|
(newline_var = _PyPegen_expect_token(p, NEWLINE))
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
res = a;
|
res = a;
|
||||||
|
@ -2684,7 +2684,7 @@ for_stmt_rule(Parser *p)
|
||||||
void *literal;
|
void *literal;
|
||||||
expr_ty t;
|
expr_ty t;
|
||||||
if (
|
if (
|
||||||
(is_async = _PyPegen_async_token(p), 1)
|
(is_async = _PyPegen_expect_token(p, ASYNC), 1)
|
||||||
&&
|
&&
|
||||||
(keyword = _PyPegen_expect_token(p, 517))
|
(keyword = _PyPegen_expect_token(p, 517))
|
||||||
&&
|
&&
|
||||||
|
@ -2751,7 +2751,7 @@ with_stmt_rule(Parser *p)
|
||||||
void *literal_1;
|
void *literal_1;
|
||||||
void *literal_2;
|
void *literal_2;
|
||||||
if (
|
if (
|
||||||
(is_async = _PyPegen_async_token(p), 1)
|
(is_async = _PyPegen_expect_token(p, ASYNC), 1)
|
||||||
&&
|
&&
|
||||||
(keyword = _PyPegen_expect_token(p, 519))
|
(keyword = _PyPegen_expect_token(p, 519))
|
||||||
&&
|
&&
|
||||||
|
@ -2790,7 +2790,7 @@ with_stmt_rule(Parser *p)
|
||||||
void *keyword;
|
void *keyword;
|
||||||
void *literal;
|
void *literal;
|
||||||
if (
|
if (
|
||||||
(is_async = _PyPegen_async_token(p), 1)
|
(is_async = _PyPegen_expect_token(p, ASYNC), 1)
|
||||||
&&
|
&&
|
||||||
(keyword = _PyPegen_expect_token(p, 519))
|
(keyword = _PyPegen_expect_token(p, 519))
|
||||||
&&
|
&&
|
||||||
|
@ -3263,7 +3263,7 @@ function_def_raw_rule(Parser *p)
|
||||||
expr_ty n;
|
expr_ty n;
|
||||||
void *params;
|
void *params;
|
||||||
if (
|
if (
|
||||||
(is_async = _PyPegen_async_token(p), 1)
|
(is_async = _PyPegen_expect_token(p, ASYNC), 1)
|
||||||
&&
|
&&
|
||||||
(keyword = _PyPegen_expect_token(p, 522))
|
(keyword = _PyPegen_expect_token(p, 522))
|
||||||
&&
|
&&
|
||||||
|
@ -4002,13 +4002,13 @@ block_rule(Parser *p)
|
||||||
void *indent_var;
|
void *indent_var;
|
||||||
void *newline_var;
|
void *newline_var;
|
||||||
if (
|
if (
|
||||||
(newline_var = _PyPegen_newline_token(p))
|
(newline_var = _PyPegen_expect_token(p, NEWLINE))
|
||||||
&&
|
&&
|
||||||
(indent_var = _PyPegen_indent_token(p))
|
(indent_var = _PyPegen_expect_token(p, INDENT))
|
||||||
&&
|
&&
|
||||||
(a = statements_rule(p))
|
(a = statements_rule(p))
|
||||||
&&
|
&&
|
||||||
(dedent_var = _PyPegen_dedent_token(p))
|
(dedent_var = _PyPegen_expect_token(p, DEDENT))
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
res = a;
|
res = a;
|
||||||
|
@ -6754,7 +6754,7 @@ await_primary_rule(Parser *p)
|
||||||
expr_ty a;
|
expr_ty a;
|
||||||
void *await_var;
|
void *await_var;
|
||||||
if (
|
if (
|
||||||
(await_var = _PyPegen_await_token(p))
|
(await_var = _PyPegen_expect_token(p, AWAIT))
|
||||||
&&
|
&&
|
||||||
(a = primary_rule(p))
|
(a = primary_rule(p))
|
||||||
)
|
)
|
||||||
|
@ -9919,9 +9919,9 @@ invalid_block_rule(Parser *p)
|
||||||
{ // NEWLINE !INDENT
|
{ // NEWLINE !INDENT
|
||||||
void *newline_var;
|
void *newline_var;
|
||||||
if (
|
if (
|
||||||
(newline_var = _PyPegen_newline_token(p))
|
(newline_var = _PyPegen_expect_token(p, NEWLINE))
|
||||||
&&
|
&&
|
||||||
_PyPegen_lookahead(0, _PyPegen_indent_token, p)
|
_PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, INDENT)
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
res = RAISE_INDENTATION_ERROR ( "expected an indented block" );
|
res = RAISE_INDENTATION_ERROR ( "expected an indented block" );
|
||||||
|
@ -10036,7 +10036,7 @@ _loop0_1_rule(Parser *p)
|
||||||
{ // NEWLINE
|
{ // NEWLINE
|
||||||
void *newline_var;
|
void *newline_var;
|
||||||
while (
|
while (
|
||||||
(newline_var = _PyPegen_newline_token(p))
|
(newline_var = _PyPegen_expect_token(p, NEWLINE))
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
res = newline_var;
|
res = newline_var;
|
||||||
|
@ -10273,7 +10273,7 @@ _tmp_6_rule(Parser *p)
|
||||||
{ // ASYNC
|
{ // ASYNC
|
||||||
void *async_var;
|
void *async_var;
|
||||||
if (
|
if (
|
||||||
(async_var = _PyPegen_async_token(p))
|
(async_var = _PyPegen_expect_token(p, ASYNC))
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
res = async_var;
|
res = async_var;
|
||||||
|
@ -10345,7 +10345,7 @@ _tmp_8_rule(Parser *p)
|
||||||
{ // ASYNC
|
{ // ASYNC
|
||||||
void *async_var;
|
void *async_var;
|
||||||
if (
|
if (
|
||||||
(async_var = _PyPegen_async_token(p))
|
(async_var = _PyPegen_expect_token(p, ASYNC))
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
res = async_var;
|
res = async_var;
|
||||||
|
@ -10381,7 +10381,7 @@ _tmp_9_rule(Parser *p)
|
||||||
{ // ASYNC
|
{ // ASYNC
|
||||||
void *async_var;
|
void *async_var;
|
||||||
if (
|
if (
|
||||||
(async_var = _PyPegen_async_token(p))
|
(async_var = _PyPegen_expect_token(p, ASYNC))
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
res = async_var;
|
res = async_var;
|
||||||
|
@ -15068,7 +15068,7 @@ _tmp_128_rule(Parser *p)
|
||||||
&&
|
&&
|
||||||
(f = named_expression_rule(p))
|
(f = named_expression_rule(p))
|
||||||
&&
|
&&
|
||||||
(newline_var = _PyPegen_newline_token(p))
|
(newline_var = _PyPegen_expect_token(p, NEWLINE))
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
res = f;
|
res = f;
|
||||||
|
@ -15257,7 +15257,7 @@ _tmp_134_rule(Parser *p)
|
||||||
void *keyword_1;
|
void *keyword_1;
|
||||||
void *y;
|
void *y;
|
||||||
if (
|
if (
|
||||||
(y = _PyPegen_async_token(p), 1)
|
(y = _PyPegen_expect_token(p, ASYNC), 1)
|
||||||
&&
|
&&
|
||||||
(keyword = _PyPegen_expect_token(p, 517))
|
(keyword = _PyPegen_expect_token(p, 517))
|
||||||
&&
|
&&
|
||||||
|
|
|
@ -692,16 +692,6 @@ _PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
|
||||||
return (res != NULL) == positive;
|
return (res != NULL) == positive;
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
|
||||||
_PyPegen_lookahead_with_string(int positive, void *(func)(Parser *, const char *), Parser *p,
|
|
||||||
const char *arg)
|
|
||||||
{
|
|
||||||
int mark = p->mark;
|
|
||||||
void *res = func(p, arg);
|
|
||||||
p->mark = mark;
|
|
||||||
return (res != NULL) == positive;
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
int
|
||||||
_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
|
_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
|
||||||
{
|
{
|
||||||
|
@ -751,24 +741,6 @@ _PyPegen_get_last_nonnwhitespace_token(Parser *p)
|
||||||
return token;
|
return token;
|
||||||
}
|
}
|
||||||
|
|
||||||
void *
|
|
||||||
_PyPegen_async_token(Parser *p)
|
|
||||||
{
|
|
||||||
return _PyPegen_expect_token(p, ASYNC);
|
|
||||||
}
|
|
||||||
|
|
||||||
void *
|
|
||||||
_PyPegen_await_token(Parser *p)
|
|
||||||
{
|
|
||||||
return _PyPegen_expect_token(p, AWAIT);
|
|
||||||
}
|
|
||||||
|
|
||||||
void *
|
|
||||||
_PyPegen_endmarker_token(Parser *p)
|
|
||||||
{
|
|
||||||
return _PyPegen_expect_token(p, ENDMARKER);
|
|
||||||
}
|
|
||||||
|
|
||||||
expr_ty
|
expr_ty
|
||||||
_PyPegen_name_token(Parser *p)
|
_PyPegen_name_token(Parser *p)
|
||||||
{
|
{
|
||||||
|
@ -794,24 +766,6 @@ _PyPegen_string_token(Parser *p)
|
||||||
return _PyPegen_expect_token(p, STRING);
|
return _PyPegen_expect_token(p, STRING);
|
||||||
}
|
}
|
||||||
|
|
||||||
void *
|
|
||||||
_PyPegen_newline_token(Parser *p)
|
|
||||||
{
|
|
||||||
return _PyPegen_expect_token(p, NEWLINE);
|
|
||||||
}
|
|
||||||
|
|
||||||
void *
|
|
||||||
_PyPegen_indent_token(Parser *p)
|
|
||||||
{
|
|
||||||
return _PyPegen_expect_token(p, INDENT);
|
|
||||||
}
|
|
||||||
|
|
||||||
void *
|
|
||||||
_PyPegen_dedent_token(Parser *p)
|
|
||||||
{
|
|
||||||
return _PyPegen_expect_token(p, DEDENT);
|
|
||||||
}
|
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
parsenumber_raw(const char *s)
|
parsenumber_raw(const char *s)
|
||||||
{
|
{
|
||||||
|
|
|
@ -104,7 +104,6 @@ int _PyPegen_update_memo(Parser *p, int mark, int type, void *node);
|
||||||
int _PyPegen_is_memoized(Parser *p, int type, void *pres);
|
int _PyPegen_is_memoized(Parser *p, int type, void *pres);
|
||||||
|
|
||||||
int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
|
int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
|
||||||
int _PyPegen_lookahead_with_string(int, void *(func)(Parser *, const char *), Parser *, const char *);
|
|
||||||
int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
|
int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
|
||||||
int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
|
int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
|
||||||
|
|
||||||
|
|
|
@ -33,7 +33,7 @@ dump: peg_extension/parse.c
|
||||||
$(PYTHON) -c "from peg_extension import parse; import ast; t = parse.parse_file('$(TESTFILE)', mode=1); print(ast.dump(t))"
|
$(PYTHON) -c "from peg_extension import parse; import ast; t = parse.parse_file('$(TESTFILE)', mode=1); print(ast.dump(t))"
|
||||||
|
|
||||||
regen-metaparser: pegen/metagrammar.gram pegen/*.py
|
regen-metaparser: pegen/metagrammar.gram pegen/*.py
|
||||||
$(PYTHON) -m pegen -q -c pegen/metagrammar.gram -o pegen/grammar_parser.py
|
$(PYTHON) -m pegen -q python pegen/metagrammar.gram -o pegen/grammar_parser.py
|
||||||
|
|
||||||
# Note: These targets really depend on the generated shared object in peg_extension/parse.*.so but
|
# Note: These targets really depend on the generated shared object in peg_extension/parse.*.so but
|
||||||
# this has different names in different systems so we are abusing the implicit dependency on
|
# this has different names in different systems so we are abusing the implicit dependency on
|
||||||
|
|
|
@ -1,33 +1,36 @@
|
||||||
import ast
|
import ast
|
||||||
|
from dataclasses import dataclass, field
|
||||||
import re
|
import re
|
||||||
from typing import Any, cast, Dict, IO, Optional, List, Text, Tuple, Set
|
from typing import IO, Any, Dict, List, Optional, Set, Text, Tuple
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
from pegen import grammar
|
||||||
from pegen.grammar import (
|
from pegen.grammar import (
|
||||||
Cut,
|
|
||||||
GrammarVisitor,
|
|
||||||
Rhs,
|
|
||||||
Alt,
|
Alt,
|
||||||
|
Cut,
|
||||||
|
Gather,
|
||||||
|
GrammarVisitor,
|
||||||
|
Group,
|
||||||
|
Lookahead,
|
||||||
NamedItem,
|
NamedItem,
|
||||||
NameLeaf,
|
NameLeaf,
|
||||||
StringLeaf,
|
|
||||||
Lookahead,
|
|
||||||
PositiveLookahead,
|
|
||||||
NegativeLookahead,
|
NegativeLookahead,
|
||||||
Opt,
|
Opt,
|
||||||
|
PositiveLookahead,
|
||||||
Repeat0,
|
Repeat0,
|
||||||
Repeat1,
|
Repeat1,
|
||||||
Gather,
|
Rhs,
|
||||||
Group,
|
|
||||||
Rule,
|
Rule,
|
||||||
|
StringLeaf,
|
||||||
)
|
)
|
||||||
from pegen import grammar
|
from pegen.parser_generator import ParserGenerator
|
||||||
from pegen.parser_generator import dedupe, ParserGenerator
|
|
||||||
|
|
||||||
EXTENSION_PREFIX = """\
|
EXTENSION_PREFIX = """\
|
||||||
#include "pegen.h"
|
#include "pegen.h"
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
EXTENSION_SUFFIX = """
|
EXTENSION_SUFFIX = """
|
||||||
void *
|
void *
|
||||||
_PyPegen_parse(Parser *p)
|
_PyPegen_parse(Parser *p)
|
||||||
|
@ -41,6 +44,43 @@ _PyPegen_parse(Parser *p)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class NodeTypes(Enum):
|
||||||
|
NAME_TOKEN = 0
|
||||||
|
NUMBER_TOKEN = 1
|
||||||
|
STRING_TOKEN = 2
|
||||||
|
GENERIC_TOKEN = 3
|
||||||
|
KEYWORD = 4
|
||||||
|
CUT_OPERATOR = 5
|
||||||
|
|
||||||
|
|
||||||
|
BASE_NODETYPES = {
|
||||||
|
"NAME": NodeTypes.NAME_TOKEN,
|
||||||
|
"NUMBER": NodeTypes.NUMBER_TOKEN,
|
||||||
|
"STRING": NodeTypes.STRING_TOKEN,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class FunctionCall:
|
||||||
|
function: str
|
||||||
|
arguments: Optional[List[Any]] = None
|
||||||
|
assigned_variable: Optional[str] = None
|
||||||
|
nodetype: Optional[NodeTypes] = None
|
||||||
|
force_true: bool = False
|
||||||
|
metadata: Dict[str, Any] = field(default_factory=dict)
|
||||||
|
|
||||||
|
def __str__(self) -> str:
|
||||||
|
parts = []
|
||||||
|
parts.append(self.function)
|
||||||
|
if self.arguments:
|
||||||
|
parts.append(f"({', '.join(map(str, self.arguments))})")
|
||||||
|
if self.force_true:
|
||||||
|
parts.append(", 1")
|
||||||
|
if self.assigned_variable:
|
||||||
|
parts = ["(", self.assigned_variable, " = ", *parts, ")"]
|
||||||
|
return "".join(parts)
|
||||||
|
|
||||||
|
|
||||||
class CCallMakerVisitor(GrammarVisitor):
|
class CCallMakerVisitor(GrammarVisitor):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
@ -54,28 +94,57 @@ class CCallMakerVisitor(GrammarVisitor):
|
||||||
self.cache: Dict[Any, Any] = {}
|
self.cache: Dict[Any, Any] = {}
|
||||||
self.keyword_cache: Dict[str, int] = {}
|
self.keyword_cache: Dict[str, int] = {}
|
||||||
|
|
||||||
def keyword_helper(self, keyword: str) -> Tuple[str, str]:
|
def keyword_helper(self, keyword: str) -> FunctionCall:
|
||||||
if keyword not in self.keyword_cache:
|
if keyword not in self.keyword_cache:
|
||||||
self.keyword_cache[keyword] = self.gen.keyword_type()
|
self.keyword_cache[keyword] = self.gen.keyword_type()
|
||||||
return "keyword", f"_PyPegen_expect_token(p, {self.keyword_cache[keyword]})"
|
return FunctionCall(
|
||||||
|
assigned_variable="keyword",
|
||||||
|
function="_PyPegen_expect_token",
|
||||||
|
arguments=["p", self.keyword_cache[keyword]],
|
||||||
|
nodetype=NodeTypes.KEYWORD,
|
||||||
|
)
|
||||||
|
|
||||||
def visit_NameLeaf(self, node: NameLeaf) -> Tuple[str, str]:
|
def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall:
|
||||||
name = node.value
|
name = node.value
|
||||||
if name in self.non_exact_tokens:
|
if name in self.non_exact_tokens:
|
||||||
name = name.lower()
|
if name in BASE_NODETYPES:
|
||||||
return f"{name}_var", f"_PyPegen_{name}_token(p)"
|
return FunctionCall(
|
||||||
return f"{name}_var", f"{name}_rule(p)"
|
assigned_variable=f"{name.lower()}_var",
|
||||||
|
function=f"_PyPegen_{name.lower()}_token",
|
||||||
|
arguments=["p"],
|
||||||
|
nodetype=BASE_NODETYPES[name],
|
||||||
|
metadata={"rulename": name.lower()},
|
||||||
|
)
|
||||||
|
return FunctionCall(
|
||||||
|
assigned_variable=f"{name.lower()}_var",
|
||||||
|
function=f"_PyPegen_expect_token",
|
||||||
|
arguments=["p", name],
|
||||||
|
nodetype=NodeTypes.GENERIC_TOKEN,
|
||||||
|
metadata={"rulename": name.lower()},
|
||||||
|
)
|
||||||
|
|
||||||
def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
|
return FunctionCall(
|
||||||
|
assigned_variable=f"{name}_var",
|
||||||
|
function=f"{name}_rule",
|
||||||
|
arguments=["p"],
|
||||||
|
metadata={"rulename": name.lower()},
|
||||||
|
)
|
||||||
|
|
||||||
|
def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall:
|
||||||
val = ast.literal_eval(node.value)
|
val = ast.literal_eval(node.value)
|
||||||
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
|
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
|
||||||
return self.keyword_helper(val)
|
return self.keyword_helper(val)
|
||||||
else:
|
else:
|
||||||
assert val in self.exact_tokens, f"{node.value} is not a known literal"
|
assert val in self.exact_tokens, f"{node.value} is not a known literal"
|
||||||
type = self.exact_tokens[val]
|
type = self.exact_tokens[val]
|
||||||
return "literal", f"_PyPegen_expect_token(p, {type})"
|
return FunctionCall(
|
||||||
|
assigned_variable="literal",
|
||||||
|
function=f"_PyPegen_expect_token",
|
||||||
|
arguments=["p", type],
|
||||||
|
nodetype=NodeTypes.GENERIC_TOKEN,
|
||||||
|
)
|
||||||
|
|
||||||
def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
|
def visit_Rhs(self, node: Rhs) -> FunctionCall:
|
||||||
def can_we_inline(node: Rhs) -> int:
|
def can_we_inline(node: Rhs) -> int:
|
||||||
if len(node.alts) != 1 or len(node.alts[0].items) != 1:
|
if len(node.alts) != 1 or len(node.alts[0].items) != 1:
|
||||||
return False
|
return False
|
||||||
|
@ -90,65 +159,96 @@ class CCallMakerVisitor(GrammarVisitor):
|
||||||
self.cache[node] = self.visit(node.alts[0].items[0])
|
self.cache[node] = self.visit(node.alts[0].items[0])
|
||||||
else:
|
else:
|
||||||
name = self.gen.name_node(node)
|
name = self.gen.name_node(node)
|
||||||
self.cache[node] = f"{name}_var", f"{name}_rule(p)"
|
self.cache[node] = FunctionCall(
|
||||||
|
assigned_variable=f"{name}_var",
|
||||||
|
function=f"{name}_rule",
|
||||||
|
arguments=["p"],
|
||||||
|
metadata={"rulename": name},
|
||||||
|
)
|
||||||
return self.cache[node]
|
return self.cache[node]
|
||||||
|
|
||||||
def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]:
|
def visit_NamedItem(self, node: NamedItem) -> FunctionCall:
|
||||||
name, call = self.visit(node.item)
|
call = self.visit(node.item)
|
||||||
if node.name:
|
if node.name:
|
||||||
name = node.name
|
call.assigned_variable = node.name
|
||||||
return name, call
|
return call
|
||||||
|
|
||||||
def lookahead_call_helper(self, node: Lookahead, positive: int) -> Tuple[None, str]:
|
def lookahead_call_helper(self, node: Lookahead, positive: int) -> FunctionCall:
|
||||||
name, call = self.visit(node.node)
|
call = self.visit(node.node)
|
||||||
func, args = call.split("(", 1)
|
if call.nodetype == NodeTypes.NAME_TOKEN:
|
||||||
assert args[-1] == ")"
|
return FunctionCall(
|
||||||
args = args[:-1]
|
function=f"_PyPegen_lookahead_with_name",
|
||||||
if "name_token" in call:
|
arguments=[positive, call.function, *call.arguments],
|
||||||
return None, f"_PyPegen_lookahead_with_name({positive}, {func}, {args})"
|
)
|
||||||
elif not args.startswith("p,"):
|
elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}:
|
||||||
return None, f"_PyPegen_lookahead({positive}, {func}, {args})"
|
return FunctionCall(
|
||||||
elif args[2:].strip().isalnum():
|
function=f"_PyPegen_lookahead_with_int",
|
||||||
return None, f"_PyPegen_lookahead_with_int({positive}, {func}, {args})"
|
arguments=[positive, call.function, *call.arguments],
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
return None, f"_PyPegen_lookahead_with_string({positive}, {func}, {args})"
|
return FunctionCall(
|
||||||
|
function=f"_PyPegen_lookahead",
|
||||||
|
arguments=[positive, call.function, *call.arguments],
|
||||||
|
)
|
||||||
|
|
||||||
def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]:
|
def visit_PositiveLookahead(self, node: PositiveLookahead) -> FunctionCall:
|
||||||
return self.lookahead_call_helper(node, 1)
|
return self.lookahead_call_helper(node, 1)
|
||||||
|
|
||||||
def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]:
|
def visit_NegativeLookahead(self, node: NegativeLookahead) -> FunctionCall:
|
||||||
return self.lookahead_call_helper(node, 0)
|
return self.lookahead_call_helper(node, 0)
|
||||||
|
|
||||||
def visit_Opt(self, node: Opt) -> Tuple[str, str]:
|
def visit_Opt(self, node: Opt) -> FunctionCall:
|
||||||
name, call = self.visit(node.node)
|
call = self.visit(node.node)
|
||||||
return "opt_var", f"{call}, 1" # Using comma operator!
|
return FunctionCall(
|
||||||
|
assigned_variable="opt_var",
|
||||||
|
function=call.function,
|
||||||
|
arguments=call.arguments,
|
||||||
|
force_true=True,
|
||||||
|
)
|
||||||
|
|
||||||
def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
|
def visit_Repeat0(self, node: Repeat0) -> FunctionCall:
|
||||||
if node in self.cache:
|
if node in self.cache:
|
||||||
return self.cache[node]
|
return self.cache[node]
|
||||||
name = self.gen.name_loop(node.node, False)
|
name = self.gen.name_loop(node.node, False)
|
||||||
self.cache[node] = f"{name}_var", f"{name}_rule(p)"
|
self.cache[node] = FunctionCall(
|
||||||
|
assigned_variable=f"{name}_var",
|
||||||
|
function=f"{name}_rule",
|
||||||
|
arguments=["p"],
|
||||||
|
metadata={"rulename": name},
|
||||||
|
)
|
||||||
return self.cache[node]
|
return self.cache[node]
|
||||||
|
|
||||||
def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
|
def visit_Repeat1(self, node: Repeat1) -> FunctionCall:
|
||||||
if node in self.cache:
|
if node in self.cache:
|
||||||
return self.cache[node]
|
return self.cache[node]
|
||||||
name = self.gen.name_loop(node.node, True)
|
name = self.gen.name_loop(node.node, True)
|
||||||
self.cache[node] = f"{name}_var", f"{name}_rule(p)"
|
self.cache[node] = FunctionCall(
|
||||||
|
assigned_variable=f"{name}_var",
|
||||||
|
function=f"{name}_rule",
|
||||||
|
arguments=["p"],
|
||||||
|
metadata={"rulename": name},
|
||||||
|
)
|
||||||
return self.cache[node]
|
return self.cache[node]
|
||||||
|
|
||||||
def visit_Gather(self, node: Gather) -> Tuple[str, str]:
|
def visit_Gather(self, node: Gather) -> FunctionCall:
|
||||||
if node in self.cache:
|
if node in self.cache:
|
||||||
return self.cache[node]
|
return self.cache[node]
|
||||||
name = self.gen.name_gather(node)
|
name = self.gen.name_gather(node)
|
||||||
self.cache[node] = f"{name}_var", f"{name}_rule(p)"
|
self.cache[node] = FunctionCall(
|
||||||
|
assigned_variable=f"{name}_var",
|
||||||
|
function=f"{name}_rule",
|
||||||
|
arguments=["p"],
|
||||||
|
metadata={"rulename": name},
|
||||||
|
)
|
||||||
return self.cache[node]
|
return self.cache[node]
|
||||||
|
|
||||||
def visit_Group(self, node: Group) -> Tuple[Optional[str], str]:
|
def visit_Group(self, node: Group) -> FunctionCall:
|
||||||
return self.visit(node.rhs)
|
return self.visit(node.rhs)
|
||||||
|
|
||||||
def visit_Cut(self, node: Cut) -> Tuple[str, str]:
|
def visit_Cut(self, node: Cut) -> FunctionCall:
|
||||||
return "cut_var", "1"
|
return FunctionCall(
|
||||||
|
assigned_variable="cut_var", function="1", nodetype=NodeTypes.CUT_OPERATOR
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class CParserGenerator(ParserGenerator, GrammarVisitor):
|
class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||||
|
@ -252,7 +352,6 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||||
mode += 1
|
mode += 1
|
||||||
modulename = self.grammar.metas.get("modulename", "parse")
|
modulename = self.grammar.metas.get("modulename", "parse")
|
||||||
trailer = self.grammar.metas.get("trailer", EXTENSION_SUFFIX)
|
trailer = self.grammar.metas.get("trailer", EXTENSION_SUFFIX)
|
||||||
keyword_cache = self.callmakervisitor.keyword_cache
|
|
||||||
if trailer:
|
if trailer:
|
||||||
self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename))
|
self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename))
|
||||||
|
|
||||||
|
@ -448,13 +547,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||||
self._handle_default_rule_body(node, rhs, result_type)
|
self._handle_default_rule_body(node, rhs, result_type)
|
||||||
self.print("}")
|
self.print("}")
|
||||||
|
|
||||||
def visit_NamedItem(self, node: NamedItem, names: List[str]) -> None:
|
def visit_NamedItem(self, node: NamedItem) -> None:
|
||||||
name, call = self.callmakervisitor.visit(node)
|
call = self.callmakervisitor.visit(node)
|
||||||
if not name:
|
if call.assigned_variable:
|
||||||
self.print(call)
|
call.assigned_variable = self.dedupe(call.assigned_variable)
|
||||||
else:
|
self.print(call)
|
||||||
name = dedupe(name, names)
|
|
||||||
self.print(f"({name} = {call})")
|
|
||||||
|
|
||||||
def visit_Rhs(
|
def visit_Rhs(
|
||||||
self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str]
|
self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str]
|
||||||
|
@ -464,7 +561,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||||
for alt in node.alts:
|
for alt in node.alts:
|
||||||
self.visit(alt, is_loop=is_loop, is_gather=is_gather, rulename=rulename)
|
self.visit(alt, is_loop=is_loop, is_gather=is_gather, rulename=rulename)
|
||||||
|
|
||||||
def join_conditions(self, keyword: str, node: Any, names: List[str]) -> None:
|
def join_conditions(self, keyword: str, node: Any) -> None:
|
||||||
self.print(f"{keyword} (")
|
self.print(f"{keyword} (")
|
||||||
with self.indent():
|
with self.indent():
|
||||||
first = True
|
first = True
|
||||||
|
@ -473,7 +570,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||||
first = False
|
first = False
|
||||||
else:
|
else:
|
||||||
self.print("&&")
|
self.print("&&")
|
||||||
self.visit(item, names=names)
|
self.visit(item)
|
||||||
self.print(")")
|
self.print(")")
|
||||||
|
|
||||||
def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None:
|
def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None:
|
||||||
|
@ -492,29 +589,34 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||||
f'fprintf(stderr, "Hit with action [%d-%d]: %s\\n", mark, p->mark, "{node}");'
|
f'fprintf(stderr, "Hit with action [%d-%d]: %s\\n", mark, p->mark, "{node}");'
|
||||||
)
|
)
|
||||||
|
|
||||||
def emit_default_action(self, is_gather: bool, names: List[str], node: Alt) -> None:
|
def emit_default_action(self, is_gather: bool, node: Alt) -> None:
|
||||||
if len(names) > 1:
|
if len(self.local_variable_names) > 1:
|
||||||
if is_gather:
|
if is_gather:
|
||||||
assert len(names) == 2
|
assert len(self.local_variable_names) == 2
|
||||||
self.print(f"res = _PyPegen_seq_insert_in_front(p, {names[0]}, {names[1]});")
|
self.print(
|
||||||
|
f"res = _PyPegen_seq_insert_in_front(p, "
|
||||||
|
f"{self.local_variable_names[0]}, {self.local_variable_names[1]});"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
if self.debug:
|
if self.debug:
|
||||||
self.print(
|
self.print(
|
||||||
f'fprintf(stderr, "Hit without action [%d:%d]: %s\\n", mark, p->mark, "{node}");'
|
f'fprintf(stderr, "Hit without action [%d:%d]: %s\\n", mark, p->mark, "{node}");'
|
||||||
)
|
)
|
||||||
self.print(f"res = _PyPegen_dummy_name(p, {', '.join(names)});")
|
self.print(
|
||||||
|
f"res = _PyPegen_dummy_name(p, {', '.join(self.local_variable_names)});"
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
if self.debug:
|
if self.debug:
|
||||||
self.print(
|
self.print(
|
||||||
f'fprintf(stderr, "Hit with default action [%d:%d]: %s\\n", mark, p->mark, "{node}");'
|
f'fprintf(stderr, "Hit with default action [%d:%d]: %s\\n", mark, p->mark, "{node}");'
|
||||||
)
|
)
|
||||||
self.print(f"res = {names[0]};")
|
self.print(f"res = {self.local_variable_names[0]};")
|
||||||
|
|
||||||
def emit_dummy_action(self) -> None:
|
def emit_dummy_action(self) -> None:
|
||||||
self.print(f"res = _PyPegen_dummy_name(p);")
|
self.print(f"res = _PyPegen_dummy_name(p);")
|
||||||
|
|
||||||
def handle_alt_normal(self, node: Alt, is_gather: bool, names: List[str]) -> None:
|
def handle_alt_normal(self, node: Alt, is_gather: bool) -> None:
|
||||||
self.join_conditions(keyword="if", node=node, names=names)
|
self.join_conditions(keyword="if", node=node)
|
||||||
self.print("{")
|
self.print("{")
|
||||||
# We have parsed successfully all the conditions for the option.
|
# We have parsed successfully all the conditions for the option.
|
||||||
with self.indent():
|
with self.indent():
|
||||||
|
@ -526,17 +628,15 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||||
elif node.action:
|
elif node.action:
|
||||||
self.emit_action(node)
|
self.emit_action(node)
|
||||||
else:
|
else:
|
||||||
self.emit_default_action(is_gather, names, node)
|
self.emit_default_action(is_gather, node)
|
||||||
|
|
||||||
# As the current option has parsed correctly, do not continue with the rest.
|
# As the current option has parsed correctly, do not continue with the rest.
|
||||||
self.print(f"goto done;")
|
self.print(f"goto done;")
|
||||||
self.print("}")
|
self.print("}")
|
||||||
|
|
||||||
def handle_alt_loop(
|
def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
|
||||||
self, node: Alt, is_gather: bool, rulename: Optional[str], names: List[str]
|
|
||||||
) -> None:
|
|
||||||
# Condition of the main body of the alternative
|
# Condition of the main body of the alternative
|
||||||
self.join_conditions(keyword="while", node=node, names=names)
|
self.join_conditions(keyword="while", node=node)
|
||||||
self.print("{")
|
self.print("{")
|
||||||
# We have parsed successfully one item!
|
# We have parsed successfully one item!
|
||||||
with self.indent():
|
with self.indent():
|
||||||
|
@ -548,7 +648,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||||
elif node.action:
|
elif node.action:
|
||||||
self.emit_action(node, cleanup_code="PyMem_Free(children);")
|
self.emit_action(node, cleanup_code="PyMem_Free(children);")
|
||||||
else:
|
else:
|
||||||
self.emit_default_action(is_gather, names, node)
|
self.emit_default_action(is_gather, node)
|
||||||
|
|
||||||
# Add the result of rule to the temporary buffer of children. This buffer
|
# Add the result of rule to the temporary buffer of children. This buffer
|
||||||
# will populate later an asdl_seq with all elements to return.
|
# will populate later an asdl_seq with all elements to return.
|
||||||
|
@ -580,47 +680,45 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||||
if v == "opt_var":
|
if v == "opt_var":
|
||||||
self.print("UNUSED(opt_var); // Silence compiler warnings")
|
self.print("UNUSED(opt_var); // Silence compiler warnings")
|
||||||
|
|
||||||
names: List[str] = []
|
with self.local_variable_context():
|
||||||
if is_loop:
|
if is_loop:
|
||||||
self.handle_alt_loop(node, is_gather, rulename, names)
|
self.handle_alt_loop(node, is_gather, rulename)
|
||||||
else:
|
else:
|
||||||
self.handle_alt_normal(node, is_gather, names)
|
self.handle_alt_normal(node, is_gather)
|
||||||
|
|
||||||
self.print("p->mark = mark;")
|
self.print("p->mark = mark;")
|
||||||
if "cut_var" in names:
|
if "cut_var" in vars:
|
||||||
self.print("if (cut_var) return NULL;")
|
self.print("if (cut_var) return NULL;")
|
||||||
self.print("}")
|
self.print("}")
|
||||||
|
|
||||||
def collect_vars(self, node: Alt) -> Dict[str, Optional[str]]:
|
def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]:
|
||||||
names: List[str] = []
|
|
||||||
types = {}
|
types = {}
|
||||||
for item in node.items:
|
with self.local_variable_context():
|
||||||
name, type = self.add_var(item, names)
|
for item in node.items:
|
||||||
types[name] = type
|
name, type = self.add_var(item)
|
||||||
|
types[name] = type
|
||||||
return types
|
return types
|
||||||
|
|
||||||
def add_var(self, node: NamedItem, names: List[str]) -> Tuple[str, Optional[str]]:
|
def add_var(self, node: NamedItem) -> Tuple[Optional[str], Optional[str]]:
|
||||||
name: str
|
call = self.callmakervisitor.visit(node.item)
|
||||||
call: str
|
if not call.assigned_variable:
|
||||||
name, call = self.callmakervisitor.visit(node.item)
|
return None, None
|
||||||
type = None
|
if call.nodetype == NodeTypes.CUT_OPERATOR:
|
||||||
if not name:
|
return call.assigned_variable, "int"
|
||||||
return name, type
|
|
||||||
if name.startswith("cut"):
|
name = call.assigned_variable
|
||||||
return name, "int"
|
rulename = call.metadata.get("rulename")
|
||||||
if name.endswith("_var"):
|
|
||||||
rulename = name[:-4]
|
type: Optional[str] = None
|
||||||
rule = self.rules.get(rulename)
|
|
||||||
if rule is not None:
|
assert self.all_rules is not None
|
||||||
if rule.is_loop() or rule.is_gather():
|
if rulename and rulename in self.all_rules:
|
||||||
type = "asdl_seq *"
|
rule = self.all_rules.get(rulename)
|
||||||
else:
|
if rule.is_loop() or rule.is_gather():
|
||||||
type = rule.type
|
|
||||||
elif name.startswith("_loop") or name.startswith("_gather"):
|
|
||||||
type = "asdl_seq *"
|
type = "asdl_seq *"
|
||||||
elif name in ("name_var", "string_var", "number_var"):
|
else:
|
||||||
type = "expr_ty"
|
type = rule.type
|
||||||
if node.name:
|
elif call.nodetype in BASE_NODETYPES.values():
|
||||||
name = node.name
|
type = "expr_ty"
|
||||||
name = dedupe(name, names)
|
|
||||||
return name, type
|
return self.dedupe(node.name if node.name else call.assigned_variable), type
|
||||||
|
|
|
@ -13,7 +13,6 @@ from pegen.grammar import (
|
||||||
NamedItem,
|
NamedItem,
|
||||||
Plain,
|
Plain,
|
||||||
NameLeaf,
|
NameLeaf,
|
||||||
StringLeaf,
|
|
||||||
Gather,
|
Gather,
|
||||||
)
|
)
|
||||||
from pegen.grammar import GrammarError, GrammarVisitor
|
from pegen.grammar import GrammarError, GrammarVisitor
|
||||||
|
@ -48,6 +47,18 @@ class ParserGenerator:
|
||||||
self.todo = self.rules.copy() # Rules to generate
|
self.todo = self.rules.copy() # Rules to generate
|
||||||
self.counter = 0 # For name_rule()/name_loop()
|
self.counter = 0 # For name_rule()/name_loop()
|
||||||
self.keyword_counter = 499 # For keyword_type()
|
self.keyword_counter = 499 # For keyword_type()
|
||||||
|
self.all_rules: Optional[Dict[str, Rule]] = None # Rules + temporal rules
|
||||||
|
self._local_variable_stack: List[List[str]] = []
|
||||||
|
|
||||||
|
@contextlib.contextmanager
|
||||||
|
def local_variable_context(self) -> Iterator[None]:
|
||||||
|
self._local_variable_stack.append([])
|
||||||
|
yield
|
||||||
|
self._local_variable_stack.pop()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def local_variable_names(self) -> List[str]:
|
||||||
|
return self._local_variable_stack[-1]
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def generate(self, filename: str) -> None:
|
def generate(self, filename: str) -> None:
|
||||||
|
@ -82,6 +93,7 @@ class ParserGenerator:
|
||||||
for rulename in todo:
|
for rulename in todo:
|
||||||
self.todo[rulename].collect_todo(self)
|
self.todo[rulename].collect_todo(self)
|
||||||
done = set(alltodo)
|
done = set(alltodo)
|
||||||
|
self.all_rules = self.todo.copy()
|
||||||
|
|
||||||
def keyword_type(self) -> int:
|
def keyword_type(self) -> int:
|
||||||
self.keyword_counter += 1
|
self.keyword_counter += 1
|
||||||
|
@ -109,26 +121,23 @@ class ParserGenerator:
|
||||||
self.counter += 1
|
self.counter += 1
|
||||||
extra_function_name = f"_loop0_{self.counter}"
|
extra_function_name = f"_loop0_{self.counter}"
|
||||||
extra_function_alt = Alt(
|
extra_function_alt = Alt(
|
||||||
[NamedItem(None, node.separator), NamedItem("elem", node.node),], action="elem",
|
[NamedItem(None, node.separator), NamedItem("elem", node.node)], action="elem",
|
||||||
)
|
)
|
||||||
self.todo[extra_function_name] = Rule(
|
self.todo[extra_function_name] = Rule(
|
||||||
extra_function_name, None, Rhs([extra_function_alt]),
|
extra_function_name, None, Rhs([extra_function_alt]),
|
||||||
)
|
)
|
||||||
alt = Alt(
|
alt = Alt([NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],)
|
||||||
[NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name)),],
|
|
||||||
)
|
|
||||||
self.todo[name] = Rule(name, None, Rhs([alt]),)
|
self.todo[name] = Rule(name, None, Rhs([alt]),)
|
||||||
return name
|
return name
|
||||||
|
|
||||||
|
def dedupe(self, name: str) -> str:
|
||||||
def dedupe(name: str, names: List[str]) -> str:
|
origname = name
|
||||||
origname = name
|
counter = 0
|
||||||
counter = 0
|
while name in self.local_variable_names:
|
||||||
while name in names:
|
counter += 1
|
||||||
counter += 1
|
name = f"{origname}_{counter}"
|
||||||
name = f"{origname}_{counter}"
|
self.local_variable_names.append(name)
|
||||||
names.append(name)
|
return name
|
||||||
return name
|
|
||||||
|
|
||||||
|
|
||||||
def compute_nullables(rules: Dict[str, Rule]) -> None:
|
def compute_nullables(rules: Dict[str, Rule]) -> None:
|
||||||
|
@ -153,13 +162,13 @@ def compute_left_recursives(
|
||||||
leaders = set(scc)
|
leaders = set(scc)
|
||||||
for start in scc:
|
for start in scc:
|
||||||
for cycle in sccutils.find_cycles_in_scc(graph, scc, start):
|
for cycle in sccutils.find_cycles_in_scc(graph, scc, start):
|
||||||
## print("Cycle:", " -> ".join(cycle))
|
# print("Cycle:", " -> ".join(cycle))
|
||||||
leaders -= scc - set(cycle)
|
leaders -= scc - set(cycle)
|
||||||
if not leaders:
|
if not leaders:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"SCC {scc} has no leadership candidate (no element is included in all cycles)"
|
f"SCC {scc} has no leadership candidate (no element is included in all cycles)"
|
||||||
)
|
)
|
||||||
## print("Leaders:", leaders)
|
# print("Leaders:", leaders)
|
||||||
leader = min(leaders) # Pick an arbitrary leader from the candidates.
|
leader = min(leaders) # Pick an arbitrary leader from the candidates.
|
||||||
rules[leader].leader = True
|
rules[leader].leader = True
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
from typing import Any, Dict, List, Optional, IO, Text, Tuple
|
from typing import Any, Dict, Optional, IO, Text, Tuple
|
||||||
|
|
||||||
from pegen.grammar import (
|
from pegen.grammar import (
|
||||||
Cut,
|
Cut,
|
||||||
|
@ -19,7 +19,7 @@ from pegen.grammar import (
|
||||||
Alt,
|
Alt,
|
||||||
)
|
)
|
||||||
from pegen import grammar
|
from pegen import grammar
|
||||||
from pegen.parser_generator import dedupe, ParserGenerator
|
from pegen.parser_generator import ParserGenerator
|
||||||
|
|
||||||
MODULE_PREFIX = """\
|
MODULE_PREFIX = """\
|
||||||
#!/usr/bin/env python3.8
|
#!/usr/bin/env python3.8
|
||||||
|
@ -173,7 +173,7 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
|
||||||
else:
|
else:
|
||||||
self.print("return None")
|
self.print("return None")
|
||||||
|
|
||||||
def visit_NamedItem(self, node: NamedItem, names: List[str]) -> None:
|
def visit_NamedItem(self, node: NamedItem) -> None:
|
||||||
name, call = self.callmakervisitor.visit(node.item)
|
name, call = self.callmakervisitor.visit(node.item)
|
||||||
if node.name:
|
if node.name:
|
||||||
name = node.name
|
name = node.name
|
||||||
|
@ -181,7 +181,7 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
|
||||||
self.print(call)
|
self.print(call)
|
||||||
else:
|
else:
|
||||||
if name != "cut":
|
if name != "cut":
|
||||||
name = dedupe(name, names)
|
name = self.dedupe(name)
|
||||||
self.print(f"({name} := {call})")
|
self.print(f"({name} := {call})")
|
||||||
|
|
||||||
def visit_Rhs(self, node: Rhs, is_loop: bool = False, is_gather: bool = False) -> None:
|
def visit_Rhs(self, node: Rhs, is_loop: bool = False, is_gather: bool = False) -> None:
|
||||||
|
@ -191,34 +191,36 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
|
||||||
self.visit(alt, is_loop=is_loop, is_gather=is_gather)
|
self.visit(alt, is_loop=is_loop, is_gather=is_gather)
|
||||||
|
|
||||||
def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None:
|
def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None:
|
||||||
names: List[str] = []
|
with self.local_variable_context():
|
||||||
self.print("cut = False") # TODO: Only if needed.
|
self.print("cut = False") # TODO: Only if needed.
|
||||||
if is_loop:
|
|
||||||
self.print("while (")
|
|
||||||
else:
|
|
||||||
self.print("if (")
|
|
||||||
with self.indent():
|
|
||||||
first = True
|
|
||||||
for item in node.items:
|
|
||||||
if first:
|
|
||||||
first = False
|
|
||||||
else:
|
|
||||||
self.print("and")
|
|
||||||
self.visit(item, names=names)
|
|
||||||
self.print("):")
|
|
||||||
with self.indent():
|
|
||||||
action = node.action
|
|
||||||
if not action:
|
|
||||||
if is_gather:
|
|
||||||
assert len(names) == 2
|
|
||||||
action = f"[{names[0]}] + {names[1]}"
|
|
||||||
else:
|
|
||||||
action = f"[{', '.join(names)}]"
|
|
||||||
if is_loop:
|
if is_loop:
|
||||||
self.print(f"children.append({action})")
|
self.print("while (")
|
||||||
self.print(f"mark = self.mark()")
|
|
||||||
else:
|
else:
|
||||||
self.print(f"return {action}")
|
self.print("if (")
|
||||||
self.print("self.reset(mark)")
|
with self.indent():
|
||||||
# Skip remaining alternatives if a cut was reached.
|
first = True
|
||||||
self.print("if cut: return None") # TODO: Only if needed.
|
for item in node.items:
|
||||||
|
if first:
|
||||||
|
first = False
|
||||||
|
else:
|
||||||
|
self.print("and")
|
||||||
|
self.visit(item)
|
||||||
|
self.print("):")
|
||||||
|
with self.indent():
|
||||||
|
action = node.action
|
||||||
|
if not action:
|
||||||
|
if is_gather:
|
||||||
|
assert len(self.local_variable_names) == 2
|
||||||
|
action = (
|
||||||
|
f"[{self.local_variable_names[0]}] + {self.local_variable_names[1]}"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
action = f"[{', '.join(self.local_variable_names)}]"
|
||||||
|
if is_loop:
|
||||||
|
self.print(f"children.append({action})")
|
||||||
|
self.print(f"mark = self.mark()")
|
||||||
|
else:
|
||||||
|
self.print(f"return {action}")
|
||||||
|
self.print("self.reset(mark)")
|
||||||
|
# Skip remaining alternatives if a cut was reached.
|
||||||
|
self.print("if cut: return None") # TODO: Only if needed.
|
||||||
|
|
Loading…
Reference in New Issue