[3.9] Backport GH-20370 and GH-20436: Soft keywords (GH-20458)
This commit is contained in:
parent
788d7bfe18
commit
1bfe659ee5
|
@ -402,3 +402,45 @@ class TestCParser(TempdirManager, unittest.TestCase):
|
||||||
parse.parse_string("a", mode=0)
|
parse.parse_string("a", mode=0)
|
||||||
"""
|
"""
|
||||||
self.run_test(grammar_source, test_source)
|
self.run_test(grammar_source, test_source)
|
||||||
|
|
||||||
|
def test_no_soft_keywords(self) -> None:
    # A grammar whose only literal is single-quoted ('foo') must not make
    # the generated C parser source mention the soft-keyword helper.
    # NOTE(review): the grammar literal is indented for readability; this
    # assumes parse_string() dedents its input -- confirm against the helper.
    grammar_source = """
    start: expr+ NEWLINE? ENDMARKER
    expr: 'foo'
    """
    grammar = parse_string(grammar_source, GrammarParser)
    parser_source = generate_c_parser_source(grammar)
    # Use the unittest assertion rather than a bare ``assert``: it is still
    # executed under ``python -O`` and reports the checked values on failure.
    self.assertNotIn("expect_soft_keyword", parser_source)
|
||||||
|
|
||||||
|
def test_soft_keywords(self) -> None:
    # A grammar containing a double-quoted literal ("foo") must make the
    # generated C parser source mention the soft-keyword helper.
    # NOTE(review): the grammar literal is indented for readability; this
    # assumes parse_string() dedents its input -- confirm against the helper.
    grammar_source = """
    start: expr+ NEWLINE? ENDMARKER
    expr: "foo"
    """
    grammar = parse_string(grammar_source, GrammarParser)
    parser_source = generate_c_parser_source(grammar)
    # Use the unittest assertion rather than a bare ``assert``: it is still
    # executed under ``python -O`` and reports the checked values on failure.
    self.assertIn("expect_soft_keyword", parser_source)
|
||||||
|
|
||||||
|
def test_soft_keywords_parse(self) -> None:
    # Grammar under test: the soft keyword "if" followed by ``expr '+' expr``,
    # where ``expr`` is any NAME -- so the word "if" may also appear as a
    # plain name in the operand positions.
    # NOTE(review): both literals are indented for readability; this assumes
    # run_test() dedents them before use -- confirm against the helper.
    grammar = """
    start: "if" expr '+' expr NEWLINE
    expr: NAME
    """
    # Snippet handed to run_test(): "if if + if" is listed as valid input,
    # "if if" (no '+' operand) as invalid.
    checks = """
    valid_cases = ["if if + if"]
    invalid_cases = ["if if"]
    self.check_input_strings_for_grammar(valid_cases, invalid_cases)
    """
    self.run_test(grammar, checks)
|
||||||
|
|
||||||
|
def test_soft_keywords_lookahead(self) -> None:
    # Same grammar as the plain soft-keyword parse test, except that the
    # start rule is prefixed with a positive lookahead on the soft keyword
    # (&"if") before the keyword itself is matched.
    # NOTE(review): both literals are indented for readability; this assumes
    # run_test() dedents them before use -- confirm against the helper.
    grammar = """
    start: &"if" "if" expr '+' expr NEWLINE
    expr: NAME
    """
    # Snippet handed to run_test(): "if if + if" is listed as valid input,
    # "if if" (no '+' operand) as invalid.
    checks = """
    valid_cases = ["if if + if"]
    invalid_cases = ["if if"]
    self.check_input_strings_for_grammar(valid_cases, invalid_cases)
    """
    self.run_test(grammar, checks)
|
||||||
|
|
|
@ -708,7 +708,6 @@ _PyPegen_is_memoized(Parser *p, int type, void *pres)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
_PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
|
_PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
|
||||||
{
|
{
|
||||||
|
@ -718,6 +717,15 @@ _PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
|
||||||
return (res != NULL) == positive;
|
return (res != NULL) == positive;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
_PyPegen_lookahead_with_string(int positive, expr_ty (func)(Parser *, const char*), Parser *p, const char* arg)
|
||||||
|
{
|
||||||
|
int mark = p->mark;
|
||||||
|
void *res = func(p, arg);
|
||||||
|
p->mark = mark;
|
||||||
|
return (res != NULL) == positive;
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
|
_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
|
||||||
{
|
{
|
||||||
|
@ -753,6 +761,30 @@ _PyPegen_expect_token(Parser *p, int type)
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
expr_ty
|
||||||
|
_PyPegen_expect_soft_keyword(Parser *p, const char *keyword)
|
||||||
|
{
|
||||||
|
if (p->mark == p->fill) {
|
||||||
|
if (_PyPegen_fill_token(p) < 0) {
|
||||||
|
p->error_indicator = 1;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Token *t = p->tokens[p->mark];
|
||||||
|
if (t->type != NAME) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
char* s = PyBytes_AsString(t->bytes);
|
||||||
|
if (!s) {
|
||||||
|
p->error_indicator = 1;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if (strcmp(s, keyword) != 0) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return _PyPegen_name_token(p);
|
||||||
|
}
|
||||||
|
|
||||||
Token *
|
Token *
|
||||||
_PyPegen_get_last_nonnwhitespace_token(Parser *p)
|
_PyPegen_get_last_nonnwhitespace_token(Parser *p)
|
||||||
{
|
{
|
||||||
|
|
|
@ -118,10 +118,12 @@ int _PyPegen_update_memo(Parser *p, int mark, int type, void *node);
|
||||||
int _PyPegen_is_memoized(Parser *p, int type, void *pres);
|
int _PyPegen_is_memoized(Parser *p, int type, void *pres);
|
||||||
|
|
||||||
int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
|
int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
|
||||||
|
int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*), Parser *, const char*);
|
||||||
int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
|
int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
|
||||||
int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
|
int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
|
||||||
|
|
||||||
Token *_PyPegen_expect_token(Parser *p, int type);
|
Token *_PyPegen_expect_token(Parser *p, int type);
|
||||||
|
expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword);
|
||||||
Token *_PyPegen_get_last_nonnwhitespace_token(Parser *);
|
Token *_PyPegen_get_last_nonnwhitespace_token(Parser *);
|
||||||
int _PyPegen_fill_token(Parser *p);
|
int _PyPegen_fill_token(Parser *p);
|
||||||
expr_ty _PyPegen_name_token(Parser *p);
|
expr_ty _PyPegen_name_token(Parser *p);
|
||||||
|
|
|
@ -58,7 +58,8 @@ class NodeTypes(Enum):
|
||||||
STRING_TOKEN = 2
|
STRING_TOKEN = 2
|
||||||
GENERIC_TOKEN = 3
|
GENERIC_TOKEN = 3
|
||||||
KEYWORD = 4
|
KEYWORD = 4
|
||||||
CUT_OPERATOR = 5
|
SOFT_KEYWORD = 5
|
||||||
|
CUT_OPERATOR = 6
|
||||||
|
|
||||||
|
|
||||||
BASE_NODETYPES = {
|
BASE_NODETYPES = {
|
||||||
|
@ -117,6 +118,16 @@ class CCallMakerVisitor(GrammarVisitor):
|
||||||
comment=f"token='{keyword}'",
|
comment=f"token='{keyword}'",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def soft_keyword_helper(self, value: str) -> FunctionCall:
    """Describe the C call that matches the soft keyword *value*.

    The returned FunctionCall targets ``_PyPegen_expect_soft_keyword`` with
    arguments ``p`` and *value*, stores the ``expr_ty`` result in
    ``_keyword`` and is tagged ``NodeTypes.SOFT_KEYWORD``.

    NOTE(review): *value* is forwarded unchanged as the second argument;
    the visible caller passes the raw grammar literal -- presumably still
    quoted so it renders as a C string; confirm.
    """
    call_arguments = ["p", value]
    description = f"soft_keyword='{value}'"
    return FunctionCall(
        function="_PyPegen_expect_soft_keyword",
        arguments=call_arguments,
        assigned_variable="_keyword",
        return_type="expr_ty",
        nodetype=NodeTypes.SOFT_KEYWORD,
        comment=description,
    )
|
||||||
|
|
||||||
def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall:
|
def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall:
|
||||||
name = node.value
|
name = node.value
|
||||||
if name in self.non_exact_tokens:
|
if name in self.non_exact_tokens:
|
||||||
|
@ -154,7 +165,10 @@ class CCallMakerVisitor(GrammarVisitor):
|
||||||
def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall:
|
def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall:
|
||||||
val = ast.literal_eval(node.value)
|
val = ast.literal_eval(node.value)
|
||||||
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
|
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
|
||||||
return self.keyword_helper(val)
|
if node.value.endswith("'"):
|
||||||
|
return self.keyword_helper(val)
|
||||||
|
else:
|
||||||
|
return self.soft_keyword_helper(node.value)
|
||||||
else:
|
else:
|
||||||
assert val in self.exact_tokens, f"{node.value} is not a known literal"
|
assert val in self.exact_tokens, f"{node.value} is not a known literal"
|
||||||
type = self.exact_tokens[val]
|
type = self.exact_tokens[val]
|
||||||
|
@ -204,6 +218,12 @@ class CCallMakerVisitor(GrammarVisitor):
|
||||||
arguments=[positive, call.function, *call.arguments],
|
arguments=[positive, call.function, *call.arguments],
|
||||||
return_type="int",
|
return_type="int",
|
||||||
)
|
)
|
||||||
|
elif call.nodetype == NodeTypes.SOFT_KEYWORD:
|
||||||
|
return FunctionCall(
|
||||||
|
function=f"_PyPegen_lookahead_with_string",
|
||||||
|
arguments=[positive, call.function, *call.arguments],
|
||||||
|
return_type="int",
|
||||||
|
)
|
||||||
elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}:
|
elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}:
|
||||||
return FunctionCall(
|
return FunctionCall(
|
||||||
function=f"_PyPegen_lookahead_with_int",
|
function=f"_PyPegen_lookahead_with_int",
|
||||||
|
@ -656,8 +676,9 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||||
self.print("{")
|
self.print("{")
|
||||||
# We have parsed successfully all the conditions for the option.
|
# We have parsed successfully all the conditions for the option.
|
||||||
with self.indent():
|
with self.indent():
|
||||||
|
node_str = str(node).replace('"', '\\"')
|
||||||
self.print(
|
self.print(
|
||||||
f'D(fprintf(stderr, "%*c+ {rulename}[%d-%d]: %s succeeded!\\n", p->level, \' \', _mark, p->mark, "{node}"));'
|
f'D(fprintf(stderr, "%*c+ {rulename}[%d-%d]: %s succeeded!\\n", p->level, \' \', _mark, p->mark, "{node_str}"));'
|
||||||
)
|
)
|
||||||
# Prepare to emmit the rule action and do so
|
# Prepare to emmit the rule action and do so
|
||||||
if node.action and "EXTRA" in node.action:
|
if node.action and "EXTRA" in node.action:
|
||||||
|
@ -710,8 +731,9 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||||
self.print(f"{{ // {node}")
|
self.print(f"{{ // {node}")
|
||||||
with self.indent():
|
with self.indent():
|
||||||
self._check_for_errors()
|
self._check_for_errors()
|
||||||
|
node_str = str(node).replace('"', '\\"')
|
||||||
self.print(
|
self.print(
|
||||||
f'D(fprintf(stderr, "%*c> {rulename}[%d-%d]: %s\\n", p->level, \' \', _mark, p->mark, "{node}"));'
|
f'D(fprintf(stderr, "%*c> {rulename}[%d-%d]: %s\\n", p->level, \' \', _mark, p->mark, "{node_str}"));'
|
||||||
)
|
)
|
||||||
# Prepare variable declarations for the alternative
|
# Prepare variable declarations for the alternative
|
||||||
vars = self.collect_vars(node)
|
vars = self.collect_vars(node)
|
||||||
|
@ -733,9 +755,10 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
||||||
self.handle_alt_normal(node, is_gather, rulename)
|
self.handle_alt_normal(node, is_gather, rulename)
|
||||||
|
|
||||||
self.print("p->mark = _mark;")
|
self.print("p->mark = _mark;")
|
||||||
|
node_str = str(node).replace('"', '\\"')
|
||||||
self.print(
|
self.print(
|
||||||
f"D(fprintf(stderr, \"%*c%s {rulename}[%d-%d]: %s failed!\\n\", p->level, ' ',\n"
|
f"D(fprintf(stderr, \"%*c%s {rulename}[%d-%d]: %s failed!\\n\", p->level, ' ',\n"
|
||||||
f' p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "{node}"));'
|
f' p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "{node_str}"));'
|
||||||
)
|
)
|
||||||
if "_cut_var" in vars:
|
if "_cut_var" in vars:
|
||||||
self.print("if (_cut_var) {")
|
self.print("if (_cut_var) {")
|
||||||
|
|
Loading…
Reference in New Issue