Fix lookahead of soft keywords in the PEG parser (GH-20436)

Automerge-Triggered-By: @gvanrossum
This commit is contained in:
Pablo Galindo 2020-05-27 00:15:52 +01:00 committed by GitHub
parent 21fda91f8d
commit 404b23b85b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 31 additions and 2 deletions

View File

@ -432,3 +432,15 @@ class TestCParser(TempdirManager, unittest.TestCase):
self.check_input_strings_for_grammar(valid_cases, invalid_cases) self.check_input_strings_for_grammar(valid_cases, invalid_cases)
""" """
self.run_test(grammar_source, test_source) self.run_test(grammar_source, test_source)
# Regression test for lookahead on soft keywords: the grammar's start rule
# opens with a positive lookahead (&"if") immediately followed by the same
# "if" item, so the lookahead must succeed without consuming the token.
def test_soft_keywords_lookahead(self) -> None:
# Grammar handed to run_test; the text between the triple quotes is runtime
# data and is left untouched.
grammar_source = """
start: &"if" "if" expr '+' expr NEWLINE
expr: NAME
"""
# Test body handed to run_test: "if if + if" is listed as accepted input,
# while "if if" (no '+' expr tail) is listed as rejected input.
test_source = """
valid_cases = ["if if + if"]
invalid_cases = ["if if"]
self.check_input_strings_for_grammar(valid_cases, invalid_cases)
"""
# Pass both the grammar and the test body to the shared run_test helper.
self.run_test(grammar_source, test_source)

View File

@ -718,6 +718,15 @@ _PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
return (res != NULL) == positive; return (res != NULL) == positive;
} }
/* Lookahead helper for parsing functions that take a string argument.
 *
 * Calls func(p, arg), then puts p->mark back to the value it had on entry so
 * the parser position is unchanged whether or not func matched.
 *
 * Returns non-zero when "func returned non-NULL" equals `positive`.  The
 * match result is a 0/1 value compared with ==, so callers are expected to
 * pass 0 or 1 for `positive`.
 */
int
_PyPegen_lookahead_with_string(int positive, expr_ty (func)(Parser *, const char*), Parser *p, const char* arg)
{
    const int saved_mark = p->mark;
    const int matched = (func(p, arg) != NULL);

    /* Rewind: a lookahead must not consume input. */
    p->mark = saved_mark;
    return matched == positive;
}
int int
_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg) _PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
{ {

View File

@ -119,6 +119,7 @@ int _PyPegen_is_memoized(Parser *p, int type, void *pres);
int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *); int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int); int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*), Parser *, const char*);
int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *); int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
Token *_PyPegen_expect_token(Parser *p, int type); Token *_PyPegen_expect_token(Parser *p, int type);

View File

@ -58,7 +58,8 @@ class NodeTypes(Enum):
STRING_TOKEN = 2 STRING_TOKEN = 2
GENERIC_TOKEN = 3 GENERIC_TOKEN = 3
KEYWORD = 4 KEYWORD = 4
CUT_OPERATOR = 5 SOFT_KEYWORD = 5
CUT_OPERATOR = 6
BASE_NODETYPES = { BASE_NODETYPES = {
@ -123,7 +124,7 @@ class CCallMakerVisitor(GrammarVisitor):
function="_PyPegen_expect_soft_keyword", function="_PyPegen_expect_soft_keyword",
arguments=["p", value], arguments=["p", value],
return_type="expr_ty", return_type="expr_ty",
nodetype=NodeTypes.NAME_TOKEN, nodetype=NodeTypes.SOFT_KEYWORD,
comment=f"soft_keyword='{value}'", comment=f"soft_keyword='{value}'",
) )
@ -217,6 +218,12 @@ class CCallMakerVisitor(GrammarVisitor):
arguments=[positive, call.function, *call.arguments], arguments=[positive, call.function, *call.arguments],
return_type="int", return_type="int",
) )
elif call.nodetype == NodeTypes.SOFT_KEYWORD:
return FunctionCall(
function=f"_PyPegen_lookahead_with_string",
arguments=[positive, call.function, *call.arguments],
return_type="int",
)
elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}: elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}:
return FunctionCall( return FunctionCall(
function=f"_PyPegen_lookahead_with_int", function=f"_PyPegen_lookahead_with_int",