Update pegen to use the latest upstream developments (GH-27586)

Pablo Galindo Salgado 2021-08-12 17:37:30 +01:00 committed by GitHub
parent 8e832fb2a2
commit 953d27261e
26 changed files with 1243 additions and 673 deletions

View File

@ -11,8 +11,8 @@ from test import support
from test.support import os_helper
from test.support.script_helper import assert_python_ok
_py_cflags_nodist = sysconfig.get_config_var('PY_CFLAGS_NODIST')
_pgo_flag = sysconfig.get_config_var('PGO_PROF_USE_FLAG')
_py_cflags_nodist = sysconfig.get_config_var("PY_CFLAGS_NODIST")
_pgo_flag = sysconfig.get_config_var("PGO_PROF_USE_FLAG")
if _pgo_flag and _py_cflags_nodist and _pgo_flag in _py_cflags_nodist:
raise unittest.SkipTest("peg_generator test disabled under PGO build")
@ -458,3 +458,28 @@ class TestCParser(unittest.TestCase):
self.check_input_strings_for_grammar(valid_cases, invalid_cases)
"""
self.run_test(grammar_source, test_source)
def test_forced(self) -> None:
grammar_source = """
start: NAME &&':' | NAME
"""
test_source = """
self.assertEqual(parse.parse_string("number :", mode=0), None)
with self.assertRaises(SyntaxError) as e:
parse.parse_string("a", mode=0)
self.assertIn("expected ':'", str(e.exception))
"""
self.run_test(grammar_source, test_source)
def test_forced_with_group(self) -> None:
grammar_source = """
start: NAME &&(':' | ';') | NAME
"""
test_source = """
self.assertEqual(parse.parse_string("number :", mode=0), None)
self.assertEqual(parse.parse_string("number ;", mode=0), None)
with self.assertRaises(SyntaxError) as e:
parse.parse_string("a", mode=0)
self.assertIn("expected (':' | ';')", e.exception.args[0])
"""
self.run_test(grammar_source, test_source)
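
Note: the && marker exercised above is pegen's "forced" syntax: once an alternative reaches it, the following token (or group) must be present, and a missing match raises SyntaxError right away instead of backtracking to the next alternative. A minimal sketch of the same behaviour through the pure-Python helpers used elsewhere in this commit, assuming Tools/peg_generator is importable as in these tests:

from pegen.testutil import make_parser, parse_string

# Grammar taken from test_forced above; &&':' forces a ':' after NAME.
grammar = """
start: NAME &&':' | NAME
"""
parser_class = make_parser(grammar)

parse_string("number :", parser_class)   # NAME followed by the forced ':' parses fine

try:
    parse_string("a", parser_class)      # reaches the forced token and fails hard
except SyntaxError as exc:
    print(exc)                           # message includes "expected ':'"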

View File

@ -3,8 +3,8 @@ import unittest
from test import test_tools
from typing import Dict, Set
test_tools.skip_if_missing('peg_generator')
with test_tools.imports_under_tool('peg_generator'):
test_tools.skip_if_missing("peg_generator")
with test_tools.imports_under_tool("peg_generator"):
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.testutil import parse_string
from pegen.first_sets import FirstSetCalculator
@ -23,29 +23,38 @@ class TestFirstSets(unittest.TestCase):
A: 'a' | '-'
B: 'b' | '+'
"""
self.assertEqual(self.calculate_first_sets(grammar), {
"A": {"'a'", "'-'"},
"B": {"'+'", "'b'"},
"expr": {"'+'", "'a'", "'b'", "'-'"},
"start": {"'+'", "'a'", "'b'", "'-'"},
})
self.assertEqual(
self.calculate_first_sets(grammar),
{
"A": {"'a'", "'-'"},
"B": {"'+'", "'b'"},
"expr": {"'+'", "'a'", "'b'", "'-'"},
"start": {"'+'", "'a'", "'b'", "'-'"},
},
)
def test_optionals(self) -> None:
grammar = """
start: expr NEWLINE
expr: ['a'] ['b'] 'c'
"""
self.assertEqual(self.calculate_first_sets(grammar), {
"expr": {"'c'", "'a'", "'b'"},
"start": {"'c'", "'a'", "'b'"},
})
self.assertEqual(
self.calculate_first_sets(grammar),
{
"expr": {"'c'", "'a'", "'b'"},
"start": {"'c'", "'a'", "'b'"},
},
)
def test_repeat_with_separator(self) -> None:
grammar = """
start: ','.thing+ NEWLINE
thing: NUMBER
"""
self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
self.assertEqual(
self.calculate_first_sets(grammar),
{"thing": {"NUMBER"}, "start": {"NUMBER"}},
)
def test_optional_operator(self) -> None:
grammar = """
@ -53,11 +62,14 @@ class TestFirstSets(unittest.TestCase):
sum: (term)? 'b'
term: NUMBER
"""
self.assertEqual(self.calculate_first_sets(grammar), {
"term": {"NUMBER"},
"sum": {"NUMBER", "'b'"},
"start": {"'b'", "NUMBER"},
})
self.assertEqual(
self.calculate_first_sets(grammar),
{
"term": {"NUMBER"},
"sum": {"NUMBER", "'b'"},
"start": {"'b'", "NUMBER"},
},
)
def test_optional_literal(self) -> None:
grammar = """
@ -65,60 +77,83 @@ class TestFirstSets(unittest.TestCase):
sum: '+' ? term
term: NUMBER
"""
self.assertEqual(self.calculate_first_sets(grammar), {
"term": {"NUMBER"},
"sum": {"'+'", "NUMBER"},
"start": {"'+'", "NUMBER"},
})
self.assertEqual(
self.calculate_first_sets(grammar),
{
"term": {"NUMBER"},
"sum": {"'+'", "NUMBER"},
"start": {"'+'", "NUMBER"},
},
)
def test_optional_after(self) -> None:
grammar = """
start: term NEWLINE
term: NUMBER ['+']
"""
self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"NUMBER"}})
self.assertEqual(
self.calculate_first_sets(grammar),
{"term": {"NUMBER"}, "start": {"NUMBER"}},
)
def test_optional_before(self) -> None:
grammar = """
start: term NEWLINE
term: ['+'] NUMBER
"""
self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}})
self.assertEqual(
self.calculate_first_sets(grammar),
{"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}},
)
def test_repeat_0(self) -> None:
grammar = """
start: thing* "+" NEWLINE
thing: NUMBER
"""
self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}})
self.assertEqual(
self.calculate_first_sets(grammar),
{"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}},
)
def test_repeat_0_with_group(self) -> None:
grammar = """
start: ('+' '-')* term NEWLINE
term: NUMBER
"""
self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}})
self.assertEqual(
self.calculate_first_sets(grammar),
{"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}},
)
def test_repeat_1(self) -> None:
grammar = """
start: thing+ '-' NEWLINE
thing: NUMBER
"""
self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
self.assertEqual(
self.calculate_first_sets(grammar),
{"thing": {"NUMBER"}, "start": {"NUMBER"}},
)
def test_repeat_1_with_group(self) -> None:
grammar = """
start: ('+' term)+ term NEWLINE
term: NUMBER
"""
self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}})
self.assertEqual(
self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}}
)
def test_gather(self) -> None:
grammar = """
start: ','.thing+ NEWLINE
thing: NUMBER
"""
self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
self.assertEqual(
self.calculate_first_sets(grammar),
{"thing": {"NUMBER"}, "start": {"NUMBER"}},
)
def test_positive_lookahead(self) -> None:
grammar = """
@ -126,11 +161,14 @@ class TestFirstSets(unittest.TestCase):
expr: &'a' opt
opt: 'a' | 'b' | 'c'
"""
self.assertEqual(self.calculate_first_sets(grammar), {
"expr": {"'a'"},
"start": {"'a'"},
"opt": {"'b'", "'c'", "'a'"},
})
self.assertEqual(
self.calculate_first_sets(grammar),
{
"expr": {"'a'"},
"start": {"'a'"},
"opt": {"'b'", "'c'", "'a'"},
},
)
def test_negative_lookahead(self) -> None:
grammar = """
@ -138,11 +176,14 @@ class TestFirstSets(unittest.TestCase):
expr: !'a' opt
opt: 'a' | 'b' | 'c'
"""
self.assertEqual(self.calculate_first_sets(grammar), {
"opt": {"'b'", "'a'", "'c'"},
"expr": {"'b'", "'c'"},
"start": {"'b'", "'c'"},
})
self.assertEqual(
self.calculate_first_sets(grammar),
{
"opt": {"'b'", "'a'", "'c'"},
"expr": {"'b'", "'c'"},
"start": {"'b'", "'c'"},
},
)
def test_left_recursion(self) -> None:
grammar = """
@ -153,21 +194,27 @@ class TestFirstSets(unittest.TestCase):
bar: 'bar'
baz: 'baz'
"""
self.assertEqual(self.calculate_first_sets(grammar), {
"expr": {"NUMBER", "'-'"},
"term": {"NUMBER"},
"start": {"NUMBER", "'-'"},
"foo": {"'foo'"},
"bar": {"'bar'"},
"baz": {"'baz'"},
})
self.assertEqual(
self.calculate_first_sets(grammar),
{
"expr": {"NUMBER", "'-'"},
"term": {"NUMBER"},
"start": {"NUMBER", "'-'"},
"foo": {"'foo'"},
"bar": {"'bar'"},
"baz": {"'baz'"},
},
)
def test_advance_left_recursion(self) -> None:
grammar = """
start: NUMBER | sign start
sign: ['-']
"""
self.assertEqual(self.calculate_first_sets(grammar), {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}})
self.assertEqual(
self.calculate_first_sets(grammar),
{"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}},
)
def test_mutual_left_recursion(self) -> None:
grammar = """
@ -175,11 +222,14 @@ class TestFirstSets(unittest.TestCase):
foo: bar 'A' | 'B'
bar: foo 'C' | 'D'
"""
self.assertEqual(self.calculate_first_sets(grammar), {
"foo": {"'D'", "'B'"},
"bar": {"'D'"},
"start": {"'D'", "'B'"},
})
self.assertEqual(
self.calculate_first_sets(grammar),
{
"foo": {"'D'", "'B'"},
"bar": {"'D'"},
"start": {"'D'", "'B'"},
},
)
def test_nasty_left_recursion(self) -> None:
# TODO: Validate this
@ -188,7 +238,10 @@ class TestFirstSets(unittest.TestCase):
target: maybe '+' | NAME
maybe: maybe '-' | target
"""
self.assertEqual(self.calculate_first_sets(grammar), {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}})
self.assertEqual(
self.calculate_first_sets(grammar),
{"maybe": set(), "target": {"NAME"}, "start": {"NAME"}},
)
def test_nullable_rule(self) -> None:
grammar = """
@ -196,17 +249,22 @@ class TestFirstSets(unittest.TestCase):
sign: ['-']
thing: NUMBER
"""
self.assertEqual(self.calculate_first_sets(grammar), {
"sign": {"", "'-'"},
"thing": {"NUMBER"},
"start": {"NUMBER", "'-'"},
})
self.assertEqual(
self.calculate_first_sets(grammar),
{
"sign": {"", "'-'"},
"thing": {"NUMBER"},
"start": {"NUMBER", "'-'"},
},
)
def test_epsilon_production_in_start_rule(self) -> None:
grammar = """
start: ['-'] $
"""
self.assertEqual(self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}})
self.assertEqual(
self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}}
)
def test_multiple_nullable_rules(self) -> None:
grammar = """
@ -216,10 +274,13 @@ class TestFirstSets(unittest.TestCase):
other: '*'
another: '/'
"""
self.assertEqual(self.calculate_first_sets(grammar), {
"sign": {"", "'-'"},
"thing": {"'+'", ""},
"start": {"'+'", "'-'", "'*'"},
"other": {"'*'"},
"another": {"'/'"},
})
self.assertEqual(
self.calculate_first_sets(grammar),
{
"sign": {"", "'-'"},
"thing": {"'+'", ""},
"start": {"'+'", "'-'", "'*'"},
"other": {"'*'"},
"another": {"'/'"},
},
)
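
The assertions above go through a small calculate_first_sets helper defined near the top of this test file; a hedged sketch of the API it is assumed to wrap (the grammar below is illustrative, not taken from the tests):

from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.testutil import parse_string
from pegen.first_sets import FirstSetCalculator

# Assumed wiring: parse a grammar, then compute the first set of each rule.
grammar_source = """
start: sign NUMBER
sign: ['-']
"""
grammar = parse_string(grammar_source, GrammarParser)
first_sets = FirstSetCalculator(grammar.rules).calculate()
print(first_sets)  # roughly {"sign": {"", "'-'"}, "start": {"'-'", "NUMBER"}}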

View File

@ -1,8 +1,8 @@
import unittest
from test import test_tools
test_tools.skip_if_missing('peg_generator')
with test_tools.imports_under_tool('peg_generator'):
test_tools.skip_if_missing("peg_generator")
with test_tools.imports_under_tool("peg_generator"):
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.validator import SubRuleValidator, ValidationError
from pegen.testutil import parse_string

View File

@ -1,3 +1,5 @@
import ast
import difflib
import io
import textwrap
import unittest
@ -6,14 +8,10 @@ from test import test_tools
from typing import Dict, Any
from tokenize import TokenInfo, NAME, NEWLINE, NUMBER, OP
test_tools.skip_if_missing('peg_generator')
with test_tools.imports_under_tool('peg_generator'):
test_tools.skip_if_missing("peg_generator")
with test_tools.imports_under_tool("peg_generator"):
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.testutil import (
parse_string,
generate_parser,
make_parser
)
from pegen.testutil import parse_string, generate_parser, make_parser
from pegen.grammar import GrammarVisitor, GrammarError, Grammar
from pegen.grammar_visualizer import ASTGrammarPrinter
from pegen.parser import Parser
@ -38,7 +36,9 @@ class TestPegen(unittest.TestCase):
# Check the str() and repr() of a few rules; AST nodes don't support ==.
self.assertEqual(str(rules["start"]), "start: sum NEWLINE")
self.assertEqual(str(rules["sum"]), "sum: term '+' term | term")
expected_repr = "Rule('term', None, Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
expected_repr = (
"Rule('term', None, Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
)
self.assertEqual(repr(rules["term"]), expected_repr)
def test_long_rule_str(self) -> None:
@ -71,7 +71,7 @@ class TestPegen(unittest.TestCase):
self.assertEqual(str(rules["sum"]), "sum: term '+' term | term")
self.assertEqual(
repr(rules["term"]),
"Rule('term', 'int', Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
"Rule('term', 'int', Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))",
)
def test_gather(self) -> None:
@ -81,24 +81,31 @@ class TestPegen(unittest.TestCase):
"""
rules = parse_string(grammar, GrammarParser).rules
self.assertEqual(str(rules["start"]), "start: ','.thing+ NEWLINE")
self.assertTrue(repr(rules["start"]).startswith(
"Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'"
))
self.assertTrue(
repr(rules["start"]).startswith(
"Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'"
)
)
self.assertEqual(str(rules["thing"]), "thing: NUMBER")
parser_class = make_parser(grammar)
node = parse_string("42\n", parser_class)
assert node == [
[[TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n")]],
TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
]
node = parse_string("1, 2\n", parser_class)
assert node == [
self.assertEqual(
node,
[
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2\n")],
[TokenInfo(NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2\n")],
[
TokenInfo(
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2\n"
),
TokenInfo(
NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2\n"
),
],
TokenInfo(
NEWLINE, string="\n", start=(1, 4), end=(1, 5), line="1, 2\n"
),
],
TokenInfo(NEWLINE, string="\n", start=(1, 4), end=(1, 5), line="1, 2\n"),
]
)
def test_expr_grammar(self) -> None:
grammar = """
@ -108,10 +115,13 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("42\n", parser_class)
self.assertEqual(node, [
[[TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n")]],
TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
])
self.assertEqual(
node,
[
TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n"),
TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
],
)
def test_optional_operator(self) -> None:
grammar = """
@ -120,22 +130,39 @@ class TestPegen(unittest.TestCase):
term: NUMBER
"""
parser_class = make_parser(grammar)
node = parse_string("1+2\n", parser_class)
self.assertEqual(node, [
node = parse_string("1 + 2\n", parser_class)
self.assertEqual(
node,
[
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+2\n")],
[
TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+2\n"),
[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1+2\n")],
TokenInfo(
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n"
),
[
TokenInfo(
OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"
),
TokenInfo(
NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n"
),
],
],
TokenInfo(
NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"
),
],
TokenInfo(NEWLINE, string="\n", start=(1, 3), end=(1, 4), line="1+2\n"),
])
)
node = parse_string("1\n", parser_class)
self.assertEqual(node, [
[[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None],
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
])
self.assertEqual(
node,
[
[
TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
None,
],
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
],
)
def test_optional_literal(self) -> None:
grammar = """
@ -145,18 +172,29 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("1+\n", parser_class)
self.assertEqual(node, [
self.assertEqual(
node,
[
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n")],
TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"),
[
TokenInfo(
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n"
),
TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"),
],
TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"),
],
TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"),
])
)
node = parse_string("1\n", parser_class)
self.assertEqual(node, [
[[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None],
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
])
self.assertEqual(
node,
[
[
TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
None,
],
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
],
)
def test_alt_optional_operator(self) -> None:
grammar = """
@ -166,21 +204,38 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("1 + 2\n", parser_class)
self.assertEqual(node, [
self.assertEqual(
node,
[
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n")],
[
TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"),
[TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n")],
TokenInfo(
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n"
),
[
TokenInfo(
OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"
),
TokenInfo(
NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n"
),
],
],
TokenInfo(
NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"
),
],
TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"),
])
)
node = parse_string("1\n", parser_class)
self.assertEqual(node, [
[[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None],
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
])
self.assertEqual(
node,
[
[
TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
None,
],
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
],
)
def test_repeat_0_simple(self) -> None:
grammar = """
@ -189,20 +244,32 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("1 2 3\n", parser_class)
self.assertEqual(node, [
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n")],
self.assertEqual(
node,
[
[[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n")]],
[[TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n")]],
TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"),
[
TokenInfo(
NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n"
),
TokenInfo(
NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n"
),
],
TokenInfo(
NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"
),
],
TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"),
])
)
node = parse_string("1\n", parser_class)
self.assertEqual(node, [
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")],
[],
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
])
self.assertEqual(
node,
[
TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
[],
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
],
)
def test_repeat_0_complex(self) -> None:
grammar = """
@ -211,24 +278,43 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("1 + 2 + 3\n", parser_class)
self.assertEqual(node, [
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")],
self.assertEqual(
node,
[
TokenInfo(
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n"
),
[
[
TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"),
[TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")],
]
],
[
TokenInfo(
OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
),
TokenInfo(
NUMBER,
string="2",
start=(1, 4),
end=(1, 5),
line="1 + 2 + 3\n",
),
],
[
TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"),
[TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")],
]
TokenInfo(
OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
),
TokenInfo(
NUMBER,
string="3",
start=(1, 8),
end=(1, 9),
line="1 + 2 + 3\n",
),
],
],
TokenInfo(
NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
),
],
TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"),
])
)
def test_repeat_1_simple(self) -> None:
grammar = """
@ -237,14 +323,23 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("1 2 3\n", parser_class)
self.assertEqual(node, [
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n")],
self.assertEqual(
node,
[
[[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n")]],
[[TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n")]],
TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"),
[
TokenInfo(
NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n"
),
TokenInfo(
NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n"
),
],
TokenInfo(
NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"
),
],
TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"),
])
)
with self.assertRaises(SyntaxError):
parse_string("1\n", parser_class)
@ -255,24 +350,43 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("1 + 2 + 3\n", parser_class)
self.assertEqual(node, [
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")],
self.assertEqual(
node,
[
TokenInfo(
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n"
),
[
[
TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"),
[TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")],
]
],
[
TokenInfo(
OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
),
TokenInfo(
NUMBER,
string="2",
start=(1, 4),
end=(1, 5),
line="1 + 2 + 3\n",
),
],
[
TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"),
[TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")],
]
TokenInfo(
OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
),
TokenInfo(
NUMBER,
string="3",
start=(1, 8),
end=(1, 9),
line="1 + 2 + 3\n",
),
],
],
TokenInfo(
NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
),
],
TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"),
])
)
with self.assertRaises(SyntaxError):
parse_string("1\n", parser_class)
@ -283,14 +397,25 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("1, 2, 3\n", parser_class)
self.assertEqual(node, [
self.assertEqual(
node,
[
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n")],
[TokenInfo(NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n")],
[TokenInfo(NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n")],
[
TokenInfo(
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n"
),
TokenInfo(
NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n"
),
TokenInfo(
NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n"
),
],
TokenInfo(
NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n"
),
],
TokenInfo(NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n"),
])
)
def test_left_recursive(self) -> None:
grammar_source = """
@ -311,18 +436,41 @@ class TestPegen(unittest.TestCase):
self.assertFalse(rules["bar"].left_recursive)
self.assertFalse(rules["baz"].left_recursive)
node = parse_string("1 + 2 + 3\n", parser_class)
self.assertEqual(node, [
self.assertEqual(
node,
[
[
[[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")]],
TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"),
[TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")],
[
TokenInfo(
NUMBER,
string="1",
start=(1, 0),
end=(1, 1),
line="1 + 2 + 3\n",
),
TokenInfo(
OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
),
TokenInfo(
NUMBER,
string="2",
start=(1, 4),
end=(1, 5),
line="1 + 2 + 3\n",
),
],
TokenInfo(
OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
),
TokenInfo(
NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n"
),
],
TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"),
[TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")],
TokenInfo(
NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
),
],
TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"),
])
)
def test_python_expr(self) -> None:
grammar = """
@ -392,31 +540,79 @@ class TestPegen(unittest.TestCase):
exec(out.getvalue(), ns)
parser_class: Type[Parser] = ns["GeneratedParser"]
node = parse_string("D A C A E", parser_class)
self.assertEqual(node, [
self.assertEqual(
node,
[
[
[
[TokenInfo(type=NAME, string="D", start=(1, 0), end=(1, 1), line="D A C A E")],
TokenInfo(type=NAME, string="A", start=(1, 2), end=(1, 3), line="D A C A E"),
[
TokenInfo(
type=NAME,
string="D",
start=(1, 0),
end=(1, 1),
line="D A C A E",
),
TokenInfo(
type=NAME,
string="A",
start=(1, 2),
end=(1, 3),
line="D A C A E",
),
],
TokenInfo(
type=NAME,
string="C",
start=(1, 4),
end=(1, 5),
line="D A C A E",
),
],
TokenInfo(type=NAME, string="C", start=(1, 4), end=(1, 5), line="D A C A E"),
TokenInfo(
type=NAME,
string="A",
start=(1, 6),
end=(1, 7),
line="D A C A E",
),
],
TokenInfo(type=NAME, string="A", start=(1, 6), end=(1, 7), line="D A C A E"),
TokenInfo(
type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"
),
],
TokenInfo(type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"),
])
)
node = parse_string("B C A E", parser_class)
self.assertIsNotNone(node)
self.assertEqual(node, [
self.assertEqual(
node,
[
[
[TokenInfo(type=NAME, string="B", start=(1, 0), end=(1, 1), line="B C A E")],
TokenInfo(type=NAME, string="C", start=(1, 2), end=(1, 3), line="B C A E"),
[
TokenInfo(
type=NAME,
string="B",
start=(1, 0),
end=(1, 1),
line="B C A E",
),
TokenInfo(
type=NAME,
string="C",
start=(1, 2),
end=(1, 3),
line="B C A E",
),
],
TokenInfo(
type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"
),
],
TokenInfo(type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"),
TokenInfo(
type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"
),
],
TokenInfo(type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"),
])
)
def test_nasty_mutually_left_recursive(self) -> None:
# This grammar does not recognize 'x - + =', much to my chagrin.
@ -454,43 +650,44 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("foo = 12 + 12 .", parser_class)
self.assertEqual(node, [
self.assertEqual(
node,
[
TokenInfo(
NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 ."
),
TokenInfo(
OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ."
),
[
[TokenInfo(NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 .")],
TokenInfo(OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ."),
TokenInfo(
NUMBER,
string="12",
start=(1, 6),
end=(1, 8),
line="foo = 12 + 12 .",
),
[
[
TokenInfo(
NUMBER, string="12", start=(1, 6), end=(1, 8), line="foo = 12 + 12 ."
)
],
[
[
[
TokenInfo(
OP,
string="+",
start=(1, 9),
end=(1, 10),
line="foo = 12 + 12 .",
),
[
TokenInfo(
NUMBER,
string="12",
start=(1, 11),
end=(1, 13),
line="foo = 12 + 12 .",
)
],
]
]
],
OP,
string="+",
start=(1, 9),
end=(1, 10),
line="foo = 12 + 12 .",
),
TokenInfo(
NUMBER,
string="12",
start=(1, 11),
end=(1, 13),
line="foo = 12 + 12 .",
),
]
],
]
]
])
],
],
)
def test_named_lookahead_error(self) -> None:
grammar = """
@ -533,11 +730,14 @@ class TestPegen(unittest.TestCase):
"""
parser_class = make_parser(grammar)
node = parse_string("(1)", parser_class)
self.assertEqual(node, [
TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"),
[TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)")],
TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"),
])
self.assertEqual(
node,
[
TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"),
TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)"),
TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"),
],
)
def test_dangling_reference(self) -> None:
grammar = """
@ -589,6 +789,124 @@ class TestPegen(unittest.TestCase):
with self.assertRaisesRegex(GrammarError, "cannot start with underscore: '_x'"):
parser_class = make_parser(grammar)
def test_soft_keyword(self) -> None:
grammar = """
start:
| "number" n=NUMBER { eval(n.string) }
| "string" n=STRING { n.string }
| SOFT_KEYWORD l=NAME n=(NUMBER | NAME | STRING) { f"{l.string} = {n.string}"}
"""
parser_class = make_parser(grammar)
self.assertEqual(parse_string("number 1", parser_class, verbose=True), 1)
self.assertEqual(parse_string("string 'b'", parser_class, verbose=True), "'b'")
self.assertEqual(
parse_string("number test 1", parser_class, verbose=True), "test = 1"
)
assert (
parse_string("string test 'b'", parser_class, verbose=True) == "test = 'b'"
)
with self.assertRaises(SyntaxError):
parse_string("test 1", parser_class, verbose=True)
def test_forced(self) -> None:
grammar = """
start: NAME &&':' | NAME
"""
parser_class = make_parser(grammar)
self.assertTrue(parse_string("number :", parser_class, verbose=True))
with self.assertRaises(SyntaxError) as e:
parse_string("a", parser_class, verbose=True)
self.assertIn("expected ':'", str(e.exception))
def test_forced_with_group(self) -> None:
grammar = """
start: NAME &&(':' | ';') | NAME
"""
parser_class = make_parser(grammar)
self.assertTrue(parse_string("number :", parser_class, verbose=True))
self.assertTrue(parse_string("number ;", parser_class, verbose=True))
with self.assertRaises(SyntaxError) as e:
parse_string("a", parser_class, verbose=True)
self.assertIn("expected (':' | ';')", e.exception.args[0])
def test_unreachable_explicit(self) -> None:
source = """
start: NAME { UNREACHABLE }
"""
grammar = parse_string(source, GrammarParser)
out = io.StringIO()
genr = PythonParserGenerator(
grammar, out, unreachable_formatting="This is a test"
)
genr.generate("<string>")
self.assertIn("This is a test", out.getvalue())
def test_unreachable_implicit1(self) -> None:
source = """
start: NAME | invalid_input
invalid_input: NUMBER { None }
"""
grammar = parse_string(source, GrammarParser)
out = io.StringIO()
genr = PythonParserGenerator(
grammar, out, unreachable_formatting="This is a test"
)
genr.generate("<string>")
self.assertIn("This is a test", out.getvalue())
def test_unreachable_implicit2(self) -> None:
source = """
start: NAME | '(' invalid_input ')'
invalid_input: NUMBER { None }
"""
grammar = parse_string(source, GrammarParser)
out = io.StringIO()
genr = PythonParserGenerator(
grammar, out, unreachable_formatting="This is a test"
)
genr.generate("<string>")
self.assertIn("This is a test", out.getvalue())
def test_unreachable_implicit3(self) -> None:
source = """
start: NAME | invalid_input { None }
invalid_input: NUMBER
"""
grammar = parse_string(source, GrammarParser)
out = io.StringIO()
genr = PythonParserGenerator(
grammar, out, unreachable_formatting="This is a test"
)
genr.generate("<string>")
self.assertNotIn("This is a test", out.getvalue())
def test_locations_in_alt_action_and_group(self) -> None:
grammar = """
start: t=term NEWLINE? $ { ast.Expression(t, LOCATIONS) }
term:
| l=term '*' r=factor { ast.BinOp(l, ast.Mult(), r, LOCATIONS) }
| l=term '/' r=factor { ast.BinOp(l, ast.Div(), r, LOCATIONS) }
| factor
factor:
| (
n=NAME { ast.Name(id=n.string, ctx=ast.Load(), LOCATIONS) } |
n=NUMBER { ast.Constant(value=ast.literal_eval(n.string), LOCATIONS) }
)
"""
parser_class = make_parser(grammar)
source = "2*3\n"
o = ast.dump(parse_string(source, parser_class).body, include_attributes=True)
p = ast.dump(ast.parse(source).body[0].value, include_attributes=True).replace(
" kind=None,", ""
)
diff = "\n".join(
difflib.unified_diff(
o.split("\n"), p.split("\n"), "cpython", "python-pegen"
)
)
self.assertFalse(diff)
class TestGrammarVisitor:
class Visitor(GrammarVisitor):

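The new test_soft_keyword above covers another grammar-level feature touched by this update: double-quoted literals such as "number" behave as soft keywords, and the SOFT_KEYWORD token matches any of them. A standalone sketch using the same grammar, again assuming Tools/peg_generator is importable:

from pegen.testutil import make_parser, parse_string

# Grammar copied from test_soft_keyword above.
grammar = """
start:
    | "number" n=NUMBER { eval(n.string) }
    | "string" n=STRING { n.string }
    | SOFT_KEYWORD l=NAME n=(NUMBER | NAME | STRING) { f"{l.string} = {n.string}"}
"""
parser_class = make_parser(grammar)

parse_string("number 1", parser_class)       # -> 1; "number" is taken as the soft keyword
parse_string("number test 1", parser_class)  # -> "test = 1"; SOFT_KEYWORD matches "number"
                                             #    and NAME matches "test"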
View File

@ -1176,7 +1176,7 @@ statements_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ statements[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "statement+"));
_res = ( asdl_stmt_seq * ) _PyPegen_seq_flatten ( p , a );
_res = ( asdl_stmt_seq* ) _PyPegen_seq_flatten ( p , a );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -1217,7 +1217,7 @@ statement_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ statement[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "compound_stmt"));
_res = ( asdl_stmt_seq * ) _PyPegen_singleton_seq ( p , a );
_res = ( asdl_stmt_seq* ) _PyPegen_singleton_seq ( p , a );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -1294,7 +1294,7 @@ statement_newline_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ statement_newline[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "compound_stmt NEWLINE"));
_res = ( asdl_stmt_seq * ) _PyPegen_singleton_seq ( p , a );
_res = ( asdl_stmt_seq* ) _PyPegen_singleton_seq ( p , a );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -1346,7 +1346,7 @@ statement_newline_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = ( asdl_stmt_seq * ) _PyPegen_singleton_seq ( p , CHECK ( stmt_ty , _PyAST_Pass ( EXTRA ) ) );
_res = ( asdl_stmt_seq* ) _PyPegen_singleton_seq ( p , CHECK ( stmt_ty , _PyAST_Pass ( EXTRA ) ) );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -1416,7 +1416,7 @@ simple_stmts_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ simple_stmts[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "simple_stmt !';' NEWLINE"));
_res = ( asdl_stmt_seq * ) _PyPegen_singleton_seq ( p , a );
_res = ( asdl_stmt_seq* ) _PyPegen_singleton_seq ( p , a );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -2403,7 +2403,7 @@ augassign_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ augassign[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'@='"));
_res = CHECK_VERSION ( AugOperator * , 5 , "The '@' operator is" , _PyPegen_augoperator ( p , MatMult ) );
_res = CHECK_VERSION ( AugOperator* , 5 , "The '@' operator is" , _PyPegen_augoperator ( p , MatMult ) );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -2841,7 +2841,7 @@ global_stmt_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_Global ( CHECK ( asdl_identifier_seq * , _PyPegen_map_names_to_ids ( p , a ) ) , EXTRA );
_res = _PyAST_Global ( CHECK ( asdl_identifier_seq* , _PyPegen_map_names_to_ids ( p , a ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -2903,7 +2903,7 @@ nonlocal_stmt_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_Nonlocal ( CHECK ( asdl_identifier_seq * , _PyPegen_map_names_to_ids ( p , a ) ) , EXTRA );
_res = _PyAST_Nonlocal ( CHECK ( asdl_identifier_seq* , _PyPegen_map_names_to_ids ( p , a ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -3460,7 +3460,7 @@ import_from_targets_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = ( asdl_alias_seq * ) _PyPegen_singleton_seq ( p , CHECK ( alias_ty , _PyPegen_alias_for_star ( p , EXTRA ) ) );
_res = ( asdl_alias_seq* ) _PyPegen_singleton_seq ( p , CHECK ( alias_ty , _PyPegen_alias_for_star ( p , EXTRA ) ) );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -4649,7 +4649,7 @@ slash_with_default_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ slash_with_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "param_no_default* param_with_default+ '/' ','"));
_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq * ) a , b );
_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq* ) a , b );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -4681,7 +4681,7 @@ slash_with_default_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ slash_with_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "param_no_default* param_with_default+ '/' &')'"));
_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq * ) a , b );
_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq* ) a , b );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -5340,7 +5340,7 @@ if_stmt_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_If ( a , b , CHECK ( asdl_stmt_seq * , _PyPegen_singleton_seq ( p , c ) ) , EXTRA );
_res = _PyAST_If ( a , b , CHECK ( asdl_stmt_seq* , _PyPegen_singleton_seq ( p , c ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -5478,7 +5478,7 @@ elif_stmt_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_If ( a , b , CHECK ( asdl_stmt_seq * , _PyPegen_singleton_seq ( p , c ) ) , EXTRA );
_res = _PyAST_If ( a , b , CHECK ( asdl_stmt_seq* , _PyPegen_singleton_seq ( p , c ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -6756,7 +6756,7 @@ subject_expr_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , value , values ) ) , Load , EXTRA );
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , value , values ) ) , Load , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -9049,7 +9049,7 @@ mapping_pattern_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_MatchMapping ( CHECK ( asdl_expr_seq * , _PyPegen_get_pattern_keys ( p , items ) ) , CHECK ( asdl_pattern_seq * , _PyPegen_get_patterns ( p , items ) ) , rest -> v . Name . id , EXTRA );
_res = _PyAST_MatchMapping ( CHECK ( asdl_expr_seq* , _PyPegen_get_pattern_keys ( p , items ) ) , CHECK ( asdl_pattern_seq* , _PyPegen_get_patterns ( p , items ) ) , rest -> v . Name . id , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -9092,7 +9092,7 @@ mapping_pattern_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_MatchMapping ( CHECK ( asdl_expr_seq * , _PyPegen_get_pattern_keys ( p , items ) ) , CHECK ( asdl_pattern_seq * , _PyPegen_get_patterns ( p , items ) ) , NULL , EXTRA );
_res = _PyAST_MatchMapping ( CHECK ( asdl_expr_seq* , _PyPegen_get_pattern_keys ( p , items ) ) , CHECK ( asdl_pattern_seq* , _PyPegen_get_patterns ( p , items ) ) , NULL , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -9381,7 +9381,7 @@ class_pattern_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_MatchClass ( cls , NULL , CHECK ( asdl_identifier_seq * , _PyPegen_map_names_to_ids ( p , CHECK ( asdl_expr_seq * , _PyPegen_get_pattern_keys ( p , keywords ) ) ) ) , CHECK ( asdl_pattern_seq * , _PyPegen_get_patterns ( p , keywords ) ) , EXTRA );
_res = _PyAST_MatchClass ( cls , NULL , CHECK ( asdl_identifier_seq* , _PyPegen_map_names_to_ids ( p , CHECK ( asdl_expr_seq* , _PyPegen_get_pattern_keys ( p , keywords ) ) ) ) , CHECK ( asdl_pattern_seq* , _PyPegen_get_patterns ( p , keywords ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -9433,7 +9433,7 @@ class_pattern_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_MatchClass ( cls , patterns , CHECK ( asdl_identifier_seq * , _PyPegen_map_names_to_ids ( p , CHECK ( asdl_expr_seq * , _PyPegen_get_pattern_keys ( p , keywords ) ) ) ) , CHECK ( asdl_pattern_seq * , _PyPegen_get_patterns ( p , keywords ) ) , EXTRA );
_res = _PyAST_MatchClass ( cls , patterns , CHECK ( asdl_identifier_seq* , _PyPegen_map_names_to_ids ( p , CHECK ( asdl_expr_seq* , _PyPegen_get_pattern_keys ( p , keywords ) ) ) ) , CHECK ( asdl_pattern_seq* , _PyPegen_get_patterns ( p , keywords ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -9642,7 +9642,7 @@ expressions_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Load , EXTRA );
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Load , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -9678,7 +9678,7 @@ expressions_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_singleton_seq ( p , a ) ) , Load , EXTRA );
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_singleton_seq ( p , a ) ) , Load , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -10004,7 +10004,7 @@ star_expressions_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Load , EXTRA );
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Load , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -10040,7 +10040,7 @@ star_expressions_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_singleton_seq ( p , a ) ) , Load , EXTRA );
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_singleton_seq ( p , a ) ) , Load , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -10485,7 +10485,7 @@ disjunction_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_BoolOp ( Or , CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , EXTRA );
_res = _PyAST_BoolOp ( Or , CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -10571,7 +10571,7 @@ conjunction_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_BoolOp ( And , CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , EXTRA );
_res = _PyAST_BoolOp ( And , CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -10739,7 +10739,7 @@ comparison_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_Compare ( a , CHECK ( asdl_int_seq * , _PyPegen_get_cmpops ( p , b ) ) , CHECK ( asdl_expr_seq * , _PyPegen_get_exprs ( p , b ) ) , EXTRA );
_res = _PyAST_Compare ( a , CHECK ( asdl_int_seq* , _PyPegen_get_cmpops ( p , b ) ) , CHECK ( asdl_expr_seq* , _PyPegen_get_exprs ( p , b ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -12837,7 +12837,7 @@ primary_raw(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_Call ( a , CHECK ( asdl_expr_seq * , ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , b ) ) , NULL , EXTRA );
_res = _PyAST_Call ( a , CHECK ( asdl_expr_seq* , ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , b ) ) , NULL , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -13896,7 +13896,7 @@ lambda_slash_with_default_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ lambda_slash_with_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "lambda_param_no_default* lambda_param_with_default+ '/' ','"));
_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq * ) a , b );
_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq* ) a , b );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -13928,7 +13928,7 @@ lambda_slash_with_default_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ lambda_slash_with_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "lambda_param_no_default* lambda_param_with_default+ '/' &':'"));
_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq * ) a , b );
_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq* ) a , b );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -14689,7 +14689,7 @@ dict_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_Dict ( CHECK ( asdl_expr_seq * , _PyPegen_get_keys ( p , a ) ) , CHECK ( asdl_expr_seq * , _PyPegen_get_values ( p , a ) ) , EXTRA );
_res = _PyAST_Dict ( CHECK ( asdl_expr_seq* , _PyPegen_get_keys ( p , a ) ) , CHECK ( asdl_expr_seq* , _PyPegen_get_values ( p , a ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -15556,7 +15556,7 @@ args_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_Call ( _PyPegen_dummy_name ( p ) , CHECK_NULL_ALLOWED ( asdl_expr_seq * , _PyPegen_seq_extract_starred_exprs ( p , a ) ) , CHECK_NULL_ALLOWED ( asdl_keyword_seq * , _PyPegen_seq_delete_starred_exprs ( p , a ) ) , EXTRA );
_res = _PyAST_Call ( _PyPegen_dummy_name ( p ) , CHECK_NULL_ALLOWED ( asdl_expr_seq* , _PyPegen_seq_extract_starred_exprs ( p , a ) ) , CHECK_NULL_ALLOWED ( asdl_keyword_seq* , _PyPegen_seq_delete_starred_exprs ( p , a ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -16026,7 +16026,7 @@ star_targets_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Store , EXTRA );
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Store , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -16119,7 +16119,7 @@ star_targets_tuple_seq_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ star_targets_tuple_seq[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "star_target ((',' star_target))+ ','?"));
_res = ( asdl_expr_seq * ) _PyPegen_seq_insert_in_front ( p , a , b );
_res = ( asdl_expr_seq* ) _PyPegen_seq_insert_in_front ( p , a , b );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -16146,7 +16146,7 @@ star_targets_tuple_seq_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ star_targets_tuple_seq[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "star_target ','"));
_res = ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , a );
_res = ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , a );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -16923,7 +16923,7 @@ t_primary_raw(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_Call ( a , CHECK ( asdl_expr_seq * , ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , b ) ) , NULL , EXTRA );
_res = _PyAST_Call ( a , CHECK ( asdl_expr_seq* , ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , b ) ) , NULL , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -17474,7 +17474,7 @@ type_expressions_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.expression+ ',' '*' expression ',' '**' expression"));
_res = ( asdl_expr_seq * ) _PyPegen_seq_append_to_end ( p , CHECK ( asdl_seq * , _PyPegen_seq_append_to_end ( p , a , b ) ) , c );
_res = ( asdl_expr_seq* ) _PyPegen_seq_append_to_end ( p , CHECK ( asdl_seq* , _PyPegen_seq_append_to_end ( p , a , b ) ) , c );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -17507,7 +17507,7 @@ type_expressions_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.expression+ ',' '*' expression"));
_res = ( asdl_expr_seq * ) _PyPegen_seq_append_to_end ( p , a , b );
_res = ( asdl_expr_seq* ) _PyPegen_seq_append_to_end ( p , a , b );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -17540,7 +17540,7 @@ type_expressions_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.expression+ ',' '**' expression"));
_res = ( asdl_expr_seq * ) _PyPegen_seq_append_to_end ( p , a , b );
_res = ( asdl_expr_seq* ) _PyPegen_seq_append_to_end ( p , a , b );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -17576,7 +17576,7 @@ type_expressions_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'*' expression ',' '**' expression"));
_res = ( asdl_expr_seq * ) _PyPegen_seq_append_to_end ( p , CHECK ( asdl_seq * , _PyPegen_singleton_seq ( p , a ) ) , b );
_res = ( asdl_expr_seq* ) _PyPegen_seq_append_to_end ( p , CHECK ( asdl_seq* , _PyPegen_singleton_seq ( p , a ) ) , b );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -17603,7 +17603,7 @@ type_expressions_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'*' expression"));
_res = ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , a );
_res = ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , a );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -17630,7 +17630,7 @@ type_expressions_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'**' expression"));
_res = ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , a );
_res = ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , a );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@ -20149,7 +20149,7 @@ invalid_match_stmt_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ invalid_match_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "\"match\" subject_expr !':'"));
_res = CHECK_VERSION ( void * , 10 , "Pattern matching is" , RAISE_SYNTAX_ERROR ( "expected ':'" ) );
_res = CHECK_VERSION ( void* , 10 , "Pattern matching is" , RAISE_SYNTAX_ERROR ( "expected ':'" ) );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);

View File

@ -897,6 +897,19 @@ _PyPegen_expect_token(Parser *p, int type)
return t;
}
void*
_PyPegen_expect_forced_result(Parser *p, void* result, const char* expected) {
if (p->error_indicator == 1) {
return NULL;
}
if (result == NULL) {
RAISE_SYNTAX_ERROR("expected (%s)", expected);
return NULL;
}
return result;
}
Token *
_PyPegen_expect_forced_token(Parser *p, int type, const char* expected) {

View File

@ -130,6 +130,7 @@ int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*),
int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
Token *_PyPegen_expect_token(Parser *p, int type);
void* _PyPegen_expect_forced_result(Parser *p, void* result, const char* expected);
Token *_PyPegen_expect_forced_token(Parser *p, int type, const char* expected);
expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword);
expr_ty _PyPegen_soft_keyword_token(Parser *p);

View File

@ -1,5 +1,5 @@
[mypy]
files = pegen, scripts
files = pegen
follow_imports = error
no_implicit_optional = True

View File

@ -100,7 +100,9 @@ c_parser.add_argument(
"--optimized", action="store_true", help="Compile the extension in optimized mode"
)
c_parser.add_argument(
"--skip-actions", action="store_true", help="Suppress code emission for rule actions",
"--skip-actions",
action="store_true",
help="Suppress code emission for rule actions",
)
python_parser = subparsers.add_parser("python", help="Generate Python code")
@ -114,7 +116,9 @@ python_parser.add_argument(
help="Where to write the generated parser",
)
python_parser.add_argument(
"--skip-actions", action="store_true", help="Suppress code emission for rule actions",
"--skip-actions",
action="store_true",
help="Suppress code emission for rule actions",
)

View File

@ -6,9 +6,17 @@ always fail. We rely on string comparison of the base classes instead.
TODO: Remove the above-described hack.
"""
from typing import Any, Optional, Tuple
def ast_dump(node, annotate_fields=True, include_attributes=False, *, indent=None):
def _format(node, level=0):
def ast_dump(
node: Any,
annotate_fields: bool = True,
include_attributes: bool = False,
*,
indent: Optional[str] = None,
) -> str:
def _format(node: Any, level: int = 0) -> Tuple[str, bool]:
if indent is not None:
level += 1
prefix = "\n" + indent * level

View File

@ -58,7 +58,7 @@ def compile_c_extension(
extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
extra_compile_args.append("-DPy_BUILD_CORE_MODULE")
# Define _Py_TEST_PEGEN to not call PyAST_Validate() in Parser/pegen.c
extra_compile_args.append('-D_Py_TEST_PEGEN')
extra_compile_args.append("-D_Py_TEST_PEGEN")
extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
if keep_asserts:
extra_compile_args.append("-UNDEBUG")
@ -175,7 +175,10 @@ def build_c_generator(
def build_python_generator(
grammar: Grammar, grammar_file: str, output_file: str, skip_actions: bool = False,
grammar: Grammar,
grammar_file: str,
output_file: str,
skip_actions: bool = False,
) -> ParserGenerator:
with open(output_file, "w") as file:
gen: ParserGenerator = PythonParserGenerator(grammar, file) # TODO: skip_actions
@ -246,5 +249,10 @@ def build_python_parser_and_generator(
skip_actions (bool, optional): Whether to pretend no rule has any actions.
"""
grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
gen = build_python_generator(grammar, grammar_file, output_file, skip_actions=skip_actions,)
gen = build_python_generator(
grammar,
grammar_file,
output_file,
skip_actions=skip_actions,
)
return grammar, parser, tokenizer, gen
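
For reference, a sketch of how the helpers reformatted above are typically driven; the grammar and output paths below are placeholders, and the two positional flags mirror the (verbose_tokenizer, verbose_parser) arguments passed to build_parser inside build_python_parser_and_generator:

from pegen.build import build_parser, build_python_generator

# Placeholder paths; build_parser returns (grammar, parser, tokenizer).
grammar, parser, tokenizer = build_parser("expr.gram", False, False)

# Emit a Python parser for the grammar; skip_actions pretends no rule has actions.
gen = build_python_generator(
    grammar,
    "expr.gram",
    "expr_parser.py",
    skip_actions=False,
)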

View File

@ -12,6 +12,7 @@ from pegen.grammar import (
Gather,
GrammarVisitor,
Group,
Leaf,
Lookahead,
NamedItem,
NameLeaf,
@ -91,7 +92,16 @@ class FunctionCall:
parts.append(", 1")
if self.assigned_variable:
if self.assigned_variable_type:
parts = ["(", self.assigned_variable, " = ", '(', self.assigned_variable_type, ')', *parts, ")"]
parts = [
"(",
self.assigned_variable,
" = ",
"(",
self.assigned_variable_type,
")",
*parts,
")",
]
else:
parts = ["(", self.assigned_variable, " = ", *parts, ")"]
if self.comment:
@ -256,9 +266,10 @@ class CCallMakerVisitor(GrammarVisitor):
def visit_Forced(self, node: Forced) -> FunctionCall:
call = self.generate_call(node.node)
if call.nodetype == NodeTypes.GENERIC_TOKEN:
if isinstance(node.node, Leaf):
assert isinstance(node.node, Leaf)
val = ast.literal_eval(node.node.value)
assert val in self.exact_tokens, f"{node.value} is not a known literal"
assert val in self.exact_tokens, f"{node.node.value} is not a known literal"
type = self.exact_tokens[val]
return FunctionCall(
assigned_variable="_literal",
@ -268,9 +279,19 @@ class CCallMakerVisitor(GrammarVisitor):
return_type="Token *",
comment=f"forced_token='{val}'",
)
if isinstance(node.node, Group):
call = self.visit(node.node.rhs)
call.assigned_variable = None
call.comment = None
return FunctionCall(
assigned_variable="_literal",
function=f"_PyPegen_expect_forced_result",
arguments=["p", str(call), f'"{node.node.rhs!s}"'],
return_type="void *",
comment=f"forced_token=({node.node.rhs!s})",
)
else:
raise NotImplementedError(
f"Forced tokens don't work with {call.nodetype} tokens")
raise NotImplementedError(f"Forced tokens don't work with {node.node} nodes")
def visit_Opt(self, node: Opt) -> FunctionCall:
call = self.generate_call(node.node)
@@ -347,7 +368,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
debug: bool = False,
skip_actions: bool = False,
):
super().__init__(grammar, tokens, file)
super().__init__(grammar, set(tokens.values()), file)
self.callmakervisitor: CCallMakerVisitor = CCallMakerVisitor(
self, exact_tokens, non_exact_tokens
)
@@ -386,7 +407,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.print(f"goto {goto_target};")
self.print(f"}}")
def out_of_memory_return(self, expr: str, cleanup_code: Optional[str] = None,) -> None:
def out_of_memory_return(
self,
expr: str,
cleanup_code: Optional[str] = None,
) -> None:
self.print(f"if ({expr}) {{")
with self.indent():
if cleanup_code is not None:
@@ -568,7 +593,10 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
self._set_up_token_start_metadata_extraction()
self.visit(
rhs, is_loop=False, is_gather=node.is_gather(), rulename=node.name,
rhs,
is_loop=False,
is_gather=node.is_gather(),
rulename=node.name,
)
if self.debug:
self.print(f'D(fprintf(stderr, "Fail at %d: {node.name}\\n", p->mark));')
@@ -601,7 +629,10 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
self._set_up_token_start_metadata_extraction()
self.visit(
rhs, is_loop=True, is_gather=node.is_gather(), rulename=node.name,
rhs,
is_loop=True,
is_gather=node.is_gather(),
rulename=node.name,
)
if is_repeat1:
self.print("if (_n == 0 || p->error_indicator) {")
@@ -771,7 +802,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
def visit_Alt(
self, node: Alt, is_loop: bool, is_gather: bool, rulename: Optional[str]
) -> None:
if len(node.items) == 1 and str(node.items[0]).startswith('invalid_'):
if len(node.items) == 1 and str(node.items[0]).startswith("invalid_"):
self.print(f"if (p->call_invalid_rules) {{ // {node}")
else:
self.print(f"{{ // {node}")
@@ -791,7 +822,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
if v == "_cut_var":
v += " = 0" # cut_var must be initialized
self.print(f"{var_type}{v};")
if v.startswith("_opt_var"):
if v and v.startswith("_opt_var"):
self.print(f"UNUSED({v}); // Silence compiler warnings")
with self.local_variable_context():

View File

@@ -29,7 +29,8 @@ from pegen.grammar import (
)
argparser = argparse.ArgumentParser(
prog="calculate_first_sets", description="Calculate the first sets of a grammar",
prog="calculate_first_sets",
description="Calculate the first sets of a grammar",
)
argparser.add_argument("grammar_file", help="The grammar file")

View File

@@ -2,7 +2,10 @@
# @generated by pegen from ./Tools/peg_generator/pegen/metagrammar.gram
import ast
from typing import Optional, Any
import sys
import tokenize
from typing import Any, Optional
from pegen.parser import memoize, memoize_left_rec, logger, Parser
from ast import literal_eval
@@ -35,83 +38,71 @@ from pegen.grammar import (
StringLeaf,
)
# Keywords and soft keywords are listed at the end of the parser definition.
class GeneratedParser(Parser):
@memoize
def start(self) -> Optional[Grammar]:
# start: grammar $
mark = self.mark()
cut = False
mark = self._mark()
if (
(grammar := self.grammar())
and
(endmarker := self.expect('ENDMARKER'))
(_endmarker := self.expect('ENDMARKER'))
):
return grammar
self.reset(mark)
if cut: return None
self._reset(mark)
return None
@memoize
def grammar(self) -> Optional[Grammar]:
# grammar: metas rules | rules
mark = self.mark()
cut = False
mark = self._mark()
if (
(metas := self.metas())
and
(rules := self.rules())
):
return Grammar ( rules , metas )
self.reset(mark)
if cut: return None
cut = False
self._reset(mark)
if (
(rules := self.rules())
):
return Grammar ( rules , [ ] )
self.reset(mark)
if cut: return None
return Grammar ( rules , [] )
self._reset(mark)
return None
@memoize
def metas(self) -> Optional[MetaList]:
# metas: meta metas | meta
mark = self.mark()
cut = False
mark = self._mark()
if (
(meta := self.meta())
and
(metas := self.metas())
):
return [ meta ] + metas
self.reset(mark)
if cut: return None
cut = False
return [meta] + metas
self._reset(mark)
if (
(meta := self.meta())
):
return [ meta ]
self.reset(mark)
if cut: return None
return [meta]
self._reset(mark)
return None
@memoize
def meta(self) -> Optional[MetaTuple]:
# meta: "@" NAME NEWLINE | "@" NAME NAME NEWLINE | "@" NAME STRING NEWLINE
mark = self.mark()
cut = False
mark = self._mark()
if (
(literal := self.expect("@"))
and
(name := self.name())
and
(newline := self.expect('NEWLINE'))
(_newline := self.expect('NEWLINE'))
):
return ( name . string , None )
self.reset(mark)
if cut: return None
cut = False
self._reset(mark)
if (
(literal := self.expect("@"))
and
@@ -119,12 +110,10 @@ class GeneratedParser(Parser):
and
(b := self.name())
and
(newline := self.expect('NEWLINE'))
(_newline := self.expect('NEWLINE'))
):
return ( a . string , b . string )
self.reset(mark)
if cut: return None
cut = False
self._reset(mark)
if (
(literal := self.expect("@"))
and
@@ -132,40 +121,34 @@ class GeneratedParser(Parser):
and
(string := self.string())
and
(newline := self.expect('NEWLINE'))
(_newline := self.expect('NEWLINE'))
):
return ( name . string , literal_eval ( string . string ) )
self.reset(mark)
if cut: return None
self._reset(mark)
return None
@memoize
def rules(self) -> Optional[RuleList]:
# rules: rule rules | rule
mark = self.mark()
cut = False
mark = self._mark()
if (
(rule := self.rule())
and
(rules := self.rules())
):
return [ rule ] + rules
self.reset(mark)
if cut: return None
cut = False
return [rule] + rules
self._reset(mark)
if (
(rule := self.rule())
):
return [ rule ]
self.reset(mark)
if cut: return None
return [rule]
self._reset(mark)
return None
@memoize
def rule(self) -> Optional[Rule]:
# rule: rulename memoflag? ":" alts NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" alts NEWLINE
mark = self.mark()
cut = False
mark = self._mark()
if (
(rulename := self.rulename())
and
@@ -175,18 +158,16 @@ class GeneratedParser(Parser):
and
(alts := self.alts())
and
(newline := self.expect('NEWLINE'))
(_newline := self.expect('NEWLINE'))
and
(indent := self.expect('INDENT'))
(_indent := self.expect('INDENT'))
and
(more_alts := self.more_alts())
and
(dedent := self.expect('DEDENT'))
(_dedent := self.expect('DEDENT'))
):
return Rule ( rulename [ 0 ] , rulename [ 1 ] , Rhs ( alts . alts + more_alts . alts ) , memo = opt )
self.reset(mark)
if cut: return None
cut = False
return Rule ( rulename [0] , rulename [1] , Rhs ( alts . alts + more_alts . alts ) , memo = opt )
self._reset(mark)
if (
(rulename := self.rulename())
and
@@ -194,18 +175,16 @@ class GeneratedParser(Parser):
and
(literal := self.expect(":"))
and
(newline := self.expect('NEWLINE'))
(_newline := self.expect('NEWLINE'))
and
(indent := self.expect('INDENT'))
(_indent := self.expect('INDENT'))
and
(more_alts := self.more_alts())
and
(dedent := self.expect('DEDENT'))
(_dedent := self.expect('DEDENT'))
):
return Rule ( rulename [ 0 ] , rulename [ 1 ] , more_alts , memo = opt )
self.reset(mark)
if cut: return None
cut = False
return Rule ( rulename [0] , rulename [1] , more_alts , memo = opt )
self._reset(mark)
if (
(rulename := self.rulename())
and
@@ -215,76 +194,49 @@ class GeneratedParser(Parser):
and
(alts := self.alts())
and
(newline := self.expect('NEWLINE'))
(_newline := self.expect('NEWLINE'))
):
return Rule ( rulename [ 0 ] , rulename [ 1 ] , alts , memo = opt )
self.reset(mark)
if cut: return None
return Rule ( rulename [0] , rulename [1] , alts , memo = opt )
self._reset(mark)
return None
@memoize
def rulename(self) -> Optional[RuleName]:
# rulename: NAME '[' NAME '*' ']' | NAME '[' NAME ']' | NAME
mark = self.mark()
cut = False
# rulename: NAME annotation | NAME
mark = self._mark()
if (
(name := self.name())
and
(literal := self.expect('['))
and
(type := self.name())
and
(literal_1 := self.expect('*'))
and
(literal_2 := self.expect(']'))
(annotation := self.annotation())
):
return ( name . string , type . string + "*" )
self.reset(mark)
if cut: return None
cut = False
if (
(name := self.name())
and
(literal := self.expect('['))
and
(type := self.name())
and
(literal_1 := self.expect(']'))
):
return ( name . string , type . string )
self.reset(mark)
if cut: return None
cut = False
return ( name . string , annotation )
self._reset(mark)
if (
(name := self.name())
):
return ( name . string , None )
self.reset(mark)
if cut: return None
self._reset(mark)
return None
@memoize
def memoflag(self) -> Optional[str]:
# memoflag: '(' 'memo' ')'
mark = self.mark()
cut = False
# memoflag: '(' "memo" ')'
mark = self._mark()
if (
(literal := self.expect('('))
and
(literal_1 := self.expect('memo'))
(literal_1 := self.expect("memo"))
and
(literal_2 := self.expect(')'))
):
return "memo"
self.reset(mark)
if cut: return None
self._reset(mark)
return None
@memoize
def alts(self) -> Optional[Rhs]:
# alts: alt "|" alts | alt
mark = self.mark()
cut = False
mark = self._mark()
if (
(alt := self.alt())
and
@@ -292,53 +244,45 @@ class GeneratedParser(Parser):
and
(alts := self.alts())
):
return Rhs ( [ alt ] + alts . alts )
self.reset(mark)
if cut: return None
cut = False
return Rhs ( [alt] + alts . alts )
self._reset(mark)
if (
(alt := self.alt())
):
return Rhs ( [ alt ] )
self.reset(mark)
if cut: return None
return Rhs ( [alt] )
self._reset(mark)
return None
@memoize
def more_alts(self) -> Optional[Rhs]:
# more_alts: "|" alts NEWLINE more_alts | "|" alts NEWLINE
mark = self.mark()
cut = False
mark = self._mark()
if (
(literal := self.expect("|"))
and
(alts := self.alts())
and
(newline := self.expect('NEWLINE'))
(_newline := self.expect('NEWLINE'))
and
(more_alts := self.more_alts())
):
return Rhs ( alts . alts + more_alts . alts )
self.reset(mark)
if cut: return None
cut = False
self._reset(mark)
if (
(literal := self.expect("|"))
and
(alts := self.alts())
and
(newline := self.expect('NEWLINE'))
(_newline := self.expect('NEWLINE'))
):
return Rhs ( alts . alts )
self.reset(mark)
if cut: return None
self._reset(mark)
return None
@memoize
def alt(self) -> Optional[Alt]:
# alt: items '$' action | items '$' | items action | items
mark = self.mark()
cut = False
mark = self._mark()
if (
(items := self.items())
and
@@ -346,101 +290,65 @@ class GeneratedParser(Parser):
and
(action := self.action())
):
return Alt ( items + [ NamedItem ( None , NameLeaf ( 'ENDMARKER' ) ) ] , action = action )
self.reset(mark)
if cut: return None
cut = False
return Alt ( items + [NamedItem ( None , NameLeaf ( 'ENDMARKER' ) )] , action = action )
self._reset(mark)
if (
(items := self.items())
and
(literal := self.expect('$'))
):
return Alt ( items + [ NamedItem ( None , NameLeaf ( 'ENDMARKER' ) ) ] , action = None )
self.reset(mark)
if cut: return None
cut = False
return Alt ( items + [NamedItem ( None , NameLeaf ( 'ENDMARKER' ) )] , action = None )
self._reset(mark)
if (
(items := self.items())
and
(action := self.action())
):
return Alt ( items , action = action )
self.reset(mark)
if cut: return None
cut = False
self._reset(mark)
if (
(items := self.items())
):
return Alt ( items , action = None )
self.reset(mark)
if cut: return None
self._reset(mark)
return None
@memoize
def items(self) -> Optional[NamedItemList]:
# items: named_item items | named_item
mark = self.mark()
cut = False
mark = self._mark()
if (
(named_item := self.named_item())
and
(items := self.items())
):
return [ named_item ] + items
self.reset(mark)
if cut: return None
cut = False
return [named_item] + items
self._reset(mark)
if (
(named_item := self.named_item())
):
return [ named_item ]
self.reset(mark)
if cut: return None
return [named_item]
self._reset(mark)
return None
@memoize
def named_item(self) -> Optional[NamedItem]:
# named_item: NAME '[' NAME '*' ']' '=' ~ item | NAME '[' NAME ']' '=' ~ item | NAME '=' ~ item | item | forced_atom | lookahead
mark = self.mark()
# named_item: NAME annotation '=' ~ item | NAME '=' ~ item | item | forced_atom | lookahead
mark = self._mark()
cut = False
if (
(name := self.name())
and
(literal := self.expect('['))
(annotation := self.annotation())
and
(type := self.name())
and
(literal_1 := self.expect('*'))
and
(literal_2 := self.expect(']'))
and
(literal_3 := self.expect('='))
(literal := self.expect('='))
and
(cut := True)
and
(item := self.item())
):
return NamedItem ( name . string , item , f"{type.string}*" )
self.reset(mark)
if cut: return None
cut = False
if (
(name := self.name())
and
(literal := self.expect('['))
and
(type := self.name())
and
(literal_1 := self.expect(']'))
and
(literal_2 := self.expect('='))
and
(cut := True)
and
(item := self.item())
):
return NamedItem ( name . string , item , type . string )
self.reset(mark)
return NamedItem ( name . string , item , annotation )
self._reset(mark)
if cut: return None
cut = False
if (
@@ -453,35 +361,29 @@ class GeneratedParser(Parser):
(item := self.item())
):
return NamedItem ( name . string , item )
self.reset(mark)
self._reset(mark)
if cut: return None
cut = False
if (
(item := self.item())
):
return NamedItem ( None , item )
self.reset(mark)
if cut: return None
cut = False
self._reset(mark)
if (
(it := self.forced_atom())
(forced := self.forced_atom())
):
return NamedItem ( None , it )
self.reset(mark)
if cut: return None
cut = False
return NamedItem ( None , forced )
self._reset(mark)
if (
(it := self.lookahead())
):
return NamedItem ( None , it )
self.reset(mark)
if cut: return None
self._reset(mark)
return None
@memoize
def forced_atom(self) -> Optional[NamedItem]:
def forced_atom(self) -> Optional[Forced]:
# forced_atom: '&' '&' ~ atom
mark = self.mark()
mark = self._mark()
cut = False
if (
(literal := self.expect('&'))
@@ -493,14 +395,14 @@ class GeneratedParser(Parser):
(atom := self.atom())
):
return Forced ( atom )
self.reset(mark)
self._reset(mark)
if cut: return None
return None
@memoize
def lookahead(self) -> Optional[LookaheadOrCut]:
# lookahead: '&' ~ atom | '!' ~ atom | '~'
mark = self.mark()
mark = self._mark()
cut = False
if (
(literal := self.expect('&'))
@@ -510,7 +412,7 @@ class GeneratedParser(Parser):
(atom := self.atom())
):
return PositiveLookahead ( atom )
self.reset(mark)
self._reset(mark)
if cut: return None
cut = False
if (
@@ -521,21 +423,19 @@ class GeneratedParser(Parser):
(atom := self.atom())
):
return NegativeLookahead ( atom )
self.reset(mark)
self._reset(mark)
if cut: return None
cut = False
if (
(literal := self.expect('~'))
):
return Cut ( )
self.reset(mark)
if cut: return None
self._reset(mark)
return None
@memoize
def item(self) -> Optional[Item]:
# item: '[' ~ alts ']' | atom '?' | atom '*' | atom '+' | atom '.' atom '+' | atom
mark = self.mark()
mark = self._mark()
cut = False
if (
(literal := self.expect('['))
@@ -547,36 +447,29 @@ class GeneratedParser(Parser):
(literal_1 := self.expect(']'))
):
return Opt ( alts )
self.reset(mark)
self._reset(mark)
if cut: return None
cut = False
if (
(atom := self.atom())
and
(literal := self.expect('?'))
):
return Opt ( atom )
self.reset(mark)
if cut: return None
cut = False
self._reset(mark)
if (
(atom := self.atom())
and
(literal := self.expect('*'))
):
return Repeat0 ( atom )
self.reset(mark)
if cut: return None
cut = False
self._reset(mark)
if (
(atom := self.atom())
and
(literal := self.expect('+'))
):
return Repeat1 ( atom )
self.reset(mark)
if cut: return None
cut = False
self._reset(mark)
if (
(sep := self.atom())
and
@@ -587,21 +480,18 @@ class GeneratedParser(Parser):
(literal_1 := self.expect('+'))
):
return Gather ( sep , node )
self.reset(mark)
if cut: return None
cut = False
self._reset(mark)
if (
(atom := self.atom())
):
return atom
self.reset(mark)
if cut: return None
self._reset(mark)
return None
@memoize
def atom(self) -> Optional[Plain]:
# atom: '(' ~ alts ')' | NAME | STRING
mark = self.mark()
mark = self._mark()
cut = False
if (
(literal := self.expect('('))
@@ -613,28 +503,24 @@ class GeneratedParser(Parser):
(literal_1 := self.expect(')'))
):
return Group ( alts )
self.reset(mark)
self._reset(mark)
if cut: return None
cut = False
if (
(name := self.name())
):
return NameLeaf ( name . string )
self.reset(mark)
if cut: return None
cut = False
self._reset(mark)
if (
(string := self.string())
):
return StringLeaf ( string . string )
self.reset(mark)
if cut: return None
self._reset(mark)
return None
@memoize
def action(self) -> Optional[str]:
# action: "{" ~ target_atoms "}"
mark = self.mark()
mark = self._mark()
cut = False
if (
(literal := self.expect("{"))
@@ -646,95 +532,123 @@ class GeneratedParser(Parser):
(literal_1 := self.expect("}"))
):
return target_atoms
self.reset(mark)
self._reset(mark)
if cut: return None
return None
@memoize
def annotation(self) -> Optional[str]:
# annotation: "[" ~ target_atoms "]"
mark = self._mark()
cut = False
if (
(literal := self.expect("["))
and
(cut := True)
and
(target_atoms := self.target_atoms())
and
(literal_1 := self.expect("]"))
):
return target_atoms
self._reset(mark)
if cut: return None
return None
@memoize
def target_atoms(self) -> Optional[str]:
# target_atoms: target_atom target_atoms | target_atom
mark = self.mark()
cut = False
mark = self._mark()
if (
(target_atom := self.target_atom())
and
(target_atoms := self.target_atoms())
):
return target_atom + " " + target_atoms
self.reset(mark)
if cut: return None
cut = False
self._reset(mark)
if (
(target_atom := self.target_atom())
):
return target_atom
self.reset(mark)
if cut: return None
self._reset(mark)
return None
@memoize
def target_atom(self) -> Optional[str]:
# target_atom: "{" ~ target_atoms "}" | NAME | NUMBER | STRING | "?" | ":" | !"}" OP
mark = self.mark()
# target_atom: "{" ~ target_atoms? "}" | "[" ~ target_atoms? "]" | NAME "*" | NAME | NUMBER | STRING | "?" | ":" | !"}" !"]" OP
mark = self._mark()
cut = False
if (
(literal := self.expect("{"))
and
(cut := True)
and
(target_atoms := self.target_atoms())
(atoms := self.target_atoms(),)
and
(literal_1 := self.expect("}"))
):
return "{" + target_atoms + "}"
self.reset(mark)
return "{" + ( atoms or "" ) + "}"
self._reset(mark)
if cut: return None
cut = False
if (
(literal := self.expect("["))
and
(cut := True)
and
(atoms := self.target_atoms(),)
and
(literal_1 := self.expect("]"))
):
return "[" + ( atoms or "" ) + "]"
self._reset(mark)
if cut: return None
if (
(name := self.name())
and
(literal := self.expect("*"))
):
return name . string + "*"
self._reset(mark)
if (
(name := self.name())
):
return name . string
self.reset(mark)
if cut: return None
cut = False
self._reset(mark)
if (
(number := self.number())
):
return number . string
self.reset(mark)
if cut: return None
cut = False
self._reset(mark)
if (
(string := self.string())
):
return string . string
self.reset(mark)
if cut: return None
cut = False
self._reset(mark)
if (
(literal := self.expect("?"))
):
return "?"
self.reset(mark)
if cut: return None
cut = False
self._reset(mark)
if (
(literal := self.expect(":"))
):
return ":"
self.reset(mark)
if cut: return None
cut = False
self._reset(mark)
if (
self.negative_lookahead(self.expect, "}")
and
self.negative_lookahead(self.expect, "]")
and
(op := self.op())
):
return op . string
self.reset(mark)
if cut: return None
self._reset(mark)
return None
KEYWORDS = ()
SOFT_KEYWORDS = ('memo',)
if __name__ == '__main__':
from pegen.parser import simple_parser_main
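
Every generated method above follows the same backtracking shape: save the position with _mark(), try one alternative as a chain of assignment expressions, and _reset() before trying the next one. A hand-written sketch of that shape (a toy rule, not part of the metagrammar):

import io
import tokenize
from typing import Optional, Tuple

from pegen.parser import Parser, memoize
from pegen.tokenizer import Tokenizer

class ToyParser(Parser):
    # The generator derives these from the grammar; empty for this sketch.
    KEYWORDS = ()
    SOFT_KEYWORDS = ()

    @memoize
    def start(self) -> Optional[Tuple[str, str]]:
        # start: NAME '=' NAME | NAME
        mark = self._mark()
        if (
            (key := self.name())
            and
            self.expect('=')
            and
            (value := self.name())
        ):
            return (key.string, value.string)
        self._reset(mark)
        if (key := self.name()):
            return (key.string, key.string)
        self._reset(mark)
        return None

tokens = tokenize.generate_tokens(io.StringIO("x = y\n").readline)
print(ToyParser(Tokenizer(tokens)).start())  # ('x', 'y')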

View File

@@ -38,7 +38,7 @@ issoftkeyword = frozenset(softkwlist).__contains__
EXTRA_KEYWORDS = ["async", "await"]
def main():
def main() -> None:
parser = argparse.ArgumentParser(
description="Generate the Lib/keywords.py file from the grammar."
)
@@ -58,9 +58,7 @@ def main():
grammar, _, _ = build_parser(args.grammar)
with args.tokens_file as tok_file:
all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
gen: ParserGenerator = CParserGenerator(
grammar, all_tokens, exact_tok, non_exact_tok, file=None
)
gen = CParserGenerator(grammar, all_tokens, exact_tok, non_exact_tok, file=None)
gen.collect_todo()
with args.keyword_file as thefile:
@@ -68,7 +66,9 @@ def main():
all_soft_keywords = sorted(gen.callmakervisitor.soft_keywords)
keywords = "" if not all_keywords else " " + ",\n ".join(map(repr, all_keywords))
soft_keywords = "" if not all_soft_keywords else " " + ",\n ".join(map(repr, all_soft_keywords))
soft_keywords = (
"" if not all_soft_keywords else " " + ",\n ".join(map(repr, all_soft_keywords))
)
thefile.write(TEMPLATE.format(keywords=keywords, soft_keywords=soft_keywords))

View File

@@ -57,13 +57,12 @@ rule[Rule]:
| rulename memoflag? ":" alts NEWLINE { Rule(rulename[0], rulename[1], alts, memo=opt) }
rulename[RuleName]:
| NAME '[' type=NAME '*' ']' { (name.string, type.string+"*") }
| NAME '[' type=NAME ']' { (name.string, type.string) }
| NAME annotation { (name.string, annotation) }
| NAME { (name.string, None) }
# In the future this may return something more complicated
memoflag[str]:
| '(' 'memo' ')' { "memo" }
| '(' "memo" ')' { "memo" }
alts[Rhs]:
| alt "|" alts { Rhs([alt] + alts.alts)}
@@ -84,14 +83,13 @@ items[NamedItemList]:
| named_item { [named_item] }
named_item[NamedItem]:
| NAME '[' type=NAME '*' ']' '=' ~ item {NamedItem(name.string, item, f"{type.string}*")}
| NAME '[' type=NAME ']' '=' ~ item {NamedItem(name.string, item, type.string)}
| NAME annotation '=' ~ item {NamedItem(name.string, item, annotation)}
| NAME '=' ~ item {NamedItem(name.string, item)}
| item {NamedItem(None, item)}
| it=forced_atom {NamedItem(None, it)}
| forced=forced_atom {NamedItem(None, forced)}
| it=lookahead {NamedItem(None, it)}
forced_atom[NamedItem]:
forced_atom[Forced]:
| '&''&' ~ atom {Forced(atom)}
lookahead[LookaheadOrCut]:
@@ -112,19 +110,22 @@ atom[Plain]:
| NAME {NameLeaf(name.string) }
| STRING {StringLeaf(string.string)}
# Mini-grammar for the actions
# Mini-grammar for the actions and annotations
action[str]: "{" ~ target_atoms "}" { target_atoms }
annotation[str]: "[" ~ target_atoms "]" { target_atoms }
target_atoms[str]:
| target_atom target_atoms { target_atom + " " + target_atoms }
| target_atom { target_atom }
target_atom[str]:
| "{" ~ target_atoms "}" { "{" + target_atoms + "}" }
| "{" ~ atoms=target_atoms? "}" { "{" + (atoms or "") + "}" }
| "[" ~ atoms=target_atoms? "]" { "[" + (atoms or "") + "]" }
| NAME "*" { name.string + "*" }
| NAME { name.string }
| NUMBER { number.string }
| STRING { string.string }
| "?" { "?" }
| ":" { ":" }
| !"}" OP { op.string }
| !"}" !"]" OP { op.string }

View File

@@ -4,13 +4,10 @@ import time
import token
import tokenize
import traceback
from abc import abstractmethod
from typing import Any, Callable, cast, Dict, Optional, Tuple, Type, TypeVar
from typing import Any, Callable, ClassVar, Dict, Optional, Tuple, Type, TypeVar, cast
from pegen.tokenizer import exact_token_types
from pegen.tokenizer import Mark
from pegen.tokenizer import Tokenizer
from pegen.tokenizer import Mark, Tokenizer, exact_token_types
T = TypeVar("T")
P = TypeVar("P", bound="Parser")
@@ -45,12 +42,12 @@ def memoize(method: F) -> F:
method_name = method.__name__
def memoize_wrapper(self: P, *args: object) -> T:
mark = self.mark()
mark = self._mark()
key = mark, method_name, args
# Fast path: cache hit, and not verbose.
if key in self._cache and not self._verbose:
tree, endmark = self._cache[key]
self.reset(endmark)
self._reset(endmark)
return tree
# Slow path: no cache hit, or verbose.
verbose = self._verbose
@@ -64,13 +61,13 @@ def memoize(method: F) -> F:
self._level -= 1
if verbose:
print(f"{fill}... {method_name}({argsr}) -> {tree!s:.200}")
endmark = self.mark()
endmark = self._mark()
self._cache[key] = tree, endmark
else:
tree, endmark = self._cache[key]
if verbose:
print(f"{fill}{method_name}({argsr}) -> {tree!s:.200}")
self.reset(endmark)
self._reset(endmark)
return tree
memoize_wrapper.__wrapped__ = method # type: ignore
@@ -82,12 +79,12 @@ def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Option
method_name = method.__name__
def memoize_left_rec_wrapper(self: P) -> Optional[T]:
mark = self.mark()
mark = self._mark()
key = mark, method_name, ()
# Fast path: cache hit, and not verbose.
if key in self._cache and not self._verbose:
tree, endmark = self._cache[key]
self.reset(endmark)
self._reset(endmark)
return tree
# Slow path: no cache hit, or verbose.
verbose = self._verbose
@@ -113,9 +110,13 @@ def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Option
print(f"{fill}Recursive {method_name} at {mark} depth {depth}")
while True:
self.reset(mark)
result = method(self)
endmark = self.mark()
self._reset(mark)
self.in_recursive_rule += 1
try:
result = method(self)
finally:
self.in_recursive_rule -= 1
endmark = self._mark()
depth += 1
if verbose:
print(
@@ -131,24 +132,24 @@ def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Option
break
self._cache[key] = lastresult, lastmark = result, endmark
self.reset(lastmark)
self._reset(lastmark)
tree = lastresult
self._level -= 1
if verbose:
print(f"{fill}{method_name}() -> {tree!s:.200} [cached]")
if tree:
endmark = self.mark()
endmark = self._mark()
else:
endmark = mark
self.reset(endmark)
self._reset(endmark)
self._cache[key] = tree, endmark
else:
tree, endmark = self._cache[key]
if verbose:
print(f"{fill}{method_name}() -> {tree!s:.200} [fresh]")
if tree:
self.reset(endmark)
self._reset(endmark)
return tree
memoize_left_rec_wrapper.__wrapped__ = method # type: ignore
@@ -158,15 +159,21 @@ def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Option
class Parser:
"""Parsing base class."""
KEYWORDS: ClassVar[Tuple[str, ...]]
SOFT_KEYWORDS: ClassVar[Tuple[str, ...]]
def __init__(self, tokenizer: Tokenizer, *, verbose: bool = False):
self._tokenizer = tokenizer
self._verbose = verbose
self._level = 0
self._cache: Dict[Tuple[Mark, str, Tuple[Any, ...]], Tuple[Any, Mark]] = {}
# Integer tracking whether we are in a left recursive rule or not. Can be useful
# for error reporting.
self.in_recursive_rule = 0
# Pass through common tokenizer methods.
# TODO: Rename to _mark and _reset.
self.mark = self._tokenizer.mark
self.reset = self._tokenizer.reset
self._mark = self._tokenizer.mark
self._reset = self._tokenizer.reset
@abstractmethod
def start(self) -> Any:
@@ -179,7 +186,7 @@ class Parser:
@memoize
def name(self) -> Optional[tokenize.TokenInfo]:
tok = self._tokenizer.peek()
if tok.type == token.NAME:
if tok.type == token.NAME and tok.string not in self.KEYWORDS:
return self._tokenizer.getnext()
return None
@@ -204,6 +211,20 @@ class Parser:
return self._tokenizer.getnext()
return None
@memoize
def type_comment(self) -> Optional[tokenize.TokenInfo]:
tok = self._tokenizer.peek()
if tok.type == token.TYPE_COMMENT:
return self._tokenizer.getnext()
return None
@memoize
def soft_keyword(self) -> Optional[tokenize.TokenInfo]:
tok = self._tokenizer.peek()
if tok.type == token.NAME and tok.string in self.SOFT_KEYWORDS:
return self._tokenizer.getnext()
return None
@memoize
def expect(self, type: str) -> Optional[tokenize.TokenInfo]:
tok = self._tokenizer.peek()
@@ -219,23 +240,26 @@ class Parser:
return self._tokenizer.getnext()
return None
def expect_forced(self, res: Any, expectation: str) -> Optional[tokenize.TokenInfo]:
if res is None:
raise self.make_syntax_error(f"expected {expectation}")
return res
def positive_lookahead(self, func: Callable[..., T], *args: object) -> T:
mark = self.mark()
mark = self._mark()
ok = func(*args)
self.reset(mark)
self._reset(mark)
return ok
def negative_lookahead(self, func: Callable[..., object], *args: object) -> bool:
mark = self.mark()
mark = self._mark()
ok = func(*args)
self.reset(mark)
self._reset(mark)
return not ok
def make_syntax_error(self, filename: str = "<unknown>") -> SyntaxError:
def make_syntax_error(self, message: str, filename: str = "<unknown>") -> SyntaxError:
tok = self._tokenizer.diagnose()
return SyntaxError(
"pegen parse failure", (filename, tok.start[0], 1 + tok.start[1], tok.line)
)
return SyntaxError(message, (filename, tok.start[0], 1 + tok.start[1], tok.line))
def simple_parser_main(parser_class: Type[Parser]) -> None:
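
A hedged illustration of the new expect_forced() helper, mirroring the call shape the Python generator emits for a forced token (see visit_Forced in python_generator.py below): a missing forced token raises SyntaxError immediately instead of letting the parser backtrack.

import io
import tokenize
from typing import Optional

from pegen.parser import Parser, memoize
from pegen.tokenizer import Tokenizer

class ForcedColonParser(Parser):
    KEYWORDS = ()
    SOFT_KEYWORDS = ()

    @memoize
    def start(self) -> Optional[str]:
        # start: NAME &&':'
        mark = self._mark()
        if (
            (name := self.name())
            and
            self.expect_forced(self.expect(':'), "':'")
        ):
            return name.string
        self._reset(mark)
        return None

stream = tokenize.generate_tokens(io.StringIO("label ,\n").readline)
ForcedColonParser(Tokenizer(stream)).start()  # raises SyntaxError: expected ':'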

View File

@@ -1,30 +1,29 @@
import contextlib
from abc import abstractmethod
from typing import AbstractSet, Dict, IO, Iterator, List, Optional, Set, Text, Tuple
from typing import IO, AbstractSet, Dict, Iterator, List, Optional, Set, Text, Tuple
from pegen import sccutils
from pegen.grammar import (
Grammar,
Rule,
Rhs,
Alt,
NamedItem,
Plain,
NameLeaf,
Gather,
Grammar,
GrammarError,
GrammarVisitor,
NamedItem,
NameLeaf,
Plain,
Rhs,
Rule,
)
from pegen.grammar import GrammarError, GrammarVisitor
class RuleCheckingVisitor(GrammarVisitor):
def __init__(self, rules: Dict[str, Rule], tokens: Dict[int, str]):
def __init__(self, rules: Dict[str, Rule], tokens: Set[str]):
self.rules = rules
self.tokens = tokens
def visit_NameLeaf(self, node: NameLeaf) -> None:
if node.value not in self.rules and node.value not in self.tokens.values():
# TODO: Add line/col info to (leaf) nodes
if node.value not in self.rules and node.value not in self.tokens:
raise GrammarError(f"Dangling reference to rule {node.value!r}")
def visit_NamedItem(self, node: NamedItem) -> None:
@@ -37,7 +36,7 @@ class ParserGenerator:
callmakervisitor: GrammarVisitor
def __init__(self, grammar: Grammar, tokens: Dict[int, str], file: Optional[IO[Text]]):
def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]):
self.grammar = grammar
self.tokens = tokens
self.rules = grammar.rules
@@ -133,13 +132,22 @@ class ParserGenerator:
self.counter += 1
extra_function_name = f"_loop0_{self.counter}"
extra_function_alt = Alt(
[NamedItem(None, node.separator), NamedItem("elem", node.node)], action="elem",
[NamedItem(None, node.separator), NamedItem("elem", node.node)],
action="elem",
)
self.todo[extra_function_name] = Rule(
extra_function_name, None, Rhs([extra_function_alt]),
extra_function_name,
None,
Rhs([extra_function_alt]),
)
alt = Alt(
[NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],
)
self.todo[name] = Rule(
name,
None,
Rhs([alt]),
)
alt = Alt([NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],)
self.todo[name] = Rule(name, None, Rhs([alt]),)
return name
def dedupe(self, name: str) -> str:

View File

@@ -1,25 +1,28 @@
import ast
import re
import token
from typing import Any, Dict, Optional, IO, Text, Tuple
from typing import IO, Any, Dict, Optional, Sequence, Set, Text, Tuple
from pegen import grammar
from pegen.grammar import (
Alt,
Cut,
Forced,
Gather,
GrammarVisitor,
NameLeaf,
StringLeaf,
Rhs,
NamedItem,
Group,
Lookahead,
PositiveLookahead,
NamedItem,
NameLeaf,
NegativeLookahead,
Opt,
PositiveLookahead,
Repeat0,
Repeat1,
Gather,
Group,
Rhs,
Rule,
Alt,
StringLeaf,
)
from pegen import grammar
from pegen.parser_generator import ParserGenerator
MODULE_PREFIX = """\
@@ -27,7 +30,10 @@ MODULE_PREFIX = """\
# @generated by pegen from {filename}
import ast
from typing import Optional, Any
import sys
import tokenize
from typing import Any, Optional
from pegen.parser import memoize, memoize_left_rec, logger, Parser
@@ -36,25 +42,81 @@ MODULE_SUFFIX = """
if __name__ == '__main__':
from pegen.parser import simple_parser_main
simple_parser_main(GeneratedParser)
simple_parser_main({class_name})
"""
class InvalidNodeVisitor(GrammarVisitor):
def visit_NameLeaf(self, node: NameLeaf) -> bool:
name = node.value
return name.startswith("invalid")
def visit_StringLeaf(self, node: StringLeaf) -> bool:
return False
def visit_NamedItem(self, node: NamedItem) -> bool:
return self.visit(node.item)
def visit_Rhs(self, node: Rhs) -> bool:
return any(self.visit(alt) for alt in node.alts)
def visit_Alt(self, node: Alt) -> bool:
return any(self.visit(item) for item in node.items)
def lookahead_call_helper(self, node: Lookahead) -> bool:
return self.visit(node.node)
def visit_PositiveLookahead(self, node: PositiveLookahead) -> bool:
return self.lookahead_call_helper(node)
def visit_NegativeLookahead(self, node: NegativeLookahead) -> bool:
return self.lookahead_call_helper(node)
def visit_Opt(self, node: Opt) -> bool:
return self.visit(node.node)
def visit_Repeat(self, node: Repeat0) -> Tuple[str, str]:
return self.visit(node.node)
def visit_Gather(self, node: Gather) -> Tuple[str, str]:
return self.visit(node.node)
def visit_Group(self, node: Group) -> bool:
return self.visit(node.rhs)
def visit_Cut(self, node: Cut) -> bool:
return False
def visit_Forced(self, node: Forced) -> bool:
return self.visit(node.node)
class PythonCallMakerVisitor(GrammarVisitor):
def __init__(self, parser_generator: ParserGenerator):
self.gen = parser_generator
self.cache: Dict[Any, Any] = {}
self.keywords: Set[str] = set()
self.soft_keywords: Set[str] = set()
def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
name = node.value
if name in ("NAME", "NUMBER", "STRING", "OP"):
if name == "SOFT_KEYWORD":
return "soft_keyword", "self.soft_keyword()"
if name in ("NAME", "NUMBER", "STRING", "OP", "TYPE_COMMENT"):
name = name.lower()
return name, f"self.{name}()"
if name in ("NEWLINE", "DEDENT", "INDENT", "ENDMARKER", "ASYNC", "AWAIT"):
return name.lower(), f"self.expect({name!r})"
# Avoid using names that can be Python keywords
return "_" + name.lower(), f"self.expect({name!r})"
return name, f"self.{name}()"
def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
val = ast.literal_eval(node.value)
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
if node.value.endswith("'"):
self.keywords.add(val)
else:
self.soft_keywords.add(val)
return "literal", f"self.expect({node.value})"
def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
@@ -125,16 +187,36 @@ class PythonCallMakerVisitor(GrammarVisitor):
def visit_Cut(self, node: Cut) -> Tuple[str, str]:
return "cut", "True"
def visit_Forced(self, node: Forced) -> Tuple[str, str]:
if isinstance(node.node, Group):
_, val = self.visit(node.node.rhs)
return "forced", f"self.expect_forced({val}, '''({node.node.rhs!s})''')"
else:
return (
"forced",
f"self.expect_forced(self.expect({node.node.value}), {node.node.value!r})",
)
class PythonParserGenerator(ParserGenerator, GrammarVisitor):
def __init__(
self,
grammar: grammar.Grammar,
file: Optional[IO[Text]],
tokens: Dict[int, str] = token.tok_name,
tokens: Set[str] = set(token.tok_name.values()),
location_formatting: Optional[str] = None,
unreachable_formatting: Optional[str] = None,
):
tokens.add("SOFT_KEYWORD")
super().__init__(grammar, tokens, file)
self.callmakervisitor = PythonCallMakerVisitor(self)
self.callmakervisitor: PythonCallMakerVisitor = PythonCallMakerVisitor(self)
self.invalidvisitor: InvalidNodeVisitor = InvalidNodeVisitor()
self.unreachable_formatting = unreachable_formatting or "None # pragma: no cover"
self.location_formatting = (
location_formatting
or "lineno=start_lineno, col_offset=start_col_offset, "
"end_lineno=end_lineno, end_col_offset=end_col_offset"
)
def generate(self, filename: str) -> None:
header = self.grammar.metas.get("header", MODULE_PREFIX)
@@ -142,18 +224,35 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
self.print(header.rstrip("\n").format(filename=filename))
subheader = self.grammar.metas.get("subheader", "")
if subheader:
self.print(subheader.format(filename=filename))
self.print("class GeneratedParser(Parser):")
self.print(subheader)
cls_name = self.grammar.metas.get("class", "GeneratedParser")
self.print("# Keywords and soft keywords are listed at the end of the parser definition.")
self.print(f"class {cls_name}(Parser):")
while self.todo:
for rulename, rule in list(self.todo.items()):
del self.todo[rulename]
self.print()
with self.indent():
self.visit(rule)
trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX)
self.print()
with self.indent():
self.print(f"KEYWORDS = {tuple(self.callmakervisitor.keywords)}")
self.print(f"SOFT_KEYWORDS = {tuple(self.callmakervisitor.soft_keywords)}")
trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX.format(class_name=cls_name))
if trailer is not None:
self.print(trailer.rstrip("\n"))
def alts_uses_locations(self, alts: Sequence[Alt]) -> bool:
for alt in alts:
if alt.action and "LOCATIONS" in alt.action:
return True
for n in alt.items:
if isinstance(n.item, Group) and self.alts_uses_locations(n.item.rhs.alts):
return True
return False
def visit_Rule(self, node: Rule) -> None:
is_loop = node.is_loop()
is_gather = node.is_gather()
@@ -173,7 +272,10 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
self.print(f"# {node.name}: {rhs}")
if node.nullable:
self.print(f"# nullable={node.nullable}")
self.print("mark = self.mark()")
self.print("mark = self._mark()")
if self.alts_uses_locations(node.rhs.alts):
self.print("tok = self._tokenizer.peek()")
self.print("start_lineno, start_col_offset = tok.start")
if is_loop:
self.print("children = []")
self.visit(rhs, is_loop=is_loop, is_gather=is_gather)
@@ -200,8 +302,10 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
self.visit(alt, is_loop=is_loop, is_gather=is_gather)
def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None:
has_cut = any(isinstance(item.item, Cut) for item in node.items)
with self.local_variable_context():
self.print("cut = False") # TODO: Only if needed.
if has_cut:
self.print("cut = False")
if is_loop:
self.print("while (")
else:
@@ -227,12 +331,26 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
f"[{self.local_variable_names[0]}] + {self.local_variable_names[1]}"
)
else:
action = f"[{', '.join(self.local_variable_names)}]"
if self.invalidvisitor.visit(node):
action = "UNREACHABLE"
elif len(self.local_variable_names) == 1:
action = f"{self.local_variable_names[0]}"
else:
action = f"[{', '.join(self.local_variable_names)}]"
elif "LOCATIONS" in action:
self.print("tok = self._tokenizer.get_last_non_whitespace_token()")
self.print("end_lineno, end_col_offset = tok.end")
action = action.replace("LOCATIONS", self.location_formatting)
if is_loop:
self.print(f"children.append({action})")
self.print(f"mark = self.mark()")
self.print(f"mark = self._mark()")
else:
if "UNREACHABLE" in action:
action = action.replace("UNREACHABLE", self.unreachable_formatting)
self.print(f"return {action}")
self.print("self.reset(mark)")
self.print("self._reset(mark)")
# Skip remaining alternatives if a cut was reached.
self.print("if cut: return None") # TODO: Only if needed.
if has_cut:
self.print("if cut: return None")

View File

@@ -18,7 +18,7 @@ from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer
ALL_TOKENS = token.tok_name
EXACT_TOKENS = token.EXACT_TOKEN_TYPES # type: ignore
EXACT_TOKENS = token.EXACT_TOKEN_TYPES
NON_EXACT_TOKENS = {
name for index, name in token.tok_name.items() if index not in EXACT_TOKENS.values()
}
@@ -42,7 +42,7 @@ def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = F
parser = parser_class(tokenizer, verbose=verbose)
result = parser.start()
if result is None:
raise parser.make_syntax_error()
raise parser.make_syntax_error("invalid syntax")
return result
@@ -66,6 +66,7 @@ def import_file(full_name: str, path: str) -> Any:
"""Import a python module from a path"""
spec = importlib.util.spec_from_file_location(full_name, path)
assert spec is not None
mod = importlib.util.module_from_spec(spec)
# We assume this is not None and has an exec_module() method.

View File

@@ -1,10 +1,10 @@
import token
import tokenize
from typing import List, Iterator
from typing import Dict, Iterator, List
Mark = int # NewType('Mark', int)
exact_token_types = token.EXACT_TOKEN_TYPES # type: ignore
exact_token_types = token.EXACT_TOKEN_TYPES
def shorttok(tok: tokenize.TokenInfo) -> str:
@@ -19,26 +19,22 @@ class Tokenizer:
_tokens: List[tokenize.TokenInfo]
def __init__(self, tokengen: Iterator[tokenize.TokenInfo], *, verbose: bool = False):
def __init__(
self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False
):
self._tokengen = tokengen
self._tokens = []
self._index = 0
self._verbose = verbose
self._lines: Dict[int, str] = {}
self._path = path
if verbose:
self.report(False, False)
def getnext(self) -> tokenize.TokenInfo:
"""Return the next token and updates the index."""
cached = True
while self._index == len(self._tokens):
tok = next(self._tokengen)
if tok.type in (tokenize.NL, tokenize.COMMENT):
continue
if tok.type == token.ERRORTOKEN and tok.string.isspace():
continue
self._tokens.append(tok)
cached = False
tok = self._tokens[self._index]
cached = not self._index == len(self._tokens)
tok = self.peek()
self._index += 1
if self._verbose:
self.report(cached, False)
@@ -52,7 +48,15 @@ class Tokenizer:
continue
if tok.type == token.ERRORTOKEN and tok.string.isspace():
continue
if (
tok.type == token.NEWLINE
and self._tokens
and self._tokens[-1].type == token.NEWLINE
):
continue
self._tokens.append(tok)
if not self._path:
self._lines[tok.start[0]] = tok.line
return self._tokens[self._index]
def diagnose(self) -> tokenize.TokenInfo:
@@ -60,6 +64,34 @@ class Tokenizer:
self.getnext()
return self._tokens[-1]
def get_last_non_whitespace_token(self) -> tokenize.TokenInfo:
for tok in reversed(self._tokens[: self._index]):
if tok.type != tokenize.ENDMARKER and (
tok.type < tokenize.NEWLINE or tok.type > tokenize.DEDENT
):
break
return tok
def get_lines(self, line_numbers: List[int]) -> List[str]:
"""Retrieve source lines corresponding to line numbers."""
if self._lines:
lines = self._lines
else:
n = len(line_numbers)
lines = {}
count = 0
seen = 0
with open(self._path) as f:
for l in f:
count += 1
if count in line_numbers:
seen += 1
lines[count] = l
if seen == n:
break
return [lines[n] for n in line_numbers]
def mark(self) -> Mark:
return self._index
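
A minimal sketch of driving the Tokenizer above directly (the generated parsers do this through Parser); the source string is a placeholder.

import io
import tokenize

from pegen.tokenizer import Tokenizer

source = "x = 1\n"
tok = Tokenizer(tokenize.generate_tokens(io.StringIO(source).readline))
mark = tok.mark()
first = tok.getnext()       # NAME 'x' (NL and COMMENT tokens are skipped)
tok.reset(mark)             # rewind to the saved position
assert tok.peek() is first  # the same cached token is served again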

View File

@@ -1,51 +1,45 @@
from typing import Optional
from pegen import grammar
from pegen.grammar import (
Alt,
Cut,
Gather,
GrammarVisitor,
Group,
Lookahead,
NamedItem,
NameLeaf,
NegativeLookahead,
Opt,
PositiveLookahead,
Repeat0,
Repeat1,
Rhs,
Rule,
StringLeaf,
Rhs,
)
class ValidationError(Exception):
pass
class GrammarValidator(GrammarVisitor):
def __init__(self, grammar: grammar.Grammar):
self.grammar = grammar
self.rulename = None
def validate_rule(self, rulename: str, node: Rule):
class GrammarValidator(GrammarVisitor):
def __init__(self, grammar: grammar.Grammar) -> None:
self.grammar = grammar
self.rulename: Optional[str] = None
def validate_rule(self, rulename: str, node: Rule) -> None:
self.rulename = rulename
self.visit(node)
self.rulename = None
class SubRuleValidator(GrammarValidator):
def visit_Rhs(self, node: Rule):
def visit_Rhs(self, node: Rhs) -> None:
for index, alt in enumerate(node.alts):
alts_to_consider = node.alts[index+1:]
alts_to_consider = node.alts[index + 1 :]
for other_alt in alts_to_consider:
self.check_intersection(alt, other_alt)
def check_intersection(self, first_alt: Alt, second_alt: Alt) -> bool:
def check_intersection(self, first_alt: Alt, second_alt: Alt) -> None:
if str(second_alt).startswith(str(first_alt)):
raise ValidationError(
f"In {self.rulename} there is an alternative that will "
f"never be visited:\n{second_alt}")
f"In {self.rulename} there is an alternative that will "
f"never be visited:\n{second_alt}"
)
def validate_grammar(the_grammar: grammar.Grammar):
def validate_grammar(the_grammar: grammar.Grammar) -> None:
for validator_cls in GrammarValidator.__subclasses__():
validator = validator_cls(the_grammar)
for rule_name, rule in the_grammar.rules.items():
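
A small sketch of what SubRuleValidator catches, assuming pegen.validator as the import path and using build_parser() from pegen.build (shown earlier in this diff) to load a grammar whose second alternative is shadowed by the first.

from pegen.build import build_parser
from pegen.validator import ValidationError, validate_grammar

with open("shadowed.gram", "w") as f:
    f.write("start: 'a' | 'a' 'b'\n")  # 'a' 'b' can never be reached

grammar, _, _ = build_parser("shadowed.gram")
try:
    validate_grammar(grammar)
except ValidationError as exc:
    print(exc)  # In start there is an alternative that will never be visited: ...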

View File

@@ -76,7 +76,10 @@ def run_benchmark_stdlib(subcommand):
parse_directory(
"../../Lib",
verbose=False,
excluded_files=["*/bad*", "*/lib2to3/tests/data/*",],
excluded_files=[
"*/bad*",
"*/lib2to3/tests/data/*",
],
short=True,
mode=modes[subcommand],
)

View File

@@ -8,7 +8,8 @@ from typing import Dict, Any
from urllib.request import urlretrieve
argparser = argparse.ArgumentParser(
prog="download_pypi_packages", description="Helper program to download PyPI packages",
prog="download_pypi_packages",
description="Helper program to download PyPI packages",
)
argparser.add_argument(
"-n", "--number", type=int, default=100, help="Number of packages to download"

View File

@@ -41,7 +41,10 @@ from pegen.grammar import (
Rhs,
)
argparser = argparse.ArgumentParser(prog="graph_grammar", description="Graph a grammar tree",)
argparser = argparse.ArgumentParser(
prog="graph_grammar",
description="Graph a grammar tree",
)
argparser.add_argument(
"-s",
"--start",

View File

@@ -19,7 +19,8 @@ from scripts import test_parse_directory
HERE = pathlib.Path(__file__).resolve().parent
argparser = argparse.ArgumentParser(
prog="test_pypi_packages", description="Helper program to test parsing PyPI packages",
prog="test_pypi_packages",
description="Helper program to test parsing PyPI packages",
)
argparser.add_argument(
"-t", "--tree", action="count", help="Compare parse tree to official AST", default=0