mirror of https://github.com/python/cpython
1093 lines
37 KiB
Python
1093 lines
37 KiB
Python
import ast
|
|
import difflib
|
|
import io
|
|
import textwrap
|
|
import unittest
|
|
|
|
from test import test_tools
|
|
from typing import Dict, Any
|
|
from tokenize import TokenInfo, NAME, NEWLINE, NUMBER, OP
|
|
|
|
test_tools.skip_if_missing("peg_generator")
|
|
with test_tools.imports_under_tool("peg_generator"):
|
|
from pegen.grammar_parser import GeneratedParser as GrammarParser
|
|
from pegen.testutil import parse_string, generate_parser, make_parser
|
|
from pegen.grammar import GrammarVisitor, GrammarError, Grammar
|
|
from pegen.grammar_visualizer import ASTGrammarPrinter
|
|
from pegen.parser import Parser
|
|
from pegen.python_generator import PythonParserGenerator
|
|
|
|
|
|
class TestPegen(unittest.TestCase):
|
|
def test_parse_grammar(self) -> None:
|
|
grammar_source = """
|
|
start: sum NEWLINE
|
|
sum: t1=term '+' t2=term { action } | term
|
|
term: NUMBER
|
|
"""
|
|
expected = """
|
|
start: sum NEWLINE
|
|
sum: term '+' term | term
|
|
term: NUMBER
|
|
"""
|
|
grammar: Grammar = parse_string(grammar_source, GrammarParser)
|
|
rules = grammar.rules
|
|
self.assertEqual(str(grammar), textwrap.dedent(expected).strip())
|
|
# Check the str() and repr() of a few rules; AST nodes don't support ==.
|
|
self.assertEqual(str(rules["start"]), "start: sum NEWLINE")
|
|
self.assertEqual(str(rules["sum"]), "sum: term '+' term | term")
|
|
expected_repr = (
|
|
"Rule('term', None, Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
|
|
)
|
|
self.assertEqual(repr(rules["term"]), expected_repr)
|
|
|
|
def test_long_rule_str(self) -> None:
|
|
grammar_source = """
|
|
start: zero | one | one zero | one one | one zero zero | one zero one | one one zero | one one one
|
|
"""
|
|
expected = """
|
|
start:
|
|
| zero
|
|
| one
|
|
| one zero
|
|
| one one
|
|
| one zero zero
|
|
| one zero one
|
|
| one one zero
|
|
| one one one
|
|
"""
|
|
grammar: Grammar = parse_string(grammar_source, GrammarParser)
|
|
self.assertEqual(str(grammar.rules["start"]), textwrap.dedent(expected).strip())
|
|
|
|
def test_typed_rules(self) -> None:
|
|
grammar = """
|
|
start[int]: sum NEWLINE
|
|
sum[int]: t1=term '+' t2=term { action } | term
|
|
term[int]: NUMBER
|
|
"""
|
|
rules = parse_string(grammar, GrammarParser).rules
|
|
# Check the str() and repr() of a few rules; AST nodes don't support ==.
|
|
self.assertEqual(str(rules["start"]), "start: sum NEWLINE")
|
|
self.assertEqual(str(rules["sum"]), "sum: term '+' term | term")
|
|
self.assertEqual(
|
|
repr(rules["term"]),
|
|
"Rule('term', 'int', Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))",
|
|
)
|
|
|
|
def test_gather(self) -> None:
|
|
grammar = """
|
|
start: ','.thing+ NEWLINE
|
|
thing: NUMBER
|
|
"""
|
|
rules = parse_string(grammar, GrammarParser).rules
|
|
self.assertEqual(str(rules["start"]), "start: ','.thing+ NEWLINE")
|
|
self.assertTrue(
|
|
repr(rules["start"]).startswith(
|
|
"Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'"
|
|
)
|
|
)
|
|
self.assertEqual(str(rules["thing"]), "thing: NUMBER")
|
|
parser_class = make_parser(grammar)
|
|
node = parse_string("42\n", parser_class)
|
|
node = parse_string("1, 2\n", parser_class)
|
|
self.assertEqual(
|
|
node,
|
|
[
|
|
[
|
|
TokenInfo(
|
|
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2\n"
|
|
),
|
|
TokenInfo(
|
|
NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2\n"
|
|
),
|
|
],
|
|
TokenInfo(
|
|
NEWLINE, string="\n", start=(1, 4), end=(1, 5), line="1, 2\n"
|
|
),
|
|
],
|
|
)
|
|
|
|
def test_expr_grammar(self) -> None:
|
|
grammar = """
|
|
start: sum NEWLINE
|
|
sum: term '+' term | term
|
|
term: NUMBER
|
|
"""
|
|
parser_class = make_parser(grammar)
|
|
node = parse_string("42\n", parser_class)
|
|
self.assertEqual(
|
|
node,
|
|
[
|
|
TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n"),
|
|
TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
|
|
],
|
|
)
|
|
|
|
def test_optional_operator(self) -> None:
|
|
grammar = """
|
|
start: sum NEWLINE
|
|
sum: term ('+' term)?
|
|
term: NUMBER
|
|
"""
|
|
parser_class = make_parser(grammar)
|
|
node = parse_string("1 + 2\n", parser_class)
|
|
self.assertEqual(
|
|
node,
|
|
[
|
|
[
|
|
TokenInfo(
|
|
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n"
|
|
),
|
|
[
|
|
TokenInfo(
|
|
OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"
|
|
),
|
|
TokenInfo(
|
|
NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n"
|
|
),
|
|
],
|
|
],
|
|
TokenInfo(
|
|
NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"
|
|
),
|
|
],
|
|
)
|
|
node = parse_string("1\n", parser_class)
|
|
self.assertEqual(
|
|
node,
|
|
[
|
|
[
|
|
TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
|
|
None,
|
|
],
|
|
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
|
|
],
|
|
)
|
|
|
|
def test_optional_literal(self) -> None:
|
|
grammar = """
|
|
start: sum NEWLINE
|
|
sum: term '+' ?
|
|
term: NUMBER
|
|
"""
|
|
parser_class = make_parser(grammar)
|
|
node = parse_string("1+\n", parser_class)
|
|
self.assertEqual(
|
|
node,
|
|
[
|
|
[
|
|
TokenInfo(
|
|
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n"
|
|
),
|
|
TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"),
|
|
],
|
|
TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"),
|
|
],
|
|
)
|
|
node = parse_string("1\n", parser_class)
|
|
self.assertEqual(
|
|
node,
|
|
[
|
|
[
|
|
TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
|
|
None,
|
|
],
|
|
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
|
|
],
|
|
)
|
|
|
|
def test_alt_optional_operator(self) -> None:
|
|
grammar = """
|
|
start: sum NEWLINE
|
|
sum: term ['+' term]
|
|
term: NUMBER
|
|
"""
|
|
parser_class = make_parser(grammar)
|
|
node = parse_string("1 + 2\n", parser_class)
|
|
self.assertEqual(
|
|
node,
|
|
[
|
|
[
|
|
TokenInfo(
|
|
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n"
|
|
),
|
|
[
|
|
TokenInfo(
|
|
OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"
|
|
),
|
|
TokenInfo(
|
|
NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n"
|
|
),
|
|
],
|
|
],
|
|
TokenInfo(
|
|
NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"
|
|
),
|
|
],
|
|
)
|
|
node = parse_string("1\n", parser_class)
|
|
self.assertEqual(
|
|
node,
|
|
[
|
|
[
|
|
TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
|
|
None,
|
|
],
|
|
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
|
|
],
|
|
)
|
|
|
|
def test_repeat_0_simple(self) -> None:
|
|
grammar = """
|
|
start: thing thing* NEWLINE
|
|
thing: NUMBER
|
|
"""
|
|
parser_class = make_parser(grammar)
|
|
node = parse_string("1 2 3\n", parser_class)
|
|
self.assertEqual(
|
|
node,
|
|
[
|
|
TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"),
|
|
[
|
|
TokenInfo(
|
|
NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n"
|
|
),
|
|
TokenInfo(
|
|
NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n"
|
|
),
|
|
],
|
|
TokenInfo(
|
|
NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"
|
|
),
|
|
],
|
|
)
|
|
node = parse_string("1\n", parser_class)
|
|
self.assertEqual(
|
|
node,
|
|
[
|
|
TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
|
|
[],
|
|
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
|
|
],
|
|
)
|
|
|
|
def test_repeat_0_complex(self) -> None:
|
|
grammar = """
|
|
start: term ('+' term)* NEWLINE
|
|
term: NUMBER
|
|
"""
|
|
parser_class = make_parser(grammar)
|
|
node = parse_string("1 + 2 + 3\n", parser_class)
|
|
self.assertEqual(
|
|
node,
|
|
[
|
|
TokenInfo(
|
|
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n"
|
|
),
|
|
[
|
|
[
|
|
TokenInfo(
|
|
OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
|
|
),
|
|
TokenInfo(
|
|
NUMBER,
|
|
string="2",
|
|
start=(1, 4),
|
|
end=(1, 5),
|
|
line="1 + 2 + 3\n",
|
|
),
|
|
],
|
|
[
|
|
TokenInfo(
|
|
OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
|
|
),
|
|
TokenInfo(
|
|
NUMBER,
|
|
string="3",
|
|
start=(1, 8),
|
|
end=(1, 9),
|
|
line="1 + 2 + 3\n",
|
|
),
|
|
],
|
|
],
|
|
TokenInfo(
|
|
NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
|
|
),
|
|
],
|
|
)
|
|
|
|
def test_repeat_1_simple(self) -> None:
|
|
grammar = """
|
|
start: thing thing+ NEWLINE
|
|
thing: NUMBER
|
|
"""
|
|
parser_class = make_parser(grammar)
|
|
node = parse_string("1 2 3\n", parser_class)
|
|
self.assertEqual(
|
|
node,
|
|
[
|
|
TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"),
|
|
[
|
|
TokenInfo(
|
|
NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n"
|
|
),
|
|
TokenInfo(
|
|
NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n"
|
|
),
|
|
],
|
|
TokenInfo(
|
|
NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"
|
|
),
|
|
],
|
|
)
|
|
with self.assertRaises(SyntaxError):
|
|
parse_string("1\n", parser_class)
|
|
|
|
def test_repeat_1_complex(self) -> None:
|
|
grammar = """
|
|
start: term ('+' term)+ NEWLINE
|
|
term: NUMBER
|
|
"""
|
|
parser_class = make_parser(grammar)
|
|
node = parse_string("1 + 2 + 3\n", parser_class)
|
|
self.assertEqual(
|
|
node,
|
|
[
|
|
TokenInfo(
|
|
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n"
|
|
),
|
|
[
|
|
[
|
|
TokenInfo(
|
|
OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
|
|
),
|
|
TokenInfo(
|
|
NUMBER,
|
|
string="2",
|
|
start=(1, 4),
|
|
end=(1, 5),
|
|
line="1 + 2 + 3\n",
|
|
),
|
|
],
|
|
[
|
|
TokenInfo(
|
|
OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
|
|
),
|
|
TokenInfo(
|
|
NUMBER,
|
|
string="3",
|
|
start=(1, 8),
|
|
end=(1, 9),
|
|
line="1 + 2 + 3\n",
|
|
),
|
|
],
|
|
],
|
|
TokenInfo(
|
|
NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
|
|
),
|
|
],
|
|
)
|
|
with self.assertRaises(SyntaxError):
|
|
parse_string("1\n", parser_class)
|
|
|
|
def test_repeat_with_sep_simple(self) -> None:
|
|
grammar = """
|
|
start: ','.thing+ NEWLINE
|
|
thing: NUMBER
|
|
"""
|
|
parser_class = make_parser(grammar)
|
|
node = parse_string("1, 2, 3\n", parser_class)
|
|
self.assertEqual(
|
|
node,
|
|
[
|
|
[
|
|
TokenInfo(
|
|
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n"
|
|
),
|
|
TokenInfo(
|
|
NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n"
|
|
),
|
|
TokenInfo(
|
|
NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n"
|
|
),
|
|
],
|
|
TokenInfo(
|
|
NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n"
|
|
),
|
|
],
|
|
)
|
|
|
|
def test_left_recursive(self) -> None:
|
|
grammar_source = """
|
|
start: expr NEWLINE
|
|
expr: ('-' term | expr '+' term | term)
|
|
term: NUMBER
|
|
foo: NAME+
|
|
bar: NAME*
|
|
baz: NAME?
|
|
"""
|
|
grammar: Grammar = parse_string(grammar_source, GrammarParser)
|
|
parser_class = generate_parser(grammar)
|
|
rules = grammar.rules
|
|
self.assertFalse(rules["start"].left_recursive)
|
|
self.assertTrue(rules["expr"].left_recursive)
|
|
self.assertFalse(rules["term"].left_recursive)
|
|
self.assertFalse(rules["foo"].left_recursive)
|
|
self.assertFalse(rules["bar"].left_recursive)
|
|
self.assertFalse(rules["baz"].left_recursive)
|
|
node = parse_string("1 + 2 + 3\n", parser_class)
|
|
self.assertEqual(
|
|
node,
|
|
[
|
|
[
|
|
[
|
|
TokenInfo(
|
|
NUMBER,
|
|
string="1",
|
|
start=(1, 0),
|
|
end=(1, 1),
|
|
line="1 + 2 + 3\n",
|
|
),
|
|
TokenInfo(
|
|
OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
|
|
),
|
|
TokenInfo(
|
|
NUMBER,
|
|
string="2",
|
|
start=(1, 4),
|
|
end=(1, 5),
|
|
line="1 + 2 + 3\n",
|
|
),
|
|
],
|
|
TokenInfo(
|
|
OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
|
|
),
|
|
TokenInfo(
|
|
NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n"
|
|
),
|
|
],
|
|
TokenInfo(
|
|
NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
|
|
),
|
|
],
|
|
)
|
|
|
|
def test_python_expr(self) -> None:
|
|
grammar = """
|
|
start: expr NEWLINE? $ { ast.Expression(expr, lineno=1, col_offset=0) }
|
|
expr: ( expr '+' term { ast.BinOp(expr, ast.Add(), term, lineno=expr.lineno, col_offset=expr.col_offset, end_lineno=term.end_lineno, end_col_offset=term.end_col_offset) }
|
|
| expr '-' term { ast.BinOp(expr, ast.Sub(), term, lineno=expr.lineno, col_offset=expr.col_offset, end_lineno=term.end_lineno, end_col_offset=term.end_col_offset) }
|
|
| term { term }
|
|
)
|
|
term: ( l=term '*' r=factor { ast.BinOp(l, ast.Mult(), r, lineno=l.lineno, col_offset=l.col_offset, end_lineno=r.end_lineno, end_col_offset=r.end_col_offset) }
|
|
| l=term '/' r=factor { ast.BinOp(l, ast.Div(), r, lineno=l.lineno, col_offset=l.col_offset, end_lineno=r.end_lineno, end_col_offset=r.end_col_offset) }
|
|
| factor { factor }
|
|
)
|
|
factor: ( '(' expr ')' { expr }
|
|
| atom { atom }
|
|
)
|
|
atom: ( n=NAME { ast.Name(id=n.string, ctx=ast.Load(), lineno=n.start[0], col_offset=n.start[1], end_lineno=n.end[0], end_col_offset=n.end[1]) }
|
|
| n=NUMBER { ast.Constant(value=ast.literal_eval(n.string), lineno=n.start[0], col_offset=n.start[1], end_lineno=n.end[0], end_col_offset=n.end[1]) }
|
|
)
|
|
"""
|
|
parser_class = make_parser(grammar)
|
|
node = parse_string("(1 + 2*3 + 5)/(6 - 2)\n", parser_class)
|
|
code = compile(node, "", "eval")
|
|
val = eval(code)
|
|
self.assertEqual(val, 3.0)
|
|
|
|
def test_nullable(self) -> None:
|
|
grammar_source = """
|
|
start: sign NUMBER
|
|
sign: ['-' | '+']
|
|
"""
|
|
grammar: Grammar = parse_string(grammar_source, GrammarParser)
|
|
out = io.StringIO()
|
|
genr = PythonParserGenerator(grammar, out)
|
|
rules = grammar.rules
|
|
self.assertFalse(rules["start"].nullable) # Not None!
|
|
self.assertTrue(rules["sign"].nullable)
|
|
|
|
def test_advanced_left_recursive(self) -> None:
|
|
grammar_source = """
|
|
start: NUMBER | sign start
|
|
sign: ['-']
|
|
"""
|
|
grammar: Grammar = parse_string(grammar_source, GrammarParser)
|
|
out = io.StringIO()
|
|
genr = PythonParserGenerator(grammar, out)
|
|
rules = grammar.rules
|
|
self.assertFalse(rules["start"].nullable) # Not None!
|
|
self.assertTrue(rules["sign"].nullable)
|
|
self.assertTrue(rules["start"].left_recursive)
|
|
self.assertFalse(rules["sign"].left_recursive)
|
|
|
|
def test_mutually_left_recursive(self) -> None:
|
|
grammar_source = """
|
|
start: foo 'E'
|
|
foo: bar 'A' | 'B'
|
|
bar: foo 'C' | 'D'
|
|
"""
|
|
grammar: Grammar = parse_string(grammar_source, GrammarParser)
|
|
out = io.StringIO()
|
|
genr = PythonParserGenerator(grammar, out)
|
|
rules = grammar.rules
|
|
self.assertFalse(rules["start"].left_recursive)
|
|
self.assertTrue(rules["foo"].left_recursive)
|
|
self.assertTrue(rules["bar"].left_recursive)
|
|
genr.generate("<string>")
|
|
ns: Dict[str, Any] = {}
|
|
exec(out.getvalue(), ns)
|
|
parser_class: Type[Parser] = ns["GeneratedParser"]
|
|
node = parse_string("D A C A E", parser_class)
|
|
|
|
self.assertEqual(
|
|
node,
|
|
[
|
|
[
|
|
[
|
|
[
|
|
TokenInfo(
|
|
type=NAME,
|
|
string="D",
|
|
start=(1, 0),
|
|
end=(1, 1),
|
|
line="D A C A E",
|
|
),
|
|
TokenInfo(
|
|
type=NAME,
|
|
string="A",
|
|
start=(1, 2),
|
|
end=(1, 3),
|
|
line="D A C A E",
|
|
),
|
|
],
|
|
TokenInfo(
|
|
type=NAME,
|
|
string="C",
|
|
start=(1, 4),
|
|
end=(1, 5),
|
|
line="D A C A E",
|
|
),
|
|
],
|
|
TokenInfo(
|
|
type=NAME,
|
|
string="A",
|
|
start=(1, 6),
|
|
end=(1, 7),
|
|
line="D A C A E",
|
|
),
|
|
],
|
|
TokenInfo(
|
|
type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"
|
|
),
|
|
],
|
|
)
|
|
node = parse_string("B C A E", parser_class)
|
|
self.assertEqual(
|
|
node,
|
|
[
|
|
[
|
|
[
|
|
TokenInfo(
|
|
type=NAME,
|
|
string="B",
|
|
start=(1, 0),
|
|
end=(1, 1),
|
|
line="B C A E",
|
|
),
|
|
TokenInfo(
|
|
type=NAME,
|
|
string="C",
|
|
start=(1, 2),
|
|
end=(1, 3),
|
|
line="B C A E",
|
|
),
|
|
],
|
|
TokenInfo(
|
|
type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"
|
|
),
|
|
],
|
|
TokenInfo(
|
|
type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"
|
|
),
|
|
],
|
|
)
|
|
|
|
def test_nasty_mutually_left_recursive(self) -> None:
|
|
# This grammar does not recognize 'x - + =', much to my chagrin.
|
|
# But that's the way PEG works.
|
|
# [Breathlessly]
|
|
# The problem is that the toplevel target call
|
|
# recurses into maybe, which recognizes 'x - +',
|
|
# and then the toplevel target looks for another '+',
|
|
# which fails, so it retreats to NAME,
|
|
# which succeeds, so we end up just recognizing 'x',
|
|
# and then start fails because there's no '=' after that.
|
|
grammar_source = """
|
|
start: target '='
|
|
target: maybe '+' | NAME
|
|
maybe: maybe '-' | target
|
|
"""
|
|
grammar: Grammar = parse_string(grammar_source, GrammarParser)
|
|
out = io.StringIO()
|
|
genr = PythonParserGenerator(grammar, out)
|
|
genr.generate("<string>")
|
|
ns: Dict[str, Any] = {}
|
|
exec(out.getvalue(), ns)
|
|
parser_class = ns["GeneratedParser"]
|
|
with self.assertRaises(SyntaxError):
|
|
parse_string("x - + =", parser_class)
|
|
|
|
def test_lookahead(self) -> None:
|
|
grammar = """
|
|
start: (expr_stmt | assign_stmt) &'.'
|
|
expr_stmt: !(target '=') expr
|
|
assign_stmt: target '=' expr
|
|
expr: term ('+' term)*
|
|
target: NAME
|
|
term: NUMBER
|
|
"""
|
|
parser_class = make_parser(grammar)
|
|
node = parse_string("foo = 12 + 12 .", parser_class)
|
|
self.assertEqual(
|
|
node,
|
|
[
|
|
TokenInfo(
|
|
NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 ."
|
|
),
|
|
TokenInfo(
|
|
OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ."
|
|
),
|
|
[
|
|
TokenInfo(
|
|
NUMBER,
|
|
string="12",
|
|
start=(1, 6),
|
|
end=(1, 8),
|
|
line="foo = 12 + 12 .",
|
|
),
|
|
[
|
|
[
|
|
TokenInfo(
|
|
OP,
|
|
string="+",
|
|
start=(1, 9),
|
|
end=(1, 10),
|
|
line="foo = 12 + 12 .",
|
|
),
|
|
TokenInfo(
|
|
NUMBER,
|
|
string="12",
|
|
start=(1, 11),
|
|
end=(1, 13),
|
|
line="foo = 12 + 12 .",
|
|
),
|
|
]
|
|
],
|
|
],
|
|
],
|
|
)
|
|
|
|
def test_named_lookahead_error(self) -> None:
|
|
grammar = """
|
|
start: foo=!'x' NAME
|
|
"""
|
|
with self.assertRaises(SyntaxError):
|
|
make_parser(grammar)
|
|
|
|
def test_start_leader(self) -> None:
|
|
grammar = """
|
|
start: attr | NAME
|
|
attr: start '.' NAME
|
|
"""
|
|
# Would assert False without a special case in compute_left_recursives().
|
|
make_parser(grammar)
|
|
|
|
def test_opt_sequence(self) -> None:
|
|
grammar = """
|
|
start: [NAME*]
|
|
"""
|
|
# This case was failing because of a double trailing comma at the end
|
|
# of a line in the generated source. See bpo-41044
|
|
make_parser(grammar)
|
|
|
|
def test_left_recursion_too_complex(self) -> None:
|
|
grammar = """
|
|
start: foo
|
|
foo: bar '+' | baz '+' | '+'
|
|
bar: baz '-' | foo '-' | '-'
|
|
baz: foo '*' | bar '*' | '*'
|
|
"""
|
|
with self.assertRaises(ValueError) as errinfo:
|
|
make_parser(grammar)
|
|
self.assertTrue("no leader" in str(errinfo.exception.value))
|
|
|
|
def test_cut(self) -> None:
|
|
grammar = """
|
|
start: '(' ~ expr ')'
|
|
expr: NUMBER
|
|
"""
|
|
parser_class = make_parser(grammar)
|
|
node = parse_string("(1)", parser_class)
|
|
self.assertEqual(
|
|
node,
|
|
[
|
|
TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"),
|
|
TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)"),
|
|
TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"),
|
|
],
|
|
)
|
|
|
|
def test_dangling_reference(self) -> None:
|
|
grammar = """
|
|
start: foo ENDMARKER
|
|
foo: bar NAME
|
|
"""
|
|
with self.assertRaises(GrammarError):
|
|
parser_class = make_parser(grammar)
|
|
|
|
def test_bad_token_reference(self) -> None:
|
|
grammar = """
|
|
start: foo
|
|
foo: NAMEE
|
|
"""
|
|
with self.assertRaises(GrammarError):
|
|
parser_class = make_parser(grammar)
|
|
|
|
def test_missing_start(self) -> None:
|
|
grammar = """
|
|
foo: NAME
|
|
"""
|
|
with self.assertRaises(GrammarError):
|
|
parser_class = make_parser(grammar)
|
|
|
|
def test_invalid_rule_name(self) -> None:
|
|
grammar = """
|
|
start: _a b
|
|
_a: 'a'
|
|
b: 'b'
|
|
"""
|
|
with self.assertRaisesRegex(GrammarError, "cannot start with underscore: '_a'"):
|
|
parser_class = make_parser(grammar)
|
|
|
|
def test_invalid_variable_name(self) -> None:
|
|
grammar = """
|
|
start: a b
|
|
a: _x='a'
|
|
b: 'b'
|
|
"""
|
|
with self.assertRaisesRegex(GrammarError, "cannot start with underscore: '_x'"):
|
|
parser_class = make_parser(grammar)
|
|
|
|
def test_invalid_variable_name_in_temporal_rule(self) -> None:
|
|
grammar = """
|
|
start: a b
|
|
a: (_x='a' | 'b') | 'c'
|
|
b: 'b'
|
|
"""
|
|
with self.assertRaisesRegex(GrammarError, "cannot start with underscore: '_x'"):
|
|
parser_class = make_parser(grammar)
|
|
|
|
def test_soft_keyword(self) -> None:
|
|
grammar = """
|
|
start:
|
|
| "number" n=NUMBER { eval(n.string) }
|
|
| "string" n=STRING { n.string }
|
|
| SOFT_KEYWORD l=NAME n=(NUMBER | NAME | STRING) { f"{l.string} = {n.string}"}
|
|
"""
|
|
parser_class = make_parser(grammar)
|
|
self.assertEqual(parse_string("number 1", parser_class, verbose=True), 1)
|
|
self.assertEqual(parse_string("string 'b'", parser_class, verbose=True), "'b'")
|
|
self.assertEqual(
|
|
parse_string("number test 1", parser_class, verbose=True), "test = 1"
|
|
)
|
|
assert (
|
|
parse_string("string test 'b'", parser_class, verbose=True) == "test = 'b'"
|
|
)
|
|
with self.assertRaises(SyntaxError):
|
|
parse_string("test 1", parser_class, verbose=True)
|
|
|
|
def test_forced(self) -> None:
|
|
grammar = """
|
|
start: NAME &&':' | NAME
|
|
"""
|
|
parser_class = make_parser(grammar)
|
|
self.assertTrue(parse_string("number :", parser_class, verbose=True))
|
|
with self.assertRaises(SyntaxError) as e:
|
|
parse_string("a", parser_class, verbose=True)
|
|
|
|
self.assertIn("expected ':'", str(e.exception))
|
|
|
|
def test_forced_with_group(self) -> None:
|
|
grammar = """
|
|
start: NAME &&(':' | ';') | NAME
|
|
"""
|
|
parser_class = make_parser(grammar)
|
|
self.assertTrue(parse_string("number :", parser_class, verbose=True))
|
|
self.assertTrue(parse_string("number ;", parser_class, verbose=True))
|
|
with self.assertRaises(SyntaxError) as e:
|
|
parse_string("a", parser_class, verbose=True)
|
|
self.assertIn("expected (':' | ';')", e.exception.args[0])
|
|
|
|
def test_unreachable_explicit(self) -> None:
|
|
source = """
|
|
start: NAME { UNREACHABLE }
|
|
"""
|
|
grammar = parse_string(source, GrammarParser)
|
|
out = io.StringIO()
|
|
genr = PythonParserGenerator(
|
|
grammar, out, unreachable_formatting="This is a test"
|
|
)
|
|
genr.generate("<string>")
|
|
self.assertIn("This is a test", out.getvalue())
|
|
|
|
def test_unreachable_implicit1(self) -> None:
|
|
source = """
|
|
start: NAME | invalid_input
|
|
invalid_input: NUMBER { None }
|
|
"""
|
|
grammar = parse_string(source, GrammarParser)
|
|
out = io.StringIO()
|
|
genr = PythonParserGenerator(
|
|
grammar, out, unreachable_formatting="This is a test"
|
|
)
|
|
genr.generate("<string>")
|
|
self.assertIn("This is a test", out.getvalue())
|
|
|
|
def test_unreachable_implicit2(self) -> None:
|
|
source = """
|
|
start: NAME | '(' invalid_input ')'
|
|
invalid_input: NUMBER { None }
|
|
"""
|
|
grammar = parse_string(source, GrammarParser)
|
|
out = io.StringIO()
|
|
genr = PythonParserGenerator(
|
|
grammar, out, unreachable_formatting="This is a test"
|
|
)
|
|
genr.generate("<string>")
|
|
self.assertIn("This is a test", out.getvalue())
|
|
|
|
def test_unreachable_implicit3(self) -> None:
|
|
source = """
|
|
start: NAME | invalid_input { None }
|
|
invalid_input: NUMBER
|
|
"""
|
|
grammar = parse_string(source, GrammarParser)
|
|
out = io.StringIO()
|
|
genr = PythonParserGenerator(
|
|
grammar, out, unreachable_formatting="This is a test"
|
|
)
|
|
genr.generate("<string>")
|
|
self.assertNotIn("This is a test", out.getvalue())
|
|
|
|
def test_locations_in_alt_action_and_group(self) -> None:
|
|
grammar = """
|
|
start: t=term NEWLINE? $ { ast.Expression(t, LOCATIONS) }
|
|
term:
|
|
| l=term '*' r=factor { ast.BinOp(l, ast.Mult(), r, LOCATIONS) }
|
|
| l=term '/' r=factor { ast.BinOp(l, ast.Div(), r, LOCATIONS) }
|
|
| factor
|
|
factor:
|
|
| (
|
|
n=NAME { ast.Name(id=n.string, ctx=ast.Load(), LOCATIONS) } |
|
|
n=NUMBER { ast.Constant(value=ast.literal_eval(n.string), LOCATIONS) }
|
|
)
|
|
"""
|
|
parser_class = make_parser(grammar)
|
|
source = "2*3\n"
|
|
o = ast.dump(parse_string(source, parser_class).body, include_attributes=True)
|
|
p = ast.dump(ast.parse(source).body[0].value, include_attributes=True).replace(
|
|
" kind=None,", ""
|
|
)
|
|
diff = "\n".join(
|
|
difflib.unified_diff(
|
|
o.split("\n"), p.split("\n"), "cpython", "python-pegen"
|
|
)
|
|
)
|
|
self.assertFalse(diff)
|
|
|
|
|
|
class TestGrammarVisitor:
|
|
class Visitor(GrammarVisitor):
|
|
def __init__(self) -> None:
|
|
self.n_nodes = 0
|
|
|
|
def visit(self, node: Any, *args: Any, **kwargs: Any) -> None:
|
|
self.n_nodes += 1
|
|
super().visit(node, *args, **kwargs)
|
|
|
|
def test_parse_trivial_grammar(self) -> None:
|
|
grammar = """
|
|
start: 'a'
|
|
"""
|
|
rules = parse_string(grammar, GrammarParser)
|
|
visitor = self.Visitor()
|
|
|
|
visitor.visit(rules)
|
|
|
|
self.assertEqual(visitor.n_nodes, 6)
|
|
|
|
def test_parse_or_grammar(self) -> None:
|
|
grammar = """
|
|
start: rule
|
|
rule: 'a' | 'b'
|
|
"""
|
|
rules = parse_string(grammar, GrammarParser)
|
|
visitor = self.Visitor()
|
|
|
|
visitor.visit(rules)
|
|
|
|
# Grammar/Rule/Rhs/Alt/NamedItem/NameLeaf -> 6
|
|
# Rule/Rhs/ -> 2
|
|
# Alt/NamedItem/StringLeaf -> 3
|
|
# Alt/NamedItem/StringLeaf -> 3
|
|
|
|
self.assertEqual(visitor.n_nodes, 14)
|
|
|
|
def test_parse_repeat1_grammar(self) -> None:
|
|
grammar = """
|
|
start: 'a'+
|
|
"""
|
|
rules = parse_string(grammar, GrammarParser)
|
|
visitor = self.Visitor()
|
|
|
|
visitor.visit(rules)
|
|
|
|
# Grammar/Rule/Rhs/Alt/NamedItem/Repeat1/StringLeaf -> 6
|
|
self.assertEqual(visitor.n_nodes, 7)
|
|
|
|
def test_parse_repeat0_grammar(self) -> None:
|
|
grammar = """
|
|
start: 'a'*
|
|
"""
|
|
rules = parse_string(grammar, GrammarParser)
|
|
visitor = self.Visitor()
|
|
|
|
visitor.visit(rules)
|
|
|
|
# Grammar/Rule/Rhs/Alt/NamedItem/Repeat0/StringLeaf -> 6
|
|
|
|
self.assertEqual(visitor.n_nodes, 7)
|
|
|
|
def test_parse_optional_grammar(self) -> None:
|
|
grammar = """
|
|
start: 'a' ['b']
|
|
"""
|
|
rules = parse_string(grammar, GrammarParser)
|
|
visitor = self.Visitor()
|
|
|
|
visitor.visit(rules)
|
|
|
|
# Grammar/Rule/Rhs/Alt/NamedItem/StringLeaf -> 6
|
|
# NamedItem/Opt/Rhs/Alt/NamedItem/Stringleaf -> 6
|
|
|
|
self.assertEqual(visitor.n_nodes, 12)
|
|
|
|
|
|
class TestGrammarVisualizer(unittest.TestCase):
|
|
def test_simple_rule(self) -> None:
|
|
grammar = """
|
|
start: 'a' 'b'
|
|
"""
|
|
rules = parse_string(grammar, GrammarParser)
|
|
|
|
printer = ASTGrammarPrinter()
|
|
lines: List[str] = []
|
|
printer.print_grammar_ast(rules, printer=lines.append)
|
|
|
|
output = "\n".join(lines)
|
|
expected_output = textwrap.dedent(
|
|
"""\
|
|
└──Rule
|
|
└──Rhs
|
|
└──Alt
|
|
├──NamedItem
|
|
│ └──StringLeaf("'a'")
|
|
└──NamedItem
|
|
└──StringLeaf("'b'")
|
|
"""
|
|
)
|
|
|
|
self.assertEqual(output, expected_output)
|
|
|
|
def test_multiple_rules(self) -> None:
|
|
grammar = """
|
|
start: a b
|
|
a: 'a'
|
|
b: 'b'
|
|
"""
|
|
rules = parse_string(grammar, GrammarParser)
|
|
|
|
printer = ASTGrammarPrinter()
|
|
lines: List[str] = []
|
|
printer.print_grammar_ast(rules, printer=lines.append)
|
|
|
|
output = "\n".join(lines)
|
|
expected_output = textwrap.dedent(
|
|
"""\
|
|
└──Rule
|
|
└──Rhs
|
|
└──Alt
|
|
├──NamedItem
|
|
│ └──NameLeaf('a')
|
|
└──NamedItem
|
|
└──NameLeaf('b')
|
|
|
|
└──Rule
|
|
└──Rhs
|
|
└──Alt
|
|
└──NamedItem
|
|
└──StringLeaf("'a'")
|
|
|
|
└──Rule
|
|
└──Rhs
|
|
└──Alt
|
|
└──NamedItem
|
|
└──StringLeaf("'b'")
|
|
"""
|
|
)
|
|
|
|
self.assertEqual(output, expected_output)
|
|
|
|
def test_deep_nested_rule(self) -> None:
|
|
grammar = """
|
|
start: 'a' ['b'['c'['d']]]
|
|
"""
|
|
rules = parse_string(grammar, GrammarParser)
|
|
|
|
printer = ASTGrammarPrinter()
|
|
lines: List[str] = []
|
|
printer.print_grammar_ast(rules, printer=lines.append)
|
|
|
|
output = "\n".join(lines)
|
|
expected_output = textwrap.dedent(
|
|
"""\
|
|
└──Rule
|
|
└──Rhs
|
|
└──Alt
|
|
├──NamedItem
|
|
│ └──StringLeaf("'a'")
|
|
└──NamedItem
|
|
└──Opt
|
|
└──Rhs
|
|
└──Alt
|
|
├──NamedItem
|
|
│ └──StringLeaf("'b'")
|
|
└──NamedItem
|
|
└──Opt
|
|
└──Rhs
|
|
└──Alt
|
|
├──NamedItem
|
|
│ └──StringLeaf("'c'")
|
|
└──NamedItem
|
|
└──Opt
|
|
└──Rhs
|
|
└──Alt
|
|
└──NamedItem
|
|
└──StringLeaf("'d'")
|
|
"""
|
|
)
|
|
|
|
self.assertEqual(output, expected_output)
|