bpo-40334: Refactor peg_generator to receive a Tokens file when building c code (GH-19745)
This commit is contained in:
parent
3d53d8756f
commit
5b9f4988c9
|
@ -823,7 +823,9 @@ regen-grammar: regen-token
|
|||
.PHONY: regen-pegen
|
||||
regen-pegen:
|
||||
@$(MKDIR_P) $(srcdir)/Parser/pegen
|
||||
PYTHONPATH=$(srcdir)/Tools/peg_generator $(PYTHON_FOR_REGEN) -m pegen -c -q $(srcdir)/Grammar/python.gram \
|
||||
PYTHONPATH=$(srcdir)/Tools/peg_generator $(PYTHON_FOR_REGEN) -m pegen -q c \
|
||||
$(srcdir)/Grammar/python.gram \
|
||||
$(srcdir)/Grammar/Tokens \
|
||||
-o $(srcdir)/Parser/pegen/parse.new.c
|
||||
$(UPDATE_FILE) $(srcdir)/Parser/pegen/parse.c $(srcdir)/Parser/pegen/parse.new.c
|
||||
|
||||
|
|
|
@ -168,7 +168,7 @@
|
|||
</Target>
|
||||
<Target Name="_RegenPegen" BeforeTargets="Build">
|
||||
<!-- Regenerate Parser/pegen/parse.c -->
|
||||
<Exec Command=""$PYTHONPATH=$(srcdir)/Tools/peg_generator" "$(PythonExe)" -m pegen -c -q "$(PySourcePath)Grammar\python.gram" -o "$(IntDir)parse.c"" />
|
||||
<Exec Command=""$PYTHONPATH=$(srcdir)/Tools/peg_generator" "$(PythonExe)" -m pegen -q c "$(PySourcePath)Grammar\python.gram" "$(PySourcePath)Grammar\Tokens" -o "$(IntDir)parse.c"" />
|
||||
<Copy SourceFiles="$(IntDir)parse.c" DestinationFiles="$(PySourcePath)Parser\pegen\parse.c">
|
||||
<Output TaskParameter="CopiedFiles" ItemName="_UpdatedParse" />
|
||||
</Copy>
|
||||
|
|
|
@ -10,6 +10,7 @@ CPYTHON ?= ../../Lib
|
|||
MYPY ?= mypy
|
||||
|
||||
GRAMMAR = ../../Grammar/python.gram
|
||||
TOKENS = ../../Grammar/Tokens
|
||||
TESTFILE = data/cprog.py
|
||||
TIMEFILE = data/xxl.py
|
||||
TESTDIR = .
|
||||
|
@ -20,8 +21,8 @@ data/xxl.py:
|
|||
|
||||
build: peg_extension/parse.c
|
||||
|
||||
peg_extension/parse.c: $(GRAMMAR) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen/pegen.c ../../Parser/pegen/parse_string.c ../../Parser/pegen/*.h pegen/grammar_parser.py
|
||||
$(PYTHON) -m pegen -q -c $(GRAMMAR) -o peg_extension/parse.c --compile-extension
|
||||
peg_extension/parse.c: $(GRAMMAR) $(TOKENS) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen/pegen.c ../../Parser/pegen/parse_string.c ../../Parser/pegen/*.h pegen/grammar_parser.py
|
||||
$(PYTHON) -m pegen -q c $(GRAMMAR) $(TOKENS) -o peg_extension/parse.c --compile-extension
|
||||
|
||||
clean:
|
||||
-rm -f peg_extension/*.o peg_extension/*.so peg_extension/parse.c
|
||||
|
@ -79,7 +80,8 @@ time_stdlib_parse: data/xxl.py
|
|||
|
||||
test_local:
|
||||
$(PYTHON) scripts/test_parse_directory.py \
|
||||
-g $(GRAMMAR) \
|
||||
--grammar-file $(GRAMMAR) \
|
||||
--tokens-file $(TOKENS) \
|
||||
-d $(TESTDIR) \
|
||||
$(TESTFLAGS) \
|
||||
--exclude "*/failset/*" \
|
||||
|
@ -88,7 +90,8 @@ test_local:
|
|||
|
||||
test_global: $(CPYTHON)
|
||||
$(PYTHON) scripts/test_parse_directory.py \
|
||||
-g $(GRAMMAR) \
|
||||
--grammar-file $(GRAMMAR) \
|
||||
--tokens-file $(TOKENS) \
|
||||
-d $(CPYTHON) \
|
||||
$(TESTFLAGS) \
|
||||
--exclude "*/test2to3/*" \
|
||||
|
|
|
@ -11,6 +11,64 @@ import time
|
|||
import token
|
||||
import traceback
|
||||
|
||||
from typing import Tuple
|
||||
|
||||
from pegen.build import Grammar, Parser, Tokenizer, ParserGenerator
|
||||
|
||||
|
||||
def generate_c_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Run the C parser build described by the parsed command line ``args``.

    The grammar, tokens and output paths, the extension switches and the
    verbosity-derived flags are forwarded to ``build_c_parser_and_generator``;
    the four objects it produces are returned as a tuple.

    If the build raises, the exception propagates when ``args.verbose`` is
    truthy.  Otherwise it is reported through ``traceback.print_exception``
    (called with no traceback object), a hint about ``-v`` is written to
    stderr, and ``sys.exit(1)`` is called.
    """
    # Imported here, at call time, exactly as the original code does.
    from pegen.build import build_c_parser_and_generator

    verbosity = args.verbose
    # Verbosity 3+ turns on tokenizer output; verbosity 2 and 4+ turn on
    # parser output.
    trace_tokenizer = verbosity >= 3
    trace_parser = verbosity == 2 or verbosity >= 4

    try:
        grammar, parser, tokenizer, gen = build_c_parser_and_generator(
            args.grammar_filename,
            args.tokens_filename,
            args.output,
            args.compile_extension,
            trace_tokenizer,
            trace_parser,
            verbosity,
            keep_asserts_in_extension=not args.optimized,
            skip_actions=args.skip_actions,
        )
    except Exception as err:
        if verbosity:
            raise  # Show traceback
        traceback.print_exception(err.__class__, err, None)
        sys.stderr.write("For full traceback, use -v\n")
        sys.exit(1)
    else:
        return grammar, parser, tokenizer, gen
|
||||
|
||||
|
||||
def generate_python_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Run the Python parser build described by the parsed command line ``args``.

    The grammar and output paths plus the verbosity-derived flags are
    forwarded to ``build_python_parser_and_generator``; the four objects it
    produces are returned as a tuple.

    If the build raises, the exception propagates when ``args.verbose`` is
    truthy.  Otherwise it is reported through ``traceback.print_exception``
    (called with no traceback object), a hint about ``-v`` is written to
    stderr, and ``sys.exit(1)`` is called.
    """
    # Imported here, at call time, exactly as the original code does.
    from pegen.build import build_python_parser_and_generator

    verbosity = args.verbose
    # Verbosity 3+ turns on tokenizer output; verbosity 2 and 4+ turn on
    # parser output.
    trace_tokenizer = verbosity >= 3
    trace_parser = verbosity == 2 or verbosity >= 4

    try:
        grammar, parser, tokenizer, gen = build_python_parser_and_generator(
            args.grammar_filename,
            args.output,
            trace_tokenizer,
            trace_parser,
            skip_actions=args.skip_actions,
        )
    except Exception as err:
        if verbosity:
            raise  # Show traceback
        traceback.print_exception(err.__class__, err, None)
        sys.stderr.write("For full traceback, use -v\n")
        sys.exit(1)
    else:
        return grammar, parser, tokenizer, gen
|
||||
|
||||
|
||||
argparser = argparse.ArgumentParser(
|
||||
prog="pegen", description="Experimental PEG-like parser generator"
|
||||
|
@ -23,63 +81,52 @@ argparser.add_argument(
|
|||
default=0,
|
||||
help="Print timing stats; repeat for more debug output",
|
||||
)
|
||||
argparser.add_argument(
|
||||
"-c", "--cpython", action="store_true", help="Generate C code for inclusion into CPython"
|
||||
subparsers = argparser.add_subparsers(help="target language for the generated code")
|
||||
|
||||
c_parser = subparsers.add_parser("c", help="Generate C code for inclusion into CPython")
|
||||
c_parser.set_defaults(func=generate_c_code)
|
||||
c_parser.add_argument("grammar_filename", help="Grammar description")
|
||||
c_parser.add_argument("tokens_filename", help="Tokens description")
|
||||
c_parser.add_argument(
|
||||
"-o", "--output", metavar="OUT", default="parse.c", help="Where to write the generated parser"
|
||||
)
|
||||
argparser.add_argument(
|
||||
c_parser.add_argument(
|
||||
"--compile-extension",
|
||||
action="store_true",
|
||||
help="Compile generated C code into an extension module",
|
||||
)
|
||||
argparser.add_argument(
|
||||
c_parser.add_argument(
|
||||
"--optimized", action="store_true", help="Compile the extension in optimized mode"
|
||||
)
|
||||
c_parser.add_argument(
|
||||
"--skip-actions", action="store_true", help="Suppress code emission for rule actions",
|
||||
)
|
||||
|
||||
python_parser = subparsers.add_parser("python", help="Generate Python code")
|
||||
python_parser.set_defaults(func=generate_python_code)
|
||||
python_parser.add_argument("grammar_filename", help="Grammar description")
|
||||
python_parser.add_argument(
|
||||
"-o",
|
||||
"--output",
|
||||
metavar="OUT",
|
||||
help="Where to write the generated parser (default parse.py or parse.c)",
|
||||
default="parse.py",
|
||||
help="Where to write the generated parser",
|
||||
)
|
||||
argparser.add_argument("filename", help="Grammar description")
|
||||
argparser.add_argument(
|
||||
"--optimized", action="store_true", help="Compile the extension in optimized mode"
|
||||
)
|
||||
argparser.add_argument(
|
||||
python_parser.add_argument(
|
||||
"--skip-actions", action="store_true", help="Suppress code emission for rule actions",
|
||||
)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
from pegen.build import build_parser_and_generator
|
||||
from pegen.testutil import print_memstats
|
||||
|
||||
args = argparser.parse_args()
|
||||
verbose = args.verbose
|
||||
verbose_tokenizer = verbose >= 3
|
||||
verbose_parser = verbose == 2 or verbose >= 4
|
||||
if "func" not in args:
|
||||
argparser.error("Must specify the target language mode ('c' or 'python')")
|
||||
|
||||
t0 = time.time()
|
||||
|
||||
output_file = args.output
|
||||
if not output_file:
|
||||
if args.cpython:
|
||||
output_file = "parse.c"
|
||||
else:
|
||||
output_file = "parse.py"
|
||||
|
||||
try:
|
||||
grammar, parser, tokenizer, gen = build_parser_and_generator(
|
||||
args.filename,
|
||||
output_file,
|
||||
args.compile_extension,
|
||||
verbose_tokenizer,
|
||||
verbose_parser,
|
||||
args.verbose,
|
||||
keep_asserts_in_extension=False if args.optimized else True,
|
||||
skip_actions=args.skip_actions,
|
||||
)
|
||||
except Exception as err:
|
||||
if args.verbose:
|
||||
raise # Show traceback
|
||||
traceback.print_exception(err.__class__, err, None)
|
||||
sys.stderr.write("For full traceback, use -v\n")
|
||||
sys.exit(1)
|
||||
grammar, parser, tokenizer, gen = args.func(args)
|
||||
t1 = time.time()
|
||||
|
||||
if not args.quiet:
|
||||
if args.verbose:
|
||||
|
@ -110,8 +157,6 @@ def main() -> None:
|
|||
else:
|
||||
print()
|
||||
|
||||
t1 = time.time()
|
||||
|
||||
if args.verbose:
|
||||
dt = t1 - t0
|
||||
diag = tokenizer.diagnose()
|
||||
|
|
|
@ -3,8 +3,9 @@ import shutil
|
|||
import tokenize
|
||||
import sys
|
||||
import sysconfig
|
||||
import itertools
|
||||
|
||||
from typing import Optional, Tuple
|
||||
from typing import Optional, Tuple, List, IO, Iterator, Set, Dict
|
||||
|
||||
from pegen.c_generator import CParserGenerator
|
||||
from pegen.grammar import Grammar
|
||||
|
@ -17,12 +18,12 @@ from pegen.tokenizer import Tokenizer
|
|||
MOD_DIR = pathlib.Path(__file__).parent
|
||||
|
||||
|
||||
def get_extra_flags(compiler_flags, compiler_py_flags_nodist):
|
||||
def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
|
||||
flags = sysconfig.get_config_var(compiler_flags)
|
||||
py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist)
|
||||
if flags is None or py_flags_nodist is None:
|
||||
return []
|
||||
return f'{flags} {py_flags_nodist}'.split()
|
||||
return f"{flags} {py_flags_nodist}".split()
|
||||
|
||||
|
||||
def compile_c_extension(
|
||||
|
@ -45,15 +46,15 @@ def compile_c_extension(
|
|||
from distutils.core import Distribution, Extension
|
||||
from distutils.command.clean import clean # type: ignore
|
||||
from distutils.command.build_ext import build_ext # type: ignore
|
||||
from distutils.tests.support import fixup_build_ext
|
||||
from distutils.tests.support import fixup_build_ext # type: ignore
|
||||
|
||||
if verbose:
|
||||
distutils.log.set_verbosity(distutils.log.DEBUG)
|
||||
|
||||
source_file_path = pathlib.Path(generated_source_path)
|
||||
extension_name = source_file_path.stem
|
||||
extra_compile_args = get_extra_flags('CFLAGS', 'PY_CFLAGS_NODIST')
|
||||
extra_link_args = get_extra_flags('LDFLAGS', 'PY_LDFLAGS_NODIST')
|
||||
extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
|
||||
extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
|
||||
if keep_asserts:
|
||||
extra_compile_args.append("-UNDEBUG")
|
||||
extension = [
|
||||
|
@ -111,39 +112,69 @@ def build_parser(
|
|||
return grammar, parser, tokenizer
|
||||
|
||||
|
||||
def build_generator(
|
||||
tokenizer: Tokenizer,
|
||||
def generate_token_definitions(tokens: IO[str]) -> Tuple[Dict[str, int], Set[str]]:
    """Read a Tokens description and classify its entries.

    Blank lines and lines starting with ``#`` (after stripping surrounding
    whitespace) are ignored.  Every remaining line is numbered consecutively
    from 0 and must consist of one or two whitespace-separated fields:

    * one field  -- the token name is added to the set of non-exact tokens;
    * two fields -- the second field, with surrounding single quotes removed,
      is mapped to the line's number in the exact-token dictionary.

    Args:
        tokens: iterable of text lines (for example an open text file).

    Returns:
        A ``(exact_tokens, non_exact_tokens)`` pair.

    Raises:
        ValueError: if a significant line has more than two fields.
    """
    exact_tokens: Dict[str, int] = {}
    non_exact_tokens: Set[str] = set()

    stripped_lines = (raw.strip() for raw in tokens)
    significant = (
        entry for entry in stripped_lines if entry and not entry.startswith("#")
    )

    # Only significant lines take part in the numbering, so enumerate() over
    # the filtered stream yields the same indices as a manual counter.
    for index, entry in enumerate(significant):
        fields = entry.split()
        if len(fields) == 1:
            non_exact_tokens.add(fields[0])
        elif len(fields) == 2:
            exact_tokens[fields[1].strip("'")] = index
        else:
            raise ValueError(f"Unexpected line found in Tokens file: {entry}")

    return exact_tokens, non_exact_tokens
|
||||
|
||||
|
||||
def build_c_generator(
|
||||
grammar: Grammar,
|
||||
grammar_file: str,
|
||||
tokens_file: str,
|
||||
output_file: str,
|
||||
compile_extension: bool = False,
|
||||
verbose_c_extension: bool = False,
|
||||
keep_asserts_in_extension: bool = True,
|
||||
skip_actions: bool = False,
|
||||
) -> ParserGenerator:
|
||||
# TODO: Allow other extensions; pass the output type as an argument.
|
||||
if not output_file.endswith((".c", ".py")):
|
||||
raise RuntimeError("Your output file must either be a .c or .py file")
|
||||
with open(tokens_file, "r") as tok_file:
|
||||
exact_tok, non_exact_tok = generate_token_definitions(tok_file)
|
||||
with open(output_file, "w") as file:
|
||||
gen: ParserGenerator
|
||||
if output_file.endswith(".c"):
|
||||
gen = CParserGenerator(grammar, file, skip_actions=skip_actions)
|
||||
elif output_file.endswith(".py"):
|
||||
gen = PythonParserGenerator(grammar, file) # TODO: skip_actions
|
||||
else:
|
||||
assert False # Should have been checked above
|
||||
gen: ParserGenerator = CParserGenerator(
|
||||
grammar, exact_tok, non_exact_tok, file, skip_actions=skip_actions
|
||||
)
|
||||
gen.generate(grammar_file)
|
||||
|
||||
if compile_extension and output_file.endswith(".c"):
|
||||
if compile_extension:
|
||||
compile_c_extension(
|
||||
output_file, verbose=verbose_c_extension, keep_asserts=keep_asserts_in_extension
|
||||
)
|
||||
|
||||
return gen
|
||||
|
||||
|
||||
def build_parser_and_generator(
|
||||
def build_python_generator(
    grammar: Grammar, grammar_file: str, output_file: str, skip_actions: bool = False,
) -> ParserGenerator:
    """Write a Python parser for ``grammar`` to ``output_file``.

    Opens ``output_file`` for writing, creates a ``PythonParserGenerator``
    bound to that file, and calls its ``generate`` method with
    ``grammar_file``.  The generator object is returned.

    ``skip_actions`` is accepted but not forwarded to the generator yet.
    """
    with open(output_file, "w") as out:
        generator: ParserGenerator = PythonParserGenerator(grammar, out)  # TODO: skip_actions
        generator.generate(grammar_file)
        # Returning from inside the ``with`` still closes the file first.
        return generator
|
||||
|
||||
|
||||
def build_c_parser_and_generator(
|
||||
grammar_file: str,
|
||||
tokens_file: str,
|
||||
output_file: str,
|
||||
compile_extension: bool = False,
|
||||
verbose_tokenizer: bool = False,
|
||||
|
@ -152,10 +183,11 @@ def build_parser_and_generator(
|
|||
keep_asserts_in_extension: bool = True,
|
||||
skip_actions: bool = False,
|
||||
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
|
||||
"""Generate rules, parser, tokenizer, parser generator for a given grammar
|
||||
"""Generate rules, C parser, tokenizer, parser generator for a given grammar
|
||||
|
||||
Args:
|
||||
grammar_file (string): Path for the grammar file
|
||||
tokens_file (string): Path for the tokens file
|
||||
output_file (string): Path for the output file
|
||||
compile_extension (bool, optional): Whether to compile the C extension.
|
||||
Defaults to False.
|
||||
|
@ -170,10 +202,10 @@ def build_parser_and_generator(
|
|||
skip_actions (bool, optional): Whether to pretend no rule has any actions.
|
||||
"""
|
||||
grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
|
||||
gen = build_generator(
|
||||
tokenizer,
|
||||
gen = build_c_generator(
|
||||
grammar,
|
||||
grammar_file,
|
||||
tokens_file,
|
||||
output_file,
|
||||
compile_extension,
|
||||
verbose_c_extension,
|
||||
|
@ -182,3 +214,26 @@ def build_parser_and_generator(
|
|||
)
|
||||
|
||||
return grammar, parser, tokenizer, gen
|
||||
|
||||
|
||||
def build_python_parser_and_generator(
    grammar_file: str,
    output_file: str,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Build the grammar, parser and tokenizer, then emit a Python parser.

    Args:
        grammar_file (string): Path for the grammar file
        output_file (string): Path for the output file
        verbose_tokenizer (bool, optional): Whether to display additional
          output when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional
          output when generating the parser. Defaults to False.
        skip_actions (bool, optional): Whether to pretend no rule has any
          actions.

    Returns:
        The ``(grammar, parser, tokenizer)`` triple from ``build_parser``
        followed by the generator from ``build_python_generator``.
    """
    # Step 1: parse the grammar file.
    parsed = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    grammar, parser, tokenizer = parsed

    # Step 2: write the Python parser for that grammar.
    generator = build_python_generator(
        grammar, grammar_file, output_file, skip_actions=skip_actions
    )

    return grammar, parser, tokenizer, generator
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import ast
|
||||
import re
|
||||
from typing import Any, cast, Dict, IO, Optional, List, Text, Tuple
|
||||
from typing import Any, cast, Dict, IO, Optional, List, Text, Tuple, Set
|
||||
|
||||
from pegen.grammar import (
|
||||
Cut,
|
||||
|
@ -22,7 +22,6 @@ from pegen.grammar import (
|
|||
)
|
||||
from pegen import grammar
|
||||
from pegen.parser_generator import dedupe, ParserGenerator
|
||||
from pegen.tokenizer import exact_token_types
|
||||
|
||||
EXTENSION_PREFIX = """\
|
||||
#include "pegen.h"
|
||||
|
@ -43,8 +42,15 @@ _PyPegen_parse(Parser *p)
|
|||
|
||||
|
||||
class CCallMakerVisitor(GrammarVisitor):
|
||||
def __init__(self, parser_generator: ParserGenerator):
|
||||
def __init__(
|
||||
self,
|
||||
parser_generator: ParserGenerator,
|
||||
exact_tokens: Dict[str, int],
|
||||
non_exact_tokens: Set[str],
|
||||
):
|
||||
self.gen = parser_generator
|
||||
self.exact_tokens = exact_tokens
|
||||
self.non_exact_tokens = non_exact_tokens
|
||||
self.cache: Dict[Any, Any] = {}
|
||||
self.keyword_cache: Dict[str, int] = {}
|
||||
|
||||
|
@ -55,10 +61,7 @@ class CCallMakerVisitor(GrammarVisitor):
|
|||
|
||||
def visit_NameLeaf(self, node: NameLeaf) -> Tuple[str, str]:
|
||||
name = node.value
|
||||
if name in ("NAME", "NUMBER", "STRING"):
|
||||
name = name.lower()
|
||||
return f"{name}_var", f"_PyPegen_{name}_token(p)"
|
||||
if name in ("NEWLINE", "DEDENT", "INDENT", "ENDMARKER", "ASYNC", "AWAIT"):
|
||||
if name in self.non_exact_tokens:
|
||||
name = name.lower()
|
||||
return f"{name}_var", f"_PyPegen_{name}_token(p)"
|
||||
return f"{name}_var", f"{name}_rule(p)"
|
||||
|
@ -68,12 +71,12 @@ class CCallMakerVisitor(GrammarVisitor):
|
|||
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
|
||||
return self.keyword_helper(val)
|
||||
else:
|
||||
assert val in exact_token_types, f"{node.value} is not a known literal"
|
||||
type = exact_token_types[val]
|
||||
assert val in self.exact_tokens, f"{node.value} is not a known literal"
|
||||
type = self.exact_tokens[val]
|
||||
return "literal", f"_PyPegen_expect_token(p, {type})"
|
||||
|
||||
def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
|
||||
def can_we_inline(node):
|
||||
def can_we_inline(node: Rhs) -> int:
|
||||
if len(node.alts) != 1 or len(node.alts[0].items) != 1:
|
||||
return False
|
||||
# If the alternative has an action we cannot inline
|
||||
|
@ -152,12 +155,16 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
def __init__(
|
||||
self,
|
||||
grammar: grammar.Grammar,
|
||||
exact_tokens: Dict[str, int],
|
||||
non_exact_tokens: Set[str],
|
||||
file: Optional[IO[Text]],
|
||||
debug: bool = False,
|
||||
skip_actions: bool = False,
|
||||
):
|
||||
super().__init__(grammar, file)
|
||||
self.callmakervisitor: CCallMakerVisitor = CCallMakerVisitor(self)
|
||||
self.callmakervisitor: CCallMakerVisitor = CCallMakerVisitor(
|
||||
self, exact_tokens, non_exact_tokens
|
||||
)
|
||||
self._varname_counter = 0
|
||||
self.debug = debug
|
||||
self.skip_actions = skip_actions
|
||||
|
@ -184,7 +191,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
self.print(f"}}")
|
||||
|
||||
def out_of_memory_return(
|
||||
self, expr: str, returnval: str, message: str = "Parser out of memory", cleanup_code=None
|
||||
self,
|
||||
expr: str,
|
||||
returnval: str,
|
||||
message: str = "Parser out of memory",
|
||||
cleanup_code: Optional[str] = None,
|
||||
) -> None:
|
||||
self.print(f"if ({expr}) {{")
|
||||
with self.indent():
|
||||
|
@ -465,7 +476,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
self.visit(item, names=names)
|
||||
self.print(")")
|
||||
|
||||
def emit_action(self, node: Alt, cleanup_code=None) -> None:
|
||||
def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None:
|
||||
self.print(f"res = {node.action};")
|
||||
|
||||
self.print("if (res == NULL && PyErr_Occurred()) {")
|
||||
|
|
|
@ -5,6 +5,7 @@ import pathlib
|
|||
import sys
|
||||
import textwrap
|
||||
import tokenize
|
||||
import token
|
||||
|
||||
from typing import Any, cast, Dict, IO, Type, Final
|
||||
|
||||
|
@ -16,6 +17,11 @@ from pegen.parser import Parser
|
|||
from pegen.python_generator import PythonParserGenerator
|
||||
from pegen.tokenizer import Tokenizer
|
||||
|
||||
EXACT_TOKENS = token.EXACT_TOKEN_TYPES # type: ignore
|
||||
NON_EXACT_TOKENS = {
|
||||
name for index, name in token.tok_name.items() if index not in EXACT_TOKENS.values()
|
||||
}
|
||||
|
||||
|
||||
def generate_parser(grammar: Grammar) -> Type[Parser]:
|
||||
# Generate a parser.
|
||||
|
@ -70,7 +76,7 @@ def import_file(full_name: str, path: str) -> Any:
|
|||
|
||||
def generate_c_parser_source(grammar: Grammar) -> str:
|
||||
out = io.StringIO()
|
||||
genr = CParserGenerator(grammar, out)
|
||||
genr = CParserGenerator(grammar, EXACT_TOKENS, NON_EXACT_TOKENS, out)
|
||||
genr.generate("<string>")
|
||||
return out.getvalue()
|
||||
|
||||
|
@ -90,7 +96,7 @@ def generate_parser_c_extension(
|
|||
assert not os.listdir(path)
|
||||
source = path / "parse.c"
|
||||
with open(source, "w") as file:
|
||||
genr = CParserGenerator(grammar, file, debug=debug)
|
||||
genr = CParserGenerator(grammar, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug)
|
||||
genr.generate("parse.c")
|
||||
compile_c_extension(str(source), build_dir=str(path))
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ from pathlib import PurePath
|
|||
from typing import List, Optional, Any
|
||||
|
||||
sys.path.insert(0, os.getcwd())
|
||||
from pegen.build import build_parser_and_generator
|
||||
from pegen.build import build_c_parser_and_generator
|
||||
from pegen.testutil import print_memstats
|
||||
from scripts import show_parse
|
||||
|
||||
|
@ -26,7 +26,8 @@ argparser = argparse.ArgumentParser(
|
|||
description="Helper program to test directories or files for pegen",
|
||||
)
|
||||
argparser.add_argument("-d", "--directory", help="Directory path containing files to test")
|
||||
argparser.add_argument("-g", "--grammar-file", help="Grammar file path")
|
||||
argparser.add_argument("--grammar-file", help="Grammar file path")
|
||||
argparser.add_argument("--tokens-file", help="Tokens file path")
|
||||
argparser.add_argument(
|
||||
"-e", "--exclude", action="append", default=[], help="Glob(s) for matching files to exclude"
|
||||
)
|
||||
|
@ -114,6 +115,7 @@ def compare_trees(
|
|||
def parse_directory(
|
||||
directory: str,
|
||||
grammar_file: str,
|
||||
tokens_file: str,
|
||||
verbose: bool,
|
||||
excluded_files: List[str],
|
||||
skip_actions: bool,
|
||||
|
@ -131,15 +133,16 @@ def parse_directory(
|
|||
print("You must specify a directory of files to test.", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if grammar_file:
|
||||
if grammar_file and tokens_file:
|
||||
if not os.path.exists(grammar_file):
|
||||
print(f"The specified grammar file, {grammar_file}, does not exist.", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
try:
|
||||
if not extension and parser == "pegen":
|
||||
build_parser_and_generator(
|
||||
build_c_parser_and_generator(
|
||||
grammar_file,
|
||||
tokens_file,
|
||||
"peg_extension/parse.c",
|
||||
compile_extension=True,
|
||||
skip_actions=skip_actions,
|
||||
|
@ -154,7 +157,9 @@ def parse_directory(
|
|||
return 1
|
||||
|
||||
else:
|
||||
print("A grammar file was not provided - attempting to use existing file...\n")
|
||||
print(
|
||||
"A grammar file or a tokens file was not provided - attempting to use existing parser from stdlib...\n"
|
||||
)
|
||||
|
||||
if parser == "pegen":
|
||||
try:
|
||||
|
@ -264,6 +269,7 @@ def main() -> None:
|
|||
args = argparser.parse_args()
|
||||
directory = args.directory
|
||||
grammar_file = args.grammar_file
|
||||
tokens_file = args.tokens_file
|
||||
verbose = args.verbose
|
||||
excluded_files = args.exclude
|
||||
skip_actions = args.skip_actions
|
||||
|
@ -273,6 +279,7 @@ def main() -> None:
|
|||
parse_directory(
|
||||
directory,
|
||||
grammar_file,
|
||||
tokens_file,
|
||||
verbose,
|
||||
excluded_files,
|
||||
skip_actions,
|
||||
|
|
Loading…
Reference in New Issue