bpo-40334: Refactor peg_generator to receive a Tokens file when building c code (GH-19745)

Author: Pablo Galindo
Date: 2020-04-28 13:11:55 +01:00 (committed by GitHub)
Parent: 3d53d8756f
Commit: 5b9f4988c9

8 changed files with 220 additions and 91 deletions

Makefile.pre.in

@@ -823,7 +823,9 @@ regen-grammar: regen-token
 .PHONY: regen-pegen
 regen-pegen:
 	@$(MKDIR_P) $(srcdir)/Parser/pegen
-	PYTHONPATH=$(srcdir)/Tools/peg_generator $(PYTHON_FOR_REGEN) -m pegen -c -q $(srcdir)/Grammar/python.gram \
+	PYTHONPATH=$(srcdir)/Tools/peg_generator $(PYTHON_FOR_REGEN) -m pegen -q c \
+		$(srcdir)/Grammar/python.gram \
+		$(srcdir)/Grammar/Tokens \
 		-o $(srcdir)/Parser/pegen/parse.new.c
 	$(UPDATE_FILE) $(srcdir)/Parser/pegen/parse.c $(srcdir)/Parser/pegen/parse.new.c
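This is the shape of the new command line used throughout the commit: C output is selected with the `c` subcommand instead of the removed `-c`/`--cpython` flag, and the Tokens file is passed as a second positional argument after the grammar. The Windows regeneration target below mirrors the same invocation.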

PCbuild/regen.targets

@@ -168,7 +168,7 @@
   </Target>
   <Target Name="_RegenPegen" BeforeTargets="Build">
     <!-- Regenerate Parser/pegen/parse.c -->
-    <Exec Command="&quot;$PYTHONPATH=$(srcdir)/Tools/peg_generator&quot; &quot;$(PythonExe)&quot; -m pegen -c -q &quot;$(PySourcePath)Grammar\python.gram&quot; -o &quot;$(IntDir)parse.c&quot;" />
+    <Exec Command="&quot;$PYTHONPATH=$(srcdir)/Tools/peg_generator&quot; &quot;$(PythonExe)&quot; -m pegen -q c &quot;$(PySourcePath)Grammar\python.gram&quot; &quot;$(PySourcePath)Grammar\Tokens&quot; -o &quot;$(IntDir)parse.c&quot;" />
     <Copy SourceFiles="$(IntDir)parse.c" DestinationFiles="$(PySourcePath)Parser\pegen\parse.c">
       <Output TaskParameter="CopiedFiles" ItemName="_UpdatedParse" />
     </Copy>

Tools/peg_generator/Makefile

@@ -10,6 +10,7 @@ CPYTHON ?= ../../Lib
 MYPY ?= mypy

 GRAMMAR = ../../Grammar/python.gram
+TOKENS = ../../Grammar/Tokens
 TESTFILE = data/cprog.py
 TIMEFILE = data/xxl.py
 TESTDIR = .
@@ -20,8 +21,8 @@ data/xxl.py:

 build: peg_extension/parse.c

-peg_extension/parse.c: $(GRAMMAR) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen/pegen.c ../../Parser/pegen/parse_string.c ../../Parser/pegen/*.h pegen/grammar_parser.py
-	$(PYTHON) -m pegen -q -c $(GRAMMAR) -o peg_extension/parse.c --compile-extension
+peg_extension/parse.c: $(GRAMMAR) $(TOKENS) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen/pegen.c ../../Parser/pegen/parse_string.c ../../Parser/pegen/*.h pegen/grammar_parser.py
+	$(PYTHON) -m pegen -q c $(GRAMMAR) $(TOKENS) -o peg_extension/parse.c --compile-extension

 clean:
 	-rm -f peg_extension/*.o peg_extension/*.so peg_extension/parse.c
@@ -79,7 +80,8 @@ time_stdlib_parse: data/xxl.py

 test_local:
 	$(PYTHON) scripts/test_parse_directory.py \
-		-g $(GRAMMAR) \
+		--grammar-file $(GRAMMAR) \
+		--tokens-file $(TOKENS) \
 		-d $(TESTDIR) \
 		$(TESTFLAGS) \
 		--exclude "*/failset/*" \
@@ -88,7 +90,8 @@ test_local:

 test_global: $(CPYTHON)
 	$(PYTHON) scripts/test_parse_directory.py \
-		-g $(GRAMMAR) \
+		--grammar-file $(GRAMMAR) \
+		--tokens-file $(TOKENS) \
 		-d $(CPYTHON) \
 		$(TESTFLAGS) \
 		--exclude "*/test2to3/*" \
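The generator's own Makefile threads the new TOKENS variable through both the extension build and the directory-test targets, which now spell out --grammar-file/--tokens-file; the short -g option is dropped from test_parse_directory.py at the end of this commit.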

Tools/peg_generator/pegen/__main__.py

@@ -11,6 +11,64 @@ import time
 import token
 import traceback
+from typing import Tuple
+
+from pegen.build import Grammar, Parser, Tokenizer, ParserGenerator
+
+
+def generate_c_code(
+    args: argparse.Namespace,
+) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
+    from pegen.build import build_c_parser_and_generator
+
+    verbose = args.verbose
+    verbose_tokenizer = verbose >= 3
+    verbose_parser = verbose == 2 or verbose >= 4
+    try:
+        grammar, parser, tokenizer, gen = build_c_parser_and_generator(
+            args.grammar_filename,
+            args.tokens_filename,
+            args.output,
+            args.compile_extension,
+            verbose_tokenizer,
+            verbose_parser,
+            args.verbose,
+            keep_asserts_in_extension=False if args.optimized else True,
+            skip_actions=args.skip_actions,
+        )
+        return grammar, parser, tokenizer, gen
+    except Exception as err:
+        if args.verbose:
+            raise  # Show traceback
+        traceback.print_exception(err.__class__, err, None)
+        sys.stderr.write("For full traceback, use -v\n")
+        sys.exit(1)
+
+
+def generate_python_code(
+    args: argparse.Namespace,
+) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
+    from pegen.build import build_python_parser_and_generator
+
+    verbose = args.verbose
+    verbose_tokenizer = verbose >= 3
+    verbose_parser = verbose == 2 or verbose >= 4
+    try:
+        grammar, parser, tokenizer, gen = build_python_parser_and_generator(
+            args.grammar_filename,
+            args.output,
+            verbose_tokenizer,
+            verbose_parser,
+            skip_actions=args.skip_actions,
+        )
+        return grammar, parser, tokenizer, gen
+    except Exception as err:
+        if args.verbose:
+            raise  # Show traceback
+        traceback.print_exception(err.__class__, err, None)
+        sys.stderr.write("For full traceback, use -v\n")
+        sys.exit(1)
+
+
 argparser = argparse.ArgumentParser(
     prog="pegen", description="Experimental PEG-like parser generator"
@@ -23,63 +81,52 @@ argparser.add_argument(
     default=0,
     help="Print timing stats; repeat for more debug output",
 )
-argparser.add_argument(
-    "-c", "--cpython", action="store_true", help="Generate C code for inclusion into CPython"
-)
-argparser.add_argument(
+
+subparsers = argparser.add_subparsers(help="target language for the generated code")
+
+c_parser = subparsers.add_parser("c", help="Generate C code for inclusion into CPython")
+c_parser.set_defaults(func=generate_c_code)
+c_parser.add_argument("grammar_filename", help="Grammar description")
+c_parser.add_argument("tokens_filename", help="Tokens description")
+c_parser.add_argument(
+    "-o", "--output", metavar="OUT", default="parse.c", help="Where to write the generated parser"
+)
+c_parser.add_argument(
     "--compile-extension",
     action="store_true",
     help="Compile generated C code into an extension module",
 )
-argparser.add_argument(
-    "-o",
-    "--output",
-    metavar="OUT",
-    help="Where to write the generated parser (default parse.py or parse.c)",
-)
-argparser.add_argument("filename", help="Grammar description")
-argparser.add_argument(
+c_parser.add_argument(
     "--optimized", action="store_true", help="Compile the extension in optimized mode"
 )
-argparser.add_argument(
+c_parser.add_argument(
+    "--skip-actions", action="store_true", help="Suppress code emission for rule actions",
+)
+
+python_parser = subparsers.add_parser("python", help="Generate Python code")
+python_parser.set_defaults(func=generate_python_code)
+python_parser.add_argument("grammar_filename", help="Grammar description")
+python_parser.add_argument(
+    "-o",
+    "--output",
+    metavar="OUT",
+    default="parse.py",
+    help="Where to write the generated parser",
+)
+python_parser.add_argument(
     "--skip-actions", action="store_true", help="Suppress code emission for rule actions",
 )


 def main() -> None:
-    from pegen.build import build_parser_and_generator
     from pegen.testutil import print_memstats

     args = argparser.parse_args()
-    verbose = args.verbose
-    verbose_tokenizer = verbose >= 3
-    verbose_parser = verbose == 2 or verbose >= 4
+    if "func" not in args:
+        argparser.error("Must specify the target language mode ('c' or 'python')")
+
     t0 = time.time()
-
-    output_file = args.output
-    if not output_file:
-        if args.cpython:
-            output_file = "parse.c"
-        else:
-            output_file = "parse.py"
-
-    try:
-        grammar, parser, tokenizer, gen = build_parser_and_generator(
-            args.filename,
-            output_file,
-            args.compile_extension,
-            verbose_tokenizer,
-            verbose_parser,
-            args.verbose,
-            keep_asserts_in_extension=False if args.optimized else True,
-            skip_actions=args.skip_actions,
-        )
-    except Exception as err:
-        if args.verbose:
-            raise  # Show traceback
-        traceback.print_exception(err.__class__, err, None)
-        sys.stderr.write("For full traceback, use -v\n")
-        sys.exit(1)
+    grammar, parser, tokenizer, gen = args.func(args)
+    t1 = time.time()

     if not args.quiet:
         if args.verbose:
@@ -110,8 +157,6 @@ def main() -> None:
         else:
             print()

-    t1 = time.time()
-
     if args.verbose:
         dt = t1 - t0
         diag = tokenizer.diagnose()
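The refactored CLI uses the standard argparse sub-command pattern: each sub-parser binds its handler with set_defaults(func=...), and main() simply calls args.func(args). A minimal self-contained sketch of that pattern (the demo program and handlers below are illustrative, not pegen code):

    import argparse

    parser = argparse.ArgumentParser(prog="demo")
    subparsers = parser.add_subparsers(help="target language for the generated code")

    c_cmd = subparsers.add_parser("c", help="generate C")
    c_cmd.set_defaults(func=lambda args: print("generating C..."))

    py_cmd = subparsers.add_parser("python", help="generate Python")
    py_cmd.set_defaults(func=lambda args: print("generating Python..."))

    args = parser.parse_args()
    # Sub-commands are optional by default, so a bare "demo" yields a
    # Namespace without "func"; guard for it exactly as the new main() does.
    if "func" not in args:
        parser.error("Must specify the target language mode ('c' or 'python')")
    args.func(args)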

Tools/peg_generator/pegen/build.py

@@ -3,8 +3,9 @@ import shutil
 import tokenize
 import sys
 import sysconfig
+import itertools

-from typing import Optional, Tuple
+from typing import Optional, Tuple, List, IO, Iterator, Set, Dict

 from pegen.c_generator import CParserGenerator
 from pegen.grammar import Grammar
@@ -17,12 +18,12 @@ from pegen.tokenizer import Tokenizer
 MOD_DIR = pathlib.Path(__file__).parent


-def get_extra_flags(compiler_flags, compiler_py_flags_nodist):
+def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
     flags = sysconfig.get_config_var(compiler_flags)
     py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist)
     if flags is None or py_flags_nodist is None:
         return []
-    return f'{flags} {py_flags_nodist}'.split()
+    return f"{flags} {py_flags_nodist}".split()


 def compile_c_extension(
@@ -45,15 +46,15 @@ def compile_c_extension(
     from distutils.core import Distribution, Extension
     from distutils.command.clean import clean  # type: ignore
     from distutils.command.build_ext import build_ext  # type: ignore
-    from distutils.tests.support import fixup_build_ext
+    from distutils.tests.support import fixup_build_ext  # type: ignore

     if verbose:
         distutils.log.set_verbosity(distutils.log.DEBUG)

     source_file_path = pathlib.Path(generated_source_path)
     extension_name = source_file_path.stem
-    extra_compile_args = get_extra_flags('CFLAGS', 'PY_CFLAGS_NODIST')
-    extra_link_args = get_extra_flags('LDFLAGS', 'PY_LDFLAGS_NODIST')
+    extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
+    extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
     if keep_asserts:
         extra_compile_args.append("-UNDEBUG")
     extension = [
@@ -111,39 +112,69 @@ def build_parser(
     return grammar, parser, tokenizer


-def build_generator(
-    tokenizer: Tokenizer,
+def generate_token_definitions(tokens: IO[str]) -> Tuple[Dict[str, int], Set[str]]:
+    exact_tokens = {}
+    non_exact_tokens = set()
+    numbers = itertools.count(0)
+
+    for line in tokens:
+        line = line.strip()
+
+        if not line or line.startswith("#"):
+            continue
+
+        pieces = line.split()
+        index = next(numbers)
+
+        if len(pieces) == 1:
+            (token,) = pieces
+            non_exact_tokens.add(token)
+        elif len(pieces) == 2:
+            _, op = pieces
+            exact_tokens[op.strip("'")] = index
+        else:
+            raise ValueError(f"Unexpected line found in Tokens file: {line}")
+
+    return exact_tokens, non_exact_tokens
+
+
+def build_c_generator(
     grammar: Grammar,
     grammar_file: str,
+    tokens_file: str,
     output_file: str,
     compile_extension: bool = False,
     verbose_c_extension: bool = False,
     keep_asserts_in_extension: bool = True,
     skip_actions: bool = False,
 ) -> ParserGenerator:
-    # TODO: Allow other extensions; pass the output type as an argument.
-    if not output_file.endswith((".c", ".py")):
-        raise RuntimeError("Your output file must either be a .c or .py file")
+    with open(tokens_file, "r") as tok_file:
+        exact_tok, non_exact_tok = generate_token_definitions(tok_file)
     with open(output_file, "w") as file:
-        gen: ParserGenerator
-        if output_file.endswith(".c"):
-            gen = CParserGenerator(grammar, file, skip_actions=skip_actions)
-        elif output_file.endswith(".py"):
-            gen = PythonParserGenerator(grammar, file)  # TODO: skip_actions
-        else:
-            assert False  # Should have been checked above
+        gen: ParserGenerator = CParserGenerator(
+            grammar, exact_tok, non_exact_tok, file, skip_actions=skip_actions
+        )
         gen.generate(grammar_file)

-    if compile_extension and output_file.endswith(".c"):
+    if compile_extension:
         compile_c_extension(
             output_file, verbose=verbose_c_extension, keep_asserts=keep_asserts_in_extension
         )
     return gen


-def build_parser_and_generator(
+def build_python_generator(
+    grammar: Grammar, grammar_file: str, output_file: str, skip_actions: bool = False,
+) -> ParserGenerator:
+    with open(output_file, "w") as file:
+        gen: ParserGenerator = PythonParserGenerator(grammar, file)  # TODO: skip_actions
+        gen.generate(grammar_file)
+    return gen
+
+
+def build_c_parser_and_generator(
     grammar_file: str,
+    tokens_file: str,
     output_file: str,
     compile_extension: bool = False,
     verbose_tokenizer: bool = False,
@@ -152,10 +183,11 @@ def build_parser_and_generator(
     keep_asserts_in_extension: bool = True,
     skip_actions: bool = False,
 ) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
-    """Generate rules, parser, tokenizer, parser generator for a given grammar
+    """Generate rules, C parser, tokenizer, parser generator for a given grammar

     Args:
         grammar_file (string): Path for the grammar file
+        tokens_file (string): Path for the tokens file
         output_file (string): Path for the output file
         compile_extension (bool, optional): Whether to compile the C extension.
           Defaults to False.
@@ -170,10 +202,10 @@ def build_parser_and_generator(
         skip_actions (bool, optional): Whether to pretend no rule has any actions.
     """
     grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
-    gen = build_generator(
-        tokenizer,
+    gen = build_c_generator(
         grammar,
         grammar_file,
+        tokens_file,
         output_file,
         compile_extension,
         verbose_c_extension,
@@ -182,3 +214,26 @@ def build_parser_and_generator(
     )
     return grammar, parser, tokenizer, gen
+
+
+def build_python_parser_and_generator(
+    grammar_file: str,
+    output_file: str,
+    verbose_tokenizer: bool = False,
+    verbose_parser: bool = False,
+    skip_actions: bool = False,
+) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
+    """Generate rules, python parser, tokenizer, parser generator for a given grammar
+
+    Args:
+        grammar_file (string): Path for the grammar file
+        output_file (string): Path for the output file
+        verbose_tokenizer (bool, optional): Whether to display additional output
+          when generating the tokenizer. Defaults to False.
+        verbose_parser (bool, optional): Whether to display additional output
+          when generating the parser. Defaults to False.
+        skip_actions (bool, optional): Whether to pretend no rule has any actions.
+    """
+    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
+    gen = build_python_generator(grammar, grammar_file, output_file, skip_actions=skip_actions,)
+    return grammar, parser, tokenizer, gen
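generate_token_definitions is the core of the refactor: the token sets previously hard-coded in the C generator are now read from the Tokens file. A minimal sketch of its behaviour (the sample lines are illustrative but follow the Grammar/Tokens format, where a line's position doubles as the token's numeric type):

    import io
    from pegen.build import generate_token_definitions

    sample = io.StringIO(
        "ENDMARKER\n"     # one field: a non-exact token
        "NAME\n"
        "LPAR '('\n"      # two fields: an exact token, keyed by its literal
        "RPAR ')'\n"
    )
    exact, non_exact = generate_token_definitions(sample)
    assert exact == {"(": 2, ")": 3}           # value = line index = token type
    assert non_exact == {"ENDMARKER", "NAME"}

Blank lines and "#" comments are skipped before the index is drawn, so the numbering stays aligned with the token types that Lib/token.py derives from the same file.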

Tools/peg_generator/pegen/c_generator.py

@@ -1,6 +1,6 @@
 import ast
 import re
-from typing import Any, cast, Dict, IO, Optional, List, Text, Tuple
+from typing import Any, cast, Dict, IO, Optional, List, Text, Tuple, Set

 from pegen.grammar import (
     Cut,
@@ -22,7 +22,6 @@ from pegen.grammar import (
 )
 from pegen import grammar
 from pegen.parser_generator import dedupe, ParserGenerator
-from pegen.tokenizer import exact_token_types

 EXTENSION_PREFIX = """\
 #include "pegen.h"
@@ -43,8 +42,15 @@ _PyPegen_parse(Parser *p)

 class CCallMakerVisitor(GrammarVisitor):
-    def __init__(self, parser_generator: ParserGenerator):
+    def __init__(
+        self,
+        parser_generator: ParserGenerator,
+        exact_tokens: Dict[str, int],
+        non_exact_tokens: Set[str],
+    ):
         self.gen = parser_generator
+        self.exact_tokens = exact_tokens
+        self.non_exact_tokens = non_exact_tokens
         self.cache: Dict[Any, Any] = {}
         self.keyword_cache: Dict[str, int] = {}
@@ -55,10 +61,7 @@ class CCallMakerVisitor(GrammarVisitor):
     def visit_NameLeaf(self, node: NameLeaf) -> Tuple[str, str]:
         name = node.value
-        if name in ("NAME", "NUMBER", "STRING"):
-            name = name.lower()
-            return f"{name}_var", f"_PyPegen_{name}_token(p)"
-        if name in ("NEWLINE", "DEDENT", "INDENT", "ENDMARKER", "ASYNC", "AWAIT"):
+        if name in self.non_exact_tokens:
             name = name.lower()
             return f"{name}_var", f"_PyPegen_{name}_token(p)"
         return f"{name}_var", f"{name}_rule(p)"
@@ -68,12 +71,12 @@ class CCallMakerVisitor(GrammarVisitor):
         if re.match(r"[a-zA-Z_]\w*\Z", val):  # This is a keyword
             return self.keyword_helper(val)
         else:
-            assert val in exact_token_types, f"{node.value} is not a known literal"
-            type = exact_token_types[val]
+            assert val in self.exact_tokens, f"{node.value} is not a known literal"
+            type = self.exact_tokens[val]
             return "literal", f"_PyPegen_expect_token(p, {type})"

     def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
-        def can_we_inline(node):
+        def can_we_inline(node: Rhs) -> int:
             if len(node.alts) != 1 or len(node.alts[0].items) != 1:
                 return False
             # If the alternative has an action we cannot inline
@@ -152,12 +155,16 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
     def __init__(
         self,
         grammar: grammar.Grammar,
+        exact_tokens: Dict[str, int],
+        non_exact_tokens: Set[str],
         file: Optional[IO[Text]],
         debug: bool = False,
         skip_actions: bool = False,
     ):
         super().__init__(grammar, file)
-        self.callmakervisitor: CCallMakerVisitor = CCallMakerVisitor(self)
+        self.callmakervisitor: CCallMakerVisitor = CCallMakerVisitor(
+            self, exact_tokens, non_exact_tokens
+        )
         self._varname_counter = 0
         self.debug = debug
         self.skip_actions = skip_actions
@@ -184,7 +191,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
         self.print(f"}}")

     def out_of_memory_return(
-        self, expr: str, returnval: str, message: str = "Parser out of memory", cleanup_code=None
+        self,
+        expr: str,
+        returnval: str,
+        message: str = "Parser out of memory",
+        cleanup_code: Optional[str] = None,
     ) -> None:
         self.print(f"if ({expr}) {{")
         with self.indent():
@@ -465,7 +476,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
             self.visit(item, names=names)
         self.print(")")

-    def emit_action(self, node: Alt, cleanup_code=None) -> None:
+    def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None:
         self.print(f"res = {node.action};")
         self.print("if (res == NULL && PyErr_Occurred()) {")

Tools/peg_generator/pegen/testutil.py

@@ -5,6 +5,7 @@ import pathlib
 import sys
 import textwrap
 import tokenize
+import token

 from typing import Any, cast, Dict, IO, Type, Final
@@ -16,6 +17,11 @@ from pegen.parser import Parser
 from pegen.python_generator import PythonParserGenerator
 from pegen.tokenizer import Tokenizer

+EXACT_TOKENS = token.EXACT_TOKEN_TYPES  # type: ignore
+NON_EXACT_TOKENS = {
+    name for index, name in token.tok_name.items() if index not in EXACT_TOKENS.values()
+}
+

 def generate_parser(grammar: Grammar) -> Type[Parser]:
     # Generate a parser.
@@ -70,7 +76,7 @@ def import_file(full_name: str, path: str) -> Any:

 def generate_c_parser_source(grammar: Grammar) -> str:
     out = io.StringIO()
-    genr = CParserGenerator(grammar, out)
+    genr = CParserGenerator(grammar, EXACT_TOKENS, NON_EXACT_TOKENS, out)
     genr.generate("<string>")
     return out.getvalue()
@@ -90,7 +96,7 @@ def generate_parser_c_extension(
     assert not os.listdir(path)
     source = path / "parse.c"
     with open(source, "w") as file:
-        genr = CParserGenerator(grammar, file, debug=debug)
+        genr = CParserGenerator(grammar, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug)
         genr.generate("parse.c")
     compile_c_extension(str(source), build_dir=str(path))
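For the test helpers, the two sets come from the stdlib token module rather than from a file: token.EXACT_TOKEN_TYPES (public in the token module since Python 3.8, hence the "# type: ignore" for older stubs) maps each operator literal to its type, and every other named token is non-exact. A quick illustration:

    import token

    assert token.EXACT_TOKEN_TYPES["("] == token.LPAR   # exact: has a literal
    assert "NAME" not in token.EXACT_TOKEN_TYPES        # non-exact: no literal
    assert token.tok_name[token.NAME] == "NAME"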

Tools/peg_generator/scripts/test_parse_directory.py

@@ -13,7 +13,7 @@ from pathlib import PurePath
 from typing import List, Optional, Any

 sys.path.insert(0, os.getcwd())
-from pegen.build import build_parser_and_generator
+from pegen.build import build_c_parser_and_generator
 from pegen.testutil import print_memstats
 from scripts import show_parse
@@ -26,7 +26,8 @@ argparser = argparse.ArgumentParser(
     description="Helper program to test directories or files for pegen",
 )
 argparser.add_argument("-d", "--directory", help="Directory path containing files to test")
-argparser.add_argument("-g", "--grammar-file", help="Grammar file path")
+argparser.add_argument("--grammar-file", help="Grammar file path")
+argparser.add_argument("--tokens-file", help="Tokens file path")
 argparser.add_argument(
     "-e", "--exclude", action="append", default=[], help="Glob(s) for matching files to exclude"
 )
@@ -114,6 +115,7 @@ def compare_trees(
 def parse_directory(
     directory: str,
     grammar_file: str,
+    tokens_file: str,
     verbose: bool,
     excluded_files: List[str],
     skip_actions: bool,
@@ -131,15 +133,16 @@ def parse_directory(
         print("You must specify a directory of files to test.", file=sys.stderr)
         return 1

-    if grammar_file:
+    if grammar_file and tokens_file:
         if not os.path.exists(grammar_file):
             print(f"The specified grammar file, {grammar_file}, does not exist.", file=sys.stderr)
             return 1

         try:
             if not extension and parser == "pegen":
-                build_parser_and_generator(
+                build_c_parser_and_generator(
                     grammar_file,
+                    tokens_file,
                     "peg_extension/parse.c",
                     compile_extension=True,
                     skip_actions=skip_actions,
@@ -154,7 +157,9 @@ def parse_directory(
             return 1

     else:
-        print("A grammar file was not provided - attempting to use existing file...\n")
+        print(
+            "A grammar file or a tokens file was not provided - attempting to use existing parser from stdlib...\n"
+        )

     if parser == "pegen":
         try:
@@ -264,6 +269,7 @@ def main() -> None:
     args = argparser.parse_args()
     directory = args.directory
     grammar_file = args.grammar_file
+    tokens_file = args.tokens_file
     verbose = args.verbose
     excluded_files = args.exclude
     skip_actions = args.skip_actions
@@ -273,6 +279,7 @@ def main() -> None:
     parse_directory(
         directory,
         grammar_file,
+        tokens_file,
         verbose,
         excluded_files,
         skip_actions,
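After this change the test script is invoked with both inputs, e.g. `python scripts/test_parse_directory.py --grammar-file ../../Grammar/python.gram --tokens-file ../../Grammar/Tokens -d <directory>`; if either file is omitted, it falls back to the parser already built into the interpreter, as the updated message says.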