import itertools
import pathlib
import sys
import sysconfig
import tempfile
import tokenize
from typing import IO, Dict, List, Optional, Set, Tuple

from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.parser import Parser
from pegen.parser_generator import ParserGenerator
from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer

MOD_DIR = pathlib.Path(__file__).resolve().parent

TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]


def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
    flags = sysconfig.get_config_var(compiler_flags)
    py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist)
    if flags is None or py_flags_nodist is None:
        return []
    return f"{flags} {py_flags_nodist}".split()
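
# Example (hypothetical values, which depend entirely on the local build
# configuration): get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST") concatenates
# the two sysconfig variables and splits the result into a flag list, e.g.
# ["-O2", "-Wall"], returning [] when either variable is undefined.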


def compile_c_extension(
    generated_source_path: str,
    build_dir: Optional[str] = None,
    verbose: bool = False,
    keep_asserts: bool = True,
    disable_optimization: bool = False,
    library_dir: Optional[str] = None,
) -> pathlib.Path:
    """Compile the generated source for a parser generator into an extension module.

    The extension module will be generated in the same directory as the provided path
    for the generated source, with the same basename (in addition to extension module
    metadata). For example, for the source mydir/parser.c the generated extension
    on a Darwin system with Python 3.8 will be mydir/parser.cpython-38-darwin.so.

    If *build_dir* is provided, that path will be used as the temporary build directory
    of distutils (this is useful in case you want to use a temporary directory).

    If *library_dir* is provided, that path will be used as the directory for a
    static library of the common parser sources (this is useful in case you are
    creating multiple extensions).
    """
    import distutils.log
    from distutils.core import Distribution, Extension
    from distutils.tests.support import fixup_build_ext  # type: ignore

    from distutils.ccompiler import new_compiler
    from distutils.dep_util import newer_group
    from distutils.sysconfig import customize_compiler

    if verbose:
        distutils.log.set_threshold(distutils.log.DEBUG)

    source_file_path = pathlib.Path(generated_source_path)
    extension_name = source_file_path.stem
    extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
    extra_compile_args.append("-DPy_BUILD_CORE_MODULE")
    # Define _Py_TEST_PEGEN to not call PyAST_Validate() in Parser/pegen.c
    extra_compile_args.append("-D_Py_TEST_PEGEN")
    extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
    if keep_asserts:
        extra_compile_args.append("-UNDEBUG")
    if disable_optimization:
        if sys.platform == 'win32':
            extra_compile_args.append("/Od")
            extra_link_args.append("/LTCG:OFF")
        else:
            extra_compile_args.append("-O0")
            if sysconfig.get_config_var("GNULD") == "yes":
                extra_link_args.append("-fno-lto")

    common_sources = [
        str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
        str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "pegen_errors.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "action_helpers.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
        str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
    ]
    include_dirs = [
        str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
        str(MOD_DIR.parent.parent.parent / "Parser"),
    ]
    extension = Extension(
        extension_name,
        sources=[generated_source_path],
        extra_compile_args=extra_compile_args,
        extra_link_args=extra_link_args,
    )
    dist = Distribution({"name": extension_name, "ext_modules": [extension]})
    cmd = dist.get_command_obj("build_ext")
    fixup_build_ext(cmd)
    cmd.build_lib = str(source_file_path.parent)
    cmd.include_dirs = include_dirs
    if build_dir:
        cmd.build_temp = build_dir
    cmd.ensure_finalized()

    compiler = new_compiler()
    customize_compiler(compiler)
    compiler.set_include_dirs(cmd.include_dirs)
    compiler.set_library_dirs(cmd.library_dirs)
    # Build a static library of the common parser sources if *library_dir* was given.
    if library_dir:
        library_filename = compiler.library_filename(extension_name,
                                                     output_dir=library_dir)
        if newer_group(common_sources, library_filename, 'newer'):
            if sys.platform == 'win32':
                pdb = compiler.static_lib_format % (extension_name, '.pdb')
                compile_opts = [f"/Fd{library_dir}\\{pdb}"]
                compile_opts.extend(extra_compile_args)
            else:
                compile_opts = extra_compile_args
            objects = compiler.compile(common_sources,
                                       output_dir=library_dir,
                                       debug=cmd.debug,
                                       extra_postargs=compile_opts)
            compiler.create_static_lib(objects, extension_name,
                                       output_dir=library_dir,
                                       debug=cmd.debug)
        if sys.platform == 'win32':
            compiler.add_library_dir(library_dir)
            extension.libraries = [extension_name]
        elif sys.platform == 'darwin':
            compiler.set_link_objects([
                '-Wl,-force_load', library_filename,
            ])
        else:
            compiler.set_link_objects([
                '-Wl,--whole-archive', library_filename, '-Wl,--no-whole-archive',
            ])
    else:
        extension.sources[0:0] = common_sources

    # Compile the source code to object files.
    ext_path = cmd.get_ext_fullpath(extension_name)
    if newer_group(extension.sources, ext_path, 'newer'):
        objects = compiler.compile(extension.sources,
                                   output_dir=cmd.build_temp,
                                   debug=cmd.debug,
                                   extra_postargs=extra_compile_args)
    else:
        # Sources are up to date; reuse the existing object files.
        objects = compiler.object_filenames(extension.sources,
                                            output_dir=cmd.build_temp)
    # Now link the object files together into a "shared object"
    compiler.link_shared_object(
        objects, ext_path,
        libraries=cmd.get_libraries(extension),
        extra_postargs=extra_link_args,
        export_symbols=cmd.get_export_symbols(extension),
        debug=cmd.debug,
        build_temp=cmd.build_temp)

    return pathlib.Path(ext_path)
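
# A minimal usage sketch (hypothetical paths; assumes a CPython source tree
# laid out as MOD_DIR expects):
#
#     ext_path = compile_c_extension(
#         "mydir/parser.c",
#         build_dir="mydir/build",
#         verbose=True,
#     )
#     # ext_path would be e.g. mydir/parser.cpython-38-darwin.so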


def build_parser(
    grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
) -> Tuple[Grammar, Parser, Tokenizer]:
    with open(grammar_file) as file:
        tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer)
        parser = GrammarParser(tokenizer, verbose=verbose_parser)
        grammar = parser.start()

        if not grammar:
            raise parser.make_syntax_error(grammar_file)

    return grammar, parser, tokenizer
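
# Example (hypothetical grammar file): parsing a metagrammar yields the
# Grammar object plus the parser and tokenizer that were used to build it:
#
#     grammar, parser, tokenizer = build_parser("data/simple.gram")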


def generate_token_definitions(tokens: IO[str]) -> TokenDefinitions:
    all_tokens = {}
    exact_tokens = {}
    non_exact_tokens = set()
    numbers = itertools.count(0)

    for line in tokens:
        line = line.strip()

        if not line or line.startswith("#"):
            continue

        pieces = line.split()
        index = next(numbers)

        if len(pieces) == 1:
            (token,) = pieces
            non_exact_tokens.add(token)
            all_tokens[index] = token
        elif len(pieces) == 2:
            token, op = pieces
            exact_tokens[op.strip("'")] = index
            all_tokens[index] = token
        else:
            raise ValueError(f"Unexpected line found in Tokens file: {line}")

    return all_tokens, exact_tokens, non_exact_tokens
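
# Sketch of the expected input format, inferred from the parsing loop above:
# a bare name declares a non-exact token, while "NAME 'literal'" declares an
# exact token. For example, given a file containing these two lines:
#
#     ENDMARKER
#     LPAR '('
#
# the result would be ({0: "ENDMARKER", 1: "LPAR"}, {"(": 1}, {"ENDMARKER"}).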


def build_c_generator(
    grammar: Grammar,
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
    with open(tokens_file, "r") as tok_file:
        all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
    with open(output_file, "w") as file:
        gen: ParserGenerator = CParserGenerator(
            grammar, all_tokens, exact_tok, non_exact_tok, file, skip_actions=skip_actions
        )
        gen.generate(grammar_file)

    if compile_extension:
        with tempfile.TemporaryDirectory() as build_dir:
            compile_c_extension(
                output_file,
                build_dir=build_dir,
                verbose=verbose_c_extension,
                keep_asserts=keep_asserts_in_extension,
            )
    return gen


def build_python_generator(
    grammar: Grammar,
    grammar_file: str,
    output_file: str,
    skip_actions: bool = False,
) -> ParserGenerator:
    with open(output_file, "w") as file:
        gen: ParserGenerator = PythonParserGenerator(grammar, file)  # TODO: skip_actions
        gen.generate(grammar_file)
    return gen


def build_c_parser_and_generator(
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, C parser, tokenizer, parser generator for a given grammar

    Args:
        grammar_file (string): Path for the grammar file
        tokens_file (string): Path for the tokens file
        output_file (string): Path for the output file
        compile_extension (bool, optional): Whether to compile the C extension.
            Defaults to False.
        verbose_tokenizer (bool, optional): Whether to display additional output
            when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
            when generating the parser. Defaults to False.
        verbose_c_extension (bool, optional): Whether to display additional
            output when compiling the C extension. Defaults to False.
        keep_asserts_in_extension (bool, optional): Whether to keep the assert statements
            when compiling the extension module. Defaults to True.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_c_generator(
        grammar,
        grammar_file,
        tokens_file,
        output_file,
        compile_extension,
        verbose_c_extension,
        keep_asserts_in_extension,
        skip_actions=skip_actions,
    )

    return grammar, parser, tokenizer, gen
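
# End-to-end sketch (hypothetical file names; assumes a grammar file and a
# Tokens file are available):
#
#     grammar, parser, tokenizer, gen = build_c_parser_and_generator(
#         "data/python.gram", "Grammar/Tokens", "parse.c",
#         compile_extension=True,
#     )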


def build_python_parser_and_generator(
    grammar_file: str,
    output_file: str,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, python parser, tokenizer, parser generator for a given grammar

    Args:
        grammar_file (string): Path for the grammar file
        output_file (string): Path for the output file
        verbose_tokenizer (bool, optional): Whether to display additional output
            when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
            when generating the parser. Defaults to False.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
    """
    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
    gen = build_python_generator(
        grammar,
        grammar_file,
        output_file,
        skip_actions=skip_actions,
    )
    return grammar, parser, tokenizer, gen