cpython/Tools/peg_generator/pegen/testutil.py


import importlib.util
import io
import os
import pathlib
import sys
import textwrap
import token
import tokenize
from typing import IO, Any, Dict, Final, Optional, Type, cast

from pegen.build import compile_c_extension
from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.parser import Parser
from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer

ALL_TOKENS = token.tok_name
EXACT_TOKENS = token.EXACT_TOKEN_TYPES
NON_EXACT_TOKENS = {
    name for index, name in token.tok_name.items() if index not in EXACT_TOKENS.values()
}


def generate_parser(grammar: Grammar) -> Type[Parser]:
    # Generate a parser.
    out = io.StringIO()
    genr = PythonParserGenerator(grammar, out)
    genr.generate("<string>")

    # Load the generated parser class.
    ns: Dict[str, Any] = {}
    exec(out.getvalue(), ns)
    return ns["GeneratedParser"]


def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any:
    # Run a parser on a file (stream).
    tokenizer = Tokenizer(tokenize.generate_tokens(file.readline))  # type: ignore[arg-type] # typeshed issue #3515
    parser = parser_class(tokenizer, verbose=verbose)
    result = parser.start()
    if result is None:
        raise parser.make_syntax_error("invalid syntax")
    return result


def parse_string(
    source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False
) -> Any:
    # Run the parser on a string.
    if dedent:
        source = textwrap.dedent(source)
    file = io.StringIO(source)
    return run_parser(file, parser_class, verbose=verbose)  # type: ignore[arg-type] # typeshed issue #3515


def make_parser(source: str) -> Type[Parser]:
    # Combine parse_string() and generate_parser().
    grammar = parse_string(source, GrammarParser)
    return generate_parser(grammar)
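

# Example (illustrative sketch): make_parser() and parse_string() are typically
# combined to exercise a small grammar end to end. The toy grammar and input
# below are hypothetical:
#
#     parser_class = make_parser("""
#     start: expr NEWLINE
#     expr: expr '+' term | term
#     term: NUMBER
#     """)
#     tree = parse_string("1 + 2 + 3\n", parser_class)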


def import_file(full_name: str, path: str) -> Any:
    """Import a python module from a path"""

    spec = importlib.util.spec_from_file_location(full_name, path)
    assert spec is not None
    mod = importlib.util.module_from_spec(spec)

    # We assume this is not None and has an exec_module() method.
    # See https://docs.python.org/3/reference/import.html?highlight=exec_module#loading
    loader = cast(Any, spec.loader)
    loader.exec_module(mod)
    return mod
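

# Example (illustrative sketch): import_file() loads a module from an explicit
# file path under a chosen module name, without touching sys.path. The path
# below is hypothetical:
#
#     mod = import_file("parse", "/tmp/pegen_build/parse.py")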


def generate_c_parser_source(grammar: Grammar) -> str:
    out = io.StringIO()
    genr = CParserGenerator(grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, out)
    genr.generate("<string>")
    return out.getvalue()


def generate_parser_c_extension(
    grammar: Grammar,
    path: pathlib.PurePath,
    debug: bool = False,
    library_dir: Optional[str] = None,
) -> Any:
    """Generate a parser c extension for the given grammar in the given path

    Returns a module object with a parse_string() method.
    TODO: express that using a Protocol.
    """
    # Make sure that the working directory is empty: reusing non-empty temporary
    # directories when generating extensions can lead to segmentation faults.
    # Check issue #95 (https://github.com/gvanrossum/pegen/issues/95) for more
    # context.
    assert not os.listdir(path)
    source = path / "parse.c"
    with open(source, "w", encoding="utf-8") as file:
        genr = CParserGenerator(
            grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug
        )
        genr.generate("parse.c")
    compile_c_extension(
        str(source),
        build_dir=str(path),
        # Significant test_peg_generator speedups
        disable_optimization=True,
        library_dir=library_dir,
    )
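

# Example (illustrative sketch): building the C extension needs an empty
# directory (see the assert above) and a working C toolchain. The grammar and
# the temporary-directory usage below are hypothetical:
#
#     import tempfile
#     grammar = parse_string("start: NAME NEWLINE\n", GrammarParser)
#     with tempfile.TemporaryDirectory() as tmpdir:
#         generate_parser_c_extension(grammar, pathlib.Path(tmpdir))
#         # parse.c and the compiled extension are written into tmpdir.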


def print_memstats() -> bool:
    MiB: Final = 2**20
    try:
        import psutil
    except ImportError:
        return False

    print("Memory stats:")
    process = psutil.Process()
    meminfo = process.memory_info()
    res = {}
    res["rss"] = meminfo.rss / MiB
    res["vms"] = meminfo.vms / MiB
    if sys.platform == "win32":
        res["maxrss"] = meminfo.peak_wset / MiB
    else:
        # See https://stackoverflow.com/questions/938733/total-memory-used-by-python-process
        import resource  # Since it doesn't exist on Windows.

        rusage = resource.getrusage(resource.RUSAGE_SELF)
        if sys.platform == "darwin":
            factor = 1
        else:
            factor = 1024  # Linux
        res["maxrss"] = rusage.ru_maxrss * factor / MiB
    for key, value in res.items():
        print(f"  {key:12.12s}: {value:10.0f} MiB")
    return True
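

# Example (illustrative sketch): print_memstats() is an optional diagnostic; it
# returns False (printing nothing) when psutil is not installed:
#
#     if not print_memstats():
#         print("psutil not available; skipping memory stats")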