cpython/Tools/peg_generator/pegen/testutil.py

import importlib.util
import io
import os
import pathlib
import sys
import textwrap
import tokenize
import token

from typing import Any, cast, Dict, IO, Type, Final

from pegen.build import compile_c_extension
from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.parser import Parser
from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer

# Numeric token type -> token name, exactly as exposed by the stdlib.
ALL_TOKENS = token.tok_name
# Exact token string (operators, punctuation) -> numeric token type.
EXACT_TOKENS = token.EXACT_TOKEN_TYPES  # type: ignore
# Names of all token types that are not the target of any exact token string.
NON_EXACT_TOKENS = {
    token.tok_name[index]
    for index in token.tok_name.keys() - set(EXACT_TOKENS.values())
}


def generate_parser(grammar: Grammar) -> Type[Parser]:
    """Build a Python parser class for *grammar* entirely in memory.

    The parser source is emitted into a string buffer by
    PythonParserGenerator, executed with exec() in a fresh namespace, and the
    resulting ``GeneratedParser`` class is returned.
    """
    # Emit the parser source into an in-memory buffer.
    buffer = io.StringIO()
    PythonParserGenerator(grammar, buffer).generate("<string>")

    # Execute that source and pull the parser class out of its namespace.
    namespace: Dict[str, Any] = {}
    exec(buffer.getvalue(), namespace)
    return namespace["GeneratedParser"]


def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any:
    """Tokenize *file* and run *parser_class* over the resulting tokens.

    Returns whatever the parser's start() method produces.  If start()
    returns None, the exception built by the parser's make_syntax_error()
    is raised instead.
    """
    token_stream = tokenize.generate_tokens(file.readline)  # type: ignore # typeshed issue #3515
    parser = parser_class(Tokenizer(token_stream), verbose=verbose)
    tree = parser.start()
    if tree is not None:
        return tree
    # A None result means the parse failed.
    raise parser.make_syntax_error()


def parse_string(
    source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False
) -> Any:
    """Parse the text *source* with *parser_class* via run_parser().

    When *dedent* is true (the default) the text is first passed through
    textwrap.dedent(), so indented triple-quoted literals can be used as-is.
    """
    text = textwrap.dedent(source) if dedent else source
    stream = io.StringIO(text)
    return run_parser(stream, parser_class, verbose=verbose)  # type: ignore # typeshed issue #3515


def make_parser(source: str) -> Type[Parser]:
    """Turn grammar text into a parser class.

    The text is parsed with GrammarParser (through parse_string()) and the
    resulting grammar is handed to generate_parser().
    """
    return generate_parser(parse_string(source, GrammarParser))


def import_file(full_name: str, path: str) -> Any:
    """Import a Python module from a path.

    A module spec is built for *path* under the name *full_name*, a module
    object is created from it and the module's code is executed.  This
    function does not insert the module into ``sys.modules`` itself; the
    module object is simply returned.

    Raises:
        ImportError: if no spec or loader can be determined for *path*
            (for example because its suffix is not a recognised module
            suffix).
    """
    spec = importlib.util.spec_from_file_location(full_name, path)
    # spec_from_file_location() returns None when it cannot pick a loader, and
    # a spec may carry no loader; fail with a clear error instead of an
    # AttributeError on None further down.
    if spec is None or spec.loader is None:
        raise ImportError(
            f"cannot import {full_name!r} from {path!r}: no suitable loader found",
            name=full_name,
            path=str(path),
        )
    mod = importlib.util.module_from_spec(spec)

    # The loader is assumed to provide exec_module().
    # See https://docs.python.org/3/reference/import.html?highlight=exec_module#loading
    loader = cast(Any, spec.loader)
    loader.exec_module(mod)
    return mod


def generate_c_parser_source(grammar: Grammar) -> str:
    """Return, as a string, the C parser source generated for *grammar*.

    The generator is given the module-level token tables and writes into an
    in-memory buffer; nothing is written to disk.
    """
    buffer = io.StringIO()
    generator = CParserGenerator(grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, buffer)
    generator.generate("<string>")
    return buffer.getvalue()


def generate_parser_c_extension(
    grammar: Grammar, path: pathlib.PurePath, debug: bool = False
) -> Any:
    """Generate and compile a parser C extension for *grammar* inside *path*.

    The generated C source is written to ``path / "parse.c"`` and then passed
    to compile_c_extension() with *path* as the build directory.  *debug* is
    forwarded to the C parser generator.

    Nothing is imported or returned here: the function falls off the end and
    therefore returns None.  Loading the built extension is left to the
    caller.

    Raises:
        AssertionError: if *path* is not empty.
    """
    # Make sure that the working directory is empty: reusing non-empty temporary
    # directories when generating extensions can lead to segmentation faults.
    # Check issue #95 (https://github.com/gvanrossum/pegen/issues/95) for more
    # context.
    # Raised explicitly (rather than with an ``assert`` statement) so the guard
    # is still enforced when Python runs with -O, which strips asserts.
    if os.listdir(path):
        raise AssertionError(f"{path} must be an empty directory")
    source = path / "parse.c"
    with open(source, "w") as file:
        genr = CParserGenerator(
            grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug
        )
        genr.generate("parse.c")
    compile_c_extension(str(source), build_dir=str(path))


def print_memstats() -> bool:
    """Print the current process's memory usage, in MiB, to stdout.

    Reports the ``rss`` and ``vms`` figures from psutil plus a peak
    (``maxrss``) figure.  Returns False without printing anything when psutil
    cannot be imported, and True once the stats have been printed.
    """
    MiB: Final = 2 ** 20
    try:
        import psutil  # type: ignore
    except ImportError:
        return False
    print("Memory stats:")
    meminfo = psutil.Process().memory_info()
    if sys.platform == "win32":
        peak = meminfo.peak_wset
    else:
        # See https://stackoverflow.com/questions/938733/total-memory-used-by-python-process
        import resource  # Since it doesn't exist on Windows.

        # Scale ru_maxrss to bytes: used as-is on macOS, multiplied by 1024
        # elsewhere (Linux).
        scale = 1 if sys.platform == "darwin" else 1024
        peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss * scale
    stats = {
        "rss": meminfo.rss / MiB,
        "vms": meminfo.vms / MiB,
        "maxrss": peak / MiB,
    }
    for label, mebibytes in stats.items():
        print(f"  {label:12.12s}: {mebibytes:10.0f} MiB")
    return True
bpo-40334: PEP 617 implementation: New PEG parser for CPython (GH-19503) Co-authored-by: Guido van Rossum <guido@python.org> Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com> 2020-04-22 19:29:27 -03:00			`import importlib.util`
			`import io`
			`import os`
			`import pathlib`
			`import sys`
			`import textwrap`
			`import tokenize`
bpo-40334: Refactor peg_generator to receive a Tokens file when building c code (GH-19745) 2020-04-28 09:11:55 -03:00			`import token`
bpo-40334: PEP 617 implementation: New PEG parser for CPython (GH-19503) Co-authored-by: Guido van Rossum <guido@python.org> Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com> 2020-04-22 19:29:27 -03:00
			`from typing import Any, cast, Dict, IO, Type, Final`

			`from pegen.build import compile_c_extension`
			`from pegen.c_generator import CParserGenerator`
			`from pegen.grammar import Grammar`
			`from pegen.grammar_parser import GeneratedParser as GrammarParser`
			`from pegen.parser import Parser`
			`from pegen.python_generator import PythonParserGenerator`
			`from pegen.tokenizer import Tokenizer`

bpo-40334: use the TOKENS file when checking dangling rules (GH-19849) 2020-05-01 19:14:12 -03:00			`ALL_TOKENS = token.tok_name`
bpo-40334: Refactor peg_generator to receive a Tokens file when building c code (GH-19745) 2020-04-28 09:11:55 -03:00			`EXACT_TOKENS = token.EXACT_TOKEN_TYPES # type: ignore`
			`NON_EXACT_TOKENS = {`
			`name for index, name in token.tok_name.items() if index not in EXACT_TOKENS.values()`
			`}`

bpo-40334: PEP 617 implementation: New PEG parser for CPython (GH-19503) Co-authored-by: Guido van Rossum <guido@python.org> Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com> 2020-04-22 19:29:27 -03:00
			`def generate_parser(grammar: Grammar) -> Type[Parser]:`
			`# Generate a parser.`
			`out = io.StringIO()`
			`genr = PythonParserGenerator(grammar, out)`
			`genr.generate("<string>")`

			`# Load the generated parser class.`
			`ns: Dict[str, Any] = {}`
			`exec(out.getvalue(), ns)`
			`return ns["GeneratedParser"]`


			`def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any:`
			`# Run a parser on a file (stream).`
			`tokenizer = Tokenizer(tokenize.generate_tokens(file.readline)) # type: ignore # typeshed issue #3515`
			`parser = parser_class(tokenizer, verbose=verbose)`
			`result = parser.start()`
			`if result is None:`
			`raise parser.make_syntax_error()`
			`return result`


			`def parse_string(`
			`source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False`
			`) -> Any:`
			`# Run the parser on a string.`
			`if dedent:`
			`source = textwrap.dedent(source)`
			`file = io.StringIO(source)`
			`return run_parser(file, parser_class, verbose=verbose) # type: ignore # typeshed issue #3515`


			`def make_parser(source: str) -> Type[Parser]:`
			`# Combine parse_string() and generate_parser().`
			`grammar = parse_string(source, GrammarParser)`
			`return generate_parser(grammar)`


			`def import_file(full_name: str, path: str) -> Any:`
			`"""Import a python module from a path"""`

			`spec = importlib.util.spec_from_file_location(full_name, path)`
			`mod = importlib.util.module_from_spec(spec)`

			`# We assume this is not None and has an exec_module() method.`
			`# See https://docs.python.org/3/reference/import.html?highlight=exec_module#loading`
			`loader = cast(Any, spec.loader)`
			`loader.exec_module(mod)`
			`return mod`


			`def generate_c_parser_source(grammar: Grammar) -> str:`
			`out = io.StringIO()`
bpo-40334: use the TOKENS file when checking dangling rules (GH-19849) 2020-05-01 19:14:12 -03:00			`genr = CParserGenerator(grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, out)`
bpo-40334: PEP 617 implementation: New PEG parser for CPython (GH-19503) Co-authored-by: Guido van Rossum <guido@python.org> Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com> 2020-04-22 19:29:27 -03:00			`genr.generate("<string>")`
			`return out.getvalue()`


			`def generate_parser_c_extension(`
			`grammar: Grammar, path: pathlib.PurePath, debug: bool = False`
			`) -> Any:`
			`"""Generate a parser c extension for the given grammar in the given path`

			`Returns a module object with a parse_string() method.`
			`TODO: express that using a Protocol.`
			`"""`
			`# Make sure that the working directory is empty: reusing non-empty temporary`
			`# directories when generating extensions can lead to segmentation faults.`
			`# Check issue #95 (https://github.com/gvanrossum/pegen/issues/95) for more`
			`# context.`
			`assert not os.listdir(path)`
			`source = path / "parse.c"`
			`with open(source, "w") as file:`
bpo-40334: use the TOKENS file when checking dangling rules (GH-19849) 2020-05-01 19:14:12 -03:00			`genr = CParserGenerator(`
			`grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug`
			`)`
bpo-40334: PEP 617 implementation: New PEG parser for CPython (GH-19503) Co-authored-by: Guido van Rossum <guido@python.org> Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com> 2020-04-22 19:29:27 -03:00			`genr.generate("parse.c")`
bpo-40334: Rewrite test_c_parser to avoid memory leaks (GH-19694) Previously every test was building an extension module and loading it into sys.modules. The tearDown function was thus not able to clean up correctly, resulting in memory leaks. With this commit, every test function now builds the extension module and runs the actual test code in a new process (using assert_python_ok), so that sys.modules stays intact and no memory gets leaked. 2020-04-24 10:51:09 -03:00			`compile_c_extension(str(source), build_dir=str(path))`
bpo-40334: PEP 617 implementation: New PEG parser for CPython (GH-19503) Co-authored-by: Guido van Rossum <guido@python.org> Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com> 2020-04-22 19:29:27 -03:00

			`def print_memstats() -> bool:`
			`MiB: Final = 2 ** 20`
			`try:`
			`import psutil # type: ignore`
			`except ImportError:`
			`return False`
			`print("Memory stats:")`
			`process = psutil.Process()`
			`meminfo = process.memory_info()`
			`res = {}`
			`res["rss"] = meminfo.rss / MiB`
			`res["vms"] = meminfo.vms / MiB`
			`if sys.platform == "win32":`
			`res["maxrss"] = meminfo.peak_wset / MiB`
			`else:`
			`# See https://stackoverflow.com/questions/938733/total-memory-used-by-python-process`
			`import resource # Since it doesn't exist on Windows.`

			`rusage = resource.getrusage(resource.RUSAGE_SELF)`
			`if sys.platform == "darwin":`
			`factor = 1`
			`else:`
			`factor = 1024 # Linux`
			`res["maxrss"] = rusage.ru_maxrss * factor / MiB`
			`for key, value in res.items():`
			`print(f" {key:12.12s}: {value:10.0f} MiB")`
			`return True`