cpython/Tools/peg_generator/scripts/test_parse_directory.py

#!/usr/bin/env python3.8

import argparse
import ast
import os
import sys
import time
import traceback
import tokenize
from glob import glob, escape
from pathlib import PurePath

from typing import List, Optional, Any, Tuple

sys.path.insert(0, os.getcwd())
from pegen.ast_dump import ast_dump
from pegen.testutil import print_memstats

# ANSI SGR escape sequences used by report_status() to colour its per-file line.
SUCCESS = "\033[92m"  # bright green foreground
FAIL = "\033[91m"  # bright red foreground
ENDC = "\033[0m"  # reset all attributes back to the terminal default

# NOTE(review): none of the three constants below is referenced in the visible
# part of this file; presumably leftover parser "mode" levels -- confirm
# before relying on or removing them.
COMPILE = 2
PARSE = 1
NOTREE = 0

# Command-line interface of the script; main() reads the parsed namespace and
# forwards its values to parse_directory().
argparser = argparse.ArgumentParser(
    prog="test_parse_directory",
    description="Helper program to test directories or files for pegen",
)
argparser.add_argument(
    "-d",
    "--directory",
    # Mandatory: without it main() would hand ``None`` to glob.escape(), which
    # fails with an unhelpful TypeError instead of a usage message.
    required=True,
    help="Directory path containing files to test",
)
argparser.add_argument(
    # May be given several times; every occurrence is appended to the list.
    "-e", "--exclude", action="append", default=[], help="Glob(s) for matching files to exclude"
)
argparser.add_argument(
    "-s", "--short", action="store_true", help="Only show errors, in a more Emacs-friendly format"
)
argparser.add_argument(
    "-v", "--verbose", action="store_true", help="Display detailed errors for failures"
)


def report_status(
    succeeded: bool,
    file: str,
    verbose: bool,
    error: Optional[Exception] = None,
    short: bool = False,
) -> None:
    """Print the outcome of parsing a single file.

    Args:
        succeeded: Whether the file parsed without error.
        file: Path of the file, printed as given.
        verbose: In the long format, also print the error's class and text.
        error: The exception describing the failure, if any.
        short: Use the one-line ``file:line:column: message`` format and
            print nothing at all for files that succeeded.
    """
    if short:
        if succeeded:
            return

        if isinstance(error, SyntaxError):
            # Missing (None/0) positions are reported as 1.
            line, column = error.lineno or 1, error.offset or 1
            message = error.args[0]
        else:
            line = column = 0
            message = f"{error.__class__.__name__}: {error}"
        print(f"{file}:{line}:{column}: {message}")
        return

    # Long format: one coloured status line per file.
    colour, status = (SUCCESS, "OK") if succeeded is True else (FAIL, "Fail")
    print(f"{colour}{file:60} {status}{ENDC}")

    if error and verbose:
        print(f"  {str(error.__class__.__name__)}: {error}")


def parse_file(source: str, file: str) -> Tuple[Any, float]:
    """Parse *source* with ``ast.parse`` and measure how long the call takes.

    Args:
        source: Python source text to parse.
        file: Name passed to ``ast.parse`` as ``filename``.

    Returns:
        A ``(tree, seconds)`` tuple: the value returned by ``ast.parse`` and
        the elapsed time of that call in seconds.

    Raises:
        SyntaxError: Propagated from ``ast.parse`` when *source* is invalid.
    """
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() follows the wall clock, which can be adjusted
    # while the measurement is running.
    start = time.perf_counter()
    result = ast.parse(source, filename=file)
    elapsed = time.perf_counter() - start
    return result, elapsed


def generate_time_stats(files, total_seconds) -> None:
    """Print size totals and parsing throughput for the checked files.

    Args:
        files: Paths of the files that were checked; each one is re-opened in
            binary mode to count its lines and bytes.
        total_seconds: Accumulated parse time in seconds. The throughput line
            is printed only when this is greater than zero.
    """
    file_count = len(files)
    line_count = 0
    byte_count = 0
    for path in files:
        with open(path, "rb") as stream:
            for _ in stream:
                line_count += 1
            # Iteration has consumed the stream, so the position is its size.
            byte_count += stream.tell()

    summary = (
        f"Checked {file_count:,} files, {line_count:,} lines, "
        f"{byte_count:,} bytes in {total_seconds:,.3f} seconds."
    )
    print(summary)

    if not total_seconds > 0:
        return

    lines_per_second = line_count / total_seconds
    bytes_per_second = byte_count / total_seconds
    print(f"That's {lines_per_second:,.0f} lines/sec, or {bytes_per_second:,.0f} bytes/sec.")


def parse_directory(directory: str, verbose: bool, excluded_files: List[str], short: bool) -> int:
    """Parse every ``*.py`` file below *directory* and report each outcome.

    Files are visited in sorted path order. Each one is read with
    ``tokenize.open`` and handed to ``parse_file``; the result is printed via
    ``report_status``. Afterwards ``generate_time_stats`` prints totals for
    all visited files and, in short mode, ``print_memstats`` is called.

    Args:
        directory: Root directory, searched recursively. It is escaped before
            globbing, so glob metacharacters in it are taken literally.
        verbose: Forwarded to ``report_status``.
        excluded_files: Glob patterns; a file whose path matches any of them
            (``PurePath.match``) is skipped entirely.
        short: Forwarded to ``report_status``; also enables the
            ``print_memstats`` call.

    Returns:
        ``1`` if at least one file failed, otherwise ``0``.
    """
    errors = 0
    files = []
    total_seconds = 0.0

    search_pattern = os.path.join(escape(directory), "**/*.py")
    for file in sorted(glob(search_pattern, recursive=True)):
        # Only attempt to parse Python files and files that are not excluded
        if any(PurePath(file).match(pattern) for pattern in excluded_files):
            continue

        try:
            # Reading belongs inside the ``try``: tokenize.open() raises
            # SyntaxError for an invalid encoding declaration and read() raises
            # UnicodeDecodeError (a ValueError) for undecodable bytes. Such a
            # file must count as one failure, not abort the whole run.
            with tokenize.open(file) as f:
                source = f.read()
            # ValueError also covers source text containing NUL bytes.
            _, dt = parse_file(source, file)
        except (SyntaxError, ValueError) as error:
            report_status(succeeded=False, file=file, verbose=verbose, error=error, short=short)
            errors += 1
        else:
            total_seconds += dt
            report_status(succeeded=True, file=file, verbose=verbose, short=short)
        # Failed files are still included in the size statistics.
        files.append(file)

    generate_time_stats(files, total_seconds)
    if short:
        print_memstats()

    if errors:
        print(f"Encountered {errors} failures.", file=sys.stderr)
        return 1

    return 0


def main() -> None:
    """Parse the command line, check the directory and exit with its status.

    The process exit code is the value returned by ``parse_directory``.
    """
    options = argparser.parse_args()
    exit_code = parse_directory(
        options.directory,
        options.verbose,
        options.exclude,
        options.short,
    )
    sys.exit(exit_code)


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
bpo-40334: PEP 617 implementation: New PEG parser for CPython (GH-19503) Co-authored-by: Guido van Rossum <guido@python.org> Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com> 2020-04-22 19:29:27 -03:00			`#!/usr/bin/env python3.8`

			`import argparse`
			`import ast`
			`import os`
			`import sys`
			`import time`
			`import traceback`
bpo-40688: Use the correct parser in the peg_generator scripts (GH-20235) The scripts in `Tools/peg_generator/scripts` mostly assume that `ast.parse` and `compile` use the old parser, since this was the state of things, while we were developing them. They need to be updated to always use the correct parser. `_peg_parser` is being extended to support both parsing and compiling with both parsers. 2020-05-25 16:51:58 -03:00			`import tokenize`
bpo-41043: Escape literal part of the path for glob(). (GH-20994) 2020-06-20 05:10:31 -03:00			`from glob import glob, escape`
bpo-40334: PEP 617 implementation: New PEG parser for CPython (GH-19503) Co-authored-by: Guido van Rossum <guido@python.org> Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com> 2020-04-22 19:29:27 -03:00			`from pathlib import PurePath`

Refactor scripts in Tools/peg_generator/scripts (GH-20401) 2020-06-06 01:21:40 -03:00			`from typing import List, Optional, Any, Tuple`
bpo-40334: PEP 617 implementation: New PEG parser for CPython (GH-19503) Co-authored-by: Guido van Rossum <guido@python.org> Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com> 2020-04-22 19:29:27 -03:00
			`sys.path.insert(0, os.getcwd())`
Fix some scripts in the peg generator folder (GH-19853) 2020-05-02 01:23:06 -03:00			`from pegen.ast_dump import ast_dump`
bpo-40334: PEP 617 implementation: New PEG parser for CPython (GH-19503) Co-authored-by: Guido van Rossum <guido@python.org> Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com> 2020-04-22 19:29:27 -03:00			`from pegen.testutil import print_memstats`

			`SUCCESS = "\033[92m"`
			`FAIL = "\033[91m"`
			`ENDC = "\033[0m"`

Refactor scripts in Tools/peg_generator/scripts (GH-20401) 2020-06-06 01:21:40 -03:00			`COMPILE = 2`
			`PARSE = 1`
			`NOTREE = 0`

bpo-40334: PEP 617 implementation: New PEG parser for CPython (GH-19503) Co-authored-by: Guido van Rossum <guido@python.org> Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com> 2020-04-22 19:29:27 -03:00			`argparser = argparse.ArgumentParser(`
			`prog="test_parse_directory",`
			`description="Helper program to test directories or files for pegen",`
			`)`
			`argparser.add_argument("-d", "--directory", help="Directory path containing files to test")`
			`argparser.add_argument(`
			`"-e", "--exclude", action="append", default=[], help="Glob(s) for matching files to exclude"`
			`)`
			`argparser.add_argument(`
			`"-s", "--short", action="store_true", help="Only show errors, in a more Emacs-friendly format"`
			`)`
			`argparser.add_argument(`
			`"-v", "--verbose", action="store_true", help="Display detailed errors for failures"`
			`)`


			`def report_status(`
			`succeeded: bool,`
			`file: str,`
			`verbose: bool,`
			`error: Optional[Exception] = None,`
			`short: bool = False,`
			`) -> None:`
			`if short and succeeded:`
			`return`

			`if succeeded is True:`
			`status = "OK"`
			`COLOR = SUCCESS`
			`else:`
			`status = "Fail"`
			`COLOR = FAIL`

			`if short:`
			`lineno = 0`
			`offset = 0`
			`if isinstance(error, SyntaxError):`
			`lineno = error.lineno or 1`
			`offset = error.offset or 1`
			`message = error.args[0]`
			`else:`
			`message = f"{error.__class__.__name__}: {error}"`
			`print(f"{file}:{lineno}:{offset}: {message}")`
			`else:`
			`print(f"{COLOR}{file:60} {status}{ENDC}")`

			`if error and verbose:`
			`print(f" {str(error.__class__.__name__)}: {error}")`


bpo-40939: Clean and adapt the peg_generator directory after deleting the old parser (GH-20822) 2020-06-11 21:55:35 -03:00			`def parse_file(source: str, file: str) -> Tuple[Any, float]:`
Refactor scripts in Tools/peg_generator/scripts (GH-20401) 2020-06-06 01:21:40 -03:00			`t0 = time.time()`
bpo-40939: Clean and adapt the peg_generator directory after deleting the old parser (GH-20822) 2020-06-11 21:55:35 -03:00			`result = ast.parse(source, filename=file)`
Refactor scripts in Tools/peg_generator/scripts (GH-20401) 2020-06-06 01:21:40 -03:00			`t1 = time.time()`
			`return result, t1 - t0`
bpo-40334: PEP 617 implementation: New PEG parser for CPython (GH-19503) Co-authored-by: Guido van Rossum <guido@python.org> Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com> 2020-04-22 19:29:27 -03:00

Refactor scripts in Tools/peg_generator/scripts (GH-20401) 2020-06-06 01:21:40 -03:00			`def generate_time_stats(files, total_seconds) -> None:`
bpo-40334: PEP 617 implementation: New PEG parser for CPython (GH-19503) Co-authored-by: Guido van Rossum <guido@python.org> Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com> 2020-04-22 19:29:27 -03:00			`total_files = len(files)`
			`total_bytes = 0`
			`total_lines = 0`
			`for file in files:`
			`# Count lines and bytes separately`
			`with open(file, "rb") as f:`
			`total_lines += sum(1 for _ in f)`
			`total_bytes += f.tell()`

			`print(`
			`f"Checked {total_files:,} files, {total_lines:,} lines,",`
			`f"{total_bytes:,} bytes in {total_seconds:,.3f} seconds.",`
			`)`
			`if total_seconds > 0:`
			`print(`
			`f"That's {total_lines / total_seconds :,.0f} lines/sec,",`
			`f"or {total_bytes / total_seconds :,.0f} bytes/sec.",`
			`)`

Refactor scripts in Tools/peg_generator/scripts (GH-20401) 2020-06-06 01:21:40 -03:00
bpo-40939: Clean and adapt the peg_generator directory after deleting the old parser (GH-20822) 2020-06-11 21:55:35 -03:00			`def parse_directory(directory: str, verbose: bool, excluded_files: List[str], short: bool) -> int:`
Refactor scripts in Tools/peg_generator/scripts (GH-20401) 2020-06-06 01:21:40 -03:00			`# For a given directory, traverse files and attempt to parse each one`
			`# - Output success/failure for each file`
			`errors = 0`
			`files = []`
			`total_seconds = 0`

bpo-41043: Escape literal part of the path for glob(). (GH-20994) 2020-06-20 05:10:31 -03:00			`for file in sorted(glob(os.path.join(escape(directory), f"*/.py"), recursive=True)):`
Refactor scripts in Tools/peg_generator/scripts (GH-20401) 2020-06-06 01:21:40 -03:00			`# Only attempt to parse Python files and files that are not excluded`
			`if any(PurePath(file).match(pattern) for pattern in excluded_files):`
			`continue`

			`with tokenize.open(file) as f:`
			`source = f.read()`

			`try:`
bpo-40939: Clean and adapt the peg_generator directory after deleting the old parser (GH-20822) 2020-06-11 21:55:35 -03:00			`result, dt = parse_file(source, file)`
Refactor scripts in Tools/peg_generator/scripts (GH-20401) 2020-06-06 01:21:40 -03:00			`total_seconds += dt`
			`report_status(succeeded=True, file=file, verbose=verbose, short=short)`
			`except SyntaxError as error:`
bpo-40939: Clean and adapt the peg_generator directory after deleting the old parser (GH-20822) 2020-06-11 21:55:35 -03:00			`report_status(succeeded=False, file=file, verbose=verbose, error=error, short=short)`
			`errors += 1`
Refactor scripts in Tools/peg_generator/scripts (GH-20401) 2020-06-06 01:21:40 -03:00			`files.append(file)`

			`generate_time_stats(files, total_seconds)`
bpo-40334: PEP 617 implementation: New PEG parser for CPython (GH-19503) Co-authored-by: Guido van Rossum <guido@python.org> Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com> 2020-04-22 19:29:27 -03:00			`if short:`
			`print_memstats()`

			`if errors:`
			`print(f"Encountered {errors} failures.", file=sys.stderr)`
			`return 1`

			`return 0`


			`def main() -> None:`
			`args = argparser.parse_args()`
			`directory = args.directory`
			`verbose = args.verbose`
			`excluded_files = args.exclude`
			`short = args.short`
bpo-40939: Clean and adapt the peg_generator directory after deleting the old parser (GH-20822) 2020-06-11 21:55:35 -03:00			`sys.exit(parse_directory(directory, verbose, excluded_files, short))`
bpo-40334: PEP 617 implementation: New PEG parser for CPython (GH-19503) Co-authored-by: Guido van Rossum <guido@python.org> Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com> 2020-04-22 19:29:27 -03:00

			`if __name__ == "__main__":`
			`main()`