From ba6fd87e41dceb01dcdacc57c722aca12cde42a9 Mon Sep 17 00:00:00 2001
From: Lysandros Nikolaou <lisandrosnik@gmail.com>
Date: Sat, 6 Jun 2020 07:21:40 +0300
Subject: [PATCH] Refactor scripts in Tools/peg_generator/scripts (GH-20401)

---
 Modules/_peg_parser.c                         |  16 +-
 Tools/peg_generator/Makefile                  |  14 +-
 Tools/peg_generator/scripts/benchmark.py      |  31 ++-
 .../peg_generator/scripts/grammar_grapher.py  |  19 +-
 Tools/peg_generator/scripts/show_parse.py     |  21 ++-
 .../scripts/test_parse_directory.py           | 177 ++++++++----------
 .../scripts/test_pypi_packages.py             |  15 +-
 7 files changed, 146 insertions(+), 147 deletions(-)
diff --git a/Modules/_peg_parser.c b/Modules/_peg_parser.c
index b66d5a83a84..ca2a3cf7b5f 100644
--- a/Modules/_peg_parser.c
+++ b/Modules/_peg_parser.c
@@ -80,14 +80,15 @@ _Py_compile_string(PyObject *self, PyObject *args, PyObject *kwds)
 PyObject *
 _Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds)
 {
-    static char *keywords[] = {"string", "filename", "mode", "oldparser", NULL};
+    static char *keywords[] = {"string", "filename", "mode", "oldparser", "ast", NULL};
     char *the_string;
     char *filename = "<string>";
     char *mode_str = "exec";
     int oldparser = 0;
+    int ast = 1;
 
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|ssp", keywords,
-            &the_string, &filename, &mode_str, &oldparser)) {
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|sspp", keywords,
+            &the_string, &filename, &mode_str, &oldparser, &ast)) {
         return NULL;
     }
 
@@ -110,7 +111,14 @@ _Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds)
         return NULL;
     }
 
-    PyObject *result = PyAST_mod2obj(mod);
+    PyObject *result;
+    if (ast) {
+        result = PyAST_mod2obj(mod);
+    }
+    else {
+        Py_INCREF(Py_None);
+        result = Py_None;
+    }
     PyArena_Free(arena);
     return result;
 }
diff --git a/Tools/peg_generator/Makefile b/Tools/peg_generator/Makefile
index e7a190c1bcd..fb727c048b3 100644
--- a/Tools/peg_generator/Makefile
+++ b/Tools/peg_generator/Makefile
@@ -70,23 +70,21 @@ stats: peg_extension/parse.c data/xxl.py
 time: time_compile
 
 time_compile: venv data/xxl.py
-	$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl compile
+	$(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl compile
 
 time_parse: venv data/xxl.py
-	$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl parse
+	$(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl parse
 
 time_old: time_old_compile
 
 time_old_compile: venv data/xxl.py
-	$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl compile
+	$(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl compile
 
 time_old_parse: venv data/xxl.py
-	$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl parse
+	$(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl parse
 
 time_peg_dir: venv
 	$(VENVPYTHON) scripts/test_parse_directory.py \
-		--grammar-file $(GRAMMAR) \
-		--tokens-file $(TOKENS) \
 		-d $(TESTDIR) \
 		$(TESTFLAGS) \
 		--exclude "*/failset/*" \
@@ -95,12 +93,8 @@ time_peg_dir: venv
 
 time_stdlib: $(CPYTHON) venv
 	$(VENVPYTHON) scripts/test_parse_directory.py \
-		--grammar-file $(GRAMMAR) \
-		--tokens-file $(TOKENS) \
 		-d $(CPYTHON) \
 		$(TESTFLAGS) \
-		--exclude "*/test2to3/*" \
-		--exclude "*/test2to3/**/*" \
 		--exclude "*/bad*" \
 		--exclude "*/lib2to3/tests/data/*"
 
diff --git a/Tools/peg_generator/scripts/benchmark.py b/Tools/peg_generator/scripts/benchmark.py
index 71512c22a35..af356bed783 100644
--- a/Tools/peg_generator/scripts/benchmark.py
+++ b/Tools/peg_generator/scripts/benchmark.py
@@ -24,7 +24,7 @@ argparser = argparse.ArgumentParser(
 argparser.add_argument(
     "--parser",
     action="store",
-    choices=["pegen", "cpython"],
-    default="pegen",
-    help="Which parser to benchmark (default is pegen)",
+    choices=["new", "old"],
+    default="new",
+    help="Which parser to benchmark (default is new)",
 )
@@ -40,7 +40,12 @@ subcommands = argparser.add_subparsers(title="Benchmarks", dest="subcommand")
 command_compile = subcommands.add_parser(
     "compile", help="Benchmark parsing and compiling to bytecode"
 )
-command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST")
+command_parse = subcommands.add_parser(
+    "parse", help="Benchmark parsing and generating an ast.AST"
+)
+command_notree = subcommands.add_parser(
+    "notree", help="Benchmark parsing without generating an ast.AST"
+)
 
 
 def benchmark(func):
@@ -62,7 +67,7 @@ def benchmark(func):
 
 @benchmark
 def time_compile(source, parser):
-    if parser == "cpython":
+    if parser == "old":
         return _peg_parser.compile_string(
             source,
             oldparser=True,
@@ -73,32 +78,40 @@ def time_compile(source, parser):
 
 @benchmark
 def time_parse(source, parser):
-    if parser == "cpython":
+    if parser == "old":
         return _peg_parser.parse_string(source, oldparser=True)
     else:
         return _peg_parser.parse_string(source)
 
 
+@benchmark
+def time_notree(source, parser):
+    # ast=False: parse only, without converting the result to an ast.AST object
+    if parser != "old":
+        return _peg_parser.parse_string(source, ast=False)
+    return _peg_parser.parse_string(source, oldparser=True, ast=False)
+
+
 def run_benchmark_xxl(subcommand, parser, source):
     if subcommand == "compile":
         time_compile(source, parser)
     elif subcommand == "parse":
         time_parse(source, parser)
+    elif subcommand == "notree":
+        time_notree(source, parser)
 
 
 def run_benchmark_stdlib(subcommand, parser):
+    modes = {"compile": 2, "parse": 1, "notree": 0}
     for _ in range(3):
         parse_directory(
             "../../Lib",
-            "../../Grammar/python.gram",
-            "../../Grammar/Tokens",
             verbose=False,
             excluded_files=["*/bad*", "*/lib2to3/tests/data/*",],
-            skip_actions=False,
             tree_arg=0,
             short=True,
-            mode=2 if subcommand == "compile" else 1,
-            parser=parser,
+            mode=modes[subcommand],
+            oldparser=(parser == "old"),
         )
 
 
diff --git a/Tools/peg_generator/scripts/grammar_grapher.py b/Tools/peg_generator/scripts/grammar_grapher.py
index 3aa25466c70..4afdbce8f96 100755
--- a/Tools/peg_generator/scripts/grammar_grapher.py
+++ b/Tools/peg_generator/scripts/grammar_grapher.py
@@ -42,6 +42,13 @@ from pegen.grammar import (
 )
 
 argparser = argparse.ArgumentParser(prog="graph_grammar", description="Graph a grammar tree",)
+argparser.add_argument(
+    "-s",
+    "--start",
+    choices=["exec", "eval", "single"],
+    default="exec",
+    help="Choose the grammar's start rule (exec, eval or single)",
+)
 argparser.add_argument("grammar_file", help="The grammar file to graph")
 
 
@@ -91,19 +98,15 @@ def main() -> None:
         references[name] = set(references_for_item(rule))
 
-    # Flatten the start node if has only a single reference
+    # Map the requested start mode to the grammar rule used as the graph root
-    root_node = "start"
-    if start := references["start"]:
-        if len(start) == 1:
-            root_node = list(start)[0]
-            del references["start"]
+    root_node = {"exec": "file", "eval": "eval", "single": "interactive"}[args.start]
 
     print("digraph g1 {")
     print('\toverlap="scale";')  # Force twopi to scale the graph to avoid overlaps
     print(f'\troot="{root_node}";')
-    print(f"\t{root_node} [color=green, shape=circle]")
+    print(f"\t{root_node} [color=green, shape=circle];")
     for name, refs in references.items():
-        if refs:  # Ignore empty sets
-            print(f"\t{name} -> {','.join(refs)};")
+        for ref in refs:
+            print(f"\t{name} -> {ref};")
     print("}")
 
 
diff --git a/Tools/peg_generator/scripts/show_parse.py b/Tools/peg_generator/scripts/show_parse.py
index 1c1996f40f7..b4ee5a1b357 100755
--- a/Tools/peg_generator/scripts/show_parse.py
+++ b/Tools/peg_generator/scripts/show_parse.py
@@ -41,7 +41,13 @@ parser = argparse.ArgumentParser()
 parser.add_argument(
-    "-d", "--diff", action="store_true", help="show diff between grammar and ast (requires -g)"
+    "-d", "--diff", action="store_true", help="diff the old and new parser trees (ignored with -p old)"
 )
-parser.add_argument("-g", "--grammar-file", help="grammar to use (default: use the ast module)")
+parser.add_argument(
+    "-p",
+    "--parser",
+    choices=["new", "old"],
+    default="new",
+    help="choose the parser to use"
+)
 parser.add_argument(
     "-m",
     "--multiline",
@@ -84,19 +90,18 @@ def print_parse(source: str, verbose: bool = False) -> None:
 
 def main() -> None:
     args = parser.parse_args()
-    if args.diff and not args.grammar_file:
-        parser.error("-d/--diff requires -g/--grammar-file")
+    new_parser = args.parser == "new"
     if args.multiline:
         sep = "\n"
     else:
         sep = " "
     program = sep.join(args.program)
-    if args.grammar_file:
+    if new_parser:
         tree = _peg_parser.parse_string(program)
 
         if args.diff:
-            a = tree
-            b = _peg_parser.parse_string(program, oldparser=True)
+            a = _peg_parser.parse_string(program, oldparser=True)
+            b = tree
             diff = diff_trees(a, b, args.verbose)
             if diff:
                 for line in diff:
@@ -104,11 +109,11 @@ def main() -> None:
             else:
                 print("# Trees are the same")
         else:
-            print(f"# Parsed using {args.grammar_file}")
+            print("# Parsed using the new parser")
             print(format_tree(tree, args.verbose))
     else:
         tree = _peg_parser.parse_string(program, oldparser=True)
-        print("# Parse using the old parser")
+        print("# Parsed using the old parser")
         print(format_tree(tree, args.verbose))
 
 
diff --git a/Tools/peg_generator/scripts/test_parse_directory.py b/Tools/peg_generator/scripts/test_parse_directory.py
index e88afe1539c..63204ce9dc1 100755
--- a/Tools/peg_generator/scripts/test_parse_directory.py
+++ b/Tools/peg_generator/scripts/test_parse_directory.py
@@ -11,7 +11,7 @@ import _peg_parser
 from glob import glob
 from pathlib import PurePath
 
-from typing import List, Optional, Any
+from typing import List, Optional, Any, Tuple
 
 sys.path.insert(0, os.getcwd())
 from pegen.ast_dump import ast_dump
@@ -22,13 +22,15 @@ SUCCESS = "\033[92m"
 FAIL = "\033[91m"
 ENDC = "\033[0m"
 
+COMPILE = 2
+PARSE = 1
+NOTREE = 0
+
 argparser = argparse.ArgumentParser(
     prog="test_parse_directory",
     description="Helper program to test directories or files for pegen",
 )
 argparser.add_argument("-d", "--directory", help="Directory path containing files to test")
-argparser.add_argument("--grammar-file", help="Grammar file path")
-argparser.add_argument("--tokens-file", help="Tokens file path")
 argparser.add_argument(
     "-e", "--exclude", action="append", default=[], help="Glob(s) for matching files to exclude"
 )
@@ -38,9 +40,6 @@ argparser.add_argument(
 argparser.add_argument(
     "-v", "--verbose", action="store_true", help="Display detailed errors for failures"
 )
-argparser.add_argument(
-    "--skip-actions", action="store_true", help="Suppress code emission for rule actions",
-)
 argparser.add_argument(
     "-t", "--tree", action="count", help="Compare parse tree to official AST", default=0
 )
@@ -113,92 +112,35 @@ def compare_trees(
     return 1
 
 
-def parse_directory(
-    directory: str,
-    grammar_file: str,
-    tokens_file: str,
-    verbose: bool,
-    excluded_files: List[str],
-    skip_actions: bool,
-    tree_arg: int,
-    short: bool,
-    mode: int,
-    parser: str,
-) -> int:
-    if parser == "cpython" and (tree_arg or mode == 0):
-        print("Cannot specify tree argument or mode=0 with the cpython parser.", file=sys.stderr)
-        return 1
-
-    if not directory:
-        print("You must specify a directory of files to test.", file=sys.stderr)
-        return 1
-
-    if grammar_file and tokens_file:
-        if not os.path.exists(grammar_file):
-            print(f"The specified grammar file, {grammar_file}, does not exist.", file=sys.stderr)
-            return 1
-    else:
-        print(
-            "A grammar file or a tokens file was not provided - attempting to use existing parser from stdlib...\n"
+def parse_file(source: str, file: str, mode: int, oldparser: bool) -> Tuple[Any, float]:
+    t0 = time.time()
+    if mode == COMPILE:
+        result = _peg_parser.compile_string(
+            source,
+            filename=file,
+            oldparser=oldparser,
+        )
+    else:
+        result = _peg_parser.parse_string(
+            source,
+            filename=file,
+            oldparser=oldparser,
+            ast=(mode == PARSE),
         )
-
-    if tree_arg:
-        assert mode == 1, "Mode should be 1 (parse), when comparing the generated trees"
-
-    # For a given directory, traverse files and attempt to parse each one
-    # - Output success/failure for each file
-    errors = 0
-    files = []
-    trees = {}  # Trees to compare (after everything else is done)
-    total_seconds = 0
-
-    for file in sorted(glob(f"{directory}/**/*.py", recursive=True)):
-        # Only attempt to parse Python files and files that are not excluded
-        should_exclude_file = False
-        for pattern in excluded_files:
-            if PurePath(file).match(pattern):
-                should_exclude_file = True
-                break
-
-        if not should_exclude_file:
-            with tokenize.open(file) as f:
-                source = f.read()
-            try:
-                t0 = time.time()
-                if mode == 2:
-                    result = _peg_parser.compile_string(
-                        source,
-                        filename=file,
-                        oldparser=parser == "cpython",
-                    )
-                else:
-                    result = _peg_parser.parse_string(
-                        source,
-                        filename=file,
-                        oldparser=parser == "cpython"
-                    )
-                t1 = time.time()
-                total_seconds += (t1 - t0)
-                if tree_arg:
-                    trees[file] = result
-                if not short:
-                    report_status(succeeded=True, file=file, verbose=verbose)
-            except Exception as error:
-                try:
-                    _peg_parser.parse_string(source, mode="exec", oldparser=True)
-                except Exception:
-                    if not short:
-                        print(f"File {file} cannot be parsed by either pegen or the ast module.")
-                else:
-                    report_status(
-                        succeeded=False, file=file, verbose=verbose, error=error, short=short
-                    )
-                    errors += 1
-            files.append(file)
     t1 = time.time()
+    return result, t1 - t0
 
+
+def is_parsing_failure(source: str) -> bool:
+    try:  # True means the old (reference) parser rejects *source* as well
+        _peg_parser.parse_string(source, mode="exec", oldparser=True)
+    except SyntaxError:
+        return True
+    return False
+
+
+def generate_time_stats(files, total_seconds) -> None:
     total_files = len(files)
-
     total_bytes = 0
     total_lines = 0
     for file in files:
@@ -217,6 +159,57 @@ def parse_directory(
             f"or {total_bytes / total_seconds :,.0f} bytes/sec.",
         )
 
+
+def parse_directory(
+    directory: str,
+    verbose: bool,
+    excluded_files: List[str],
+    tree_arg: int,
+    short: bool,
+    mode: int,
+    oldparser: bool,
+) -> int:
+    if tree_arg:
+        assert mode == PARSE, "Mode should be 1 (parse), when comparing the generated trees"
+
+    if oldparser and tree_arg:
+        print("Cannot specify tree argument with the cpython parser.", file=sys.stderr)
+        return 1
+
+    # For a given directory, traverse files and attempt to parse each one
+    # - Output success/failure for each file
+    errors = 0
+    files = []
+    trees = {}  # Trees to compare (after everything else is done)
+    total_seconds = 0
+
+    for file in sorted(glob(f"{directory}/**/*.py", recursive=True)):
+        # Only attempt to parse Python files and files that are not excluded
+        if any(PurePath(file).match(pattern) for pattern in excluded_files):
+            continue
+
+        with tokenize.open(file) as f:
+            source = f.read()
+
+        try:
+            result, dt = parse_file(source, file, mode, oldparser)
+            total_seconds += dt
+            if tree_arg:
+                trees[file] = result
+            report_status(succeeded=True, file=file, verbose=verbose, short=short)
+        except SyntaxError as error:
+            if is_parsing_failure(source):
+                print(f"File {file} cannot be parsed by either parser.")
+            else:
+                report_status(
+                    succeeded=False, file=file, verbose=verbose, error=error, short=short
+                )
+                errors += 1
+        files.append(file)
+
+    t1 = time.time()
+
+    generate_time_stats(files, total_seconds)
     if short:
         print_memstats()
 
@@ -240,26 +233,20 @@ def parse_directory(
 def main() -> None:
     args = argparser.parse_args()
     directory = args.directory
-    grammar_file = args.grammar_file
-    tokens_file = args.tokens_file
     verbose = args.verbose
     excluded_files = args.exclude
-    skip_actions = args.skip_actions
     tree = args.tree
     short = args.short
-    mode = 1 if args.tree else 2
+    mode = PARSE if args.tree else COMPILE
     sys.exit(
         parse_directory(
             directory,
-            grammar_file,
-            tokens_file,
             verbose,
             excluded_files,
-            skip_actions,
             tree,
             short,
             mode,
-            "pegen",
+            oldparser=False,
         )
     )
 
diff --git a/Tools/peg_generator/scripts/test_pypi_packages.py b/Tools/peg_generator/scripts/test_pypi_packages.py
index 98f77785cdd..f014753b3cd 100755
--- a/Tools/peg_generator/scripts/test_pypi_packages.py
+++ b/Tools/peg_generator/scripts/test_pypi_packages.py
@@ -57,22 +57,11 @@ def find_dirname(package_name: str) -> str:
 def run_tests(dirname: str, tree: int) -> int:
     return test_parse_directory.parse_directory(
         dirname,
-        HERE / ".." / ".." / ".." / "Grammar" / "python.gram",
-        HERE / ".." / ".." / ".." / "Grammar" / "Tokens",
         verbose=False,
-        excluded_files=[
-            "*/failset/*",
-            "*/failset/**",
-            "*/failset/**/*",
-            "*/test2to3/*",
-            "*/test2to3/**/*",
-            "*/bad*",
-            "*/lib2to3/tests/data/*",
-        ],
-        skip_actions=False,
+        excluded_files=[],
         tree_arg=tree,
         short=True,
-        mode=1,
-        parser="pegen",
+        mode=1 if tree else 0,
+        oldparser=False,
     )