From ba6fd87e41dceb01dcdacc57c722aca12cde42a9 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Sat, 6 Jun 2020 07:21:40 +0300 Subject: [PATCH] Refactor scripts in Tools/peg_generator/scripts (GH-20401) --- Modules/_peg_parser.c | 16 +- Tools/peg_generator/Makefile | 14 +- Tools/peg_generator/scripts/benchmark.py | 31 ++- .../peg_generator/scripts/grammar_grapher.py | 19 +- Tools/peg_generator/scripts/show_parse.py | 21 ++- .../scripts/test_parse_directory.py | 177 ++++++++---------- .../scripts/test_pypi_packages.py | 15 +- 7 files changed, 146 insertions(+), 147 deletions(-) diff --git a/Modules/_peg_parser.c b/Modules/_peg_parser.c index b66d5a83a84..ca2a3cf7b5f 100644 --- a/Modules/_peg_parser.c +++ b/Modules/_peg_parser.c @@ -80,14 +80,15 @@ _Py_compile_string(PyObject *self, PyObject *args, PyObject *kwds) PyObject * _Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds) { - static char *keywords[] = {"string", "filename", "mode", "oldparser", NULL}; + static char *keywords[] = {"string", "filename", "mode", "oldparser", "ast", NULL}; char *the_string; char *filename = ""; char *mode_str = "exec"; int oldparser = 0; + int ast = 1; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|ssp", keywords, - &the_string, &filename, &mode_str, &oldparser)) { + if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|sspp", keywords, + &the_string, &filename, &mode_str, &oldparser, &ast)) { return NULL; } @@ -110,7 +111,14 @@ _Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - PyObject *result = PyAST_mod2obj(mod); + PyObject *result; + if (ast) { + result = PyAST_mod2obj(mod); + } + else { + Py_INCREF(Py_None); + result = Py_None; + } PyArena_Free(arena); return result; } diff --git a/Tools/peg_generator/Makefile b/Tools/peg_generator/Makefile index e7a190c1bcd..fb727c048b3 100644 --- a/Tools/peg_generator/Makefile +++ b/Tools/peg_generator/Makefile @@ -70,23 +70,21 @@ stats: peg_extension/parse.c data/xxl.py time: time_compile time_compile: venv data/xxl.py - $(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl compile + $(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl compile time_parse: venv data/xxl.py - $(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl parse + $(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl parse time_old: time_old_compile time_old_compile: venv data/xxl.py - $(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl compile + $(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl compile time_old_parse: venv data/xxl.py - $(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl parse + $(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl parse time_peg_dir: venv $(VENVPYTHON) scripts/test_parse_directory.py \ - --grammar-file $(GRAMMAR) \ - --tokens-file $(TOKENS) \ -d $(TESTDIR) \ $(TESTFLAGS) \ --exclude "*/failset/*" \ @@ -95,12 +93,8 @@ time_peg_dir: venv time_stdlib: $(CPYTHON) venv $(VENVPYTHON) scripts/test_parse_directory.py \ - --grammar-file $(GRAMMAR) \ - --tokens-file $(TOKENS) \ -d $(CPYTHON) \ $(TESTFLAGS) \ - --exclude "*/test2to3/*" \ - --exclude "*/test2to3/**/*" \ --exclude "*/bad*" \ --exclude "*/lib2to3/tests/data/*" diff --git a/Tools/peg_generator/scripts/benchmark.py b/Tools/peg_generator/scripts/benchmark.py index 71512c22a35..af356bed783 100644 --- a/Tools/peg_generator/scripts/benchmark.py +++ b/Tools/peg_generator/scripts/benchmark.py @@ -24,7 +24,7 @@ argparser = argparse.ArgumentParser( argparser.add_argument( "--parser", action="store", - choices=["pegen", "cpython"], + choices=["new", "old"], default="pegen", help="Which parser to benchmark (default is pegen)", ) @@ -40,7 +40,12 @@ subcommands = argparser.add_subparsers(title="Benchmarks", dest="subcommand") command_compile = subcommands.add_parser( "compile", help="Benchmark parsing and compiling to bytecode" ) -command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST") +command_parse = subcommands.add_parser( + "parse", help="Benchmark parsing and generating an ast.AST" +) +command_notree = subcommands.add_parser( + "notree", help="Benchmark parsing and dumping the tree" +) def benchmark(func): @@ -62,7 +67,7 @@ def benchmark(func): @benchmark def time_compile(source, parser): - if parser == "cpython": + if parser == "old": return _peg_parser.compile_string( source, oldparser=True, @@ -73,32 +78,40 @@ def time_compile(source, parser): @benchmark def time_parse(source, parser): - if parser == "cpython": + if parser == "old": return _peg_parser.parse_string(source, oldparser=True) else: return _peg_parser.parse_string(source) +@benchmark +def time_notree(source, parser): + if parser == "old": + return _peg_parser.parse_string(source, oldparser=True, ast=False) + else: + return _peg_parser.parse_string(source, ast=False) + + def run_benchmark_xxl(subcommand, parser, source): if subcommand == "compile": time_compile(source, parser) elif subcommand == "parse": time_parse(source, parser) + elif subcommand == "notree": + time_notree(source, parser) def run_benchmark_stdlib(subcommand, parser): + modes = {"compile": 2, "parse": 1, "notree": 0} for _ in range(3): parse_directory( "../../Lib", - "../../Grammar/python.gram", - "../../Grammar/Tokens", verbose=False, excluded_files=["*/bad*", "*/lib2to3/tests/data/*",], - skip_actions=False, tree_arg=0, short=True, - mode=2 if subcommand == "compile" else 1, - parser=parser, + mode=modes[subcommand], + oldparser=(parser == "old"), ) diff --git a/Tools/peg_generator/scripts/grammar_grapher.py b/Tools/peg_generator/scripts/grammar_grapher.py index 3aa25466c70..4afdbce8f96 100755 --- a/Tools/peg_generator/scripts/grammar_grapher.py +++ b/Tools/peg_generator/scripts/grammar_grapher.py @@ -42,6 +42,13 @@ from pegen.grammar import ( ) argparser = argparse.ArgumentParser(prog="graph_grammar", description="Graph a grammar tree",) +argparser.add_argument( + "-s", + "--start", + choices=["exec", "eval", "single"], + default="exec", + help="Choose the grammar's start rule (exec, eval or single)", +) argparser.add_argument("grammar_file", help="The grammar file to graph") @@ -91,19 +98,15 @@ def main() -> None: references[name] = set(references_for_item(rule)) # Flatten the start node if has only a single reference - root_node = "start" - if start := references["start"]: - if len(start) == 1: - root_node = list(start)[0] - del references["start"] + root_node = {"exec": "file", "eval": "eval", "single": "interactive"}[args.start] print("digraph g1 {") print('\toverlap="scale";') # Force twopi to scale the graph to avoid overlaps print(f'\troot="{root_node}";') - print(f"\t{root_node} [color=green, shape=circle]") + print(f"\t{root_node} [color=green, shape=circle];") for name, refs in references.items(): - if refs: # Ignore empty sets - print(f"\t{name} -> {','.join(refs)};") + for ref in refs: + print(f"\t{name} -> {ref};") print("}") diff --git a/Tools/peg_generator/scripts/show_parse.py b/Tools/peg_generator/scripts/show_parse.py index 1c1996f40f7..b4ee5a1b357 100755 --- a/Tools/peg_generator/scripts/show_parse.py +++ b/Tools/peg_generator/scripts/show_parse.py @@ -41,7 +41,13 @@ parser = argparse.ArgumentParser() parser.add_argument( "-d", "--diff", action="store_true", help="show diff between grammar and ast (requires -g)" ) -parser.add_argument("-g", "--grammar-file", help="grammar to use (default: use the ast module)") +parser.add_argument( + "-p", + "--parser", + choices=["new", "old"], + default="new", + help="choose the parser to use" +) parser.add_argument( "-m", "--multiline", @@ -84,19 +90,18 @@ def print_parse(source: str, verbose: bool = False) -> None: def main() -> None: args = parser.parse_args() - if args.diff and not args.grammar_file: - parser.error("-d/--diff requires -g/--grammar-file") + new_parser = args.parser == "new" if args.multiline: sep = "\n" else: sep = " " program = sep.join(args.program) - if args.grammar_file: + if new_parser: tree = _peg_parser.parse_string(program) if args.diff: - a = tree - b = _peg_parser.parse_string(program, oldparser=True) + a = _peg_parser.parse_string(program, oldparser=True) + b = tree diff = diff_trees(a, b, args.verbose) if diff: for line in diff: @@ -104,11 +109,11 @@ def main() -> None: else: print("# Trees are the same") else: - print(f"# Parsed using {args.grammar_file}") + print("# Parsed using the new parser") print(format_tree(tree, args.verbose)) else: tree = _peg_parser.parse_string(program, oldparser=True) - print("# Parse using the old parser") + print("# Parsed using the old parser") print(format_tree(tree, args.verbose)) diff --git a/Tools/peg_generator/scripts/test_parse_directory.py b/Tools/peg_generator/scripts/test_parse_directory.py index e88afe1539c..63204ce9dc1 100755 --- a/Tools/peg_generator/scripts/test_parse_directory.py +++ b/Tools/peg_generator/scripts/test_parse_directory.py @@ -11,7 +11,7 @@ import _peg_parser from glob import glob from pathlib import PurePath -from typing import List, Optional, Any +from typing import List, Optional, Any, Tuple sys.path.insert(0, os.getcwd()) from pegen.ast_dump import ast_dump @@ -22,13 +22,15 @@ SUCCESS = "\033[92m" FAIL = "\033[91m" ENDC = "\033[0m" +COMPILE = 2 +PARSE = 1 +NOTREE = 0 + argparser = argparse.ArgumentParser( prog="test_parse_directory", description="Helper program to test directories or files for pegen", ) argparser.add_argument("-d", "--directory", help="Directory path containing files to test") -argparser.add_argument("--grammar-file", help="Grammar file path") -argparser.add_argument("--tokens-file", help="Tokens file path") argparser.add_argument( "-e", "--exclude", action="append", default=[], help="Glob(s) for matching files to exclude" ) @@ -38,9 +40,6 @@ argparser.add_argument( argparser.add_argument( "-v", "--verbose", action="store_true", help="Display detailed errors for failures" ) -argparser.add_argument( - "--skip-actions", action="store_true", help="Suppress code emission for rule actions", -) argparser.add_argument( "-t", "--tree", action="count", help="Compare parse tree to official AST", default=0 ) @@ -113,92 +112,35 @@ def compare_trees( return 1 -def parse_directory( - directory: str, - grammar_file: str, - tokens_file: str, - verbose: bool, - excluded_files: List[str], - skip_actions: bool, - tree_arg: int, - short: bool, - mode: int, - parser: str, -) -> int: - if parser == "cpython" and (tree_arg or mode == 0): - print("Cannot specify tree argument or mode=0 with the cpython parser.", file=sys.stderr) - return 1 - - if not directory: - print("You must specify a directory of files to test.", file=sys.stderr) - return 1 - - if grammar_file and tokens_file: - if not os.path.exists(grammar_file): - print(f"The specified grammar file, {grammar_file}, does not exist.", file=sys.stderr) - return 1 - else: - print( - "A grammar file or a tokens file was not provided - attempting to use existing parser from stdlib...\n" +def parse_file(source: str, file: str, mode: int, oldparser: bool) -> Tuple[Any, float]: + t0 = time.time() + if mode == COMPILE: + result = _peg_parser.compile_string( + source, + filename=file, + oldparser=oldparser, + ) + else: + result = _peg_parser.parse_string( + source, + filename=file, + oldparser=oldparser, + ast=(mode == PARSE), ) - - if tree_arg: - assert mode == 1, "Mode should be 1 (parse), when comparing the generated trees" - - # For a given directory, traverse files and attempt to parse each one - # - Output success/failure for each file - errors = 0 - files = [] - trees = {} # Trees to compare (after everything else is done) - total_seconds = 0 - - for file in sorted(glob(f"{directory}/**/*.py", recursive=True)): - # Only attempt to parse Python files and files that are not excluded - should_exclude_file = False - for pattern in excluded_files: - if PurePath(file).match(pattern): - should_exclude_file = True - break - - if not should_exclude_file: - with tokenize.open(file) as f: - source = f.read() - try: - t0 = time.time() - if mode == 2: - result = _peg_parser.compile_string( - source, - filename=file, - oldparser=parser == "cpython", - ) - else: - result = _peg_parser.parse_string( - source, - filename=file, - oldparser=parser == "cpython" - ) - t1 = time.time() - total_seconds += (t1 - t0) - if tree_arg: - trees[file] = result - if not short: - report_status(succeeded=True, file=file, verbose=verbose) - except Exception as error: - try: - _peg_parser.parse_string(source, mode="exec", oldparser=True) - except Exception: - if not short: - print(f"File {file} cannot be parsed by either pegen or the ast module.") - else: - report_status( - succeeded=False, file=file, verbose=verbose, error=error, short=short - ) - errors += 1 - files.append(file) t1 = time.time() + return result, t1 - t0 + +def is_parsing_failure(source: str) -> bool: + try: + _peg_parser.parse_string(source, mode="exec", oldparser=True) + except SyntaxError: + return False + return True + + +def generate_time_stats(files, total_seconds) -> None: total_files = len(files) - total_bytes = 0 total_lines = 0 for file in files: @@ -217,6 +159,57 @@ def parse_directory( f"or {total_bytes / total_seconds :,.0f} bytes/sec.", ) + +def parse_directory( + directory: str, + verbose: bool, + excluded_files: List[str], + tree_arg: int, + short: bool, + mode: int, + oldparser: bool, +) -> int: + if tree_arg: + assert mode == PARSE, "Mode should be 1 (parse), when comparing the generated trees" + + if oldparser and tree_arg: + print("Cannot specify tree argument with the cpython parser.", file=sys.stderr) + return 1 + + # For a given directory, traverse files and attempt to parse each one + # - Output success/failure for each file + errors = 0 + files = [] + trees = {} # Trees to compare (after everything else is done) + total_seconds = 0 + + for file in sorted(glob(f"{directory}/**/*.py", recursive=True)): + # Only attempt to parse Python files and files that are not excluded + if any(PurePath(file).match(pattern) for pattern in excluded_files): + continue + + with tokenize.open(file) as f: + source = f.read() + + try: + result, dt = parse_file(source, file, mode, oldparser) + total_seconds += dt + if tree_arg: + trees[file] = result + report_status(succeeded=True, file=file, verbose=verbose, short=short) + except SyntaxError as error: + if is_parsing_failure(source): + print(f"File {file} cannot be parsed by either parser.") + else: + report_status( + succeeded=False, file=file, verbose=verbose, error=error, short=short + ) + errors += 1 + files.append(file) + + t1 = time.time() + + generate_time_stats(files, total_seconds) if short: print_memstats() @@ -240,26 +233,20 @@ def parse_directory( def main() -> None: args = argparser.parse_args() directory = args.directory - grammar_file = args.grammar_file - tokens_file = args.tokens_file verbose = args.verbose excluded_files = args.exclude - skip_actions = args.skip_actions tree = args.tree short = args.short mode = 1 if args.tree else 2 sys.exit( parse_directory( directory, - grammar_file, - tokens_file, verbose, excluded_files, - skip_actions, tree, short, mode, - "pegen", + oldparser=False, ) ) diff --git a/Tools/peg_generator/scripts/test_pypi_packages.py b/Tools/peg_generator/scripts/test_pypi_packages.py index 98f77785cdd..f014753b3cd 100755 --- a/Tools/peg_generator/scripts/test_pypi_packages.py +++ b/Tools/peg_generator/scripts/test_pypi_packages.py @@ -57,22 +57,11 @@ def find_dirname(package_name: str) -> str: def run_tests(dirname: str, tree: int) -> int: return test_parse_directory.parse_directory( dirname, - HERE / ".." / ".." / ".." / "Grammar" / "python.gram", - HERE / ".." / ".." / ".." / "Grammar" / "Tokens", verbose=False, - excluded_files=[ - "*/failset/*", - "*/failset/**", - "*/failset/**/*", - "*/test2to3/*", - "*/test2to3/**/*", - "*/bad*", - "*/lib2to3/tests/data/*", - ], - skip_actions=False, + excluded_files=[], tree_arg=tree, short=True, - mode=1, + mode=1 if tree else 0, parser="pegen", )