Refactor scripts in Tools/peg_generator/scripts (GH-20401)

This commit is contained in:
Lysandros Nikolaou 2020-06-06 07:21:40 +03:00 committed by GitHub
parent 2e6593db00
commit ba6fd87e41
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 146 additions and 147 deletions

View File

@ -80,14 +80,15 @@ _Py_compile_string(PyObject *self, PyObject *args, PyObject *kwds)
PyObject * PyObject *
_Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds) _Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds)
{ {
static char *keywords[] = {"string", "filename", "mode", "oldparser", NULL}; static char *keywords[] = {"string", "filename", "mode", "oldparser", "ast", NULL};
char *the_string; char *the_string;
char *filename = "<string>"; char *filename = "<string>";
char *mode_str = "exec"; char *mode_str = "exec";
int oldparser = 0; int oldparser = 0;
int ast = 1;
if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|ssp", keywords, if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|sspp", keywords,
&the_string, &filename, &mode_str, &oldparser)) { &the_string, &filename, &mode_str, &oldparser, &ast)) {
return NULL; return NULL;
} }
@ -110,7 +111,14 @@ _Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds)
return NULL; return NULL;
} }
PyObject *result = PyAST_mod2obj(mod); PyObject *result;
if (ast) {
result = PyAST_mod2obj(mod);
}
else {
Py_INCREF(Py_None);
result = Py_None;
}
PyArena_Free(arena); PyArena_Free(arena);
return result; return result;
} }

View File

@ -70,23 +70,21 @@ stats: peg_extension/parse.c data/xxl.py
time: time_compile time: time_compile
time_compile: venv data/xxl.py time_compile: venv data/xxl.py
$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl compile $(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl compile
time_parse: venv data/xxl.py time_parse: venv data/xxl.py
$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl parse $(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl parse
time_old: time_old_compile time_old: time_old_compile
time_old_compile: venv data/xxl.py time_old_compile: venv data/xxl.py
$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl compile $(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl compile
time_old_parse: venv data/xxl.py time_old_parse: venv data/xxl.py
$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl parse $(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl parse
time_peg_dir: venv time_peg_dir: venv
$(VENVPYTHON) scripts/test_parse_directory.py \ $(VENVPYTHON) scripts/test_parse_directory.py \
--grammar-file $(GRAMMAR) \
--tokens-file $(TOKENS) \
-d $(TESTDIR) \ -d $(TESTDIR) \
$(TESTFLAGS) \ $(TESTFLAGS) \
--exclude "*/failset/*" \ --exclude "*/failset/*" \
@ -95,12 +93,8 @@ time_peg_dir: venv
time_stdlib: $(CPYTHON) venv time_stdlib: $(CPYTHON) venv
$(VENVPYTHON) scripts/test_parse_directory.py \ $(VENVPYTHON) scripts/test_parse_directory.py \
--grammar-file $(GRAMMAR) \
--tokens-file $(TOKENS) \
-d $(CPYTHON) \ -d $(CPYTHON) \
$(TESTFLAGS) \ $(TESTFLAGS) \
--exclude "*/test2to3/*" \
--exclude "*/test2to3/**/*" \
--exclude "*/bad*" \ --exclude "*/bad*" \
--exclude "*/lib2to3/tests/data/*" --exclude "*/lib2to3/tests/data/*"

View File

@ -24,7 +24,7 @@ argparser = argparse.ArgumentParser(
# The default has to be one of ``choices``: argparse does not check defaults
# against ``choices``, so a stale name would only work by falling through the
# ``parser == "old"`` tests used by the benchmarks below.
argparser.add_argument(
    "--parser",
    action="store",
    choices=["new", "old"],
    default="new",
    help="Which parser to benchmark (default is new)",
)
@ -40,7 +40,12 @@ subcommands = argparser.add_subparsers(title="Benchmarks", dest="subcommand")
command_compile = subcommands.add_parser( command_compile = subcommands.add_parser(
"compile", help="Benchmark parsing and compiling to bytecode" "compile", help="Benchmark parsing and compiling to bytecode"
) )
command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST") command_parse = subcommands.add_parser(
"parse", help="Benchmark parsing and generating an ast.AST"
)
command_notree = subcommands.add_parser(
"notree", help="Benchmark parsing and dumping the tree"
)
def benchmark(func): def benchmark(func):
@ -62,7 +67,7 @@ def benchmark(func):
@benchmark @benchmark
def time_compile(source, parser): def time_compile(source, parser):
if parser == "cpython": if parser == "old":
return _peg_parser.compile_string( return _peg_parser.compile_string(
source, source,
oldparser=True, oldparser=True,
@ -73,32 +78,40 @@ def time_compile(source, parser):
@benchmark
def time_parse(source, parser):
    """Parse *source* and return the resulting AST object.

    ``parser == "old"`` selects the old parser; any other value uses the
    new one.
    """
    options = {"oldparser": True} if parser == "old" else {}
    return _peg_parser.parse_string(source, **options)
@benchmark
def time_notree(source, parser):
    """Parse *source* with ``ast=False``, i.e. without returning an AST object.

    ``parser == "old"`` selects the old parser; any other value uses the
    new one.
    """
    options = {"ast": False}
    if parser == "old":
        options["oldparser"] = True
    return _peg_parser.parse_string(source, **options)
def run_benchmark_xxl(subcommand, parser, source):
    """Run the timing helper that corresponds to *subcommand* on *source*.

    Subcommands other than "compile", "parse" and "notree" are ignored.
    """
    runners = {
        "compile": time_compile,
        "parse": time_parse,
        "notree": time_notree,
    }
    runner = runners.get(subcommand)
    if runner is not None:
        runner(source, parser)
def run_benchmark_stdlib(subcommand, parser):
    """Run parse_directory() over ``../../Lib`` three times in a row.

    *subcommand* picks the numeric mode handed to parse_directory()
    (a KeyError is raised for anything but "compile", "parse" or "notree");
    *parser* equal to "old" requests the old parser.
    """
    mode_for = {"compile": 2, "parse": 1, "notree": 0}
    use_old_parser = parser == "old"
    completed = 0
    while completed < 3:
        parse_directory(
            "../../Lib",
            verbose=False,
            excluded_files=["*/bad*", "*/lib2to3/tests/data/*"],
            tree_arg=0,
            short=True,
            mode=mode_for[subcommand],
            oldparser=use_old_parser,
        )
        completed += 1

View File

@ -42,6 +42,13 @@ from pegen.grammar import (
) )
argparser = argparse.ArgumentParser(prog="graph_grammar", description="Graph a grammar tree",) argparser = argparse.ArgumentParser(prog="graph_grammar", description="Graph a grammar tree",)
argparser.add_argument(
"-s",
"--start",
choices=["exec", "eval", "single"],
default="exec",
help="Choose the grammar's start rule (exec, eval or single)",
)
argparser.add_argument("grammar_file", help="The grammar file to graph") argparser.add_argument("grammar_file", help="The grammar file to graph")
@ -91,19 +98,15 @@ def main() -> None:
references[name] = set(references_for_item(rule)) references[name] = set(references_for_item(rule))
# Flatten the start node if has only a single reference # Flatten the start node if has only a single reference
root_node = "start" root_node = {"exec": "file", "eval": "eval", "single": "interactive"}[args.start]
if start := references["start"]:
if len(start) == 1:
root_node = list(start)[0]
del references["start"]
print("digraph g1 {") print("digraph g1 {")
print('\toverlap="scale";') # Force twopi to scale the graph to avoid overlaps print('\toverlap="scale";') # Force twopi to scale the graph to avoid overlaps
print(f'\troot="{root_node}";') print(f'\troot="{root_node}";')
print(f"\t{root_node} [color=green, shape=circle]") print(f"\t{root_node} [color=green, shape=circle];")
for name, refs in references.items(): for name, refs in references.items():
if refs: # Ignore empty sets for ref in refs:
print(f"\t{name} -> {','.join(refs)};") print(f"\t{name} -> {ref};")
print("}") print("}")

View File

@ -41,7 +41,13 @@ parser = argparse.ArgumentParser()
# --diff only takes effect on the new-parser path of main(), where the tree
# from the old parser is diffed against the tree from the new parser.
parser.add_argument(
    "-d",
    "--diff",
    action="store_true",
    help="show diff between the old and new parser ASTs (requires -p new)",
)
parser.add_argument(
    "-p",
    "--parser",
    choices=["new", "old"],
    default="new",
    help="choose the parser to use",
)
parser.add_argument( parser.add_argument(
"-m", "-m",
"--multiline", "--multiline",
@ -84,19 +90,18 @@ def print_parse(source: str, verbose: bool = False) -> None:
def main() -> None: def main() -> None:
args = parser.parse_args() args = parser.parse_args()
if args.diff and not args.grammar_file: new_parser = args.parser == "new"
parser.error("-d/--diff requires -g/--grammar-file")
if args.multiline: if args.multiline:
sep = "\n" sep = "\n"
else: else:
sep = " " sep = " "
program = sep.join(args.program) program = sep.join(args.program)
if args.grammar_file: if new_parser:
tree = _peg_parser.parse_string(program) tree = _peg_parser.parse_string(program)
if args.diff: if args.diff:
a = tree a = _peg_parser.parse_string(program, oldparser=True)
b = _peg_parser.parse_string(program, oldparser=True) b = tree
diff = diff_trees(a, b, args.verbose) diff = diff_trees(a, b, args.verbose)
if diff: if diff:
for line in diff: for line in diff:
@ -104,11 +109,11 @@ def main() -> None:
else: else:
print("# Trees are the same") print("# Trees are the same")
else: else:
print(f"# Parsed using {args.grammar_file}") print("# Parsed using the new parser")
print(format_tree(tree, args.verbose)) print(format_tree(tree, args.verbose))
else: else:
tree = _peg_parser.parse_string(program, oldparser=True) tree = _peg_parser.parse_string(program, oldparser=True)
print("# Parse using the old parser") print("# Parsed using the old parser")
print(format_tree(tree, args.verbose)) print(format_tree(tree, args.verbose))

View File

@ -11,7 +11,7 @@ import _peg_parser
from glob import glob from glob import glob
from pathlib import PurePath from pathlib import PurePath
from typing import List, Optional, Any from typing import List, Optional, Any, Tuple
sys.path.insert(0, os.getcwd()) sys.path.insert(0, os.getcwd())
from pegen.ast_dump import ast_dump from pegen.ast_dump import ast_dump
@ -22,13 +22,15 @@ SUCCESS = "\033[92m"
FAIL = "\033[91m" FAIL = "\033[91m"
ENDC = "\033[0m" ENDC = "\033[0m"
# Values of the ``mode`` argument taken by parse_file() and parse_directory().
COMPILE = 2  # parse_file() calls _peg_parser.compile_string()
PARSE = 1  # parse_file() calls _peg_parser.parse_string() with ast=True
NOTREE = 0  # parse_file() calls _peg_parser.parse_string() with ast=False
argparser = argparse.ArgumentParser( argparser = argparse.ArgumentParser(
prog="test_parse_directory", prog="test_parse_directory",
description="Helper program to test directories or files for pegen", description="Helper program to test directories or files for pegen",
) )
argparser.add_argument("-d", "--directory", help="Directory path containing files to test") argparser.add_argument("-d", "--directory", help="Directory path containing files to test")
argparser.add_argument("--grammar-file", help="Grammar file path")
argparser.add_argument("--tokens-file", help="Tokens file path")
argparser.add_argument( argparser.add_argument(
"-e", "--exclude", action="append", default=[], help="Glob(s) for matching files to exclude" "-e", "--exclude", action="append", default=[], help="Glob(s) for matching files to exclude"
) )
@ -38,9 +40,6 @@ argparser.add_argument(
argparser.add_argument( argparser.add_argument(
"-v", "--verbose", action="store_true", help="Display detailed errors for failures" "-v", "--verbose", action="store_true", help="Display detailed errors for failures"
) )
argparser.add_argument(
"--skip-actions", action="store_true", help="Suppress code emission for rule actions",
)
argparser.add_argument( argparser.add_argument(
"-t", "--tree", action="count", help="Compare parse tree to official AST", default=0 "-t", "--tree", action="count", help="Compare parse tree to official AST", default=0
) )
@ -113,92 +112,35 @@ def compare_trees(
return 1 return 1
def parse_file(source: str, file: str, mode: int, oldparser: bool) -> Tuple[Any, float]:
    """Parse or compile *source* once and measure how long it took.

    Args:
        source: Text of the program to process.
        file: Name handed to the parser as ``filename``.
        mode: ``COMPILE`` compiles the source; any other value parses it,
            requesting an AST object only when the mode is ``PARSE``.
        oldparser: Forwarded to ``_peg_parser`` to select the old parser.

    Returns:
        A ``(result, seconds)`` tuple: whatever ``_peg_parser`` returned and
        the elapsed time of that single call.

    Exceptions raised by ``_peg_parser`` propagate to the caller.
    """
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    started = time.perf_counter()
    if mode == COMPILE:
        result = _peg_parser.compile_string(
            source,
            filename=file,
            oldparser=oldparser,
        )
    else:
        result = _peg_parser.parse_string(
            source,
            filename=file,
            oldparser=oldparser,
            ast=(mode == PARSE),
        )
    elapsed = time.perf_counter() - started
    return result, elapsed
if tree_arg:
trees[file] = result
def is_parsing_failure(source: str) -> bool:
    """Return True when the old parser rejects *source* with a SyntaxError.

    parse_directory() calls this after a SyntaxError and, on a True result,
    reports that the file "cannot be parsed by either parser" instead of
    counting an error.  The result therefore has to be True exactly when the
    old parser fails as well, and False when the old parser accepts the
    source.
    """
    try:
        _peg_parser.parse_string(source, mode="exec", oldparser=True)
    except SyntaxError:
        # The old parser cannot handle the source either.
        return True
    return False
else:
report_status(
succeeded=False, file=file, verbose=verbose, error=error, short=short
)
errors += 1
files.append(file)
t1 = time.time()
def generate_time_stats(files, total_seconds) -> None:
total_files = len(files) total_files = len(files)
total_bytes = 0 total_bytes = 0
total_lines = 0 total_lines = 0
for file in files: for file in files:
@ -217,6 +159,57 @@ def parse_directory(
f"or {total_bytes / total_seconds :,.0f} bytes/sec.", f"or {total_bytes / total_seconds :,.0f} bytes/sec.",
) )
def parse_directory(
directory: str,
verbose: bool,
excluded_files: List[str],
tree_arg: int,
short: bool,
mode: int,
oldparser: bool,
) -> int:
if tree_arg:
assert mode == PARSE, "Mode should be 1 (parse), when comparing the generated trees"
if oldparser and tree_arg:
print("Cannot specify tree argument with the cpython parser.", file=sys.stderr)
return 1
# For a given directory, traverse files and attempt to parse each one
# - Output success/failure for each file
errors = 0
files = []
trees = {} # Trees to compare (after everything else is done)
total_seconds = 0
for file in sorted(glob(f"{directory}/**/*.py", recursive=True)):
# Only attempt to parse Python files and files that are not excluded
if any(PurePath(file).match(pattern) for pattern in excluded_files):
continue
with tokenize.open(file) as f:
source = f.read()
try:
result, dt = parse_file(source, file, mode, oldparser)
total_seconds += dt
if tree_arg:
trees[file] = result
report_status(succeeded=True, file=file, verbose=verbose, short=short)
except SyntaxError as error:
if is_parsing_failure(source):
print(f"File {file} cannot be parsed by either parser.")
else:
report_status(
succeeded=False, file=file, verbose=verbose, error=error, short=short
)
errors += 1
files.append(file)
t1 = time.time()
generate_time_stats(files, total_seconds)
if short: if short:
print_memstats() print_memstats()
@ -240,26 +233,20 @@ def parse_directory(
def main() -> None:
    """Command-line entry point: run parse_directory() with the new parser.

    The process exits with the value returned by parse_directory(), or with
    status 1 when no directory was supplied.
    """
    args = argparser.parse_args()

    # parse_directory() does not validate its directory argument; without
    # this guard a missing -d would glob the literal pattern "None/**/*.py".
    if not args.directory:
        print("You must specify a directory of files to test.", file=sys.stderr)
        sys.exit(1)

    # parse_directory() asserts PARSE mode whenever trees are compared;
    # without -t the sources are compiled instead.
    mode = PARSE if args.tree else COMPILE
    sys.exit(
        parse_directory(
            args.directory,
            args.verbose,
            args.exclude,
            args.tree,
            args.short,
            mode,
            oldparser=False,
        )
    )

View File

@ -57,22 +57,11 @@ def find_dirname(package_name: str) -> str:
def run_tests(dirname: str, tree: int) -> int:
    """Parse every Python file under *dirname* with the new parser.

    Args:
        dirname: Directory handed to parse_directory().
        tree: Forwarded as ``tree_arg``; a non-zero value also switches the
            mode from 0 to 1.

    Returns:
        The value returned by test_parse_directory.parse_directory().
    """
    return test_parse_directory.parse_directory(
        dirname,
        verbose=False,
        excluded_files=[],
        tree_arg=tree,
        short=True,
        mode=1 if tree else 0,
        # parse_directory() takes a boolean ``oldparser`` flag and has no
        # ``parser`` parameter.
        oldparser=False,
    )