Refactor scripts in Tools/peg_generator/scripts (GH-20401)
(cherry picked from commit ba6fd87e41)
Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>
This commit is contained in:
parent
d5e7348e41
commit
18f1226884
|
@ -80,14 +80,15 @@ _Py_compile_string(PyObject *self, PyObject *args, PyObject *kwds)
|
|||
PyObject *
|
||||
_Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
static char *keywords[] = {"string", "filename", "mode", "oldparser", NULL};
|
||||
static char *keywords[] = {"string", "filename", "mode", "oldparser", "ast", NULL};
|
||||
char *the_string;
|
||||
char *filename = "<string>";
|
||||
char *mode_str = "exec";
|
||||
int oldparser = 0;
|
||||
int ast = 1;
|
||||
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|ssp", keywords,
|
||||
&the_string, &filename, &mode_str, &oldparser)) {
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|sspp", keywords,
|
||||
&the_string, &filename, &mode_str, &oldparser, &ast)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -110,7 +111,14 @@ _Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
PyObject *result = PyAST_mod2obj(mod);
|
||||
PyObject *result;
|
||||
if (ast) {
|
||||
result = PyAST_mod2obj(mod);
|
||||
}
|
||||
else {
|
||||
Py_INCREF(Py_None);
|
||||
result = Py_None;
|
||||
}
|
||||
PyArena_Free(arena);
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -70,23 +70,21 @@ stats: peg_extension/parse.c data/xxl.py
|
|||
time: time_compile
|
||||
|
||||
time_compile: venv data/xxl.py
|
||||
$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl compile
|
||||
$(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl compile
|
||||
|
||||
time_parse: venv data/xxl.py
|
||||
$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl parse
|
||||
$(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl parse
|
||||
|
||||
time_old: time_old_compile
|
||||
|
||||
time_old_compile: venv data/xxl.py
|
||||
$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl compile
|
||||
$(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl compile
|
||||
|
||||
time_old_parse: venv data/xxl.py
|
||||
$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl parse
|
||||
$(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl parse
|
||||
|
||||
time_peg_dir: venv
|
||||
$(VENVPYTHON) scripts/test_parse_directory.py \
|
||||
--grammar-file $(GRAMMAR) \
|
||||
--tokens-file $(TOKENS) \
|
||||
-d $(TESTDIR) \
|
||||
$(TESTFLAGS) \
|
||||
--exclude "*/failset/*" \
|
||||
|
@ -95,12 +93,8 @@ time_peg_dir: venv
|
|||
|
||||
time_stdlib: $(CPYTHON) venv
|
||||
$(VENVPYTHON) scripts/test_parse_directory.py \
|
||||
--grammar-file $(GRAMMAR) \
|
||||
--tokens-file $(TOKENS) \
|
||||
-d $(CPYTHON) \
|
||||
$(TESTFLAGS) \
|
||||
--exclude "*/test2to3/*" \
|
||||
--exclude "*/test2to3/**/*" \
|
||||
--exclude "*/bad*" \
|
||||
--exclude "*/lib2to3/tests/data/*"
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ argparser = argparse.ArgumentParser(
|
|||
argparser.add_argument(
|
||||
"--parser",
|
||||
action="store",
|
||||
choices=["pegen", "cpython"],
|
||||
choices=["new", "old"],
|
||||
default="pegen",
|
||||
help="Which parser to benchmark (default is pegen)",
|
||||
)
|
||||
|
@ -40,7 +40,12 @@ subcommands = argparser.add_subparsers(title="Benchmarks", dest="subcommand")
|
|||
command_compile = subcommands.add_parser(
|
||||
"compile", help="Benchmark parsing and compiling to bytecode"
|
||||
)
|
||||
command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST")
|
||||
command_parse = subcommands.add_parser(
|
||||
"parse", help="Benchmark parsing and generating an ast.AST"
|
||||
)
|
||||
command_notree = subcommands.add_parser(
|
||||
"notree", help="Benchmark parsing and dumping the tree"
|
||||
)
|
||||
|
||||
|
||||
def benchmark(func):
|
||||
|
@ -62,7 +67,7 @@ def benchmark(func):
|
|||
|
||||
@benchmark
|
||||
def time_compile(source, parser):
|
||||
if parser == "cpython":
|
||||
if parser == "old":
|
||||
return _peg_parser.compile_string(
|
||||
source,
|
||||
oldparser=True,
|
||||
|
@ -73,32 +78,40 @@ def time_compile(source, parser):
|
|||
|
||||
@benchmark
|
||||
def time_parse(source, parser):
|
||||
if parser == "cpython":
|
||||
if parser == "old":
|
||||
return _peg_parser.parse_string(source, oldparser=True)
|
||||
else:
|
||||
return _peg_parser.parse_string(source)
|
||||
|
||||
|
||||
@benchmark
def time_notree(source, parser):
    """Parse ``source`` with ``ast=False`` using the selected parser.

    ``parser`` equal to ``"old"`` routes the call through ``oldparser=True``;
    any other value uses ``parse_string``'s default parser.  The return value
    of ``parse_string`` is passed through unchanged.
    """
    if parser != "old":
        return _peg_parser.parse_string(source, ast=False)
    return _peg_parser.parse_string(source, oldparser=True, ast=False)
|
||||
|
||||
|
||||
def run_benchmark_xxl(subcommand, parser, source):
    """Run the timing function that corresponds to ``subcommand``.

    Recognised subcommands are ``"compile"``, ``"parse"`` and ``"notree"``;
    any other value is silently ignored.  The selected function is called as
    ``func(source, parser)`` and its result is discarded.
    """
    timers = {
        "compile": time_compile,
        "parse": time_parse,
        "notree": time_notree,
    }
    timer = timers.get(subcommand)
    if timer is not None:
        timer(source, parser)
|
||||
|
||||
|
||||
def run_benchmark_stdlib(subcommand, parser):
|
||||
modes = {"compile": 2, "parse": 1, "notree": 0}
|
||||
for _ in range(3):
|
||||
parse_directory(
|
||||
"../../Lib",
|
||||
"../../Grammar/python.gram",
|
||||
"../../Grammar/Tokens",
|
||||
verbose=False,
|
||||
excluded_files=["*/bad*", "*/lib2to3/tests/data/*",],
|
||||
skip_actions=False,
|
||||
tree_arg=0,
|
||||
short=True,
|
||||
mode=2 if subcommand == "compile" else 1,
|
||||
parser=parser,
|
||||
mode=modes[subcommand],
|
||||
oldparser=(parser == "old"),
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -42,6 +42,13 @@ from pegen.grammar import (
|
|||
)
|
||||
|
||||
# Command-line interface: an optional start-rule selector plus the mandatory
# path of the grammar file to graph.
argparser = argparse.ArgumentParser(
    prog="graph_grammar",
    description="Graph a grammar tree",
)
argparser.add_argument(
    "-s",
    "--start",
    default="exec",
    choices=["exec", "eval", "single"],
    help="Choose the grammar's start rule (exec, eval or single)",
)
argparser.add_argument(
    "grammar_file",
    help="The grammar file to graph",
)
|
||||
|
||||
|
||||
|
@ -91,19 +98,15 @@ def main() -> None:
|
|||
references[name] = set(references_for_item(rule))
|
||||
|
||||
# Flatten the start node if has only a single reference
|
||||
root_node = "start"
|
||||
if start := references["start"]:
|
||||
if len(start) == 1:
|
||||
root_node = list(start)[0]
|
||||
del references["start"]
|
||||
root_node = {"exec": "file", "eval": "eval", "single": "interactive"}[args.start]
|
||||
|
||||
print("digraph g1 {")
|
||||
print('\toverlap="scale";') # Force twopi to scale the graph to avoid overlaps
|
||||
print(f'\troot="{root_node}";')
|
||||
print(f"\t{root_node} [color=green, shape=circle]")
|
||||
print(f"\t{root_node} [color=green, shape=circle];")
|
||||
for name, refs in references.items():
|
||||
if refs: # Ignore empty sets
|
||||
print(f"\t{name} -> {','.join(refs)};")
|
||||
for ref in refs:
|
||||
print(f"\t{name} -> {ref};")
|
||||
print("}")
|
||||
|
||||
|
||||
|
|
|
@ -41,7 +41,13 @@ parser = argparse.ArgumentParser()
|
|||
parser.add_argument(
|
||||
"-d", "--diff", action="store_true", help="show diff between grammar and ast (requires -g)"
|
||||
)
|
||||
parser.add_argument("-g", "--grammar-file", help="grammar to use (default: use the ast module)")
|
||||
parser.add_argument(
|
||||
"-p",
|
||||
"--parser",
|
||||
choices=["new", "old"],
|
||||
default="new",
|
||||
help="choose the parser to use"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-m",
|
||||
"--multiline",
|
||||
|
@ -84,19 +90,18 @@ def print_parse(source: str, verbose: bool = False) -> None:
|
|||
|
||||
def main() -> None:
|
||||
args = parser.parse_args()
|
||||
if args.diff and not args.grammar_file:
|
||||
parser.error("-d/--diff requires -g/--grammar-file")
|
||||
new_parser = args.parser == "new"
|
||||
if args.multiline:
|
||||
sep = "\n"
|
||||
else:
|
||||
sep = " "
|
||||
program = sep.join(args.program)
|
||||
if args.grammar_file:
|
||||
if new_parser:
|
||||
tree = _peg_parser.parse_string(program)
|
||||
|
||||
if args.diff:
|
||||
a = tree
|
||||
b = _peg_parser.parse_string(program, oldparser=True)
|
||||
a = _peg_parser.parse_string(program, oldparser=True)
|
||||
b = tree
|
||||
diff = diff_trees(a, b, args.verbose)
|
||||
if diff:
|
||||
for line in diff:
|
||||
|
@ -104,11 +109,11 @@ def main() -> None:
|
|||
else:
|
||||
print("# Trees are the same")
|
||||
else:
|
||||
print(f"# Parsed using {args.grammar_file}")
|
||||
print("# Parsed using the new parser")
|
||||
print(format_tree(tree, args.verbose))
|
||||
else:
|
||||
tree = _peg_parser.parse_string(program, oldparser=True)
|
||||
print("# Parse using the old parser")
|
||||
print("# Parsed using the old parser")
|
||||
print(format_tree(tree, args.verbose))
|
||||
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ import _peg_parser
|
|||
from glob import glob
|
||||
from pathlib import PurePath
|
||||
|
||||
from typing import List, Optional, Any
|
||||
from typing import List, Optional, Any, Tuple
|
||||
|
||||
sys.path.insert(0, os.getcwd())
|
||||
from pegen.ast_dump import ast_dump
|
||||
|
@ -22,13 +22,15 @@ SUCCESS = "\033[92m"
|
|||
FAIL = "\033[91m"
|
||||
ENDC = "\033[0m"
|
||||
|
||||
COMPILE = 2
|
||||
PARSE = 1
|
||||
NOTREE = 0
|
||||
|
||||
argparser = argparse.ArgumentParser(
|
||||
prog="test_parse_directory",
|
||||
description="Helper program to test directories or files for pegen",
|
||||
)
|
||||
argparser.add_argument("-d", "--directory", help="Directory path containing files to test")
|
||||
argparser.add_argument("--grammar-file", help="Grammar file path")
|
||||
argparser.add_argument("--tokens-file", help="Tokens file path")
|
||||
argparser.add_argument(
|
||||
"-e", "--exclude", action="append", default=[], help="Glob(s) for matching files to exclude"
|
||||
)
|
||||
|
@ -38,9 +40,6 @@ argparser.add_argument(
|
|||
argparser.add_argument(
|
||||
"-v", "--verbose", action="store_true", help="Display detailed errors for failures"
|
||||
)
|
||||
argparser.add_argument(
|
||||
"--skip-actions", action="store_true", help="Suppress code emission for rule actions",
|
||||
)
|
||||
argparser.add_argument(
|
||||
"-t", "--tree", action="count", help="Compare parse tree to official AST", default=0
|
||||
)
|
||||
|
@ -113,92 +112,35 @@ def compare_trees(
|
|||
return 1
|
||||
|
||||
|
||||
def parse_directory(
|
||||
directory: str,
|
||||
grammar_file: str,
|
||||
tokens_file: str,
|
||||
verbose: bool,
|
||||
excluded_files: List[str],
|
||||
skip_actions: bool,
|
||||
tree_arg: int,
|
||||
short: bool,
|
||||
mode: int,
|
||||
parser: str,
|
||||
) -> int:
|
||||
if parser == "cpython" and (tree_arg or mode == 0):
|
||||
print("Cannot specify tree argument or mode=0 with the cpython parser.", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if not directory:
|
||||
print("You must specify a directory of files to test.", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if grammar_file and tokens_file:
|
||||
if not os.path.exists(grammar_file):
|
||||
print(f"The specified grammar file, {grammar_file}, does not exist.", file=sys.stderr)
|
||||
return 1
|
||||
else:
|
||||
print(
|
||||
"A grammar file or a tokens file was not provided - attempting to use existing parser from stdlib...\n"
|
||||
def parse_file(source: str, file: str, mode: int, oldparser: bool) -> Tuple[Any, float]:
|
||||
t0 = time.time()
|
||||
if mode == COMPILE:
|
||||
result = _peg_parser.compile_string(
|
||||
source,
|
||||
filename=file,
|
||||
oldparser=oldparser,
|
||||
)
|
||||
else:
|
||||
result = _peg_parser.parse_string(
|
||||
source,
|
||||
filename=file,
|
||||
oldparser=oldparser,
|
||||
ast=(mode == PARSE),
|
||||
)
|
||||
|
||||
if tree_arg:
|
||||
assert mode == 1, "Mode should be 1 (parse), when comparing the generated trees"
|
||||
|
||||
# For a given directory, traverse files and attempt to parse each one
|
||||
# - Output success/failure for each file
|
||||
errors = 0
|
||||
files = []
|
||||
trees = {} # Trees to compare (after everything else is done)
|
||||
total_seconds = 0
|
||||
|
||||
for file in sorted(glob(f"{directory}/**/*.py", recursive=True)):
|
||||
# Only attempt to parse Python files and files that are not excluded
|
||||
should_exclude_file = False
|
||||
for pattern in excluded_files:
|
||||
if PurePath(file).match(pattern):
|
||||
should_exclude_file = True
|
||||
break
|
||||
|
||||
if not should_exclude_file:
|
||||
with tokenize.open(file) as f:
|
||||
source = f.read()
|
||||
try:
|
||||
t0 = time.time()
|
||||
if mode == 2:
|
||||
result = _peg_parser.compile_string(
|
||||
source,
|
||||
filename=file,
|
||||
oldparser=parser == "cpython",
|
||||
)
|
||||
else:
|
||||
result = _peg_parser.parse_string(
|
||||
source,
|
||||
filename=file,
|
||||
oldparser=parser == "cpython"
|
||||
)
|
||||
t1 = time.time()
|
||||
total_seconds += (t1 - t0)
|
||||
if tree_arg:
|
||||
trees[file] = result
|
||||
if not short:
|
||||
report_status(succeeded=True, file=file, verbose=verbose)
|
||||
except Exception as error:
|
||||
try:
|
||||
_peg_parser.parse_string(source, mode="exec", oldparser=True)
|
||||
except Exception:
|
||||
if not short:
|
||||
print(f"File {file} cannot be parsed by either pegen or the ast module.")
|
||||
else:
|
||||
report_status(
|
||||
succeeded=False, file=file, verbose=verbose, error=error, short=short
|
||||
)
|
||||
errors += 1
|
||||
files.append(file)
|
||||
t1 = time.time()
|
||||
return result, t1 - t0
|
||||
|
||||
|
||||
def is_parsing_failure(source: str) -> bool:
    """Return True when the old parser rejects ``source`` as well.

    Intended to be called after the parser under test has already raised
    ``SyntaxError`` for ``source``.  The old parser serves as the reference:

    * if it also raises ``SyntaxError``, the input itself is unparseable, so
      the caller may report that the file "cannot be parsed by either parser"
      without counting an error;
    * if it accepts the input, the earlier error is specific to the parser
      under test and the caller should record it as a real failure.

    The previous implementation returned the opposite values, which made the
    caller print the "either parser" message exactly when the old parser had
    succeeded.
    """
    try:
        _peg_parser.parse_string(source, mode="exec", oldparser=True)
    except SyntaxError:
        # The reference parser rejects the source too.
        return True
    # The reference parser accepted it; only the parser under test failed.
    return False
|
||||
|
||||
|
||||
def generate_time_stats(files, total_seconds) -> None:
|
||||
total_files = len(files)
|
||||
|
||||
total_bytes = 0
|
||||
total_lines = 0
|
||||
for file in files:
|
||||
|
@ -217,6 +159,57 @@ def parse_directory(
|
|||
f"or {total_bytes / total_seconds :,.0f} bytes/sec.",
|
||||
)
|
||||
|
||||
|
||||
def parse_directory(
|
||||
directory: str,
|
||||
verbose: bool,
|
||||
excluded_files: List[str],
|
||||
tree_arg: int,
|
||||
short: bool,
|
||||
mode: int,
|
||||
oldparser: bool,
|
||||
) -> int:
|
||||
if tree_arg:
|
||||
assert mode == PARSE, "Mode should be 1 (parse), when comparing the generated trees"
|
||||
|
||||
if oldparser and tree_arg:
|
||||
print("Cannot specify tree argument with the cpython parser.", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# For a given directory, traverse files and attempt to parse each one
|
||||
# - Output success/failure for each file
|
||||
errors = 0
|
||||
files = []
|
||||
trees = {} # Trees to compare (after everything else is done)
|
||||
total_seconds = 0
|
||||
|
||||
for file in sorted(glob(f"{directory}/**/*.py", recursive=True)):
|
||||
# Only attempt to parse Python files and files that are not excluded
|
||||
if any(PurePath(file).match(pattern) for pattern in excluded_files):
|
||||
continue
|
||||
|
||||
with tokenize.open(file) as f:
|
||||
source = f.read()
|
||||
|
||||
try:
|
||||
result, dt = parse_file(source, file, mode, oldparser)
|
||||
total_seconds += dt
|
||||
if tree_arg:
|
||||
trees[file] = result
|
||||
report_status(succeeded=True, file=file, verbose=verbose, short=short)
|
||||
except SyntaxError as error:
|
||||
if is_parsing_failure(source):
|
||||
print(f"File {file} cannot be parsed by either parser.")
|
||||
else:
|
||||
report_status(
|
||||
succeeded=False, file=file, verbose=verbose, error=error, short=short
|
||||
)
|
||||
errors += 1
|
||||
files.append(file)
|
||||
|
||||
t1 = time.time()
|
||||
|
||||
generate_time_stats(files, total_seconds)
|
||||
if short:
|
||||
print_memstats()
|
||||
|
||||
|
@ -240,26 +233,20 @@ def parse_directory(
|
|||
def main() -> None:
|
||||
args = argparser.parse_args()
|
||||
directory = args.directory
|
||||
grammar_file = args.grammar_file
|
||||
tokens_file = args.tokens_file
|
||||
verbose = args.verbose
|
||||
excluded_files = args.exclude
|
||||
skip_actions = args.skip_actions
|
||||
tree = args.tree
|
||||
short = args.short
|
||||
mode = 1 if args.tree else 2
|
||||
sys.exit(
|
||||
parse_directory(
|
||||
directory,
|
||||
grammar_file,
|
||||
tokens_file,
|
||||
verbose,
|
||||
excluded_files,
|
||||
skip_actions,
|
||||
tree,
|
||||
short,
|
||||
mode,
|
||||
"pegen",
|
||||
oldparser=False,
|
||||
)
|
||||
)
|
||||
|
||||
|
|
|
@ -57,22 +57,11 @@ def find_dirname(package_name: str) -> str:
|
|||
def run_tests(dirname: str, tree: int) -> int:
|
||||
return test_parse_directory.parse_directory(
|
||||
dirname,
|
||||
HERE / ".." / ".." / ".." / "Grammar" / "python.gram",
|
||||
HERE / ".." / ".." / ".." / "Grammar" / "Tokens",
|
||||
verbose=False,
|
||||
excluded_files=[
|
||||
"*/failset/*",
|
||||
"*/failset/**",
|
||||
"*/failset/**/*",
|
||||
"*/test2to3/*",
|
||||
"*/test2to3/**/*",
|
||||
"*/bad*",
|
||||
"*/lib2to3/tests/data/*",
|
||||
],
|
||||
skip_actions=False,
|
||||
excluded_files=[],
|
||||
tree_arg=tree,
|
||||
short=True,
|
||||
mode=1,
|
||||
mode=1 if tree else 0,
|
||||
parser="pegen",
|
||||
)
|
||||
|
||||
|
|
Loading…
Reference in New Issue