bpo-40939: Clean and adapt the peg_generator directory after deleting the old parser (GH-20822)
This commit is contained in:
parent
b4282dd150
commit
756180b4bf
|
@ -22,7 +22,7 @@ data/xxl.py:
|
|||
|
||||
build: peg_extension/parse.c
|
||||
|
||||
peg_extension/parse.c: $(GRAMMAR) $(TOKENS) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen/pegen.c ../../Parser/pegen/parse_string.c ../../Parser/pegen/*.h pegen/grammar_parser.py
|
||||
peg_extension/parse.c: $(GRAMMAR) $(TOKENS) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen.c ../../Parser/string_parser.c ../../Parser/*.h pegen/grammar_parser.py
|
||||
$(PYTHON) -m pegen -q c $(GRAMMAR) $(TOKENS) -o peg_extension/parse.c --compile-extension
|
||||
|
||||
clean:
|
||||
|
@ -70,18 +70,10 @@ stats: peg_extension/parse.c data/xxl.py
|
|||
time: time_compile
|
||||
|
||||
time_compile: venv data/xxl.py
|
||||
$(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl compile
|
||||
$(VENVPYTHON) scripts/benchmark.py --target=xxl compile
|
||||
|
||||
time_parse: venv data/xxl.py
|
||||
$(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl parse
|
||||
|
||||
time_old: time_old_compile
|
||||
|
||||
time_old_compile: venv data/xxl.py
|
||||
$(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl compile
|
||||
|
||||
time_old_parse: venv data/xxl.py
|
||||
$(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl parse
|
||||
$(VENVPYTHON) scripts/benchmark.py --target=xxl parse
|
||||
|
||||
time_peg_dir: venv
|
||||
$(VENVPYTHON) scripts/test_parse_directory.py \
|
||||
|
|
|
@ -41,9 +41,7 @@ def main():
|
|||
"grammar", type=str, help="The file with the grammar definition in PEG format"
|
||||
)
|
||||
parser.add_argument(
|
||||
"tokens_file",
|
||||
type=argparse.FileType("r"),
|
||||
help="The file with the token definitions"
|
||||
"tokens_file", type=argparse.FileType("r"), help="The file with the token definitions"
|
||||
)
|
||||
parser.add_argument(
|
||||
"keyword_file",
|
||||
|
@ -61,9 +59,7 @@ def main():
|
|||
gen.collect_todo()
|
||||
|
||||
with args.keyword_file as thefile:
|
||||
all_keywords = sorted(
|
||||
list(gen.callmakervisitor.keyword_cache.keys()) + EXTRA_KEYWORDS
|
||||
)
|
||||
all_keywords = sorted(list(gen.callmakervisitor.keyword_cache.keys()) + EXTRA_KEYWORDS)
|
||||
|
||||
keywords = ",\n ".join(map(repr, all_keywords))
|
||||
thefile.write(TEMPLATE.format(keywords=keywords))
|
||||
|
|
|
@ -6,13 +6,13 @@ import sys
|
|||
import os
|
||||
from time import time
|
||||
|
||||
import _peg_parser
|
||||
|
||||
try:
|
||||
import memory_profiler
|
||||
except ModuleNotFoundError:
|
||||
print("Please run `make venv` to create a virtual environment and install"
|
||||
" all the dependencies, before running this script.")
|
||||
print(
|
||||
"Please run `make venv` to create a virtual environment and install"
|
||||
" all the dependencies, before running this script."
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
sys.path.insert(0, os.getcwd())
|
||||
|
@ -21,13 +21,6 @@ from scripts.test_parse_directory import parse_directory
|
|||
argparser = argparse.ArgumentParser(
|
||||
prog="benchmark", description="Reproduce the various pegen benchmarks"
|
||||
)
|
||||
argparser.add_argument(
|
||||
"--parser",
|
||||
action="store",
|
||||
choices=["new", "old"],
|
||||
default="pegen",
|
||||
help="Which parser to benchmark (default is pegen)",
|
||||
)
|
||||
argparser.add_argument(
|
||||
"--target",
|
||||
action="store",
|
||||
|
@ -40,12 +33,7 @@ subcommands = argparser.add_subparsers(title="Benchmarks", dest="subcommand")
|
|||
command_compile = subcommands.add_parser(
|
||||
"compile", help="Benchmark parsing and compiling to bytecode"
|
||||
)
|
||||
command_parse = subcommands.add_parser(
|
||||
"parse", help="Benchmark parsing and generating an ast.AST"
|
||||
)
|
||||
command_notree = subcommands.add_parser(
|
||||
"notree", help="Benchmark parsing and dumping the tree"
|
||||
)
|
||||
command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST")
|
||||
|
||||
|
||||
def benchmark(func):
|
||||
|
@ -66,59 +54,37 @@ def benchmark(func):
|
|||
|
||||
|
||||
@benchmark
|
||||
def time_compile(source, parser):
|
||||
if parser == "old":
|
||||
return _peg_parser.compile_string(
|
||||
source,
|
||||
oldparser=True,
|
||||
)
|
||||
else:
|
||||
return _peg_parser.compile_string(source)
|
||||
def time_compile(source):
|
||||
return compile(source, "<string>", "exec")
|
||||
|
||||
|
||||
@benchmark
|
||||
def time_parse(source, parser):
|
||||
if parser == "old":
|
||||
return _peg_parser.parse_string(source, oldparser=True)
|
||||
else:
|
||||
return _peg_parser.parse_string(source)
|
||||
def time_parse(source):
|
||||
return ast.parse(source)
|
||||
|
||||
|
||||
@benchmark
|
||||
def time_notree(source, parser):
|
||||
if parser == "old":
|
||||
return _peg_parser.parse_string(source, oldparser=True, ast=False)
|
||||
else:
|
||||
return _peg_parser.parse_string(source, ast=False)
|
||||
|
||||
|
||||
def run_benchmark_xxl(subcommand, parser, source):
|
||||
def run_benchmark_xxl(subcommand, source):
|
||||
if subcommand == "compile":
|
||||
time_compile(source, parser)
|
||||
time_compile(source)
|
||||
elif subcommand == "parse":
|
||||
time_parse(source, parser)
|
||||
elif subcommand == "notree":
|
||||
time_notree(source, parser)
|
||||
time_parse(source)
|
||||
|
||||
|
||||
def run_benchmark_stdlib(subcommand, parser):
|
||||
modes = {"compile": 2, "parse": 1, "notree": 0}
|
||||
def run_benchmark_stdlib(subcommand):
|
||||
modes = {"compile": 2, "parse": 1}
|
||||
for _ in range(3):
|
||||
parse_directory(
|
||||
"../../Lib",
|
||||
verbose=False,
|
||||
excluded_files=["*/bad*", "*/lib2to3/tests/data/*",],
|
||||
tree_arg=0,
|
||||
short=True,
|
||||
mode=modes[subcommand],
|
||||
oldparser=(parser == "old"),
|
||||
)
|
||||
|
||||
|
||||
def main():
|
||||
args = argparser.parse_args()
|
||||
subcommand = args.subcommand
|
||||
parser = args.parser
|
||||
target = args.target
|
||||
|
||||
if subcommand is None:
|
||||
|
@ -127,9 +93,9 @@ def main():
|
|||
if target == "xxl":
|
||||
with open(os.path.join("data", "xxl.py"), "r") as f:
|
||||
source = f.read()
|
||||
run_benchmark_xxl(subcommand, parser, source)
|
||||
run_benchmark_xxl(subcommand, source)
|
||||
elif target == "stdlib":
|
||||
run_benchmark_stdlib(subcommand, parser)
|
||||
run_benchmark_stdlib(subcommand)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -14,8 +14,7 @@ INITIAL_NESTING_DEPTH, or NESTED_INCR_AMT variables.
|
|||
Usage: python -m scripts.find_max_nesting
|
||||
"""
|
||||
import sys
|
||||
|
||||
from _peg_parser import parse_string
|
||||
import ast
|
||||
|
||||
GRAMMAR_FILE = "data/python.gram"
|
||||
INITIAL_NESTING_DEPTH = 10
|
||||
|
@ -28,9 +27,8 @@ ENDC = "\033[0m"
|
|||
|
||||
def check_nested_expr(nesting_depth: int) -> bool:
|
||||
expr = f"{'(' * nesting_depth}0{')' * nesting_depth}"
|
||||
|
||||
try:
|
||||
parse_string(expr)
|
||||
ast.parse(expr)
|
||||
print(f"Nesting depth of {nesting_depth} is successful")
|
||||
return True
|
||||
except Exception as err:
|
||||
|
|
|
@ -1,121 +0,0 @@
|
|||
#!/usr/bin/env python3.8
|
||||
|
||||
"""Show the parse tree for a given program, nicely formatted.
|
||||
|
||||
Example:
|
||||
|
||||
$ scripts/show_parse.py a+b
|
||||
Module(
|
||||
body=[
|
||||
Expr(
|
||||
value=BinOp(
|
||||
left=Name(id="a", ctx=Load()), op=Add(), right=Name(id="b", ctx=Load())
|
||||
)
|
||||
)
|
||||
],
|
||||
type_ignores=[],
|
||||
)
|
||||
$
|
||||
|
||||
Use -v to show line numbers and column offsets.
|
||||
|
||||
The formatting is done using black. You can also import this module
|
||||
and call one of its functions.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import ast
|
||||
import difflib
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
import _peg_parser
|
||||
|
||||
from typing import List
|
||||
|
||||
sys.path.insert(0, os.getcwd())
|
||||
from pegen.ast_dump import ast_dump
|
||||
|
||||
# Command-line interface of the script.  The positional "program" words are
# joined into one source string by main() before being parsed.
parser = argparse.ArgumentParser()
parser.add_argument(
    "-d",
    "--diff",
    action="store_true",
    # The previous help text referred to a "-g" option that does not exist;
    # main() diffs the old-parser tree against the new-parser tree, and only
    # does so when the new parser is selected.
    help="show diff between the old and new parser trees (only with --parser=new)",
)
parser.add_argument(
    "-p",
    "--parser",
    choices=["new", "old"],
    default="new",
    help="choose the parser to use",
)
parser.add_argument(
    "-m",
    "--multiline",
    action="store_true",
    help="concatenate program arguments using newline instead of space",
)
parser.add_argument("-v", "--verbose", action="store_true", help="show line/column numbers")
parser.add_argument("program", nargs="+", help="program to parse (will be concatenated)")
|
||||
|
||||
|
||||
def format_tree(tree: ast.AST, verbose: bool = False) -> str:
    """Return the dump of *tree* after reformatting it with ``black``.

    The tree is dumped with ``ast_dump`` (including attributes when *verbose*
    is true) into a named temporary file, ``black -q`` is run on that file,
    and the file's contents are read back and returned.

    Raises:
        RuntimeError: if ``black`` cannot be started or exits with a
            non-zero status.
    """
    # Local import: keeps this block self-contained without touching the
    # module-level import list.
    import subprocess

    with tempfile.NamedTemporaryFile("w+") as tf:
        tf.write(ast_dump(tree, include_attributes=verbose))
        tf.write("\n")
        # black opens the file by name, so the dump must be flushed first.
        tf.flush()
        # An argument list (no shell) means the temporary path is never
        # re-interpreted by a shell, unlike an os.system() command string.
        cmd = ["black", "-q", tf.name]
        try:
            completed = subprocess.run(cmd)
        except OSError as err:
            # e.g. black is not installed; keep RuntimeError as the single
            # failure type callers have to deal with.
            raise RuntimeError(f"Command {cmd!r} could not be run: {err}") from err
        if completed.returncode:
            raise RuntimeError(
                f"Command {cmd!r} failed with status {completed.returncode}"
            )
        tf.seek(0)
        return tf.read()
|
||||
|
||||
|
||||
def diff_trees(a: ast.AST, b: ast.AST, verbose: bool = False) -> List[str]:
    """Return a unified diff between the formatted dumps of trees *a* and *b*.

    Both trees are rendered with ``format_tree`` (passing *verbose* through),
    split into lines, and compared; the diff labels the sides "a" and "b".
    """
    # Format in the same order as the arguments: first a, then b.
    lines_a, lines_b = (format_tree(tree, verbose).splitlines() for tree in (a, b))
    delta = difflib.unified_diff(lines_a, lines_b, "a", "b", lineterm="")
    return list(delta)
|
||||
|
||||
|
||||
def show_parse(source: str, verbose: bool = False) -> str:
    """Return the formatted tree for *source*, without trailing newlines.

    The source is parsed with ``_peg_parser.parse_string(..., oldparser=True)``
    and rendered through ``format_tree``.
    """
    parsed = _peg_parser.parse_string(source, oldparser=True)
    rendered = format_tree(parsed, verbose)
    return rendered.rstrip("\n")
|
||||
|
||||
|
||||
def print_parse(source: str, verbose: bool = False) -> None:
    """Print the result of ``show_parse(source, verbose)`` to standard output."""
    rendered = show_parse(source, verbose)
    print(rendered)
|
||||
|
||||
|
||||
def main() -> None:
    """Parse the program given on the command line and print its tree or diff.

    The program words are joined with a newline (``--multiline``) or a space.
    With the old parser the tree is always printed; with the new parser the
    tree is printed unless ``--diff`` is given, in which case a diff against
    the old parser's tree is printed instead.
    """
    args = parser.parse_args()
    program = ("\n" if args.multiline else " ").join(args.program)

    # Old parser: --diff is not consulted on this path.
    if args.parser != "new":
        tree = _peg_parser.parse_string(program, oldparser=True)
        print("# Parsed using the old parser")
        print(format_tree(tree, args.verbose))
        return

    tree = _peg_parser.parse_string(program)

    if not args.diff:
        print("# Parsed using the new parser")
        print(format_tree(tree, args.verbose))
        return

    # Diff mode: old-parser tree is side "a", new-parser tree is side "b".
    old_tree = _peg_parser.parse_string(program, oldparser=True)
    diff = diff_trees(old_tree, tree, args.verbose)
    if not diff:
        print("# Trees are the same")
        return
    for line in diff:
        print(line)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -7,7 +7,6 @@ import sys
|
|||
import time
|
||||
import traceback
|
||||
import tokenize
|
||||
import _peg_parser
|
||||
from glob import glob
|
||||
from pathlib import PurePath
|
||||
|
||||
|
@ -16,7 +15,6 @@ from typing import List, Optional, Any, Tuple
|
|||
sys.path.insert(0, os.getcwd())
|
||||
from pegen.ast_dump import ast_dump
|
||||
from pegen.testutil import print_memstats
|
||||
from scripts import show_parse
|
||||
|
||||
SUCCESS = "\033[92m"
|
||||
FAIL = "\033[91m"
|
||||
|
@ -40,9 +38,6 @@ argparser.add_argument(
|
|||
argparser.add_argument(
|
||||
"-v", "--verbose", action="store_true", help="Display detailed errors for failures"
|
||||
)
|
||||
argparser.add_argument(
|
||||
"-t", "--tree", action="count", help="Compare parse tree to official AST", default=0
|
||||
)
|
||||
|
||||
|
||||
def report_status(
|
||||
|
@ -79,66 +74,13 @@ def report_status(
|
|||
print(f" {str(error.__class__.__name__)}: {error}")
|
||||
|
||||
|
||||
def compare_trees(
    actual_tree: ast.AST, file: str, verbose: bool, include_attributes: bool = False,
) -> int:
    """Compare *actual_tree* with the old parser's tree for *file*.

    The contents of *file* are parsed with
    ``_peg_parser.parse_string(..., oldparser=True)`` to obtain the expected
    tree; both trees are dumped with ``ast_dump`` and the dumps compared.

    Args:
        actual_tree: the tree to check.
        file: path of the source file the tree was produced from.
        verbose: also print the formatted tree(s), not only the diff.
        include_attributes: passed to ``ast_dump`` and to the ``show_parse``
            formatting helpers.

    Returns:
        0 if the two dumps are identical, 1 otherwise (after printing a diff).
    """
    with open(file) as f:
        expected_tree = _peg_parser.parse_string(f.read(), oldparser=True)

    expected_text = ast_dump(expected_tree, include_attributes=include_attributes)
    actual_text = ast_dump(actual_tree, include_attributes=include_attributes)
    if actual_text == expected_text:
        if verbose:
            # f-prefix was missing: the literal "{file}" used to be printed.
            print(f"Tree for {file}:")
            print(show_parse.format_tree(actual_tree, include_attributes))
        return 0

    print(f"Diffing ASTs for {file} ...")

    if verbose:
        # Only format the full trees when they are actually printed; each
        # format_tree() call runs an external formatter.
        expected = show_parse.format_tree(expected_tree, include_attributes)
        actual = show_parse.format_tree(actual_tree, include_attributes)
        print(f"Expected for {file}:")
        print(expected)
        print(f"Actual for {file}:")
        print(actual)
        print(f"Diff for {file}:")

    diff = show_parse.diff_trees(expected_tree, actual_tree, include_attributes)
    for line in diff:
        print(line)

    return 1
|
||||
|
||||
|
||||
def parse_file(source: str, file: str, mode: int, oldparser: bool) -> Tuple[Any, float]:
|
||||
def parse_file(source: str, file: str) -> Tuple[Any, float]:
|
||||
t0 = time.time()
|
||||
if mode == COMPILE:
|
||||
result = _peg_parser.compile_string(
|
||||
source,
|
||||
filename=file,
|
||||
oldparser=oldparser,
|
||||
)
|
||||
else:
|
||||
result = _peg_parser.parse_string(
|
||||
source,
|
||||
filename=file,
|
||||
oldparser=oldparser,
|
||||
ast=(mode == PARSE),
|
||||
)
|
||||
result = ast.parse(source, filename=file)
|
||||
t1 = time.time()
|
||||
return result, t1 - t0
|
||||
|
||||
|
||||
def is_parsing_failure(source: str) -> bool:
    """Return True if the old parser also fails to parse *source*.

    *source* is parsed in "exec" mode with the old parser; a ``SyntaxError``
    means the input itself is unparseable.  The result used to be inverted
    (False on ``SyntaxError``), which made the caller print "cannot be parsed
    by either parser" for files the old parser accepted.
    """
    try:
        _peg_parser.parse_string(source, mode="exec", oldparser=True)
    except SyntaxError:
        return True
    return False
|
||||
|
||||
|
||||
def generate_time_stats(files, total_seconds) -> None:
|
||||
total_files = len(files)
|
||||
total_bytes = 0
|
||||
|
@ -160,27 +102,11 @@ def generate_time_stats(files, total_seconds) -> None:
|
|||
)
|
||||
|
||||
|
||||
def parse_directory(
|
||||
directory: str,
|
||||
verbose: bool,
|
||||
excluded_files: List[str],
|
||||
tree_arg: int,
|
||||
short: bool,
|
||||
mode: int,
|
||||
oldparser: bool,
|
||||
) -> int:
|
||||
if tree_arg:
|
||||
assert mode == PARSE, "Mode should be 1 (parse), when comparing the generated trees"
|
||||
|
||||
if oldparser and tree_arg:
|
||||
print("Cannot specify tree argument with the cpython parser.", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
def parse_directory(directory: str, verbose: bool, excluded_files: List[str], short: bool) -> int:
|
||||
# For a given directory, traverse files and attempt to parse each one
|
||||
# - Output success/failure for each file
|
||||
errors = 0
|
||||
files = []
|
||||
trees = {} # Trees to compare (after everything else is done)
|
||||
total_seconds = 0
|
||||
|
||||
for file in sorted(glob(f"{directory}/**/*.py", recursive=True)):
|
||||
|
@ -192,39 +118,20 @@ def parse_directory(
|
|||
source = f.read()
|
||||
|
||||
try:
|
||||
result, dt = parse_file(source, file, mode, oldparser)
|
||||
result, dt = parse_file(source, file)
|
||||
total_seconds += dt
|
||||
if tree_arg:
|
||||
trees[file] = result
|
||||
report_status(succeeded=True, file=file, verbose=verbose, short=short)
|
||||
except SyntaxError as error:
|
||||
if is_parsing_failure(source):
|
||||
print(f"File {file} cannot be parsed by either parser.")
|
||||
else:
|
||||
report_status(
|
||||
succeeded=False, file=file, verbose=verbose, error=error, short=short
|
||||
)
|
||||
report_status(succeeded=False, file=file, verbose=verbose, error=error, short=short)
|
||||
errors += 1
|
||||
files.append(file)
|
||||
|
||||
t1 = time.time()
|
||||
|
||||
generate_time_stats(files, total_seconds)
|
||||
if short:
|
||||
print_memstats()
|
||||
|
||||
if errors:
|
||||
print(f"Encountered {errors} failures.", file=sys.stderr)
|
||||
|
||||
# Compare trees (the dict is empty unless -t is given)
|
||||
compare_trees_errors = 0
|
||||
for file, tree in trees.items():
|
||||
if not short:
|
||||
print("Comparing ASTs for", file)
|
||||
if compare_trees(tree, file, verbose, tree_arg >= 2) == 1:
|
||||
compare_trees_errors += 1
|
||||
|
||||
if errors or compare_trees_errors:
|
||||
return 1
|
||||
|
||||
return 0
|
||||
|
@ -235,20 +142,8 @@ def main() -> None:
|
|||
directory = args.directory
|
||||
verbose = args.verbose
|
||||
excluded_files = args.exclude
|
||||
tree = args.tree
|
||||
short = args.short
|
||||
mode = 1 if args.tree else 2
|
||||
sys.exit(
|
||||
parse_directory(
|
||||
directory,
|
||||
verbose,
|
||||
excluded_files,
|
||||
tree,
|
||||
short,
|
||||
mode,
|
||||
oldparser=False,
|
||||
)
|
||||
)
|
||||
sys.exit(parse_directory(directory, verbose, excluded_files, short))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
Loading…
Reference in New Issue