bpo-40939: Clean and adapt the peg_generator directory after deleting the old parser (GH-20822)

This commit is contained in:
Pablo Galindo 2020-06-12 01:55:35 +01:00 committed by GitHub
parent b4282dd150
commit 756180b4bf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 30 additions and 304 deletions

View File

@ -22,7 +22,7 @@ data/xxl.py:
build: peg_extension/parse.c
peg_extension/parse.c: $(GRAMMAR) $(TOKENS) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen/pegen.c ../../Parser/pegen/parse_string.c ../../Parser/pegen/*.h pegen/grammar_parser.py
peg_extension/parse.c: $(GRAMMAR) $(TOKENS) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen.c ../../Parser/string_parser.c ../../Parser/*.h pegen/grammar_parser.py
$(PYTHON) -m pegen -q c $(GRAMMAR) $(TOKENS) -o peg_extension/parse.c --compile-extension
clean:
@ -70,18 +70,10 @@ stats: peg_extension/parse.c data/xxl.py
time: time_compile
time_compile: venv data/xxl.py
$(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl compile
$(VENVPYTHON) scripts/benchmark.py --target=xxl compile
time_parse: venv data/xxl.py
$(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl parse
time_old: time_old_compile
time_old_compile: venv data/xxl.py
$(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl compile
time_old_parse: venv data/xxl.py
$(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl parse
$(VENVPYTHON) scripts/benchmark.py --target=xxl parse
time_peg_dir: venv
$(VENVPYTHON) scripts/test_parse_directory.py \

View File

@ -41,9 +41,7 @@ def main():
"grammar", type=str, help="The file with the grammar definition in PEG format"
)
parser.add_argument(
"tokens_file",
type=argparse.FileType("r"),
help="The file with the token definitions"
"tokens_file", type=argparse.FileType("r"), help="The file with the token definitions"
)
parser.add_argument(
"keyword_file",
@ -61,9 +59,7 @@ def main():
gen.collect_todo()
with args.keyword_file as thefile:
all_keywords = sorted(
list(gen.callmakervisitor.keyword_cache.keys()) + EXTRA_KEYWORDS
)
all_keywords = sorted(list(gen.callmakervisitor.keyword_cache.keys()) + EXTRA_KEYWORDS)
keywords = ",\n ".join(map(repr, all_keywords))
thefile.write(TEMPLATE.format(keywords=keywords))

View File

@ -6,13 +6,13 @@ import sys
import os
from time import time
import _peg_parser
try:
import memory_profiler
except ModuleNotFoundError:
print("Please run `make venv` to create a virtual environment and install"
" all the dependencies, before running this script.")
print(
"Please run `make venv` to create a virtual environment and install"
" all the dependencies, before running this script."
)
sys.exit(1)
sys.path.insert(0, os.getcwd())
@ -21,13 +21,6 @@ from scripts.test_parse_directory import parse_directory
# Command-line interface of the benchmark script.
argparser = argparse.ArgumentParser(
    prog="benchmark", description="Reproduce the various pegen benchmarks"
)
# Keep the default inside ``choices`` so the value the rest of the script
# sees is always one that a user could also have passed explicitly.
argparser.add_argument(
    "--parser",
    action="store",
    choices=["new", "old"],
    default="new",
    help="Which parser to benchmark (default is new)",
)
argparser.add_argument(
"--target",
action="store",
@ -40,12 +33,7 @@ subcommands = argparser.add_subparsers(title="Benchmarks", dest="subcommand")
command_compile = subcommands.add_parser(
"compile", help="Benchmark parsing and compiling to bytecode"
)
command_parse = subcommands.add_parser(
"parse", help="Benchmark parsing and generating an ast.AST"
)
command_notree = subcommands.add_parser(
"notree", help="Benchmark parsing and dumping the tree"
)
command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST")
def benchmark(func):
@ -66,59 +54,37 @@ def benchmark(func):
@benchmark
def time_compile(source, parser):
if parser == "old":
return _peg_parser.compile_string(
source,
oldparser=True,
)
else:
return _peg_parser.compile_string(source)
def time_compile(source):
return compile(source, "<string>", "exec")
@benchmark
def time_parse(source, parser):
if parser == "old":
return _peg_parser.parse_string(source, oldparser=True)
else:
return _peg_parser.parse_string(source)
def time_parse(source):
return ast.parse(source)
@benchmark
def time_notree(source, parser):
if parser == "old":
return _peg_parser.parse_string(source, oldparser=True, ast=False)
else:
return _peg_parser.parse_string(source, ast=False)
def run_benchmark_xxl(subcommand, parser, source):
def run_benchmark_xxl(subcommand, source):
if subcommand == "compile":
time_compile(source, parser)
time_compile(source)
elif subcommand == "parse":
time_parse(source, parser)
elif subcommand == "notree":
time_notree(source, parser)
time_parse(source)
def run_benchmark_stdlib(subcommand, parser):
modes = {"compile": 2, "parse": 1, "notree": 0}
def run_benchmark_stdlib(subcommand):
modes = {"compile": 2, "parse": 1}
for _ in range(3):
parse_directory(
"../../Lib",
verbose=False,
excluded_files=["*/bad*", "*/lib2to3/tests/data/*",],
tree_arg=0,
short=True,
mode=modes[subcommand],
oldparser=(parser == "old"),
)
def main():
args = argparser.parse_args()
subcommand = args.subcommand
parser = args.parser
target = args.target
if subcommand is None:
@ -127,9 +93,9 @@ def main():
if target == "xxl":
with open(os.path.join("data", "xxl.py"), "r") as f:
source = f.read()
run_benchmark_xxl(subcommand, parser, source)
run_benchmark_xxl(subcommand, source)
elif target == "stdlib":
run_benchmark_stdlib(subcommand, parser)
run_benchmark_stdlib(subcommand)
if __name__ == "__main__":

View File

@ -14,8 +14,7 @@ INITIAL_NESTING_DEPTH, or NESTED_INCR_AMT variables.
Usage: python -m scripts.find_max_nesting
"""
import sys
from _peg_parser import parse_string
import ast
GRAMMAR_FILE = "data/python.gram"
INITIAL_NESTING_DEPTH = 10
@ -28,9 +27,8 @@ ENDC = "\033[0m"
def check_nested_expr(nesting_depth: int) -> bool:
expr = f"{'(' * nesting_depth}0{')' * nesting_depth}"
try:
parse_string(expr)
ast.parse(expr)
print(f"Nesting depth of {nesting_depth} is successful")
return True
except Exception as err:

View File

@ -1,121 +0,0 @@
#!/usr/bin/env python3.8
"""Show the parse tree for a given program, nicely formatted.
Example:
$ scripts/show_parse.py a+b
Module(
body=[
Expr(
value=BinOp(
left=Name(id="a", ctx=Load()), op=Add(), right=Name(id="b", ctx=Load())
)
)
],
type_ignores=[],
)
$
Use -v to show line numbers and column offsets.
The formatting is done using black. You can also import this module
and call one of its functions.
"""
import argparse
import ast
import difflib
import os
import sys
import tempfile
import _peg_parser
from typing import List
sys.path.insert(0, os.getcwd())
from pegen.ast_dump import ast_dump
# Command-line interface of the script.
parser = argparse.ArgumentParser()
parser.add_argument(
    "-d",
    "--diff",
    action="store_true",
    # main() only looks at this flag when the new parser is selected.
    help="show diff between the old parser and new parser ASTs (requires --parser=new)",
)
parser.add_argument(
    "-p",
    "--parser",
    choices=["new", "old"],
    default="new",
    help="choose the parser to use",
)
parser.add_argument(
    "-m",
    "--multiline",
    action="store_true",
    help="concatenate program arguments using newline instead of space",
)
parser.add_argument("-v", "--verbose", action="store_true", help="show line/column numbers")
parser.add_argument("program", nargs="+", help="program to parse (will be concatenated)")
def format_tree(tree: ast.AST, verbose: bool = False) -> str:
    """Return the ``ast_dump`` text of *tree* after reformatting it with black.

    The dump (with node attributes when *verbose* is true) is written to a
    temporary file, ``black -q`` is run on that file in place, and the
    resulting file contents are returned.

    Raises:
        RuntimeError: if the ``black`` command exits with a non-zero status.
    """
    with tempfile.NamedTemporaryFile("w+") as scratch:
        dumped = ast_dump(tree, include_attributes=verbose)
        scratch.write(dumped)
        scratch.write("\n")
        # black reads the file by name, so the data must be on disk first.
        scratch.flush()

        command = f"black -q {scratch.name}"
        status = os.system(command)
        if status != 0:
            raise RuntimeError(f"Command {command!r} failed with status 0x{status:x}")

        # black rewrote the file in place; rewind and read it back.
        scratch.seek(0)
        formatted = scratch.read()
    return formatted
def diff_trees(a: ast.AST, b: ast.AST, verbose: bool = False) -> List[str]:
    """Return a unified diff between the formatted dumps of trees *a* and *b*.

    Both trees are rendered with ``format_tree`` (passing *verbose* through)
    and compared line by line. The diff labels the two sides ``a`` and ``b``;
    the returned lines carry no trailing newline. The list holds no diff lines
    when the renderings are identical.
    """
    lines_a, lines_b = (format_tree(side, verbose).splitlines() for side in (a, b))
    delta = difflib.unified_diff(lines_a, lines_b, "a", "b", lineterm="")
    return list(delta)
def show_parse(source: str, verbose: bool = False) -> str:
    """Return the formatted parse tree of *source* without trailing newlines.

    The tree is produced by ``_peg_parser.parse_string`` with
    ``oldparser=True`` and rendered by ``format_tree``; *verbose* is passed
    through to ``format_tree``.
    """
    rendered = format_tree(_peg_parser.parse_string(source, oldparser=True), verbose)
    return rendered.rstrip("\n")
def print_parse(source: str, verbose: bool = False) -> None:
    """Print the result of ``show_parse(source, verbose)`` to standard output."""
    rendered = show_parse(source, verbose)
    print(rendered)
def main() -> None:
    """Parse the command line and print the requested tree or diff.

    The ``program`` arguments are joined with a newline when ``--multiline``
    is given and with a space otherwise. With the old parser selected, the
    old parser's tree is printed. With the new parser selected, either its
    tree is printed or, when ``--diff`` is given, a diff from the old
    parser's tree to the new parser's tree.
    """
    options = parser.parse_args()
    joiner = "\n" if options.multiline else " "
    program = joiner.join(options.program)

    # Old parser: just show its tree; --diff is not consulted here.
    if options.parser != "new":
        old_tree = _peg_parser.parse_string(program, oldparser=True)
        print("# Parsed using the old parser")
        print(format_tree(old_tree, options.verbose))
        return

    new_tree = _peg_parser.parse_string(program)
    if not options.diff:
        print("# Parsed using the new parser")
        print(format_tree(new_tree, options.verbose))
        return

    # Diff mode: the old parser's tree is the "a" side, the new one the "b" side.
    old_tree = _peg_parser.parse_string(program, oldparser=True)
    delta = diff_trees(old_tree, new_tree, options.verbose)
    if not delta:
        print("# Trees are the same")
        return
    for line in delta:
        print(line)
if __name__ == "__main__":
main()

View File

@ -7,7 +7,6 @@ import sys
import time
import traceback
import tokenize
import _peg_parser
from glob import glob
from pathlib import PurePath
@ -16,7 +15,6 @@ from typing import List, Optional, Any, Tuple
sys.path.insert(0, os.getcwd())
from pegen.ast_dump import ast_dump
from pegen.testutil import print_memstats
from scripts import show_parse
SUCCESS = "\033[92m"
FAIL = "\033[91m"
@ -40,9 +38,6 @@ argparser.add_argument(
argparser.add_argument(
"-v", "--verbose", action="store_true", help="Display detailed errors for failures"
)
argparser.add_argument(
"-t", "--tree", action="count", help="Compare parse tree to official AST", default=0
)
def report_status(
@ -79,66 +74,13 @@ def report_status(
print(f" {str(error.__class__.__name__)}: {error}")
def compare_trees(
    actual_tree: ast.AST, file: str, verbose: bool, include_attributes: bool = False,
) -> int:
    """Compare *actual_tree* with the old parser's tree for the source in *file*.

    The file is read and parsed with ``_peg_parser.parse_string(...,
    oldparser=True)`` to obtain the expected tree; both trees are then
    compared through their ``ast_dump`` text.

    Args:
        actual_tree: the tree to check.
        file: path of the source file the tree was produced from.
        verbose: also print the formatted trees (and section headers).
        include_attributes: forwarded to ``ast_dump`` and to the
            ``show_parse`` formatting helpers.

    Returns:
        0 when the dumps are identical, 1 otherwise (after printing a diff).
    """
    with open(file) as f:
        expected_tree = _peg_parser.parse_string(f.read(), oldparser=True)

    expected_text = ast_dump(expected_tree, include_attributes=include_attributes)
    actual_text = ast_dump(actual_tree, include_attributes=include_attributes)
    if actual_text == expected_text:
        if verbose:
            # These headers need the f-prefix, otherwise the literal text
            # "{file}" is printed instead of the file name.
            print(f"Tree for {file}:")
            print(show_parse.format_tree(actual_tree, include_attributes))
        return 0

    print(f"Diffing ASTs for {file} ...")

    expected = show_parse.format_tree(expected_tree, include_attributes)
    actual = show_parse.format_tree(actual_tree, include_attributes)

    if verbose:
        print(f"Expected for {file}:")
        print(expected)
        print(f"Actual for {file}:")
        print(actual)
        print(f"Diff for {file}:")

    diff = show_parse.diff_trees(expected_tree, actual_tree, include_attributes)
    for line in diff:
        print(line)

    return 1
def parse_file(source: str, file: str, mode: int, oldparser: bool) -> Tuple[Any, float]:
def parse_file(source: str, file: str) -> Tuple[Any, float]:
t0 = time.time()
if mode == COMPILE:
result = _peg_parser.compile_string(
source,
filename=file,
oldparser=oldparser,
)
else:
result = _peg_parser.parse_string(
source,
filename=file,
oldparser=oldparser,
ast=(mode == PARSE),
)
result = ast.parse(source, filename=file)
t1 = time.time()
return result, t1 - t0
def is_parsing_failure(source: str) -> bool:
    """Return True when the old parser also fails to parse *source*.

    *source* is parsed in ``exec`` mode with ``oldparser=True``. A
    ``SyntaxError`` means the old parser rejects the input too, which is the
    case the caller reports as "cannot be parsed by either parser"; any
    other exception propagates unchanged.
    """
    try:
        _peg_parser.parse_string(source, mode="exec", oldparser=True)
    except SyntaxError:
        # The old parser rejects it as well: a genuine parsing failure.
        return True
    return False
def generate_time_stats(files, total_seconds) -> None:
total_files = len(files)
total_bytes = 0
@ -160,27 +102,11 @@ def generate_time_stats(files, total_seconds) -> None:
)
def parse_directory(
directory: str,
verbose: bool,
excluded_files: List[str],
tree_arg: int,
short: bool,
mode: int,
oldparser: bool,
) -> int:
if tree_arg:
assert mode == PARSE, "Mode should be 1 (parse), when comparing the generated trees"
if oldparser and tree_arg:
print("Cannot specify tree argument with the cpython parser.", file=sys.stderr)
return 1
def parse_directory(directory: str, verbose: bool, excluded_files: List[str], short: bool) -> int:
# For a given directory, traverse files and attempt to parse each one
# - Output success/failure for each file
errors = 0
files = []
trees = {} # Trees to compare (after everything else is done)
total_seconds = 0
for file in sorted(glob(f"{directory}/**/*.py", recursive=True)):
@ -192,39 +118,20 @@ def parse_directory(
source = f.read()
try:
result, dt = parse_file(source, file, mode, oldparser)
result, dt = parse_file(source, file)
total_seconds += dt
if tree_arg:
trees[file] = result
report_status(succeeded=True, file=file, verbose=verbose, short=short)
except SyntaxError as error:
if is_parsing_failure(source):
print(f"File {file} cannot be parsed by either parser.")
else:
report_status(
succeeded=False, file=file, verbose=verbose, error=error, short=short
)
report_status(succeeded=False, file=file, verbose=verbose, error=error, short=short)
errors += 1
files.append(file)
t1 = time.time()
generate_time_stats(files, total_seconds)
if short:
print_memstats()
if errors:
print(f"Encountered {errors} failures.", file=sys.stderr)
# Compare trees (the dict is empty unless -t is given)
compare_trees_errors = 0
for file, tree in trees.items():
if not short:
print("Comparing ASTs for", file)
if compare_trees(tree, file, verbose, tree_arg >= 2) == 1:
compare_trees_errors += 1
if errors or compare_trees_errors:
return 1
return 0
@ -235,20 +142,8 @@ def main() -> None:
directory = args.directory
verbose = args.verbose
excluded_files = args.exclude
tree = args.tree
short = args.short
mode = 1 if args.tree else 2
sys.exit(
parse_directory(
directory,
verbose,
excluded_files,
tree,
short,
mode,
oldparser=False,
)
)
sys.exit(parse_directory(directory, verbose, excluded_files, short))
if __name__ == "__main__":