cpython/Tools/scripts/summarize_stats.py

395 lines
15 KiB
Python

"""Print a summary of specialization stats for all files in the
default stats folders.
"""
import collections
import os.path
import opcode
from datetime import date
import itertools
import sys
if os.name == "nt":
DEFAULT_DIR = "c:\\temp\\py_stats\\"
else:
DEFAULT_DIR = "/tmp/py_stats/"
#Create list of all instruction names
specialized = iter(opcode._specialized_instructions)
opname = ["<0>"]
for name in opcode.opname[1:]:
if name.startswith("<"):
try:
name = next(specialized)
except StopIteration:
pass
opname.append(name)
# opcode_name --> opcode
# Sort alphabetically.
opmap = {name: i for i, name in enumerate(opname)}
opmap = dict(sorted(opmap.items()))
TOTAL = "specialization.deferred", "specialization.hit", "specialization.miss", "execution_count"
def print_specialization_stats(name, family_stats, defines):
if "specializable" not in family_stats:
return
total = sum(family_stats.get(kind, 0) for kind in TOTAL)
if total == 0:
return
with Section(name, 3, f"specialization stats for {name} family"):
rows = []
for key in sorted(family_stats):
if key.startswith("specialization.failure_kinds"):
continue
if key in ("specialization.hit", "specialization.miss"):
label = key[len("specialization."):]
elif key == "execution_count":
label = "unquickened"
elif key in ("specialization.success", "specialization.failure", "specializable"):
continue
elif key.startswith("pair"):
continue
else:
label = key
rows.append((f"{label:>12}", f"{family_stats[key]:>12}", f"{100*family_stats[key]/total:0.1f}%"))
emit_table(("Kind", "Count", "Ratio"), rows)
print_title("Specialization attempts", 4)
total_attempts = 0
for key in ("specialization.success", "specialization.failure"):
total_attempts += family_stats.get(key, 0)
rows = []
if total_attempts:
for key in ("specialization.success", "specialization.failure"):
label = key[len("specialization."):]
label = label[0].upper() + label[1:]
val = family_stats.get(key, 0)
rows.append((label, val, f"{100*val/total_attempts:0.1f}%"))
emit_table(("", "Count:", "Ratio:"), rows)
total_failures = family_stats.get("specialization.failure", 0)
failure_kinds = [ 0 ] * 30
for key in family_stats:
if not key.startswith("specialization.failure_kind"):
continue
_, index = key[:-1].split("[")
index = int(index)
failure_kinds[index] = family_stats[key]
failures = [(value, index) for (index, value) in enumerate(failure_kinds)]
failures.sort(reverse=True)
rows = []
for value, index in failures:
if not value:
continue
rows.append((kind_to_text(index, defines, name), value, f"{100*value/total_failures:0.1f}%"))
emit_table(("Failure kind", "Count:", "Ratio:"), rows)
def gather_stats():
stats = collections.Counter()
for filename in os.listdir(DEFAULT_DIR):
with open(os.path.join(DEFAULT_DIR, filename)) as fd:
for line in fd:
try:
key, value = line.split(":")
except ValueError:
print (f"Unparsable line: '{line.strip()}' in {filename}", file=sys.stderr)
continue
key = key.strip()
value = int(value)
stats[key] += value
return stats
def extract_opcode_stats(stats):
opcode_stats = [ {} for _ in range(256) ]
for key, value in stats.items():
if not key.startswith("opcode"):
continue
n, _, rest = key[7:].partition("]")
opcode_stats[int(n)][rest.strip(".")] = value
return opcode_stats
def parse_kinds(spec_src, prefix="SPEC_FAIL"):
defines = collections.defaultdict(list)
start = "#define " + prefix + "_"
for line in spec_src:
line = line.strip()
if not line.startswith(start):
continue
line = line[len(start):]
name, val = line.split()
defines[int(val.strip())].append(name.strip())
return defines
def pretty(defname):
return defname.replace("_", " ").lower()
def kind_to_text(kind, defines, opname):
if kind < 7:
return pretty(defines[kind][0])
if opname.endswith("ATTR"):
opname = "ATTR"
if opname.endswith("SUBSCR"):
opname = "SUBSCR"
for name in defines[kind]:
if name.startswith(opname):
return pretty(name[len(opname)+1:])
return "kind " + str(kind)
def categorized_counts(opcode_stats):
basic = 0
specialized = 0
not_specialized = 0
specialized_instructions = {
op for op in opcode._specialized_instructions
if "__" not in op and "ADAPTIVE" not in op}
adaptive_instructions = {
op for op in opcode._specialized_instructions
if "ADAPTIVE" in op}
for i, opcode_stat in enumerate(opcode_stats):
if "execution_count" not in opcode_stat:
continue
count = opcode_stat['execution_count']
name = opname[i]
if "specializable" in opcode_stat:
not_specialized += count
elif name in adaptive_instructions:
not_specialized += count
elif name in specialized_instructions:
miss = opcode_stat.get("specialization.miss", 0)
not_specialized += miss
specialized += count - miss
else:
basic += count
return basic, not_specialized, specialized
def print_title(name, level=2):
print("#"*level, name)
print()
class Section:
def __init__(self, title, level=2, summary=None):
self.title = title
self.level = level
if summary is None:
self.summary = title.lower()
else:
self.summary = summary
def __enter__(self):
print_title(self.title, self.level)
print("<details>")
print("<summary>", self.summary, "</summary>")
print()
return self
def __exit__(*args):
print()
print("</details>")
print()
def to_str(x):
if isinstance(x, int):
return format(x, ",d")
else:
return str(x)
def emit_table(header, rows):
width = len(header)
header_line = "|"
under_line = "|"
for item in header:
under = "---"
if item.endswith(":"):
item = item[:-1]
under += ":"
header_line += item + " | "
under_line += under + "|"
print(header_line)
print(under_line)
for row in rows:
if width is not None and len(row) != width:
raise ValueError("Wrong number of elements in row '" + str(row) + "'")
print("|", " | ".join(to_str(i) for i in row), "|")
print()
def emit_execution_counts(opcode_stats, total):
with Section("Execution counts", summary="execution counts for all instructions"):
counts = []
for i, opcode_stat in enumerate(opcode_stats):
if "execution_count" in opcode_stat:
count = opcode_stat['execution_count']
miss = 0
if "specializable" not in opcode_stat:
miss = opcode_stat.get("specialization.miss")
counts.append((count, opname[i], miss))
counts.sort(reverse=True)
cumulative = 0
rows = []
for (count, name, miss) in counts:
cumulative += count
if miss:
miss = f"{100*miss/count:0.1f}%"
else:
miss = ""
rows.append((name, count, f"{100*count/total:0.1f}%",
f"{100*cumulative/total:0.1f}%", miss))
emit_table(
("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"),
rows
)
def emit_specialization_stats(opcode_stats):
spec_path = os.path.join(os.path.dirname(__file__), "../../Python/specialize.c")
with open(spec_path) as spec_src:
defines = parse_kinds(spec_src)
with Section("Specialization stats", summary="specialization stats by family"):
for i, opcode_stat in enumerate(opcode_stats):
name = opname[i]
print_specialization_stats(name, opcode_stat, defines)
def emit_specialization_overview(opcode_stats, total):
basic, not_specialized, specialized = categorized_counts(opcode_stats)
with Section("Specialization effectiveness"):
emit_table(("Instructions", "Count:", "Ratio:"), (
("Basic", basic, f"{basic*100/total:0.1f}%"),
("Not specialized", not_specialized, f"{not_specialized*100/total:0.1f}%"),
("Specialized", specialized, f"{specialized*100/total:0.1f}%"),
))
for title, field in (("Deferred", "specialization.deferred"), ("Misses", "specialization.miss")):
total = 0
counts = []
for i, opcode_stat in enumerate(opcode_stats):
value = opcode_stat.get(field, 0)
counts.append((value, opname[i]))
total += value
counts.sort(reverse=True)
if total:
with Section(f"{title} by instruction", 3):
rows = [ (name, count, f"{100*count/total:0.1f}%") for (count, name) in counts[:10] ]
emit_table(("Name", "Count:", "Ratio:"), rows)
def emit_call_stats(stats):
stats_path = os.path.join(os.path.dirname(__file__), "../../Include/pystats.h")
with open(stats_path) as stats_src:
defines = parse_kinds(stats_src, prefix="EVAL_CALL")
with Section("Call stats", summary="Inlined calls and frame stats"):
total = 0
for key, value in stats.items():
if "Calls to" in key:
total += value
rows = []
for key, value in stats.items():
if "Calls to" in key:
rows.append((key, value, f"{100*value/total:0.1f}%"))
elif key.startswith("Calls "):
name, index = key[:-1].split("[")
index = int(index)
label = name + " (" + pretty(defines[index][0]) + ")"
rows.append((label, value, f"{100*value/total:0.1f}%"))
for key, value in stats.items():
if key.startswith("Frame"):
rows.append((key, value, f"{100*value/total:0.1f}%"))
emit_table(("", "Count:", "Ratio:"), rows)
def emit_object_stats(stats):
with Section("Object stats", summary="allocations, frees and dict materializatons"):
total_materializations = stats.get("Object new values")
total_allocations = stats.get("Object allocations") + stats.get("Object allocations from freelist")
total_increfs = stats.get("Object interpreter increfs") + stats.get("Object increfs")
total_decrefs = stats.get("Object interpreter decrefs") + stats.get("Object decrefs")
rows = []
for key, value in stats.items():
if key.startswith("Object"):
if "materialize" in key:
ratio = f"{100*value/total_materializations:0.1f}%"
elif "allocations" in key:
ratio = f"{100*value/total_allocations:0.1f}%"
elif "increfs" in key:
ratio = f"{100*value/total_increfs:0.1f}%"
elif "decrefs" in key:
ratio = f"{100*value/total_decrefs:0.1f}%"
else:
ratio = ""
label = key[6:].strip()
label = label[0].upper() + label[1:]
rows.append((label, value, ratio))
emit_table(("", "Count:", "Ratio:"), rows)
def get_total(opcode_stats):
total = 0
for opcode_stat in opcode_stats:
if "execution_count" in opcode_stat:
total += opcode_stat['execution_count']
return total
def emit_pair_counts(opcode_stats, total):
pair_counts = []
for i, opcode_stat in enumerate(opcode_stats):
if i == 0:
continue
for key, value in opcode_stat.items():
if key.startswith("pair_count"):
x, _, _ = key[11:].partition("]")
if value:
pair_counts.append((value, (i, int(x))))
with Section("Pair counts", summary="Pair counts for top 100 pairs"):
pair_counts.sort(reverse=True)
cumulative = 0
rows = []
for (count, pair) in itertools.islice(pair_counts, 100):
i, j = pair
cumulative += count
rows.append((opname[i] + " " + opname[j], count, f"{100*count/total:0.1f}%",
f"{100*cumulative/total:0.1f}%"))
emit_table(("Pair", "Count:", "Self:", "Cumulative:"),
rows
)
with Section("Predecessor/Successor Pairs", summary="Top 5 predecessors and successors of each opcode"):
predecessors = collections.defaultdict(collections.Counter)
successors = collections.defaultdict(collections.Counter)
total_predecessors = collections.Counter()
total_successors = collections.Counter()
for count, (first, second) in pair_counts:
if count:
predecessors[second][first] = count
successors[first][second] = count
total_predecessors[second] += count
total_successors[first] += count
for name, i in opmap.items():
total1 = total_predecessors[i]
total2 = total_successors[i]
if total1 == 0 and total2 == 0:
continue
pred_rows = succ_rows = ()
if total1:
pred_rows = [(opname[pred], count, f"{count/total1:.1%}")
for (pred, count) in predecessors[i].most_common(5)]
if total2:
succ_rows = [(opname[succ], count, f"{count/total2:.1%}")
for (succ, count) in successors[i].most_common(5)]
with Section(name, 3, f"Successors and predecessors for {name}"):
emit_table(("Predecessors", "Count:", "Percentage:"),
pred_rows
)
emit_table(("Successors", "Count:", "Percentage:"),
succ_rows
)
def main():
stats = gather_stats()
opcode_stats = extract_opcode_stats(stats)
total = get_total(opcode_stats)
emit_execution_counts(opcode_stats, total)
emit_pair_counts(opcode_stats, total)
emit_specialization_stats(opcode_stats)
emit_specialization_overview(opcode_stats, total)
emit_call_stats(stats)
emit_object_stats(stats)
print("---")
print("Stats gathered on:", date.today())
if __name__ == "__main__":
main()