"""Print a summary of specialization stats for all files in the default stats folders. """ import collections import os.path import opcode from datetime import date import itertools import sys if os.name == "nt": DEFAULT_DIR = "c:\\temp\\py_stats\\" else: DEFAULT_DIR = "/tmp/py_stats/" #Create list of all instruction names specialized = iter(opcode._specialized_instructions) opname = ["<0>"] for name in opcode.opname[1:]: if name.startswith("<"): try: name = next(specialized) except StopIteration: pass opname.append(name) # opcode_name --> opcode # Sort alphabetically. opmap = {name: i for i, name in enumerate(opname)} opmap = dict(sorted(opmap.items())) TOTAL = "specialization.deferred", "specialization.hit", "specialization.miss", "execution_count" def print_specialization_stats(name, family_stats, defines): if "specializable" not in family_stats: return total = sum(family_stats.get(kind, 0) for kind in TOTAL) if total == 0: return with Section(name, 3, f"specialization stats for {name} family"): rows = [] for key in sorted(family_stats): if key.startswith("specialization.failure_kinds"): continue if key in ("specialization.hit", "specialization.miss"): label = key[len("specialization."):] elif key == "execution_count": label = "unquickened" elif key in ("specialization.success", "specialization.failure", "specializable"): continue elif key.startswith("pair"): continue else: label = key rows.append((f"{label:>12}", f"{family_stats[key]:>12}", f"{100*family_stats[key]/total:0.1f}%")) emit_table(("Kind", "Count", "Ratio"), rows) print_title("Specialization attempts", 4) total_attempts = 0 for key in ("specialization.success", "specialization.failure"): total_attempts += family_stats.get(key, 0) rows = [] if total_attempts: for key in ("specialization.success", "specialization.failure"): label = key[len("specialization."):] label = label[0].upper() + label[1:] val = family_stats.get(key, 0) rows.append((label, val, f"{100*val/total_attempts:0.1f}%")) emit_table(("", "Count:", "Ratio:"), rows) total_failures = family_stats.get("specialization.failure", 0) failure_kinds = [ 0 ] * 30 for key in family_stats: if not key.startswith("specialization.failure_kind"): continue _, index = key[:-1].split("[") index = int(index) failure_kinds[index] = family_stats[key] failures = [(value, index) for (index, value) in enumerate(failure_kinds)] failures.sort(reverse=True) rows = [] for value, index in failures: if not value: continue rows.append((kind_to_text(index, defines, name), value, f"{100*value/total_failures:0.1f}%")) emit_table(("Failure kind", "Count:", "Ratio:"), rows) def gather_stats(): stats = collections.Counter() for filename in os.listdir(DEFAULT_DIR): with open(os.path.join(DEFAULT_DIR, filename)) as fd: for line in fd: try: key, value = line.split(":") except ValueError: print (f"Unparsable line: '{line.strip()}' in {filename}", file=sys.stderr) continue key = key.strip() value = int(value) stats[key] += value return stats def extract_opcode_stats(stats): opcode_stats = [ {} for _ in range(256) ] for key, value in stats.items(): if not key.startswith("opcode"): continue n, _, rest = key[7:].partition("]") opcode_stats[int(n)][rest.strip(".")] = value return opcode_stats def parse_kinds(spec_src, prefix="SPEC_FAIL"): defines = collections.defaultdict(list) start = "#define " + prefix + "_" for line in spec_src: line = line.strip() if not line.startswith(start): continue line = line[len(start):] name, val = line.split() defines[int(val.strip())].append(name.strip()) return defines def pretty(defname): return defname.replace("_", " ").lower() def kind_to_text(kind, defines, opname): if kind < 7: return pretty(defines[kind][0]) if opname.endswith("ATTR"): opname = "ATTR" if opname.endswith("SUBSCR"): opname = "SUBSCR" for name in defines[kind]: if name.startswith(opname): return pretty(name[len(opname)+1:]) return "kind " + str(kind) def categorized_counts(opcode_stats): basic = 0 specialized = 0 not_specialized = 0 specialized_instructions = { op for op in opcode._specialized_instructions if "__" not in op and "ADAPTIVE" not in op} adaptive_instructions = { op for op in opcode._specialized_instructions if "ADAPTIVE" in op} for i, opcode_stat in enumerate(opcode_stats): if "execution_count" not in opcode_stat: continue count = opcode_stat['execution_count'] name = opname[i] if "specializable" in opcode_stat: not_specialized += count elif name in adaptive_instructions: not_specialized += count elif name in specialized_instructions: miss = opcode_stat.get("specialization.miss", 0) not_specialized += miss specialized += count - miss else: basic += count return basic, not_specialized, specialized def print_title(name, level=2): print("#"*level, name) print() class Section: def __init__(self, title, level=2, summary=None): self.title = title self.level = level if summary is None: self.summary = title.lower() else: self.summary = summary def __enter__(self): print_title(self.title, self.level) print("
") print("", self.summary, "") print() return self def __exit__(*args): print() print("
") print() def to_str(x): if isinstance(x, int): return format(x, ",d") else: return str(x) def emit_table(header, rows): width = len(header) header_line = "|" under_line = "|" for item in header: under = "---" if item.endswith(":"): item = item[:-1] under += ":" header_line += item + " | " under_line += under + "|" print(header_line) print(under_line) for row in rows: if width is not None and len(row) != width: raise ValueError("Wrong number of elements in row '" + str(row) + "'") print("|", " | ".join(to_str(i) for i in row), "|") print() def emit_execution_counts(opcode_stats, total): with Section("Execution counts", summary="execution counts for all instructions"): counts = [] for i, opcode_stat in enumerate(opcode_stats): if "execution_count" in opcode_stat: count = opcode_stat['execution_count'] miss = 0 if "specializable" not in opcode_stat: miss = opcode_stat.get("specialization.miss") counts.append((count, opname[i], miss)) counts.sort(reverse=True) cumulative = 0 rows = [] for (count, name, miss) in counts: cumulative += count if miss: miss = f"{100*miss/count:0.1f}%" else: miss = "" rows.append((name, count, f"{100*count/total:0.1f}%", f"{100*cumulative/total:0.1f}%", miss)) emit_table( ("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"), rows ) def emit_specialization_stats(opcode_stats): spec_path = os.path.join(os.path.dirname(__file__), "../../Python/specialize.c") with open(spec_path) as spec_src: defines = parse_kinds(spec_src) with Section("Specialization stats", summary="specialization stats by family"): for i, opcode_stat in enumerate(opcode_stats): name = opname[i] print_specialization_stats(name, opcode_stat, defines) def emit_specialization_overview(opcode_stats, total): basic, not_specialized, specialized = categorized_counts(opcode_stats) with Section("Specialization effectiveness"): emit_table(("Instructions", "Count:", "Ratio:"), ( ("Basic", basic, f"{basic*100/total:0.1f}%"), ("Not specialized", not_specialized, f"{not_specialized*100/total:0.1f}%"), ("Specialized", specialized, f"{specialized*100/total:0.1f}%"), )) for title, field in (("Deferred", "specialization.deferred"), ("Misses", "specialization.miss")): total = 0 counts = [] for i, opcode_stat in enumerate(opcode_stats): value = opcode_stat.get(field, 0) counts.append((value, opname[i])) total += value counts.sort(reverse=True) if total: with Section(f"{title} by instruction", 3): rows = [ (name, count, f"{100*count/total:0.1f}%") for (count, name) in counts[:10] ] emit_table(("Name", "Count:", "Ratio:"), rows) def emit_call_stats(stats): stats_path = os.path.join(os.path.dirname(__file__), "../../Include/pystats.h") with open(stats_path) as stats_src: defines = parse_kinds(stats_src, prefix="EVAL_CALL") with Section("Call stats", summary="Inlined calls and frame stats"): total = 0 for key, value in stats.items(): if "Calls to" in key: total += value rows = [] for key, value in stats.items(): if "Calls to" in key: rows.append((key, value, f"{100*value/total:0.1f}%")) elif key.startswith("Calls "): name, index = key[:-1].split("[") index = int(index) label = name + " (" + pretty(defines[index][0]) + ")" rows.append((label, value, f"{100*value/total:0.1f}%")) for key, value in stats.items(): if key.startswith("Frame"): rows.append((key, value, f"{100*value/total:0.1f}%")) emit_table(("", "Count:", "Ratio:"), rows) def emit_object_stats(stats): with Section("Object stats", summary="allocations, frees and dict materializatons"): total_materializations = stats.get("Object new values") total_allocations = stats.get("Object allocations") + stats.get("Object allocations from freelist") total_increfs = stats.get("Object interpreter increfs") + stats.get("Object increfs") total_decrefs = stats.get("Object interpreter decrefs") + stats.get("Object decrefs") rows = [] for key, value in stats.items(): if key.startswith("Object"): if "materialize" in key: ratio = f"{100*value/total_materializations:0.1f}%" elif "allocations" in key: ratio = f"{100*value/total_allocations:0.1f}%" elif "increfs" in key: ratio = f"{100*value/total_increfs:0.1f}%" elif "decrefs" in key: ratio = f"{100*value/total_decrefs:0.1f}%" else: ratio = "" label = key[6:].strip() label = label[0].upper() + label[1:] rows.append((label, value, ratio)) emit_table(("", "Count:", "Ratio:"), rows) def get_total(opcode_stats): total = 0 for opcode_stat in opcode_stats: if "execution_count" in opcode_stat: total += opcode_stat['execution_count'] return total def emit_pair_counts(opcode_stats, total): pair_counts = [] for i, opcode_stat in enumerate(opcode_stats): if i == 0: continue for key, value in opcode_stat.items(): if key.startswith("pair_count"): x, _, _ = key[11:].partition("]") if value: pair_counts.append((value, (i, int(x)))) with Section("Pair counts", summary="Pair counts for top 100 pairs"): pair_counts.sort(reverse=True) cumulative = 0 rows = [] for (count, pair) in itertools.islice(pair_counts, 100): i, j = pair cumulative += count rows.append((opname[i] + " " + opname[j], count, f"{100*count/total:0.1f}%", f"{100*cumulative/total:0.1f}%")) emit_table(("Pair", "Count:", "Self:", "Cumulative:"), rows ) with Section("Predecessor/Successor Pairs", summary="Top 5 predecessors and successors of each opcode"): predecessors = collections.defaultdict(collections.Counter) successors = collections.defaultdict(collections.Counter) total_predecessors = collections.Counter() total_successors = collections.Counter() for count, (first, second) in pair_counts: if count: predecessors[second][first] = count successors[first][second] = count total_predecessors[second] += count total_successors[first] += count for name, i in opmap.items(): total1 = total_predecessors[i] total2 = total_successors[i] if total1 == 0 and total2 == 0: continue pred_rows = succ_rows = () if total1: pred_rows = [(opname[pred], count, f"{count/total1:.1%}") for (pred, count) in predecessors[i].most_common(5)] if total2: succ_rows = [(opname[succ], count, f"{count/total2:.1%}") for (succ, count) in successors[i].most_common(5)] with Section(name, 3, f"Successors and predecessors for {name}"): emit_table(("Predecessors", "Count:", "Percentage:"), pred_rows ) emit_table(("Successors", "Count:", "Percentage:"), succ_rows ) def main(): stats = gather_stats() opcode_stats = extract_opcode_stats(stats) total = get_total(opcode_stats) emit_execution_counts(opcode_stats, total) emit_pair_counts(opcode_stats, total) emit_specialization_stats(opcode_stats) emit_specialization_overview(opcode_stats, total) emit_call_stats(stats) emit_object_stats(stats) print("---") print("Stats gathered on:", date.today()) if __name__ == "__main__": main()