2021-08-04 07:39:52 -03:00
|
|
|
"""Print a summary of specialization stats for all files in the
|
|
|
|
default stats folders.
|
|
|
|
"""
|
|
|
|
|
|
|
|
import collections
|
|
|
|
import os.path
|
2021-12-17 10:48:01 -04:00
|
|
|
import opcode
|
2022-02-09 08:30:26 -04:00
|
|
|
from datetime import date
|
2022-02-14 11:53:38 -04:00
|
|
|
import itertools
|
|
|
|
import argparse
|
2021-08-04 07:39:52 -03:00
|
|
|
|
|
|
|
# Directory that gather_stats() scans for stats files; the location depends
# on whether os.name reports "nt".
DEFAULT_DIR = "c:\\temp\\py_stats\\" if os.name == "nt" else "/tmp/py_stats/"
|
|
|
|
|
2021-12-17 10:48:01 -04:00
|
|
|
#Create list of all instruction names
|
|
|
|
specialized = iter(opcode._specialized_instructions)
|
|
|
|
opname = ["<0>"]
|
|
|
|
for name in opcode.opname[1:]:
|
|
|
|
if name.startswith("<"):
|
|
|
|
try:
|
|
|
|
name = next(specialized)
|
|
|
|
except StopIteration:
|
|
|
|
pass
|
|
|
|
opname.append(name)
|
2021-08-04 07:39:52 -03:00
|
|
|
|
2022-03-18 07:10:31 -03:00
|
|
|
# opcode_name --> opcode
# Sort alphabetically.
opmap = dict(sorted((name, number) for number, name in enumerate(opname)))
|
|
|
|
|
2021-12-17 10:48:01 -04:00
|
|
|
# Stat keys whose counts are summed to form the per-family total in
# print_specialization_stats().
TOTAL = (
    "specialization.deferred",
    "specialization.hit",
    "specialization.miss",
    "execution_count",
)
|
|
|
|
|
2022-02-09 08:30:26 -04:00
|
|
|
def print_specialization_stats(name, family_stats, defines):
    """Print the specialization report for one instruction.

    Nothing is printed unless *family_stats* contains the "specializable"
    key and the counters listed in ``TOTAL`` sum to a non-zero value.
    Otherwise three tables are emitted inside a collapsible section: counts
    per kind, specialization attempts (success/failure) and failure kinds.

    Args:
        name: Instruction name; used as the section title and passed to
            ``kind_to_text`` to label failure kinds.
        family_stats: Mapping of stat key to count for this instruction.
        defines: Failure-kind table, as produced by ``parse_kinds``.
    """
    if "specializable" not in family_stats:
        return
    total = sum(family_stats.get(kind, 0) for kind in TOTAL)
    if total == 0:
        return

    def ratio(part, whole):
        # An instruction can be executed without a single specialization
        # attempt being recorded; leave the cell empty rather than divide
        # by zero.
        return f"{100*part/whole:0.1f}%" if whole else ""

    with Section(name, 3, f"specialization stats for {name} family"):
        rows = []
        for key in sorted(family_stats):
            if key.startswith("specialization.failure_kinds"):
                continue
            if key in ("specialization.hit", "specialization.miss"):
                label = key[len("specialization."):]
            elif key == "execution_count":
                label = "unquickened"
            elif key in ("specialization.success", "specialization.failure", "specializable"):
                continue
            elif key.startswith("pair"):
                continue
            else:
                label = key
            # `total` is known to be non-zero here (checked above).
            rows.append((f"{label:>12}", f"{family_stats[key]:>12}", f"{100*family_stats[key]/total:0.1f}%"))
        emit_table(("Kind", "Count", "Ratio"), rows)

        print_title("Specialization attempts", 4)
        attempt_keys = ("specialization.success", "specialization.failure")
        total_attempts = sum(family_stats.get(key, 0) for key in attempt_keys)
        rows = []
        for key in attempt_keys:
            label = key[len("specialization."):].capitalize()
            val = family_stats.get(key, 0)
            rows.append((label, val, ratio(val, total_attempts)))
        emit_table(("", "Count:", "Ratio:"), rows)

        total_failures = family_stats.get("specialization.failure", 0)
        # Keyed by failure-kind index, so any index appearing in the stats is
        # accepted instead of only those below a fixed list size.
        failure_kinds = {}
        for key, count in family_stats.items():
            if not key.startswith("specialization.failure_kind"):
                continue
            # Keys look like "<prefix>[<index>]"; drop the trailing "]".
            _, index = key[:-1].split("[")
            failure_kinds[int(index)] = count
        # Most frequent failure first; ties are broken by the higher index.
        failures = sorted(
            ((count, index) for index, count in failure_kinds.items()),
            reverse=True,
        )
        rows = []
        for count, index in failures:
            if not count:
                continue
            rows.append((kind_to_text(index, defines, name), count, ratio(count, total_failures)))
        emit_table(("Failure kind", "Count:", "Ratio:"), rows)
|
2021-08-04 07:39:52 -03:00
|
|
|
|
2021-12-17 10:48:01 -04:00
|
|
|
def gather_stats():
    """Sum the counters from every file in ``DEFAULT_DIR``.

    Each line is expected to have the form ``key: integer``. Lines that do
    not match (blank lines, extra colons, a non-integer count) are reported
    on stderr and skipped, so one damaged line does not abort the summary.

    Returns:
        A ``collections.Counter`` mapping each stripped key to the sum of its
        values across all files.
    """
    import sys  # Only needed for the diagnostic below.

    stats = collections.Counter()
    for filename in os.listdir(DEFAULT_DIR):
        with open(os.path.join(DEFAULT_DIR, filename)) as fd:
            for line in fd:
                try:
                    key, value = line.split(":")
                    value = int(value)
                except ValueError:
                    # Raised both by a failed unpacking and by int().
                    print(f"Unparsable line: '{line.strip()}' in {filename}", file=sys.stderr)
                    continue
                stats[key.strip()] += value
    return stats
|
|
|
|
|
|
|
|
def extract_opcode_stats(stats):
    """Group the per-opcode entries of *stats* by opcode number.

    Keys starting with "opcode" are read as "opcode[<n>]<rest>"; the value is
    stored under ``rest`` (with leading/trailing dots removed) in the dict at
    position ``n`` of the result. All other keys are ignored.

    Returns:
        A list of 256 dicts, one per opcode number.
    """
    prefix_len = len("opcode[")
    per_opcode = [dict() for _ in range(256)]
    for stat_key in stats:
        if stat_key.startswith("opcode"):
            number, _, tail = stat_key[prefix_len:].partition("]")
            per_opcode[int(number)][tail.strip(".")] = stats[stat_key]
    return per_opcode
|
|
|
|
|
2022-02-09 08:30:26 -04:00
|
|
|
def parse_kinds(spec_src):
    """Collect the ``#define SPEC_FAIL_<NAME> <value>`` lines of *spec_src*.

    Args:
        spec_src: Iterable of text lines (for example an open file).

    Returns:
        A ``collections.defaultdict(list)`` mapping each integer value to the
        list of names (without the ``SPEC_FAIL_`` prefix) defined with that
        value, in the order they were encountered.
    """
    marker = "#define SPEC_FAIL_"
    kinds = collections.defaultdict(list)
    for raw_line in spec_src:
        stripped = raw_line.strip()
        if stripped.startswith(marker):
            # Exactly two whitespace-separated fields are expected here.
            ident, number = stripped[len(marker):].split()
            kinds[int(number.strip())].append(ident.strip())
    return kinds
|
|
|
|
|
|
|
|
def pretty(defname):
    """Return *defname* lower-cased with every underscore turned into a space."""
    lowered = defname.lower()
    return lowered.replace("_", " ")
|
|
|
|
|
|
|
|
def kind_to_text(kind, defines, opname):
    """Return a readable label for failure kind *kind* of instruction *opname*.

    Kinds below 7 use the first name registered for that kind in *defines*
    (presumably the kinds shared by every instruction family -- confirm
    against the defines source). For other kinds, the instruction name is
    reduced to a family prefix ("ATTR", "SUBSCR" or "CALL" where applicable)
    and the first define starting with that prefix is used, with the prefix
    and the following character removed. If none matches, "kind <n>" is
    returned.
    """
    if kind < 7:
        return pretty(defines[kind][0])

    if opname.endswith("ATTR"):
        family = "ATTR"
    elif opname.endswith("SUBSCR"):
        family = "SUBSCR"
    elif opname.startswith("PRECALL"):
        family = "CALL"
    else:
        family = opname

    match = next((define for define in defines[kind] if define.startswith(family)), None)
    if match is None:
        return f"kind {kind}"
    return pretty(match[len(family) + 1:])
|
2021-12-17 10:48:01 -04:00
|
|
|
|
2022-02-08 07:50:02 -04:00
|
|
|
def categorized_counts(opcode_stats):
    """Split execution counts into basic / not specialized / specialized.

    For every entry of *opcode_stats* that has an "execution_count":
      * entries marked "specializable", or whose instruction name contains
        "ADAPTIVE", count entirely as not specialized;
      * other names from ``opcode._specialized_instructions`` (excluding
        those containing "__") count as specialized, except for their
        "specialization.miss" count, which is added to not specialized;
      * everything else counts as basic.

    Returns:
        The tuple ``(basic, not_specialized, specialized)``.
    """
    known = opcode._specialized_instructions
    adaptive_names = {op for op in known if "ADAPTIVE" in op}
    specialized_names = {
        op for op in known
        if not ("__" in op or "ADAPTIVE" in op)
    }

    basic = not_specialized = specialized = 0
    for number, stat in enumerate(opcode_stats):
        if "execution_count" not in stat:
            continue
        executed = stat["execution_count"]
        instruction = opname[number]
        if "specializable" in stat or instruction in adaptive_names:
            not_specialized += executed
        elif instruction in specialized_names:
            misses = stat.get("specialization.miss", 0)
            not_specialized += misses
            specialized += executed - misses
        else:
            basic += executed
    return basic, not_specialized, specialized
|
|
|
|
|
2022-02-09 08:30:26 -04:00
|
|
|
def print_title(name, level=2):
    """Print *name* as a heading prefixed by *level* '#' characters, then a blank line."""
    hashes = "#" * level
    print(hashes, name, end="\n\n")
|
|
|
|
|
|
|
|
class Section:
    """Context manager that prints a titled, collapsible block.

    Entering prints the title via ``print_title`` followed by an opening
    ``<details>`` tag and a ``<summary>`` line; leaving prints the closing
    ``</details>`` tag surrounded by blank lines.
    """

    def __init__(self, title, level=2, summary=None):
        self.title = title
        self.level = level
        # Without an explicit summary, the lower-cased title is used.
        self.summary = title.lower() if summary is None else summary

    def __enter__(self):
        print_title(self.title, self.level)
        print("<details>")
        print("<summary>", self.summary, "</summary>", end="\n\n")
        return self

    def __exit__(*args):
        # Exception details are ignored; nothing is returned, so exceptions
        # raised inside the block propagate.
        print("\n</details>\n")
|
|
|
|
|
|
|
|
def emit_table(header, rows):
    """Print *rows* as a Markdown table under *header*, then a blank line.

    A header cell ending in ":" is printed without the colon and its cell in
    the delimiter row becomes "---:" (right-aligned in Markdown tables)
    instead of "---".

    Args:
        header: Sequence of column titles (strings).
        rows: Iterable of rows; each row must have as many items as
            *header*. Items are converted with ``str``.

    Raises:
        ValueError: If a row's length differs from the header's. Rows before
            the offending one have already been printed at that point.
    """
    width = len(header)
    titles = []
    delimiters = []
    for item in header:
        if item.endswith(":"):
            titles.append(item[:-1])
            delimiters.append("---:")
        else:
            titles.append(item)
            delimiters.append("---")
    print("|" + "".join(title + " | " for title in titles))
    print("|" + "".join(delimiter + "|" for delimiter in delimiters))
    for row in rows:
        if len(row) != width:
            # Report the offending row only, not the whole table.
            raise ValueError("Wrong number of elements in row '" + str(row) + "'")
        print("|", " | ".join(str(i) for i in row), "|")
    print()
|
|
|
|
|
|
|
|
def emit_execution_counts(opcode_stats, total):
    """Print the execution-count table for all executed instructions.

    Rows are ordered by descending execution count. Each row shows the
    instruction name, its count, its share of *total*, the running
    cumulative share, and a miss ratio. The miss ratio is left empty for
    entries marked "specializable" or without a non-zero
    "specialization.miss" count.

    Args:
        opcode_stats: Per-opcode stat dicts, indexed by opcode number.
        total: Denominator for the "Self" and "Cumulative" columns.
    """
    with Section("Execution counts", summary="execution counts for all instructions"):
        entries = []
        for number, stat in enumerate(opcode_stats):
            if "execution_count" not in stat:
                continue
            misses = 0 if "specializable" in stat else stat.get("specialization.miss")
            entries.append((stat['execution_count'], opname[number], misses))
        entries.sort(reverse=True)

        running = 0
        table = []
        for executed, instruction, misses in entries:
            running += executed
            miss_ratio = f"{100*misses/executed:0.1f}%" if misses else ""
            table.append((
                instruction,
                executed,
                f"{100*executed/total:0.1f}%",
                f"{100*running/total:0.1f}%",
                miss_ratio,
            ))
        emit_table(("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"), table)
|
|
|
|
|
|
|
|
|
|
|
|
def emit_specialization_stats(opcode_stats):
    """Print the per-instruction specialization reports.

    The failure-kind defines are read from ``../../Python/specialize.c``
    relative to this file's directory, then ``print_specialization_stats``
    is called for every entry of *opcode_stats* inside one section.
    """
    script_dir = os.path.dirname(__file__)
    with open(os.path.join(script_dir, "../../Python/specialize.c")) as source:
        defines = parse_kinds(source)
    with Section("Specialization stats", summary="specialization stats by family"):
        for number, family_stats in enumerate(opcode_stats):
            print_specialization_stats(opname[number], family_stats, defines)
|
|
|
|
|
|
|
|
def emit_specialization_overview(opcode_stats, total):
    """Print the basic / not specialized / specialized summary table.

    Counts come from ``categorized_counts``; each ratio is the count's share
    of *total*.
    """
    counts = categorized_counts(opcode_stats)
    labels = ("Basic", "Not specialized", "Specialized")
    with Section("Specialization effectiveness"):
        table = tuple(
            (label, count, f"{count*100/total:0.1f}%")
            for label, count in zip(labels, counts)
        )
        emit_table(("Instructions", "Count:", "Ratio:"), table)
|
|
|
|
|
|
|
|
def emit_call_stats(stats):
    """Print the call and frame statistics table.

    Rows are emitted first for every key containing "Calls to", then for
    every key starting with "Frame". All ratios are relative to the sum of
    the "Calls to" values; when that sum is zero the ratio cells are left
    empty instead of raising ZeroDivisionError.

    Args:
        stats: Mapping of stat key to count.
    """
    with Section("Call stats", summary="Inlined calls and frame stats"):
        total = sum(value for key, value in stats.items() if "Calls to" in key)

        def ratio(value):
            # Frame rows may exist even when no calls were recorded.
            return f"{100*value/total:0.1f}%" if total else ""

        rows = [(key, value, ratio(value))
                for key, value in stats.items() if "Calls to" in key]
        rows += [(key, value, ratio(value))
                 for key, value in stats.items() if key.startswith("Frame")]
        emit_table(("", "Count:", "Ratio:"), rows)
|
2022-02-09 08:30:26 -04:00
|
|
|
|
|
|
|
def emit_object_stats(stats):
    """Print the table of stats whose key starts with "Object".

    The label is the key without its first six characters, stripped and with
    its first letter upper-cased. Keys containing "materialize" also get a
    ratio relative to the "Object new values" count; the ratio is left empty
    when that count is missing or zero.

    Args:
        stats: Mapping of stat key to count.
    """
    with Section("Object stats", summary="allocations, frees and dict materializatons"):
        # May be None (key absent) or 0; both mean no ratio can be computed.
        total = stats.get("Object new values")
        rows = []
        for key, value in stats.items():
            if not key.startswith("Object"):
                continue
            if "materialize" in key and total:
                materialize = f"{100*value/total:0.1f}%"
            else:
                materialize = ""
            label = key[6:].strip()
            # Slicing (not indexing) keeps a bare "Object" key from raising.
            label = label[:1].upper() + label[1:]
            rows.append((label, value, materialize))
        emit_table(("", "Count:", "Ratio:"), rows)
|
2022-02-09 08:30:26 -04:00
|
|
|
|
2022-02-14 11:53:38 -04:00
|
|
|
def get_total(opcode_stats):
    """Return the sum of "execution_count" over all entries that have one."""
    return sum(
        stat['execution_count']
        for stat in opcode_stats
        if "execution_count" in stat
    )
|
|
|
|
|
|
|
|
def emit_pair_counts(opcode_stats, total):
    """Print instruction-pair statistics.

    Non-zero "pair_count[<n>]" entries are collected from every opcode except
    opcode 0. Two sections are printed:

    * "Pair counts": the 100 most frequent pairs with their count, share of
      *total* and cumulative share.
    * "Predecessor/Successor Pairs": for each name in ``opmap`` that takes
      part in at least one pair, its three most common predecessors and
      successors with their share of that opcode's pair total.

    Args:
        opcode_stats: Per-opcode stat dicts, indexed by opcode number.
        total: Denominator for the "Self" and "Cumulative" columns.
    """
    prefix_len = len("pair_count[")
    observed = []
    for first, stat in enumerate(opcode_stats):
        if first == 0:
            continue
        for key, occurrences in stat.items():
            if not key.startswith("pair_count"):
                continue
            second, _, _ = key[prefix_len:].partition("]")
            if occurrences:
                observed.append((occurrences, (first, int(second))))

    with Section("Pair counts", summary="Pair counts for top 100 pairs"):
        observed.sort(reverse=True)
        running = 0
        table = []
        for occurrences, (first, second) in observed[:100]:
            running += occurrences
            table.append((
                opname[first] + " " + opname[second],
                occurrences,
                f"{100*occurrences/total:0.1f}%",
                f"{100*running/total:0.1f}%",
            ))
        emit_table(("Pair", "Count:", "Self:", "Cumulative:"), table)

    with Section("Predecessor/Successor Pairs", summary="Top 3 predecessors and successors of each opcode"):
        predecessors = collections.defaultdict(collections.Counter)
        successors = collections.defaultdict(collections.Counter)
        incoming_totals = collections.Counter()
        outgoing_totals = collections.Counter()
        for occurrences, (first, second) in observed:
            if not occurrences:
                continue
            predecessors[second][first] = occurrences
            successors[first][second] = occurrences
            incoming_totals[second] += occurrences
            outgoing_totals[first] += occurrences

        for name, number in opmap.items():
            incoming = incoming_totals[number]
            outgoing = outgoing_totals[number]
            if not (incoming or outgoing):
                continue
            pred_rows = succ_rows = ()
            if incoming:
                pred_rows = [
                    (opname[pred], seen, f"{seen/incoming:.1%}")
                    for pred, seen in predecessors[number].most_common(3)
                ]
            if outgoing:
                succ_rows = [
                    (opname[succ], seen, f"{seen/outgoing:.1%}")
                    for succ, seen in successors[number].most_common(3)
                ]
            with Section(name, 3, f"Successors and predecessors for {name}"):
                emit_table(("Predecessors", "Count:", "Percentage:"), pred_rows)
                emit_table(("Successors", "Count:", "Percentage:"), succ_rows)
|
2022-02-14 11:53:38 -04:00
|
|
|
|
|
|
|
def main():
    """Gather the stats and print every report section, then a dated footer."""
    raw_stats = gather_stats()
    per_opcode = extract_opcode_stats(raw_stats)
    executed = get_total(per_opcode)

    # Reports that are scaled by the total execution count take `executed`.
    emit_execution_counts(per_opcode, executed)
    emit_pair_counts(per_opcode, executed)
    emit_specialization_stats(per_opcode)
    emit_specialization_overview(per_opcode, executed)

    # These two work on the raw, non-opcode counters.
    emit_call_stats(raw_stats)
    emit_object_stats(raw_stats)

    print("---")
    print("Stats gathered on:", date.today())
|
2021-08-04 07:39:52 -03:00
|
|
|
|
|
|
|
# Print the summary only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()
|