diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index e74fdd4d32e..38480a4f6cd 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -100,6 +100,7 @@ typedef struct _gc_stats { typedef struct _uop_stats { uint64_t execution_count; uint64_t miss; + uint64_t pair_count[MAX_UOP_ID + 1]; } UOpStats; #define _Py_UOP_HIST_SIZE 32 diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 688051bbff7..1ec0348d6e5 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -310,6 +310,13 @@ extern int _PyStaticCode_Init(PyCodeObject *co); #define GC_STAT_ADD(gen, name, n) do { if (_Py_stats) _Py_stats->gc_stats[(gen)].name += (n); } while (0) #define OPT_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0) #define UOP_STAT_INC(opname, name) do { if (_Py_stats) { assert(opname < 512); _Py_stats->optimization_stats.opcode[opname].name++; } } while (0) +#define UOP_PAIR_INC(uopcode, lastuop) \ + do { \ + if (lastuop && _Py_stats) { \ + _Py_stats->optimization_stats.opcode[lastuop].pair_count[uopcode]++; \ + } \ + lastuop = uopcode; \ + } while (0) #define OPT_UNSUPPORTED_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.unsupported_opcode[opname]++; } while (0) #define OPT_ERROR_IN_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.error_in_opcode[opname]++; } while (0) #define OPT_HIST(length, name) \ @@ -337,6 +344,7 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void); #define GC_STAT_ADD(gen, name, n) ((void)0) #define OPT_STAT_INC(name) ((void)0) #define UOP_STAT_INC(opname, name) ((void)0) +#define UOP_PAIR_INC(uopcode, lastuop) ((void)0) #define OPT_UNSUPPORTED_OPCODE(opname) ((void)0) #define OPT_ERROR_IN_OPCODE(opname) ((void)0) #define OPT_HIST(length, name) ((void)0) diff --git a/Misc/ACKS b/Misc/ACKS index a108ec37d44..76d30b257b4 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -642,6 +642,7 @@ Neil Girdhar Matt Giuca Andrea Giudiceandrea Franz Glasner +Jeff Glass Wim Glenn Michael Goderbauer Karan Goel diff --git a/Python/ceval.c b/Python/ceval.c index f718a77fb02..c0783f7377a 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -995,6 +995,7 @@ enter_tier_two: ; // dummy statement after a label, before a declaration uint16_t uopcode; #ifdef Py_STATS + int lastuop = 0; uint64_t trace_uop_execution_counter = 0; #endif @@ -1018,6 +1019,7 @@ tier2_dispatch: next_uop++; OPT_STAT_INC(uops_executed); UOP_STAT_INC(uopcode, execution_count); + UOP_PAIR_INC(uopcode, lastuop); #ifdef Py_STATS trace_uop_execution_counter++; #endif diff --git a/Python/specialize.c b/Python/specialize.c index 0b4b199a23e..5e14bb56b30 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -11,6 +11,7 @@ #include "pycore_object.h" #include "pycore_opcode_metadata.h" // _PyOpcode_Caches #include "pycore_uop_metadata.h" // _PyOpcode_uop_name +#include "pycore_uop_ids.h" // MAX_UOP_ID #include "pycore_opcode_utils.h" // RESUME_AT_FUNC_START #include "pycore_pylifecycle.h" // _PyOS_URandomNonblock() #include "pycore_runtime.h" // _Py_ID() @@ -269,6 +270,14 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) } } + for (int i = 1; i <= MAX_UOP_ID; i++){ + for (int j = 1; j <= MAX_UOP_ID; j++) { + if (stats->opcode[i].pair_count[j]) { + fprintf(out, "uop[%s].pair_count[%s] : %" PRIu64 "\n", + _PyOpcode_uop_name[i], _PyOpcode_uop_name[j], stats->opcode[i].pair_count[j]); + } + } + } for (int i = 0; i < MAX_UOP_ID; i++) { if (stats->error_in_opcode[i]) { fprintf( diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index f7ed98ff604..ffbc40e6a37 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -736,9 +736,9 @@ def execution_count_section() -> Section: ) -def pair_count_section() -> Section: +def pair_count_section(prefix: str, title=None) -> Section: def calc_pair_count_table(stats: Stats) -> Rows: - opcode_stats = stats.get_opcode_stats("opcode") + opcode_stats = stats.get_opcode_stats(prefix) pair_counts = opcode_stats.get_pair_counts() total = opcode_stats.get_total_execution_count() @@ -760,7 +760,7 @@ def pair_count_section() -> Section: return Section( "Pair counts", - "Pair counts for top 100 Tier 1 instructions", + f"Pair counts for top 100 {title if title else prefix} pairs", [ Table( ("Pair", "Count:", "Self:", "Cumulative:"), @@ -1232,6 +1232,7 @@ def optimization_section() -> Section: ) ], ) + yield pair_count_section(prefix="uop", title="Non-JIT uop") yield Section( "Unsupported opcodes", "", @@ -1292,7 +1293,7 @@ def meta_stats_section() -> Section: LAYOUT = [ execution_count_section(), - pair_count_section(), + pair_count_section("opcode"), pre_succ_pairs_section(), specialization_section(), specialization_effectiveness_section(),