diff --git a/Python/ceval.c b/Python/ceval.c
index b4029d1081d..e6b5d3ae242 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -5403,6 +5403,7 @@ handle_eval_breaker:
 #define MISS_WITH_CACHE(opname) \
 opname ## _miss: \
     { \
+        STAT_INC(opcode, miss); \
         STAT_INC(opname, miss); \
         _PyAdaptiveEntry *cache = &GET_CACHE()->adaptive; \
         cache->counter--; \
diff --git a/Python/specialize.c b/Python/specialize.c
index 4070d6a6a0b..b7ef478ee55 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -38,6 +38,33 @@
 
 */
 
+/* Map from opcode to adaptive opcode.
+   Values of zero are ignored. */
+static uint8_t adaptive_opcodes[256] = {
+    [LOAD_ATTR] = LOAD_ATTR_ADAPTIVE,
+    [LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE,
+    [LOAD_METHOD] = LOAD_METHOD_ADAPTIVE,
+    [BINARY_SUBSCR] = BINARY_SUBSCR_ADAPTIVE,
+    [STORE_SUBSCR] = STORE_SUBSCR_ADAPTIVE,
+    [CALL] = CALL_ADAPTIVE,
+    [STORE_ATTR] = STORE_ATTR_ADAPTIVE,
+    [BINARY_OP] = BINARY_OP_ADAPTIVE,
+    [COMPARE_OP] = COMPARE_OP_ADAPTIVE,
+};
+
+/* The number of cache entries required for a "family" of instructions. */
+static uint8_t cache_requirements[256] = {
+    [LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
+    [LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */
+    [LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */
+    [BINARY_SUBSCR] = 2, /* _PyAdaptiveEntry, _PyObjectCache */
+    [STORE_SUBSCR] = 0,
+    [CALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
+    [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
+    [BINARY_OP] = 1,  // _PyAdaptiveEntry
+    [COMPARE_OP] = 1, /* _PyAdaptiveEntry */
+};
+
 Py_ssize_t _Py_QuickenedCount = 0;
 #ifdef Py_STATS
 PyStats _py_stats = { 0 };
@@ -144,7 +171,14 @@ _Py_GetSpecializationStats(void) {
 static void
 print_spec_stats(FILE *out, OpcodeStats *stats)
 {
+    /* Mark some opcodes as specializable for stats,
+     * even though we don't specialize them yet. */
+    fprintf(out, "    opcode[%d].specializable : 1\n", FOR_ITER);
+    fprintf(out, "    opcode[%d].specializable : 1\n", UNPACK_SEQUENCE);
     for (int i = 0; i < 256; i++) {
+        if (adaptive_opcodes[i]) {
+            fprintf(out, "    opcode[%d].specializable : 1\n", i);
+        }
         PRINT_STAT(i, specialization.success);
         PRINT_STAT(i, specialization.failure);
         PRINT_STAT(i, specialization.hit);
@@ -266,33 +300,6 @@ get_cache_count(SpecializedCacheOrInstruction *quickened) {
     return quickened[0].entry.zero.cache_count;
 }
 
-/* Map from opcode to adaptive opcode.
-   Values of zero are ignored. */
-static uint8_t adaptive_opcodes[256] = {
-    [LOAD_ATTR] = LOAD_ATTR_ADAPTIVE,
-    [LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE,
-    [LOAD_METHOD] = LOAD_METHOD_ADAPTIVE,
-    [BINARY_SUBSCR] = BINARY_SUBSCR_ADAPTIVE,
-    [STORE_SUBSCR] = STORE_SUBSCR_ADAPTIVE,
-    [CALL] = CALL_ADAPTIVE,
-    [STORE_ATTR] = STORE_ATTR_ADAPTIVE,
-    [BINARY_OP] = BINARY_OP_ADAPTIVE,
-    [COMPARE_OP] = COMPARE_OP_ADAPTIVE,
-};
-
-/* The number of cache entries required for a "family" of instructions. */
-static uint8_t cache_requirements[256] = {
-    [LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
-    [LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */
-    [LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */
-    [BINARY_SUBSCR] = 2, /* _PyAdaptiveEntry, _PyObjectCache */
-    [STORE_SUBSCR] = 0,
-    [CALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
-    [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
-    [BINARY_OP] = 1,  // _PyAdaptiveEntry
-    [COMPARE_OP] = 1, /* _PyAdaptiveEntry */
-};
-
 /* Return the oparg for the cache_offset and instruction index.
  *
  * If no cache is needed then return the original oparg.
diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py
index f67a35a04a9..6d0020739a3 100644
--- a/Tools/scripts/summarize_stats.py
+++ b/Tools/scripts/summarize_stats.py
@@ -22,11 +22,10 @@ for name in opcode.opname[1:]:
         pass
     opname.append(name)
 
-
 TOTAL = "specialization.deferred", "specialization.hit", "specialization.miss", "execution_count"
 
 def print_specialization_stats(name, family_stats):
-    if "specialization.failure" not in family_stats:
+    if "specializable" not in family_stats:
         return
     total = sum(family_stats.get(kind, 0) for kind in TOTAL)
     if total == 0:
@@ -87,13 +86,18 @@ def main():
     for i, opcode_stat in enumerate(opcode_stats):
         if "execution_count" in opcode_stat:
             count = opcode_stat['execution_count']
-            counts.append((count, opname[i]))
+            miss = 0
+            if "specializable" not in opcode_stat:
+                miss = opcode_stat.get("specialization.miss")
+            counts.append((count, opname[i], miss))
             total += count
     counts.sort(reverse=True)
     cummulative = 0
-    for (count, name) in counts:
+    for (count, name, miss) in counts:
         cummulative += count
         print(f"{name}: {count} {100*count/total:0.1f}% {100*cummulative/total:0.1f}%")
+        if miss:
+            print(f"    Misses: {miss} {100*miss/count:0.1f}%")
     print("Specialization stats:")
     for i, opcode_stat in enumerate(opcode_stats):
         name = opname[i]
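As a rough illustration of the reporting change above, the standalone Python sketch below mirrors the new logic in summarize_stats.py: execution counts are sorted in descending order, and a per-opcode miss percentage is printed only for instructions that are not marked "specializable" (the specialized forms, which now record misses via the added STAT_INC(opcode, miss)). The opcode_stats sample data and the print_execution_counts helper are invented for illustration; the real script builds its dictionaries from Py_STATS output rather than hard-coded values.

# Hypothetical sample data; real numbers come from Py_STATS dumps.
opcode_stats = {
    "LOAD_ATTR": {"execution_count": 1000, "specializable": 1},
    "LOAD_ATTR_SLOT": {"execution_count": 900, "specialization.miss": 45},
    "POP_TOP": {"execution_count": 500},
}

def print_execution_counts(opcode_stats):
    total = sum(s.get("execution_count", 0) for s in opcode_stats.values())
    counts = []
    for name, stat in opcode_stats.items():
        if "execution_count" in stat:
            count = stat["execution_count"]
            # Only opcodes without the "specializable" marker (i.e. the
            # specialized forms) get a miss percentage, matching the patch.
            miss = 0 if "specializable" in stat else stat.get("specialization.miss", 0)
            counts.append((count, name, miss))
    counts.sort(reverse=True)
    cumulative = 0
    for count, name, miss in counts:
        cumulative += count
        print(f"{name}: {count} {100*count/total:0.1f}% {100*cumulative/total:0.1f}%")
        if miss:
            print(f"    Misses: {miss} {100*miss/count:0.1f}%")

print_execution_counts(opcode_stats)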