Add miss stats for specialized instructions. (GH-31108)

Mark Shannon 2022-02-04 09:56:46 +00:00 committed by GitHub
parent ba650af7d6
commit 832876b992
3 changed files with 43 additions and 31 deletions

Python/ceval.c

@@ -5403,6 +5403,7 @@ handle_eval_breaker:
 #define MISS_WITH_CACHE(opname) \
 opname ## _miss: \
     { \
+        STAT_INC(opcode, miss); \
         STAT_INC(opname, miss); \
         _PyAdaptiveEntry *cache = &GET_CACHE()->adaptive; \
         cache->counter--; \

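The interesting part of this hunk is the extra STAT_INC(opcode, miss): at the _miss label the opcode variable should still name the concrete specialized instruction that just failed its cache check, so the miss is now charged to that instruction as well as to the adaptive family passed in as opname. Below is a rough, self-contained Python model of that bookkeeping; the counter layout and the example opcode names are illustrative only, not CPython's actual data structures.

from collections import defaultdict

# One dict of counters per opcode name; stands in for the per-opcode stats.
opcode_stats = defaultdict(lambda: defaultdict(int))

def record_cache_miss(family, specialized):
    # STAT_INC(opname, miss): charge the adaptive family, e.g. LOAD_ATTR.
    opcode_stats[family]["specialization.miss"] += 1
    # New STAT_INC(opcode, miss): also charge the specialized form that
    # actually missed, e.g. LOAD_ATTR_INSTANCE_VALUE.
    opcode_stats[specialized]["specialization.miss"] += 1

record_cache_miss("LOAD_ATTR", "LOAD_ATTR_INSTANCE_VALUE")
print(opcode_stats["LOAD_ATTR_INSTANCE_VALUE"]["specialization.miss"])  # 1
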
Python/specialize.c

@@ -38,6 +38,33 @@
     <instr N-1>
 */

+/* Map from opcode to adaptive opcode.
+   Values of zero are ignored. */
+static uint8_t adaptive_opcodes[256] = {
+    [LOAD_ATTR] = LOAD_ATTR_ADAPTIVE,
+    [LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE,
+    [LOAD_METHOD] = LOAD_METHOD_ADAPTIVE,
+    [BINARY_SUBSCR] = BINARY_SUBSCR_ADAPTIVE,
+    [STORE_SUBSCR] = STORE_SUBSCR_ADAPTIVE,
+    [CALL] = CALL_ADAPTIVE,
+    [STORE_ATTR] = STORE_ATTR_ADAPTIVE,
+    [BINARY_OP] = BINARY_OP_ADAPTIVE,
+    [COMPARE_OP] = COMPARE_OP_ADAPTIVE,
+};
+
+/* The number of cache entries required for a "family" of instructions. */
+static uint8_t cache_requirements[256] = {
+    [LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
+    [LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */
+    [LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */
+    [BINARY_SUBSCR] = 2, /* _PyAdaptiveEntry, _PyObjectCache */
+    [STORE_SUBSCR] = 0,
+    [CALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
+    [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
+    [BINARY_OP] = 1,  // _PyAdaptiveEntry
+    [COMPARE_OP] = 1, /* _PyAdaptiveEntry */
+};
+
 Py_ssize_t _Py_QuickenedCount = 0;
 #ifdef Py_STATS
 PyStats _py_stats = { 0 };
@@ -144,7 +171,14 @@ _Py_GetSpecializationStats(void) {
 static void
 print_spec_stats(FILE *out, OpcodeStats *stats)
 {
+    /* Mark some opcodes as specializable for stats,
+     * even though we don't specialize them yet. */
+    fprintf(out, "    opcode[%d].specializable : 1\n", FOR_ITER);
+    fprintf(out, "    opcode[%d].specializable : 1\n", UNPACK_SEQUENCE);
     for (int i = 0; i < 256; i++) {
+        if (adaptive_opcodes[i]) {
+            fprintf(out, "    opcode[%d].specializable : 1\n", i);
+        }
         PRINT_STAT(i, specialization.success);
         PRINT_STAT(i, specialization.failure);
         PRINT_STAT(i, specialization.hit);
@@ -266,33 +300,6 @@ get_cache_count(SpecializedCacheOrInstruction *quickened) {
     return quickened[0].entry.zero.cache_count;
 }

-/* Map from opcode to adaptive opcode.
-   Values of zero are ignored. */
-static uint8_t adaptive_opcodes[256] = {
-    [LOAD_ATTR] = LOAD_ATTR_ADAPTIVE,
-    [LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE,
-    [LOAD_METHOD] = LOAD_METHOD_ADAPTIVE,
-    [BINARY_SUBSCR] = BINARY_SUBSCR_ADAPTIVE,
-    [STORE_SUBSCR] = STORE_SUBSCR_ADAPTIVE,
-    [CALL] = CALL_ADAPTIVE,
-    [STORE_ATTR] = STORE_ATTR_ADAPTIVE,
-    [BINARY_OP] = BINARY_OP_ADAPTIVE,
-    [COMPARE_OP] = COMPARE_OP_ADAPTIVE,
-};
-
-/* The number of cache entries required for a "family" of instructions. */
-static uint8_t cache_requirements[256] = {
-    [LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
-    [LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */
-    [LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */
-    [BINARY_SUBSCR] = 2, /* _PyAdaptiveEntry, _PyObjectCache */
-    [STORE_SUBSCR] = 0,
-    [CALL] = 2, /* _PyAdaptiveEntry and _PyObjectCache/_PyCallCache */
-    [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */
-    [BINARY_OP] = 1,  // _PyAdaptiveEntry
-    [COMPARE_OP] = 1, /* _PyAdaptiveEntry */
-};
-
 /* Return the oparg for the cache_offset and instruction index.
  *
  * If no cache is needed then return the original oparg.

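print_spec_stats() now tags every adaptive family (plus FOR_ITER and UNPACK_SEQUENCE, which are not specialized yet) with an "opcode[N].specializable : 1" line in the stats dump; summarize_stats.py keys off that flag to decide whether an opcode's misses belong in the specialization report or should be shown inline in the execution-count ranking. The sketch below shows one way such lines could be folded into per-opcode dicts. It is an illustration, not the script's actual parser: the "key : value" layout produced by PRINT_STAT is an assumption here, and the opcode numbers are made up.

from collections import defaultdict

def parse_stats(lines):
    # Assumes each stat line looks like "    opcode[<n>].<key> : <value>".
    opcode_stats = defaultdict(dict)
    for line in lines:
        key, sep, value = line.strip().partition(" : ")
        if not sep or not key.startswith("opcode["):
            continue
        number, _, field = key[len("opcode["):].partition("].")
        opcode_stats[int(number)][field] = int(value)
    return opcode_stats

sample = [
    "    opcode[95].specializable : 1",          # hypothetical opcode number
    "    opcode[95].specialization.miss : 120",
]
print(parse_stats(sample)[95])  # {'specializable': 1, 'specialization.miss': 120}
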
Tools/scripts/summarize_stats.py

@@ -22,11 +22,10 @@ for name in opcode.opname[1:]:
         pass
     opname.append(name)

 TOTAL = "specialization.deferred", "specialization.hit", "specialization.miss", "execution_count"

 def print_specialization_stats(name, family_stats):
-    if "specialization.failure" not in family_stats:
+    if "specializable" not in family_stats:
         return
     total = sum(family_stats.get(kind, 0) for kind in TOTAL)
     if total == 0:
@@ -87,13 +86,18 @@ def main():
     for i, opcode_stat in enumerate(opcode_stats):
         if "execution_count" in opcode_stat:
             count = opcode_stat['execution_count']
-            counts.append((count, opname[i]))
+            miss = 0
+            if "specializable" not in opcode_stat:
+                miss = opcode_stat.get("specialization.miss")
+            counts.append((count, opname[i], miss))
             total += count
     counts.sort(reverse=True)
     cummulative = 0
-    for (count, name) in counts:
+    for (count, name, miss) in counts:
         cummulative += count
         print(f"{name}: {count} {100*count/total:0.1f}% {100*cummulative/total:0.1f}%")
+        if miss:
+            print(f"    Misses: {miss} {100*miss/count:0.1f}%")
     print("Specialization stats:")
     for i, opcode_stat in enumerate(opcode_stats):
         name = opname[i]
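
Putting the pieces together, here is a tiny self-contained re-creation of what the patched main() loop prints for the execution-count ranking. The opcode names and numbers are invented purely for illustration; only the per-opcode "Misses" sub-line and its percentage mirror the change above.

# Hypothetical per-opcode stats: a specialized form and its adaptive family.
opcode_stats = {
    "LOAD_ATTR_INSTANCE_VALUE": {"execution_count": 900_000,
                                 "specialization.miss": 45_000},
    "LOAD_ATTR": {"execution_count": 100_000, "specializable": 1},
}

counts = []
total = 0
for name, opcode_stat in opcode_stats.items():
    if "execution_count" in opcode_stat:
        count = opcode_stat["execution_count"]
        miss = 0
        if "specializable" not in opcode_stat:
            # Specialized forms get their miss count reported inline;
            # adaptive families are covered by the specialization report.
            miss = opcode_stat.get("specialization.miss")
        counts.append((count, name, miss))
        total += count
counts.sort(reverse=True)
cummulative = 0
for count, name, miss in counts:
    cummulative += count
    print(f"{name}: {count} {100*count/total:0.1f}% {100*cummulative/total:0.1f}%")
    if miss:
        print(f"    Misses: {miss} {100*miss/count:0.1f}%")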