From 72867c962cc59c6d56805f86530696bea6beb039 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 2 May 2024 16:17:59 +0100 Subject: [PATCH] GH-118095: Unify the behavior of tier 2 FOR_ITER branch micro-ops (GH-118420) * Target _FOR_ITER_TIER_TWO at POP_TOP following the matching END_FOR * Modify _GUARD_NOT_EXHAUSTED_RANGE, _GUARD_NOT_EXHAUSTED_LIST and _GUARD_NOT_EXHAUSTED_TUPLE so that they also target the POP_TOP following the matching END_FOR --- Python/bytecodes.c | 4 +-- Python/executor_cases.c.h | 4 +-- Python/optimizer.c | 50 ++++++++++++++++++++++++++++++-------- Python/optimizer_symbols.c | 13 +++++++--- Python/specialize.c | 6 +++-- 5 files changed, 56 insertions(+), 21 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index b1be40dd38c..9769cfe68aa 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2610,9 +2610,7 @@ dummy_func( _PyErr_Clear(tstate); } /* iterator ended normally */ - Py_DECREF(iter); - STACK_SHRINK(1); - /* The translator sets the deopt target just past END_FOR */ + /* The translator sets the deopt target just past the matching END_FOR */ DEOPT_IF(true); } // Common case: no jump, leave it to the code generator diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index f97f279836e..03db9b623cb 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2664,9 +2664,7 @@ _PyErr_Clear(tstate); } /* iterator ended normally */ - Py_DECREF(iter); - STACK_SHRINK(1); - /* The translator sets the deopt target just past END_FOR */ + /* The translator sets the deopt target just past the matching END_FOR */ if (true) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); diff --git a/Python/optimizer.c b/Python/optimizer.c index 050947396d1..56768ae8f54 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -23,6 +23,19 @@ #define MAX_EXECUTORS_SIZE 256 +#ifdef Py_DEBUG +static int +base_opcode(PyCodeObject *code, int offset) +{ + int opcode = _Py_GetBaseOpcode(code, offset); + if (opcode == ENTER_EXECUTOR) { + int oparg = _PyCode_CODE(code)[offset].op.arg; + _PyExecutorObject *ex = code->co_executors->executors[oparg]; + return ex->vm_data.opcode; + } + return opcode; +} +#endif static bool has_space_for_executor(PyCodeObject *code, _Py_CODEUNIT *instr) @@ -445,6 +458,14 @@ _PyUOp_Replacements[MAX_UOP_ID + 1] = { [_FOR_ITER] = _FOR_ITER_TIER_TWO, }; +static const uint8_t +is_for_iter_test[MAX_UOP_ID + 1] = { + [_GUARD_NOT_EXHAUSTED_RANGE] = 1, + [_GUARD_NOT_EXHAUSTED_LIST] = 1, + [_GUARD_NOT_EXHAUSTED_TUPLE] = 1, + [_FOR_ITER_TIER_TWO] = 1, +}; + static const uint16_t BRANCH_TO_GUARD[4][2] = { [POP_JUMP_IF_FALSE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_TRUE_POP, @@ -594,7 +615,6 @@ top: // Jump here after _PUSH_FRAME or likely branches uint32_t opcode = instr->op.code; uint32_t oparg = instr->op.arg; - uint32_t extended = 0; DPRINTF(2, "%d: %s(%d)\n", target, _PyOpcode_OpName[opcode], oparg); @@ -608,7 +628,6 @@ top: // Jump here after _PUSH_FRAME or likely branches if (opcode == EXTENDED_ARG) { instr++; - extended = 1; opcode = instr->op.code; oparg = (oparg << 8) | instr->op.arg; if (opcode == EXTENDED_ARG) { @@ -772,12 +791,15 @@ top: // Jump here after _PUSH_FRAME or likely branches case OPARG_REPLACED: uop = _PyUOp_Replacements[uop]; assert(uop != 0); - if (uop == _FOR_ITER_TIER_TWO) { - target += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 2 + extended; - assert(_PyCode_CODE(code)[target-2].op.code == END_FOR || - _PyCode_CODE(code)[target-2].op.code == INSTRUMENTED_END_FOR); - assert(_PyCode_CODE(code)[target-1].op.code == POP_TOP); +#ifdef Py_DEBUG + { + uint32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + (oparg > 255); + uint32_t jump_target = next_inst + oparg; + assert(base_opcode(code, jump_target) == END_FOR || + base_opcode(code, jump_target) == INSTRUMENTED_END_FOR); + assert(base_opcode(code, jump_target+1) == POP_TOP); } +#endif break; default: fprintf(stderr, @@ -1000,10 +1022,18 @@ prepare_for_execution(_PyUOpInstruction *buffer, int length) if (_PyUop_Flags[opcode] & (HAS_EXIT_FLAG | HAS_DEOPT_FLAG)) { uint16_t exit_op = (_PyUop_Flags[opcode] & HAS_EXIT_FLAG) ? _SIDE_EXIT : _DEOPT; - if (target != current_jump_target || current_exit_op != exit_op) { - make_exit(&buffer[next_spare], exit_op, target); + int32_t jump_target = target; + if (is_for_iter_test[opcode]) { + /* Target the POP_TOP immediately after the END_FOR, + * leaving only the iterator on the stack. */ + int extended_arg = inst->oparg > 255; + int32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + extended_arg; + jump_target = next_inst + inst->oparg + 1; + } + if (jump_target != current_jump_target || current_exit_op != exit_op) { + make_exit(&buffer[next_spare], exit_op, jump_target); current_exit_op = exit_op; - current_jump_target = target; + current_jump_target = jump_target; current_jump = next_spare; next_spare++; } diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index d52f490853c..4aeb04fe040 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -164,19 +164,26 @@ _Py_uop_sym_set_const(_Py_UopsSymbol *sym, PyObject *const_val) return true; } - bool _Py_uop_sym_set_null(_Py_UopsSymbol *sym) { + if (_Py_uop_sym_is_not_null(sym)) { + sym_set_bottom(sym); + return false; + } sym_set_flag(sym, IS_NULL); - return !_Py_uop_sym_is_bottom(sym); + return true; } bool _Py_uop_sym_set_non_null(_Py_UopsSymbol *sym) { + if (_Py_uop_sym_is_null(sym)) { + sym_set_bottom(sym); + return false; + } sym_set_flag(sym, NOT_NULL); - return !_Py_uop_sym_is_bottom(sym); + return true; } diff --git a/Python/specialize.c b/Python/specialize.c index ee517813721..72114f27f69 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -215,6 +215,7 @@ print_gc_stats(FILE *out, GCStats *stats) } } +#ifdef _Py_TIER2 static void print_histogram(FILE *out, const char *name, uint64_t hist[_Py_UOP_HIST_SIZE]) { @@ -249,7 +250,6 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) stats->optimizer_failure_reason_no_memory); fprintf(out, "Optimizer remove globals builtins changed: %" PRIu64 "\n", stats->remove_globals_builtins_changed); fprintf(out, "Optimizer remove globals incorrect keys: %" PRIu64 "\n", stats->remove_globals_incorrect_keys); - for (int i = 0; i <= MAX_UOP_ID; i++) { if (stats->opcode[i].execution_count) { fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].execution_count); @@ -258,7 +258,6 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) fprintf(out, "uops[%s].specialization.miss : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].miss); } } - for (int i = 0; i < 256; i++) { if (stats->unsupported_opcode[i]) { fprintf( @@ -289,6 +288,7 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) } } } +#endif static void print_rare_event_stats(FILE *out, RareEventStats *stats) @@ -309,7 +309,9 @@ print_stats(FILE *out, PyStats *stats) print_call_stats(out, &stats->call_stats); print_object_stats(out, &stats->object_stats); print_gc_stats(out, stats->gc_stats); +#ifdef _Py_TIER2 print_optimization_stats(out, &stats->optimization_stats); +#endif print_rare_event_stats(out, &stats->rare_event_stats); }