diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index 0bcca1e769b..3db3aa3eb77 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -122,14 +122,14 @@ initial_jump_backoff_counter(void) * otherwise when a side exit warms up we may construct * a new trace before the Tier 1 code has properly re-specialized. * Backoff sequence 64, 128, 256, 512, 1024, 2048, 4096. */ -#define COLD_EXIT_INITIAL_VALUE 64 -#define COLD_EXIT_INITIAL_BACKOFF 6 +#define SIDE_EXIT_INITIAL_VALUE 64 +#define SIDE_EXIT_INITIAL_BACKOFF 6 static inline _Py_BackoffCounter initial_temperature_backoff_counter(void) { - return make_backoff_counter(COLD_EXIT_INITIAL_VALUE, - COLD_EXIT_INITIAL_BACKOFF); + return make_backoff_counter(SIDE_EXIT_INITIAL_VALUE, + SIDE_EXIT_INITIAL_BACKOFF); } /* Unreachable backoff counter. */ diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 81caaa5abb2..cf2cc7f5b4a 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -535,7 +535,7 @@ write_location_entry_start(uint8_t *ptr, int code, int length) #define ADAPTIVE_COOLDOWN_BACKOFF 0 // Can't assert this in pycore_backoff.h because of header order dependencies -static_assert(COLD_EXIT_INITIAL_VALUE > ADAPTIVE_COOLDOWN_VALUE, +static_assert(SIDE_EXIT_INITIAL_VALUE > ADAPTIVE_COOLDOWN_VALUE, "Cold exit value should be larger than adaptive cooldown value"); static inline _Py_BackoffCounter diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 1007f838b7e..bcbb8b73706 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -41,24 +41,18 @@ typedef struct { * the 32 bits between the oparg and operand are: * UOP_FORMAT_TARGET: * uint32_t target; - * UOP_FORMAT_EXIT - * uint16_t exit_index; - * uint16_t error_target; * UOP_FORMAT_JUMP * uint16_t jump_target; * uint16_t error_target; */ typedef struct { - uint16_t 
opcode:14; - uint16_t format:2; + uint16_t opcode:15; + uint16_t format:1; uint16_t oparg; union { uint32_t target; struct { - union { - uint16_t exit_index; - uint16_t jump_target; - }; + uint16_t jump_target; uint16_t error_target; }; }; @@ -160,9 +154,7 @@ struct _Py_UopsSymbol { }; #define UOP_FORMAT_TARGET 0 -#define UOP_FORMAT_EXIT 1 -#define UOP_FORMAT_JUMP 2 -#define UOP_FORMAT_UNUSED 3 +#define UOP_FORMAT_JUMP 1 static inline uint32_t uop_get_target(const _PyUOpInstruction *inst) { @@ -170,12 +162,6 @@ static inline uint32_t uop_get_target(const _PyUOpInstruction *inst) return inst->target; } -static inline uint16_t uop_get_exit_index(const _PyUOpInstruction *inst) -{ - assert(inst->format == UOP_FORMAT_EXIT); - return inst->exit_index; -} - static inline uint16_t uop_get_jump_target(const _PyUOpInstruction *inst) { assert(inst->format == UOP_FORMAT_JUMP); diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 85e689c0969..bd1d27b03b3 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -72,12 +72,11 @@ extern "C" { #define _CHECK_STACK_SPACE_OPERAND 337 #define _CHECK_VALIDITY 338 #define _CHECK_VALIDITY_AND_SET_IP 339 -#define _COLD_EXIT 340 -#define _COMPARE_OP 341 -#define _COMPARE_OP_FLOAT 342 -#define _COMPARE_OP_INT 343 -#define _COMPARE_OP_STR 344 -#define _CONTAINS_OP 345 +#define _COMPARE_OP 340 +#define _COMPARE_OP_FLOAT 341 +#define _COMPARE_OP_INT 342 +#define _COMPARE_OP_STR 343 +#define _CONTAINS_OP 344 #define _CONTAINS_OP_DICT CONTAINS_OP_DICT #define _CONTAINS_OP_SET CONTAINS_OP_SET #define _CONVERT_VALUE CONVERT_VALUE @@ -89,53 +88,53 @@ extern "C" { #define _DELETE_GLOBAL DELETE_GLOBAL #define _DELETE_NAME DELETE_NAME #define _DELETE_SUBSCR DELETE_SUBSCR -#define _DEOPT 346 +#define _DEOPT 345 #define _DICT_MERGE DICT_MERGE #define _DICT_UPDATE DICT_UPDATE -#define _DYNAMIC_EXIT 347 +#define _DYNAMIC_EXIT 346 #define _END_SEND END_SEND -#define _ERROR_POP_N 348 
+#define _ERROR_POP_N 347 #define _EXIT_INIT_CHECK EXIT_INIT_CHECK -#define _EXPAND_METHOD 349 -#define _FATAL_ERROR 350 +#define _EXPAND_METHOD 348 +#define _FATAL_ERROR 349 #define _FORMAT_SIMPLE FORMAT_SIMPLE #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC -#define _FOR_ITER 351 -#define _FOR_ITER_GEN_FRAME 352 -#define _FOR_ITER_TIER_TWO 353 +#define _FOR_ITER 350 +#define _FOR_ITER_GEN_FRAME 351 +#define _FOR_ITER_TIER_TWO 352 #define _GET_AITER GET_AITER #define _GET_ANEXT GET_ANEXT #define _GET_AWAITABLE GET_AWAITABLE #define _GET_ITER GET_ITER #define _GET_LEN GET_LEN #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER -#define _GUARD_BOTH_FLOAT 354 -#define _GUARD_BOTH_INT 355 -#define _GUARD_BOTH_UNICODE 356 -#define _GUARD_BUILTINS_VERSION 357 -#define _GUARD_DORV_NO_DICT 358 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 359 -#define _GUARD_GLOBALS_VERSION 360 -#define _GUARD_IS_FALSE_POP 361 -#define _GUARD_IS_NONE_POP 362 -#define _GUARD_IS_NOT_NONE_POP 363 -#define _GUARD_IS_TRUE_POP 364 -#define _GUARD_KEYS_VERSION 365 -#define _GUARD_NOS_FLOAT 366 -#define _GUARD_NOS_INT 367 -#define _GUARD_NOT_EXHAUSTED_LIST 368 -#define _GUARD_NOT_EXHAUSTED_RANGE 369 -#define _GUARD_NOT_EXHAUSTED_TUPLE 370 -#define _GUARD_TOS_FLOAT 371 -#define _GUARD_TOS_INT 372 -#define _GUARD_TYPE_VERSION 373 -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 374 -#define _INIT_CALL_PY_EXACT_ARGS 375 -#define _INIT_CALL_PY_EXACT_ARGS_0 376 -#define _INIT_CALL_PY_EXACT_ARGS_1 377 -#define _INIT_CALL_PY_EXACT_ARGS_2 378 -#define _INIT_CALL_PY_EXACT_ARGS_3 379 -#define _INIT_CALL_PY_EXACT_ARGS_4 380 +#define _GUARD_BOTH_FLOAT 353 +#define _GUARD_BOTH_INT 354 +#define _GUARD_BOTH_UNICODE 355 +#define _GUARD_BUILTINS_VERSION 356 +#define _GUARD_DORV_NO_DICT 357 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 358 +#define _GUARD_GLOBALS_VERSION 359 +#define _GUARD_IS_FALSE_POP 360 +#define _GUARD_IS_NONE_POP 361 +#define _GUARD_IS_NOT_NONE_POP 362 +#define _GUARD_IS_TRUE_POP 363 +#define 
_GUARD_KEYS_VERSION 364 +#define _GUARD_NOS_FLOAT 365 +#define _GUARD_NOS_INT 366 +#define _GUARD_NOT_EXHAUSTED_LIST 367 +#define _GUARD_NOT_EXHAUSTED_RANGE 368 +#define _GUARD_NOT_EXHAUSTED_TUPLE 369 +#define _GUARD_TOS_FLOAT 370 +#define _GUARD_TOS_INT 371 +#define _GUARD_TYPE_VERSION 372 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 373 +#define _INIT_CALL_PY_EXACT_ARGS 374 +#define _INIT_CALL_PY_EXACT_ARGS_0 375 +#define _INIT_CALL_PY_EXACT_ARGS_1 376 +#define _INIT_CALL_PY_EXACT_ARGS_2 377 +#define _INIT_CALL_PY_EXACT_ARGS_3 378 +#define _INIT_CALL_PY_EXACT_ARGS_4 379 #define _INSTRUMENTED_CALL INSTRUMENTED_CALL #define _INSTRUMENTED_CALL_FUNCTION_EX INSTRUMENTED_CALL_FUNCTION_EX #define _INSTRUMENTED_CALL_KW INSTRUMENTED_CALL_KW @@ -152,65 +151,65 @@ extern "C" { #define _INSTRUMENTED_RETURN_CONST INSTRUMENTED_RETURN_CONST #define _INSTRUMENTED_RETURN_VALUE INSTRUMENTED_RETURN_VALUE #define _INSTRUMENTED_YIELD_VALUE INSTRUMENTED_YIELD_VALUE -#define _INTERNAL_INCREMENT_OPT_COUNTER 381 -#define _IS_NONE 382 +#define _INTERNAL_INCREMENT_OPT_COUNTER 380 +#define _IS_NONE 381 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 383 -#define _ITER_CHECK_RANGE 384 -#define _ITER_CHECK_TUPLE 385 -#define _ITER_JUMP_LIST 386 -#define _ITER_JUMP_RANGE 387 -#define _ITER_JUMP_TUPLE 388 -#define _ITER_NEXT_LIST 389 -#define _ITER_NEXT_RANGE 390 -#define _ITER_NEXT_TUPLE 391 -#define _JUMP_TO_TOP 392 +#define _ITER_CHECK_LIST 382 +#define _ITER_CHECK_RANGE 383 +#define _ITER_CHECK_TUPLE 384 +#define _ITER_JUMP_LIST 385 +#define _ITER_JUMP_RANGE 386 +#define _ITER_JUMP_TUPLE 387 +#define _ITER_NEXT_LIST 388 +#define _ITER_NEXT_RANGE 389 +#define _ITER_NEXT_TUPLE 390 +#define _JUMP_TO_TOP 391 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 393 -#define _LOAD_ATTR_CLASS 394 -#define _LOAD_ATTR_CLASS_0 395 -#define _LOAD_ATTR_CLASS_1 396 +#define _LOAD_ATTR 392 +#define _LOAD_ATTR_CLASS 393 +#define _LOAD_ATTR_CLASS_0 394 +#define 
_LOAD_ATTR_CLASS_1 395 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 397 -#define _LOAD_ATTR_INSTANCE_VALUE_0 398 -#define _LOAD_ATTR_INSTANCE_VALUE_1 399 -#define _LOAD_ATTR_METHOD_LAZY_DICT 400 -#define _LOAD_ATTR_METHOD_NO_DICT 401 -#define _LOAD_ATTR_METHOD_WITH_VALUES 402 -#define _LOAD_ATTR_MODULE 403 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 404 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 405 +#define _LOAD_ATTR_INSTANCE_VALUE 396 +#define _LOAD_ATTR_INSTANCE_VALUE_0 397 +#define _LOAD_ATTR_INSTANCE_VALUE_1 398 +#define _LOAD_ATTR_METHOD_LAZY_DICT 399 +#define _LOAD_ATTR_METHOD_NO_DICT 400 +#define _LOAD_ATTR_METHOD_WITH_VALUES 401 +#define _LOAD_ATTR_MODULE 402 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 403 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 404 #define _LOAD_ATTR_PROPERTY LOAD_ATTR_PROPERTY -#define _LOAD_ATTR_SLOT 406 -#define _LOAD_ATTR_SLOT_0 407 -#define _LOAD_ATTR_SLOT_1 408 -#define _LOAD_ATTR_WITH_HINT 409 +#define _LOAD_ATTR_SLOT 405 +#define _LOAD_ATTR_SLOT_0 406 +#define _LOAD_ATTR_SLOT_1 407 +#define _LOAD_ATTR_WITH_HINT 408 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 410 -#define _LOAD_CONST_INLINE_BORROW 411 -#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 412 -#define _LOAD_CONST_INLINE_WITH_NULL 413 +#define _LOAD_CONST_INLINE 409 +#define _LOAD_CONST_INLINE_BORROW 410 +#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 411 +#define _LOAD_CONST_INLINE_WITH_NULL 412 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 414 -#define _LOAD_FAST_0 415 -#define _LOAD_FAST_1 416 -#define _LOAD_FAST_2 417 -#define _LOAD_FAST_3 418 -#define _LOAD_FAST_4 419 -#define _LOAD_FAST_5 420 -#define _LOAD_FAST_6 421 -#define _LOAD_FAST_7 422 +#define _LOAD_FAST 413 +#define _LOAD_FAST_0 414 +#define _LOAD_FAST_1 415 +#define _LOAD_FAST_2 416 +#define _LOAD_FAST_3 417 +#define 
_LOAD_FAST_4 418 +#define _LOAD_FAST_5 419 +#define _LOAD_FAST_6 420 +#define _LOAD_FAST_7 421 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 423 -#define _LOAD_GLOBAL_BUILTINS 424 -#define _LOAD_GLOBAL_MODULE 425 +#define _LOAD_GLOBAL 422 +#define _LOAD_GLOBAL_BUILTINS 423 +#define _LOAD_GLOBAL_MODULE 424 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME #define _LOAD_SPECIAL LOAD_SPECIAL @@ -225,51 +224,51 @@ extern "C" { #define _MATCH_SEQUENCE MATCH_SEQUENCE #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 426 -#define _POP_JUMP_IF_TRUE 427 +#define _POP_JUMP_IF_FALSE 425 +#define _POP_JUMP_IF_TRUE 426 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 428 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 427 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 429 +#define _PUSH_FRAME 428 #define _PUSH_NULL PUSH_NULL -#define _PY_FRAME_GENERAL 430 -#define _REPLACE_WITH_TRUE 431 +#define _PY_FRAME_GENERAL 429 +#define _REPLACE_WITH_TRUE 430 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 432 -#define _SEND 433 +#define _SAVE_RETURN_OFFSET 431 +#define _SEND 432 #define _SEND_GEN SEND_GEN #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 434 -#define _STORE_ATTR 435 -#define _STORE_ATTR_INSTANCE_VALUE 436 -#define _STORE_ATTR_SLOT 437 -#define _STORE_ATTR_WITH_HINT 438 +#define _START_EXECUTOR 433 +#define _STORE_ATTR 434 +#define _STORE_ATTR_INSTANCE_VALUE 435 +#define _STORE_ATTR_SLOT 436 +#define _STORE_ATTR_WITH_HINT 437 #define _STORE_DEREF STORE_DEREF 
-#define _STORE_FAST 439 -#define _STORE_FAST_0 440 -#define _STORE_FAST_1 441 -#define _STORE_FAST_2 442 -#define _STORE_FAST_3 443 -#define _STORE_FAST_4 444 -#define _STORE_FAST_5 445 -#define _STORE_FAST_6 446 -#define _STORE_FAST_7 447 +#define _STORE_FAST 438 +#define _STORE_FAST_0 439 +#define _STORE_FAST_1 440 +#define _STORE_FAST_2 441 +#define _STORE_FAST_3 442 +#define _STORE_FAST_4 443 +#define _STORE_FAST_5 444 +#define _STORE_FAST_6 445 +#define _STORE_FAST_7 446 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME #define _STORE_SLICE STORE_SLICE -#define _STORE_SUBSCR 448 +#define _STORE_SUBSCR 447 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 449 -#define _TO_BOOL 450 +#define _TIER2_RESUME_CHECK 448 +#define _TO_BOOL 449 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -279,13 +278,13 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 451 +#define _UNPACK_SEQUENCE 450 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 451 +#define MAX_UOP_ID 450 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 5aef6ba6825..2a2d6e923b7 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -244,7 +244,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_SET_IP] = 0, [_CHECK_STACK_SPACE_OPERAND] = HAS_DEOPT_FLAG, [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG, - [_EXIT_TRACE] = 0, + [_EXIT_TRACE] = 
HAS_ARG_FLAG | HAS_ESCAPES_FLAG, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG, [_LOAD_CONST_INLINE] = HAS_PURE_FLAG, [_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG, @@ -253,7 +253,6 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = HAS_PURE_FLAG, [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, - [_COLD_EXIT] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, [_DYNAMIC_EXIT] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, [_START_EXECUTOR] = HAS_DEOPT_FLAG, [_FATAL_ERROR] = 0, @@ -325,7 +324,6 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_CHECK_STACK_SPACE_OPERAND] = "_CHECK_STACK_SPACE_OPERAND", [_CHECK_VALIDITY] = "_CHECK_VALIDITY", [_CHECK_VALIDITY_AND_SET_IP] = "_CHECK_VALIDITY_AND_SET_IP", - [_COLD_EXIT] = "_COLD_EXIT", [_COMPARE_OP] = "_COMPARE_OP", [_COMPARE_OP_FLOAT] = "_COMPARE_OP_FLOAT", [_COMPARE_OP_INT] = "_COMPARE_OP_INT", @@ -984,8 +982,6 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _INTERNAL_INCREMENT_OPT_COUNTER: return 1; - case _COLD_EXIT: - return 0; case _DYNAMIC_EXIT: return 0; case _START_EXECUTOR: diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-24-08-39-23.gh-issue-116017.-Bw2UY.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-24-08-39-23.gh-issue-116017.-Bw2UY.rst new file mode 100644 index 00000000000..3ca1b37f701 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-24-08-39-23.gh-issue-116017.-Bw2UY.rst @@ -0,0 +1,3 @@ +Simplify the warmup mechanism used for "side exits" in JIT code, resulting +in slightly better performance and slightly lower memory usage for most +platforms. 
diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 50978a0dc87..343481e9313 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4618,7 +4618,50 @@ dummy_func( } tier2 op(_EXIT_TRACE, (--)) { - EXIT_TO_TRACE(); + _PyExitData *exit = &current_executor->exits[oparg]; + PyCodeObject *code = _PyFrame_GetCode(frame); + _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target; + #if defined(Py_DEBUG) && !defined(_Py_JIT) + OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); + if (lltrace >= 2) { + printf("SIDE EXIT: [UOp "); + _PyUOpPrint(&next_uop[-1]); + printf(", exit %u, temp %d, target %d -> %s]\n", + oparg, exit->temperature.as_counter, + (int)(target - _PyCode_CODE(code)), + _PyOpcode_OpName[target->op.code]); + } + #endif + if (exit->executor == NULL) { + _Py_BackoffCounter temperature = exit->temperature; + if (!backoff_counter_triggers(temperature)) { + exit->temperature = advance_backoff_counter(temperature); + tstate->previous_executor = (PyObject *)current_executor; + GOTO_TIER_ONE(target); + } + _PyExecutorObject *executor; + if (target->op.code == ENTER_EXECUTOR) { + executor = code->co_executors->executors[target->op.arg]; + Py_INCREF(executor); + } + else { + int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor); + if (optimized <= 0) { + exit->temperature = restart_backoff_counter(temperature); + if (optimized < 0) { + Py_DECREF(current_executor); + tstate->previous_executor = Py_None; + GOTO_UNWIND(); + } + tstate->previous_executor = (PyObject *)current_executor; + GOTO_TIER_ONE(target); + } + } + exit->executor = executor; + } + Py_INCREF(exit->executor); + tstate->previous_executor = (PyObject *)current_executor; + GOTO_TIER_TWO(exit->executor); } tier2 op(_CHECK_VALIDITY, (--)) { @@ -4659,47 +4702,21 @@ dummy_func( exe->count++; } - /* Only used for handling cold side exits, should never appear in - * a normal trace or as part of an instruction. 
- */ - tier2 op(_COLD_EXIT, (--)) { - _PyExecutorObject *previous = (_PyExecutorObject *)tstate->previous_executor; - _PyExitData *exit = &previous->exits[oparg]; - PyCodeObject *code = _PyFrame_GetCode(frame); - _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target; - _Py_BackoffCounter temperature = exit->temperature; - if (!backoff_counter_triggers(temperature)) { - exit->temperature = advance_backoff_counter(temperature); - GOTO_TIER_ONE(target); - } - _PyExecutorObject *executor; - if (target->op.code == ENTER_EXECUTOR) { - executor = code->co_executors->executors[target->op.arg]; - Py_INCREF(executor); - } - else { - int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor); - if (optimized <= 0) { - exit->temperature = restart_backoff_counter(temperature); - if (optimized < 0) { - Py_DECREF(previous); - tstate->previous_executor = Py_None; - GOTO_UNWIND(); - } - GOTO_TIER_ONE(target); - } - } - /* We need two references. One to store in exit->executor and - * one to keep the executor alive when executing. 
*/ - Py_INCREF(executor); - exit->executor = executor; - GOTO_TIER_TWO(executor); - } - tier2 op(_DYNAMIC_EXIT, (--)) { tstate->previous_executor = (PyObject *)current_executor; _PyExitData *exit = (_PyExitData *)&current_executor->exits[oparg]; _Py_CODEUNIT *target = frame->instr_ptr; + #if defined(Py_DEBUG) && !defined(_Py_JIT) + OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); + if (lltrace >= 2) { + printf("DYNAMIC EXIT: [UOp "); + _PyUOpPrint(&next_uop[-1]); + printf(", exit %u, temp %d, target %d -> %s]\n", + oparg, exit->temperature.as_counter, + (int)(target - _PyCode_CODE(_PyFrame_GetCode(frame))), + _PyOpcode_OpName[target->op.code]); + } + #endif _PyExecutorObject *executor; if (target->op.code == ENTER_EXECUTOR) { PyCodeObject *code = (PyCodeObject *)frame->f_executable; diff --git a/Python/ceval.c b/Python/ceval.c index f4b3a417025..a71244676f3 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1054,13 +1054,13 @@ enter_tier_two: uint64_t trace_uop_execution_counter = 0; #endif - assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT); + assert(next_uop->opcode == _START_EXECUTOR); tier2_dispatch: for (;;) { uopcode = next_uop->opcode; #ifdef Py_DEBUG if (lltrace >= 3) { - if (next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT) { + if (next_uop->opcode == _START_EXECUTOR) { printf("%4d uop: ", 0); } else { @@ -1148,25 +1148,6 @@ goto_to_tier1: tstate->previous_executor = NULL; DISPATCH(); -exit_to_trace: - assert(next_uop[-1].format == UOP_FORMAT_EXIT); - OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); - uint32_t exit_index = next_uop[-1].exit_index; - assert(exit_index < current_executor->exit_count); - _PyExitData *exit = &current_executor->exits[exit_index]; -#ifdef Py_DEBUG - if (lltrace >= 2) { - printf("SIDE EXIT: [UOp "); - _PyUOpPrint(&next_uop[-1]); - printf(", exit %u, temp %d, target %d -> %s]\n", - exit_index, exit->temperature.as_counter, exit->target, - 
_PyOpcode_OpName[_PyCode_CODE(_PyFrame_GetCode(frame))[exit->target].op.code]); - } -#endif - Py_INCREF(exit->executor); - tstate->previous_executor = (PyObject *)current_executor; - GOTO_TIER_TWO(exit->executor); - #endif // _Py_JIT #endif // _Py_TIER2 diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index f6d055a1dfa..595b72bfaf9 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -426,7 +426,7 @@ do { \ do { \ OPT_STAT_INC(traces_executed); \ next_uop = (EXECUTOR)->trace; \ - assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT); \ + assert(next_uop->opcode == _START_EXECUTOR); \ goto enter_tier_two; \ } while (0) #endif @@ -446,7 +446,6 @@ do { \ #define JUMP_TO_JUMP_TARGET() goto jump_to_jump_target #define JUMP_TO_ERROR() goto jump_to_error_target #define GOTO_UNWIND() goto error_tier_two -#define EXIT_TO_TRACE() goto exit_to_trace #define EXIT_TO_TIER1() goto exit_to_tier1 #define EXIT_TO_TIER1_DYNAMIC() goto exit_to_tier1_dynamic; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 76b7a9b4b15..d70a57a9a8f 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4824,7 +4824,51 @@ } case _EXIT_TRACE: { - EXIT_TO_TRACE(); + oparg = CURRENT_OPARG(); + _PyExitData *exit = &current_executor->exits[oparg]; + PyCodeObject *code = _PyFrame_GetCode(frame); + _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target; + #if defined(Py_DEBUG) && !defined(_Py_JIT) + OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); + if (lltrace >= 2) { + printf("SIDE EXIT: [UOp "); + _PyUOpPrint(&next_uop[-1]); + printf(", exit %u, temp %d, target %d -> %s]\n", + oparg, exit->temperature.as_counter, + (int)(target - _PyCode_CODE(code)), + _PyOpcode_OpName[target->op.code]); + } + #endif + if (exit->executor == NULL) { + _Py_BackoffCounter temperature = exit->temperature; + if (!backoff_counter_triggers(temperature)) { + exit->temperature = advance_backoff_counter(temperature); + 
tstate->previous_executor = (PyObject *)current_executor; + GOTO_TIER_ONE(target); + } + _PyExecutorObject *executor; + if (target->op.code == ENTER_EXECUTOR) { + executor = code->co_executors->executors[target->op.arg]; + Py_INCREF(executor); + } + else { + int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor); + if (optimized <= 0) { + exit->temperature = restart_backoff_counter(temperature); + if (optimized < 0) { + Py_DECREF(current_executor); + tstate->previous_executor = Py_None; + GOTO_UNWIND(); + } + tstate->previous_executor = (PyObject *)current_executor; + GOTO_TIER_ONE(target); + } + } + exit->executor = executor; + } + Py_INCREF(exit->executor); + tstate->previous_executor = (PyObject *)current_executor; + GOTO_TIER_TWO(exit->executor); break; } @@ -4913,47 +4957,22 @@ break; } - case _COLD_EXIT: { - oparg = CURRENT_OPARG(); - _PyExecutorObject *previous = (_PyExecutorObject *)tstate->previous_executor; - _PyExitData *exit = &previous->exits[oparg]; - PyCodeObject *code = _PyFrame_GetCode(frame); - _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target; - _Py_BackoffCounter temperature = exit->temperature; - if (!backoff_counter_triggers(temperature)) { - exit->temperature = advance_backoff_counter(temperature); - GOTO_TIER_ONE(target); - } - _PyExecutorObject *executor; - if (target->op.code == ENTER_EXECUTOR) { - executor = code->co_executors->executors[target->op.arg]; - Py_INCREF(executor); - } - else { - int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor); - if (optimized <= 0) { - exit->temperature = restart_backoff_counter(temperature); - if (optimized < 0) { - Py_DECREF(previous); - tstate->previous_executor = Py_None; - GOTO_UNWIND(); - } - GOTO_TIER_ONE(target); - } - } - /* We need two references. One to store in exit->executor and - * one to keep the executor alive when executing. 
*/ - Py_INCREF(executor); - exit->executor = executor; - GOTO_TIER_TWO(executor); - break; - } - case _DYNAMIC_EXIT: { oparg = CURRENT_OPARG(); tstate->previous_executor = (PyObject *)current_executor; _PyExitData *exit = (_PyExitData *)&current_executor->exits[oparg]; _Py_CODEUNIT *target = frame->instr_ptr; + #if defined(Py_DEBUG) && !defined(_Py_JIT) + OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); + if (lltrace >= 2) { + printf("DYNAMIC EXIT: [UOp "); + _PyUOpPrint(&next_uop[-1]); + printf(", exit %u, temp %d, target %d -> %s]\n", + oparg, exit->temperature.as_counter, + (int)(target - _PyCode_CODE(_PyFrame_GetCode(frame))), + _PyOpcode_OpName[target->op.code]); + } + #endif _PyExecutorObject *executor; if (target->op.code == ENTER_EXECUTOR) { PyCodeObject *code = (PyCodeObject *)frame->f_executable; diff --git a/Python/jit.c b/Python/jit.c index d0c0d24f453..33320761621 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -439,7 +439,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz group->emit(code, data, executor, NULL, instruction_starts); code += group->code_size; data += group->data_size; - assert(trace[0].opcode == _START_EXECUTOR || trace[0].opcode == _COLD_EXIT); + assert(trace[0].opcode == _START_EXECUTOR); for (size_t i = 0; i < length; i++) { const _PyUOpInstruction *instruction = &trace[i]; group = &stencil_groups[instruction->opcode]; diff --git a/Python/optimizer.c b/Python/optimizer.c index f7387dc0b27..561ec4efa4e 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -144,18 +144,6 @@ _Py_GetOptimizer(void) static _PyExecutorObject * make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFilter *dependencies); -static int -init_cold_exit_executor(_PyExecutorObject *executor, int oparg); - -/* It is impossible for the number of exits to reach 1/4 of the total length, - * as the number of exits cannot reach 1/3 of the number of non-exits, due to - * the presence of CHECK_VALIDITY 
checks and instructions to produce the values - * being checked in exits. */ -#define COLD_EXIT_COUNT (UOP_MAX_TRACE_LENGTH/4) - -static int cold_exits_initialized = 0; -static _PyExecutorObject COLD_EXITS[COLD_EXIT_COUNT] = { 0 }; - static const _PyBloomFilter EMPTY_FILTER = { 0 }; _PyOptimizerObject * @@ -164,14 +152,6 @@ _Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer) if (optimizer == NULL) { optimizer = &_PyOptimizer_Default; } - else if (cold_exits_initialized == 0) { - cold_exits_initialized = 1; - for (int i = 0; i < COLD_EXIT_COUNT; i++) { - if (init_cold_exit_executor(&COLD_EXITS[i], i)) { - return NULL; - } - } - } _PyOptimizerObject *old = interp->optimizer; if (old == NULL) { old = &_PyOptimizer_Default; @@ -317,12 +297,6 @@ _PyUOpPrint(const _PyUOpInstruction *uop) uop->jump_target, (uint64_t)uop->operand); break; - case UOP_FORMAT_EXIT: - printf(" (%d, exit_index=%d, operand=%#" PRIx64, - uop->oparg, - uop->exit_index, - (uint64_t)uop->operand); - break; default: printf(" (%d, Unknown format)", uop->oparg); } @@ -1094,7 +1068,7 @@ sanity_check(_PyExecutorObject *executor) } bool ended = false; uint32_t i = 0; - CHECK(executor->trace[0].opcode == _START_EXECUTOR || executor->trace[0].opcode == _COLD_EXIT); + CHECK(executor->trace[0].opcode == _START_EXECUTOR); for (; i < executor->code_size; i++) { const _PyUOpInstruction *inst = &executor->trace[i]; uint16_t opcode = inst->opcode; @@ -1104,22 +1078,15 @@ sanity_check(_PyExecutorObject *executor) case UOP_FORMAT_TARGET: CHECK(target_unused(opcode)); break; - case UOP_FORMAT_EXIT: - CHECK(opcode == _EXIT_TRACE); - CHECK(inst->exit_index < executor->exit_count); - break; case UOP_FORMAT_JUMP: CHECK(inst->jump_target < executor->code_size); break; - case UOP_FORMAT_UNUSED: - CHECK(0); - break; } if (_PyUop_Flags[opcode] & HAS_ERROR_FLAG) { CHECK(inst->format == UOP_FORMAT_JUMP); CHECK(inst->error_target < executor->code_size); } - if (opcode == _JUMP_TO_TOP || opcode == 
_EXIT_TRACE || opcode == _COLD_EXIT) { + if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) { ended = true; i++; break; @@ -1133,9 +1100,6 @@ sanity_check(_PyExecutorObject *executor) opcode == _DEOPT || opcode == _EXIT_TRACE || opcode == _ERROR_POP_N); - if (opcode == _EXIT_TRACE) { - CHECK(inst->format == UOP_FORMAT_EXIT); - } } } @@ -1157,9 +1121,8 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil } /* Initialize exits */ - assert(exit_count < COLD_EXIT_COUNT); for (int i = 0; i < exit_count; i++) { - executor->exits[i].executor = &COLD_EXITS[i]; + executor->exits[i].executor = NULL; executor->exits[i].temperature = initial_temperature_backoff_counter(); } int next_exit = exit_count-1; @@ -1173,8 +1136,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil assert(opcode != _POP_JUMP_IF_FALSE && opcode != _POP_JUMP_IF_TRUE); if (opcode == _EXIT_TRACE) { executor->exits[next_exit].target = buffer[i].target; - dest->exit_index = next_exit; - dest->format = UOP_FORMAT_EXIT; + dest->oparg = next_exit; next_exit--; } if (opcode == _DYNAMIC_EXIT) { @@ -1216,36 +1178,6 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil return executor; } -static int -init_cold_exit_executor(_PyExecutorObject *executor, int oparg) -{ - _Py_SetImmortalUntracked((PyObject *)executor); - Py_SET_TYPE(executor, &_PyUOpExecutor_Type); - executor->trace = (_PyUOpInstruction *)executor->exits; - executor->code_size = 1; - executor->exit_count = 0; - _PyUOpInstruction *inst = (_PyUOpInstruction *)&executor->trace[0]; - inst->opcode = _COLD_EXIT; - inst->oparg = oparg; - executor->vm_data.valid = true; - executor->vm_data.linked = false; - for (int i = 0; i < _Py_BLOOM_FILTER_WORDS; i++) { - assert(executor->vm_data.bloom.bits[i] == 0); - } -#ifdef Py_DEBUG - sanity_check(executor); -#endif -#ifdef _Py_JIT - executor->jit_code = NULL; - executor->jit_side_entry = NULL; - executor->jit_size = 0; - if 
(_PyJIT_Compile(executor, executor->trace, 1)) { - return -1; - } -#endif - return 0; -} - #ifdef Py_STATS /* Returns the effective trace length. * Ignores NOPs and trailing exit and error handling.*/ @@ -1258,8 +1190,7 @@ int effective_trace_length(_PyUOpInstruction *buffer, int length) nop_count++; } if (opcode == _EXIT_TRACE || - opcode == _JUMP_TO_TOP || - opcode == _COLD_EXIT) { + opcode == _JUMP_TO_TOP) { return i+1-nop_count; } } @@ -1624,13 +1555,8 @@ executor_clear(_PyExecutorObject *executor) */ Py_INCREF(executor); for (uint32_t i = 0; i < executor->exit_count; i++) { - const _PyExecutorObject *cold = &COLD_EXITS[i]; - const _PyExecutorObject *side = executor->exits[i].executor; executor->exits[i].temperature = initial_unreachable_backoff_counter(); - if (side != cold) { - executor->exits[i].executor = cold; - Py_DECREF(side); - } + Py_CLEAR(executor->exits[i].executor); } _Py_ExecutorDetach(executor); Py_DECREF(executor); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index a414b04fb6a..978aa911b52 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -2180,10 +2180,6 @@ break; } - case _COLD_EXIT: { - break; - } - case _DYNAMIC_EXIT: { break; } diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 466f25daa14..63b640e465a 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -388,10 +388,8 @@ Python/optimizer.c - _PyUOpExecutor_Type - Python/optimizer.c - _PyUOpOptimizer_Type - Python/optimizer.c - _PyOptimizer_Default - Python/optimizer.c - _ColdExit_Type - -Python/optimizer.c - COLD_EXITS - Python/optimizer.c - Py_FatalErrorExecutor - Python/optimizer.c - EMPTY_FILTER - -Python/optimizer.c - cold_exits_initialized - ##----------------------- ## test code diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 755649dea54..ede5a9930e7 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -40,8 +40,6 @@ 
class HoleValue(enum.Enum): JUMP_TARGET = enum.auto() # The base address of the machine code for the error jump target (exposed as _JIT_ERROR_TARGET): ERROR_TARGET = enum.auto() - # The index of the exit to be jumped through (exposed as _JIT_EXIT_INDEX): - EXIT_INDEX = enum.auto() # A hardcoded value of zero (used for symbol lookups): ZERO = enum.auto() @@ -107,7 +105,6 @@ _HOLE_EXPRS = { HoleValue.TARGET: "instruction->target", HoleValue.JUMP_TARGET: "instruction_starts[instruction->jump_target]", HoleValue.ERROR_TARGET: "instruction_starts[instruction->error_target]", - HoleValue.EXIT_INDEX: "instruction->exit_index", HoleValue.ZERO: "", } diff --git a/Tools/jit/template.c b/Tools/jit/template.c index 2bcbf8d615b..ec7d033e89d 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ -103,7 +103,6 @@ _JIT_ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState uint64_t _operand = ((uint64_t)_operand_hi << 32) | _operand_lo; #endif PATCH_VALUE(uint32_t, _target, _JIT_TARGET) - PATCH_VALUE(uint16_t, _exit_index, _JIT_EXIT_INDEX) OPT_STAT_INC(uops_executed); UOP_STAT_INC(uopcode, execution_count); @@ -126,11 +125,4 @@ exit_to_tier1: exit_to_tier1_dynamic: tstate->previous_executor = (PyObject *)current_executor; GOTO_TIER_ONE(frame->instr_ptr); -exit_to_trace: - { - _PyExitData *exit = &current_executor->exits[_exit_index]; - Py_INCREF(exit->executor); - tstate->previous_executor = (PyObject *)current_executor; - GOTO_TIER_TWO(exit->executor); - } }