From ddf66b54edea1ea59fdf8a496ed0b64e16424375 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 24 Aug 2023 17:36:00 -0700 Subject: [PATCH] gh-106581: Split CALL_BOUND_METHOD_EXACT_ARGS into uops (#108462) Instead of using `GO_TO_INSTRUCTION(CALL_PY_EXACT_ARGS)` we just add the macro elements of the latter to the macro for the former. This requires lengthening the uops array in struct opcode_macro_expansion. (It also required changes to stacking.py that were merged already.) --- Include/internal/pycore_opcode_metadata.h | 41 ++++++---- Python/abstract_interp_cases.c.h | 10 +++ Python/bytecodes.c | 29 ++++--- Python/executor_cases.c.h | 26 ++++++ Python/generated_cases.c.h | 98 ++++++++++++++++++++--- Tools/cases_generator/generate_cases.py | 2 +- 6 files changed, 171 insertions(+), 35 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index cc8894ad539..bf7e9dcbffa 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -57,16 +57,18 @@ #define _ITER_CHECK_RANGE 329 #define _IS_ITER_EXHAUSTED_RANGE 330 #define _ITER_NEXT_RANGE 331 -#define _CHECK_PEP_523 332 -#define _CHECK_FUNCTION_EXACT_ARGS 333 -#define _CHECK_STACK_SPACE 334 -#define _INIT_CALL_PY_EXACT_ARGS 335 -#define _PUSH_FRAME 336 -#define _POP_JUMP_IF_FALSE 337 -#define _POP_JUMP_IF_TRUE 338 -#define JUMP_TO_TOP 339 -#define SAVE_CURRENT_IP 340 -#define INSERT 341 +#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 332 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 333 +#define _CHECK_PEP_523 334 +#define _CHECK_FUNCTION_EXACT_ARGS 335 +#define _CHECK_STACK_SPACE 336 +#define _INIT_CALL_PY_EXACT_ARGS 337 +#define _PUSH_FRAME 338 +#define _POP_JUMP_IF_FALSE 339 +#define _POP_JUMP_IF_TRUE 340 +#define JUMP_TO_TOP 341 +#define SAVE_CURRENT_IP 342 +#define INSERT 343 extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump); #ifdef NEED_OPCODE_METADATA @@ -484,7 +486,9 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return 0; case CALL: return oparg + 2; - case CALL_BOUND_METHOD_EXACT_ARGS: + case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: + return oparg + 2; + case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: return oparg + 2; case _CHECK_PEP_523: return 0; @@ -496,6 +500,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) { return oparg + 2; case _PUSH_FRAME: return 1; + case CALL_BOUND_METHOD_EXACT_ARGS: + return oparg + 2; case CALL_PY_EXACT_ARGS: return oparg + 2; case CALL_PY_WITH_DEFAULTS: @@ -1012,8 +1018,10 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 0; case CALL: return 1; - case CALL_BOUND_METHOD_EXACT_ARGS: - return 1; + case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: + return oparg + 2; + case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: + return oparg + 2; case _CHECK_PEP_523: return 0; case _CHECK_FUNCTION_EXACT_ARGS: @@ -1024,6 +1032,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) { return 1; case _PUSH_FRAME: return 1; + case CALL_BOUND_METHOD_EXACT_ARGS: + return 1; case CALL_PY_EXACT_ARGS: return 1; case CALL_PY_WITH_DEFAULTS: @@ -1163,7 +1173,7 @@ struct opcode_metadata { struct opcode_macro_expansion { int nuops; - struct { int16_t uop; int8_t size; int8_t offset; } uops[8]; + struct { int16_t uop; int8_t size; int8_t offset; } uops[12]; }; #define OPARG_FULL 0 @@ -1518,6 +1528,7 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN [GET_YIELD_FROM_ITER] = { .nuops = 1, .uops = { { GET_YIELD_FROM_ITER, 0, 0 } } }, [WITH_EXCEPT_START] = { .nuops = 1, .uops = { { WITH_EXCEPT_START, 0, 0 } } }, [PUSH_EXC_INFO] = { .nuops = 1, .uops = { { PUSH_EXC_INFO, 0, 0 } } }, + [CALL_BOUND_METHOD_EXACT_ARGS] = { .nuops = 9, .uops = { { _CHECK_PEP_523, 0, 0 }, { _CHECK_CALL_BOUND_METHOD_EXACT_ARGS, 0, 0 }, { _INIT_CALL_BOUND_METHOD_EXACT_ARGS, 0, 0 }, { _CHECK_FUNCTION_EXACT_ARGS, 2, 1 }, { _CHECK_STACK_SPACE, 0, 0 }, { _INIT_CALL_PY_EXACT_ARGS, 0, 0 }, { SAVE_IP, 7, 3 }, { SAVE_CURRENT_IP, 0, 0 }, { _PUSH_FRAME, 0, 0 } } }, [CALL_PY_EXACT_ARGS] = { .nuops = 7, .uops = { { _CHECK_PEP_523, 0, 0 }, { _CHECK_FUNCTION_EXACT_ARGS, 2, 1 }, { _CHECK_STACK_SPACE, 0, 0 }, { _INIT_CALL_PY_EXACT_ARGS, 0, 0 }, { SAVE_IP, 7, 3 }, { SAVE_CURRENT_IP, 0, 0 }, { _PUSH_FRAME, 0, 0 } } }, [CALL_NO_KW_TYPE_1] = { .nuops = 1, .uops = { { CALL_NO_KW_TYPE_1, 0, 0 } } }, [CALL_NO_KW_STR_1] = { .nuops = 1, .uops = { { CALL_NO_KW_STR_1, 0, 0 } } }, @@ -1577,6 +1588,8 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = { [_ITER_CHECK_RANGE] = "_ITER_CHECK_RANGE", [_IS_ITER_EXHAUSTED_RANGE] = "_IS_ITER_EXHAUSTED_RANGE", [_ITER_NEXT_RANGE] = "_ITER_NEXT_RANGE", + [_CHECK_CALL_BOUND_METHOD_EXACT_ARGS] = "_CHECK_CALL_BOUND_METHOD_EXACT_ARGS", + [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = "_INIT_CALL_BOUND_METHOD_EXACT_ARGS", [_CHECK_PEP_523] = "_CHECK_PEP_523", [_CHECK_FUNCTION_EXACT_ARGS] = "_CHECK_FUNCTION_EXACT_ARGS", [_CHECK_STACK_SPACE] = "_CHECK_STACK_SPACE", diff --git a/Python/abstract_interp_cases.c.h b/Python/abstract_interp_cases.c.h index 1b99b929fa8..07e8a711575 100644 --- a/Python/abstract_interp_cases.c.h +++ b/Python/abstract_interp_cases.c.h @@ -621,6 +621,16 @@ break; } + case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: { + break; + } + + case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: { + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-2 - oparg)), true); + PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - oparg)), true); + break; + } + case _CHECK_PEP_523: { break; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index ae459cabadd..a5cb117c763 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2940,19 +2940,18 @@ dummy_func( CHECK_EVAL_BREAKER(); } - // Start out with [NULL, bound_method, arg1, arg2, ...] - // Transform to [callable, self, arg1, arg2, ...] - // Then fall through to CALL_PY_EXACT_ARGS - inst(CALL_BOUND_METHOD_EXACT_ARGS, (unused/1, unused/2, callable, null, unused[oparg] -- unused)) { + op(_CHECK_CALL_BOUND_METHOD_EXACT_ARGS, (callable, null, unused[oparg] -- callable, null, unused[oparg])) { DEOPT_IF(null != NULL, CALL); DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type, CALL); + } + + op(_INIT_CALL_BOUND_METHOD_EXACT_ARGS, (callable, unused, unused[oparg] -- func, self, unused[oparg])) { STAT_INC(CALL, hit); - PyObject *self = ((PyMethodObject *)callable)->im_self; - PEEK(oparg + 1) = Py_NewRef(self); // self_or_null - PyObject *meth = ((PyMethodObject *)callable)->im_func; - PEEK(oparg + 2) = Py_NewRef(meth); // callable + self = Py_NewRef(((PyMethodObject *)callable)->im_self); + stack_pointer[-1 - oparg] = self; // Patch stack as it is used by _INIT_CALL_PY_EXACT_ARGS + func = Py_NewRef(((PyMethodObject *)callable)->im_func); + stack_pointer[-2 - oparg] = func; // This is used by CALL, upon deoptimization Py_DECREF(callable); - GO_TO_INSTRUCTION(CALL_PY_EXACT_ARGS); } op(_CHECK_PEP_523, (--)) { @@ -3010,6 +3009,18 @@ dummy_func( #endif } + macro(CALL_BOUND_METHOD_EXACT_ARGS) = + unused/1 + // Skip over the counter + _CHECK_PEP_523 + + _CHECK_CALL_BOUND_METHOD_EXACT_ARGS + + _INIT_CALL_BOUND_METHOD_EXACT_ARGS + + _CHECK_FUNCTION_EXACT_ARGS + + _CHECK_STACK_SPACE + + _INIT_CALL_PY_EXACT_ARGS + + SAVE_IP + // Tier 2 only; special-cased oparg + SAVE_CURRENT_IP + // Sets frame->prev_instr + _PUSH_FRAME; + macro(CALL_PY_EXACT_ARGS) = unused/1 + // Skip over the counter _CHECK_PEP_523 + diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 89a5bbfecde..85c60c276e0 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2190,6 +2190,32 @@ break; } + case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: { + PyObject *null; + PyObject *callable; + null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + DEOPT_IF(null != NULL, CALL); + DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type, CALL); + break; + } + + case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: { + PyObject *callable; + PyObject *func; + PyObject *self; + callable = stack_pointer[-2 - oparg]; + STAT_INC(CALL, hit); + self = Py_NewRef(((PyMethodObject *)callable)->im_self); + stack_pointer[-1 - oparg] = self; // Patch stack as it is used by _INIT_CALL_PY_EXACT_ARGS + func = Py_NewRef(((PyMethodObject *)callable)->im_func); + stack_pointer[-2 - oparg] = func; // This is used by CALL, upon deoptimization + Py_DECREF(callable); + stack_pointer[-2 - oparg] = func; + stack_pointer[-1 - oparg] = self; + break; + } + case _CHECK_PEP_523: { DEOPT_IF(tstate->interp->eval_frame, CALL); break; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 1724d112317..4aa16f8311a 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3795,23 +3795,99 @@ TARGET(CALL_BOUND_METHOD_EXACT_ARGS) { PyObject *null; PyObject *callable; + PyObject *self; + PyObject *self_or_null; + PyObject *func; + PyObject **args; + _PyInterpreterFrame *new_frame; + // _CHECK_PEP_523 + { + DEOPT_IF(tstate->interp->eval_frame, CALL); + } + // _CHECK_CALL_BOUND_METHOD_EXACT_ARGS null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; - DEOPT_IF(null != NULL, CALL); - DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type, CALL); - STAT_INC(CALL, hit); - PyObject *self = ((PyMethodObject *)callable)->im_self; - PEEK(oparg + 1) = Py_NewRef(self); // self_or_null - PyObject *meth = ((PyMethodObject *)callable)->im_func; - PEEK(oparg + 2) = Py_NewRef(meth); // callable - Py_DECREF(callable); - GO_TO_INSTRUCTION(CALL_PY_EXACT_ARGS); + { + DEOPT_IF(null != NULL, CALL); + DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type, CALL); + } + // _INIT_CALL_BOUND_METHOD_EXACT_ARGS + { + STAT_INC(CALL, hit); + self = Py_NewRef(((PyMethodObject *)callable)->im_self); + stack_pointer[-1 - oparg] = self; // Patch stack as it is used by _INIT_CALL_PY_EXACT_ARGS + func = Py_NewRef(((PyMethodObject *)callable)->im_func); + stack_pointer[-2 - oparg] = func; // This is used by CALL, upon deoptimization + Py_DECREF(callable); + } + // _CHECK_FUNCTION_EXACT_ARGS + self_or_null = self; + callable = func; + { + uint32_t func_version = read_u32(&next_instr[1].cache); + ASSERT_KWNAMES_IS_NULL(); + DEOPT_IF(!PyFunction_Check(callable), CALL); + PyFunctionObject *func = (PyFunctionObject *)callable; + DEOPT_IF(func->func_version != func_version, CALL); + PyCodeObject *code = (PyCodeObject *)func->func_code; + DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL), CALL); + } + // _CHECK_STACK_SPACE + { + PyFunctionObject *func = (PyFunctionObject *)callable; + PyCodeObject *code = (PyCodeObject *)func->func_code; + DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL); + } + // _INIT_CALL_PY_EXACT_ARGS + args = stack_pointer - oparg; + { + int argcount = oparg; + if (self_or_null != NULL) { + args--; + argcount++; + } + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, argcount); + for (int i = 0; i < argcount; i++) { + new_frame->localsplus[i] = args[i]; + } + } + // SAVE_CURRENT_IP + next_instr += 3; + { + #if TIER_ONE + frame->prev_instr = next_instr - 1; + #endif + #if TIER_TWO + // Relies on a preceding SAVE_IP + frame->prev_instr--; + #endif + } + // _PUSH_FRAME STACK_SHRINK(oparg); - STACK_SHRINK(1); + STACK_SHRINK(2); + { + // Write it out explicitly because it's subtly different. + // Eventually this should be the only occurrence of this code. + frame->return_offset = 0; + assert(tstate->interp->eval_frame == NULL); + _PyFrame_SetStackPointer(frame, stack_pointer); + new_frame->previous = frame; + CALL_STAT_INC(inlined_py_calls); + frame = tstate->current_frame = new_frame; + #if TIER_ONE + goto start_frame; + #endif + #if TIER_TWO + if (_Py_EnterRecursivePy(tstate)) goto pop_1_exit_unwind; + stack_pointer = _PyFrame_GetStackPointer(frame); + ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; + #endif + } } TARGET(CALL_PY_EXACT_ARGS) { - PREDICTED(CALL_PY_EXACT_ARGS); PyObject *self_or_null; PyObject *callable; PyObject **args; diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 8f9a6502e52..9400a0bce46 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -444,7 +444,7 @@ class Generator(Analyzer): with self.out.block("struct opcode_macro_expansion", ";"): self.out.emit("int nuops;") self.out.emit( - "struct { int16_t uop; int8_t size; int8_t offset; } uops[8];" + "struct { int16_t uop; int8_t size; int8_t offset; } uops[12];" ) self.out.emit("")