gh-106581: Split CALL_BOUND_METHOD_EXACT_ARGS into uops (#108462)

Instead of using `GO_TO_INSTRUCTION(CALL_PY_EXACT_ARGS)` we just add the macro elements of the latter to the macro for the former. This requires lengthening the uops array in struct opcode_macro_expansion. (It also required changes to stacking.py that were merged already.)
This commit is contained in:
Guido van Rossum 2023-08-24 17:36:00 -07:00 committed by GitHub
parent 546cab8444
commit ddf66b54ed
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 171 additions and 35 deletions

View File

@ -57,16 +57,18 @@
#define _ITER_CHECK_RANGE 329
#define _IS_ITER_EXHAUSTED_RANGE 330
#define _ITER_NEXT_RANGE 331
#define _CHECK_PEP_523 332
#define _CHECK_FUNCTION_EXACT_ARGS 333
#define _CHECK_STACK_SPACE 334
#define _INIT_CALL_PY_EXACT_ARGS 335
#define _PUSH_FRAME 336
#define _POP_JUMP_IF_FALSE 337
#define _POP_JUMP_IF_TRUE 338
#define JUMP_TO_TOP 339
#define SAVE_CURRENT_IP 340
#define INSERT 341
#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 332
#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 333
#define _CHECK_PEP_523 334
#define _CHECK_FUNCTION_EXACT_ARGS 335
#define _CHECK_STACK_SPACE 336
#define _INIT_CALL_PY_EXACT_ARGS 337
#define _PUSH_FRAME 338
#define _POP_JUMP_IF_FALSE 339
#define _POP_JUMP_IF_TRUE 340
#define JUMP_TO_TOP 341
#define SAVE_CURRENT_IP 342
#define INSERT 343
extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump);
#ifdef NEED_OPCODE_METADATA
@ -484,7 +486,9 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
return 0;
case CALL:
return oparg + 2;
case CALL_BOUND_METHOD_EXACT_ARGS:
case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS:
return oparg + 2;
case _INIT_CALL_BOUND_METHOD_EXACT_ARGS:
return oparg + 2;
case _CHECK_PEP_523:
return 0;
@ -496,6 +500,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
return oparg + 2;
case _PUSH_FRAME:
return 1;
case CALL_BOUND_METHOD_EXACT_ARGS:
return oparg + 2;
case CALL_PY_EXACT_ARGS:
return oparg + 2;
case CALL_PY_WITH_DEFAULTS:
@ -1012,8 +1018,10 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
return 0;
case CALL:
return 1;
case CALL_BOUND_METHOD_EXACT_ARGS:
return 1;
case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS:
return oparg + 2;
case _INIT_CALL_BOUND_METHOD_EXACT_ARGS:
return oparg + 2;
case _CHECK_PEP_523:
return 0;
case _CHECK_FUNCTION_EXACT_ARGS:
@ -1024,6 +1032,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
return 1;
case _PUSH_FRAME:
return 1;
case CALL_BOUND_METHOD_EXACT_ARGS:
return 1;
case CALL_PY_EXACT_ARGS:
return 1;
case CALL_PY_WITH_DEFAULTS:
@ -1163,7 +1173,7 @@ struct opcode_metadata {
struct opcode_macro_expansion {
int nuops;
struct { int16_t uop; int8_t size; int8_t offset; } uops[8];
struct { int16_t uop; int8_t size; int8_t offset; } uops[12];
};
#define OPARG_FULL 0
@ -1518,6 +1528,7 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN
[GET_YIELD_FROM_ITER] = { .nuops = 1, .uops = { { GET_YIELD_FROM_ITER, 0, 0 } } },
[WITH_EXCEPT_START] = { .nuops = 1, .uops = { { WITH_EXCEPT_START, 0, 0 } } },
[PUSH_EXC_INFO] = { .nuops = 1, .uops = { { PUSH_EXC_INFO, 0, 0 } } },
[CALL_BOUND_METHOD_EXACT_ARGS] = { .nuops = 9, .uops = { { _CHECK_PEP_523, 0, 0 }, { _CHECK_CALL_BOUND_METHOD_EXACT_ARGS, 0, 0 }, { _INIT_CALL_BOUND_METHOD_EXACT_ARGS, 0, 0 }, { _CHECK_FUNCTION_EXACT_ARGS, 2, 1 }, { _CHECK_STACK_SPACE, 0, 0 }, { _INIT_CALL_PY_EXACT_ARGS, 0, 0 }, { SAVE_IP, 7, 3 }, { SAVE_CURRENT_IP, 0, 0 }, { _PUSH_FRAME, 0, 0 } } },
[CALL_PY_EXACT_ARGS] = { .nuops = 7, .uops = { { _CHECK_PEP_523, 0, 0 }, { _CHECK_FUNCTION_EXACT_ARGS, 2, 1 }, { _CHECK_STACK_SPACE, 0, 0 }, { _INIT_CALL_PY_EXACT_ARGS, 0, 0 }, { SAVE_IP, 7, 3 }, { SAVE_CURRENT_IP, 0, 0 }, { _PUSH_FRAME, 0, 0 } } },
[CALL_NO_KW_TYPE_1] = { .nuops = 1, .uops = { { CALL_NO_KW_TYPE_1, 0, 0 } } },
[CALL_NO_KW_STR_1] = { .nuops = 1, .uops = { { CALL_NO_KW_STR_1, 0, 0 } } },
@ -1577,6 +1588,8 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = {
[_ITER_CHECK_RANGE] = "_ITER_CHECK_RANGE",
[_IS_ITER_EXHAUSTED_RANGE] = "_IS_ITER_EXHAUSTED_RANGE",
[_ITER_NEXT_RANGE] = "_ITER_NEXT_RANGE",
[_CHECK_CALL_BOUND_METHOD_EXACT_ARGS] = "_CHECK_CALL_BOUND_METHOD_EXACT_ARGS",
[_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = "_INIT_CALL_BOUND_METHOD_EXACT_ARGS",
[_CHECK_PEP_523] = "_CHECK_PEP_523",
[_CHECK_FUNCTION_EXACT_ARGS] = "_CHECK_FUNCTION_EXACT_ARGS",
[_CHECK_STACK_SPACE] = "_CHECK_STACK_SPACE",

View File

@ -621,6 +621,16 @@
break;
}
// _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: nothing is recorded for this uop here;
// the case only leaves the switch.
case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: {
break;
}
// _INIT_CALL_BOUND_METHOD_EXACT_ARGS: applies PARTITIONNODE_OVERWRITE with
// PARTITIONNODE_NULLROOT to two stack entries.
// The PEEK arguments simplify to oparg + 2 and oparg + 1 respectively.
// NOTE(review): presumably these are the two slots just below the oparg
// arguments, whose previously tracked contents are being discarded --
// confirm against the PEEK and PARTITIONNODE_OVERWRITE definitions.
case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: {
PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-2 - oparg)), true);
PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1 - oparg)), true);
break;
}
case _CHECK_PEP_523: {
break;
}

View File

@ -2940,19 +2940,18 @@ dummy_func(
CHECK_EVAL_BREAKER();
}
// Start out with [NULL, bound_method, arg1, arg2, ...]
// Transform to [callable, self, arg1, arg2, ...]
// Then fall through to CALL_PY_EXACT_ARGS
inst(CALL_BOUND_METHOD_EXACT_ARGS, (unused/1, unused/2, callable, null, unused[oparg] -- unused)) {
op(_CHECK_CALL_BOUND_METHOD_EXACT_ARGS, (callable, null, unused[oparg] -- callable, null, unused[oparg])) {
DEOPT_IF(null != NULL, CALL);
DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type, CALL);
}
op(_INIT_CALL_BOUND_METHOD_EXACT_ARGS, (callable, unused, unused[oparg] -- func, self, unused[oparg])) {
STAT_INC(CALL, hit);
PyObject *self = ((PyMethodObject *)callable)->im_self;
PEEK(oparg + 1) = Py_NewRef(self); // self_or_null
PyObject *meth = ((PyMethodObject *)callable)->im_func;
PEEK(oparg + 2) = Py_NewRef(meth); // callable
self = Py_NewRef(((PyMethodObject *)callable)->im_self);
stack_pointer[-1 - oparg] = self; // Patch stack as it is used by _INIT_CALL_PY_EXACT_ARGS
func = Py_NewRef(((PyMethodObject *)callable)->im_func);
stack_pointer[-2 - oparg] = func; // This is used by CALL, upon deoptimization
Py_DECREF(callable);
GO_TO_INSTRUCTION(CALL_PY_EXACT_ARGS);
}
op(_CHECK_PEP_523, (--)) {
@ -3010,6 +3009,18 @@ dummy_func(
#endif
}
// CALL_BOUND_METHOD_EXACT_ARGS expressed as a sequence of uops.
// The two bound-method uops (type check, then replacing the bound method
// with its func and self) are inserted right after _CHECK_PEP_523; every
// other part is one that macro(CALL_PY_EXACT_ARGS) also lists, so no
// GO_TO_INSTRUCTION is needed.
macro(CALL_BOUND_METHOD_EXACT_ARGS) =
unused/1 + // Skip over the counter
_CHECK_PEP_523 +
_CHECK_CALL_BOUND_METHOD_EXACT_ARGS +
_INIT_CALL_BOUND_METHOD_EXACT_ARGS +
_CHECK_FUNCTION_EXACT_ARGS +
_CHECK_STACK_SPACE +
_INIT_CALL_PY_EXACT_ARGS +
SAVE_IP + // Tier 2 only; special-cased oparg
SAVE_CURRENT_IP + // Sets frame->prev_instr
_PUSH_FRAME;
macro(CALL_PY_EXACT_ARGS) =
unused/1 + // Skip over the counter
_CHECK_PEP_523 +

View File

@ -2190,6 +2190,32 @@
break;
}
// Guard uop: reads two stack slots and deoptimizes unless they look like a
// bound-method call. It writes nothing back to the stack.
case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: {
PyObject *null;
PyObject *callable;
// The two slots directly below the oparg entries at the top of the stack.
null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
// Continue only when the first slot is NULL and the type of callable is
// PyMethod_Type itself (pointer comparison, so subclasses do not match).
DEOPT_IF(null != NULL, CALL);
DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type, CALL);
break;
}
// Replaces the bound method held in the callable slot with its im_func and
// stores its im_self in the slot above it. The oparg entries on top of the
// stack are not touched.
case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: {
PyObject *callable;
PyObject *func;
PyObject *self;
callable = stack_pointer[-2 - oparg];
STAT_INC(CALL, hit);
// New references to im_self and im_func are taken before the reference to
// the bound method itself is released below.
self = Py_NewRef(((PyMethodObject *)callable)->im_self);
stack_pointer[-1 - oparg] = self; // Patch stack as it is used by _INIT_CALL_PY_EXACT_ARGS
func = Py_NewRef(((PyMethodObject *)callable)->im_func);
stack_pointer[-2 - oparg] = func; // This is used by CALL, upon deoptimization
Py_DECREF(callable);
// These two stores write the same values to the same slots as the stores
// above.
stack_pointer[-2 - oparg] = func;
stack_pointer[-1 - oparg] = self;
break;
}
case _CHECK_PEP_523: {
DEOPT_IF(tstate->interp->eval_frame, CALL);
break;

View File

@ -3795,23 +3795,99 @@
TARGET(CALL_BOUND_METHOD_EXACT_ARGS) {
PyObject *null;
PyObject *callable;
PyObject *self;
PyObject *self_or_null;
PyObject *func;
PyObject **args;
_PyInterpreterFrame *new_frame;
// _CHECK_PEP_523
{
DEOPT_IF(tstate->interp->eval_frame, CALL);
}
// _CHECK_CALL_BOUND_METHOD_EXACT_ARGS
null = stack_pointer[-1 - oparg];
callable = stack_pointer[-2 - oparg];
DEOPT_IF(null != NULL, CALL);
DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type, CALL);
STAT_INC(CALL, hit);
PyObject *self = ((PyMethodObject *)callable)->im_self;
PEEK(oparg + 1) = Py_NewRef(self); // self_or_null
PyObject *meth = ((PyMethodObject *)callable)->im_func;
PEEK(oparg + 2) = Py_NewRef(meth); // callable
Py_DECREF(callable);
GO_TO_INSTRUCTION(CALL_PY_EXACT_ARGS);
{
DEOPT_IF(null != NULL, CALL);
DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type, CALL);
}
// _INIT_CALL_BOUND_METHOD_EXACT_ARGS
{
STAT_INC(CALL, hit);
self = Py_NewRef(((PyMethodObject *)callable)->im_self);
stack_pointer[-1 - oparg] = self; // Patch stack as it is used by _INIT_CALL_PY_EXACT_ARGS
func = Py_NewRef(((PyMethodObject *)callable)->im_func);
stack_pointer[-2 - oparg] = func; // This is used by CALL, upon deoptimization
Py_DECREF(callable);
}
// _CHECK_FUNCTION_EXACT_ARGS
self_or_null = self;
callable = func;
{
uint32_t func_version = read_u32(&next_instr[1].cache);
ASSERT_KWNAMES_IS_NULL();
DEOPT_IF(!PyFunction_Check(callable), CALL);
PyFunctionObject *func = (PyFunctionObject *)callable;
DEOPT_IF(func->func_version != func_version, CALL);
PyCodeObject *code = (PyCodeObject *)func->func_code;
DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL), CALL);
}
// _CHECK_STACK_SPACE
{
PyFunctionObject *func = (PyFunctionObject *)callable;
PyCodeObject *code = (PyCodeObject *)func->func_code;
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), CALL);
}
// _INIT_CALL_PY_EXACT_ARGS
args = stack_pointer - oparg;
{
int argcount = oparg;
if (self_or_null != NULL) {
args--;
argcount++;
}
STAT_INC(CALL, hit);
PyFunctionObject *func = (PyFunctionObject *)callable;
new_frame = _PyFrame_PushUnchecked(tstate, func, argcount);
for (int i = 0; i < argcount; i++) {
new_frame->localsplus[i] = args[i];
}
}
// SAVE_CURRENT_IP
next_instr += 3;
{
#if TIER_ONE
frame->prev_instr = next_instr - 1;
#endif
#if TIER_TWO
// Relies on a preceding SAVE_IP
frame->prev_instr--;
#endif
}
// _PUSH_FRAME
// Makes new_frame the current frame.
// Exactly oparg + 2 entries are dropped first: the oparg arguments plus the
// callable and self_or_null slots. This matches what _PyOpcode_num_popped
// reports for this instruction. The stack pointer saved into the calling
// frame below must not go any lower than the callable slot.
STACK_SHRINK(oparg);
STACK_SHRINK(2);
{
// Write it out explicitly because it's subtly different.
// Eventually this should be the only occurrence of this code.
frame->return_offset = 0;
assert(tstate->interp->eval_frame == NULL);
// Save the caller's (already shrunk) stack pointer before switching frames.
_PyFrame_SetStackPointer(frame, stack_pointer);
new_frame->previous = frame;
CALL_STAT_INC(inlined_py_calls);
frame = tstate->current_frame = new_frame;
#if TIER_ONE
goto start_frame;
#endif
#if TIER_TWO
// From here on `frame` is the new frame: reload its stack pointer and
// point ip_offset at its code.
if (_Py_EnterRecursivePy(tstate)) goto pop_1_exit_unwind;
stack_pointer = _PyFrame_GetStackPointer(frame);
ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
#endif
}
}
TARGET(CALL_PY_EXACT_ARGS) {
PREDICTED(CALL_PY_EXACT_ARGS);
PyObject *self_or_null;
PyObject *callable;
PyObject **args;

View File

@ -444,7 +444,7 @@ class Generator(Analyzer):
with self.out.block("struct opcode_macro_expansion", ";"):
self.out.emit("int nuops;")
self.out.emit(
"struct { int16_t uop; int8_t size; int8_t offset; } uops[8];"
"struct { int16_t uop; int8_t size; int8_t offset; } uops[12];"
)
self.out.emit("")