gh-98003: Inline call frames for CALL_FUNCTION_EX (GH-98004)

This commit is contained in:
Ken Jin 2023-04-30 21:08:26 +08:00 committed by GitHub
parent accb417c33
commit ed95e8cbd4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 148 additions and 55 deletions

View File

@ -116,6 +116,16 @@ _PyObject_FastCallTstate(PyThreadState *tstate, PyObject *func, PyObject *const
return _PyObject_VectorcallTstate(tstate, func, args, (size_t)nargs, NULL);
}
PyObject *const *
_PyStack_UnpackDict(PyThreadState *tstate,
PyObject *const *args, Py_ssize_t nargs,
PyObject *kwargs, PyObject **p_kwnames);
void
_PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs,
PyObject *kwnames);
void _PyStack_UnpackDict_FreeNoDecRef(PyObject *const *stack, PyObject *kwnames);
#ifdef __cplusplus
}

View File

@ -0,0 +1,3 @@
Complex function calls are now faster and consume no C stack
space.

View File

@ -8,16 +8,6 @@
#include "pycore_tuple.h" // _PyTuple_ITEMS()
static PyObject *const *
_PyStack_UnpackDict(PyThreadState *tstate,
PyObject *const *args, Py_ssize_t nargs,
PyObject *kwargs, PyObject **p_kwnames);
static void
_PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs,
PyObject *kwnames);
static PyObject *
null_error(PyThreadState *tstate)
{
@ -965,7 +955,7 @@ _PyStack_AsDict(PyObject *const *values, PyObject *kwnames)
The newly allocated argument vector supports PY_VECTORCALL_ARGUMENTS_OFFSET.
When done, you must call _PyStack_UnpackDict_Free(stack, nargs, kwnames) */
static PyObject *const *
PyObject *const *
_PyStack_UnpackDict(PyThreadState *tstate,
PyObject *const *args, Py_ssize_t nargs,
PyObject *kwargs, PyObject **p_kwnames)
@ -1034,7 +1024,7 @@ _PyStack_UnpackDict(PyThreadState *tstate,
return stack;
}
static void
void
_PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs,
PyObject *kwnames)
{
@ -1042,6 +1032,12 @@ _PyStack_UnpackDict_Free(PyObject *const *stack, Py_ssize_t nargs,
for (Py_ssize_t i = 0; i < n; i++) {
Py_DECREF(stack[i]);
}
_PyStack_UnpackDict_FreeNoDecRef(stack, kwnames);
}
void
_PyStack_UnpackDict_FreeNoDecRef(PyObject *const *stack, PyObject *kwnames)
{
PyMem_Free((PyObject **)stack - 1);
Py_DECREF(kwnames);
}

View File

@ -3103,6 +3103,25 @@ dummy_func(
}
}
else {
if (Py_TYPE(func) == &PyFunction_Type &&
tstate->interp->eval_frame == NULL &&
((PyFunctionObject *)func)->vectorcall == _PyFunction_Vectorcall) {
assert(PyTuple_CheckExact(callargs));
Py_ssize_t nargs = PyTuple_GET_SIZE(callargs);
int code_flags = ((PyCodeObject *)PyFunction_GET_CODE(func))->co_flags;
PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(func));
_PyInterpreterFrame *new_frame = _PyEvalFramePushAndInit_Ex(tstate,
(PyFunctionObject *)func, locals,
nargs, callargs, kwargs);
// Need to manually shrink the stack since we exit with DISPATCH_INLINED.
STACK_SHRINK(oparg + 3);
if (new_frame == NULL) {
goto error;
}
frame->return_offset = 0;
DISPATCH_INLINED(new_frame);
}
result = PyObject_Call(func, callargs, kwargs);
}
DECREF_INPUTS();

View File

@ -212,6 +212,9 @@ static _PyInterpreterFrame *
_PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func,
PyObject *locals, PyObject* const* args,
size_t argcount, PyObject *kwnames);
static _PyInterpreterFrame *
_PyEvalFramePushAndInit_Ex(PyThreadState *tstate, PyFunctionObject *func,
PyObject *locals, Py_ssize_t nargs, PyObject *callargs, PyObject *kwargs);
static void
_PyEvalFrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);
@ -1501,6 +1504,49 @@ fail:
return NULL;
}
/* Same as _PyEvalFramePushAndInit but takes an args tuple and kwargs dict.
Steals references to func, callargs and kwargs.
*/
static _PyInterpreterFrame *
_PyEvalFramePushAndInit_Ex(PyThreadState *tstate, PyFunctionObject *func,
PyObject *locals, Py_ssize_t nargs, PyObject *callargs, PyObject *kwargs)
{
bool has_dict = (kwargs != NULL && PyDict_GET_SIZE(kwargs) > 0);
PyObject *kwnames = NULL;
PyObject *const *newargs;
if (has_dict) {
newargs = _PyStack_UnpackDict(tstate, _PyTuple_ITEMS(callargs), nargs, kwargs, &kwnames);
if (newargs == NULL) {
Py_DECREF(func);
goto error;
}
}
else {
newargs = &PyTuple_GET_ITEM(callargs, 0);
/* We need to incref all our args since the new frame steals the references. */
for (Py_ssize_t i = 0; i < nargs; ++i) {
Py_INCREF(PyTuple_GET_ITEM(callargs, i));
}
}
_PyInterpreterFrame *new_frame = _PyEvalFramePushAndInit(
tstate, (PyFunctionObject *)func, locals,
newargs, nargs, kwnames
);
if (has_dict) {
_PyStack_UnpackDict_FreeNoDecRef(newargs, kwnames);
}
/* No need to decref func here because the reference has been stolen by
_PyEvalFramePushAndInit.
*/
Py_DECREF(callargs);
Py_XDECREF(kwargs);
return new_frame;
error:
Py_DECREF(callargs);
Py_XDECREF(kwargs);
return NULL;
}
PyObject *
_PyEval_Vector(PyThreadState *tstate, PyFunctionObject *func,
PyObject *locals,

View File

@ -4296,16 +4296,35 @@
}
}
else {
if (Py_TYPE(func) == &PyFunction_Type &&
tstate->interp->eval_frame == NULL &&
((PyFunctionObject *)func)->vectorcall == _PyFunction_Vectorcall) {
assert(PyTuple_CheckExact(callargs));
Py_ssize_t nargs = PyTuple_GET_SIZE(callargs);
int code_flags = ((PyCodeObject *)PyFunction_GET_CODE(func))->co_flags;
PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(func));
_PyInterpreterFrame *new_frame = _PyEvalFramePushAndInit_Ex(tstate,
(PyFunctionObject *)func, locals,
nargs, callargs, kwargs);
// Need to manually shrink the stack since we exit with DISPATCH_INLINED.
STACK_SHRINK(oparg + 3);
if (new_frame == NULL) {
goto error;
}
frame->return_offset = 0;
DISPATCH_INLINED(new_frame);
}
result = PyObject_Call(func, callargs, kwargs);
}
#line 4302 "Python/generated_cases.c.h"
#line 4321 "Python/generated_cases.c.h"
Py_DECREF(func);
Py_DECREF(callargs);
Py_XDECREF(kwargs);
#line 3109 "Python/bytecodes.c"
#line 3128 "Python/bytecodes.c"
assert(PEEK(3 + (oparg & 1)) == NULL);
if (result == NULL) { STACK_SHRINK(((oparg & 1) ? 1 : 0)); goto pop_3_error; }
#line 4309 "Python/generated_cases.c.h"
#line 4328 "Python/generated_cases.c.h"
STACK_SHRINK(((oparg & 1) ? 1 : 0));
STACK_SHRINK(2);
stack_pointer[-1] = result;
@ -4320,7 +4339,7 @@
PyObject *kwdefaults = (oparg & 0x02) ? stack_pointer[-(1 + ((oparg & 0x08) ? 1 : 0) + ((oparg & 0x04) ? 1 : 0) + ((oparg & 0x02) ? 1 : 0))] : NULL;
PyObject *defaults = (oparg & 0x01) ? stack_pointer[-(1 + ((oparg & 0x08) ? 1 : 0) + ((oparg & 0x04) ? 1 : 0) + ((oparg & 0x02) ? 1 : 0) + ((oparg & 0x01) ? 1 : 0))] : NULL;
PyObject *func;
#line 3119 "Python/bytecodes.c"
#line 3138 "Python/bytecodes.c"
PyFunctionObject *func_obj = (PyFunctionObject *)
PyFunction_New(codeobj, GLOBALS());
@ -4349,14 +4368,14 @@
func_obj->func_version = ((PyCodeObject *)codeobj)->co_version;
func = (PyObject *)func_obj;
#line 4353 "Python/generated_cases.c.h"
#line 4372 "Python/generated_cases.c.h"
STACK_SHRINK(((oparg & 0x01) ? 1 : 0) + ((oparg & 0x02) ? 1 : 0) + ((oparg & 0x04) ? 1 : 0) + ((oparg & 0x08) ? 1 : 0));
stack_pointer[-1] = func;
DISPATCH();
}
TARGET(RETURN_GENERATOR) {
#line 3150 "Python/bytecodes.c"
#line 3169 "Python/bytecodes.c"
assert(PyFunction_Check(frame->f_funcobj));
PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj;
PyGenObject *gen = (PyGenObject *)_Py_MakeCoro(func);
@ -4377,7 +4396,7 @@
frame = cframe.current_frame = prev;
_PyFrame_StackPush(frame, (PyObject *)gen);
goto resume_frame;
#line 4381 "Python/generated_cases.c.h"
#line 4400 "Python/generated_cases.c.h"
}
TARGET(BUILD_SLICE) {
@ -4385,15 +4404,15 @@
PyObject *stop = stack_pointer[-(1 + ((oparg == 3) ? 1 : 0))];
PyObject *start = stack_pointer[-(2 + ((oparg == 3) ? 1 : 0))];
PyObject *slice;
#line 3173 "Python/bytecodes.c"
#line 3192 "Python/bytecodes.c"
slice = PySlice_New(start, stop, step);
#line 4391 "Python/generated_cases.c.h"
#line 4410 "Python/generated_cases.c.h"
Py_DECREF(start);
Py_DECREF(stop);
Py_XDECREF(step);
#line 3175 "Python/bytecodes.c"
#line 3194 "Python/bytecodes.c"
if (slice == NULL) { STACK_SHRINK(((oparg == 3) ? 1 : 0)); goto pop_2_error; }
#line 4397 "Python/generated_cases.c.h"
#line 4416 "Python/generated_cases.c.h"
STACK_SHRINK(((oparg == 3) ? 1 : 0));
STACK_SHRINK(1);
stack_pointer[-1] = slice;
@ -4404,7 +4423,7 @@
PyObject *fmt_spec = ((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? stack_pointer[-((((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0))] : NULL;
PyObject *value = stack_pointer[-(1 + (((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0))];
PyObject *result;
#line 3179 "Python/bytecodes.c"
#line 3198 "Python/bytecodes.c"
/* Handles f-string value formatting. */
PyObject *(*conv_fn)(PyObject *);
int which_conversion = oparg & FVC_MASK;
@ -4439,7 +4458,7 @@
Py_DECREF(value);
Py_XDECREF(fmt_spec);
if (result == NULL) { STACK_SHRINK((((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0)); goto pop_1_error; }
#line 4443 "Python/generated_cases.c.h"
#line 4462 "Python/generated_cases.c.h"
STACK_SHRINK((((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0));
stack_pointer[-1] = result;
DISPATCH();
@ -4448,10 +4467,10 @@
TARGET(COPY) {
PyObject *bottom = stack_pointer[-(1 + (oparg-1))];
PyObject *top;
#line 3216 "Python/bytecodes.c"
#line 3235 "Python/bytecodes.c"
assert(oparg > 0);
top = Py_NewRef(bottom);
#line 4455 "Python/generated_cases.c.h"
#line 4474 "Python/generated_cases.c.h"
STACK_GROW(1);
stack_pointer[-1] = top;
DISPATCH();
@ -4463,7 +4482,7 @@
PyObject *rhs = stack_pointer[-1];
PyObject *lhs = stack_pointer[-2];
PyObject *res;
#line 3221 "Python/bytecodes.c"
#line 3240 "Python/bytecodes.c"
#if ENABLE_SPECIALIZATION
_PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr;
if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) {
@ -4478,12 +4497,12 @@
assert((unsigned)oparg < Py_ARRAY_LENGTH(binary_ops));
assert(binary_ops[oparg]);
res = binary_ops[oparg](lhs, rhs);
#line 4482 "Python/generated_cases.c.h"
#line 4501 "Python/generated_cases.c.h"
Py_DECREF(lhs);
Py_DECREF(rhs);
#line 3236 "Python/bytecodes.c"
#line 3255 "Python/bytecodes.c"
if (res == NULL) goto pop_2_error;
#line 4487 "Python/generated_cases.c.h"
#line 4506 "Python/generated_cases.c.h"
STACK_SHRINK(1);
stack_pointer[-1] = res;
next_instr += 1;
@ -4493,16 +4512,16 @@
TARGET(SWAP) {
PyObject *top = stack_pointer[-1];
PyObject *bottom = stack_pointer[-(2 + (oparg-2))];
#line 3241 "Python/bytecodes.c"
#line 3260 "Python/bytecodes.c"
assert(oparg >= 2);
#line 4499 "Python/generated_cases.c.h"
#line 4518 "Python/generated_cases.c.h"
stack_pointer[-1] = bottom;
stack_pointer[-(2 + (oparg-2))] = top;
DISPATCH();
}
TARGET(INSTRUMENTED_LINE) {
#line 3245 "Python/bytecodes.c"
#line 3264 "Python/bytecodes.c"
_Py_CODEUNIT *here = next_instr-1;
_PyFrame_SetStackPointer(frame, stack_pointer);
int original_opcode = _Py_call_instrumentation_line(
@ -4522,11 +4541,11 @@
}
opcode = original_opcode;
DISPATCH_GOTO();
#line 4526 "Python/generated_cases.c.h"
#line 4545 "Python/generated_cases.c.h"
}
TARGET(INSTRUMENTED_INSTRUCTION) {
#line 3267 "Python/bytecodes.c"
#line 3286 "Python/bytecodes.c"
int next_opcode = _Py_call_instrumentation_instruction(
tstate, frame, next_instr-1);
if (next_opcode < 0) goto error;
@ -4538,26 +4557,26 @@
assert(next_opcode > 0 && next_opcode < 256);
opcode = next_opcode;
DISPATCH_GOTO();
#line 4542 "Python/generated_cases.c.h"
#line 4561 "Python/generated_cases.c.h"
}
TARGET(INSTRUMENTED_JUMP_FORWARD) {
#line 3281 "Python/bytecodes.c"
#line 3300 "Python/bytecodes.c"
INSTRUMENTED_JUMP(next_instr-1, next_instr+oparg, PY_MONITORING_EVENT_JUMP);
#line 4548 "Python/generated_cases.c.h"
#line 4567 "Python/generated_cases.c.h"
DISPATCH();
}
TARGET(INSTRUMENTED_JUMP_BACKWARD) {
#line 3285 "Python/bytecodes.c"
#line 3304 "Python/bytecodes.c"
INSTRUMENTED_JUMP(next_instr-1, next_instr-oparg, PY_MONITORING_EVENT_JUMP);
#line 4555 "Python/generated_cases.c.h"
#line 4574 "Python/generated_cases.c.h"
CHECK_EVAL_BREAKER();
DISPATCH();
}
TARGET(INSTRUMENTED_POP_JUMP_IF_TRUE) {
#line 3290 "Python/bytecodes.c"
#line 3309 "Python/bytecodes.c"
PyObject *cond = POP();
int err = PyObject_IsTrue(cond);
Py_DECREF(cond);
@ -4566,12 +4585,12 @@
assert(err == 0 || err == 1);
int offset = err*oparg;
INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
#line 4570 "Python/generated_cases.c.h"
#line 4589 "Python/generated_cases.c.h"
DISPATCH();
}
TARGET(INSTRUMENTED_POP_JUMP_IF_FALSE) {
#line 3301 "Python/bytecodes.c"
#line 3320 "Python/bytecodes.c"
PyObject *cond = POP();
int err = PyObject_IsTrue(cond);
Py_DECREF(cond);
@ -4580,12 +4599,12 @@
assert(err == 0 || err == 1);
int offset = (1-err)*oparg;
INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
#line 4584 "Python/generated_cases.c.h"
#line 4603 "Python/generated_cases.c.h"
DISPATCH();
}
TARGET(INSTRUMENTED_POP_JUMP_IF_NONE) {
#line 3312 "Python/bytecodes.c"
#line 3331 "Python/bytecodes.c"
PyObject *value = POP();
_Py_CODEUNIT *here = next_instr-1;
int offset;
@ -4598,12 +4617,12 @@
offset = 0;
}
INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
#line 4602 "Python/generated_cases.c.h"
#line 4621 "Python/generated_cases.c.h"
DISPATCH();
}
TARGET(INSTRUMENTED_POP_JUMP_IF_NOT_NONE) {
#line 3327 "Python/bytecodes.c"
#line 3346 "Python/bytecodes.c"
PyObject *value = POP();
_Py_CODEUNIT *here = next_instr-1;
int offset;
@ -4616,30 +4635,30 @@
offset = oparg;
}
INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH);
#line 4620 "Python/generated_cases.c.h"
#line 4639 "Python/generated_cases.c.h"
DISPATCH();
}
TARGET(EXTENDED_ARG) {
#line 3342 "Python/bytecodes.c"
#line 3361 "Python/bytecodes.c"
assert(oparg);
opcode = next_instr->op.code;
oparg = oparg << 8 | next_instr->op.arg;
PRE_DISPATCH_GOTO();
DISPATCH_GOTO();
#line 4631 "Python/generated_cases.c.h"
#line 4650 "Python/generated_cases.c.h"
}
TARGET(CACHE) {
#line 3350 "Python/bytecodes.c"
#line 3369 "Python/bytecodes.c"
assert(0 && "Executing a cache.");
Py_UNREACHABLE();
#line 4638 "Python/generated_cases.c.h"
#line 4657 "Python/generated_cases.c.h"
}
TARGET(RESERVED) {
#line 3355 "Python/bytecodes.c"
#line 3374 "Python/bytecodes.c"
assert(0 && "Executing RESERVED instruction.");
Py_UNREACHABLE();
#line 4645 "Python/generated_cases.c.h"
#line 4664 "Python/generated_cases.c.h"
}