gh-106581: Project through calls (#108067)

This finishes the work begun in gh-107760. When, while projecting a superblock, we encounter a call to a short, simple function, the superblock will now enter the function using `_PUSH_FRAME`, continue through it, and leave it using `_POP_FRAME`, and then continue through the original code. Multiple frame pushes and pops are even possible. It is also possible to stop appending to the superblock in the middle of a called function, when running out of space or encountering an unsupported bytecode.
This commit is contained in:
Guido van Rossum 2023-08-17 11:29:58 -07:00 committed by GitHub
parent 292a22bdc2
commit 61c7249759
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 409 additions and 109 deletions

View File

@ -171,6 +171,7 @@ void _PyEval_FormatKwargsError(PyThreadState *tstate, PyObject *func, PyObject *
PyObject *_PyEval_MatchClass(PyThreadState *tstate, PyObject *subject, PyObject *type, Py_ssize_t nargs, PyObject *kwargs);
PyObject *_PyEval_MatchKeys(PyThreadState *tstate, PyObject *map, PyObject *keys);
int _PyEval_UnpackIterable(PyThreadState *tstate, PyObject *v, int argcnt, int argcntafter, PyObject **sp);
void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);
#ifdef __cplusplus

View File

@ -16,13 +16,22 @@ extern PyObject* _PyFunction_Vectorcall(
#define FUNC_MAX_WATCHERS 8
#define FUNC_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */
struct _py_func_state {
uint32_t next_version;
// Borrowed references to function objects whose
// func_version % FUNC_VERSION_CACHE_SIZE
// once was equal to the index in the table.
// They are cleared when the function is deallocated.
PyFunctionObject *func_version_cache[FUNC_VERSION_CACHE_SIZE];
};
extern PyFunctionObject* _PyFunction_FromConstructor(PyFrameConstructor *constr);
extern uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func);
extern void _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version);
PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version);
extern PyObject *_Py_set_function_type_params(
PyThreadState* unused, PyObject *func, PyObject *type_params);

View File

@ -33,35 +33,36 @@
#define _BINARY_OP_SUBTRACT_FLOAT 309
#define _GUARD_BOTH_UNICODE 310
#define _BINARY_OP_ADD_UNICODE 311
#define _LOAD_LOCALS 312
#define _LOAD_FROM_DICT_OR_GLOBALS 313
#define _GUARD_GLOBALS_VERSION 314
#define _GUARD_BUILTINS_VERSION 315
#define _LOAD_GLOBAL_MODULE 316
#define _LOAD_GLOBAL_BUILTINS 317
#define _GUARD_TYPE_VERSION 318
#define _CHECK_MANAGED_OBJECT_HAS_VALUES 319
#define _LOAD_ATTR_INSTANCE_VALUE 320
#define IS_NONE 321
#define _ITER_CHECK_LIST 322
#define _IS_ITER_EXHAUSTED_LIST 323
#define _ITER_NEXT_LIST 324
#define _ITER_CHECK_TUPLE 325
#define _IS_ITER_EXHAUSTED_TUPLE 326
#define _ITER_NEXT_TUPLE 327
#define _ITER_CHECK_RANGE 328
#define _IS_ITER_EXHAUSTED_RANGE 329
#define _ITER_NEXT_RANGE 330
#define _CHECK_PEP_523 331
#define _CHECK_FUNCTION_EXACT_ARGS 332
#define _CHECK_STACK_SPACE 333
#define _INIT_CALL_PY_EXACT_ARGS 334
#define _PUSH_FRAME 335
#define _POP_JUMP_IF_FALSE 336
#define _POP_JUMP_IF_TRUE 337
#define JUMP_TO_TOP 338
#define SAVE_CURRENT_IP 339
#define INSERT 340
#define _POP_FRAME 312
#define _LOAD_LOCALS 313
#define _LOAD_FROM_DICT_OR_GLOBALS 314
#define _GUARD_GLOBALS_VERSION 315
#define _GUARD_BUILTINS_VERSION 316
#define _LOAD_GLOBAL_MODULE 317
#define _LOAD_GLOBAL_BUILTINS 318
#define _GUARD_TYPE_VERSION 319
#define _CHECK_MANAGED_OBJECT_HAS_VALUES 320
#define _LOAD_ATTR_INSTANCE_VALUE 321
#define IS_NONE 322
#define _ITER_CHECK_LIST 323
#define _IS_ITER_EXHAUSTED_LIST 324
#define _ITER_NEXT_LIST 325
#define _ITER_CHECK_TUPLE 326
#define _IS_ITER_EXHAUSTED_TUPLE 327
#define _ITER_NEXT_TUPLE 328
#define _ITER_CHECK_RANGE 329
#define _IS_ITER_EXHAUSTED_RANGE 330
#define _ITER_NEXT_RANGE 331
#define _CHECK_PEP_523 332
#define _CHECK_FUNCTION_EXACT_ARGS 333
#define _CHECK_STACK_SPACE 334
#define _INIT_CALL_PY_EXACT_ARGS 335
#define _PUSH_FRAME 336
#define _POP_JUMP_IF_FALSE 337
#define _POP_JUMP_IF_TRUE 338
#define JUMP_TO_TOP 339
#define SAVE_CURRENT_IP 340
#define INSERT 341
extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump);
#ifdef NEED_OPCODE_METADATA
@ -197,6 +198,8 @@ int _PyOpcode_num_popped(int opcode, int oparg, bool jump) {
return oparg;
case INTERPRETER_EXIT:
return 1;
case _POP_FRAME:
return 1;
case RETURN_VALUE:
return 1;
case INSTRUMENTED_RETURN_VALUE:
@ -723,6 +726,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg, bool jump) {
return 0;
case INTERPRETER_EXIT:
return 0;
case _POP_FRAME:
return 0;
case RETURN_VALUE:
return 0;
case INSTRUMENTED_RETURN_VALUE:
@ -1191,7 +1196,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = {
[STORE_FAST_STORE_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG },
[POP_TOP] = { true, INSTR_FMT_IX, 0 },
[PUSH_NULL] = { true, INSTR_FMT_IX, 0 },
[END_FOR] = { true, INSTR_FMT_IB, 0 },
[END_FOR] = { true, INSTR_FMT_IX, 0 },
[INSTRUMENTED_END_FOR] = { true, INSTR_FMT_IX, 0 },
[END_SEND] = { true, INSTR_FMT_IX, 0 },
[INSTRUMENTED_END_SEND] = { true, INSTR_FMT_IX, 0 },
@ -1205,14 +1210,14 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = {
[TO_BOOL_STR] = { true, INSTR_FMT_IXC00, 0 },
[TO_BOOL_ALWAYS_TRUE] = { true, INSTR_FMT_IXC00, 0 },
[UNARY_INVERT] = { true, INSTR_FMT_IX, 0 },
[BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IBC, 0 },
[BINARY_OP_ADD_INT] = { true, INSTR_FMT_IBC, 0 },
[BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IBC, 0 },
[BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IBC, 0 },
[BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IBC, 0 },
[BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IBC, 0 },
[BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IBC, 0 },
[BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IB, HAS_LOCAL_FLAG },
[BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC, 0 },
[BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC, 0 },
[BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC, 0 },
[BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC, 0 },
[BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC, 0 },
[BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC, 0 },
[BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC, 0 },
[BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IX, HAS_LOCAL_FLAG },
[BINARY_SUBSCR] = { true, INSTR_FMT_IXC, 0 },
[BINARY_SLICE] = { true, INSTR_FMT_IX, 0 },
[STORE_SLICE] = { true, INSTR_FMT_IX, 0 },
@ -1259,7 +1264,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE] = {
[DELETE_ATTR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG },
[STORE_GLOBAL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG },
[DELETE_GLOBAL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG },
[LOAD_LOCALS] = { true, INSTR_FMT_IB, 0 },
[LOAD_LOCALS] = { true, INSTR_FMT_IX, 0 },
[LOAD_NAME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG },
[LOAD_FROM_DICT_OR_GLOBALS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG },
[LOAD_GLOBAL] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG },
@ -1400,6 +1405,7 @@ extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACR
#ifdef NEED_OPCODE_METADATA
const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPANSION_SIZE] = {
[NOP] = { .nuops = 1, .uops = { { NOP, 0, 0 } } },
[RESUME] = { .nuops = 1, .uops = { { RESUME, 0, 0 } } },
[LOAD_FAST_CHECK] = { .nuops = 1, .uops = { { LOAD_FAST_CHECK, 0, 0 } } },
[LOAD_FAST] = { .nuops = 1, .uops = { { LOAD_FAST, 0, 0 } } },
[LOAD_FAST_AND_CLEAR] = { .nuops = 1, .uops = { { LOAD_FAST_AND_CLEAR, 0, 0 } } },
@ -1444,6 +1450,8 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN
[DELETE_SUBSCR] = { .nuops = 1, .uops = { { DELETE_SUBSCR, 0, 0 } } },
[CALL_INTRINSIC_1] = { .nuops = 1, .uops = { { CALL_INTRINSIC_1, 0, 0 } } },
[CALL_INTRINSIC_2] = { .nuops = 1, .uops = { { CALL_INTRINSIC_2, 0, 0 } } },
[RETURN_VALUE] = { .nuops = 3, .uops = { { SAVE_IP, 7, 0 }, { SAVE_CURRENT_IP, 0, 0 }, { _POP_FRAME, 0, 0 } } },
[RETURN_CONST] = { .nuops = 4, .uops = { { LOAD_CONST, 0, 0 }, { SAVE_IP, 7, 0 }, { SAVE_CURRENT_IP, 0, 0 }, { _POP_FRAME, 0, 0 } } },
[GET_AITER] = { .nuops = 1, .uops = { { GET_AITER, 0, 0 } } },
[GET_ANEXT] = { .nuops = 1, .uops = { { GET_ANEXT, 0, 0 } } },
[GET_AWAITABLE] = { .nuops = 1, .uops = { { GET_AWAITABLE, 0, 0 } } },
@ -1545,6 +1553,7 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = {
[_BINARY_OP_SUBTRACT_FLOAT] = "_BINARY_OP_SUBTRACT_FLOAT",
[_GUARD_BOTH_UNICODE] = "_GUARD_BOTH_UNICODE",
[_BINARY_OP_ADD_UNICODE] = "_BINARY_OP_ADD_UNICODE",
[_POP_FRAME] = "_POP_FRAME",
[_LOAD_LOCALS] = "_LOAD_LOCALS",
[_LOAD_FROM_DICT_OR_GLOBALS] = "_LOAD_FROM_DICT_OR_GLOBALS",
[_GUARD_GLOBALS_VERSION] = "_GUARD_GLOBALS_VERSION",

View File

@ -2633,6 +2633,7 @@ class TestUops(unittest.TestCase):
self.assertIsNotNone(ex)
uops = {opname for opname, _, _ in ex}
self.assertIn("_PUSH_FRAME", uops)
self.assertIn("_BINARY_OP_ADD_INT", uops)

View File

@ -264,7 +264,7 @@ class CodeTest(unittest.TestCase):
("co_posonlyargcount", 0),
("co_kwonlyargcount", 0),
("co_nlocals", 1),
("co_stacksize", 0),
("co_stacksize", 1),
("co_flags", code.co_flags | inspect.CO_COROUTINE),
("co_firstlineno", 100),
("co_code", code2.co_code),

View File

@ -396,6 +396,9 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con)
int nlocals, ncellvars, nfreevars;
get_localsplus_counts(con->localsplusnames, con->localspluskinds,
&nlocals, &ncellvars, &nfreevars);
if (con->stacksize == 0) {
con->stacksize = 1;
}
co->co_filename = Py_NewRef(con->filename);
co->co_name = Py_NewRef(con->name);

View File

@ -223,7 +223,73 @@ error:
return NULL;
}
uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
/*
Function versions
-----------------
Function versions are used to detect when a function object has been
updated, invalidating inline cache data used by the `CALL` bytecode
(notably `CALL_PY_EXACT_ARGS` and a few other `CALL` specializations).
They are also used by the Tier 2 superblock creation code to find
the function being called (and from there the code object).
How does a function's `func_version` field get initialized?
- `PyFunction_New` and friends initialize it to 0.
- The `MAKE_FUNCTION` instruction sets it from the code's `co_version`.
- It is reset to 0 when various attributes like `__code__` are set.
- A new version is allocated by `_PyFunction_GetVersionForCurrentState`
when the specializer needs a version and the version is 0.
The latter allocates versions using a counter in the interpreter state;
when the counter wraps around to 0, no more versions are allocated.
There is one other special case: functions with a non-standard
`vectorcall` field are not given a version.
When the function version is 0, the `CALL` bytecode is not specialized.
Code object versions
--------------------
So where to code objects get their `co_version`? There is a single
static global counter, `_Py_next_func_version`. This is initialized in
the generated (!) file `Python/deepfreeze/deepfreeze.c`, to 1 plus the
number of deep-frozen function objects in that file.
(In `_bootstrap_python.c` and `freeze_module.c` it is initialized to 1.)
Code objects get a new `co_version` allocated from this counter upon
creation. Since code objects are nominally immutable, `co_version` can
not be invalidated. The only way it can be 0 is when 2**32 or more
code objects have been created during the process's lifetime.
(The counter isn't reset by `fork()`, extending the lifetime.)
*/
void
_PyFunction_SetVersion(PyFunctionObject *func, uint32_t version)
{
func->func_version = version;
if (version != 0) {
PyInterpreterState *interp = _PyInterpreterState_GET();
interp->func_state.func_version_cache[
version % FUNC_VERSION_CACHE_SIZE] = func;
}
}
PyFunctionObject *
_PyFunction_LookupByVersion(uint32_t version)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
PyFunctionObject *func = interp->func_state.func_version_cache[
version % FUNC_VERSION_CACHE_SIZE];
if (func != NULL && func->func_version == version) {
return (PyFunctionObject *)Py_NewRef(func);
}
return NULL;
}
uint32_t
_PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
{
if (func->func_version != 0) {
return func->func_version;
@ -236,7 +302,7 @@ uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
return 0;
}
uint32_t v = interp->func_state.next_version++;
func->func_version = v;
_PyFunction_SetVersion(func, v);
return v;
}
@ -851,6 +917,15 @@ func_dealloc(PyFunctionObject *op)
if (op->func_weakreflist != NULL) {
PyObject_ClearWeakRefs((PyObject *) op);
}
if (op->func_version != 0) {
PyInterpreterState *interp = _PyInterpreterState_GET();
PyFunctionObject **slot =
interp->func_state.func_version_cache
+ (op->func_version % FUNC_VERSION_CACHE_SIZE);
if (*slot == op) {
*slot = NULL;
}
}
(void)func_clear(op);
// These aren't cleared by func_clear().
Py_DECREF(op->func_code);

View File

@ -7,6 +7,10 @@
break;
}
case RESUME: {
break;
}
case POP_TOP: {
STACK_SHRINK(1);
break;
@ -191,6 +195,11 @@
break;
}
case _POP_FRAME: {
STACK_SHRINK(1);
break;
}
case GET_AITER: {
PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)PARTITIONNODE_NULLROOT, PEEK(-(-1)), true);
break;

View File

@ -133,6 +133,7 @@ dummy_func(
}
inst(RESUME, (--)) {
#if TIER_ONE
assert(frame == tstate->current_frame);
/* Possibly combine this with eval breaker */
if (_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version) {
@ -140,7 +141,9 @@ dummy_func(
ERROR_IF(err, error);
next_instr--;
}
else if (oparg < 2) {
else
#endif
if (oparg < 2) {
CHECK_EVAL_BREAKER();
}
}
@ -757,21 +760,37 @@ dummy_func(
return retval;
}
inst(RETURN_VALUE, (retval --)) {
STACK_SHRINK(1);
// The stack effect here is ambiguous.
// We definitely pop the return value off the stack on entry.
// We also push it onto the stack on exit, but that's a
// different frame, and it's accounted for by _PUSH_FRAME.
op(_POP_FRAME, (retval --)) {
assert(EMPTY());
_PyFrame_SetStackPointer(frame, stack_pointer);
_Py_LeaveRecursiveCallPy(tstate);
assert(frame != &entry_frame);
// GH-99729: We need to unlink the frame *before* clearing it:
_PyInterpreterFrame *dying = frame;
#if TIER_ONE
assert(frame != &entry_frame);
#endif
frame = tstate->current_frame = dying->previous;
_PyEvalFrameClearAndPop(tstate, dying);
_PyEval_FrameClearAndPop(tstate, dying);
frame->prev_instr += frame->return_offset;
_PyFrame_StackPush(frame, retval);
#if TIER_ONE
goto resume_frame;
#endif
#if TIER_TWO
stack_pointer = _PyFrame_GetStackPointer(frame);
ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
#endif
}
macro(RETURN_VALUE) =
SAVE_IP + // Tier 2 only; special-cased oparg
SAVE_CURRENT_IP + // Sets frame->prev_instr
_POP_FRAME;
inst(INSTRUMENTED_RETURN_VALUE, (retval --)) {
int err = _Py_call_instrumentation_arg(
tstate, PY_MONITORING_EVENT_PY_RETURN,
@ -785,27 +804,17 @@ dummy_func(
// GH-99729: We need to unlink the frame *before* clearing it:
_PyInterpreterFrame *dying = frame;
frame = tstate->current_frame = dying->previous;
_PyEvalFrameClearAndPop(tstate, dying);
_PyEval_FrameClearAndPop(tstate, dying);
frame->prev_instr += frame->return_offset;
_PyFrame_StackPush(frame, retval);
goto resume_frame;
}
inst(RETURN_CONST, (--)) {
PyObject *retval = GETITEM(FRAME_CO_CONSTS, oparg);
Py_INCREF(retval);
assert(EMPTY());
_PyFrame_SetStackPointer(frame, stack_pointer);
_Py_LeaveRecursiveCallPy(tstate);
assert(frame != &entry_frame);
// GH-99729: We need to unlink the frame *before* clearing it:
_PyInterpreterFrame *dying = frame;
frame = tstate->current_frame = dying->previous;
_PyEvalFrameClearAndPop(tstate, dying);
frame->prev_instr += frame->return_offset;
_PyFrame_StackPush(frame, retval);
goto resume_frame;
}
macro(RETURN_CONST) =
LOAD_CONST +
SAVE_IP + // Tier 2 only; special-cased oparg
SAVE_CURRENT_IP + // Sets frame->prev_instr
_POP_FRAME;
inst(INSTRUMENTED_RETURN_CONST, (--)) {
PyObject *retval = GETITEM(FRAME_CO_CONSTS, oparg);
@ -821,7 +830,7 @@ dummy_func(
// GH-99729: We need to unlink the frame *before* clearing it:
_PyInterpreterFrame *dying = frame;
frame = tstate->current_frame = dying->previous;
_PyEvalFrameClearAndPop(tstate, dying);
_PyEval_FrameClearAndPop(tstate, dying);
frame->prev_instr += frame->return_offset;
_PyFrame_StackPush(frame, retval);
goto resume_frame;
@ -3545,7 +3554,8 @@ dummy_func(
goto error;
}
func_obj->func_version = ((PyCodeObject *)codeobj)->co_version;
_PyFunction_SetVersion(
func_obj, ((PyCodeObject *)codeobj)->co_version);
func = (PyObject *)func_obj;
}

View File

@ -222,8 +222,6 @@ _PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func,
static _PyInterpreterFrame *
_PyEvalFramePushAndInit_Ex(PyThreadState *tstate, PyFunctionObject *func,
PyObject *locals, Py_ssize_t nargs, PyObject *callargs, PyObject *kwargs);
static void
_PyEvalFrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);
#ifdef HAVE_ERRNO_H
#include <errno.h>
@ -603,10 +601,6 @@ int _Py_CheckRecursiveCallPy(
}
static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate) {
tstate->py_recursion_remaining++;
}
static const _Py_CODEUNIT _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS[] = {
/* Put a NOP at the start, so that the IP points into
* the code, rather than before it */
@ -731,7 +725,7 @@ resume_frame:
// When tracing executed uops, also trace bytecode
char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
if (uop_debug != NULL && *uop_debug >= '0') {
lltrace = (*uop_debug - '0') >= 4; // TODO: Parse an int and all that
lltrace = (*uop_debug - '0') >= 5; // TODO: Parse an int and all that
}
}
}
@ -918,7 +912,7 @@ exit_unwind:
// GH-99729: We need to unlink the frame *before* clearing it:
_PyInterpreterFrame *dying = frame;
frame = tstate->current_frame = dying->previous;
_PyEvalFrameClearAndPop(tstate, dying);
_PyEval_FrameClearAndPop(tstate, dying);
frame->return_offset = 0;
if (frame == &entry_frame) {
/* Restore previous frame and exit */
@ -1487,8 +1481,8 @@ clear_gen_frame(PyThreadState *tstate, _PyInterpreterFrame * frame)
frame->previous = NULL;
}
static void
_PyEvalFrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame * frame)
void
_PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame * frame)
{
if (frame->owner == FRAME_OWNED_BY_THREAD) {
clear_thread_frame(tstate, frame);

View File

@ -369,3 +369,7 @@ static inline int _Py_EnterRecursivePy(PyThreadState *tstate) {
return (tstate->py_recursion_remaining-- <= 0) &&
_Py_CheckRecursiveCallPy(tstate);
}
static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate) {
tstate->py_recursion_remaining++;
}

View File

@ -7,6 +7,23 @@
break;
}
case RESUME: {
#if TIER_ONE
assert(frame == tstate->current_frame);
/* Possibly combine this with eval breaker */
if (_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version) {
int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp);
if (err) goto error;
next_instr--;
}
else
#endif
if (oparg < 2) {
CHECK_EVAL_BREAKER();
}
break;
}
case LOAD_FAST_CHECK: {
PyObject *value;
value = GETLOCAL(oparg);
@ -666,6 +683,32 @@
break;
}
case _POP_FRAME: {
PyObject *retval;
retval = stack_pointer[-1];
STACK_SHRINK(1);
assert(EMPTY());
_PyFrame_SetStackPointer(frame, stack_pointer);
_Py_LeaveRecursiveCallPy(tstate);
// GH-99729: We need to unlink the frame *before* clearing it:
_PyInterpreterFrame *dying = frame;
#if TIER_ONE
assert(frame != &entry_frame);
#endif
frame = tstate->current_frame = dying->previous;
_PyEval_FrameClearAndPop(tstate, dying);
frame->prev_instr += frame->return_offset;
_PyFrame_StackPush(frame, retval);
#if TIER_ONE
goto resume_frame;
#endif
#if TIER_TWO
stack_pointer = _PyFrame_GetStackPointer(frame);
ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
#endif
break;
}
case GET_AITER: {
PyObject *obj;
PyObject *iter;
@ -2607,7 +2650,8 @@
goto error;
}
func_obj->func_version = ((PyCodeObject *)codeobj)->co_version;
_PyFunction_SetVersion(
func_obj, ((PyCodeObject *)codeobj)->co_version);
func = (PyObject *)func_obj;
stack_pointer[-1] = func;
break;

View File

@ -8,6 +8,7 @@
}
TARGET(RESUME) {
#if TIER_ONE
assert(frame == tstate->current_frame);
/* Possibly combine this with eval breaker */
if (_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version) {
@ -15,7 +16,9 @@
if (err) goto error;
next_instr--;
}
else if (oparg < 2) {
else
#endif
if (oparg < 2) {
CHECK_EVAL_BREAKER();
}
DISPATCH();
@ -970,20 +973,40 @@
TARGET(RETURN_VALUE) {
PyObject *retval;
// SAVE_CURRENT_IP
{
#if TIER_ONE
frame->prev_instr = next_instr - 1;
#endif
#if TIER_TWO
// Relies on a preceding SAVE_IP
frame->prev_instr--;
#endif
}
// _POP_FRAME
retval = stack_pointer[-1];
STACK_SHRINK(1);
assert(EMPTY());
_PyFrame_SetStackPointer(frame, stack_pointer);
_Py_LeaveRecursiveCallPy(tstate);
assert(frame != &entry_frame);
// GH-99729: We need to unlink the frame *before* clearing it:
_PyInterpreterFrame *dying = frame;
frame = tstate->current_frame = dying->previous;
_PyEvalFrameClearAndPop(tstate, dying);
frame->prev_instr += frame->return_offset;
_PyFrame_StackPush(frame, retval);
goto resume_frame;
STACK_SHRINK(1);
{
assert(EMPTY());
_PyFrame_SetStackPointer(frame, stack_pointer);
_Py_LeaveRecursiveCallPy(tstate);
// GH-99729: We need to unlink the frame *before* clearing it:
_PyInterpreterFrame *dying = frame;
#if TIER_ONE
assert(frame != &entry_frame);
#endif
frame = tstate->current_frame = dying->previous;
_PyEval_FrameClearAndPop(tstate, dying);
frame->prev_instr += frame->return_offset;
_PyFrame_StackPush(frame, retval);
#if TIER_ONE
goto resume_frame;
#endif
#if TIER_TWO
stack_pointer = _PyFrame_GetStackPointer(frame);
ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
#endif
}
}
TARGET(INSTRUMENTED_RETURN_VALUE) {
@ -1001,7 +1024,7 @@
// GH-99729: We need to unlink the frame *before* clearing it:
_PyInterpreterFrame *dying = frame;
frame = tstate->current_frame = dying->previous;
_PyEvalFrameClearAndPop(tstate, dying);
_PyEval_FrameClearAndPop(tstate, dying);
frame->prev_instr += frame->return_offset;
_PyFrame_StackPush(frame, retval);
goto resume_frame;
@ -1009,19 +1032,46 @@
}
TARGET(RETURN_CONST) {
PyObject *retval = GETITEM(FRAME_CO_CONSTS, oparg);
Py_INCREF(retval);
assert(EMPTY());
_PyFrame_SetStackPointer(frame, stack_pointer);
_Py_LeaveRecursiveCallPy(tstate);
assert(frame != &entry_frame);
// GH-99729: We need to unlink the frame *before* clearing it:
_PyInterpreterFrame *dying = frame;
frame = tstate->current_frame = dying->previous;
_PyEvalFrameClearAndPop(tstate, dying);
frame->prev_instr += frame->return_offset;
_PyFrame_StackPush(frame, retval);
goto resume_frame;
PyObject *value;
PyObject *retval;
// LOAD_CONST
{
value = GETITEM(FRAME_CO_CONSTS, oparg);
Py_INCREF(value);
}
// SAVE_CURRENT_IP
{
#if TIER_ONE
frame->prev_instr = next_instr - 1;
#endif
#if TIER_TWO
// Relies on a preceding SAVE_IP
frame->prev_instr--;
#endif
}
// _POP_FRAME
retval = value;
{
assert(EMPTY());
_PyFrame_SetStackPointer(frame, stack_pointer);
_Py_LeaveRecursiveCallPy(tstate);
// GH-99729: We need to unlink the frame *before* clearing it:
_PyInterpreterFrame *dying = frame;
#if TIER_ONE
assert(frame != &entry_frame);
#endif
frame = tstate->current_frame = dying->previous;
_PyEval_FrameClearAndPop(tstate, dying);
frame->prev_instr += frame->return_offset;
_PyFrame_StackPush(frame, retval);
#if TIER_ONE
goto resume_frame;
#endif
#if TIER_TWO
stack_pointer = _PyFrame_GetStackPointer(frame);
ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive;
#endif
}
}
TARGET(INSTRUMENTED_RETURN_CONST) {
@ -1038,7 +1088,7 @@
// GH-99729: We need to unlink the frame *before* clearing it:
_PyInterpreterFrame *dying = frame;
frame = tstate->current_frame = dying->previous;
_PyEvalFrameClearAndPop(tstate, dying);
_PyEval_FrameClearAndPop(tstate, dying);
frame->prev_instr += frame->return_offset;
_PyFrame_StackPush(frame, retval);
goto resume_frame;
@ -4575,7 +4625,8 @@
goto error;
}
func_obj->func_version = ((PyCodeObject *)codeobj)->co_version;
_PyFunction_SetVersion(
func_obj, ((PyCodeObject *)codeobj)->co_version);
func = (PyObject *)func_obj;
stack_pointer[-1] = func;
DISPATCH();

View File

@ -373,6 +373,8 @@ static PyTypeObject UOpExecutor_Type = {
.tp_as_sequence = &uop_as_sequence,
};
#define TRACE_STACK_SIZE 5
static int
translate_bytecode_to_trace(
PyCodeObject *code,
@ -380,10 +382,16 @@ translate_bytecode_to_trace(
_PyUOpInstruction *trace,
int buffer_size)
{
PyCodeObject *initial_code = code;
_Py_CODEUNIT *initial_instr = instr;
int trace_length = 0;
int max_length = buffer_size;
int reserved = 0;
struct {
PyCodeObject *code;
_Py_CODEUNIT *instr;
} trace_stack[TRACE_STACK_SIZE];
int trace_stack_depth = 0;
#ifdef Py_DEBUG
char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG");
@ -441,6 +449,24 @@ translate_bytecode_to_trace(
// Reserve space for main+stub uops, plus 2 for SAVE_IP and EXIT_TRACE
#define RESERVE(main, stub) RESERVE_RAW((main) + (stub) + 2, uop_name(opcode))
// Trace stack operations (used by _PUSH_FRAME, _POP_FRAME)
#define TRACE_STACK_PUSH() \
if (trace_stack_depth >= TRACE_STACK_SIZE) { \
DPRINTF(2, "Trace stack overflow\n"); \
ADD_TO_TRACE(SAVE_IP, 0, 0); \
goto done; \
} \
trace_stack[trace_stack_depth].code = code; \
trace_stack[trace_stack_depth].instr = instr; \
trace_stack_depth++;
#define TRACE_STACK_POP() \
if (trace_stack_depth <= 0) { \
Py_FatalError("Trace stack underflow\n"); \
} \
trace_stack_depth--; \
code = trace_stack[trace_stack_depth].code; \
instr = trace_stack[trace_stack_depth].instr;
DPRINTF(4,
"Optimizing %s (%s:%d) at byte offset %d\n",
PyUnicode_AsUTF8(code->co_qualname),
@ -448,6 +474,7 @@ translate_bytecode_to_trace(
code->co_firstlineno,
2 * INSTR_IP(initial_instr, code));
top: // Jump here after _PUSH_FRAME
for (;;) {
RESERVE_RAW(2, "epilogue"); // Always need space for SAVE_IP and EXIT_TRACE
ADD_TO_TRACE(SAVE_IP, INSTR_IP(instr, code), 0);
@ -508,7 +535,7 @@ pop_jump_if_bool:
case JUMP_BACKWARD:
{
if (instr + 2 - oparg == initial_instr) {
if (instr + 2 - oparg == initial_instr && code == initial_code) {
RESERVE(1, 0);
ADD_TO_TRACE(JUMP_TO_TOP, 0, 0);
}
@ -573,6 +600,14 @@ pop_jump_if_bool:
// Reserve space for nuops (+ SAVE_IP + EXIT_TRACE)
int nuops = expansion->nuops;
RESERVE(nuops, 0);
if (expansion->uops[nuops-1].uop == _POP_FRAME) {
// Check for trace stack underflow now:
// We can't bail e.g. in the middle of
// LOAD_CONST + _POP_FRAME.
if (trace_stack_depth == 0) {
DPRINTF(2, "Trace stack underflow\n");
goto done;}
}
uint32_t orig_oparg = oparg; // For OPARG_TOP/BOTTOM
for (int i = 0; i < nuops; i++) {
oparg = orig_oparg;
@ -619,8 +654,57 @@ pop_jump_if_bool:
Py_FatalError("garbled expansion");
}
ADD_TO_TRACE(expansion->uops[i].uop, oparg, operand);
if (expansion->uops[i].uop == _POP_FRAME) {
TRACE_STACK_POP();
DPRINTF(2,
"Returning to %s (%s:%d) at byte offset %d\n",
PyUnicode_AsUTF8(code->co_qualname),
PyUnicode_AsUTF8(code->co_filename),
code->co_firstlineno,
2 * INSTR_IP(instr, code));
goto top;
}
if (expansion->uops[i].uop == _PUSH_FRAME) {
assert(i + 1 == nuops);
int func_version_offset =
offsetof(_PyCallCache, func_version)/sizeof(_Py_CODEUNIT)
// Add one to account for the actual opcode/oparg pair:
+ 1;
uint32_t func_version = read_u32(&instr[func_version_offset].cache);
PyFunctionObject *func = _PyFunction_LookupByVersion(func_version);
DPRINTF(3, "Function object: %p\n", func);
if (func != NULL) {
PyCodeObject *new_code = (PyCodeObject *)PyFunction_GET_CODE(func);
if (new_code == code) {
// Recursive call, bail (we could be here forever).
DPRINTF(2, "Bailing on recursive call to %s (%s:%d)\n",
PyUnicode_AsUTF8(new_code->co_qualname),
PyUnicode_AsUTF8(new_code->co_filename),
new_code->co_firstlineno);
ADD_TO_TRACE(SAVE_IP, 0, 0);
goto done;
}
if (new_code->co_version != func_version) {
// func.__code__ was updated.
// Perhaps it may happen again, so don't bother tracing.
// TODO: Reason about this -- is it better to bail or not?
DPRINTF(2, "Bailing because co_version != func_version\n");
ADD_TO_TRACE(SAVE_IP, 0, 0);
goto done;
}
// Increment IP to the return address
instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1;
TRACE_STACK_PUSH();
code = new_code;
instr = _PyCode_CODE(code);
DPRINTF(2,
"Continuing in %s (%s:%d) at byte offset %d\n",
PyUnicode_AsUTF8(code->co_qualname),
PyUnicode_AsUTF8(code->co_filename),
code->co_firstlineno,
2 * INSTR_IP(instr, code));
goto top;
}
ADD_TO_TRACE(SAVE_IP, 0, 0);
goto done;
}
@ -639,6 +723,10 @@ pop_jump_if_bool:
} // End for (;;)
done:
while (trace_stack_depth > 0) {
TRACE_STACK_POP();
}
assert(code == initial_code);
// Skip short traces like SAVE_IP, LOAD_FAST, SAVE_IP, EXIT_TRACE
if (trace_length > 3) {
ADD_TO_TRACE(EXIT_TRACE, 0, 0);

View File

@ -364,10 +364,12 @@ class Analyzer:
case Instruction() as instr:
part, offset = self.analyze_instruction(instr, offset)
parts.append(part)
flags.add(instr.instr_flags)
if instr.name != "SAVE_IP":
# SAVE_IP in a macro is a no-op in Tier 1
flags.add(instr.instr_flags)
case _:
typing.assert_never(component)
format = "IB"
format = "IB" if flags.HAS_ARG_FLAG else "IX"
if offset:
format += "C" + "0" * (offset - 1)
return MacroInstruction(macro.name, format, flags, macro, parts, offset)

View File

@ -380,7 +380,7 @@ def write_components(
poke.as_stack_effect(lax=True),
)
if mgr.instr.name == "_PUSH_FRAME":
if mgr.instr.name in ("_PUSH_FRAME", "_POP_FRAME"):
# Adjust stack to min_offset (input effects materialized)
out.stack_adjust(mgr.min_offset.deep, mgr.min_offset.high)
# Use clone() since adjust_inverse() mutates final_offset