From f180b31e7629d36265fa36f1560365358b4fd47c Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 25 Apr 2024 11:32:47 +0100 Subject: [PATCH] GH-118095: Handle `RETURN_GENERATOR` in tier 2 (GH-118180) --- Include/internal/pycore_ceval.h | 2 +- Include/internal/pycore_frame.h | 14 ++++++-- Include/internal/pycore_opcode_metadata.h | 3 +- Include/internal/pycore_uop_ids.h | 1 + Include/internal/pycore_uop_metadata.h | 4 +++ Lib/test/test_capi/test_opt.py | 12 +++++++ Objects/frameobject.c | 5 --- Python/bytecodes.c | 24 ++++--------- Python/ceval_macros.h | 12 +++++++ Python/executor_cases.c.h | 43 ++++++++++++++++------- Python/frame.c | 12 ------- Python/generated_cases.c.h | 40 +++++++-------------- Python/optimizer.c | 5 +-- Python/optimizer_analysis.c | 2 +- Python/optimizer_bytecodes.c | 22 ++++++++++++ Python/optimizer_cases.c.h | 23 ++++++++++++ 16 files changed, 143 insertions(+), 81 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 946f82ae3c2..8d88b5c1d15 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -182,7 +182,7 @@ static inline void _Py_LeaveRecursiveCall(void) { extern struct _PyInterpreterFrame* _PyEval_GetFrame(void); -extern PyObject* _Py_MakeCoro(PyFunctionObject *func); +PyAPI_FUNC(PyObject *)_Py_MakeCoro(PyFunctionObject *func); /* Handle signals, pending calls, GIL drop request and asynchronous exception */ diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 74d9e4cac72..f913928f38b 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -110,7 +110,17 @@ _PyFrame_NumSlotsForCodeObject(PyCodeObject *code) return code->co_framesize - FRAME_SPECIALS_SIZE; } -void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *dest); +static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *dest) +{ + assert(src->stacktop >= _PyFrame_GetCode(src)->co_nlocalsplus); + *dest = *src; + for (int i = 1; i < src->stacktop; i++) { + dest->localsplus[i] = src->localsplus[i]; + } + // Don't leave a dangling pointer to the old frame when creating generators + // and coroutines: + dest->previous = NULL; +} /* Consumes reference to func and locals. Does not initialize frame->previous, which happens @@ -256,7 +266,7 @@ _PyThreadState_HasStackSpace(PyThreadState *tstate, int size) extern _PyInterpreterFrame * _PyThreadState_PushFrame(PyThreadState *tstate, size_t size); -void _PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame *frame); +PyAPI_FUNC(void) _PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame *frame); /* Pushes a frame without checking for space. * Must be guarded by _PyThreadState_HasStackSpace() diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 5636debbf4a..400d7c334db 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -805,7 +805,7 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { case RETURN_CONST: return 0; case RETURN_GENERATOR: - return 0; + return 1; case RETURN_VALUE: return 0; case SEND: @@ -1310,6 +1310,7 @@ _PyOpcode_macro_expansion[256] = { [PUSH_NULL] = { .nuops = 1, .uops = { { _PUSH_NULL, 0, 0 } } }, [RESUME_CHECK] = { .nuops = 1, .uops = { { _RESUME_CHECK, 0, 0 } } }, [RETURN_CONST] = { .nuops = 2, .uops = { { _LOAD_CONST, 0, 0 }, { _POP_FRAME, 0, 0 } } }, + [RETURN_GENERATOR] = { .nuops = 1, .uops = { { _RETURN_GENERATOR, 0, 0 } } }, [RETURN_VALUE] = { .nuops = 1, .uops = { { _POP_FRAME, 0, 0 } } }, [SETUP_ANNOTATIONS] = { .nuops = 1, .uops = { { _SETUP_ANNOTATIONS, 0, 0 } } }, [SET_ADD] = { .nuops = 1, .uops = { { _SET_ADD, 0, 0 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index f0558743b32..bb49d6e77d2 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -231,6 +231,7 @@ extern "C" { #define _PUSH_NULL PUSH_NULL #define _REPLACE_WITH_TRUE 424 #define _RESUME_CHECK RESUME_CHECK +#define _RETURN_GENERATOR RETURN_GENERATOR #define _SAVE_RETURN_OFFSET 425 #define _SEND 426 #define _SEND_GEN SEND_GEN diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 2da4c4d4e21..b8cdfae8391 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -219,6 +219,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CALL_METHOD_DESCRIPTOR_FAST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_MAKE_FUNCTION] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_SET_FUNCTION_ATTRIBUTE] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, + [_RETURN_GENERATOR] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_BUILD_SLICE] = HAS_ARG_FLAG | HAS_ERROR_FLAG, [_CONVERT_VALUE] = HAS_ARG_FLAG | HAS_ERROR_FLAG, [_FORMAT_SIMPLE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -445,6 +446,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_PUSH_NULL] = "_PUSH_NULL", [_REPLACE_WITH_TRUE] = "_REPLACE_WITH_TRUE", [_RESUME_CHECK] = "_RESUME_CHECK", + [_RETURN_GENERATOR] = "_RETURN_GENERATOR", [_SAVE_RETURN_OFFSET] = "_SAVE_RETURN_OFFSET", [_SETUP_ANNOTATIONS] = "_SETUP_ANNOTATIONS", [_SET_ADD] = "_SET_ADD", @@ -894,6 +896,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 1; case _SET_FUNCTION_ATTRIBUTE: return 2; + case _RETURN_GENERATOR: + return 0; case _BUILD_SLICE: return 2 + ((oparg == 3) ? 1 : 0); case _CONVERT_VALUE: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index c004f463770..e2e772a52d7 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1286,5 +1286,17 @@ class TestUopsOptimization(unittest.TestCase): self.assertEqual(res, 32 * 32) self.assertIsNone(ex) + def test_return_generator(self): + def gen(): + yield None + def testfunc(n): + for i in range(n): + gen() + return i + res, ex = self._run_with_optimizer(testfunc, 20) + self.assertEqual(res, 19) + self.assertIsNotNone(ex) + self.assertIn("_RETURN_GENERATOR", get_opnames(ex)) + if __name__ == "__main__": unittest.main() diff --git a/Objects/frameobject.c b/Objects/frameobject.c index d55c246d80d..07b7ef3df46 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -304,11 +304,6 @@ mark_stacks(PyCodeObject *code_obj, int len) stacks[i] = UNINITIALIZED; } stacks[0] = EMPTY_STACK; - if (code_obj->co_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) - { - // Generators get sent None while starting: - stacks[0] = push_value(stacks[0], Object); - } int todo = 1; while (todo) { todo = 0; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c31617d35b0..48550491491 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -837,12 +837,7 @@ dummy_func( _PyFrame_StackPush(frame, retval); LOAD_SP(); LOAD_IP(frame->return_offset); -#if LLTRACE && TIER_ONE - lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); - if (lltrace < 0) { - goto exit_unwind; - } -#endif + LLTRACE_RESUME_FRAME(); } macro(RETURN_VALUE) = @@ -3186,12 +3181,7 @@ dummy_func( tstate->py_recursion_remaining--; LOAD_SP(); LOAD_IP(0); -#if LLTRACE && TIER_ONE - lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); - if (lltrace < 0) { - goto exit_unwind; - } -#endif + LLTRACE_RESUME_FRAME(); } macro(CALL_BOUND_METHOD_EXACT_ARGS) = @@ -3877,7 +3867,7 @@ dummy_func( } } - tier1 inst(RETURN_GENERATOR, (--)) { + inst(RETURN_GENERATOR, (-- res)) { assert(PyFunction_Check(frame->f_funcobj)); PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj; PyGenObject *gen = (PyGenObject *)_Py_MakeCoro(func); @@ -3887,19 +3877,19 @@ dummy_func( assert(EMPTY()); _PyFrame_SetStackPointer(frame, stack_pointer); _PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; - frame->instr_ptr = next_instr; + frame->instr_ptr++; _PyFrame_Copy(frame, gen_frame); assert(frame->frame_obj == NULL); gen->gi_frame_state = FRAME_CREATED; gen_frame->owner = FRAME_OWNED_BY_GENERATOR; _Py_LeaveRecursiveCallPy(tstate); - assert(frame != &entry_frame); + res = (PyObject *)gen; _PyInterpreterFrame *prev = frame->previous; _PyThreadState_PopFrame(tstate, frame); frame = tstate->current_frame = prev; - _PyFrame_StackPush(frame, (PyObject *)gen); LOAD_IP(frame->return_offset); - goto resume_frame; + LOAD_SP(); + LLTRACE_RESUME_FRAME(); } inst(BUILD_SLICE, (start, stop, step if (oparg == 3) -- slice)) { diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 224cd1da7d4..871d1747e2b 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -86,6 +86,18 @@ #define PRE_DISPATCH_GOTO() ((void)0) #endif +#if LLTRACE +#define LLTRACE_RESUME_FRAME() \ +do { \ + lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); \ + if (lltrace < 0) { \ + goto exit_unwind; \ + } \ +} while (0) +#else +#define LLTRACE_RESUME_FRAME() ((void)0) +#endif + #ifdef Py_GIL_DISABLED #define QSBR_QUIESCENT_STATE(tstate) _Py_qsbr_quiescent_state(((_PyThreadStateImpl *)tstate)->qsbr) #else diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 7403d6fdaf0..1eb3da9b700 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -988,12 +988,7 @@ _PyFrame_StackPush(frame, retval); LOAD_SP(); LOAD_IP(frame->return_offset); - #if LLTRACE && TIER_ONE - lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); - if (lltrace < 0) { - goto exit_unwind; - } - #endif + LLTRACE_RESUME_FRAME(); break; } @@ -3213,12 +3208,7 @@ tstate->py_recursion_remaining--; LOAD_SP(); LOAD_IP(0); - #if LLTRACE && TIER_ONE - lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); - if (lltrace < 0) { - goto exit_unwind; - } - #endif + LLTRACE_RESUME_FRAME(); break; } @@ -3833,6 +3823,35 @@ break; } + case _RETURN_GENERATOR: { + PyObject *res; + assert(PyFunction_Check(frame->f_funcobj)); + PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj; + PyGenObject *gen = (PyGenObject *)_Py_MakeCoro(func); + if (gen == NULL) { + JUMP_TO_ERROR(); + } + assert(EMPTY()); + _PyFrame_SetStackPointer(frame, stack_pointer); + _PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; + frame->instr_ptr++; + _PyFrame_Copy(frame, gen_frame); + assert(frame->frame_obj == NULL); + gen->gi_frame_state = FRAME_CREATED; + gen_frame->owner = FRAME_OWNED_BY_GENERATOR; + _Py_LeaveRecursiveCallPy(tstate); + res = (PyObject *)gen; + _PyInterpreterFrame *prev = frame->previous; + _PyThreadState_PopFrame(tstate, frame); + frame = tstate->current_frame = prev; + LOAD_IP(frame->return_offset); + LOAD_SP(); + LLTRACE_RESUME_FRAME(); + stack_pointer[0] = res; + stack_pointer += 1; + break; + } + case _BUILD_SLICE: { PyObject *step = NULL; PyObject *stop; diff --git a/Python/frame.c b/Python/frame.c index f88a8f0d73d..db9d13359a2 100644 --- a/Python/frame.c +++ b/Python/frame.c @@ -53,18 +53,6 @@ _PyFrame_MakeAndSetFrameObject(_PyInterpreterFrame *frame) return f; } -void -_PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *dest) -{ - assert(src->stacktop >= _PyFrame_GetCode(src)->co_nlocalsplus); - Py_ssize_t size = ((char*)&src->localsplus[src->stacktop]) - (char *)src; - memcpy(dest, src, size); - // Don't leave a dangling pointer to the old frame when creating generators - // and coroutines: - dest->previous = NULL; -} - - static void take_ownership(PyFrameObject *f, _PyInterpreterFrame *frame) { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 058cac8bedd..0c58f3f87d4 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -997,12 +997,7 @@ tstate->py_recursion_remaining--; LOAD_SP(); LOAD_IP(0); - #if LLTRACE && TIER_ONE - lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); - if (lltrace < 0) { - goto exit_unwind; - } - #endif + LLTRACE_RESUME_FRAME(); } DISPATCH(); } @@ -1786,12 +1781,7 @@ tstate->py_recursion_remaining--; LOAD_SP(); LOAD_IP(0); - #if LLTRACE && TIER_ONE - lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); - if (lltrace < 0) { - goto exit_unwind; - } - #endif + LLTRACE_RESUME_FRAME(); } DISPATCH(); } @@ -4992,12 +4982,7 @@ _PyFrame_StackPush(frame, retval); LOAD_SP(); LOAD_IP(frame->return_offset); - #if LLTRACE && TIER_ONE - lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); - if (lltrace < 0) { - goto exit_unwind; - } - #endif + LLTRACE_RESUME_FRAME(); } DISPATCH(); } @@ -5006,6 +4991,7 @@ frame->instr_ptr = next_instr; next_instr += 1; INSTRUCTION_STATS(RETURN_GENERATOR); + PyObject *res; assert(PyFunction_Check(frame->f_funcobj)); PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj; PyGenObject *gen = (PyGenObject *)_Py_MakeCoro(func); @@ -5015,19 +5001,22 @@ assert(EMPTY()); _PyFrame_SetStackPointer(frame, stack_pointer); _PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; - frame->instr_ptr = next_instr; + frame->instr_ptr++; _PyFrame_Copy(frame, gen_frame); assert(frame->frame_obj == NULL); gen->gi_frame_state = FRAME_CREATED; gen_frame->owner = FRAME_OWNED_BY_GENERATOR; _Py_LeaveRecursiveCallPy(tstate); - assert(frame != &entry_frame); + res = (PyObject *)gen; _PyInterpreterFrame *prev = frame->previous; _PyThreadState_PopFrame(tstate, frame); frame = tstate->current_frame = prev; - _PyFrame_StackPush(frame, (PyObject *)gen); LOAD_IP(frame->return_offset); - goto resume_frame; + LOAD_SP(); + LLTRACE_RESUME_FRAME(); + stack_pointer[0] = res; + stack_pointer += 1; + DISPATCH(); } TARGET(RETURN_VALUE) { @@ -5050,12 +5039,7 @@ _PyFrame_StackPush(frame, retval); LOAD_SP(); LOAD_IP(frame->return_offset); - #if LLTRACE && TIER_ONE - lltrace = maybe_lltrace_resume_frame(frame, &entry_frame, GLOBALS()); - if (lltrace < 0) { - goto exit_unwind; - } - #endif + LLTRACE_RESUME_FRAME(); DISPATCH(); } diff --git a/Python/optimizer.c b/Python/optimizer.c index b17c2998e25..e5c70f72f9c 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -697,7 +697,8 @@ top: // Jump here after _PUSH_FRAME or likely branches // Reserve space for nuops (+ _SET_IP + _EXIT_TRACE) int nuops = expansion->nuops; RESERVE(nuops + 1); /* One extra for exit */ - if (expansion->uops[nuops-1].uop == _POP_FRAME) { + int16_t last_op = expansion->uops[nuops-1].uop; + if (last_op == _POP_FRAME || last_op == _RETURN_GENERATOR) { // Check for trace stack underflow now: // We can't bail e.g. in the middle of // LOAD_CONST + _POP_FRAME. @@ -756,7 +757,7 @@ top: // Jump here after _PUSH_FRAME or likely branches Py_FatalError("garbled expansion"); } - if (uop == _POP_FRAME) { + if (uop == _POP_FRAME || uop == _RETURN_GENERATOR) { TRACE_STACK_POP(); /* Set the operand to the function or code object returned to, * to assist optimization passes. (See _PUSH_FRAME below.) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index a76edd62c94..9315d7228b5 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -369,7 +369,7 @@ eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit) static PyCodeObject * get_code(_PyUOpInstruction *op) { - assert(op->opcode == _PUSH_FRAME || op->opcode == _POP_FRAME); + assert(op->opcode == _PUSH_FRAME || op->opcode == _POP_FRAME || op->opcode == _RETURN_GENERATOR); PyCodeObject *co = NULL; uint64_t operand = op->operand; if (operand == 0) { diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 481fb8387af..8bc56342774 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -651,6 +651,28 @@ dummy_func(void) { } } + op(_RETURN_GENERATOR, ( -- res)) { + SYNC_SP(); + ctx->frame->stack_pointer = stack_pointer; + frame_pop(ctx); + stack_pointer = ctx->frame->stack_pointer; + OUT_OF_SPACE_IF_NULL(res = sym_new_unknown(ctx)); + + /* Stack space handling */ + assert(corresponding_check_stack == NULL); + assert(co != NULL); + int framesize = co->co_framesize; + assert(framesize > 0); + assert(framesize <= curr_space); + curr_space -= framesize; + + co = get_code(this_instr); + if (co == NULL) { + // might be impossible, but bailing is still safe + goto done; + } + } + op(_CHECK_STACK_SPACE, ( --)) { assert(corresponding_check_stack == NULL); corresponding_check_stack = this_instr; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 0a7d96d30ad..4f0941a3cc3 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1840,6 +1840,29 @@ break; } + case _RETURN_GENERATOR: { + _Py_UopsSymbol *res; + ctx->frame->stack_pointer = stack_pointer; + frame_pop(ctx); + stack_pointer = ctx->frame->stack_pointer; + OUT_OF_SPACE_IF_NULL(res = sym_new_unknown(ctx)); + /* Stack space handling */ + assert(corresponding_check_stack == NULL); + assert(co != NULL); + int framesize = co->co_framesize; + assert(framesize > 0); + assert(framesize <= curr_space); + curr_space -= framesize; + co = get_code(this_instr); + if (co == NULL) { + // might be impossible, but bailing is still safe + goto done; + } + stack_pointer[0] = res; + stack_pointer += 1; + break; + } + case _BUILD_SLICE: { _Py_UopsSymbol *slice; slice = sym_new_not_null(ctx);