From f6fab21721c8aedc5dca97dbeb6292a067c19bf1 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 1 May 2024 11:34:50 +0100 Subject: [PATCH] GH-118095: Make invalidating and clearing executors memory safe (GH-118459) --- Include/cpython/optimizer.h | 3 +- Objects/codeobject.c | 3 +- Python/bytecodes.c | 12 +++- Python/generated_cases.c.h | 12 +++- Python/optimizer.c | 115 +++++++++++++++++++++++++----------- 5 files changed, 103 insertions(+), 42 deletions(-) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 819251a25bb..a169280b26a 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -24,6 +24,7 @@ typedef struct { uint8_t opcode; uint8_t oparg; uint8_t valid; + uint8_t linked; int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below). _PyBloomFilter bloom; _PyExecutorLinkListNode links; @@ -135,7 +136,7 @@ PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void); PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int offset); void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *); -void _Py_ExecutorClear(_PyExecutorObject *); +void _Py_ExecutorDetach(_PyExecutorObject *); void _Py_BloomFilter_Init(_PyBloomFilter *); void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj); PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj); diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 605167c5c0b..810f847485a 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1504,7 +1504,8 @@ clear_executors(PyCodeObject *co) assert(co->co_executors); for (int i = 0; i < co->co_executors->size; i++) { if (co->co_executors->executors[i]) { - _Py_ExecutorClear(co->co_executors->executors[i]); + _Py_ExecutorDetach(co->co_executors->executors[i]); + assert(co->co_executors->executors[i] == NULL); } } PyMem_Free(co->co_executors); diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 28766d6d97c..002b5a3529c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -163,9 +163,15 @@ dummy_func( if ((oparg & RESUME_OPARG_LOCATION_MASK) < RESUME_AFTER_YIELD_FROM) { CHECK_EVAL_BREAKER(); } - #if ENABLE_SPECIALIZATION - FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, RESUME_CHECK); - #endif /* ENABLE_SPECIALIZATION */ + assert(this_instr->op.code == RESUME || + this_instr->op.code == RESUME_CHECK || + this_instr->op.code == INSTRUMENTED_RESUME || + this_instr->op.code == ENTER_EXECUTOR); + if (this_instr->op.code == RESUME) { + #if ENABLE_SPECIALIZATION + FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, RESUME_CHECK); + #endif /* ENABLE_SPECIALIZATION */ + } } } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 602cce4beb1..32b485ecb9d 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4961,9 +4961,15 @@ if ((oparg & RESUME_OPARG_LOCATION_MASK) < RESUME_AFTER_YIELD_FROM) { CHECK_EVAL_BREAKER(); } - #if ENABLE_SPECIALIZATION - FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, RESUME_CHECK); - #endif /* ENABLE_SPECIALIZATION */ + assert(this_instr->op.code == RESUME || + this_instr->op.code == RESUME_CHECK || + this_instr->op.code == INSTRUMENTED_RESUME || + this_instr->op.code == ENTER_EXECUTOR); + if (this_instr->op.code == RESUME) { + #if ENABLE_SPECIALIZATION + FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, RESUME_CHECK); + #endif /* ENABLE_SPECIALIZATION */ + } } DISPATCH(); } diff --git a/Python/optimizer.c b/Python/optimizer.c index a5e7430c464..6576aa1cddc 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -75,7 +75,7 @@ insert_executor(PyCodeObject *code, _Py_CODEUNIT *instr, int index, _PyExecutorO Py_INCREF(executor); if (instr->op.code == ENTER_EXECUTOR) { assert(index == instr->op.arg); - _Py_ExecutorClear(code->co_executors->executors[index]); + _Py_ExecutorDetach(code->co_executors->executors[index]); } else { assert(code->co_executors->size == index); @@ -270,10 +270,14 @@ static PyMethodDef executor_methods[] = { ///////////////////// Experimental UOp Optimizer ///////////////////// +static int executor_clear(_PyExecutorObject *executor); +static void unlink_executor(_PyExecutorObject *executor); + static void uop_dealloc(_PyExecutorObject *self) { _PyObject_GC_UNTRACK(self); - _Py_ExecutorClear(self); + assert(self->vm_data.code == NULL); + unlink_executor(self); #ifdef _Py_JIT _PyJIT_Free(self); #endif @@ -379,13 +383,6 @@ PySequenceMethods uop_as_sequence = { .sq_item = (ssizeargfunc)uop_item, }; -static int -executor_clear(PyObject *o) -{ - _Py_ExecutorClear((_PyExecutorObject *)o); - return 0; -} - static int executor_traverse(PyObject *o, visitproc visit, void *arg) { @@ -412,7 +409,7 @@ PyTypeObject _PyUOpExecutor_Type = { .tp_as_sequence = &uop_as_sequence, .tp_methods = executor_methods, .tp_traverse = executor_traverse, - .tp_clear = executor_clear, + .tp_clear = (inquiry)executor_clear, .tp_is_gc = executor_is_gc, }; @@ -1190,6 +1187,7 @@ init_cold_exit_executor(_PyExecutorObject *executor, int oparg) inst->opcode = _COLD_EXIT; inst->oparg = oparg; executor->vm_data.valid = true; + executor->vm_data.linked = false; for (int i = 0; i < BLOOM_FILTER_WORDS; i++) { assert(executor->vm_data.bloom.bits[i] == 0); } @@ -1328,7 +1326,7 @@ PyTypeObject _PyCounterExecutor_Type = { .tp_dealloc = (destructor)counter_dealloc, .tp_methods = executor_methods, .tp_traverse = executor_traverse, - .tp_clear = executor_clear, + .tp_clear = (inquiry)executor_clear, }; static int @@ -1503,15 +1501,13 @@ link_executor(_PyExecutorObject *executor) links->next = NULL; } else { - _PyExecutorObject *next = head->vm_data.links.next; - links->previous = head; - links->next = next; - if (next != NULL) { - next->vm_data.links.previous = executor; - } - head->vm_data.links.next = executor; + assert(head->vm_data.links.previous == NULL); + links->previous = NULL; + links->next = head; + head->vm_data.links.previous = executor; + interp->executor_list_head = executor; } - executor->vm_data.valid = true; + executor->vm_data.linked = true; /* executor_list_head must be first in list */ assert(interp->executor_list_head->vm_data.links.previous == NULL); } @@ -1519,7 +1515,11 @@ link_executor(_PyExecutorObject *executor) static void unlink_executor(_PyExecutorObject *executor) { + if (!executor->vm_data.linked) { + return; + } _PyExecutorLinkListNode *links = &executor->vm_data.links; + assert(executor->vm_data.valid); _PyExecutorObject *next = links->next; _PyExecutorObject *prev = links->previous; if (next != NULL) { @@ -1534,7 +1534,7 @@ unlink_executor(_PyExecutorObject *executor) assert(interp->executor_list_head == executor); interp->executor_list_head = next; } - executor->vm_data.valid = false; + executor->vm_data.linked = false; } /* This must be called by optimizers before using the executor */ @@ -1548,23 +1548,15 @@ _Py_ExecutorInit(_PyExecutorObject *executor, const _PyBloomFilter *dependency_s link_executor(executor); } -/* This must be called by executors during dealloc */ +/* Detaches the executor from the code object (if any) that + * holds a reference to it */ void -_Py_ExecutorClear(_PyExecutorObject *executor) +_Py_ExecutorDetach(_PyExecutorObject *executor) { - if (!executor->vm_data.valid) { - return; - } - unlink_executor(executor); PyCodeObject *code = executor->vm_data.code; if (code == NULL) { return; } - for (uint32_t i = 0; i < executor->exit_count; i++) { - Py_DECREF(executor->exits[i].executor); - executor->exits[i].executor = &COLD_EXITS[i]; - executor->exits[i].temperature = initial_unreachable_backoff_counter(); - } _Py_CODEUNIT *instruction = &_PyCode_CODE(code)[executor->vm_data.index]; assert(instruction->op.code == ENTER_EXECUTOR); int index = instruction->op.arg; @@ -1572,7 +1564,36 @@ _Py_ExecutorClear(_PyExecutorObject *executor) instruction->op.code = executor->vm_data.opcode; instruction->op.arg = executor->vm_data.oparg; executor->vm_data.code = NULL; - Py_CLEAR(code->co_executors->executors[index]); + code->co_executors->executors[index] = NULL; + Py_DECREF(executor); +} + +static int +executor_clear(_PyExecutorObject *executor) +{ + if (!executor->vm_data.valid) { + return 0; + } + assert(executor->vm_data.valid == 1); + unlink_executor(executor); + executor->vm_data.valid = 0; + /* It is possible for an executor to form a reference + * cycle with itself, so decref'ing a side exit could + * free the executor unless we hold a strong reference to it + */ + Py_INCREF(executor); + for (uint32_t i = 0; i < executor->exit_count; i++) { + const _PyExecutorObject *cold = &COLD_EXITS[i]; + const _PyExecutorObject *side = executor->exits[i].executor; + executor->exits[i].temperature = initial_unreachable_backoff_counter(); + if (side != cold) { + executor->exits[i].executor = cold; + Py_DECREF(side); + } + } + _Py_ExecutorDetach(executor); + Py_DECREF(executor); + return 0; } void @@ -1593,17 +1614,42 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is _Py_BloomFilter_Add(&obj_filter, obj); /* Walk the list of executors */ /* TO DO -- Use a tree to avoid traversing as many objects */ + bool no_memory = false; + PyObject *invalidate = PyList_New(0); + if (invalidate == NULL) { + PyErr_Clear(); + no_memory = true; + } + /* Clearing an executor can deallocate others, so we need to make a list of + * executors to invalidate first */ for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) { assert(exec->vm_data.valid); _PyExecutorObject *next = exec->vm_data.links.next; if (bloom_filter_may_contain(&exec->vm_data.bloom, &obj_filter)) { - _Py_ExecutorClear(exec); + unlink_executor(exec); + if (no_memory) { + exec->vm_data.valid = 0; + } else { + if (PyList_Append(invalidate, (PyObject *)exec) < 0) { + PyErr_Clear(); + no_memory = true; + exec->vm_data.valid = 0; + } + } if (is_invalidation) { OPT_STAT_INC(executors_invalidated); } } exec = next; } + if (invalidate != NULL) { + for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) { + _PyExecutorObject *exec = (_PyExecutorObject *)PyList_GET_ITEM(invalidate, i); + executor_clear(exec); + } + Py_DECREF(invalidate); + } + return; } /* Invalidate all executors */ @@ -1612,12 +1658,13 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation) { while (interp->executor_list_head) { _PyExecutorObject *executor = interp->executor_list_head; + assert(executor->vm_data.valid == 1 && executor->vm_data.linked == 1); if (executor->vm_data.code) { // Clear the entire code object so its co_executors array be freed: _PyCode_Clear_Executors(executor->vm_data.code); } else { - _Py_ExecutorClear(executor); + executor_clear(executor); } if (is_invalidation) { OPT_STAT_INC(executors_invalidated);