mirror of https://github.com/python/cpython
gh-118332: Fix deadlock involving stop the world (#118412)
Avoid detaching thread state when stopping the world. When re-attaching the thread state, the thread would attempt to resume the top-most critical section, which might now be held by a thread paused for our stop-the-world request.
This commit is contained in:
parent
4a1cf66c5c
commit
b2c3b70c71
|
@ -150,8 +150,10 @@ PyAPI_FUNC(void) PyEvent_Wait(PyEvent *evt);
|
|||
|
||||
// Wait for the event to be set, or until the timeout expires. If the event is
|
||||
// already set, then this returns immediately. Returns 1 if the event was set,
|
||||
// and 0 if the timeout expired or thread was interrupted.
|
||||
PyAPI_FUNC(int) PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns);
|
||||
// and 0 if the timeout expired or thread was interrupted. If `detach` is
|
||||
// true, then the thread will detach/release the GIL while waiting.
|
||||
PyAPI_FUNC(int)
|
||||
PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns, int detach);
|
||||
|
||||
// _PyRawMutex implements a word-sized mutex that that does not depend on the
|
||||
// parking lot API, and therefore can be used in the parking lot
|
||||
|
|
|
@ -204,6 +204,90 @@ test_critical_sections_threads(PyObject *self, PyObject *Py_UNUSED(args))
|
|||
Py_DECREF(test_data.obj1);
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
static void
|
||||
pysleep(int ms)
|
||||
{
|
||||
#ifdef MS_WINDOWS
|
||||
Sleep(ms);
|
||||
#else
|
||||
usleep(ms * 1000);
|
||||
#endif
|
||||
}
|
||||
|
||||
struct test_data_gc {
|
||||
PyObject *obj;
|
||||
Py_ssize_t num_threads;
|
||||
Py_ssize_t id;
|
||||
Py_ssize_t countdown;
|
||||
PyEvent done_event;
|
||||
PyEvent ready;
|
||||
};
|
||||
|
||||
static void
|
||||
thread_gc(void *arg)
|
||||
{
|
||||
struct test_data_gc *test_data = arg;
|
||||
PyGILState_STATE gil = PyGILState_Ensure();
|
||||
|
||||
Py_ssize_t id = _Py_atomic_add_ssize(&test_data->id, 1);
|
||||
if (id == test_data->num_threads - 1) {
|
||||
_PyEvent_Notify(&test_data->ready);
|
||||
}
|
||||
else {
|
||||
// wait for all test threads to more reliably reproduce the issue.
|
||||
PyEvent_Wait(&test_data->ready);
|
||||
}
|
||||
|
||||
if (id == 0) {
|
||||
Py_BEGIN_CRITICAL_SECTION(test_data->obj);
|
||||
// pause long enough that the lock would be handed off directly to
|
||||
// a waiting thread.
|
||||
pysleep(5);
|
||||
PyGC_Collect();
|
||||
Py_END_CRITICAL_SECTION();
|
||||
}
|
||||
else if (id == 1) {
|
||||
pysleep(1);
|
||||
Py_BEGIN_CRITICAL_SECTION(test_data->obj);
|
||||
pysleep(1);
|
||||
Py_END_CRITICAL_SECTION();
|
||||
}
|
||||
else if (id == 2) {
|
||||
// sleep long enough so that thread 0 is waiting to stop the world
|
||||
pysleep(6);
|
||||
Py_BEGIN_CRITICAL_SECTION(test_data->obj);
|
||||
pysleep(1);
|
||||
Py_END_CRITICAL_SECTION();
|
||||
}
|
||||
|
||||
PyGILState_Release(gil);
|
||||
if (_Py_atomic_add_ssize(&test_data->countdown, -1) == 1) {
|
||||
// last thread to finish sets done_event
|
||||
_PyEvent_Notify(&test_data->done_event);
|
||||
}
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
test_critical_sections_gc(PyObject *self, PyObject *Py_UNUSED(args))
|
||||
{
|
||||
// gh-118332: Contended critical sections should not deadlock with GC
|
||||
const Py_ssize_t NUM_THREADS = 3;
|
||||
struct test_data_gc test_data = {
|
||||
.obj = PyDict_New(),
|
||||
.countdown = NUM_THREADS,
|
||||
.num_threads = NUM_THREADS,
|
||||
};
|
||||
assert(test_data.obj != NULL);
|
||||
|
||||
for (int i = 0; i < NUM_THREADS; i++) {
|
||||
PyThread_start_new_thread(&thread_gc, &test_data);
|
||||
}
|
||||
PyEvent_Wait(&test_data.done_event);
|
||||
Py_DECREF(test_data.obj);
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static PyMethodDef test_methods[] = {
|
||||
|
@ -212,6 +296,7 @@ static PyMethodDef test_methods[] = {
|
|||
{"test_critical_sections_suspend", test_critical_sections_suspend, METH_NOARGS},
|
||||
#ifdef Py_CAN_START_THREADS
|
||||
{"test_critical_sections_threads", test_critical_sections_threads, METH_NOARGS},
|
||||
{"test_critical_sections_gc", test_critical_sections_gc, METH_NOARGS},
|
||||
#endif
|
||||
{NULL, NULL} /* sentinel */
|
||||
};
|
||||
|
|
|
@ -501,7 +501,8 @@ ThreadHandle_join(ThreadHandle *self, PyTime_t timeout_ns)
|
|||
|
||||
// Wait until the deadline for the thread to exit.
|
||||
PyTime_t deadline = timeout_ns != -1 ? _PyDeadline_Init(timeout_ns) : 0;
|
||||
while (!PyEvent_WaitTimed(&self->thread_is_exiting, timeout_ns)) {
|
||||
int detach = 1;
|
||||
while (!PyEvent_WaitTimed(&self->thread_is_exiting, timeout_ns, detach)) {
|
||||
if (deadline) {
|
||||
// _PyDeadline_Get will return a negative value if the deadline has
|
||||
// been exceeded.
|
||||
|
|
|
@ -277,12 +277,12 @@ _PyEvent_Notify(PyEvent *evt)
|
|||
void
|
||||
PyEvent_Wait(PyEvent *evt)
|
||||
{
|
||||
while (!PyEvent_WaitTimed(evt, -1))
|
||||
while (!PyEvent_WaitTimed(evt, -1, /*detach=*/1))
|
||||
;
|
||||
}
|
||||
|
||||
int
|
||||
PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns)
|
||||
PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns, int detach)
|
||||
{
|
||||
for (;;) {
|
||||
uint8_t v = _Py_atomic_load_uint8(&evt->v);
|
||||
|
@ -298,7 +298,7 @@ PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns)
|
|||
|
||||
uint8_t expected = _Py_HAS_PARKED;
|
||||
(void) _PyParkingLot_Park(&evt->v, &expected, sizeof(evt->v),
|
||||
timeout_ns, NULL, 1);
|
||||
timeout_ns, NULL, detach);
|
||||
|
||||
return _Py_atomic_load_uint8(&evt->v) == _Py_LOCKED;
|
||||
}
|
||||
|
|
|
@ -2238,7 +2238,8 @@ stop_the_world(struct _stoptheworld_state *stw)
|
|||
}
|
||||
|
||||
PyTime_t wait_ns = 1000*1000; // 1ms (arbitrary, may need tuning)
|
||||
if (PyEvent_WaitTimed(&stw->stop_event, wait_ns)) {
|
||||
int detach = 0;
|
||||
if (PyEvent_WaitTimed(&stw->stop_event, wait_ns, detach)) {
|
||||
assert(stw->thread_countdown == 0);
|
||||
break;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue