From be1dfccdf2c5c7671b8a549e969b8cf7d60d9936 Mon Sep 17 00:00:00 2001 From: Brett Simmers Date: Thu, 23 May 2024 16:59:35 -0400 Subject: [PATCH] gh-118727: Don't drop the GIL in `drop_gil()` unless the current thread holds it (#118745) `drop_gil()` assumes that its caller is attached, which means that the current thread holds the GIL if and only if the GIL is enabled, and the enabled-state of the GIL won't change. This isn't true, though, because `detach_thread()` calls `_PyEval_ReleaseLock()` after detaching and `_PyThreadState_DeleteCurrent()` calls it after removing the current thread from consideration for stop-the-world requests (effectively detaching it). Fix this by remembering whether or not a thread acquired the GIL when it last attached, in `PyThreadState._status.holds_gil`, and check this in `drop_gil()` instead of `gil->enabled`. This fixes a crash in `test_multiprocessing_pool_circular_import()`, so I've reenabled it. --- Include/cpython/pystate.h | 4 +- Include/internal/pycore_ceval.h | 7 +- .../test_importlib/test_threaded_import.py | 5 +- Python/ceval_gil.c | 106 ++++++++++-------- Python/pystate.c | 11 +- 5 files changed, 73 insertions(+), 60 deletions(-) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 2df9ecd6d52..ed3ee090ae5 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -83,6 +83,8 @@ struct _ts { unsigned int bound_gilstate:1; /* Currently in use (maybe holds the GIL). */ unsigned int active:1; + /* Currently holds the GIL. */ + unsigned int holds_gil:1; /* various stages of finalization */ unsigned int finalizing:1; @@ -90,7 +92,7 @@ struct _ts { unsigned int finalized:1; /* padding to align to 4 bytes */ - unsigned int :24; + unsigned int :23; } _status; #ifdef Py_BUILD_CORE # define _PyThreadState_WHENCE_NOTSET -1 diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 48ad0678995..bd3ba1225f2 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -131,11 +131,10 @@ extern int _PyEval_ThreadsInitialized(void); extern void _PyEval_InitGIL(PyThreadState *tstate, int own_gil); extern void _PyEval_FiniGIL(PyInterpreterState *interp); -// Acquire the GIL and return 1. In free-threaded builds, this function may -// return 0 to indicate that the GIL was disabled and therefore not acquired. -extern int _PyEval_AcquireLock(PyThreadState *tstate); +extern void _PyEval_AcquireLock(PyThreadState *tstate); -extern void _PyEval_ReleaseLock(PyInterpreterState *, PyThreadState *); +extern void _PyEval_ReleaseLock(PyInterpreterState *, PyThreadState *, + int final_release); #ifdef Py_GIL_DISABLED // Returns 0 or 1 if the GIL for the given thread's interpreter is disabled or diff --git a/Lib/test/test_importlib/test_threaded_import.py b/Lib/test/test_importlib/test_threaded_import.py index 3477112927a..9af1e4d505c 100644 --- a/Lib/test/test_importlib/test_threaded_import.py +++ b/Lib/test/test_importlib/test_threaded_import.py @@ -17,7 +17,7 @@ from test import support from test.support import verbose from test.support.import_helper import forget, mock_register_at_fork from test.support.os_helper import (TESTFN, unlink, rmtree) -from test.support import script_helper, threading_helper, requires_gil_enabled +from test.support import script_helper, threading_helper threading_helper.requires_working_threading(module=True) @@ -248,9 +248,6 @@ class ThreadedImportTests(unittest.TestCase): 'partial', 'cfimport.py') script_helper.assert_python_ok(fn) - # gh-118727 and gh-118729: pool_in_threads.py may crash in free-threaded - # builds, which can hang the Tsan test so temporarily skip it for now. - @requires_gil_enabled("gh-118727: test may crash in free-threaded builds") def test_multiprocessing_pool_circular_import(self): # Regression test for bpo-41567 fn = os.path.join(os.path.dirname(__file__), diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 7a54c185303..5617504a495 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -205,32 +205,39 @@ static void recreate_gil(struct _gil_runtime_state *gil) } #endif -static void -drop_gil_impl(struct _gil_runtime_state *gil) +static inline void +drop_gil_impl(PyThreadState *tstate, struct _gil_runtime_state *gil) { MUTEX_LOCK(gil->mutex); _Py_ANNOTATE_RWLOCK_RELEASED(&gil->locked, /*is_write=*/1); _Py_atomic_store_int_relaxed(&gil->locked, 0); + if (tstate != NULL) { + tstate->_status.holds_gil = 0; + } COND_SIGNAL(gil->cond); MUTEX_UNLOCK(gil->mutex); } static void -drop_gil(PyInterpreterState *interp, PyThreadState *tstate) +drop_gil(PyInterpreterState *interp, PyThreadState *tstate, int final_release) { struct _ceval_state *ceval = &interp->ceval; - /* If tstate is NULL, the caller is indicating that we're releasing + /* If final_release is true, the caller is indicating that we're releasing the GIL for the last time in this thread. This is particularly relevant when the current thread state is finalizing or its interpreter is finalizing (either may be in an inconsistent state). In that case the current thread will definitely never try to acquire the GIL again. */ // XXX It may be more correct to check tstate->_status.finalizing. - // XXX assert(tstate == NULL || !tstate->_status.cleared); + // XXX assert(final_release || !tstate->_status.cleared); + assert(final_release || tstate != NULL); struct _gil_runtime_state *gil = ceval->gil; #ifdef Py_GIL_DISABLED - if (!_Py_atomic_load_int_relaxed(&gil->enabled)) { + // Check if we have the GIL before dropping it. tstate will be NULL if + // take_gil() detected that this thread has been destroyed, in which case + // we know we have the GIL. + if (tstate != NULL && !tstate->_status.holds_gil) { return; } #endif @@ -238,26 +245,23 @@ drop_gil(PyInterpreterState *interp, PyThreadState *tstate) Py_FatalError("drop_gil: GIL is not locked"); } - /* tstate is allowed to be NULL (early interpreter init) */ - if (tstate != NULL) { + if (!final_release) { /* Sub-interpreter support: threads might have been switched under our feet using PyThreadState_Swap(). Fix the GIL last holder variable so that our heuristics work. */ _Py_atomic_store_ptr_relaxed(&gil->last_holder, tstate); } - drop_gil_impl(gil); + drop_gil_impl(tstate, gil); #ifdef FORCE_SWITCHING - /* We check tstate first in case we might be releasing the GIL for - the last time in this thread. In that case there's a possible - race with tstate->interp getting deleted after gil->mutex is - unlocked and before the following code runs, leading to a crash. - We can use (tstate == NULL) to indicate the thread is done with - the GIL, and that's the only time we might delete the - interpreter, so checking tstate first prevents the crash. - See https://github.com/python/cpython/issues/104341. */ - if (tstate != NULL && + /* We might be releasing the GIL for the last time in this thread. In that + case there's a possible race with tstate->interp getting deleted after + gil->mutex is unlocked and before the following code runs, leading to a + crash. We can use final_release to indicate the thread is done with the + GIL, and that's the only time we might delete the interpreter. See + https://github.com/python/cpython/issues/104341. */ + if (!final_release && _Py_eval_breaker_bit_is_set(tstate, _PY_GIL_DROP_REQUEST_BIT)) { MUTEX_LOCK(gil->switch_mutex); /* Not switched yet => wait */ @@ -284,7 +288,7 @@ drop_gil(PyInterpreterState *interp, PyThreadState *tstate) tstate must be non-NULL. Returns 1 if the GIL was acquired, or 0 if not. */ -static int +static void take_gil(PyThreadState *tstate) { int err = errno; @@ -309,7 +313,7 @@ take_gil(PyThreadState *tstate) struct _gil_runtime_state *gil = interp->ceval.gil; #ifdef Py_GIL_DISABLED if (!_Py_atomic_load_int_relaxed(&gil->enabled)) { - return 0; + return; } #endif @@ -358,10 +362,10 @@ take_gil(PyThreadState *tstate) if (!_Py_atomic_load_int_relaxed(&gil->enabled)) { // Another thread disabled the GIL between our check above and // now. Don't take the GIL, signal any other waiting threads, and - // return 0. + // return. COND_SIGNAL(gil->cond); MUTEX_UNLOCK(gil->mutex); - return 0; + return; } #endif @@ -393,20 +397,21 @@ take_gil(PyThreadState *tstate) in take_gil() while the main thread called wait_for_thread_shutdown() from Py_Finalize(). */ MUTEX_UNLOCK(gil->mutex); - /* Passing NULL to drop_gil() indicates that this thread is about to - terminate and will never hold the GIL again. */ - drop_gil(interp, NULL); + /* tstate could be a dangling pointer, so don't pass it to + drop_gil(). */ + drop_gil(interp, NULL, 1); PyThread_exit_thread(); } assert(_PyThreadState_CheckConsistency(tstate)); + tstate->_status.holds_gil = 1; _Py_unset_eval_breaker_bit(tstate, _PY_GIL_DROP_REQUEST_BIT); update_eval_breaker_for_thread(interp, tstate); MUTEX_UNLOCK(gil->mutex); errno = err; - return 1; + return; } void _PyEval_SetSwitchInterval(unsigned long microseconds) @@ -451,10 +456,17 @@ PyEval_ThreadsInitialized(void) static inline int current_thread_holds_gil(struct _gil_runtime_state *gil, PyThreadState *tstate) { - if (((PyThreadState*)_Py_atomic_load_ptr_relaxed(&gil->last_holder)) != tstate) { - return 0; - } - return _Py_atomic_load_int_relaxed(&gil->locked); + int holds_gil = tstate->_status.holds_gil; + + // holds_gil is the source of truth; check that last_holder and gil->locked + // are consistent with it. + int locked = _Py_atomic_load_int_relaxed(&gil->locked); + int is_last_holder = + ((PyThreadState*)_Py_atomic_load_ptr_relaxed(&gil->last_holder)) == tstate; + assert(!holds_gil || locked); + assert(!holds_gil || is_last_holder); + + return holds_gil; } #endif @@ -563,23 +575,24 @@ PyEval_ReleaseLock(void) /* This function must succeed when the current thread state is NULL. We therefore avoid PyThreadState_Get() which dumps a fatal error in debug mode. */ - drop_gil(tstate->interp, tstate); -} - -int -_PyEval_AcquireLock(PyThreadState *tstate) -{ - _Py_EnsureTstateNotNULL(tstate); - return take_gil(tstate); + drop_gil(tstate->interp, tstate, 0); } void -_PyEval_ReleaseLock(PyInterpreterState *interp, PyThreadState *tstate) +_PyEval_AcquireLock(PyThreadState *tstate) { - /* If tstate is NULL then we do not expect the current thread - to acquire the GIL ever again. */ - assert(tstate == NULL || tstate->interp == interp); - drop_gil(interp, tstate); + _Py_EnsureTstateNotNULL(tstate); + take_gil(tstate); +} + +void +_PyEval_ReleaseLock(PyInterpreterState *interp, + PyThreadState *tstate, + int final_release) +{ + assert(tstate != NULL); + assert(tstate->interp == interp); + drop_gil(interp, tstate, final_release); } void @@ -1136,7 +1149,12 @@ _PyEval_DisableGIL(PyThreadState *tstate) // // Drop the GIL, which will wake up any threads waiting in take_gil() // and let them resume execution without the GIL. - drop_gil_impl(gil); + drop_gil_impl(tstate, gil); + + // If another thread asked us to drop the GIL, they should be + // free-threading by now. Remove any such request so we have a clean + // slate if/when the GIL is enabled again. + _Py_unset_eval_breaker_bit(tstate, _PY_GIL_DROP_REQUEST_BIT); return 1; } return 0; diff --git a/Python/pystate.c b/Python/pystate.c index 0832b37c278..1ea1ad982a0 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1843,7 +1843,7 @@ _PyThreadState_DeleteCurrent(PyThreadState *tstate) #endif current_fast_clear(tstate->interp->runtime); tstate_delete_common(tstate); - _PyEval_ReleaseLock(tstate->interp, NULL); + _PyEval_ReleaseLock(tstate->interp, tstate, 1); free_threadstate((_PyThreadStateImpl *)tstate); } @@ -2068,7 +2068,7 @@ _PyThreadState_Attach(PyThreadState *tstate) while (1) { - int acquired_gil = _PyEval_AcquireLock(tstate); + _PyEval_AcquireLock(tstate); // XXX assert(tstate_is_alive(tstate)); current_fast_set(&_PyRuntime, tstate); @@ -2079,20 +2079,17 @@ _PyThreadState_Attach(PyThreadState *tstate) } #ifdef Py_GIL_DISABLED - if (_PyEval_IsGILEnabled(tstate) != acquired_gil) { + if (_PyEval_IsGILEnabled(tstate) && !tstate->_status.holds_gil) { // The GIL was enabled between our call to _PyEval_AcquireLock() // and when we attached (the GIL can't go from enabled to disabled // here because only a thread holding the GIL can disable // it). Detach and try again. - assert(!acquired_gil); tstate_set_detached(tstate, _Py_THREAD_DETACHED); tstate_deactivate(tstate); current_fast_clear(&_PyRuntime); continue; } _Py_qsbr_attach(((_PyThreadStateImpl *)tstate)->qsbr); -#else - (void)acquired_gil; #endif break; } @@ -2123,7 +2120,7 @@ detach_thread(PyThreadState *tstate, int detached_state) tstate_deactivate(tstate); tstate_set_detached(tstate, detached_state); current_fast_clear(&_PyRuntime); - _PyEval_ReleaseLock(tstate->interp, tstate); + _PyEval_ReleaseLock(tstate->interp, tstate, 0); } void