gh-118727: Don't drop the GIL in `drop_gil()` unless the current thread holds it (#118745)

`drop_gil()` assumes that its caller is attached, which means that the current
thread holds the GIL if and only if the GIL is enabled, and that the GIL's
enabled state won't change underneath it. That assumption doesn't hold, though,
because `detach_thread()` calls `_PyEval_ReleaseLock()` after detaching, and
`_PyThreadState_DeleteCurrent()` calls it after removing the current thread
from consideration for stop-the-world requests (effectively detaching it).

Fix this by remembering, in `PyThreadState._status.holds_gil`, whether or not
the thread acquired the GIL when it last attached, and by checking that flag in
`drop_gil()` instead of `gil->enabled`.
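To make the ordering issue concrete, here is a minimal standalone sketch (not CPython source; the names fake_gil, fake_tstate, take_lock, and drop_lock are invented for illustration) of the pattern adopted here: the acquire path records on the thread state whether it actually took the lock, and the release path consults that per-thread record rather than re-reading a global "enabled" flag that another thread may have flipped in the meantime.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Global lock state, loosely modeled on _gil_runtime_state. */
typedef struct {
    pthread_mutex_t mutex;
    atomic_bool enabled;   /* other threads may flip this at any time */
    atomic_bool locked;
} fake_gil;

/* Per-thread state, loosely modeled on PyThreadState._status.holds_gil. */
typedef struct {
    bool holds_gil;        /* did *this* thread acquire the lock last time? */
} fake_tstate;

static void take_lock(fake_gil *gil, fake_tstate *ts)
{
    if (!atomic_load(&gil->enabled)) {
        ts->holds_gil = false;          /* lock disabled: nothing acquired */
        return;
    }
    pthread_mutex_lock(&gil->mutex);
    atomic_store(&gil->locked, true);
    ts->holds_gil = true;               /* remember the acquisition per thread */
}

static void drop_lock(fake_gil *gil, fake_tstate *ts)
{
    /* Decide based on what this thread actually did, not on gil->enabled,
       which may have changed since take_lock() ran. */
    if (!ts->holds_gil) {
        return;
    }
    ts->holds_gil = false;
    atomic_store(&gil->locked, false);
    pthread_mutex_unlock(&gil->mutex);
}

int main(void)
{
    fake_gil gil = { PTHREAD_MUTEX_INITIALIZER, true, false };
    fake_tstate ts = { false };

    take_lock(&gil, &ts);
    /* Simulate another thread disabling the "GIL" while we still hold it.
       If drop_lock() consulted gil.enabled, it would skip the unlock even
       though we still hold the mutex; consulting ts.holds_gil releases it. */
    atomic_store(&gil.enabled, false);
    drop_lock(&gil, &ts);

    printf("locked after release: %d\n", (int)atomic_load(&gil.locked));
    return 0;
}

In the real change, take_gil() records the acquisition in tstate->_status.holds_gil and drop_gil() returns early when the flag is clear; see the drop_gil()/take_gil() hunks below.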

This fixes a crash in `test_multiprocessing_pool_circular_import()`, so I've
reenabled it.
Author: Brett Simmers, 2024-05-23 16:59:35 -04:00 (committed by GitHub)
Parent: b30d30c747
Commit: be1dfccdf2
5 changed files with 73 additions and 60 deletions

File 1 of 5

@@ -83,6 +83,8 @@ struct _ts {
         unsigned int bound_gilstate:1;
         /* Currently in use (maybe holds the GIL). */
         unsigned int active:1;
+        /* Currently holds the GIL. */
+        unsigned int holds_gil:1;
         /* various stages of finalization */
         unsigned int finalizing:1;
@@ -90,7 +92,7 @@ struct _ts {
         unsigned int finalized:1;
         /* padding to align to 4 bytes */
-        unsigned int :24;
+        unsigned int :23;
     } _status;
 #ifdef Py_BUILD_CORE
 # define _PyThreadState_WHENCE_NOTSET -1

File 2 of 5

@@ -131,11 +131,10 @@ extern int _PyEval_ThreadsInitialized(void);
 extern void _PyEval_InitGIL(PyThreadState *tstate, int own_gil);
 extern void _PyEval_FiniGIL(PyInterpreterState *interp);
-// Acquire the GIL and return 1. In free-threaded builds, this function may
-// return 0 to indicate that the GIL was disabled and therefore not acquired.
-extern int _PyEval_AcquireLock(PyThreadState *tstate);
+extern void _PyEval_AcquireLock(PyThreadState *tstate);
-extern void _PyEval_ReleaseLock(PyInterpreterState *, PyThreadState *);
+extern void _PyEval_ReleaseLock(PyInterpreterState *, PyThreadState *,
+                                int final_release);
 #ifdef Py_GIL_DISABLED
 // Returns 0 or 1 if the GIL for the given thread's interpreter is disabled or

File 3 of 5

@@ -17,7 +17,7 @@ from test import support
 from test.support import verbose
 from test.support.import_helper import forget, mock_register_at_fork
 from test.support.os_helper import (TESTFN, unlink, rmtree)
-from test.support import script_helper, threading_helper, requires_gil_enabled
+from test.support import script_helper, threading_helper
 threading_helper.requires_working_threading(module=True)
@@ -248,9 +248,6 @@ class ThreadedImportTests(unittest.TestCase):
                               'partial', 'cfimport.py')
         script_helper.assert_python_ok(fn)
-    # gh-118727 and gh-118729: pool_in_threads.py may crash in free-threaded
-    # builds, which can hang the Tsan test so temporarily skip it for now.
-    @requires_gil_enabled("gh-118727: test may crash in free-threaded builds")
     def test_multiprocessing_pool_circular_import(self):
         # Regression test for bpo-41567
         fn = os.path.join(os.path.dirname(__file__),

File 4 of 5

@@ -205,32 +205,39 @@ static void recreate_gil(struct _gil_runtime_state *gil)
 }
 #endif
-static void
-drop_gil_impl(struct _gil_runtime_state *gil)
+static inline void
+drop_gil_impl(PyThreadState *tstate, struct _gil_runtime_state *gil)
 {
     MUTEX_LOCK(gil->mutex);
     _Py_ANNOTATE_RWLOCK_RELEASED(&gil->locked, /*is_write=*/1);
     _Py_atomic_store_int_relaxed(&gil->locked, 0);
+    if (tstate != NULL) {
+        tstate->_status.holds_gil = 0;
+    }
     COND_SIGNAL(gil->cond);
     MUTEX_UNLOCK(gil->mutex);
 }
 static void
-drop_gil(PyInterpreterState *interp, PyThreadState *tstate)
+drop_gil(PyInterpreterState *interp, PyThreadState *tstate, int final_release)
 {
     struct _ceval_state *ceval = &interp->ceval;
-    /* If tstate is NULL, the caller is indicating that we're releasing
+    /* If final_release is true, the caller is indicating that we're releasing
        the GIL for the last time in this thread. This is particularly
        relevant when the current thread state is finalizing or its
        interpreter is finalizing (either may be in an inconsistent
        state). In that case the current thread will definitely
        never try to acquire the GIL again. */
     // XXX It may be more correct to check tstate->_status.finalizing.
-    // XXX assert(tstate == NULL || !tstate->_status.cleared);
+    // XXX assert(final_release || !tstate->_status.cleared);
+    assert(final_release || tstate != NULL);
     struct _gil_runtime_state *gil = ceval->gil;
 #ifdef Py_GIL_DISABLED
-    if (!_Py_atomic_load_int_relaxed(&gil->enabled)) {
+    // Check if we have the GIL before dropping it. tstate will be NULL if
+    // take_gil() detected that this thread has been destroyed, in which case
+    // we know we have the GIL.
+    if (tstate != NULL && !tstate->_status.holds_gil) {
         return;
     }
 #endif
@@ -238,26 +245,23 @@ drop_gil(PyInterpreterState *interp, PyThreadState *tstate)
         Py_FatalError("drop_gil: GIL is not locked");
     }
-    /* tstate is allowed to be NULL (early interpreter init) */
-    if (tstate != NULL) {
+    if (!final_release) {
         /* Sub-interpreter support: threads might have been switched
            under our feet using PyThreadState_Swap(). Fix the GIL last
           holder variable so that our heuristics work. */
         _Py_atomic_store_ptr_relaxed(&gil->last_holder, tstate);
     }
-    drop_gil_impl(gil);
+    drop_gil_impl(tstate, gil);
 #ifdef FORCE_SWITCHING
-    /* We check tstate first in case we might be releasing the GIL for
-       the last time in this thread. In that case there's a possible
-       race with tstate->interp getting deleted after gil->mutex is
-       unlocked and before the following code runs, leading to a crash.
-       We can use (tstate == NULL) to indicate the thread is done with
-       the GIL, and that's the only time we might delete the
-       interpreter, so checking tstate first prevents the crash.
-       See https://github.com/python/cpython/issues/104341. */
-    if (tstate != NULL &&
+    /* We might be releasing the GIL for the last time in this thread. In that
+       case there's a possible race with tstate->interp getting deleted after
+       gil->mutex is unlocked and before the following code runs, leading to a
+       crash. We can use final_release to indicate the thread is done with the
+       GIL, and that's the only time we might delete the interpreter. See
+       https://github.com/python/cpython/issues/104341. */
+    if (!final_release &&
         _Py_eval_breaker_bit_is_set(tstate, _PY_GIL_DROP_REQUEST_BIT)) {
         MUTEX_LOCK(gil->switch_mutex);
         /* Not switched yet => wait */
@@ -284,7 +288,7 @@ drop_gil(PyInterpreterState *interp, PyThreadState *tstate)
    tstate must be non-NULL.
    Returns 1 if the GIL was acquired, or 0 if not. */
-static int
+static void
 take_gil(PyThreadState *tstate)
 {
     int err = errno;
@@ -309,7 +313,7 @@ take_gil(PyThreadState *tstate)
     struct _gil_runtime_state *gil = interp->ceval.gil;
 #ifdef Py_GIL_DISABLED
     if (!_Py_atomic_load_int_relaxed(&gil->enabled)) {
-        return 0;
+        return;
     }
 #endif
@@ -358,10 +362,10 @@ take_gil(PyThreadState *tstate)
     if (!_Py_atomic_load_int_relaxed(&gil->enabled)) {
         // Another thread disabled the GIL between our check above and
         // now. Don't take the GIL, signal any other waiting threads, and
-        // return 0.
+        // return.
         COND_SIGNAL(gil->cond);
         MUTEX_UNLOCK(gil->mutex);
-        return 0;
+        return;
     }
 #endif
@@ -393,20 +397,21 @@ take_gil(PyThreadState *tstate)
            in take_gil() while the main thread called
            wait_for_thread_shutdown() from Py_Finalize(). */
         MUTEX_UNLOCK(gil->mutex);
-        /* Passing NULL to drop_gil() indicates that this thread is about to
-           terminate and will never hold the GIL again. */
-        drop_gil(interp, NULL);
+        /* tstate could be a dangling pointer, so don't pass it to
+           drop_gil(). */
+        drop_gil(interp, NULL, 1);
         PyThread_exit_thread();
     }
     assert(_PyThreadState_CheckConsistency(tstate));
+    tstate->_status.holds_gil = 1;
     _Py_unset_eval_breaker_bit(tstate, _PY_GIL_DROP_REQUEST_BIT);
     update_eval_breaker_for_thread(interp, tstate);
     MUTEX_UNLOCK(gil->mutex);
     errno = err;
-    return 1;
+    return;
 }
 void _PyEval_SetSwitchInterval(unsigned long microseconds)
@@ -451,10 +456,17 @@ PyEval_ThreadsInitialized(void)
 static inline int
 current_thread_holds_gil(struct _gil_runtime_state *gil, PyThreadState *tstate)
 {
-    if (((PyThreadState*)_Py_atomic_load_ptr_relaxed(&gil->last_holder)) != tstate) {
-        return 0;
-    }
-    return _Py_atomic_load_int_relaxed(&gil->locked);
+    int holds_gil = tstate->_status.holds_gil;
+    // holds_gil is the source of truth; check that last_holder and gil->locked
+    // are consistent with it.
+    int locked = _Py_atomic_load_int_relaxed(&gil->locked);
+    int is_last_holder =
+        ((PyThreadState*)_Py_atomic_load_ptr_relaxed(&gil->last_holder)) == tstate;
+    assert(!holds_gil || locked);
+    assert(!holds_gil || is_last_holder);
+    return holds_gil;
 }
 #endif
@@ -563,23 +575,24 @@ PyEval_ReleaseLock(void)
     /* This function must succeed when the current thread state is NULL.
        We therefore avoid PyThreadState_Get() which dumps a fatal error
        in debug mode. */
-    drop_gil(tstate->interp, tstate);
-}
-int
-_PyEval_AcquireLock(PyThreadState *tstate)
-{
-    _Py_EnsureTstateNotNULL(tstate);
-    return take_gil(tstate);
+    drop_gil(tstate->interp, tstate, 0);
 }
 void
-_PyEval_ReleaseLock(PyInterpreterState *interp, PyThreadState *tstate)
+_PyEval_AcquireLock(PyThreadState *tstate)
 {
-    /* If tstate is NULL then we do not expect the current thread
-       to acquire the GIL ever again. */
-    assert(tstate == NULL || tstate->interp == interp);
-    drop_gil(interp, tstate);
+    _Py_EnsureTstateNotNULL(tstate);
+    take_gil(tstate);
+}
+void
+_PyEval_ReleaseLock(PyInterpreterState *interp,
+                    PyThreadState *tstate,
+                    int final_release)
+{
+    assert(tstate != NULL);
+    assert(tstate->interp == interp);
+    drop_gil(interp, tstate, final_release);
 }
 void
@@ -1136,7 +1149,12 @@ _PyEval_DisableGIL(PyThreadState *tstate)
         //
         // Drop the GIL, which will wake up any threads waiting in take_gil()
         // and let them resume execution without the GIL.
-        drop_gil_impl(gil);
+        drop_gil_impl(tstate, gil);
+        // If another thread asked us to drop the GIL, they should be
+        // free-threading by now. Remove any such request so we have a clean
+        // slate if/when the GIL is enabled again.
+        _Py_unset_eval_breaker_bit(tstate, _PY_GIL_DROP_REQUEST_BIT);
         return 1;
     }
     return 0;

File 5 of 5

@@ -1843,7 +1843,7 @@ _PyThreadState_DeleteCurrent(PyThreadState *tstate)
 #endif
     current_fast_clear(tstate->interp->runtime);
     tstate_delete_common(tstate);
-    _PyEval_ReleaseLock(tstate->interp, NULL);
+    _PyEval_ReleaseLock(tstate->interp, tstate, 1);
     free_threadstate((_PyThreadStateImpl *)tstate);
 }
@@ -2068,7 +2068,7 @@ _PyThreadState_Attach(PyThreadState *tstate)
     while (1) {
-        int acquired_gil = _PyEval_AcquireLock(tstate);
+        _PyEval_AcquireLock(tstate);
         // XXX assert(tstate_is_alive(tstate));
         current_fast_set(&_PyRuntime, tstate);
@@ -2079,20 +2079,17 @@ _PyThreadState_Attach(PyThreadState *tstate)
         }
 #ifdef Py_GIL_DISABLED
-        if (_PyEval_IsGILEnabled(tstate) != acquired_gil) {
+        if (_PyEval_IsGILEnabled(tstate) && !tstate->_status.holds_gil) {
             // The GIL was enabled between our call to _PyEval_AcquireLock()
             // and when we attached (the GIL can't go from enabled to disabled
             // here because only a thread holding the GIL can disable
             // it). Detach and try again.
-            assert(!acquired_gil);
             tstate_set_detached(tstate, _Py_THREAD_DETACHED);
             tstate_deactivate(tstate);
             current_fast_clear(&_PyRuntime);
             continue;
         }
         _Py_qsbr_attach(((_PyThreadStateImpl *)tstate)->qsbr);
-#else
-        (void)acquired_gil;
 #endif
         break;
     }
@@ -2123,7 +2120,7 @@ detach_thread(PyThreadState *tstate, int detached_state)
     tstate_deactivate(tstate);
     tstate_set_detached(tstate, detached_state);
     current_fast_clear(&_PyRuntime);
-    _PyEval_ReleaseLock(tstate->interp, tstate);
+    _PyEval_ReleaseLock(tstate->interp, tstate, 0);
 }
 void