diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h
index 6e5f2289cb6..c40f9e7393a 100644
--- a/Include/internal/pycore_pystate.h
+++ b/Include/internal/pycore_pystate.h
@@ -64,17 +64,17 @@ _Py_ThreadCanHandlePendingCalls(void)
 /* Variable and macro for in-line access to current thread
    and interpreter state */
 
-static inline PyThreadState*
-_PyRuntimeState_GetThreadState(_PyRuntimeState *runtime)
-{
-    return (PyThreadState*)_Py_atomic_load_relaxed(&runtime->tstate_current);
-}
+#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
+/* Thread-local pointer to the current thread state; defined in pystate.c. */
+extern _Py_thread_local PyThreadState *_Py_tss_tstate;
+#endif
+/* Out-of-line accessor, used by _PyThreadState_GET() when the thread-local
+   variable above is not directly accessible. */
+PyAPI_FUNC(PyThreadState *) _PyThreadState_GetCurrent(void);
 
 /* Get the current Python thread state.
 
-   Efficient macro reading directly the 'tstate_current' atomic
-   variable. The macro is unsafe: it does not check for error and it can
-   return NULL.
+   This function is unsafe: it does not check for error and it can return NULL.
 
    The caller must hold the GIL.
 
@@ -82,9 +82,20 @@ _PyRuntimeState_GetThreadState(_PyRuntimeState *runtime)
 static inline PyThreadState*
 _PyThreadState_GET(void)
 {
-    return _PyRuntimeState_GetThreadState(&_PyRuntime);
+#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
+    return _Py_tss_tstate;
+#else
+    return _PyThreadState_GetCurrent();
+#endif
 }
 
+static inline PyThreadState*
+_PyRuntimeState_GetThreadState(_PyRuntimeState *Py_UNUSED(runtime))
+{
+    return _PyThreadState_GET();
+}
+
+
 static inline void
 _Py_EnsureFuncTstateNotNULL(const char *func, PyThreadState *tstate)
 {
diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h
index 3ebe49926ed..2a3fd8ab281 100644
--- a/Include/internal/pycore_runtime.h
+++ b/Include/internal/pycore_runtime.h
@@ -119,9 +119,6 @@ typedef struct pyruntimestate {
 
     unsigned long main_thread;
 
-    /* Assuming the current thread holds the GIL, this is the
-       PyThreadState for the current thread. */
-    _Py_atomic_address tstate_current;
     /* Used for the thread state bound to the current thread. */
     Py_tss_t autoTSSkey;
 
diff --git a/Include/pyport.h b/Include/pyport.h
index 5e226f5cb46..bd0ba6d0681 100644
--- a/Include/pyport.h
+++ b/Include/pyport.h
@@ -662,6 +662,28 @@ extern char * _getpty(int *, int, mode_t, int);
 #  define WITH_THREAD
 #endif
 
+#ifdef WITH_THREAD
+#  ifdef Py_BUILD_CORE
+#    ifdef HAVE_THREAD_LOCAL
+#      error "HAVE_THREAD_LOCAL is already defined"
+#    endif
+#    define HAVE_THREAD_LOCAL 1
+#    ifdef thread_local
+#      define _Py_thread_local thread_local
+#    elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L \
+          && !defined(__STDC_NO_THREADS__)
+#      define _Py_thread_local _Thread_local
+#    elif defined(_MSC_VER) /* AKA NT_THREADS */
+#      define _Py_thread_local __declspec(thread)
+#    elif defined(__GNUC__) /* includes clang */
+#      define _Py_thread_local __thread
+#    else
+       // fall back to the PyThread_tss_*() API, or ignore.
+#      undef HAVE_THREAD_LOCAL
+#    endif
+#  endif
+#endif
+
 /* Check that ALT_SOABI is consistent with Py_TRACE_REFS:
    ./configure --with-trace-refs should must be used to define Py_TRACE_REFS */
 #if defined(ALT_SOABI) && defined(Py_TRACE_REFS)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-04-07-12-18-41.gh-issue-103323.9802br.rst b/Misc/NEWS.d/next/Core and Builtins/2023-04-07-12-18-41.gh-issue-103323.9802br.rst
new file mode 100644
index 00000000000..347c91d973e
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-04-07-12-18-41.gh-issue-103323.9802br.rst
@@ -0,0 +1,3 @@
+We've replaced our use of ``_PyRuntime.tstate_current`` with a thread-local
+variable. This is a fairly low-level implementation detail, and there
+should be no change in behavior.
diff --git a/Python/pystate.c b/Python/pystate.c
index 1e04887ef04..d108cfc7e50 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -60,23 +60,44 @@ extern "C" {
    For each of these functions, the GIL must be held by the current thread.
  */
 
+
+#ifdef HAVE_THREAD_LOCAL
+/* The current thread state of this OS thread (NULL when none is set). */
+_Py_thread_local PyThreadState *_Py_tss_tstate = NULL;
+#endif
+
 static inline PyThreadState *
-current_fast_get(_PyRuntimeState *runtime)
+current_fast_get(_PyRuntimeState *Py_UNUSED(runtime))
 {
-    return (PyThreadState*)_Py_atomic_load_relaxed(&runtime->tstate_current);
+#ifdef HAVE_THREAD_LOCAL
+    return _Py_tss_tstate;
+#else
+    // XXX Fall back to the PyThread_tss_*() API.
+#  error "no supported thread-local variable storage classifier"
+#endif
 }
 
 static inline void
-current_fast_set(_PyRuntimeState *runtime, PyThreadState *tstate)
+current_fast_set(_PyRuntimeState *Py_UNUSED(runtime), PyThreadState *tstate)
 {
     assert(tstate != NULL);
-    _Py_atomic_store_relaxed(&runtime->tstate_current, (uintptr_t)tstate);
+#ifdef HAVE_THREAD_LOCAL
+    _Py_tss_tstate = tstate;
+#else
+    // XXX Fall back to the PyThread_tss_*() API.
+#  error "no supported thread-local variable storage classifier"
+#endif
 }
 
 static inline void
-current_fast_clear(_PyRuntimeState *runtime)
+current_fast_clear(_PyRuntimeState *Py_UNUSED(runtime))
 {
-    _Py_atomic_store_relaxed(&runtime->tstate_current, (uintptr_t)NULL);
+#ifdef HAVE_THREAD_LOCAL
+    _Py_tss_tstate = NULL;
+#else
+    // XXX Fall back to the PyThread_tss_*() API.
+#  error "no supported thread-local variable storage classifier"
+#endif
 }
 
 #define tstate_verify_not_active(tstate) \
@@ -84,6 +105,13 @@ current_fast_clear(_PyRuntimeState *runtime)
         _Py_FatalErrorFormat(__func__, "tstate %p is still current", tstate); \
     }
 
+/* Exported wrapper around current_fast_get(); see _PyThreadState_GET(). */
+PyThreadState *
+_PyThreadState_GetCurrent(void)
+{
+    return current_fast_get(&_PyRuntime);
+}
+
 
 //------------------------------------------------
 // the thread state bound to the current OS thread