diff --git a/Include/cpython/pyatomic.h b/Include/cpython/pyatomic.h index 55a139bb915..4ecef4f56ed 100644 --- a/Include/cpython/pyatomic.h +++ b/Include/cpython/pyatomic.h @@ -510,6 +510,9 @@ _Py_atomic_load_ssize_acquire(const Py_ssize_t *obj); // See https://en.cppreference.com/w/cpp/atomic/atomic_thread_fence static inline void _Py_atomic_fence_seq_cst(void); +// Acquire fence +static inline void _Py_atomic_fence_acquire(void); + // Release fence static inline void _Py_atomic_fence_release(void); diff --git a/Include/cpython/pyatomic_gcc.h b/Include/cpython/pyatomic_gcc.h index f2ebdeeb552..ef09954d53a 100644 --- a/Include/cpython/pyatomic_gcc.h +++ b/Include/cpython/pyatomic_gcc.h @@ -542,6 +542,10 @@ static inline void _Py_atomic_fence_seq_cst(void) { __atomic_thread_fence(__ATOMIC_SEQ_CST); } + static inline void +_Py_atomic_fence_acquire(void) +{ __atomic_thread_fence(__ATOMIC_ACQUIRE); } + static inline void _Py_atomic_fence_release(void) { __atomic_thread_fence(__ATOMIC_RELEASE); } diff --git a/Include/cpython/pyatomic_msc.h b/Include/cpython/pyatomic_msc.h index f32995c1f57..84da21bdcbf 100644 --- a/Include/cpython/pyatomic_msc.h +++ b/Include/cpython/pyatomic_msc.h @@ -1066,6 +1066,18 @@ _Py_atomic_fence_seq_cst(void) #else # error "no implementation of _Py_atomic_fence_seq_cst" #endif +} + + static inline void +_Py_atomic_fence_acquire(void) +{ +#if defined(_M_ARM64) + __dmb(_ARM64_BARRIER_ISHLD); +#elif defined(_M_X64) || defined(_M_IX86) + _ReadBarrier(); +#else +# error "no implementation of _Py_atomic_fence_acquire" +#endif } static inline void diff --git a/Include/cpython/pyatomic_std.h b/Include/cpython/pyatomic_std.h index 0cdce4e6dd3..7c71e94c68f 100644 --- a/Include/cpython/pyatomic_std.h +++ b/Include/cpython/pyatomic_std.h @@ -961,6 +961,13 @@ _Py_atomic_fence_seq_cst(void) atomic_thread_fence(memory_order_seq_cst); } + static inline void +_Py_atomic_fence_acquire(void) +{ + _Py_USING_STD; + atomic_thread_fence(memory_order_acquire); +} + static inline void _Py_atomic_fence_release(void) { diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index e6930cc8eec..2a18bb76447 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -228,12 +228,12 @@ PyAPI_FUNC(void) _PySeqLock_AbandonWrite(_PySeqLock *seqlock); PyAPI_FUNC(uint32_t) _PySeqLock_BeginRead(_PySeqLock *seqlock); // End the read operation and confirm that the sequence number has not changed. -// Returns 1 if the read was successful or 0 if the read should be re-tried. -PyAPI_FUNC(uint32_t) _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous); +// Returns 1 if the read was successful or 0 if the read should be retried. +PyAPI_FUNC(int) _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous); // Check if the lock was held during a fork and clear the lock. Returns 1 -// if the lock was held and any associated datat should be cleared. -PyAPI_FUNC(uint32_t) _PySeqLock_AfterFork(_PySeqLock *seqlock); +// if the lock was held and any associated data should be cleared. +PyAPI_FUNC(int) _PySeqLock_AfterFork(_PySeqLock *seqlock); #ifdef __cplusplus } diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-07-04-23-38-30.gh-issue-121368.m3EF9E.rst b/Misc/NEWS.d/next/Core and Builtins/2024-07-04-23-38-30.gh-issue-121368.m3EF9E.rst new file mode 100644 index 00000000000..3df5b216cbc --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-07-04-23-38-30.gh-issue-121368.m3EF9E.rst @@ -0,0 +1,3 @@ +Fix race condition in ``_PyType_Lookup`` in the free-threaded build due to +a missing memory fence. This could lead to ``_PyType_Lookup`` returning +incorrect results on arm64. diff --git a/Modules/_testcapi/pyatomic.c b/Modules/_testcapi/pyatomic.c index 4f72844535e..850de6f9c33 100644 --- a/Modules/_testcapi/pyatomic.c +++ b/Modules/_testcapi/pyatomic.c @@ -125,6 +125,7 @@ test_atomic_fences(PyObject *self, PyObject *obj) { // Just make sure that the fences compile. We are not // testing any synchronizing ordering. _Py_atomic_fence_seq_cst(); + _Py_atomic_fence_acquire(); _Py_atomic_fence_release(); Py_RETURN_NONE; } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 13eac2efb98..4fc241e1190 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5205,7 +5205,7 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name) #ifdef Py_GIL_DISABLED // synchronize-with other writing threads by doing an acquire load on the sequence while (1) { - int sequence = _PySeqLock_BeginRead(&entry->sequence); + uint32_t sequence = _PySeqLock_BeginRead(&entry->sequence); uint32_t entry_version = _Py_atomic_load_uint32_relaxed(&entry->version); uint32_t type_version = _Py_atomic_load_uint32_acquire(&type->tp_version_tag); if (entry_version == type_version && diff --git a/Python/lock.c b/Python/lock.c index 7c6a5175e88..57675fe1873 100644 --- a/Python/lock.c +++ b/Python/lock.c @@ -514,6 +514,7 @@ void _PySeqLock_LockWrite(_PySeqLock *seqlock) } else if (_Py_atomic_compare_exchange_uint32(&seqlock->sequence, &prev, prev + 1)) { // We've locked the cache + _Py_atomic_fence_release(); break; } else { @@ -547,28 +548,31 @@ uint32_t _PySeqLock_BeginRead(_PySeqLock *seqlock) return sequence; } -uint32_t _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous) +int _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous) { - // Synchronize again and validate that the entry hasn't been updated - // while we were readying the values. - if (_Py_atomic_load_uint32_acquire(&seqlock->sequence) == previous) { - return 1; - } + // gh-121368: We need an explicit acquire fence here to ensure that + // this load of the sequence number is not reordered before any loads + // within the read lock. + _Py_atomic_fence_acquire(); - _Py_yield(); - return 0; + if (_Py_atomic_load_uint32_relaxed(&seqlock->sequence) == previous) { + return 1; + } + + _Py_yield(); + return 0; } -uint32_t _PySeqLock_AfterFork(_PySeqLock *seqlock) +int _PySeqLock_AfterFork(_PySeqLock *seqlock) { // Synchronize again and validate that the entry hasn't been updated // while we were readying the values. - if (SEQLOCK_IS_UPDATING(seqlock->sequence)) { + if (SEQLOCK_IS_UPDATING(seqlock->sequence)) { seqlock->sequence = 0; return 1; - } + } - return 0; + return 0; } #undef PyMutex_Lock