From df7317904849a41d51db39d92c5d431a18e22637 Mon Sep 17 00:00:00 2001 From: mpage Date: Mon, 8 Apr 2024 07:58:38 -0700 Subject: [PATCH] gh-111926: Make weakrefs thread-safe in free-threaded builds (#117168) Most mutable data is protected by a striped lock that is keyed on the referenced object's address. The weakref's hash is protected using the weakref's per-object lock. Note that this only affects free-threaded builds. Apart from some minor refactoring, the added code is all either gated by `ifdef`s or is a no-op (e.g. `Py_BEGIN_CRITICAL_SECTION`). --- Include/cpython/weakrefobject.h | 8 + Include/internal/pycore_interp.h | 7 + Include/internal/pycore_object.h | 40 +- .../internal/pycore_pyatomic_ft_wrappers.h | 5 + Include/internal/pycore_weakref.h | 73 ++- Lib/test/test_sys.py | 8 +- Lib/test/test_weakref.py | 19 + Modules/_sqlite/blob.c | 5 +- Modules/_sqlite/connection.c | 4 +- Modules/_ssl.c | 13 +- Modules/_ssl/debughelpers.c | 6 +- Modules/_weakref.c | 42 +- Modules/clinic/_weakref.c.h | 20 +- Objects/dictobject.c | 8 +- Objects/typeobject.c | 12 +- Objects/weakrefobject.c | 537 ++++++++++-------- Python/pystate.c | 9 + 17 files changed, 490 insertions(+), 326 deletions(-) diff --git a/Include/cpython/weakrefobject.h b/Include/cpython/weakrefobject.h index 1559e2def61..9a796098c6b 100644 --- a/Include/cpython/weakrefobject.h +++ b/Include/cpython/weakrefobject.h @@ -30,6 +30,14 @@ struct _PyWeakReference { PyWeakReference *wr_prev; PyWeakReference *wr_next; vectorcallfunc vectorcall; + +#ifdef Py_GIL_DISABLED + /* Pointer to the lock used when clearing in free-threaded builds. + * Normally this can be derived from wr_object, but in some cases we need + * to lock after wr_object has been set to Py_None. 
+ */ + struct _PyMutex *weakrefs_lock; +#endif }; Py_DEPRECATED(3.13) static inline PyObject* PyWeakref_GET_OBJECT(PyObject *ref_obj) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index b5cea863ff3..1bb123b8607 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -59,6 +59,12 @@ struct _stoptheworld_state { PyThreadState *requester; // Thread that requested the pause (may be NULL). }; +#ifdef Py_GIL_DISABLED +// This should be prime but otherwise the choice is arbitrary. A larger value +// increases concurrency at the expense of memory. +# define NUM_WEAKREF_LIST_LOCKS 127 +#endif + /* cross-interpreter data registry */ /* Tracks some rare events per-interpreter, used by the optimizer to turn on/off @@ -203,6 +209,7 @@ struct _is { #if defined(Py_GIL_DISABLED) struct _mimalloc_interp_state mimalloc; struct _brc_state brc; // biased reference counting state + PyMutex weakref_locks[NUM_WEAKREF_LIST_LOCKS]; #endif // Per-interpreter state for the obmalloc allocator. For the main diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 4fc5e9bf653..1e1b664000f 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -426,7 +426,7 @@ _Py_TryIncRefShared(PyObject *op) /* Tries to incref the object op and ensures that *src still points to it. 
*/ static inline int -_Py_TryIncref(PyObject **src, PyObject *op) +_Py_TryIncrefCompare(PyObject **src, PyObject *op) { if (_Py_TryIncrefFast(op)) { return 1; @@ -452,7 +452,7 @@ _Py_XGetRef(PyObject **ptr) if (value == NULL) { return value; } - if (_Py_TryIncref(ptr, value)) { + if (_Py_TryIncrefCompare(ptr, value)) { return value; } } @@ -467,7 +467,7 @@ _Py_TryXGetRef(PyObject **ptr) if (value == NULL) { return value; } - if (_Py_TryIncref(ptr, value)) { + if (_Py_TryIncrefCompare(ptr, value)) { return value; } return NULL; @@ -506,8 +506,42 @@ _Py_XNewRefWithLock(PyObject *obj) return _Py_NewRefWithLock(obj); } +static inline void +_PyObject_SetMaybeWeakref(PyObject *op) +{ + if (_Py_IsImmortal(op)) { + return; + } + for (;;) { + Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&op->ob_ref_shared); + if ((shared & _Py_REF_SHARED_FLAG_MASK) != 0) { + // Nothing to do if it's in WEAKREFS, QUEUED, or MERGED states. + return; + } + if (_Py_atomic_compare_exchange_ssize( + &op->ob_ref_shared, &shared, shared | _Py_REF_MAYBE_WEAKREF)) { + return; + } + } +} + #endif +/* Tries to incref op and returns 1 if successful or 0 otherwise. 
*/ +static inline int +_Py_TryIncref(PyObject *op) +{ +#ifdef Py_GIL_DISABLED + return _Py_TryIncrefFast(op) || _Py_TryIncRefShared(op); +#else + if (Py_REFCNT(op) > 0) { + Py_INCREF(op); + return 1; + } + return 0; +#endif +} + #ifdef Py_REF_DEBUG extern void _PyInterpreterState_FinalizeRefTotal(PyInterpreterState *); extern void _Py_FinalizeRefTotal(_PyRuntimeState *); diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index e441600d54e..2514f51f1b0 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -20,9 +20,12 @@ extern "C" { #endif #ifdef Py_GIL_DISABLED +#define FT_ATOMIC_LOAD_PTR(value) _Py_atomic_load_ptr(&value) #define FT_ATOMIC_LOAD_SSIZE(value) _Py_atomic_load_ssize(&value) #define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) \ _Py_atomic_load_ssize_relaxed(&value) +#define FT_ATOMIC_STORE_PTR(value, new_value) \ + _Py_atomic_store_ptr(&value, new_value) #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) \ _Py_atomic_store_ptr_relaxed(&value, new_value) #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) \ @@ -30,8 +33,10 @@ extern "C" { #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) \ _Py_atomic_store_ssize_relaxed(&value, new_value) #else +#define FT_ATOMIC_LOAD_PTR(value) value #define FT_ATOMIC_LOAD_SSIZE(value) value #define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) value +#define FT_ATOMIC_STORE_PTR(value, new_value) value = new_value #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) value = new_value #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value diff --git a/Include/internal/pycore_weakref.h b/Include/internal/pycore_weakref.h index dea267b4903..e057a27340f 100644 --- a/Include/internal/pycore_weakref.h +++ b/Include/internal/pycore_weakref.h @@ -9,7 +9,35 @@ extern "C" { #endif #include "pycore_critical_section.h" // 
Py_BEGIN_CRITICAL_SECTION() +#include "pycore_lock.h" #include "pycore_object.h" // _Py_REF_IS_MERGED() +#include "pycore_pyatomic_ft_wrappers.h" + +#ifdef Py_GIL_DISABLED + +#define WEAKREF_LIST_LOCK(obj) \ + _PyInterpreterState_GET() \ + ->weakref_locks[((uintptr_t)obj) % NUM_WEAKREF_LIST_LOCKS] + +// Lock using the referenced object +#define LOCK_WEAKREFS(obj) \ + PyMutex_LockFlags(&WEAKREF_LIST_LOCK(obj), _Py_LOCK_DONT_DETACH) +#define UNLOCK_WEAKREFS(obj) PyMutex_Unlock(&WEAKREF_LIST_LOCK(obj)) + +// Lock using a weakref +#define LOCK_WEAKREFS_FOR_WR(wr) \ + PyMutex_LockFlags(wr->weakrefs_lock, _Py_LOCK_DONT_DETACH) +#define UNLOCK_WEAKREFS_FOR_WR(wr) PyMutex_Unlock(wr->weakrefs_lock) + +#else + +#define LOCK_WEAKREFS(obj) +#define UNLOCK_WEAKREFS(obj) + +#define LOCK_WEAKREFS_FOR_WR(wr) +#define UNLOCK_WEAKREFS_FOR_WR(wr) + +#endif static inline int _is_dead(PyObject *obj) { @@ -30,53 +58,64 @@ static inline int _is_dead(PyObject *obj) static inline PyObject* _PyWeakref_GET_REF(PyObject *ref_obj) { assert(PyWeakref_Check(ref_obj)); - PyObject *ret = NULL; - Py_BEGIN_CRITICAL_SECTION(ref_obj); PyWeakReference *ref = _Py_CAST(PyWeakReference*, ref_obj); - PyObject *obj = ref->wr_object; + PyObject *obj = FT_ATOMIC_LOAD_PTR(ref->wr_object); if (obj == Py_None) { // clear_weakref() was called - goto end; + return NULL; } - if (_is_dead(obj)) { - goto end; + LOCK_WEAKREFS(obj); +#ifdef Py_GIL_DISABLED + if (ref->wr_object == Py_None) { + // clear_weakref() was called + UNLOCK_WEAKREFS(obj); + return NULL; } -#if !defined(Py_GIL_DISABLED) - assert(Py_REFCNT(obj) > 0); #endif - ret = Py_NewRef(obj); -end: - Py_END_CRITICAL_SECTION(); - return ret; + if (_Py_TryIncref(obj)) { + UNLOCK_WEAKREFS(obj); + return obj; + } + UNLOCK_WEAKREFS(obj); + return NULL; } static inline int _PyWeakref_IS_DEAD(PyObject *ref_obj) { assert(PyWeakref_Check(ref_obj)); int ret = 0; - Py_BEGIN_CRITICAL_SECTION(ref_obj); PyWeakReference *ref = _Py_CAST(PyWeakReference*, ref_obj); - PyObject 
*obj = ref->wr_object; + PyObject *obj = FT_ATOMIC_LOAD_PTR(ref->wr_object); if (obj == Py_None) { // clear_weakref() was called ret = 1; } else { + LOCK_WEAKREFS(obj); // See _PyWeakref_GET_REF() for the rationale of this test +#ifdef Py_GIL_DISABLED + ret = (ref->wr_object == Py_None) || _is_dead(obj); +#else ret = _is_dead(obj); +#endif + UNLOCK_WEAKREFS(obj); } - Py_END_CRITICAL_SECTION(); return ret; } -extern Py_ssize_t _PyWeakref_GetWeakrefCount(PyWeakReference *head); +extern Py_ssize_t _PyWeakref_GetWeakrefCount(PyObject *obj); + +// Clear all the weak references to obj but leave their callbacks uncalled and +// intact. +extern void _PyWeakref_ClearWeakRefsExceptCallbacks(PyObject *obj); extern void _PyWeakref_ClearRef(PyWeakReference *self); +PyAPI_FUNC(int) _PyWeakref_IsDead(PyObject *weakref); + #ifdef __cplusplus } #endif #endif /* !Py_INTERNAL_WEAKREF_H */ - diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 6a66df4e897..ab26bf56d9c 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1708,11 +1708,15 @@ class SizeofTest(unittest.TestCase): # TODO: add check that forces layout of unicodefields # weakref import weakref - check(weakref.ref(int), size('2Pn3P')) + if support.Py_GIL_DISABLED: + expected = size('2Pn4P') + else: + expected = size('2Pn3P') + check(weakref.ref(int), expected) # weakproxy # XXX # weakcallableproxy - check(weakref.proxy(int), size('2Pn3P')) + check(weakref.proxy(int), expected) def check_slots(self, obj, base, extra): expected = sys.getsizeof(base) + struct.calcsize(extra) diff --git a/Lib/test/test_weakref.py b/Lib/test/test_weakref.py index 6fbd292c1e6..d0e8df4ea82 100644 --- a/Lib/test/test_weakref.py +++ b/Lib/test/test_weakref.py @@ -1907,6 +1907,25 @@ class MappingTestCase(TestBase): self.assertEqual(len(d), 1) o = None # lose ref + @support.cpython_only + def test_weak_valued_consistency(self): + # A single-threaded, deterministic repro for issue #28427: old keys + # should not remove new values 
from WeakValueDictionary. This relies on + # an implementation detail of CPython's WeakValueDictionary (its + # underlying dictionary of KeyedRefs) to reproduce the issue. + d = weakref.WeakValueDictionary() + with support.disable_gc(): + d[10] = RefCycle() + # Keep the KeyedRef alive after it's replaced so that GC will invoke + # the callback. + wr = d.data[10] + # Replace the value with something that isn't cyclic garbage + o = RefCycle() + d[10] = o + # Trigger GC, which will invoke the callback for `wr` + gc.collect() + self.assertEqual(len(d), 1) + def check_threaded_weak_dict_copy(self, type_, deepcopy): # `type_` should be either WeakKeyDictionary or WeakValueDictionary. # `deepcopy` should be either True or False. diff --git a/Modules/_sqlite/blob.c b/Modules/_sqlite/blob.c index f099020c5f4..7deb58bf1b9 100644 --- a/Modules/_sqlite/blob.c +++ b/Modules/_sqlite/blob.c @@ -4,7 +4,6 @@ #include "blob.h" #include "util.h" -#include "pycore_weakref.h" // _PyWeakref_GET_REF() #define clinic_state() (pysqlite_get_state_by_type(Py_TYPE(self))) #include "clinic/blob.c.h" @@ -102,8 +101,8 @@ pysqlite_close_all_blobs(pysqlite_Connection *self) { for (int i = 0; i < PyList_GET_SIZE(self->blobs); i++) { PyObject *weakref = PyList_GET_ITEM(self->blobs, i); - PyObject *blob = _PyWeakref_GET_REF(weakref); - if (blob == NULL) { + PyObject *blob; + if (!PyWeakref_GetRef(weakref, &blob)) { continue; } close_blob((pysqlite_Blob *)blob); diff --git a/Modules/_sqlite/connection.c b/Modules/_sqlite/connection.c index f97afcf5fcf..74984ca5365 100644 --- a/Modules/_sqlite/connection.c +++ b/Modules/_sqlite/connection.c @@ -38,7 +38,7 @@ #include "pycore_modsupport.h" // _PyArg_NoKeywords() #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() #include "pycore_pylifecycle.h" // _Py_IsInterpreterFinalizing() -#include "pycore_weakref.h" // _PyWeakref_IS_DEAD() +#include "pycore_weakref.h" #include @@ -1065,7 +1065,7 @@ static void 
_pysqlite_drop_unused_cursor_references(pysqlite_Connection* self) for (Py_ssize_t i = 0; i < PyList_Size(self->cursors); i++) { PyObject* weakref = PyList_GetItem(self->cursors, i); - if (_PyWeakref_IS_DEAD(weakref)) { + if (_PyWeakref_IsDead(weakref)) { continue; } if (PyList_Append(new_list, weakref) != 0) { diff --git a/Modules/_ssl.c b/Modules/_ssl.c index fbf914c4321..f7fdbf4b6f9 100644 --- a/Modules/_ssl.c +++ b/Modules/_ssl.c @@ -29,7 +29,6 @@ #include "pycore_fileutils.h" // _PyIsSelectable_fd() #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() #include "pycore_time.h" // _PyDeadline_Init() -#include "pycore_weakref.h" // _PyWeakref_GET_REF() /* Include symbols from _socket module */ #include "socketmodule.h" @@ -392,8 +391,8 @@ typedef enum { // Return a borrowed reference. static inline PySocketSockObject* GET_SOCKET(PySSLSocket *obj) { if (obj->Socket) { - PyObject *sock = _PyWeakref_GET_REF(obj->Socket); - if (sock != NULL) { + PyObject *sock; + if (PyWeakref_GetRef(obj->Socket, &sock)) { // GET_SOCKET() returns a borrowed reference Py_DECREF(sock); } @@ -2205,8 +2204,8 @@ PySSL_get_owner(PySSLSocket *self, void *c) if (self->owner == NULL) { Py_RETURN_NONE; } - PyObject *owner = _PyWeakref_GET_REF(self->owner); - if (owner == NULL) { + PyObject *owner; + if (!PyWeakref_GetRef(self->owner, &owner)) { Py_RETURN_NONE; } return owner; @@ -4433,9 +4432,9 @@ _servername_callback(SSL *s, int *al, void *args) * will be passed. If both do not exist only then the C-level object is * passed. 
*/ if (ssl->owner) - ssl_socket = _PyWeakref_GET_REF(ssl->owner); + PyWeakref_GetRef(ssl->owner, &ssl_socket); else if (ssl->Socket) - ssl_socket = _PyWeakref_GET_REF(ssl->Socket); + PyWeakref_GetRef(ssl->Socket, &ssl_socket); else ssl_socket = Py_NewRef(ssl); diff --git a/Modules/_ssl/debughelpers.c b/Modules/_ssl/debughelpers.c index 07e9ce7a6fc..9c87f8b4d21 100644 --- a/Modules/_ssl/debughelpers.c +++ b/Modules/_ssl/debughelpers.c @@ -28,12 +28,12 @@ _PySSL_msg_callback(int write_p, int version, int content_type, PyObject *ssl_socket; /* ssl.SSLSocket or ssl.SSLObject */ if (ssl_obj->owner) - ssl_socket = _PyWeakref_GET_REF(ssl_obj->owner); + PyWeakref_GetRef(ssl_obj->owner, &ssl_socket); else if (ssl_obj->Socket) - ssl_socket = _PyWeakref_GET_REF(ssl_obj->Socket); + PyWeakref_GetRef(ssl_obj->Socket, &ssl_socket); else ssl_socket = (PyObject *)Py_NewRef(ssl_obj); - assert(ssl_socket != NULL); // _PyWeakref_GET_REF() can return NULL + assert(ssl_socket != NULL); // PyWeakref_GetRef() can return NULL /* assume that OpenSSL verifies all payload and buf len is of sufficient length */ diff --git a/Modules/_weakref.c b/Modules/_weakref.c index 7225dbc9ce4..1ea3ed5e40b 100644 --- a/Modules/_weakref.c +++ b/Modules/_weakref.c @@ -14,7 +14,6 @@ module _weakref #include "clinic/_weakref.c.h" /*[clinic input] -@critical_section object _weakref.getweakrefcount -> Py_ssize_t object: object @@ -25,14 +24,9 @@ Return the number of weak references to 'object'. 
static Py_ssize_t _weakref_getweakrefcount_impl(PyObject *module, PyObject *object) -/*[clinic end generated code: output=301806d59558ff3e input=6535a580f1d0ebdc]*/ +/*[clinic end generated code: output=301806d59558ff3e input=7d4d04fcaccf64d5]*/ { - if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(object))) { - return 0; - } - PyWeakReference **list = GET_WEAKREFS_LISTPTR(object); - Py_ssize_t count = _PyWeakref_GetWeakrefCount(*list); - return count; + return _PyWeakref_GetWeakrefCount(object); } @@ -77,7 +71,6 @@ _weakref__remove_dead_weakref_impl(PyObject *module, PyObject *dct, /*[clinic input] -@critical_section object _weakref.getweakrefs object: object / @@ -86,26 +79,39 @@ Return a list of all weak reference objects pointing to 'object'. [clinic start generated code]*/ static PyObject * -_weakref_getweakrefs_impl(PyObject *module, PyObject *object) -/*[clinic end generated code: output=5ec268989fb8f035 input=3dea95b8f5b31bbb]*/ +_weakref_getweakrefs(PyObject *module, PyObject *object) +/*[clinic end generated code: output=25c7731d8e011824 input=00c6d0e5d3206693]*/ { if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(object))) { return PyList_New(0); } - PyWeakReference **list = GET_WEAKREFS_LISTPTR(object); - Py_ssize_t count = _PyWeakref_GetWeakrefCount(*list); - - PyObject *result = PyList_New(count); + PyObject *result = PyList_New(0); if (result == NULL) { return NULL; } - PyWeakReference *current = *list; - for (Py_ssize_t i = 0; i < count; ++i) { - PyList_SET_ITEM(result, i, Py_NewRef(current)); + LOCK_WEAKREFS(object); + PyWeakReference *current = *GET_WEAKREFS_LISTPTR(object); + while (current != NULL) { + PyObject *curobj = (PyObject *) current; + if (_Py_TryIncref(curobj)) { + if (PyList_Append(result, curobj)) { + UNLOCK_WEAKREFS(object); + Py_DECREF(curobj); + Py_DECREF(result); + return NULL; + } + else { + // Undo our _Py_TryIncref. 
This is safe to do with the lock + // held in free-threaded builds; the list holds a reference to + // curobj so we're guaranteed not to invoke the destructor. + Py_DECREF(curobj); + } + } current = current->wr_next; } + UNLOCK_WEAKREFS(object); return result; } diff --git a/Modules/clinic/_weakref.c.h b/Modules/clinic/_weakref.c.h index 550b6c4d71a..8d7bc5dc936 100644 --- a/Modules/clinic/_weakref.c.h +++ b/Modules/clinic/_weakref.c.h @@ -2,7 +2,6 @@ preserve [clinic start generated code]*/ -#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_CheckPositional() PyDoc_STRVAR(_weakref_getweakrefcount__doc__, @@ -23,9 +22,7 @@ _weakref_getweakrefcount(PyObject *module, PyObject *object) PyObject *return_value = NULL; Py_ssize_t _return_value; - Py_BEGIN_CRITICAL_SECTION(object); _return_value = _weakref_getweakrefcount_impl(module, object); - Py_END_CRITICAL_SECTION(); if ((_return_value == -1) && PyErr_Occurred()) { goto exit; } @@ -79,21 +76,6 @@ PyDoc_STRVAR(_weakref_getweakrefs__doc__, #define _WEAKREF_GETWEAKREFS_METHODDEF \ {"getweakrefs", (PyCFunction)_weakref_getweakrefs, METH_O, _weakref_getweakrefs__doc__}, -static PyObject * -_weakref_getweakrefs_impl(PyObject *module, PyObject *object); - -static PyObject * -_weakref_getweakrefs(PyObject *module, PyObject *object) -{ - PyObject *return_value = NULL; - - Py_BEGIN_CRITICAL_SECTION(object); - return_value = _weakref_getweakrefs_impl(module, object); - Py_END_CRITICAL_SECTION(); - - return return_value; -} - PyDoc_STRVAR(_weakref_proxy__doc__, "proxy($module, object, callback=None, /)\n" "--\n" @@ -130,4 +112,4 @@ skip_optional: exit: return return_value; } -/*[clinic end generated code: output=d5d30707212a9870 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=60f59adc1dc9eab8 input=a9049054013a1b77]*/ diff --git a/Objects/dictobject.c b/Objects/dictobject.c index b62d39ad6c5..9218b1aa470 100644 --- a/Objects/dictobject.c +++ 
b/Objects/dictobject.c @@ -1286,7 +1286,7 @@ Py_ssize_t compare_unicode_generic_threadsafe(PyDictObject *mp, PyDictKeysObject assert(!PyUnicode_CheckExact(key)); if (startkey != NULL) { - if (!_Py_TryIncref(&ep->me_key, startkey)) { + if (!_Py_TryIncrefCompare(&ep->me_key, startkey)) { return DKIX_KEY_CHANGED; } @@ -1334,7 +1334,7 @@ compare_unicode_unicode_threadsafe(PyDictObject *mp, PyDictKeysObject *dk, return unicode_get_hash(startkey) == hash && unicode_eq(startkey, key); } else { - if (!_Py_TryIncref(&ep->me_key, startkey)) { + if (!_Py_TryIncrefCompare(&ep->me_key, startkey)) { return DKIX_KEY_CHANGED; } if (unicode_get_hash(startkey) == hash && unicode_eq(startkey, key)) { @@ -1364,7 +1364,7 @@ Py_ssize_t compare_generic_threadsafe(PyDictObject *mp, PyDictKeysObject *dk, } Py_ssize_t ep_hash = _Py_atomic_load_ssize_relaxed(&ep->me_hash); if (ep_hash == hash) { - if (startkey == NULL || !_Py_TryIncref(&ep->me_key, startkey)) { + if (startkey == NULL || !_Py_TryIncrefCompare(&ep->me_key, startkey)) { return DKIX_KEY_CHANGED; } int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); @@ -5308,7 +5308,7 @@ acquire_key_value(PyObject **key_loc, PyObject *value, PyObject **value_loc, } if (out_value) { - if (!_Py_TryIncref(value_loc, value)) { + if (!_Py_TryIncrefCompare(value_loc, value)) { if (out_key) { Py_DECREF(*out_key); } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 51ceb7d7de1..e9f2d2577e9 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -378,7 +378,7 @@ _PyType_GetMRO(PyTypeObject *self) if (mro == NULL) { return NULL; } - if (_Py_TryIncref(&self->tp_mro, mro)) { + if (_Py_TryIncrefCompare(&self->tp_mro, mro)) { return mro; } @@ -2193,15 +2193,7 @@ subtype_dealloc(PyObject *self) finalizers since they might rely on part of the object being finalized that has already been destroyed. */ if (type->tp_weaklistoffset && !base->tp_weaklistoffset) { - /* Modeled after GET_WEAKREFS_LISTPTR(). 
- - This is never triggered for static types so we can avoid the - (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR(). */ - PyWeakReference **list = \ - _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(self); - while (*list) { - _PyWeakref_ClearRef(*list); - } + _PyWeakref_ClearWeakRefsExceptCallbacks(self); } } diff --git a/Objects/weakrefobject.c b/Objects/weakrefobject.c index d8dd6aea3af..206107e8505 100644 --- a/Objects/weakrefobject.c +++ b/Objects/weakrefobject.c @@ -1,24 +1,58 @@ #include "Python.h" +#include "pycore_critical_section.h" +#include "pycore_lock.h" #include "pycore_modsupport.h" // _PyArg_NoKwnames() #include "pycore_object.h" // _PyObject_GET_WEAKREFS_LISTPTR() #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() +#include "pycore_pystate.h" #include "pycore_weakref.h" // _PyWeakref_GET_REF() +#ifdef Py_GIL_DISABLED +/* + * Thread-safety for free-threaded builds + * ====================================== + * + * In free-threaded builds we need to protect mutable state of: + * + * - The weakref (wr_object, hash, wr_callback) + * - The referenced object (its head-of-list pointer) + * - The linked list of weakrefs + * + * For now we've chosen to address this in a straightforward way: + * + * - The weakref's hash is protected using the weakref's per-object lock. + * - The other mutable is protected by a striped lock keyed on the referenced + * object's address. + * - The striped lock must be locked using `_Py_LOCK_DONT_DETACH` in order to + * support atomic deletion from WeakValueDictionaries. As a result, we must + * be careful not to perform any operations that could suspend while the + * lock is held. + * + * Since the world is stopped when the GC runs, it is free to clear weakrefs + * without acquiring any locks. 
+ */ +#endif #define GET_WEAKREFS_LISTPTR(o) \ ((PyWeakReference **) _PyObject_GET_WEAKREFS_LISTPTR(o)) Py_ssize_t -_PyWeakref_GetWeakrefCount(PyWeakReference *head) +_PyWeakref_GetWeakrefCount(PyObject *obj) { - Py_ssize_t count = 0; + if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(obj))) { + return 0; + } + LOCK_WEAKREFS(obj); + Py_ssize_t count = 0; + PyWeakReference *head = *GET_WEAKREFS_LISTPTR(obj); while (head != NULL) { ++count; head = head->wr_next; } + UNLOCK_WEAKREFS(obj); return count; } @@ -33,54 +67,55 @@ init_weakref(PyWeakReference *self, PyObject *ob, PyObject *callback) self->wr_next = NULL; self->wr_callback = Py_XNewRef(callback); self->vectorcall = weakref_vectorcall; +#ifdef Py_GIL_DISABLED + self->weakrefs_lock = &WEAKREF_LIST_LOCK(ob); + _PyObject_SetMaybeWeakref(ob); + _PyObject_SetMaybeWeakref((PyObject *)self); +#endif } -static PyWeakReference * -new_weakref(PyObject *ob, PyObject *callback) -{ - PyWeakReference *result; - - result = PyObject_GC_New(PyWeakReference, &_PyWeakref_RefType); - if (result) { - init_weakref(result, ob, callback); - PyObject_GC_Track(result); - } - return result; -} - - -/* This function clears the passed-in reference and removes it from the - * list of weak references for the referent. This is the only code that - * removes an item from the doubly-linked list of weak references for an - * object; it is also responsible for clearing the callback slot. - */ +// Clear the weakref and steal its callback into `callback`, if provided. static void -clear_weakref(PyWeakReference *self) +clear_weakref_lock_held(PyWeakReference *self, PyObject **callback) { - PyObject *callback = self->wr_callback; - if (self->wr_object != Py_None) { PyWeakReference **list = GET_WEAKREFS_LISTPTR(self->wr_object); - - if (*list == self) - /* If 'self' is the end of the list (and thus self->wr_next == NULL) - then the weakref list itself (and thus the value of *list) will - end up being set to NULL. 
*/ - *list = self->wr_next; - self->wr_object = Py_None; - if (self->wr_prev != NULL) + if (*list == self) { + /* If 'self' is the end of the list (and thus self->wr_next == + NULL) then the weakref list itself (and thus the value of *list) + will end up being set to NULL. */ + FT_ATOMIC_STORE_PTR(*list, self->wr_next); + } + FT_ATOMIC_STORE_PTR(self->wr_object, Py_None); + if (self->wr_prev != NULL) { self->wr_prev->wr_next = self->wr_next; - if (self->wr_next != NULL) + } + if (self->wr_next != NULL) { self->wr_next->wr_prev = self->wr_prev; + } self->wr_prev = NULL; self->wr_next = NULL; } if (callback != NULL) { - Py_DECREF(callback); + *callback = self->wr_callback; self->wr_callback = NULL; } } +// Clear the weakref and its callback +static void +clear_weakref(PyWeakReference *self) +{ + PyObject *callback = NULL; + // self->wr_object may be Py_None if the GC cleared the weakref, so lock + // using the pointer in the weakref. + LOCK_WEAKREFS_FOR_WR(self); + clear_weakref_lock_held(self, &callback); + UNLOCK_WEAKREFS_FOR_WR(self); + Py_XDECREF(callback); +} + + /* Cyclic gc uses this to *just* clear the passed-in reference, leaving * the callback intact and uncalled. It must be possible to call self's * tp_dealloc() after calling this, so self has to be left in a sane enough @@ -95,15 +130,9 @@ clear_weakref(PyWeakReference *self) void _PyWeakref_ClearRef(PyWeakReference *self) { - PyObject *callback; - assert(self != NULL); assert(PyWeakref_Check(self)); - /* Preserve and restore the callback around clear_weakref. */ - callback = self->wr_callback; - self->wr_callback = NULL; - clear_weakref(self); - self->wr_callback = callback; + clear_weakref_lock_held(self, NULL); } static void @@ -126,7 +155,11 @@ gc_traverse(PyWeakReference *self, visitproc visit, void *arg) static int gc_clear(PyWeakReference *self) { - clear_weakref(self); + PyObject *callback; + // The world is stopped during GC in free-threaded builds. 
It's safe to + // call this without holding the lock. + clear_weakref_lock_held(self, &callback); + Py_XDECREF(callback); return 0; } @@ -150,7 +183,7 @@ weakref_vectorcall(PyObject *self, PyObject *const *args, } static Py_hash_t -weakref_hash(PyWeakReference *self) +weakref_hash_lock_held(PyWeakReference *self) { if (self->hash != -1) return self->hash; @@ -164,6 +197,15 @@ weakref_hash(PyWeakReference *self) return self->hash; } +static Py_hash_t +weakref_hash(PyWeakReference *self) +{ + Py_hash_t hash; + Py_BEGIN_CRITICAL_SECTION(self); + hash = weakref_hash_lock_held(self); + Py_END_CRITICAL_SECTION(); + return hash; +} static PyObject * weakref_repr(PyObject *self) @@ -276,6 +318,128 @@ insert_head(PyWeakReference *newref, PyWeakReference **list) *list = newref; } +/* See if we can reuse either the basic ref or proxy in list instead of + * creating a new weakref + */ +static PyWeakReference * +try_reuse_basic_ref(PyWeakReference *list, PyTypeObject *type, + PyObject *callback) +{ + if (callback != NULL) { + return NULL; + } + + PyWeakReference *ref, *proxy; + get_basic_refs(list, &ref, &proxy); + + PyWeakReference *cand = NULL; + if (type == &_PyWeakref_RefType) { + cand = ref; + } + if ((type == &_PyWeakref_ProxyType) || + (type == &_PyWeakref_CallableProxyType)) { + cand = proxy; + } + + if (cand != NULL && _Py_TryIncref((PyObject *) cand)) { + return cand; + } + return NULL; +} + +static int +is_basic_ref(PyWeakReference *ref) +{ + return (ref->wr_callback == NULL) && PyWeakref_CheckRefExact(ref); +} + +static int +is_basic_proxy(PyWeakReference *proxy) +{ + return (proxy->wr_callback == NULL) && PyWeakref_CheckProxy(proxy); +} + +static int +is_basic_ref_or_proxy(PyWeakReference *wr) +{ + return is_basic_ref(wr) || is_basic_proxy(wr); +} + +/* Insert `newref` in the appropriate position in `list` */ +static void +insert_weakref(PyWeakReference *newref, PyWeakReference **list) +{ + PyWeakReference *ref, *proxy; + get_basic_refs(*list, &ref, &proxy); + + 
PyWeakReference *prev; + if (is_basic_ref(newref)) { + prev = NULL; + } + else if (is_basic_proxy(newref)) { + prev = ref; + } + else { + prev = (proxy == NULL) ? ref : proxy; + } + + if (prev == NULL) { + insert_head(newref, list); + } + else { + insert_after(newref, prev); + } +} + +static PyWeakReference * +allocate_weakref(PyTypeObject *type, PyObject *obj, PyObject *callback) +{ + PyWeakReference *newref = (PyWeakReference *) type->tp_alloc(type, 0); + if (newref == NULL) { + return NULL; + } + init_weakref(newref, obj, callback); + return newref; +} + +static PyWeakReference * +get_or_create_weakref(PyTypeObject *type, PyObject *obj, PyObject *callback) +{ + if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(obj))) { + PyErr_Format(PyExc_TypeError, + "cannot create weak reference to '%s' object", + Py_TYPE(obj)->tp_name); + return NULL; + } + if (callback == Py_None) + callback = NULL; + + PyWeakReference **list = GET_WEAKREFS_LISTPTR(obj); + if ((type == &_PyWeakref_RefType) || + (type == &_PyWeakref_ProxyType) || + (type == &_PyWeakref_CallableProxyType)) + { + LOCK_WEAKREFS(obj); + PyWeakReference *basic_ref = try_reuse_basic_ref(*list, type, callback); + if (basic_ref != NULL) { + UNLOCK_WEAKREFS(obj); + return basic_ref; + } + PyWeakReference *newref = allocate_weakref(type, obj, callback); + insert_weakref(newref, list); + UNLOCK_WEAKREFS(obj); + return newref; + } + else { + // We may not be able to safely allocate inside the lock + PyWeakReference *newref = allocate_weakref(type, obj, callback); + LOCK_WEAKREFS(obj); + insert_weakref(newref, list); + UNLOCK_WEAKREFS(obj); + return newref; + } +} + static int parse_weakref_init_args(const char *funcname, PyObject *args, PyObject *kwargs, PyObject **obp, PyObject **callbackp) @@ -286,54 +450,11 @@ parse_weakref_init_args(const char *funcname, PyObject *args, PyObject *kwargs, static PyObject * weakref___new__(PyTypeObject *type, PyObject *args, PyObject *kwargs) { - PyWeakReference *self = NULL; PyObject *ob, 
*callback = NULL; - if (parse_weakref_init_args("__new__", args, kwargs, &ob, &callback)) { - PyWeakReference *ref, *proxy; - PyWeakReference **list; - - if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(ob))) { - PyErr_Format(PyExc_TypeError, - "cannot create weak reference to '%s' object", - Py_TYPE(ob)->tp_name); - return NULL; - } - if (callback == Py_None) - callback = NULL; - list = GET_WEAKREFS_LISTPTR(ob); - get_basic_refs(*list, &ref, &proxy); - if (callback == NULL && type == &_PyWeakref_RefType) { - if (ref != NULL) { - /* We can re-use an existing reference. */ - return Py_NewRef(ref); - } - } - /* We have to create a new reference. */ - /* Note: the tp_alloc() can trigger cyclic GC, so the weakref - list on ob can be mutated. This means that the ref and - proxy pointers we got back earlier may have been collected, - so we need to compute these values again before we use - them. */ - self = (PyWeakReference *) (type->tp_alloc(type, 0)); - if (self != NULL) { - init_weakref(self, ob, callback); - if (callback == NULL && type == &_PyWeakref_RefType) { - insert_head(self, list); - } - else { - PyWeakReference *prev; - - get_basic_refs(*list, &ref, &proxy); - prev = (proxy == NULL) ? 
ref : proxy; - if (prev == NULL) - insert_head(self, list); - else - insert_after(self, prev); - } - } + return (PyObject *)get_or_create_weakref(type, ob, callback); } - return (PyObject *)self; + return NULL; } static int @@ -562,8 +683,6 @@ static void proxy_dealloc(PyWeakReference *self) { PyObject_GC_UnTrack(self); - if (self->wr_callback != NULL) - PyObject_GC_UnTrack((PyObject *)self); clear_weakref(self); PyObject_GC_Del(self); } @@ -784,104 +903,21 @@ _PyWeakref_CallableProxyType = { proxy_iternext, /* tp_iternext */ }; - - PyObject * PyWeakref_NewRef(PyObject *ob, PyObject *callback) { - PyWeakReference *result = NULL; - PyWeakReference **list; - PyWeakReference *ref, *proxy; - - if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(ob))) { - PyErr_Format(PyExc_TypeError, - "cannot create weak reference to '%s' object", - Py_TYPE(ob)->tp_name); - return NULL; - } - list = GET_WEAKREFS_LISTPTR(ob); - get_basic_refs(*list, &ref, &proxy); - if (callback == Py_None) - callback = NULL; - if (callback == NULL) - /* return existing weak reference if it exists */ - result = ref; - if (result != NULL) - Py_INCREF(result); - else { - /* We do not need to recompute ref/proxy; new_weakref() cannot - trigger GC. - */ - result = new_weakref(ob, callback); - if (result != NULL) { - if (callback == NULL) { - assert(ref == NULL); - insert_head(result, list); - } - else { - PyWeakReference *prev; - - prev = (proxy == NULL) ? 
ref : proxy; - if (prev == NULL) - insert_head(result, list); - else - insert_after(result, prev); - } - } - } - return (PyObject *) result; + return (PyObject *)get_or_create_weakref(&_PyWeakref_RefType, ob, + callback); } - PyObject * PyWeakref_NewProxy(PyObject *ob, PyObject *callback) { - PyWeakReference *result = NULL; - PyWeakReference **list; - PyWeakReference *ref, *proxy; - - if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(ob))) { - PyErr_Format(PyExc_TypeError, - "cannot create weak reference to '%s' object", - Py_TYPE(ob)->tp_name); - return NULL; + PyTypeObject *type = &_PyWeakref_ProxyType; + if (PyCallable_Check(ob)) { + type = &_PyWeakref_CallableProxyType; } - list = GET_WEAKREFS_LISTPTR(ob); - get_basic_refs(*list, &ref, &proxy); - if (callback == Py_None) - callback = NULL; - if (callback == NULL) - /* attempt to return an existing weak reference if it exists */ - result = proxy; - if (result != NULL) - Py_INCREF(result); - else { - /* We do not need to recompute ref/proxy; new_weakref cannot - trigger GC. - */ - result = new_weakref(ob, callback); - if (result != NULL) { - PyWeakReference *prev; - - if (PyCallable_Check(ob)) { - Py_SET_TYPE(result, &_PyWeakref_CallableProxyType); - } - else { - Py_SET_TYPE(result, &_PyWeakref_ProxyType); - } - if (callback == NULL) { - prev = ref; - } - else - prev = (proxy == NULL) ? 
ref : proxy; - - if (prev == NULL) - insert_head(result, list); - else - insert_after(result, prev); - } - } - return (PyObject *) result; + return (PyObject *)get_or_create_weakref(type, ob, callback); } @@ -950,68 +986,73 @@ PyObject_ClearWeakRefs(PyObject *object) PyErr_BadInternalCall(); return; } + list = GET_WEAKREFS_LISTPTR(object); - /* Remove the callback-less basic and proxy references */ - if (*list != NULL && (*list)->wr_callback == NULL) { - clear_weakref(*list); - if (*list != NULL && (*list)->wr_callback == NULL) - clear_weakref(*list); + if (FT_ATOMIC_LOAD_PTR(*list) == NULL) { + // Fast path for the common case + return; } - if (*list != NULL) { - PyWeakReference *current = *list; - Py_ssize_t count = _PyWeakref_GetWeakrefCount(current); - PyObject *exc = PyErr_GetRaisedException(); - if (count == 1) { - PyObject *callback = current->wr_callback; + /* Remove the callback-less basic and proxy references, which always appear + at the head of the list. + */ + for (int done = 0; !done;) { + LOCK_WEAKREFS(object); + if (*list != NULL && is_basic_ref_or_proxy(*list)) { + PyObject *callback; + clear_weakref_lock_held(*list, &callback); + assert(callback == NULL); + } + done = (*list == NULL) || !is_basic_ref_or_proxy(*list); + UNLOCK_WEAKREFS(object); + } - current->wr_callback = NULL; - clear_weakref(current); - if (callback != NULL) { - if (Py_REFCNT((PyObject *)current) > 0) { - handle_callback(current, callback); - } - Py_DECREF(callback); + /* Deal with non-canonical (subtypes or refs with callbacks) references.
*/ + Py_ssize_t num_weakrefs = _PyWeakref_GetWeakrefCount(object); + if (num_weakrefs == 0) { + return; + } + + PyObject *exc = PyErr_GetRaisedException(); + PyObject *tuple = PyTuple_New(num_weakrefs * 2); + if (tuple == NULL) { + _PyErr_ChainExceptions1(exc); + return; + } + + Py_ssize_t num_items = 0; + for (int done = 0; !done;) { + PyObject *callback = NULL; + LOCK_WEAKREFS(object); + PyWeakReference *cur = *list; + if (cur != NULL) { + clear_weakref_lock_held(cur, &callback); + if (_Py_TryIncref((PyObject *) cur)) { + assert(num_items / 2 < num_weakrefs); + PyTuple_SET_ITEM(tuple, num_items, (PyObject *) cur); + PyTuple_SET_ITEM(tuple, num_items + 1, callback); + num_items += 2; + callback = NULL; } } - else { - PyObject *tuple; - Py_ssize_t i = 0; + done = (*list == NULL); + UNLOCK_WEAKREFS(object); - tuple = PyTuple_New(count * 2); - if (tuple == NULL) { - _PyErr_ChainExceptions1(exc); - return; - } - - for (i = 0; i < count; ++i) { - PyWeakReference *next = current->wr_next; - - if (Py_REFCNT((PyObject *)current) > 0) { - PyTuple_SET_ITEM(tuple, i * 2, Py_NewRef(current)); - PyTuple_SET_ITEM(tuple, i * 2 + 1, current->wr_callback); - } - else { - Py_DECREF(current->wr_callback); - } - current->wr_callback = NULL; - clear_weakref(current); - current = next; - } - for (i = 0; i < count; ++i) { - PyObject *callback = PyTuple_GET_ITEM(tuple, i * 2 + 1); - - /* The tuple may have slots left to NULL */ - if (callback != NULL) { - PyObject *item = PyTuple_GET_ITEM(tuple, i * 2); - handle_callback((PyWeakReference *)item, callback); - } - } - Py_DECREF(tuple); - } - assert(!PyErr_Occurred()); - PyErr_SetRaisedException(exc); + Py_XDECREF(callback); } + + for (Py_ssize_t i = 0; i < num_items; i += 2) { + PyObject *callback = PyTuple_GET_ITEM(tuple, i + 1); + if (callback != NULL) { + PyObject *weakref = PyTuple_GET_ITEM(tuple, i); + handle_callback((PyWeakReference *)weakref, callback); + } + } + + Py_DECREF(tuple); + + assert(!PyErr_Occurred()); + 
PyErr_SetRaisedException(exc); } /* This function is called by _PyStaticType_Dealloc() to clear weak references. @@ -1025,10 +1066,30 @@ _PyStaticType_ClearWeakRefs(PyInterpreterState *interp, PyTypeObject *type) { static_builtin_state *state = _PyStaticType_GetState(interp, type); PyObject **list = _PyStaticType_GET_WEAKREFS_LISTPTR(state); - while (*list != NULL) { - /* Note that clear_weakref() pops the first ref off the type's - weaklist before clearing its wr_object and wr_callback. - That is how we're able to loop over the list. */ - clear_weakref((PyWeakReference *)*list); + // This is safe to do without holding the lock in free-threaded builds; + // there is only one thread running and no new threads can be created. + while (*list) { + _PyWeakref_ClearRef((PyWeakReference *)*list); } } + +void +_PyWeakref_ClearWeakRefsExceptCallbacks(PyObject *obj) +{ + /* Modeled after GET_WEAKREFS_LISTPTR(). + + This is never triggered for static types so we can avoid the + (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR(). */ + PyWeakReference **list = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(obj); + LOCK_WEAKREFS(obj); + while (*list) { + _PyWeakref_ClearRef(*list); + } + UNLOCK_WEAKREFS(obj); +} + +int +_PyWeakref_IsDead(PyObject *weakref) +{ + return _PyWeakref_IS_DEAD(weakref); +} diff --git a/Python/pystate.c b/Python/pystate.c index 892e740493c..cee481c564b 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -506,6 +506,15 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) for (size_t i = 0; i < Py_ARRAY_LENGTH(locks); i++) { _PyMutex_at_fork_reinit(locks[i]); } +#ifdef Py_GIL_DISABLED + for (PyInterpreterState *interp = runtime->interpreters.head; + interp != NULL; interp = interp->next) + { + for (int i = 0; i < NUM_WEAKREF_LIST_LOCKS; i++) { + _PyMutex_at_fork_reinit(&interp->weakref_locks[i]); + } + } +#endif _PyTypes_AfterFork();