diff --git a/Include/dictobject.h b/Include/dictobject.h index ba90aaf676c..e0e1c26af00 100644 --- a/Include/dictobject.h +++ b/Include/dictobject.h @@ -75,6 +75,8 @@ PyAPI_FUNC(int) PyDict_DelItem(PyObject *mp, PyObject *key); #ifndef Py_LIMITED_API PyAPI_FUNC(int) _PyDict_DelItem_KnownHash(PyObject *mp, PyObject *key, Py_hash_t hash); +PyAPI_FUNC(int) _PyDict_DelItemIf(PyObject *mp, PyObject *key, + int (*predicate)(PyObject *value)); #endif PyAPI_FUNC(void) PyDict_Clear(PyObject *mp); PyAPI_FUNC(int) PyDict_Next( diff --git a/Lib/test/test_weakref.py b/Lib/test/test_weakref.py index 28a1cc2bfa9..1aa354019d8 100644 --- a/Lib/test/test_weakref.py +++ b/Lib/test/test_weakref.py @@ -1673,6 +1673,18 @@ class MappingTestCase(TestBase): x = d.pop(10, 10) self.assertIsNot(x, None) # we never put None in there! + def test_threaded_weak_valued_consistency(self): + # Issue #28427: old keys should not remove new values from + # WeakValueDictionary when collecting from another thread. + d = weakref.WeakValueDictionary() + with collect_in_thread(): + for i in range(200000): + o = RefCycle() + d[10] = o + # o is still alive, so the dict can't be empty + self.assertEqual(len(d), 1) + o = None # lose ref + from test import mapping_tests diff --git a/Lib/weakref.py b/Lib/weakref.py index 9f8ef3eb8c6..aaebd0c464a 100644 --- a/Lib/weakref.py +++ b/Lib/weakref.py @@ -16,7 +16,8 @@ from _weakref import ( proxy, CallableProxyType, ProxyType, - ReferenceType) + ReferenceType, + _remove_dead_weakref) from _weakrefset import WeakSet, _IterationGuard @@ -111,7 +112,9 @@ class WeakValueDictionary(collections.MutableMapping): if self._iterating: self._pending_removals.append(wr.key) else: - del self.data[wr.key] + # Atomic removal is necessary since this function + # can be called asynchronously by the GC + _remove_dead_weakref(d, wr.key) self._remove = remove # A list of keys to be removed self._pending_removals = [] @@ -125,9 +128,12 @@ class WeakValueDictionary(collections.MutableMapping): # We shouldn't encounter any KeyError, because this method should # always be called *before* mutating the dict. while l: - del d[l.pop()] + key = l.pop() + _remove_dead_weakref(d, key) def __getitem__(self, key): + if self._pending_removals: + self._commit_removals() o = self.data[key]() if o is None: raise KeyError(key) @@ -140,9 +146,13 @@ class WeakValueDictionary(collections.MutableMapping): del self.data[key] def __len__(self): - return len(self.data) - len(self._pending_removals) + if self._pending_removals: + self._commit_removals() + return len(self.data) def __contains__(self, key): + if self._pending_removals: + self._commit_removals() try: o = self.data[key]() except KeyError: @@ -158,6 +168,8 @@ class WeakValueDictionary(collections.MutableMapping): self.data[key] = KeyedRef(value, self._remove, key) def copy(self): + if self._pending_removals: + self._commit_removals() new = WeakValueDictionary() for key, wr in self.data.items(): o = wr() @@ -169,6 +181,8 @@ class WeakValueDictionary(collections.MutableMapping): def __deepcopy__(self, memo): from copy import deepcopy + if self._pending_removals: + self._commit_removals() new = self.__class__() for key, wr in self.data.items(): o = wr() @@ -177,6 +191,8 @@ class WeakValueDictionary(collections.MutableMapping): return new def get(self, key, default=None): + if self._pending_removals: + self._commit_removals() try: wr = self.data[key] except KeyError: @@ -190,6 +206,8 @@ class WeakValueDictionary(collections.MutableMapping): return o def items(self): + if self._pending_removals: + self._commit_removals() with _IterationGuard(self): for k, wr in self.data.items(): v = wr() @@ -197,6 +215,8 @@ class WeakValueDictionary(collections.MutableMapping): yield k, v def keys(self): + if self._pending_removals: + self._commit_removals() with _IterationGuard(self): for k, wr in self.data.items(): if wr() is not None: @@ -214,10 +234,14 @@ class WeakValueDictionary(collections.MutableMapping): keep the values around longer than needed. """ + if self._pending_removals: + self._commit_removals() with _IterationGuard(self): yield from self.data.values() def values(self): + if self._pending_removals: + self._commit_removals() with _IterationGuard(self): for wr in self.data.values(): obj = wr() @@ -290,6 +314,8 @@ class WeakValueDictionary(collections.MutableMapping): keep the values around longer than needed. """ + if self._pending_removals: + self._commit_removals() return list(self.data.values()) diff --git a/Misc/NEWS b/Misc/NEWS index 65d2aee36dc..d11136d78e8 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -138,6 +138,9 @@ Core and Builtins Library ------- +- Issue #28427: old keys should not remove new values from + WeakValueDictionary when collecting from another thread. + - Issue 28923: Remove editor artifacts from Tix.py. - Issue #28871: Fixed a crash when deallocate deep ElementTree. diff --git a/Modules/_weakref.c b/Modules/_weakref.c index 805d6d09857..f9c68d6a640 100644 --- a/Modules/_weakref.c +++ b/Modules/_weakref.c @@ -35,6 +35,46 @@ _weakref_getweakrefcount_impl(PyObject *module, PyObject *object) } +static int +is_dead_weakref(PyObject *value) +{ + if (!PyWeakref_Check(value)) { + PyErr_SetString(PyExc_TypeError, "not a weakref"); + return -1; + } + return PyWeakref_GET_OBJECT(value) == Py_None; +} + +/*[clinic input] + +_weakref._remove_dead_weakref -> object + + dct: object(subclass_of='&PyDict_Type') + key: object + / + +Atomically remove key from dict if it points to a dead weakref. +[clinic start generated code]*/ + +static PyObject * +_weakref__remove_dead_weakref_impl(PyObject *module, PyObject *dct, + PyObject *key) +/*[clinic end generated code: output=d9ff53061fcb875c input=19fc91f257f96a1d]*/ +{ + if (_PyDict_DelItemIf(dct, key, is_dead_weakref) < 0) { + if (PyErr_ExceptionMatches(PyExc_KeyError)) + /* This function is meant to allow safe weak-value dicts + with GC in another thread (see issue #28427), so it's + ok if the key doesn't exist anymore. + */ + PyErr_Clear(); + else + return NULL; + } + Py_RETURN_NONE; +} + + PyDoc_STRVAR(weakref_getweakrefs__doc__, "getweakrefs(object) -- return a list of all weak reference objects\n" "that point to 'object'."); @@ -88,6 +128,7 @@ weakref_proxy(PyObject *self, PyObject *args) static PyMethodDef weakref_functions[] = { _WEAKREF_GETWEAKREFCOUNT_METHODDEF + _WEAKREF__REMOVE_DEAD_WEAKREF_METHODDEF {"getweakrefs", weakref_getweakrefs, METH_O, weakref_getweakrefs__doc__}, {"proxy", weakref_proxy, METH_VARARGS, diff --git a/Modules/clinic/_weakref.c.h b/Modules/clinic/_weakref.c.h index b83b33e4a18..2d93679e123 100644 --- a/Modules/clinic/_weakref.c.h +++ b/Modules/clinic/_weakref.c.h @@ -28,4 +28,33 @@ _weakref_getweakrefcount(PyObject *module, PyObject *object) exit: return return_value; } -/*[clinic end generated code: output=d9086c8576d46933 input=a9049054013a1b77]*/ + +PyDoc_STRVAR(_weakref__remove_dead_weakref__doc__, +"_remove_dead_weakref($module, dct, key, /)\n" +"--\n" +"\n" +"Atomically remove key from dict if it points to a dead weakref."); + +#define _WEAKREF__REMOVE_DEAD_WEAKREF_METHODDEF \ + {"_remove_dead_weakref", (PyCFunction)_weakref__remove_dead_weakref, METH_VARARGS, _weakref__remove_dead_weakref__doc__}, + +static PyObject * +_weakref__remove_dead_weakref_impl(PyObject *module, PyObject *dct, + PyObject *key); + +static PyObject * +_weakref__remove_dead_weakref(PyObject *module, PyObject *args) +{ + PyObject *return_value = NULL; + PyObject *dct; + PyObject *key; + + if (!PyArg_ParseTuple(args, "O!O:_remove_dead_weakref", + &PyDict_Type, &dct, &key)) + goto exit; + return_value = _weakref__remove_dead_weakref_impl(module, dct, key); + +exit: + return return_value; +} +/*[clinic end generated code: output=5764cb64a6f66ffd input=a9049054013a1b77]*/ diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 795c1f6de15..747d2189285 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1246,13 +1246,31 @@ _PyDict_SetItem_KnownHash(PyObject *op, PyObject *key, PyObject *value, return insertdict(mp, key, hash, value); } +static int +delitem_common(PyDictObject *mp, PyDictKeyEntry *ep, PyObject **value_addr) +{ + PyObject *old_key, *old_value; + + old_value = *value_addr; + *value_addr = NULL; + mp->ma_used--; + if (!_PyDict_HasSplitTable(mp)) { + ENSURE_ALLOWS_DELETIONS(mp); + old_key = ep->me_key; + Py_INCREF(dummy); + ep->me_key = dummy; + Py_DECREF(old_key); + } + Py_DECREF(old_value); + return 0; +} + int PyDict_DelItem(PyObject *op, PyObject *key) { PyDictObject *mp; Py_hash_t hash; PyDictKeyEntry *ep; - PyObject *old_key, *old_value; PyObject **value_addr; if (!PyDict_Check(op)) { @@ -1274,18 +1292,7 @@ PyDict_DelItem(PyObject *op, PyObject *key) _PyErr_SetKeyError(key); return -1; } - old_value = *value_addr; - *value_addr = NULL; - mp->ma_used--; - if (!_PyDict_HasSplitTable(mp)) { - ENSURE_ALLOWS_DELETIONS(mp); - old_key = ep->me_key; - Py_INCREF(dummy); - ep->me_key = dummy; - Py_DECREF(old_key); - } - Py_DECREF(old_value); - return 0; + return delitem_common(mp, ep, value_addr); } int @@ -1293,7 +1300,6 @@ _PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash) { PyDictObject *mp; PyDictKeyEntry *ep; - PyObject *old_key, *old_value; PyObject **value_addr; if (!PyDict_Check(op)) { @@ -1310,20 +1316,45 @@ _PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash) _PyErr_SetKeyError(key); return -1; } - old_value = *value_addr; - *value_addr = NULL; - mp->ma_used--; - if (!_PyDict_HasSplitTable(mp)) { - ENSURE_ALLOWS_DELETIONS(mp); - old_key = ep->me_key; - Py_INCREF(dummy); - ep->me_key = dummy; - Py_DECREF(old_key); - } - Py_DECREF(old_value); - return 0; + return delitem_common(mp, ep, value_addr); } +int +_PyDict_DelItemIf(PyObject *op, PyObject *key, + int (*predicate)(PyObject *value)) +{ + PyDictObject *mp; + Py_hash_t hash; + PyDictKeyEntry *ep; + PyObject **value_addr; + int res; + + if (!PyDict_Check(op)) { + PyErr_BadInternalCall(); + return -1; + } + assert(key); + hash = PyObject_Hash(key); + if (hash == -1) + return -1; + mp = (PyDictObject *)op; + ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr); + if (ep == NULL) + return -1; + if (*value_addr == NULL) { + _PyErr_SetKeyError(key); + return -1; + } + res = predicate(*value_addr); + if (res == -1) + return -1; + if (res > 0) + return delitem_common(mp, ep, value_addr); + else + return 0; +} + + void PyDict_Clear(PyObject *op) {