gh-112075: Fix dict thread safety issues (#119288)

Fix dict thread safety issues
This commit is contained in:
Germán Méndez Bravo 2024-05-24 10:39:06 -07:00 committed by GitHub
parent bf5b6467f8
commit f0ed1863bd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 41 additions and 25 deletions

View File

@ -154,6 +154,11 @@ ASSERT_DICT_LOCKED(PyObject *op)
_Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op); _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op);
} }
#define ASSERT_DICT_LOCKED(op) ASSERT_DICT_LOCKED(_Py_CAST(PyObject*, op)) #define ASSERT_DICT_LOCKED(op) ASSERT_DICT_LOCKED(_Py_CAST(PyObject*, op))
#define ASSERT_WORLD_STOPPED_OR_DICT_LOCKED(op) \
if (!_PyInterpreterState_GET()->stoptheworld.world_stopped) { \
ASSERT_DICT_LOCKED(op); \
}
#define IS_DICT_SHARED(mp) _PyObject_GC_IS_SHARED(mp) #define IS_DICT_SHARED(mp) _PyObject_GC_IS_SHARED(mp)
#define SET_DICT_SHARED(mp) _PyObject_GC_SET_SHARED(mp) #define SET_DICT_SHARED(mp) _PyObject_GC_SET_SHARED(mp)
#define LOAD_INDEX(keys, size, idx) _Py_atomic_load_int##size##_relaxed(&((const int##size##_t*)keys->dk_indices)[idx]); #define LOAD_INDEX(keys, size, idx) _Py_atomic_load_int##size##_relaxed(&((const int##size##_t*)keys->dk_indices)[idx]);
@ -221,6 +226,7 @@ static inline void split_keys_entry_added(PyDictKeysObject *keys)
#else /* Py_GIL_DISABLED */ #else /* Py_GIL_DISABLED */
#define ASSERT_DICT_LOCKED(op) #define ASSERT_DICT_LOCKED(op)
#define ASSERT_WORLD_STOPPED_OR_DICT_LOCKED(op)
#define LOCK_KEYS(keys) #define LOCK_KEYS(keys)
#define UNLOCK_KEYS(keys) #define UNLOCK_KEYS(keys)
#define ASSERT_KEYS_LOCKED(keys) #define ASSERT_KEYS_LOCKED(keys)
@ -473,7 +479,7 @@ dictkeys_decref(PyInterpreterState *interp, PyDictKeysObject *dk, bool use_qsbr)
if (FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) == _Py_IMMORTAL_REFCNT) { if (FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) == _Py_IMMORTAL_REFCNT) {
return; return;
} }
assert(dk->dk_refcnt > 0); assert(FT_ATOMIC_LOAD_SSIZE(dk->dk_refcnt) > 0);
#ifdef Py_REF_DEBUG #ifdef Py_REF_DEBUG
_Py_DecRefTotal(_PyThreadState_GET()); _Py_DecRefTotal(_PyThreadState_GET());
#endif #endif
@ -670,6 +676,8 @@ dump_entries(PyDictKeysObject *dk)
int int
_PyDict_CheckConsistency(PyObject *op, int check_content) _PyDict_CheckConsistency(PyObject *op, int check_content)
{ {
ASSERT_WORLD_STOPPED_OR_DICT_LOCKED(op);
#define CHECK(expr) \ #define CHECK(expr) \
do { if (!(expr)) { _PyObject_ASSERT_FAILED_MSG(op, Py_STRINGIFY(expr)); } } while (0) do { if (!(expr)) { _PyObject_ASSERT_FAILED_MSG(op, Py_STRINGIFY(expr)); } } while (0)
@ -1580,6 +1588,8 @@ _PyDict_MaybeUntrack(PyObject *op)
PyObject *value; PyObject *value;
Py_ssize_t i, numentries; Py_ssize_t i, numentries;
ASSERT_WORLD_STOPPED_OR_DICT_LOCKED(op);
if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op)) if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op))
return; return;
@ -1722,13 +1732,14 @@ static void
insert_split_value(PyInterpreterState *interp, PyDictObject *mp, PyObject *key, PyObject *value, Py_ssize_t ix) insert_split_value(PyInterpreterState *interp, PyDictObject *mp, PyObject *key, PyObject *value, Py_ssize_t ix)
{ {
assert(PyUnicode_CheckExact(key)); assert(PyUnicode_CheckExact(key));
ASSERT_DICT_LOCKED(mp);
MAINTAIN_TRACKING(mp, key, value); MAINTAIN_TRACKING(mp, key, value);
PyObject *old_value = mp->ma_values->values[ix]; PyObject *old_value = mp->ma_values->values[ix];
if (old_value == NULL) { if (old_value == NULL) {
uint64_t new_version = _PyDict_NotifyEvent(interp, PyDict_EVENT_ADDED, mp, key, value); uint64_t new_version = _PyDict_NotifyEvent(interp, PyDict_EVENT_ADDED, mp, key, value);
STORE_SPLIT_VALUE(mp, ix, Py_NewRef(value)); STORE_SPLIT_VALUE(mp, ix, Py_NewRef(value));
_PyDictValues_AddToInsertionOrder(mp->ma_values, ix); _PyDictValues_AddToInsertionOrder(mp->ma_values, ix);
mp->ma_used++; STORE_USED(mp, mp->ma_used + 1);
mp->ma_version_tag = new_version; mp->ma_version_tag = new_version;
} }
else { else {
@ -1792,7 +1803,7 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp,
goto Fail; goto Fail;
} }
mp->ma_version_tag = new_version; mp->ma_version_tag = new_version;
mp->ma_used++; STORE_USED(mp, mp->ma_used + 1);
ASSERT_CONSISTENT(mp); ASSERT_CONSISTENT(mp);
return 0; return 0;
} }
@ -1861,7 +1872,7 @@ insert_to_emptydict(PyInterpreterState *interp, PyDictObject *mp,
ep->me_hash = hash; ep->me_hash = hash;
STORE_VALUE(ep, value); STORE_VALUE(ep, value);
} }
FT_ATOMIC_STORE_SSIZE_RELAXED(mp->ma_used, FT_ATOMIC_LOAD_SSIZE_RELAXED(mp->ma_used) + 1); STORE_USED(mp, mp->ma_used + 1);
mp->ma_version_tag = new_version; mp->ma_version_tag = new_version;
newkeys->dk_usable--; newkeys->dk_usable--;
newkeys->dk_nentries++; newkeys->dk_nentries++;
@ -1870,11 +1881,7 @@ insert_to_emptydict(PyInterpreterState *interp, PyDictObject *mp,
// the case where we're inserting from the non-owner thread. We don't use // the case where we're inserting from the non-owner thread. We don't use
// set_keys here because the transition from empty to non-empty is safe // set_keys here because the transition from empty to non-empty is safe
// as the empty keys will never be freed. // as the empty keys will never be freed.
#ifdef Py_GIL_DISABLED FT_ATOMIC_STORE_PTR_RELEASE(mp->ma_keys, newkeys);
_Py_atomic_store_ptr_release(&mp->ma_keys, newkeys);
#else
mp->ma_keys = newkeys;
#endif
return 0; return 0;
} }
@ -2580,7 +2587,7 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix,
Py_ssize_t hashpos = lookdict_index(mp->ma_keys, hash, ix); Py_ssize_t hashpos = lookdict_index(mp->ma_keys, hash, ix);
assert(hashpos >= 0); assert(hashpos >= 0);
FT_ATOMIC_STORE_SSIZE_RELAXED(mp->ma_used, FT_ATOMIC_LOAD_SSIZE(mp->ma_used) - 1); STORE_USED(mp, mp->ma_used - 1);
mp->ma_version_tag = new_version; mp->ma_version_tag = new_version;
if (_PyDict_HasSplitTable(mp)) { if (_PyDict_HasSplitTable(mp)) {
assert(old_value == mp->ma_values->values[ix]); assert(old_value == mp->ma_values->values[ix]);
@ -2752,7 +2759,7 @@ clear_lock_held(PyObject *op)
// We don't inc ref empty keys because they're immortal // We don't inc ref empty keys because they're immortal
ensure_shared_on_resize(mp); ensure_shared_on_resize(mp);
mp->ma_version_tag = new_version; mp->ma_version_tag = new_version;
mp->ma_used = 0; STORE_USED(mp, 0);
if (oldvalues == NULL) { if (oldvalues == NULL) {
set_keys(mp, Py_EMPTY_KEYS); set_keys(mp, Py_EMPTY_KEYS);
assert(oldkeys->dk_refcnt == 1); assert(oldkeys->dk_refcnt == 1);
@ -3191,6 +3198,8 @@ dict_repr_lock_held(PyObject *self)
_PyUnicodeWriter writer; _PyUnicodeWriter writer;
int first; int first;
ASSERT_DICT_LOCKED(mp);
i = Py_ReprEnter((PyObject *)mp); i = Py_ReprEnter((PyObject *)mp);
if (i != 0) { if (i != 0) {
return i > 0 ? PyUnicode_FromString("{...}") : NULL; return i > 0 ? PyUnicode_FromString("{...}") : NULL;
@ -3279,8 +3288,7 @@ dict_repr(PyObject *self)
static Py_ssize_t static Py_ssize_t
dict_length(PyObject *self) dict_length(PyObject *self)
{ {
PyDictObject *mp = (PyDictObject *)self; return FT_ATOMIC_LOAD_SSIZE_RELAXED(((PyDictObject *)self)->ma_used);
return _Py_atomic_load_ssize_relaxed(&mp->ma_used);
} }
static PyObject * static PyObject *
@ -3672,6 +3680,9 @@ PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override)
static int static int
dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *other, int override) dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *other, int override)
{ {
ASSERT_DICT_LOCKED(mp);
ASSERT_DICT_LOCKED(other);
if (other == mp || other->ma_used == 0) if (other == mp || other->ma_used == 0)
/* a.update(a) or a.update({}); nothing to do */ /* a.update(a) or a.update({}); nothing to do */
return 0; return 0;
@ -3699,7 +3710,7 @@ dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *othe
ensure_shared_on_resize(mp); ensure_shared_on_resize(mp);
dictkeys_decref(interp, mp->ma_keys, IS_DICT_SHARED(mp)); dictkeys_decref(interp, mp->ma_keys, IS_DICT_SHARED(mp));
mp->ma_keys = keys; mp->ma_keys = keys;
mp->ma_used = other->ma_used; STORE_USED(mp, other->ma_used);
mp->ma_version_tag = new_version; mp->ma_version_tag = new_version;
ASSERT_CONSISTENT(mp); ASSERT_CONSISTENT(mp);
@ -4034,7 +4045,7 @@ PyDict_Size(PyObject *mp)
PyErr_BadInternalCall(); PyErr_BadInternalCall();
return -1; return -1;
} }
return ((PyDictObject *)mp)->ma_used; return FT_ATOMIC_LOAD_SSIZE_RELAXED(((PyDictObject *)mp)->ma_used);
} }
/* Return 1 if dicts equal, 0 if not, -1 if error. /* Return 1 if dicts equal, 0 if not, -1 if error.
@ -4291,7 +4302,7 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu
} }
MAINTAIN_TRACKING(mp, key, value); MAINTAIN_TRACKING(mp, key, value);
mp->ma_used++; STORE_USED(mp, mp->ma_used + 1);
mp->ma_version_tag = new_version; mp->ma_version_tag = new_version;
assert(mp->ma_keys->dk_usable >= 0); assert(mp->ma_keys->dk_usable >= 0);
ASSERT_CONSISTENT(mp); ASSERT_CONSISTENT(mp);
@ -4413,6 +4424,8 @@ dict_popitem_impl(PyDictObject *self)
uint64_t new_version; uint64_t new_version;
PyInterpreterState *interp = _PyInterpreterState_GET(); PyInterpreterState *interp = _PyInterpreterState_GET();
ASSERT_DICT_LOCKED(self);
/* Allocate the result tuple before checking the size. Believe it /* Allocate the result tuple before checking the size. Believe it
* or not, this allocation could trigger a garbage collection which * or not, this allocation could trigger a garbage collection which
* could empty the dict, so if we checked the size first and that * could empty the dict, so if we checked the size first and that
@ -4952,19 +4965,21 @@ typedef struct {
static PyObject * static PyObject *
dictiter_new(PyDictObject *dict, PyTypeObject *itertype) dictiter_new(PyDictObject *dict, PyTypeObject *itertype)
{ {
Py_ssize_t used;
dictiterobject *di; dictiterobject *di;
di = PyObject_GC_New(dictiterobject, itertype); di = PyObject_GC_New(dictiterobject, itertype);
if (di == NULL) { if (di == NULL) {
return NULL; return NULL;
} }
di->di_dict = (PyDictObject*)Py_NewRef(dict); di->di_dict = (PyDictObject*)Py_NewRef(dict);
di->di_used = dict->ma_used; used = FT_ATOMIC_LOAD_SSIZE_RELAXED(dict->ma_used);
di->len = dict->ma_used; di->di_used = used;
di->len = used;
if (itertype == &PyDictRevIterKey_Type || if (itertype == &PyDictRevIterKey_Type ||
itertype == &PyDictRevIterItem_Type || itertype == &PyDictRevIterItem_Type ||
itertype == &PyDictRevIterValue_Type) { itertype == &PyDictRevIterValue_Type) {
if (_PyDict_HasSplitTable(dict)) { if (_PyDict_HasSplitTable(dict)) {
di->di_pos = dict->ma_used - 1; di->di_pos = used - 1;
} }
else { else {
di->di_pos = load_keys_nentries(dict) - 1; di->di_pos = load_keys_nentries(dict) - 1;
@ -5013,8 +5028,8 @@ dictiter_len(PyObject *self, PyObject *Py_UNUSED(ignored))
{ {
dictiterobject *di = (dictiterobject *)self; dictiterobject *di = (dictiterobject *)self;
Py_ssize_t len = 0; Py_ssize_t len = 0;
if (di->di_dict != NULL && di->di_used == di->di_dict->ma_used) if (di->di_dict != NULL && di->di_used == FT_ATOMIC_LOAD_SSIZE_RELAXED(di->di_dict->ma_used))
len = di->len; len = FT_ATOMIC_LOAD_SSIZE_RELAXED(di->len);
return PyLong_FromSize_t(len); return PyLong_FromSize_t(len);
} }
@ -5297,6 +5312,7 @@ dictiter_iternextitem_lock_held(PyDictObject *d, PyObject *self,
Py_ssize_t i; Py_ssize_t i;
assert (PyDict_Check(d)); assert (PyDict_Check(d));
ASSERT_DICT_LOCKED(d);
if (di->di_used != d->ma_used) { if (di->di_used != d->ma_used) {
PyErr_SetString(PyExc_RuntimeError, PyErr_SetString(PyExc_RuntimeError,
@ -5811,7 +5827,7 @@ dictview_len(PyObject *self)
_PyDictViewObject *dv = (_PyDictViewObject *)self; _PyDictViewObject *dv = (_PyDictViewObject *)self;
Py_ssize_t len = 0; Py_ssize_t len = 0;
if (dv->dv_dict != NULL) if (dv->dv_dict != NULL)
len = dv->dv_dict->ma_used; len = FT_ATOMIC_LOAD_SSIZE_RELAXED(dv->dv_dict->ma_used);
return len; return len;
} }
@ -6820,7 +6836,7 @@ store_instance_attr_lock_held(PyObject *obj, PyDictValues *values,
_PyDictValues_AddToInsertionOrder(values, ix); _PyDictValues_AddToInsertionOrder(values, ix);
if (dict) { if (dict) {
assert(dict->ma_values == values); assert(dict->ma_values == values);
dict->ma_used++; STORE_USED(dict, dict->ma_used + 1);
} }
} }
else { else {
@ -6828,7 +6844,7 @@ store_instance_attr_lock_held(PyObject *obj, PyDictValues *values,
delete_index_from_values(values, ix); delete_index_from_values(values, ix);
if (dict) { if (dict) {
assert(dict->ma_values == values); assert(dict->ma_values == values);
dict->ma_used--; STORE_USED(dict, dict->ma_used - 1);
} }
} }
Py_DECREF(old_value); Py_DECREF(old_value);
@ -7039,7 +7055,7 @@ _PyObject_IsInstanceDictEmpty(PyObject *obj)
if (dict == NULL) { if (dict == NULL) {
return 1; return 1;
} }
return ((PyDictObject *)dict)->ma_used == 0; return FT_ATOMIC_LOAD_SSIZE_RELAXED(((PyDictObject *)dict)->ma_used) == 0;
} }
int int