From 1002fbe12e0bd8c9a54bc5addbf5d94a5b35f91f Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Thu, 22 Feb 2024 12:02:39 -0800 Subject: [PATCH] gh-112075: Iterating a dict shouldn't require locks (#115108) Makes iteration of a dict be lock free for the forward iteration case. --- Include/internal/pycore_dict.h | 1 + Objects/dictobject.c | 328 ++++++++++++++++++++++++++------- 2 files changed, 267 insertions(+), 62 deletions(-) diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 5a496d59ab7..d1a0010b9a8 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -212,6 +212,7 @@ static inline PyDictUnicodeEntry* DK_UNICODE_ENTRIES(PyDictKeysObject *dk) { #define DICT_WATCHER_AND_MODIFICATION_MASK ((1 << (DICT_MAX_WATCHERS + DICT_WATCHED_MUTATION_BITS)) - 1) #define DICT_VALUES_SIZE(values) ((uint8_t *)values)[-1] +#define DICT_VALUES_USED_SIZE(values) ((uint8_t *)values)[-2] #ifdef Py_GIL_DISABLED #define DICT_NEXT_VERSION(INTERP) \ diff --git a/Objects/dictobject.c b/Objects/dictobject.c index ed92998a23a..5ae4c3dbea2 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -115,19 +115,20 @@ As a consequence of this, split keys have a maximum size of 16. #define PyDict_MINSIZE 8 #include "Python.h" -#include "pycore_bitutils.h" // _Py_bit_length -#include "pycore_call.h" // _PyObject_CallNoArgs() -#include "pycore_ceval.h" // _PyEval_GetBuiltin() -#include "pycore_code.h" // stats -#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION, Py_END_CRITICAL_SECTION -#include "pycore_dict.h" // export _PyDict_SizeOf() -#include "pycore_freelist.h" // _PyFreeListState_GET() -#include "pycore_gc.h" // _PyObject_GC_IS_TRACKED() -#include "pycore_object.h" // _PyObject_GC_TRACK(), _PyDebugAllocatorStats() -#include "pycore_pyerrors.h" // _PyErr_GetRaisedException() -#include "pycore_pystate.h" // _PyThreadState_GET() -#include "pycore_setobject.h" // _PySet_NextEntry() -#include "stringlib/eq.h" // unicode_eq() +#include "pycore_bitutils.h" // _Py_bit_length +#include "pycore_call.h" // _PyObject_CallNoArgs() +#include "pycore_ceval.h" // _PyEval_GetBuiltin() +#include "pycore_code.h" // stats +#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION, Py_END_CRITICAL_SECTION +#include "pycore_dict.h" // export _PyDict_SizeOf() +#include "pycore_freelist.h" // _PyFreeListState_GET() +#include "pycore_gc.h" // _PyObject_GC_IS_TRACKED() +#include "pycore_object.h" // _PyObject_GC_TRACK(), _PyDebugAllocatorStats() +#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_LOAD_SSIZE_RELAXED +#include "pycore_pyerrors.h" // _PyErr_GetRaisedException() +#include "pycore_pystate.h" // _PyThreadState_GET() +#include "pycore_setobject.h" // _PySet_NextEntry() +#include "stringlib/eq.h" // unicode_eq() #include @@ -158,6 +159,14 @@ ASSERT_DICT_LOCKED(PyObject *op) #define STORE_INDEX(keys, size, idx, value) _Py_atomic_store_int##size##_relaxed(&((int##size##_t*)keys->dk_indices)[idx], (int##size##_t)value); #define ASSERT_OWNED_OR_SHARED(mp) \ assert(_Py_IsOwnedByCurrentThread((PyObject *)mp) || IS_DICT_SHARED(mp)); +#define LOAD_KEYS_NENTRIES(d) + +static inline Py_ssize_t +load_keys_nentries(PyDictObject *mp) +{ + PyDictKeysObject *keys = _Py_atomic_load_ptr(&mp->ma_keys); + return _Py_atomic_load_ssize(&keys->dk_nentries); +} static inline void set_keys(PyDictObject *mp, PyDictKeysObject *keys) @@ -232,6 +241,13 @@ set_values(PyDictObject *mp, PyDictValues *values) mp->ma_values = values; } +static inline Py_ssize_t +load_keys_nentries(PyDictObject *mp) +{ + return mp->ma_keys->dk_nentries; +} + + #endif #define PERTURB_SHIFT 5 @@ -4858,7 +4874,7 @@ dictiter_new(PyDictObject *dict, PyTypeObject *itertype) di->di_pos = dict->ma_used - 1; } else { - di->di_pos = dict->ma_keys->dk_nentries - 1; + di->di_pos = load_keys_nentries(dict) - 1; } } else { @@ -4925,6 +4941,14 @@ static PyMethodDef dictiter_methods[] = { {NULL, NULL} /* sentinel */ }; +#ifdef Py_GIL_DISABLED + +static int +dictiter_iternext_threadsafe(PyDictObject *d, PyObject *self, + PyObject **out_key, PyObject **out_value); + +#else /* Py_GIL_DISABLED */ + static PyObject* dictiter_iternextkey_lock_held(PyDictObject *d, PyObject *self) { @@ -4992,6 +5016,8 @@ fail: return NULL; } +#endif /* Py_GIL_DISABLED */ + static PyObject* dictiter_iternextkey(PyObject *self) { @@ -5002,9 +5028,13 @@ dictiter_iternextkey(PyObject *self) return NULL; PyObject *value; - Py_BEGIN_CRITICAL_SECTION(d); +#ifdef Py_GIL_DISABLED + if (!dictiter_iternext_threadsafe(d, self, &value, NULL) == 0) { + value = NULL; + } +#else value = dictiter_iternextkey_lock_held(d, self); - Py_END_CRITICAL_SECTION(); +#endif return value; } @@ -5042,6 +5072,8 @@ PyTypeObject PyDictIterKey_Type = { 0, }; +#ifndef Py_GIL_DISABLED + static PyObject * dictiter_iternextvalue_lock_held(PyDictObject *d, PyObject *self) { @@ -5107,6 +5139,8 @@ fail: return NULL; } +#endif /* Py_GIL_DISABLED */ + static PyObject * dictiter_iternextvalue(PyObject *self) { @@ -5117,9 +5151,13 @@ dictiter_iternextvalue(PyObject *self) return NULL; PyObject *value; - Py_BEGIN_CRITICAL_SECTION(d); +#ifdef Py_GIL_DISABLED + if (!dictiter_iternext_threadsafe(d, self, NULL, &value) == 0) { + value = NULL; + } +#else value = dictiter_iternextvalue_lock_held(d, self); - Py_END_CRITICAL_SECTION(); +#endif return value; } @@ -5157,11 +5195,12 @@ PyTypeObject PyDictIterValue_Type = { 0, }; -static PyObject * -dictiter_iternextitem_lock_held(PyDictObject *d, PyObject *self) +static int +dictiter_iternextitem_lock_held(PyDictObject *d, PyObject *self, + PyObject **out_key, PyObject **out_value) { dictiterobject *di = (dictiterobject *)self; - PyObject *key, *value, *result; + PyObject *key, *value; Py_ssize_t i; assert (PyDict_Check(d)); @@ -5170,10 +5209,11 @@ dictiter_iternextitem_lock_held(PyDictObject *d, PyObject *self) PyErr_SetString(PyExc_RuntimeError, "dictionary changed size during iteration"); di->di_used = -1; /* Make this state sticky */ - return NULL; + return -1; } - i = di->di_pos; + i = FT_ATOMIC_LOAD_SSIZE_RELAXED(di->di_pos); + assert(i >= 0); if (_PyDict_HasSplitTable(d)) { if (i >= d->ma_used) @@ -5216,34 +5256,184 @@ dictiter_iternextitem_lock_held(PyDictObject *d, PyObject *self) } di->di_pos = i+1; di->len--; - result = di->di_result; - if (Py_REFCNT(result) == 1) { - PyObject *oldkey = PyTuple_GET_ITEM(result, 0); - PyObject *oldvalue = PyTuple_GET_ITEM(result, 1); - PyTuple_SET_ITEM(result, 0, Py_NewRef(key)); - PyTuple_SET_ITEM(result, 1, Py_NewRef(value)); - Py_INCREF(result); - Py_DECREF(oldkey); - Py_DECREF(oldvalue); - // bpo-42536: The GC may have untracked this result tuple. Since we're - // recycling it, make sure it's tracked again: - if (!_PyObject_GC_IS_TRACKED(result)) { - _PyObject_GC_TRACK(result); - } + if (out_key != NULL) { + *out_key = Py_NewRef(key); } - else { - result = PyTuple_New(2); - if (result == NULL) - return NULL; - PyTuple_SET_ITEM(result, 0, Py_NewRef(key)); - PyTuple_SET_ITEM(result, 1, Py_NewRef(value)); + if (out_value != NULL) { + *out_value = Py_NewRef(value); } - return result; + return 0; fail: di->di_dict = NULL; Py_DECREF(d); - return NULL; + return -1; +} + +#ifdef Py_GIL_DISABLED + +// Grabs the key and/or value from the provided locations and if successful +// returns them with an increased reference count. If either one is unsucessful +// nothing is incref'd and returns -1. +static int +acquire_key_value(PyObject **key_loc, PyObject *value, PyObject **value_loc, + PyObject **out_key, PyObject **out_value) +{ + if (out_key) { + *out_key = _Py_TryXGetRef(key_loc); + if (*out_key == NULL) { + return -1; + } + } + + if (out_value) { + if (!_Py_TryIncref(value_loc, value)) { + if (out_key) { + Py_DECREF(*out_key); + } + return -1; + } + *out_value = value; + } + + return 0; +} + +static Py_ssize_t +load_values_used_size(PyDictValues *values) +{ + return (Py_ssize_t)_Py_atomic_load_uint8(&DICT_VALUES_USED_SIZE(values)); +} + +static int +dictiter_iternext_threadsafe(PyDictObject *d, PyObject *self, + PyObject **out_key, PyObject **out_value) +{ + dictiterobject *di = (dictiterobject *)self; + Py_ssize_t i; + PyDictKeysObject *k; + + assert (PyDict_Check(d)); + + if (di->di_used != _Py_atomic_load_ssize_relaxed(&d->ma_used)) { + PyErr_SetString(PyExc_RuntimeError, + "dictionary changed size during iteration"); + di->di_used = -1; /* Make this state sticky */ + return -1; + } + + ensure_shared_on_read(d); + + i = _Py_atomic_load_ssize_relaxed(&di->di_pos); + k = _Py_atomic_load_ptr_relaxed(&d->ma_keys); + assert(i >= 0); + if (_PyDict_HasSplitTable(d)) { + PyDictValues *values = _Py_atomic_load_ptr_relaxed(&d->ma_values); + if (values == NULL) { + goto concurrent_modification; + } + + Py_ssize_t used = load_values_used_size(values); + if (i >= used) { + goto fail; + } + + // We're racing against writes to the order from delete_index_from_values, but + // single threaded can suffer from concurrent modification to those as well and + // can have either duplicated or skipped attributes, so we strive to do no better + // here. + int index = get_index_from_order(d, i); + PyObject *value = _Py_atomic_load_ptr(&values->values[index]); + if (acquire_key_value(&DK_UNICODE_ENTRIES(k)[index].me_key, value, + &values->values[index], out_key, out_value) < 0) { + goto try_locked; + } + } + else { + Py_ssize_t n = _Py_atomic_load_ssize_relaxed(&k->dk_nentries); + if (DK_IS_UNICODE(k)) { + PyDictUnicodeEntry *entry_ptr = &DK_UNICODE_ENTRIES(k)[i]; + PyObject *value; + while (i < n && + (value = _Py_atomic_load_ptr(&entry_ptr->me_value)) == NULL) { + entry_ptr++; + i++; + } + if (i >= n) + goto fail; + + if (acquire_key_value(&entry_ptr->me_key, value, + &entry_ptr->me_value, out_key, out_value) < 0) { + goto try_locked; + } + } + else { + PyDictKeyEntry *entry_ptr = &DK_ENTRIES(k)[i]; + PyObject *value; + while (i < n && + (value = _Py_atomic_load_ptr(&entry_ptr->me_value)) == NULL) { + entry_ptr++; + i++; + } + + if (i >= n) + goto fail; + + if (acquire_key_value(&entry_ptr->me_key, value, + &entry_ptr->me_value, out_key, out_value) < 0) { + goto try_locked; + } + } + } + // We found an element (key), but did not expect it + Py_ssize_t len; + if ((len = _Py_atomic_load_ssize_relaxed(&di->len)) == 0) { + goto concurrent_modification; + } + + _Py_atomic_store_ssize_relaxed(&di->di_pos, i + 1); + _Py_atomic_store_ssize_relaxed(&di->len, len - 1); + return 0; + +concurrent_modification: + PyErr_SetString(PyExc_RuntimeError, + "dictionary keys changed during iteration"); + +fail: + di->di_dict = NULL; + Py_DECREF(d); + return -1; + + int res; +try_locked: + Py_BEGIN_CRITICAL_SECTION(d); + res = dictiter_iternextitem_lock_held(d, self, out_key, out_value); + Py_END_CRITICAL_SECTION(); + return res; +} + +#endif + +static bool +has_unique_reference(PyObject *op) +{ +#ifdef Py_GIL_DISABLED + return (_Py_IsOwnedByCurrentThread(op) && + op->ob_ref_local == 1 && + _Py_atomic_load_ssize_relaxed(&op->ob_ref_shared) == 0); +#else + return Py_REFCNT(op) == 1; +#endif +} + +static bool +acquire_iter_result(PyObject *result) +{ + if (has_unique_reference(result)) { + Py_INCREF(result); + return true; + } + return false; } static PyObject * @@ -5255,11 +5445,37 @@ dictiter_iternextitem(PyObject *self) if (d == NULL) return NULL; - PyObject *item; - Py_BEGIN_CRITICAL_SECTION(d); - item = dictiter_iternextitem_lock_held(d, self); - Py_END_CRITICAL_SECTION(); - return item; + PyObject *key, *value; +#ifdef Py_GIL_DISABLED + if (dictiter_iternext_threadsafe(d, self, &key, &value) == 0) { +#else + if (dictiter_iternextitem_lock_held(d, self, &key, &value) == 0) { + +#endif + PyObject *result = di->di_result; + if (acquire_iter_result(result)) { + PyObject *oldkey = PyTuple_GET_ITEM(result, 0); + PyObject *oldvalue = PyTuple_GET_ITEM(result, 1); + PyTuple_SET_ITEM(result, 0, key); + PyTuple_SET_ITEM(result, 1, value); + Py_DECREF(oldkey); + Py_DECREF(oldvalue); + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } + } + else { + result = PyTuple_New(2); + if (result == NULL) + return NULL; + PyTuple_SET_ITEM(result, 0, key); + PyTuple_SET_ITEM(result, 1, value); + } + return result; + } + return NULL; } PyTypeObject PyDictIterItem_Type = { @@ -6423,18 +6639,6 @@ _PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values) return make_dict_from_instance_attributes(interp, keys, values); } -static bool -has_unique_reference(PyObject *op) -{ -#ifdef Py_GIL_DISABLED - return (_Py_IsOwnedByCurrentThread(op) && - op->ob_ref_local == 1 && - _Py_atomic_load_ssize_relaxed(&op->ob_ref_shared) == 0); -#else - return Py_REFCNT(op) == 1; -#endif -} - // Return true if the dict was dematerialized, false otherwise. bool _PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv)