gh-112075: refactor dictionary lookup functions for better re-usability (#114629)

Refactor dict lookup functions to use force inline helpers
This commit is contained in:
Dino Viehland 2024-01-30 09:33:36 -08:00 committed by GitHub
parent 39d102c2ee
commit 0990d55725
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 118 additions and 120 deletions

View File

@ -874,140 +874,138 @@ lookdict_index(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t index)
Py_UNREACHABLE(); Py_UNREACHABLE();
} }
/* Shared probing loop used by the dict key lookup functions.
 *
 * Walks the index table of `dk`, starting at the slot selected by
 * `hash & mask`, and calls `check_lookup` for every slot that holds a
 * non-negative entry index.
 *
 * The `check_lookup` callback receives (mp, dk, entries, ix, key, hash) and
 * must return:
 *   - a negative value to abort the search; that value is returned as-is,
 *   - a positive (non-zero) value when entry `ix` matches; `ix` is returned,
 *   - 0 when entry `ix` does not match; probing continues.
 *
 * Returns DKIX_EMPTY as soon as a slot reporting DKIX_EMPTY is reached.
 * Any other negative slot value is skipped and probing continues
 * (presumably these mark deleted entries -- confirm against
 * dictkeys_get_index).
 *
 * `mp` is only forwarded to the callback; this function never dereferences it.
 */
static inline Py_ALWAYS_INLINE Py_ssize_t
do_lookup(PyDictObject *mp, PyDictKeysObject *dk, PyObject *key, Py_hash_t hash,
          Py_ssize_t (*check_lookup)(PyDictObject *, PyDictKeysObject *, void *, Py_ssize_t ix, PyObject *key, Py_hash_t))
{
    void *entries = _DK_ENTRIES(dk);
    const size_t mask = DK_MASK(dk);
    size_t perturb = (size_t)hash;
    size_t slot = (size_t)hash & mask;

    for (;;) {
        /* First probe of this iteration. */
        Py_ssize_t ix = dictkeys_get_index(dk, slot);
        if (ix >= 0) {
            Py_ssize_t verdict = check_lookup(mp, dk, entries, ix, key, hash);
            if (verdict != 0) {
                /* Negative: propagate the callback's code; positive: hit. */
                return (verdict < 0) ? verdict : ix;
            }
        }
        else if (ix == DKIX_EMPTY) {
            return DKIX_EMPTY;
        }
        perturb >>= PERTURB_SHIFT;
        slot = mask & (slot*5 + perturb + 1);

        /* Second probe: the loop body is deliberately unrolled by hand,
         * so each pass through the for-loop inspects two slots. */
        ix = dictkeys_get_index(dk, slot);
        if (ix >= 0) {
            Py_ssize_t verdict = check_lookup(mp, dk, entries, ix, key, hash);
            if (verdict != 0) {
                return (verdict < 0) ? verdict : ix;
            }
        }
        else if (ix == DKIX_EMPTY) {
            return DKIX_EMPTY;
        }
        perturb >>= PERTURB_SHIFT;
        slot = mask & (slot*5 + perturb + 1);
    }
    Py_UNREACHABLE();
}
/* Comparison callback for do_lookup(): match a key that is not an exact
 * unicode object against entry `ix` of a unicode-keyed entries array.
 *
 * `ep0` is treated as the base of a PyDictUnicodeEntry array.
 *
 * Returns:
 *   0                 the stored key's hash differs from `hash`, or the rich
 *                     comparison reported "not equal",
 *   1                 the rich comparison reported "equal",
 *   DKIX_ERROR        the rich comparison failed,
 *   DKIX_KEY_CHANGED  the dict's keys object or the entry's key changed
 *                     while comparing; the caller is expected to restart.
 */
static inline Py_ALWAYS_INLINE Py_ssize_t
compare_unicode_generic(PyDictObject *mp, PyDictKeysObject *dk,
                        void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash)
{
    PyDictUnicodeEntry *entry = (PyDictUnicodeEntry *)ep0 + ix;
    assert(entry->me_key != NULL);
    assert(PyUnicode_CheckExact(entry->me_key));
    assert(!PyUnicode_CheckExact(key));
    // TODO: Thread safety

    /* Cheap rejection: different hashes mean no comparison is attempted. */
    if (unicode_get_hash(entry->me_key) != hash) {
        return 0;
    }

    /* Hold our own reference to the stored key for the duration of the
     * comparison; the dict may be mutated meanwhile (checked below). */
    PyObject *candidate = entry->me_key;
    Py_INCREF(candidate);
    int equal = PyObject_RichCompareBool(candidate, key, Py_EQ);
    Py_DECREF(candidate);

    if (equal < 0) {
        return DKIX_ERROR;
    }
    /* `dk` is tested first so that `entry` is not read when the dict no
     * longer uses this keys object. */
    if (dk != mp->ma_keys || entry->me_key != candidate) {
        /* The dict was mutated, restart */
        return DKIX_KEY_CHANGED;
    }
    return equal;
}
// Search non-Unicode key from Unicode table // Search non-Unicode key from Unicode table
static Py_ssize_t static Py_ssize_t
unicodekeys_lookup_generic(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) unicodekeys_lookup_generic(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash)
{ {
PyDictUnicodeEntry *ep0 = DK_UNICODE_ENTRIES(dk); return do_lookup(mp, dk, key, hash, compare_unicode_generic);
size_t mask = DK_MASK(dk); }
size_t perturb = hash;
size_t i = (size_t)hash & mask; static inline Py_ALWAYS_INLINE Py_ssize_t
Py_ssize_t ix; compare_unicode_unicode(PyDictObject *mp, PyDictKeysObject *dk,
for (;;) { void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash)
ix = dictkeys_get_index(dk, i); {
if (ix >= 0) { PyDictUnicodeEntry *ep = &((PyDictUnicodeEntry *)ep0)[ix];
PyDictUnicodeEntry *ep = &ep0[ix]; assert(ep->me_key != NULL);
assert(ep->me_key != NULL); assert(PyUnicode_CheckExact(ep->me_key));
assert(PyUnicode_CheckExact(ep->me_key)); if (ep->me_key == key ||
if (ep->me_key == key) { (unicode_get_hash(ep->me_key) == hash && unicode_eq(ep->me_key, key))) {
return ix; return 1;
} }
if (unicode_get_hash(ep->me_key) == hash) { return 0;
PyObject *startkey = ep->me_key;
Py_INCREF(startkey);
int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
if (cmp < 0) {
return DKIX_ERROR;
}
if (dk == mp->ma_keys && ep->me_key == startkey) {
if (cmp > 0) {
return ix;
}
}
else {
/* The dict was mutated, restart */
return DKIX_KEY_CHANGED;
}
}
}
else if (ix == DKIX_EMPTY) {
return DKIX_EMPTY;
}
perturb >>= PERTURB_SHIFT;
i = mask & (i*5 + perturb + 1);
}
Py_UNREACHABLE();
} }
// Search Unicode key from Unicode table.
static Py_ssize_t _Py_HOT_FUNCTION static Py_ssize_t _Py_HOT_FUNCTION
unicodekeys_lookup_unicode(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) unicodekeys_lookup_unicode(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash)
{ {
PyDictUnicodeEntry *ep0 = DK_UNICODE_ENTRIES(dk); return do_lookup(NULL, dk, key, hash, compare_unicode_unicode);
size_t mask = DK_MASK(dk); }
size_t perturb = hash;
size_t i = (size_t)hash & mask; static inline Py_ALWAYS_INLINE Py_ssize_t
Py_ssize_t ix; compare_generic(PyDictObject *mp, PyDictKeysObject *dk,
for (;;) { void *ep0, Py_ssize_t ix, PyObject *key, Py_hash_t hash)
ix = dictkeys_get_index(dk, i); {
if (ix >= 0) { PyDictKeyEntry *ep = &((PyDictKeyEntry *)ep0)[ix];
PyDictUnicodeEntry *ep = &ep0[ix]; assert(ep->me_key != NULL);
assert(ep->me_key != NULL); if (ep->me_key == key) {
assert(PyUnicode_CheckExact(ep->me_key)); return 1;
if (ep->me_key == key || }
(unicode_get_hash(ep->me_key) == hash && unicode_eq(ep->me_key, key))) { if (ep->me_hash == hash) {
return ix; PyObject *startkey = ep->me_key;
} Py_INCREF(startkey);
} int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
else if (ix == DKIX_EMPTY) { Py_DECREF(startkey);
return DKIX_EMPTY; if (cmp < 0) {
} return DKIX_ERROR;
perturb >>= PERTURB_SHIFT; }
i = mask & (i*5 + perturb + 1); if (dk == mp->ma_keys && ep->me_key == startkey) {
// Manual loop unrolling return cmp;
ix = dictkeys_get_index(dk, i); }
if (ix >= 0) { else {
PyDictUnicodeEntry *ep = &ep0[ix]; /* The dict was mutated, restart */
assert(ep->me_key != NULL); return DKIX_KEY_CHANGED;
assert(PyUnicode_CheckExact(ep->me_key)); }
if (ep->me_key == key || }
(unicode_get_hash(ep->me_key) == hash && unicode_eq(ep->me_key, key))) { return 0;
return ix;
}
}
else if (ix == DKIX_EMPTY) {
return DKIX_EMPTY;
}
perturb >>= PERTURB_SHIFT;
i = mask & (i*5 + perturb + 1);
}
Py_UNREACHABLE();
} }
// Search key from Generic table.
static Py_ssize_t static Py_ssize_t
dictkeys_generic_lookup(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash) dictkeys_generic_lookup(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, Py_hash_t hash)
{ {
PyDictKeyEntry *ep0 = DK_ENTRIES(dk); return do_lookup(mp, dk, key, hash, compare_generic);
size_t mask = DK_MASK(dk);
size_t perturb = hash;
size_t i = (size_t)hash & mask;
Py_ssize_t ix;
for (;;) {
ix = dictkeys_get_index(dk, i);
if (ix >= 0) {
PyDictKeyEntry *ep = &ep0[ix];
assert(ep->me_key != NULL);
if (ep->me_key == key) {
return ix;
}
if (ep->me_hash == hash) {
PyObject *startkey = ep->me_key;
Py_INCREF(startkey);
int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
if (cmp < 0) {
return DKIX_ERROR;
}
if (dk == mp->ma_keys && ep->me_key == startkey) {
if (cmp > 0) {
return ix;
}
}
else {
/* The dict was mutated, restart */
return DKIX_KEY_CHANGED;
}
}
}
else if (ix == DKIX_EMPTY) {
return DKIX_EMPTY;
}
perturb >>= PERTURB_SHIFT;
i = mask & (i*5 + perturb + 1);
}
Py_UNREACHABLE();
} }
/* Lookup a string in a (all unicode) dict keys. /* Lookup a string in a (all unicode) dict keys.