bpo-44206: Add a version number to dictionary keys (GH-26333)

* Store log2(size) instead of size in dict-keys.

* Use enum instead of function pointer to record kind of keys.

* Add version number to dict keys.
This commit is contained in:
Mark Shannon 2021-05-28 09:54:10 +01:00 committed by GitHub
parent 8994e9c2cd
commit f8a95df84b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 223 additions and 317 deletions

View File

@ -82,3 +82,7 @@ typedef struct {
PyAPI_FUNC(PyObject *) _PyDictView_New(PyObject *, PyTypeObject *); PyAPI_FUNC(PyObject *) _PyDictView_New(PyObject *, PyTypeObject *);
PyAPI_FUNC(PyObject *) _PyDictView_Intersect(PyObject* self, PyObject *other); PyAPI_FUNC(PyObject *) _PyDictView_Intersect(PyObject* self, PyObject *other);
/* Gets a version number unique to the current state of the keys of dict, if possible.
* Returns the version number, or zero if it was not possible to get a version number. */
uint32_t _PyDictKeys_GetVersionForCurrentState(PyDictObject *dict);

View File

@ -752,7 +752,7 @@ class CPythonOrderedDictTests(OrderedDictTests, unittest.TestCase):
check = self.check_sizeof check = self.check_sizeof
basicsize = size('nQ2P' + '3PnPn2P') basicsize = size('nQ2P' + '3PnPn2P')
keysize = calcsize('2nP2n') keysize = calcsize('n2BI2n')
entrysize = calcsize('n2P') entrysize = calcsize('n2P')
p = calcsize('P') p = calcsize('P')

View File

@ -22,6 +22,7 @@ import warnings
# strings to intern in test_intern() # strings to intern in test_intern()
INTERN_NUMRUNS = 0 INTERN_NUMRUNS = 0
DICT_KEY_STRUCT_FORMAT = 'n2BI2n'
class DisplayHookTest(unittest.TestCase): class DisplayHookTest(unittest.TestCase):
@ -1229,9 +1230,9 @@ class SizeofTest(unittest.TestCase):
# empty dict # empty dict
check({}, size('nQ2P')) check({}, size('nQ2P'))
# dict # dict
check({"a": 1}, size('nQ2P') + calcsize('2nP2n') + 8 + (8*2//3)*calcsize('n2P')) check({"a": 1}, size('nQ2P') + calcsize(DICT_KEY_STRUCT_FORMAT) + 8 + (8*2//3)*calcsize('n2P'))
longdict = {1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8} longdict = {1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8}
check(longdict, size('nQ2P') + calcsize('2nP2n') + 16 + (16*2//3)*calcsize('n2P')) check(longdict, size('nQ2P') + calcsize(DICT_KEY_STRUCT_FORMAT) + 16 + (16*2//3)*calcsize('n2P'))
# dictionary-keyview # dictionary-keyview
check({}.keys(), size('P')) check({}.keys(), size('P'))
# dictionary-valueview # dictionary-valueview
@ -1385,13 +1386,13 @@ class SizeofTest(unittest.TestCase):
'5P') '5P')
class newstyleclass(object): pass class newstyleclass(object): pass
# Separate block for PyDictKeysObject with 8 keys and 5 entries # Separate block for PyDictKeysObject with 8 keys and 5 entries
check(newstyleclass, s + calcsize("2nP2n0P") + 8 + 5*calcsize("n2P")) check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 8 + 5*calcsize("n2P"))
# dict with shared keys # dict with shared keys
check(newstyleclass().__dict__, size('nQ2P') + 5*self.P) check(newstyleclass().__dict__, size('nQ2P') + 5*self.P)
o = newstyleclass() o = newstyleclass()
o.a = o.b = o.c = o.d = o.e = o.f = o.g = o.h = 1 o.a = o.b = o.c = o.d = o.e = o.f = o.g = o.h = 1
# Separate block for PyDictKeysObject with 16 keys and 10 entries # Separate block for PyDictKeysObject with 16 keys and 10 entries
check(newstyleclass, s + calcsize("2nP2n0P") + 16 + 10*calcsize("n2P")) check(newstyleclass, s + calcsize(DICT_KEY_STRUCT_FORMAT) + 16 + 10*calcsize("n2P"))
# dict with shared keys # dict with shared keys
check(newstyleclass().__dict__, size('nQ2P') + 10*self.P) check(newstyleclass().__dict__, size('nQ2P') + 10*self.P)
# unicode # unicode

View File

@ -8,37 +8,34 @@ typedef struct {
PyObject *me_value; /* This field is only meaningful for combined tables */ PyObject *me_value; /* This field is only meaningful for combined tables */
} PyDictKeyEntry; } PyDictKeyEntry;
/* dict_lookup_func() returns index of entry which can be used like DK_ENTRIES(dk)[index]. /* _Py_dict_lookup() returns index of entry which can be used like DK_ENTRIES(dk)[index].
* -1 when no entry found, -3 when compare raises error. * -1 when no entry found, -3 when compare raises error.
*/ */
typedef Py_ssize_t (*dict_lookup_func) Py_ssize_t _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **value_addr);
(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **value_addr);
#define DKIX_EMPTY (-1) #define DKIX_EMPTY (-1)
#define DKIX_DUMMY (-2) /* Used internally */ #define DKIX_DUMMY (-2) /* Used internally */
#define DKIX_ERROR (-3) #define DKIX_ERROR (-3)
typedef enum {
DICT_KEYS_GENERAL = 0,
DICT_KEYS_UNICODE = 1,
DICT_KEYS_SPLIT = 2
} DictKeysKind;
/* See dictobject.c for actual layout of DictKeysObject */ /* See dictobject.c for actual layout of DictKeysObject */
struct _dictkeysobject { struct _dictkeysobject {
Py_ssize_t dk_refcnt; Py_ssize_t dk_refcnt;
/* Size of the hash table (dk_indices). It must be a power of 2. */ /* Size of the hash table (dk_indices). It must be a power of 2. */
Py_ssize_t dk_size; uint8_t dk_log2_size;
/* Function to lookup in the hash table (dk_indices): /* Kind of keys */
uint8_t dk_kind;
- lookdict(): general-purpose, and may return DKIX_ERROR if (and /* Version number -- Reset to 0 by any modification to keys */
only if) a comparison raises an exception. uint32_t dk_version;
- lookdict_unicode(): specialized to Unicode string keys, comparison of
which can never raise an exception; that function can never return
DKIX_ERROR.
- lookdict_unicode_nodummy(): similar to lookdict_unicode() but further
specialized for Unicode string keys that cannot be the <dummy> value.
- lookdict_split(): Version of lookdict() for split tables. */
dict_lookup_func dk_lookup;
/* Number of usable entries in dk_entries. */ /* Number of usable entries in dk_entries. */
Py_ssize_t dk_usable; Py_ssize_t dk_usable;

View File

@ -18,8 +18,8 @@ layout:
+---------------+ +---------------+
| dk_refcnt | | dk_refcnt |
| dk_size | | dk_log2_size |
| dk_lookup | | dk_kind |
| dk_usable | | dk_usable |
| dk_nentries | | dk_nentries |
+---------------+ +---------------+
@ -108,6 +108,7 @@ converting the dict to the combined table.
* Making this 8, rather than 4 reduces the number of resizes for most * Making this 8, rather than 4 reduces the number of resizes for most
* dictionaries, without any significant extra memory use. * dictionaries, without any significant extra memory use.
*/ */
#define PyDict_LOG_MINSIZE 3
#define PyDict_MINSIZE 8 #define PyDict_MINSIZE 8
#include "Python.h" #include "Python.h"
@ -226,18 +227,7 @@ equally good collision statistics, needed less code & used less memory.
*/ */
/* forward declarations */ static int dictresize(PyDictObject *mp, uint8_t log_newsize);
static Py_ssize_t lookdict(PyDictObject *mp, PyObject *key,
Py_hash_t hash, PyObject **value_addr);
static Py_ssize_t lookdict_unicode(PyDictObject *mp, PyObject *key,
Py_hash_t hash, PyObject **value_addr);
static Py_ssize_t
lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
Py_hash_t hash, PyObject **value_addr);
static Py_ssize_t lookdict_split(PyDictObject *mp, PyObject *key,
Py_hash_t hash, PyObject **value_addr);
static int dictresize(PyDictObject *mp, Py_ssize_t newsize);
static PyObject* dict_iter(PyDictObject *dict); static PyObject* dict_iter(PyDictObject *dict);
@ -295,24 +285,25 @@ _PyDict_DebugMallocStats(FILE *out)
state->numfree, sizeof(PyDictObject)); state->numfree, sizeof(PyDictObject));
} }
#define DK_LOG_SIZE(dk) ((dk)->dk_log2_size)
#define DK_SIZE(dk) ((dk)->dk_size)
#if SIZEOF_VOID_P > 4 #if SIZEOF_VOID_P > 4
#define DK_IXSIZE(dk) \ #define DK_SIZE(dk) (((int64_t)1)<<DK_LOG_SIZE(dk))
(DK_SIZE(dk) <= 0xff ? \ #define DK_IXSIZE(dk) \
1 : DK_SIZE(dk) <= 0xffff ? \ (DK_LOG_SIZE(dk) <= 7 ? \
2 : DK_SIZE(dk) <= 0xffffffff ? \ 1 : DK_LOG_SIZE(dk) <= 15 ? \
2 : DK_LOG_SIZE(dk) <= 31 ? \
4 : sizeof(int64_t)) 4 : sizeof(int64_t))
#else #else
#define DK_IXSIZE(dk) \ #define DK_SIZE(dk) (1<<DK_LOG_SIZE(dk))
(DK_SIZE(dk) <= 0xff ? \ #define DK_IXSIZE(dk) \
1 : DK_SIZE(dk) <= 0xffff ? \ (DK_LOG_SIZE(dk) <= 7 ? \
1 : DK_LOG_SIZE(dk) <= 15 ? \
2 : sizeof(int32_t)) 2 : sizeof(int32_t))
#endif #endif
#define DK_ENTRIES(dk) \ #define DK_ENTRIES(dk) \
((PyDictKeyEntry*)(&((int8_t*)((dk)->dk_indices))[DK_SIZE(dk) * DK_IXSIZE(dk)])) ((PyDictKeyEntry*)(&((int8_t*)((dk)->dk_indices))[DK_SIZE(dk) * DK_IXSIZE(dk)]))
#define DK_MASK(dk) (((dk)->dk_size)-1) #define DK_MASK(dk) (DK_SIZE(dk)-1)
#define IS_POWER_OF_2(x) (((x) & (x-1)) == 0) #define IS_POWER_OF_2(x) (((x) & (x-1)) == 0)
static void free_keys_object(PyDictKeysObject *keys); static void free_keys_object(PyDictKeysObject *keys);
@ -374,6 +365,7 @@ dictkeys_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix)
Py_ssize_t s = DK_SIZE(keys); Py_ssize_t s = DK_SIZE(keys);
assert(ix >= DKIX_DUMMY); assert(ix >= DKIX_DUMMY);
assert(keys->dk_version == 0);
if (s <= 0xff) { if (s <= 0xff) {
int8_t *indices = (int8_t*)(keys->dk_indices); int8_t *indices = (int8_t*)(keys->dk_indices);
@ -413,25 +405,25 @@ dictkeys_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix)
#define USABLE_FRACTION(n) (((n) << 1)/3) #define USABLE_FRACTION(n) (((n) << 1)/3)
/* Find the smallest dk_size >= minsize. */ /* Find the smallest dk_size >= minsize. */
static inline Py_ssize_t static inline uint8_t
calculate_keysize(Py_ssize_t minsize) calculate_log2_keysize(Py_ssize_t minsize)
{ {
#if SIZEOF_LONG == SIZEOF_SIZE_T #if SIZEOF_LONG == SIZEOF_SIZE_T
minsize = (minsize | PyDict_MINSIZE) - 1; minsize = (minsize | PyDict_MINSIZE) - 1;
return 1LL << _Py_bit_length(minsize | (PyDict_MINSIZE-1)); return _Py_bit_length(minsize | (PyDict_MINSIZE-1));
#elif defined(_MSC_VER) #elif defined(_MSC_VER)
// On 64bit Windows, sizeof(long) == 4. // On 64bit Windows, sizeof(long) == 4.
minsize = (minsize | PyDict_MINSIZE) - 1; minsize = (minsize | PyDict_MINSIZE) - 1;
unsigned long msb; unsigned long msb;
_BitScanReverse64(&msb, (uint64_t)minsize); _BitScanReverse64(&msb, (uint64_t)minsize);
return 1LL << (msb + 1); return msb + 1;
#else #else
Py_ssize_t size; uint8_t log2_size;
for (size = PyDict_MINSIZE; for (log2_size = PyDict_LOG_MINSIZE;
size < minsize && size > 0; (((Py_ssize_t)1) << log2_size) < minsize;
size <<= 1) log2_size++)
; ;
return size; return log2_size;
#endif #endif
} }
@ -440,10 +432,10 @@ calculate_keysize(Py_ssize_t minsize)
* This can be used to reserve enough size to insert n entries without * This can be used to reserve enough size to insert n entries without
* resizing. * resizing.
*/ */
static inline Py_ssize_t static inline uint8_t
estimate_keysize(Py_ssize_t n) estimate_log2_keysize(Py_ssize_t n)
{ {
return calculate_keysize((n*3 + 1) / 2); return calculate_log2_keysize((n*3 + 1) / 2);
} }
@ -459,18 +451,14 @@ estimate_keysize(Py_ssize_t n)
*/ */
#define GROWTH_RATE(d) ((d)->ma_used*3) #define GROWTH_RATE(d) ((d)->ma_used*3)
#define ENSURE_ALLOWS_DELETIONS(d) \
if ((d)->ma_keys->dk_lookup == lookdict_unicode_nodummy) { \
(d)->ma_keys->dk_lookup = lookdict_unicode; \
}
/* This immutable, empty PyDictKeysObject is used for PyDict_Clear() /* This immutable, empty PyDictKeysObject is used for PyDict_Clear()
* (which cannot fail and thus can do no allocation). * (which cannot fail and thus can do no allocation).
*/ */
static PyDictKeysObject empty_keys_struct = { static PyDictKeysObject empty_keys_struct = {
1, /* dk_refcnt */ 1, /* dk_refcnt */
1, /* dk_size */ 0, /* dk_log2_size */
lookdict_split, /* dk_lookup */ DICT_KEYS_SPLIT, /* dk_kind */
1, /* dk_version */
0, /* dk_usable (immutable) */ 0, /* dk_usable (immutable) */
0, /* dk_nentries */ 0, /* dk_nentries */
{DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, {DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY,
@ -503,10 +491,9 @@ _PyDict_CheckConsistency(PyObject *op, int check_content)
PyDictKeysObject *keys = mp->ma_keys; PyDictKeysObject *keys = mp->ma_keys;
int splitted = _PyDict_HasSplitTable(mp); int splitted = _PyDict_HasSplitTable(mp);
Py_ssize_t usable = USABLE_FRACTION(keys->dk_size); Py_ssize_t usable = USABLE_FRACTION(DK_SIZE(keys));
CHECK(0 <= mp->ma_used && mp->ma_used <= usable); CHECK(0 <= mp->ma_used && mp->ma_used <= usable);
CHECK(IS_POWER_OF_2(keys->dk_size));
CHECK(0 <= keys->dk_usable && keys->dk_usable <= usable); CHECK(0 <= keys->dk_usable && keys->dk_usable <= usable);
CHECK(0 <= keys->dk_nentries && keys->dk_nentries <= usable); CHECK(0 <= keys->dk_nentries && keys->dk_nentries <= usable);
CHECK(keys->dk_usable + keys->dk_nentries <= usable); CHECK(keys->dk_usable + keys->dk_nentries <= usable);
@ -520,7 +507,7 @@ _PyDict_CheckConsistency(PyObject *op, int check_content)
PyDictKeyEntry *entries = DK_ENTRIES(keys); PyDictKeyEntry *entries = DK_ENTRIES(keys);
Py_ssize_t i; Py_ssize_t i;
for (i=0; i < keys->dk_size; i++) { for (i=0; i < DK_SIZE(keys); i++) {
Py_ssize_t ix = dictkeys_get_index(keys, i); Py_ssize_t ix = dictkeys_get_index(keys, i);
CHECK(DKIX_DUMMY <= ix && ix <= usable); CHECK(DKIX_DUMMY <= ix && ix <= usable);
} }
@ -563,23 +550,22 @@ _PyDict_CheckConsistency(PyObject *op, int check_content)
static PyDictKeysObject* static PyDictKeysObject*
new_keys_object(Py_ssize_t size) new_keys_object(uint8_t log2_size)
{ {
PyDictKeysObject *dk; PyDictKeysObject *dk;
Py_ssize_t es, usable; Py_ssize_t es, usable;
assert(size >= PyDict_MINSIZE); assert(log2_size >= PyDict_LOG_MINSIZE);
assert(IS_POWER_OF_2(size));
usable = USABLE_FRACTION(size); usable = USABLE_FRACTION(1<<log2_size);
if (size <= 0xff) { if (log2_size <= 7) {
es = 1; es = 1;
} }
else if (size <= 0xffff) { else if (log2_size <= 15) {
es = 2; es = 2;
} }
#if SIZEOF_VOID_P > 4 #if SIZEOF_VOID_P > 4
else if (size <= 0xffffffff) { else if (log2_size <= 31) {
es = 4; es = 4;
} }
#endif #endif
@ -592,13 +578,13 @@ new_keys_object(Py_ssize_t size)
// new_keys_object() must not be called after _PyDict_Fini() // new_keys_object() must not be called after _PyDict_Fini()
assert(state->keys_numfree != -1); assert(state->keys_numfree != -1);
#endif #endif
if (size == PyDict_MINSIZE && state->keys_numfree > 0) { if (log2_size == PyDict_LOG_MINSIZE && state->keys_numfree > 0) {
dk = state->keys_free_list[--state->keys_numfree]; dk = state->keys_free_list[--state->keys_numfree];
} }
else else
{ {
dk = PyObject_Malloc(sizeof(PyDictKeysObject) dk = PyObject_Malloc(sizeof(PyDictKeysObject)
+ es * size + (es<<log2_size)
+ sizeof(PyDictKeyEntry) * usable); + sizeof(PyDictKeyEntry) * usable);
if (dk == NULL) { if (dk == NULL) {
PyErr_NoMemory(); PyErr_NoMemory();
@ -609,11 +595,12 @@ new_keys_object(Py_ssize_t size)
_Py_RefTotal++; _Py_RefTotal++;
#endif #endif
dk->dk_refcnt = 1; dk->dk_refcnt = 1;
dk->dk_size = size; dk->dk_log2_size = log2_size;
dk->dk_usable = usable; dk->dk_usable = usable;
dk->dk_lookup = lookdict_unicode_nodummy; dk->dk_kind = DICT_KEYS_UNICODE;
dk->dk_nentries = 0; dk->dk_nentries = 0;
memset(&dk->dk_indices[0], 0xff, es * size); dk->dk_version = 0;
memset(&dk->dk_indices[0], 0xff, es * (1<<log2_size));
memset(DK_ENTRIES(dk), 0, sizeof(PyDictKeyEntry) * usable); memset(DK_ENTRIES(dk), 0, sizeof(PyDictKeyEntry) * usable);
return dk; return dk;
} }
@ -632,7 +619,7 @@ free_keys_object(PyDictKeysObject *keys)
// free_keys_object() must not be called after _PyDict_Fini() // free_keys_object() must not be called after _PyDict_Fini()
assert(state->keys_numfree != -1); assert(state->keys_numfree != -1);
#endif #endif
if (keys->dk_size == PyDict_MINSIZE && state->keys_numfree < PyDict_MAXFREELIST) { if (DK_SIZE(keys) == PyDict_MINSIZE && state->keys_numfree < PyDict_MAXFREELIST) {
state->keys_free_list[state->keys_numfree++] = keys; state->keys_free_list[state->keys_numfree++] = keys;
return; return;
} }
@ -778,36 +765,62 @@ probe indices are computed as explained earlier.
All arithmetic on hash should ignore overflow. All arithmetic on hash should ignore overflow.
The details in this version are due to Tim Peters, building on many past _Py_dict_lookup() is general-purpose, and may return DKIX_ERROR if (and only if) a
contributions by Reimer Behrends, Jyrki Alakuijala, Vladimir Marangozov and
Christian Tismer.
lookdict() is general-purpose, and may return DKIX_ERROR if (and only if) a
comparison raises an exception. comparison raises an exception.
lookdict_unicode() below is specialized to string keys, comparison of which can When the key isn't found a DKIX_EMPTY is returned.
never raise an exception; that function can never return DKIX_ERROR when key
is string. Otherwise, it falls back to lookdict().
lookdict_unicode_nodummy is further specialized for string keys that cannot be
the <dummy> value.
For both, when the key isn't found a DKIX_EMPTY is returned.
*/ */
static Py_ssize_t _Py_HOT_FUNCTION Py_ssize_t _Py_HOT_FUNCTION
lookdict(PyDictObject *mp, PyObject *key, _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **value_addr)
Py_hash_t hash, PyObject **value_addr)
{ {
size_t i, mask, perturb;
PyDictKeysObject *dk; PyDictKeysObject *dk;
PyDictKeyEntry *ep0; start:
top:
dk = mp->ma_keys; dk = mp->ma_keys;
ep0 = DK_ENTRIES(dk); DictKeysKind kind = dk->dk_kind;
mask = DK_MASK(dk); PyDictKeyEntry *ep0 = DK_ENTRIES(dk);
perturb = hash; size_t mask = DK_MASK(dk);
i = (size_t)hash & mask; size_t perturb = hash;
size_t i = (size_t)hash & mask;
Py_ssize_t ix;
if (PyUnicode_CheckExact(key) && kind != DICT_KEYS_GENERAL) {
/* Strings only */
for (;;) {
ix = dictkeys_get_index(mp->ma_keys, i);
if (ix >= 0) {
PyDictKeyEntry *ep = &ep0[ix];
assert(ep->me_key != NULL);
assert(PyUnicode_CheckExact(ep->me_key));
if (ep->me_key == key ||
(ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
goto found;
}
}
else if (ix == DKIX_EMPTY) {
*value_addr = NULL;
return DKIX_EMPTY;
}
perturb >>= PERTURB_SHIFT;
i = mask & (i*5 + perturb + 1);
ix = dictkeys_get_index(mp->ma_keys, i);
if (ix >= 0) {
PyDictKeyEntry *ep = &ep0[ix];
assert(ep->me_key != NULL);
assert(PyUnicode_CheckExact(ep->me_key));
if (ep->me_key == key ||
(ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
goto found;
}
}
else if (ix == DKIX_EMPTY) {
*value_addr = NULL;
return DKIX_EMPTY;
}
perturb >>= PERTURB_SHIFT;
i = mask & (i*5 + perturb + 1);
}
Py_UNREACHABLE();
}
for (;;) { for (;;) {
Py_ssize_t ix = dictkeys_get_index(dk, i); ix = dictkeys_get_index(dk, i);
if (ix == DKIX_EMPTY) { if (ix == DKIX_EMPTY) {
*value_addr = NULL; *value_addr = NULL;
return ix; return ix;
@ -816,8 +829,7 @@ top:
PyDictKeyEntry *ep = &ep0[ix]; PyDictKeyEntry *ep = &ep0[ix];
assert(ep->me_key != NULL); assert(ep->me_key != NULL);
if (ep->me_key == key) { if (ep->me_key == key) {
*value_addr = ep->me_value; goto found;
return ix;
} }
if (ep->me_hash == hash) { if (ep->me_hash == hash) {
PyObject *startkey = ep->me_key; PyObject *startkey = ep->me_key;
@ -830,13 +842,12 @@ top:
} }
if (dk == mp->ma_keys && ep->me_key == startkey) { if (dk == mp->ma_keys && ep->me_key == startkey) {
if (cmp > 0) { if (cmp > 0) {
*value_addr = ep->me_value; goto found;
return ix;
} }
} }
else { else {
/* The dict was mutated, restart */ /* The dict was mutated, restart */
goto top; goto start;
} }
} }
} }
@ -844,133 +855,14 @@ top:
i = (i*5 + perturb + 1) & mask; i = (i*5 + perturb + 1) & mask;
} }
Py_UNREACHABLE(); Py_UNREACHABLE();
} found:
if (dk->dk_kind == DICT_KEYS_SPLIT) {
/* Specialized version for string-only keys */ *value_addr = mp->ma_values[ix];
static Py_ssize_t _Py_HOT_FUNCTION
lookdict_unicode(PyDictObject *mp, PyObject *key,
Py_hash_t hash, PyObject **value_addr)
{
assert(mp->ma_values == NULL);
/* Make sure this function doesn't have to handle non-unicode keys,
including subclasses of str; e.g., one reason to subclass
unicodes is to override __eq__, and for speed we don't cater to
that here. */
if (!PyUnicode_CheckExact(key)) {
return lookdict(mp, key, hash, value_addr);
} }
else {
PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys); *value_addr = ep0[ix].me_value;
size_t mask = DK_MASK(mp->ma_keys);
size_t perturb = (size_t)hash;
size_t i = (size_t)hash & mask;
for (;;) {
Py_ssize_t ix = dictkeys_get_index(mp->ma_keys, i);
if (ix == DKIX_EMPTY) {
*value_addr = NULL;
return DKIX_EMPTY;
}
if (ix >= 0) {
PyDictKeyEntry *ep = &ep0[ix];
assert(ep->me_key != NULL);
assert(PyUnicode_CheckExact(ep->me_key));
if (ep->me_key == key ||
(ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
*value_addr = ep->me_value;
return ix;
}
}
perturb >>= PERTURB_SHIFT;
i = mask & (i*5 + perturb + 1);
} }
Py_UNREACHABLE(); return ix;
}
/* Faster version of lookdict_unicode when it is known that no <dummy> keys
* will be present. */
static Py_ssize_t _Py_HOT_FUNCTION
lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
Py_hash_t hash, PyObject **value_addr)
{
assert(mp->ma_values == NULL);
/* Make sure this function doesn't have to handle non-unicode keys,
including subclasses of str; e.g., one reason to subclass
unicodes is to override __eq__, and for speed we don't cater to
that here. */
if (!PyUnicode_CheckExact(key)) {
return lookdict(mp, key, hash, value_addr);
}
PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
size_t mask = DK_MASK(mp->ma_keys);
size_t perturb = (size_t)hash;
size_t i = (size_t)hash & mask;
for (;;) {
Py_ssize_t ix = dictkeys_get_index(mp->ma_keys, i);
assert (ix != DKIX_DUMMY);
if (ix == DKIX_EMPTY) {
*value_addr = NULL;
return DKIX_EMPTY;
}
PyDictKeyEntry *ep = &ep0[ix];
assert(ep->me_key != NULL);
assert(PyUnicode_CheckExact(ep->me_key));
if (ep->me_key == key ||
(ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
*value_addr = ep->me_value;
return ix;
}
perturb >>= PERTURB_SHIFT;
i = mask & (i*5 + perturb + 1);
}
Py_UNREACHABLE();
}
/* Version of lookdict for split tables.
* All split tables and only split tables use this lookup function.
* Split tables only contain unicode keys and no dummy keys,
* so algorithm is the same as lookdict_unicode_nodummy.
*/
static Py_ssize_t _Py_HOT_FUNCTION
lookdict_split(PyDictObject *mp, PyObject *key,
Py_hash_t hash, PyObject **value_addr)
{
/* mp must split table */
assert(mp->ma_values != NULL);
if (!PyUnicode_CheckExact(key)) {
Py_ssize_t ix = lookdict(mp, key, hash, value_addr);
if (ix >= 0) {
*value_addr = mp->ma_values[ix];
}
return ix;
}
PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
size_t mask = DK_MASK(mp->ma_keys);
size_t perturb = (size_t)hash;
size_t i = (size_t)hash & mask;
for (;;) {
Py_ssize_t ix = dictkeys_get_index(mp->ma_keys, i);
assert (ix != DKIX_DUMMY);
if (ix == DKIX_EMPTY) {
*value_addr = NULL;
return DKIX_EMPTY;
}
PyDictKeyEntry *ep = &ep0[ix];
assert(ep->me_key != NULL);
assert(PyUnicode_CheckExact(ep->me_key));
if (ep->me_key == key ||
(ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
*value_addr = mp->ma_values[ix];
return ix;
}
perturb >>= PERTURB_SHIFT;
i = mask & (i*5 + perturb + 1);
}
Py_UNREACHABLE();
} }
int int
@ -980,7 +872,7 @@ _PyDict_HasOnlyStringKeys(PyObject *dict)
PyObject *key, *value; PyObject *key, *value;
assert(PyDict_Check(dict)); assert(PyDict_Check(dict));
/* Shortcut */ /* Shortcut */
if (((PyDictObject *)dict)->ma_keys->dk_lookup != lookdict) if (((PyDictObject *)dict)->ma_keys->dk_kind != DICT_KEYS_GENERAL)
return 1; return 1;
while (PyDict_Next(dict, &pos, &key, &value)) while (PyDict_Next(dict, &pos, &key, &value))
if (!PyUnicode_Check(key)) if (!PyUnicode_Check(key))
@ -1057,7 +949,7 @@ find_empty_slot(PyDictKeysObject *keys, Py_hash_t hash)
static int static int
insertion_resize(PyDictObject *mp) insertion_resize(PyDictObject *mp)
{ {
return dictresize(mp, calculate_keysize(GROWTH_RATE(mp))); return dictresize(mp, calculate_log2_keysize(GROWTH_RATE(mp)));
} }
/* /*
@ -1078,7 +970,7 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
goto Fail; goto Fail;
} }
Py_ssize_t ix = mp->ma_keys->dk_lookup(mp, key, hash, &old_value); Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, &old_value);
if (ix == DKIX_ERROR) if (ix == DKIX_ERROR)
goto Fail; goto Fail;
@ -1097,14 +989,15 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
if (ix == DKIX_EMPTY) { if (ix == DKIX_EMPTY) {
/* Insert into new slot. */ /* Insert into new slot. */
mp->ma_keys->dk_version = 0;
assert(old_value == NULL); assert(old_value == NULL);
if (mp->ma_keys->dk_usable <= 0) { if (mp->ma_keys->dk_usable <= 0) {
/* Need to resize. */ /* Need to resize. */
if (insertion_resize(mp) < 0) if (insertion_resize(mp) < 0)
goto Fail; goto Fail;
} }
if (!PyUnicode_CheckExact(key) && mp->ma_keys->dk_lookup != lookdict) { if (!PyUnicode_CheckExact(key) && mp->ma_keys->dk_kind != DICT_KEYS_GENERAL) {
mp->ma_keys->dk_lookup = lookdict; mp->ma_keys->dk_kind = DICT_KEYS_GENERAL;
} }
Py_ssize_t hashpos = find_empty_slot(mp->ma_keys, hash); Py_ssize_t hashpos = find_empty_slot(mp->ma_keys, hash);
ep = &DK_ENTRIES(mp->ma_keys)[mp->ma_keys->dk_nentries]; ep = &DK_ENTRIES(mp->ma_keys)[mp->ma_keys->dk_nentries];
@ -1160,12 +1053,12 @@ insert_to_emptydict(PyDictObject *mp, PyObject *key, Py_hash_t hash,
{ {
assert(mp->ma_keys == Py_EMPTY_KEYS); assert(mp->ma_keys == Py_EMPTY_KEYS);
PyDictKeysObject *newkeys = new_keys_object(PyDict_MINSIZE); PyDictKeysObject *newkeys = new_keys_object(PyDict_LOG_MINSIZE);
if (newkeys == NULL) { if (newkeys == NULL) {
return -1; return -1;
} }
if (!PyUnicode_CheckExact(key)) { if (!PyUnicode_CheckExact(key)) {
newkeys->dk_lookup = lookdict; newkeys->dk_kind = DICT_KEYS_GENERAL;
} }
dictkeys_decref(Py_EMPTY_KEYS); dictkeys_decref(Py_EMPTY_KEYS);
mp->ma_keys = newkeys; mp->ma_keys = newkeys;
@ -1217,19 +1110,18 @@ After resizing a table is always combined,
but can be resplit by make_keys_shared(). but can be resplit by make_keys_shared().
*/ */
static int static int
dictresize(PyDictObject *mp, Py_ssize_t newsize) dictresize(PyDictObject *mp, uint8_t log2_newsize)
{ {
Py_ssize_t numentries; Py_ssize_t numentries;
PyDictKeysObject *oldkeys; PyDictKeysObject *oldkeys;
PyObject **oldvalues; PyObject **oldvalues;
PyDictKeyEntry *oldentries, *newentries; PyDictKeyEntry *oldentries, *newentries;
if (newsize <= 0) { if (log2_newsize >= SIZEOF_SIZE_T*8) {
PyErr_NoMemory(); PyErr_NoMemory();
return -1; return -1;
} }
assert(IS_POWER_OF_2(newsize)); assert(log2_newsize >= PyDict_LOG_MINSIZE);
assert(newsize >= PyDict_MINSIZE);
oldkeys = mp->ma_keys; oldkeys = mp->ma_keys;
@ -1239,15 +1131,15 @@ dictresize(PyDictObject *mp, Py_ssize_t newsize)
*/ */
/* Allocate a new table. */ /* Allocate a new table. */
mp->ma_keys = new_keys_object(newsize); mp->ma_keys = new_keys_object(log2_newsize);
if (mp->ma_keys == NULL) { if (mp->ma_keys == NULL) {
mp->ma_keys = oldkeys; mp->ma_keys = oldkeys;
return -1; return -1;
} }
// New table must be large enough. // New table must be large enough.
assert(mp->ma_keys->dk_usable >= mp->ma_used); assert(mp->ma_keys->dk_usable >= mp->ma_used);
if (oldkeys->dk_lookup == lookdict) if (oldkeys->dk_kind == DICT_KEYS_GENERAL)
mp->ma_keys->dk_lookup = lookdict; mp->ma_keys->dk_kind = DICT_KEYS_GENERAL;
numentries = mp->ma_used; numentries = mp->ma_used;
oldentries = DK_ENTRIES(oldkeys); oldentries = DK_ENTRIES(oldkeys);
@ -1287,7 +1179,7 @@ dictresize(PyDictObject *mp, Py_ssize_t newsize)
} }
} }
assert(oldkeys->dk_lookup != lookdict_split); assert(oldkeys->dk_kind != DICT_KEYS_SPLIT);
assert(oldkeys->dk_refcnt == 1); assert(oldkeys->dk_refcnt == 1);
#ifdef Py_REF_DEBUG #ifdef Py_REF_DEBUG
_Py_RefTotal--; _Py_RefTotal--;
@ -1297,7 +1189,7 @@ dictresize(PyDictObject *mp, Py_ssize_t newsize)
// dictresize() must not be called after _PyDict_Fini() // dictresize() must not be called after _PyDict_Fini()
assert(state->keys_numfree != -1); assert(state->keys_numfree != -1);
#endif #endif
if (oldkeys->dk_size == PyDict_MINSIZE && if (DK_SIZE(oldkeys) == PyDict_MINSIZE &&
state->keys_numfree < PyDict_MAXFREELIST) state->keys_numfree < PyDict_MAXFREELIST)
{ {
state->keys_free_list[state->keys_numfree++] = oldkeys; state->keys_free_list[state->keys_numfree++] = oldkeys;
@ -1328,15 +1220,15 @@ make_keys_shared(PyObject *op)
PyDictKeyEntry *ep0; PyDictKeyEntry *ep0;
PyObject **values; PyObject **values;
assert(mp->ma_keys->dk_refcnt == 1); assert(mp->ma_keys->dk_refcnt == 1);
if (mp->ma_keys->dk_lookup == lookdict) { if (mp->ma_keys->dk_kind == DICT_KEYS_GENERAL) {
return NULL; return NULL;
} }
else if (mp->ma_keys->dk_lookup == lookdict_unicode) { else if (mp->ma_used > mp->ma_keys->dk_nentries) {
/* Remove dummy keys */ /* Remove dummy keys */
if (dictresize(mp, DK_SIZE(mp->ma_keys))) if (dictresize(mp, DK_LOG_SIZE(mp->ma_keys)))
return NULL; return NULL;
} }
assert(mp->ma_keys->dk_lookup == lookdict_unicode_nodummy); assert(mp->ma_used == mp->ma_keys->dk_nentries);
/* Copy values into a new array */ /* Copy values into a new array */
ep0 = DK_ENTRIES(mp->ma_keys); ep0 = DK_ENTRIES(mp->ma_keys);
size = USABLE_FRACTION(DK_SIZE(mp->ma_keys)); size = USABLE_FRACTION(DK_SIZE(mp->ma_keys));
@ -1350,7 +1242,7 @@ make_keys_shared(PyObject *op)
values[i] = ep0[i].me_value; values[i] = ep0[i].me_value;
ep0[i].me_value = NULL; ep0[i].me_value = NULL;
} }
mp->ma_keys->dk_lookup = lookdict_split; mp->ma_keys->dk_kind = DICT_KEYS_SPLIT;
mp->ma_values = values; mp->ma_values = values;
} }
dictkeys_incref(mp->ma_keys); dictkeys_incref(mp->ma_keys);
@ -1360,8 +1252,9 @@ make_keys_shared(PyObject *op)
PyObject * PyObject *
_PyDict_NewPresized(Py_ssize_t minused) _PyDict_NewPresized(Py_ssize_t minused)
{ {
const Py_ssize_t max_presize = 128 * 1024; const uint8_t log2_max_presize = 17;
Py_ssize_t newsize; const Py_ssize_t max_presize = ((Py_ssize_t)1) << log2_max_presize;
uint8_t log2_newsize;
PyDictKeysObject *new_keys; PyDictKeysObject *new_keys;
if (minused <= USABLE_FRACTION(PyDict_MINSIZE)) { if (minused <= USABLE_FRACTION(PyDict_MINSIZE)) {
@ -1372,13 +1265,13 @@ _PyDict_NewPresized(Py_ssize_t minused)
* large dict or MemoryError. * large dict or MemoryError.
*/ */
if (minused > USABLE_FRACTION(max_presize)) { if (minused > USABLE_FRACTION(max_presize)) {
newsize = max_presize; log2_newsize = log2_max_presize;
} }
else { else {
newsize = estimate_keysize(minused); log2_newsize = estimate_log2_keysize(minused);
} }
new_keys = new_keys_object(newsize); new_keys = new_keys_object(log2_newsize);
if (new_keys == NULL) if (new_keys == NULL)
return NULL; return NULL;
return new_dict(new_keys, NULL); return new_dict(new_keys, NULL);
@ -1426,14 +1319,13 @@ PyDict_GetItem(PyObject *op, PyObject *key)
Py_ssize_t ix; Py_ssize_t ix;
_PyErr_Fetch(tstate, &exc_type, &exc_value, &exc_tb); _PyErr_Fetch(tstate, &exc_type, &exc_value, &exc_tb);
ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value); ix = _Py_dict_lookup(mp, key, hash, &value);
/* Ignore any exception raised by the lookup */ /* Ignore any exception raised by the lookup */
_PyErr_Restore(tstate, exc_type, exc_value, exc_tb); _PyErr_Restore(tstate, exc_type, exc_value, exc_tb);
if (ix < 0) {
return NULL; assert(ix >= 0 || value == NULL);
}
return value; return value;
} }
@ -1450,7 +1342,7 @@ _PyDict_GetItemHint(PyDictObject *mp, PyObject *key,
PyDictKeyEntry *ep = DK_ENTRIES(mp->ma_keys) + (size_t)hint; PyDictKeyEntry *ep = DK_ENTRIES(mp->ma_keys) + (size_t)hint;
if (ep->me_key == key) { if (ep->me_key == key) {
if (mp->ma_keys->dk_lookup == lookdict_split) { if (mp->ma_keys->dk_kind == DICT_KEYS_SPLIT) {
assert(mp->ma_values != NULL); assert(mp->ma_values != NULL);
res = mp->ma_values[(size_t)hint]; res = mp->ma_values[(size_t)hint];
} }
@ -1472,7 +1364,7 @@ _PyDict_GetItemHint(PyDictObject *mp, PyObject *key,
} }
} }
return (mp->ma_keys->dk_lookup)(mp, key, hash, value); return _Py_dict_lookup(mp, key, hash, value);
} }
/* Same as PyDict_GetItemWithError() but with hash supplied by caller. /* Same as PyDict_GetItemWithError() but with hash supplied by caller.
@ -1491,10 +1383,8 @@ _PyDict_GetItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
return NULL; return NULL;
} }
ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value); ix = _Py_dict_lookup(mp, key, hash, &value);
if (ix < 0) { assert(ix >= 0 || value == NULL);
return NULL;
}
return value; return value;
} }
@ -1523,9 +1413,8 @@ PyDict_GetItemWithError(PyObject *op, PyObject *key)
} }
} }
ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value); ix = _Py_dict_lookup(mp, key, hash, &value);
if (ix < 0) assert(ix >= 0 || value == NULL);
return NULL;
return value; return value;
} }
@ -1577,16 +1466,15 @@ _PyDict_LoadGlobal(PyDictObject *globals, PyDictObject *builtins, PyObject *key)
} }
/* namespace 1: globals */ /* namespace 1: globals */
ix = globals->ma_keys->dk_lookup(globals, key, hash, &value); ix = _Py_dict_lookup(globals, key, hash, &value);
if (ix == DKIX_ERROR) if (ix == DKIX_ERROR)
return NULL; return NULL;
if (ix != DKIX_EMPTY && value != NULL) if (ix != DKIX_EMPTY && value != NULL)
return value; return value;
/* namespace 2: builtins */ /* namespace 2: builtins */
ix = builtins->ma_keys->dk_lookup(builtins, key, hash, &value); ix = _Py_dict_lookup(builtins, key, hash, &value);
if (ix < 0) assert(ix >= 0 || value == NULL);
return NULL;
return value; return value;
} }
@ -1659,7 +1547,7 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix,
mp->ma_version_tag = DICT_NEXT_VERSION(); mp->ma_version_tag = DICT_NEXT_VERSION();
ep = &DK_ENTRIES(mp->ma_keys)[ix]; ep = &DK_ENTRIES(mp->ma_keys)[ix];
dictkeys_set_index(mp->ma_keys, hashpos, DKIX_DUMMY); dictkeys_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
ENSURE_ALLOWS_DELETIONS(mp); mp->ma_keys->dk_version = 0;
old_key = ep->me_key; old_key = ep->me_key;
ep->me_key = NULL; ep->me_key = NULL;
ep->me_value = NULL; ep->me_value = NULL;
@ -1699,7 +1587,7 @@ _PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
assert(key); assert(key);
assert(hash != -1); assert(hash != -1);
mp = (PyDictObject *)op; mp = (PyDictObject *)op;
ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &old_value); ix = _Py_dict_lookup(mp, key, hash, &old_value);
if (ix == DKIX_ERROR) if (ix == DKIX_ERROR)
return -1; return -1;
if (ix == DKIX_EMPTY || old_value == NULL) { if (ix == DKIX_EMPTY || old_value == NULL) {
@ -1709,10 +1597,10 @@ _PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
// Split table doesn't allow deletion. Combine it. // Split table doesn't allow deletion. Combine it.
if (_PyDict_HasSplitTable(mp)) { if (_PyDict_HasSplitTable(mp)) {
if (dictresize(mp, DK_SIZE(mp->ma_keys))) { if (dictresize(mp, DK_LOG_SIZE(mp->ma_keys))) {
return -1; return -1;
} }
ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &old_value); ix = _Py_dict_lookup(mp, key, hash, &old_value);
assert(ix >= 0); assert(ix >= 0);
} }
@ -1742,7 +1630,7 @@ _PyDict_DelItemIf(PyObject *op, PyObject *key,
if (hash == -1) if (hash == -1)
return -1; return -1;
mp = (PyDictObject *)op; mp = (PyDictObject *)op;
ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &old_value); ix = _Py_dict_lookup(mp, key, hash, &old_value);
if (ix == DKIX_ERROR) if (ix == DKIX_ERROR)
return -1; return -1;
if (ix == DKIX_EMPTY || old_value == NULL) { if (ix == DKIX_EMPTY || old_value == NULL) {
@ -1752,10 +1640,10 @@ _PyDict_DelItemIf(PyObject *op, PyObject *key,
// Split table doesn't allow deletion. Combine it. // Split table doesn't allow deletion. Combine it.
if (_PyDict_HasSplitTable(mp)) { if (_PyDict_HasSplitTable(mp)) {
if (dictresize(mp, DK_SIZE(mp->ma_keys))) { if (dictresize(mp, DK_LOG_SIZE(mp->ma_keys))) {
return -1; return -1;
} }
ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &old_value); ix = _Py_dict_lookup(mp, key, hash, &old_value);
assert(ix >= 0); assert(ix >= 0);
} }
@ -1902,7 +1790,7 @@ _PyDict_Pop_KnownHash(PyObject *dict, PyObject *key, Py_hash_t hash, PyObject *d
_PyErr_SetKeyError(key); _PyErr_SetKeyError(key);
return NULL; return NULL;
} }
ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &old_value); ix = _Py_dict_lookup(mp, key, hash, &old_value);
if (ix == DKIX_ERROR) if (ix == DKIX_ERROR)
return NULL; return NULL;
if (ix == DKIX_EMPTY || old_value == NULL) { if (ix == DKIX_EMPTY || old_value == NULL) {
@ -1916,10 +1804,10 @@ _PyDict_Pop_KnownHash(PyObject *dict, PyObject *key, Py_hash_t hash, PyObject *d
// Split table doesn't allow deletion. Combine it. // Split table doesn't allow deletion. Combine it.
if (_PyDict_HasSplitTable(mp)) { if (_PyDict_HasSplitTable(mp)) {
if (dictresize(mp, DK_SIZE(mp->ma_keys))) { if (dictresize(mp, DK_LOG_SIZE(mp->ma_keys))) {
return NULL; return NULL;
} }
ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &old_value); ix = _Py_dict_lookup(mp, key, hash, &old_value);
assert(ix >= 0); assert(ix >= 0);
} }
@ -1930,7 +1818,7 @@ _PyDict_Pop_KnownHash(PyObject *dict, PyObject *key, Py_hash_t hash, PyObject *d
mp->ma_version_tag = DICT_NEXT_VERSION(); mp->ma_version_tag = DICT_NEXT_VERSION();
dictkeys_set_index(mp->ma_keys, hashpos, DKIX_DUMMY); dictkeys_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
ep = &DK_ENTRIES(mp->ma_keys)[ix]; ep = &DK_ENTRIES(mp->ma_keys)[ix];
ENSURE_ALLOWS_DELETIONS(mp); mp->ma_keys->dk_version = 0;
old_key = ep->me_key; old_key = ep->me_key;
ep->me_key = NULL; ep->me_key = NULL;
ep->me_value = NULL; ep->me_value = NULL;
@ -1983,7 +1871,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
PyObject *key; PyObject *key;
Py_hash_t hash; Py_hash_t hash;
if (dictresize(mp, estimate_keysize(PyDict_GET_SIZE(iterable)))) { if (dictresize(mp, estimate_log2_keysize(PyDict_GET_SIZE(iterable)))) {
Py_DECREF(d); Py_DECREF(d);
return NULL; return NULL;
} }
@ -2002,7 +1890,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
PyObject *key; PyObject *key;
Py_hash_t hash; Py_hash_t hash;
if (dictresize(mp, estimate_keysize(PySet_GET_SIZE(iterable)))) { if (dictresize(mp, estimate_log2_keysize(PySet_GET_SIZE(iterable)))) {
Py_DECREF(d); Py_DECREF(d);
return NULL; return NULL;
} }
@ -2192,7 +2080,7 @@ dict_subscript(PyDictObject *mp, PyObject *key)
if (hash == -1) if (hash == -1)
return NULL; return NULL;
} }
ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value); ix = _Py_dict_lookup(mp, key, hash, &value);
if (ix == DKIX_ERROR) if (ix == DKIX_ERROR)
return NULL; return NULL;
if (ix == DKIX_EMPTY || value == NULL) { if (ix == DKIX_EMPTY || value == NULL) {
@ -2578,8 +2466,8 @@ dict_merge(PyObject *a, PyObject *b, int override)
// If other is clean, combined, and just allocated, just clone it. // If other is clean, combined, and just allocated, just clone it.
if (other->ma_values == NULL && if (other->ma_values == NULL &&
other->ma_used == okeys->dk_nentries && other->ma_used == okeys->dk_nentries &&
(okeys->dk_size == PyDict_MINSIZE || (DK_SIZE(okeys) == PyDict_MINSIZE ||
USABLE_FRACTION(okeys->dk_size/2) < other->ma_used)) { USABLE_FRACTION(DK_SIZE(okeys)/2) < other->ma_used)) {
PyDictKeysObject *keys = clone_combined_dict_keys(other); PyDictKeysObject *keys = clone_combined_dict_keys(other);
if (keys == NULL) { if (keys == NULL) {
return -1; return -1;
@ -2610,8 +2498,8 @@ dict_merge(PyObject *a, PyObject *b, int override)
* incrementally resizing as we insert new items. Expect * incrementally resizing as we insert new items. Expect
* that there will be no (or few) overlapping keys. * that there will be no (or few) overlapping keys.
*/ */
if (USABLE_FRACTION(mp->ma_keys->dk_size) < other->ma_used) { if (USABLE_FRACTION(DK_SIZE(mp->ma_keys)) < other->ma_used) {
if (dictresize(mp, estimate_keysize(mp->ma_used + other->ma_used))) { if (dictresize(mp, estimate_log2_keysize(mp->ma_used + other->ma_used))) {
return -1; return -1;
} }
} }
@ -2908,7 +2796,7 @@ dict_equal(PyDictObject *a, PyDictObject *b)
/* ditto for key */ /* ditto for key */
Py_INCREF(key); Py_INCREF(key);
/* reuse the known hash value */ /* reuse the known hash value */
b->ma_keys->dk_lookup(b, key, ep->me_hash, &bval); _Py_dict_lookup(b, key, ep->me_hash, &bval);
if (bval == NULL) { if (bval == NULL) {
Py_DECREF(key); Py_DECREF(key);
Py_DECREF(aval); Py_DECREF(aval);
@ -2975,7 +2863,7 @@ dict___contains__(PyDictObject *self, PyObject *key)
if (hash == -1) if (hash == -1)
return NULL; return NULL;
} }
ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value); ix = _Py_dict_lookup(mp, key, hash, &value);
if (ix == DKIX_ERROR) if (ix == DKIX_ERROR)
return NULL; return NULL;
if (ix == DKIX_EMPTY || value == NULL) if (ix == DKIX_EMPTY || value == NULL)
@ -3007,7 +2895,7 @@ dict_get_impl(PyDictObject *self, PyObject *key, PyObject *default_value)
if (hash == -1) if (hash == -1)
return NULL; return NULL;
} }
ix = (self->ma_keys->dk_lookup) (self, key, hash, &val); ix = _Py_dict_lookup(self, key, hash, &val);
if (ix == DKIX_ERROR) if (ix == DKIX_ERROR)
return NULL; return NULL;
if (ix == DKIX_EMPTY || val == NULL) { if (ix == DKIX_EMPTY || val == NULL) {
@ -3047,7 +2935,7 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
return NULL; return NULL;
} }
Py_ssize_t ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value); Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, &value);
if (ix == DKIX_ERROR) if (ix == DKIX_ERROR)
return NULL; return NULL;
@ -3061,6 +2949,7 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
} }
if (ix == DKIX_EMPTY) { if (ix == DKIX_EMPTY) {
mp->ma_keys->dk_version = 0;
PyDictKeyEntry *ep, *ep0; PyDictKeyEntry *ep, *ep0;
value = defaultobj; value = defaultobj;
if (mp->ma_keys->dk_usable <= 0) { if (mp->ma_keys->dk_usable <= 0) {
@ -3068,8 +2957,8 @@ PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
return NULL; return NULL;
} }
} }
if (!PyUnicode_CheckExact(key) && mp->ma_keys->dk_lookup != lookdict) { if (!PyUnicode_CheckExact(key) && mp->ma_keys->dk_kind != DICT_KEYS_GENERAL) {
mp->ma_keys->dk_lookup = lookdict; mp->ma_keys->dk_kind = DICT_KEYS_GENERAL;
} }
Py_ssize_t hashpos = find_empty_slot(mp->ma_keys, hash); Py_ssize_t hashpos = find_empty_slot(mp->ma_keys, hash);
ep0 = DK_ENTRIES(mp->ma_keys); ep0 = DK_ENTRIES(mp->ma_keys);
@ -3194,13 +3083,13 @@ dict_popitem_impl(PyDictObject *self)
return NULL; return NULL;
} }
/* Convert split table to combined table */ /* Convert split table to combined table */
if (self->ma_keys->dk_lookup == lookdict_split) { if (self->ma_keys->dk_kind == DICT_KEYS_SPLIT) {
if (dictresize(self, DK_SIZE(self->ma_keys))) { if (dictresize(self, DK_LOG_SIZE(self->ma_keys))) {
Py_DECREF(res); Py_DECREF(res);
return NULL; return NULL;
} }
} }
ENSURE_ALLOWS_DELETIONS(self); self->ma_keys->dk_version = 0;
/* Pop last item */ /* Pop last item */
ep0 = DK_ENTRIES(self->ma_keys); ep0 = DK_ENTRIES(self->ma_keys);
@ -3236,7 +3125,7 @@ dict_traverse(PyObject *op, visitproc visit, void *arg)
PyDictKeyEntry *entries = DK_ENTRIES(keys); PyDictKeyEntry *entries = DK_ENTRIES(keys);
Py_ssize_t i, n = keys->dk_nentries; Py_ssize_t i, n = keys->dk_nentries;
if (keys->dk_lookup == lookdict) { if (keys->dk_kind == DICT_KEYS_GENERAL) {
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
if (entries[i].me_value != NULL) { if (entries[i].me_value != NULL) {
Py_VISIT(entries[i].me_value); Py_VISIT(entries[i].me_value);
@ -3401,7 +3290,7 @@ PyDict_Contains(PyObject *op, PyObject *key)
if (hash == -1) if (hash == -1)
return -1; return -1;
} }
ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value); ix = _Py_dict_lookup(mp, key, hash, &value);
if (ix == DKIX_ERROR) if (ix == DKIX_ERROR)
return -1; return -1;
return (ix != DKIX_EMPTY && value != NULL); return (ix != DKIX_EMPTY && value != NULL);
@ -3415,7 +3304,7 @@ _PyDict_Contains_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
PyObject *value; PyObject *value;
Py_ssize_t ix; Py_ssize_t ix;
ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value); ix = _Py_dict_lookup(mp, key, hash, &value);
if (ix == DKIX_ERROR) if (ix == DKIX_ERROR)
return -1; return -1;
return (ix != DKIX_EMPTY && value != NULL); return (ix != DKIX_EMPTY && value != NULL);
@ -4974,12 +4863,12 @@ dictvalues_reversed(_PyDictViewObject *dv, PyObject *Py_UNUSED(ignored))
PyDictKeysObject * PyDictKeysObject *
_PyDict_NewKeysForClass(void) _PyDict_NewKeysForClass(void)
{ {
PyDictKeysObject *keys = new_keys_object(PyDict_MINSIZE); PyDictKeysObject *keys = new_keys_object(PyDict_LOG_MINSIZE);
if (keys == NULL) { if (keys == NULL) {
PyErr_Clear(); PyErr_Clear();
} }
else { else {
keys->dk_lookup = lookdict_split; keys->dk_kind = DICT_KEYS_SPLIT;
} }
return keys; return keys;
} }
@ -5091,3 +4980,18 @@ _PyDictKeys_DecRef(PyDictKeysObject *keys)
{ {
dictkeys_decref(keys); dictkeys_decref(keys);
} }
static uint32_t next_dict_keys_version = 2;
uint32_t _PyDictKeys_GetVersionForCurrentState(PyDictObject *dict)
{
if (dict->ma_keys->dk_version != 0) {
return dict->ma_keys->dk_version;
}
if (next_dict_keys_version == 0) {
return 0;
}
uint32_t v = next_dict_keys_version++;
dict->ma_keys->dk_version = v;
return v;
}

View File

@ -40,9 +40,9 @@ we've considered:
The approach with the least performance impact (time and space) is #2, The approach with the least performance impact (time and space) is #2,
mirroring the key order of dict's dk_entries with an array of node pointers. mirroring the key order of dict's dk_entries with an array of node pointers.
While lookdict() and friends (dk_lookup) don't give us the index into the While _Py_dict_lookup() does not give us the index into the array,
array, we make use of pointer arithmetic to get that index. An alternative we make use of pointer arithmetic to get that index. An alternative would
would be to refactor lookdict() to provide the index, explicitly exposing be to refactor _Py_dict_lookup() to provide the index, explicitly exposing
the implementation detail. We could even just use a custom lookup function the implementation detail. We could even just use a custom lookup function
for OrderedDict that facilitates our need. However, both approaches are for OrderedDict that facilitates our need. However, both approaches are
significantly more complicated than just using pointer arithmetic. significantly more complicated than just using pointer arithmetic.
@ -535,7 +535,7 @@ _odict_get_index_raw(PyODictObject *od, PyObject *key, Py_hash_t hash)
PyDictKeysObject *keys = ((PyDictObject *)od)->ma_keys; PyDictKeysObject *keys = ((PyDictObject *)od)->ma_keys;
Py_ssize_t ix; Py_ssize_t ix;
ix = (keys->dk_lookup)((PyDictObject *)od, key, hash, &value); ix = _Py_dict_lookup((PyDictObject *)od, key, hash, &value);
if (ix == DKIX_EMPTY) { if (ix == DKIX_EMPTY) {
return keys->dk_nentries; /* index of new entry */ return keys->dk_nentries; /* index of new entry */
} }
@ -553,7 +553,7 @@ _odict_resize(PyODictObject *od)
_ODictNode **fast_nodes, *node; _ODictNode **fast_nodes, *node;
/* Initialize a new "fast nodes" table. */ /* Initialize a new "fast nodes" table. */
size = ((PyDictObject *)od)->ma_keys->dk_size; size = 1 << (((PyDictObject *)od)->ma_keys->dk_log2_size);
fast_nodes = PyMem_NEW(_ODictNode *, size); fast_nodes = PyMem_NEW(_ODictNode *, size);
if (fast_nodes == NULL) { if (fast_nodes == NULL) {
PyErr_NoMemory(); PyErr_NoMemory();
@ -592,7 +592,7 @@ _odict_get_index(PyODictObject *od, PyObject *key, Py_hash_t hash)
/* Ensure od_fast_nodes and dk_entries are in sync. */ /* Ensure od_fast_nodes and dk_entries are in sync. */
if (od->od_resize_sentinel != keys || if (od->od_resize_sentinel != keys ||
od->od_fast_nodes_size != keys->dk_size) { od->od_fast_nodes_size != (1 << (keys->dk_log2_size))) {
int resize_res = _odict_resize(od); int resize_res = _odict_resize(od);
if (resize_res < 0) if (resize_res < 0)
return -1; return -1;

View File

@ -730,7 +730,7 @@ class PyDictObjectPtr(PyObjectPtr):
def _get_entries(self, keys): def _get_entries(self, keys):
dk_nentries = int(keys['dk_nentries']) dk_nentries = int(keys['dk_nentries'])
dk_size = int(keys['dk_size']) dk_size = 1<<int(keys['dk_log2_size'])
try: try:
# <= Python 3.5 # <= Python 3.5
return keys['dk_entries'], dk_size return keys['dk_entries'], dk_size