bpo-41493: Refactoring dictresize (GH-21751)

Split newsize calculation into new function. dictresize() now accepts exact newsize.
This commit is contained in:
Inada Naoki 2020-08-07 14:08:55 +09:00 committed by GitHub
parent 5f0769a752
commit d9323a8c6e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 41 additions and 26 deletions

View File

@ -111,6 +111,7 @@ converting the dict to the combined table.
#define PyDict_MINSIZE 8 #define PyDict_MINSIZE 8
#include "Python.h" #include "Python.h"
#include "pycore_bitutils.h" // _Py_bit_length
#include "pycore_gc.h" // _PyObject_GC_IS_TRACKED() #include "pycore_gc.h" // _PyObject_GC_IS_TRACKED()
#include "pycore_object.h" // _PyObject_GC_TRACK() #include "pycore_object.h" // _PyObject_GC_TRACK()
#include "pycore_pyerrors.h" // _PyErr_Fetch() #include "pycore_pyerrors.h" // _PyErr_Fetch()
@ -236,7 +237,7 @@ lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
static Py_ssize_t lookdict_split(PyDictObject *mp, PyObject *key, static Py_ssize_t lookdict_split(PyDictObject *mp, PyObject *key,
Py_hash_t hash, PyObject **value_addr); Py_hash_t hash, PyObject **value_addr);
static int dictresize(PyDictObject *mp, Py_ssize_t minused); static int dictresize(PyDictObject *mp, Py_ssize_t newsize);
static PyObject* dict_iter(PyDictObject *dict); static PyObject* dict_iter(PyDictObject *dict);
@ -411,18 +412,40 @@ dictkeys_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix)
*/ */
#define USABLE_FRACTION(n) (((n) << 1)/3) #define USABLE_FRACTION(n) (((n) << 1)/3)
/* ESTIMATE_SIZE is reverse function of USABLE_FRACTION. /* Find the smallest dk_size >= minsize. */
static inline Py_ssize_t
calculate_keysize(Py_ssize_t minsize)
{
#if SIZEOF_LONG == SIZEOF_SIZE_T
minsize = (minsize | PyDict_MINSIZE) - 1;
return 1LL << _Py_bit_length(minsize | (PyDict_MINSIZE-1));
#elif defined(_MSC_VER)
// On 64bit Windows, sizeof(long) == 4.
minsize = (minsize | PyDict_MINSIZE) - 1;
unsigned long msb;
_BitScanReverse64(&msb, (uint64_t)minsize);
return 1LL << (msb + 1);
#else
Py_ssize_t size;
for (size = PyDict_MINSIZE;
size < minsize && size > 0;
size <<= 1)
;
return size;
#endif
}
/* estimate_keysize is reverse function of USABLE_FRACTION.
*
* This can be used to reserve enough size to insert n entries without * This can be used to reserve enough size to insert n entries without
* resizing. * resizing.
*/ */
#define ESTIMATE_SIZE(n) (((n)*3+1) >> 1) static inline Py_ssize_t
estimate_keysize(Py_ssize_t n)
{
return calculate_keysize((n*3 + 1) / 2);
}
/* Alternative fraction that is otherwise close enough to 2n/3 to make
* little difference. 8 * 2/3 == 8 * 5/8 == 5. 16 * 2/3 == 16 * 5/8 == 10.
* 32 * 2/3 = 21, 32 * 5/8 = 20.
* Its advantage is that it is faster to compute on machines with slow division.
* #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3))
*/
/* GROWTH_RATE. Growth rate upon hitting maximum load. /* GROWTH_RATE. Growth rate upon hitting maximum load.
* Currently set to used*3. * Currently set to used*3.
@ -1036,7 +1059,7 @@ find_empty_slot(PyDictKeysObject *keys, Py_hash_t hash)
static int static int
insertion_resize(PyDictObject *mp) insertion_resize(PyDictObject *mp)
{ {
return dictresize(mp, GROWTH_RATE(mp)); return dictresize(mp, calculate_keysize(GROWTH_RATE(mp)));
} }
/* /*
@ -1194,22 +1217,19 @@ After resizing a table is always combined,
but can be resplit by make_keys_shared(). but can be resplit by make_keys_shared().
*/ */
static int static int
dictresize(PyDictObject *mp, Py_ssize_t minsize) dictresize(PyDictObject *mp, Py_ssize_t newsize)
{ {
Py_ssize_t newsize, numentries; Py_ssize_t numentries;
PyDictKeysObject *oldkeys; PyDictKeysObject *oldkeys;
PyObject **oldvalues; PyObject **oldvalues;
PyDictKeyEntry *oldentries, *newentries; PyDictKeyEntry *oldentries, *newentries;
/* Find the smallest table size > minused. */
for (newsize = PyDict_MINSIZE;
newsize < minsize && newsize > 0;
newsize <<= 1)
;
if (newsize <= 0) { if (newsize <= 0) {
PyErr_NoMemory(); PyErr_NoMemory();
return -1; return -1;
} }
assert(IS_POWER_OF_2(newsize));
assert(newsize >= PyDict_MINSIZE);
oldkeys = mp->ma_keys; oldkeys = mp->ma_keys;
@ -1355,13 +1375,8 @@ _PyDict_NewPresized(Py_ssize_t minused)
newsize = max_presize; newsize = max_presize;
} }
else { else {
Py_ssize_t minsize = ESTIMATE_SIZE(minused); newsize = estimate_keysize(minused);
newsize = PyDict_MINSIZE*2;
while (newsize < minsize) {
newsize <<= 1;
}
} }
assert(IS_POWER_OF_2(newsize));
new_keys = new_keys_object(newsize); new_keys = new_keys_object(newsize);
if (new_keys == NULL) if (new_keys == NULL)
@ -1930,7 +1945,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
PyObject *key; PyObject *key;
Py_hash_t hash; Py_hash_t hash;
if (dictresize(mp, ESTIMATE_SIZE(PyDict_GET_SIZE(iterable)))) { if (dictresize(mp, estimate_keysize(PyDict_GET_SIZE(iterable)))) {
Py_DECREF(d); Py_DECREF(d);
return NULL; return NULL;
} }
@ -1949,7 +1964,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
PyObject *key; PyObject *key;
Py_hash_t hash; Py_hash_t hash;
if (dictresize(mp, ESTIMATE_SIZE(PySet_GET_SIZE(iterable)))) { if (dictresize(mp, estimate_keysize(PySet_GET_SIZE(iterable)))) {
Py_DECREF(d); Py_DECREF(d);
return NULL; return NULL;
} }
@ -2558,7 +2573,7 @@ dict_merge(PyObject *a, PyObject *b, int override)
* that there will be no (or few) overlapping keys. * that there will be no (or few) overlapping keys.
*/ */
if (USABLE_FRACTION(mp->ma_keys->dk_size) < other->ma_used) { if (USABLE_FRACTION(mp->ma_keys->dk_size) < other->ma_used) {
if (dictresize(mp, ESTIMATE_SIZE(mp->ma_used + other->ma_used))) { if (dictresize(mp, estimate_keysize(mp->ma_used + other->ma_used))) {
return -1; return -1;
} }
} }