bpo-41493: Refactoring dictresize (GH-21751)
Split the newsize calculation into a new function. dictresize() now accepts the exact newsize.
This commit is contained in:
parent
5f0769a752
commit
d9323a8c6e
|
@ -111,6 +111,7 @@ converting the dict to the combined table.
|
||||||
#define PyDict_MINSIZE 8
|
#define PyDict_MINSIZE 8
|
||||||
|
|
||||||
#include "Python.h"
|
#include "Python.h"
|
||||||
|
#include "pycore_bitutils.h" // _Py_bit_length
|
||||||
#include "pycore_gc.h" // _PyObject_GC_IS_TRACKED()
|
#include "pycore_gc.h" // _PyObject_GC_IS_TRACKED()
|
||||||
#include "pycore_object.h" // _PyObject_GC_TRACK()
|
#include "pycore_object.h" // _PyObject_GC_TRACK()
|
||||||
#include "pycore_pyerrors.h" // _PyErr_Fetch()
|
#include "pycore_pyerrors.h" // _PyErr_Fetch()
|
||||||
|
@ -236,7 +237,7 @@ lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
|
||||||
static Py_ssize_t lookdict_split(PyDictObject *mp, PyObject *key,
|
static Py_ssize_t lookdict_split(PyDictObject *mp, PyObject *key,
|
||||||
Py_hash_t hash, PyObject **value_addr);
|
Py_hash_t hash, PyObject **value_addr);
|
||||||
|
|
||||||
static int dictresize(PyDictObject *mp, Py_ssize_t minused);
|
static int dictresize(PyDictObject *mp, Py_ssize_t newsize);
|
||||||
|
|
||||||
static PyObject* dict_iter(PyDictObject *dict);
|
static PyObject* dict_iter(PyDictObject *dict);
|
||||||
|
|
||||||
|
@ -411,18 +412,40 @@ dictkeys_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix)
|
||||||
*/
|
*/
|
||||||
#define USABLE_FRACTION(n) (((n) << 1)/3)
|
#define USABLE_FRACTION(n) (((n) << 1)/3)
|
||||||
|
|
||||||
/* ESTIMATE_SIZE is reverse function of USABLE_FRACTION.
|
/* Find the smallest dk_size >= minsize. */
|
||||||
|
static inline Py_ssize_t
|
||||||
|
calculate_keysize(Py_ssize_t minsize)
|
||||||
|
{
|
||||||
|
#if SIZEOF_LONG == SIZEOF_SIZE_T
|
||||||
|
minsize = (minsize | PyDict_MINSIZE) - 1;
|
||||||
|
return 1LL << _Py_bit_length(minsize | (PyDict_MINSIZE-1));
|
||||||
|
#elif defined(_MSC_VER)
|
||||||
|
// On 64bit Windows, sizeof(long) == 4.
|
||||||
|
minsize = (minsize | PyDict_MINSIZE) - 1;
|
||||||
|
unsigned long msb;
|
||||||
|
_BitScanReverse64(&msb, (uint64_t)minsize);
|
||||||
|
return 1LL << (msb + 1);
|
||||||
|
#else
|
||||||
|
Py_ssize_t size;
|
||||||
|
for (size = PyDict_MINSIZE;
|
||||||
|
size < minsize && size > 0;
|
||||||
|
size <<= 1)
|
||||||
|
;
|
||||||
|
return size;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/* estimate_keysize is the reverse function of USABLE_FRACTION.
|
||||||
|
*
|
||||||
* This can be used to reserve enough size to insert n entries without
|
* This can be used to reserve enough size to insert n entries without
|
||||||
* resizing.
|
* resizing.
|
||||||
*/
|
*/
|
||||||
#define ESTIMATE_SIZE(n) (((n)*3+1) >> 1)
|
static inline Py_ssize_t
|
||||||
|
estimate_keysize(Py_ssize_t n)
|
||||||
|
{
|
||||||
|
return calculate_keysize((n*3 + 1) / 2);
|
||||||
|
}
|
||||||
|
|
||||||
/* Alternative fraction that is otherwise close enough to 2n/3 to make
|
|
||||||
* little difference. 8 * 2/3 == 8 * 5/8 == 5. 16 * 2/3 == 16 * 5/8 == 10.
|
|
||||||
* 32 * 2/3 = 21, 32 * 5/8 = 20.
|
|
||||||
* Its advantage is that it is faster to compute on machines with slow division.
|
|
||||||
* #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3))
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* GROWTH_RATE. Growth rate upon hitting maximum load.
|
/* GROWTH_RATE. Growth rate upon hitting maximum load.
|
||||||
* Currently set to used*3.
|
* Currently set to used*3.
|
||||||
|
@ -1036,7 +1059,7 @@ find_empty_slot(PyDictKeysObject *keys, Py_hash_t hash)
|
||||||
static int
|
static int
|
||||||
insertion_resize(PyDictObject *mp)
|
insertion_resize(PyDictObject *mp)
|
||||||
{
|
{
|
||||||
return dictresize(mp, GROWTH_RATE(mp));
|
return dictresize(mp, calculate_keysize(GROWTH_RATE(mp)));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1194,22 +1217,19 @@ After resizing a table is always combined,
|
||||||
but can be resplit by make_keys_shared().
|
but can be resplit by make_keys_shared().
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
dictresize(PyDictObject *mp, Py_ssize_t minsize)
|
dictresize(PyDictObject *mp, Py_ssize_t newsize)
|
||||||
{
|
{
|
||||||
Py_ssize_t newsize, numentries;
|
Py_ssize_t numentries;
|
||||||
PyDictKeysObject *oldkeys;
|
PyDictKeysObject *oldkeys;
|
||||||
PyObject **oldvalues;
|
PyObject **oldvalues;
|
||||||
PyDictKeyEntry *oldentries, *newentries;
|
PyDictKeyEntry *oldentries, *newentries;
|
||||||
|
|
||||||
/* Find the smallest table size > minused. */
|
|
||||||
for (newsize = PyDict_MINSIZE;
|
|
||||||
newsize < minsize && newsize > 0;
|
|
||||||
newsize <<= 1)
|
|
||||||
;
|
|
||||||
if (newsize <= 0) {
|
if (newsize <= 0) {
|
||||||
PyErr_NoMemory();
|
PyErr_NoMemory();
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
assert(IS_POWER_OF_2(newsize));
|
||||||
|
assert(newsize >= PyDict_MINSIZE);
|
||||||
|
|
||||||
oldkeys = mp->ma_keys;
|
oldkeys = mp->ma_keys;
|
||||||
|
|
||||||
|
@ -1355,13 +1375,8 @@ _PyDict_NewPresized(Py_ssize_t minused)
|
||||||
newsize = max_presize;
|
newsize = max_presize;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
Py_ssize_t minsize = ESTIMATE_SIZE(minused);
|
newsize = estimate_keysize(minused);
|
||||||
newsize = PyDict_MINSIZE*2;
|
|
||||||
while (newsize < minsize) {
|
|
||||||
newsize <<= 1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
assert(IS_POWER_OF_2(newsize));
|
|
||||||
|
|
||||||
new_keys = new_keys_object(newsize);
|
new_keys = new_keys_object(newsize);
|
||||||
if (new_keys == NULL)
|
if (new_keys == NULL)
|
||||||
|
@ -1930,7 +1945,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
|
||||||
PyObject *key;
|
PyObject *key;
|
||||||
Py_hash_t hash;
|
Py_hash_t hash;
|
||||||
|
|
||||||
if (dictresize(mp, ESTIMATE_SIZE(PyDict_GET_SIZE(iterable)))) {
|
if (dictresize(mp, estimate_keysize(PyDict_GET_SIZE(iterable)))) {
|
||||||
Py_DECREF(d);
|
Py_DECREF(d);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -1949,7 +1964,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
|
||||||
PyObject *key;
|
PyObject *key;
|
||||||
Py_hash_t hash;
|
Py_hash_t hash;
|
||||||
|
|
||||||
if (dictresize(mp, ESTIMATE_SIZE(PySet_GET_SIZE(iterable)))) {
|
if (dictresize(mp, estimate_keysize(PySet_GET_SIZE(iterable)))) {
|
||||||
Py_DECREF(d);
|
Py_DECREF(d);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -2558,7 +2573,7 @@ dict_merge(PyObject *a, PyObject *b, int override)
|
||||||
* that there will be no (or few) overlapping keys.
|
* that there will be no (or few) overlapping keys.
|
||||||
*/
|
*/
|
||||||
if (USABLE_FRACTION(mp->ma_keys->dk_size) < other->ma_used) {
|
if (USABLE_FRACTION(mp->ma_keys->dk_size) < other->ma_used) {
|
||||||
if (dictresize(mp, ESTIMATE_SIZE(mp->ma_used + other->ma_used))) {
|
if (dictresize(mp, estimate_keysize(mp->ma_used + other->ma_used))) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue