Issue #28201: Dict reduces possibility of 2nd conflict in hash table.

Do perturb shift after first conflict.
This commit is contained in:
INADA Naoki 2016-10-06 15:22:28 +09:00
commit ecdd36ff88
2 changed files with 25 additions and 16 deletions

View File

@ -10,6 +10,9 @@ What's New in Python 3.7.0 alpha 1
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #28201: Dict reduces possibility of 2nd conflict in hash table when
hashes have same lower bits.
- Issue #28350: String constants with null character no longer interned. - Issue #28350: String constants with null character no longer interned.
- Issue #26617: Fix crash when GC runs during weakref callbacks. - Issue #26617: Fix crash when GC runs during weakref callbacks.

View File

@ -184,8 +184,8 @@ The other half of the strategy is to get the other bits of the hash code
into play. This is done by initializing a (unsigned) vrbl "perturb" to the into play. This is done by initializing a (unsigned) vrbl "perturb" to the
full hash code, and changing the recurrence to: full hash code, and changing the recurrence to:
j = (5*j) + 1 + perturb;
perturb >>= PERTURB_SHIFT; perturb >>= PERTURB_SHIFT;
j = (5*j) + 1 + perturb;
use j % 2**i as the next table index; use j % 2**i as the next table index;
Now the probe sequence depends (eventually) on every bit in the hash code, Now the probe sequence depends (eventually) on every bit in the hash code,
@ -630,7 +630,7 @@ PyDict_New(void)
static Py_ssize_t static Py_ssize_t
lookdict_index(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t index) lookdict_index(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t index)
{ {
size_t i, perturb; size_t i;
size_t mask = DK_MASK(k); size_t mask = DK_MASK(k);
Py_ssize_t ix; Py_ssize_t ix;
@ -643,7 +643,8 @@ lookdict_index(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t index)
return DKIX_EMPTY; return DKIX_EMPTY;
} }
for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { for (size_t perturb = hash;;) {
perturb >>= PERTURB_SHIFT;
i = mask & ((i << 2) + i + perturb + 1); i = mask & ((i << 2) + i + perturb + 1);
ix = dk_get_index(k, i); ix = dk_get_index(k, i);
if (ix == index) { if (ix == index) {
@ -685,7 +686,7 @@ static Py_ssize_t
lookdict(PyDictObject *mp, PyObject *key, lookdict(PyDictObject *mp, PyObject *key,
Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos) Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos)
{ {
size_t i, perturb, mask; size_t i, mask;
Py_ssize_t ix, freeslot; Py_ssize_t ix, freeslot;
int cmp; int cmp;
PyDictKeysObject *dk; PyDictKeysObject *dk;
@ -740,7 +741,8 @@ top:
freeslot = -1; freeslot = -1;
} }
for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { for (size_t perturb = hash;;) {
perturb >>= PERTURB_SHIFT;
i = ((i << 2) + i + perturb + 1) & mask; i = ((i << 2) + i + perturb + 1) & mask;
ix = dk_get_index(dk, i); ix = dk_get_index(dk, i);
if (ix == DKIX_EMPTY) { if (ix == DKIX_EMPTY) {
@ -797,7 +799,7 @@ static Py_ssize_t
lookdict_unicode(PyDictObject *mp, PyObject *key, lookdict_unicode(PyDictObject *mp, PyObject *key,
Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos) Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos)
{ {
size_t i, perturb; size_t i;
size_t mask = DK_MASK(mp->ma_keys); size_t mask = DK_MASK(mp->ma_keys);
Py_ssize_t ix, freeslot; Py_ssize_t ix, freeslot;
PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys); PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys);
@ -835,7 +837,8 @@ lookdict_unicode(PyDictObject *mp, PyObject *key,
freeslot = -1; freeslot = -1;
} }
for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { for (size_t perturb = hash;;) {
perturb >>= PERTURB_SHIFT;
i = mask & ((i << 2) + i + perturb + 1); i = mask & ((i << 2) + i + perturb + 1);
ix = dk_get_index(mp->ma_keys, i); ix = dk_get_index(mp->ma_keys, i);
if (ix == DKIX_EMPTY) { if (ix == DKIX_EMPTY) {
@ -872,7 +875,7 @@ lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
Py_hash_t hash, PyObject ***value_addr, Py_hash_t hash, PyObject ***value_addr,
Py_ssize_t *hashpos) Py_ssize_t *hashpos)
{ {
size_t i, perturb; size_t i;
size_t mask = DK_MASK(mp->ma_keys); size_t mask = DK_MASK(mp->ma_keys);
Py_ssize_t ix; Py_ssize_t ix;
PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys); PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys);
@ -905,7 +908,8 @@ lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
*value_addr = &ep->me_value; *value_addr = &ep->me_value;
return ix; return ix;
} }
for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { for (size_t perturb = hash;;) {
perturb >>= PERTURB_SHIFT;
i = mask & ((i << 2) + i + perturb + 1); i = mask & ((i << 2) + i + perturb + 1);
ix = dk_get_index(mp->ma_keys, i); ix = dk_get_index(mp->ma_keys, i);
assert (ix != DKIX_DUMMY); assert (ix != DKIX_DUMMY);
@ -938,7 +942,7 @@ static Py_ssize_t
lookdict_split(PyDictObject *mp, PyObject *key, lookdict_split(PyDictObject *mp, PyObject *key,
Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos) Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos)
{ {
size_t i, perturb; size_t i;
size_t mask = DK_MASK(mp->ma_keys); size_t mask = DK_MASK(mp->ma_keys);
Py_ssize_t ix; Py_ssize_t ix;
PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys); PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys);
@ -971,7 +975,8 @@ lookdict_split(PyDictObject *mp, PyObject *key,
*value_addr = &mp->ma_values[ix]; *value_addr = &mp->ma_values[ix];
return ix; return ix;
} }
for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { for (size_t perturb = hash;;) {
perturb >>= PERTURB_SHIFT;
i = mask & ((i << 2) + i + perturb + 1); i = mask & ((i << 2) + i + perturb + 1);
ix = dk_get_index(mp->ma_keys, i); ix = dk_get_index(mp->ma_keys, i);
if (ix == DKIX_EMPTY) { if (ix == DKIX_EMPTY) {
@ -1064,7 +1069,7 @@ static void
find_empty_slot(PyDictObject *mp, PyObject *key, Py_hash_t hash, find_empty_slot(PyDictObject *mp, PyObject *key, Py_hash_t hash,
PyObject ***value_addr, Py_ssize_t *hashpos) PyObject ***value_addr, Py_ssize_t *hashpos)
{ {
size_t i, perturb; size_t i;
size_t mask = DK_MASK(mp->ma_keys); size_t mask = DK_MASK(mp->ma_keys);
Py_ssize_t ix; Py_ssize_t ix;
PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys); PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys);
@ -1077,7 +1082,8 @@ find_empty_slot(PyDictObject *mp, PyObject *key, Py_hash_t hash,
mp->ma_keys->dk_lookup = lookdict; mp->ma_keys->dk_lookup = lookdict;
i = hash & mask; i = hash & mask;
ix = dk_get_index(mp->ma_keys, i); ix = dk_get_index(mp->ma_keys, i);
for (perturb = hash; ix != DKIX_EMPTY; perturb >>= PERTURB_SHIFT) { for (size_t perturb = hash; ix != DKIX_EMPTY;) {
perturb >>= PERTURB_SHIFT;
i = (i << 2) + i + perturb + 1; i = (i << 2) + i + perturb + 1;
ix = dk_get_index(mp->ma_keys, i & mask); ix = dk_get_index(mp->ma_keys, i & mask);
} }
@ -1202,7 +1208,7 @@ static void
insertdict_clean(PyDictObject *mp, PyObject *key, Py_hash_t hash, insertdict_clean(PyDictObject *mp, PyObject *key, Py_hash_t hash,
PyObject *value) PyObject *value)
{ {
size_t i, perturb; size_t i;
PyDictKeysObject *k = mp->ma_keys; PyDictKeysObject *k = mp->ma_keys;
size_t mask = (size_t)DK_SIZE(k)-1; size_t mask = (size_t)DK_SIZE(k)-1;
PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys); PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
@ -1213,8 +1219,8 @@ insertdict_clean(PyDictObject *mp, PyObject *key, Py_hash_t hash,
assert(key != NULL); assert(key != NULL);
assert(PyUnicode_CheckExact(key) || k->dk_lookup == lookdict); assert(PyUnicode_CheckExact(key) || k->dk_lookup == lookdict);
i = hash & mask; i = hash & mask;
for (perturb = hash; dk_get_index(k, i) != DKIX_EMPTY; for (size_t perturb = hash; dk_get_index(k, i) != DKIX_EMPTY;) {
perturb >>= PERTURB_SHIFT) { perturb >>= PERTURB_SHIFT;
i = mask & ((i << 2) + i + perturb + 1); i = mask & ((i << 2) + i + perturb + 1);
} }
ep = &ep0[k->dk_nentries]; ep = &ep0[k->dk_nentries];