SF bug #942952: Weakness in tuple hash

(Basic approach and test concept by Tim Peters.)

* Improved the hash to reduce collisions.
* Added the torture test to the test suite.
This commit is contained in:
Raymond Hettinger 2004-06-01 06:36:24 +00:00
parent 504239fb38
commit 41bd02256f
3 changed files with 25 additions and 2 deletions

View File

@ -41,6 +41,25 @@ class TupleTest(seq_tests.CommonTest):
yield i
self.assertEqual(list(tuple(f())), range(1000))
def test_hash(self):
# See SF bug 942952: Weakness in tuple hash
# The hash should:
# be non-commutative
# should spread-out closely spaced values
# should not exhibit cancellation in tuples like (x,(x,y))
# should be distinct from element hashes: hash(x)!=hash((x,))
# This test exercises those cases.
# For a pure random hash and N=50, the expected number of collisions
# is 7.3. Here we allow twice that number.
# Any worse and the hash function is sorely suspect.
N=50
base = range(N)
xp = [(i, j) for i in base for j in base]
inps = base + [(i, j) for i in base for j in xp] + \
[(i, j) for i in xp for j in base] + xp + zip(base)
collisions = len(inps) - len(set(map(hash, inps)))
self.assert_(collisions <= 15)
def test_main():
test_support.run_unittest(TupleTest)

View File

@ -12,6 +12,9 @@ What's New in Python 2.4 alpha 1?
Core and builtins
-----------------
- Improved the tuple hashing algorithm to give fewer collisions in
common cases. Fixes bug #942952.
- Implemented generator expressions (PEP 289). Coded by Jiwon Seo.
- Enabled the profiling of C extension functions (and builtins) - check

View File

@ -262,15 +262,16 @@ tuplehash(PyTupleObject *v)
register long x, y;
register int len = v->ob_size;
register PyObject **p;
long mult = 1000003L;
x = 0x345678L;
p = v->ob_item;
while (--len >= 0) {
y = PyObject_Hash(*p++);
if (y == -1)
return -1;
x = (1000003*x) ^ y;
x = (x ^ y) * mult;
mult += 69068L + len + len;
}
x ^= v->ob_size;
if (x == -1)
x = -2;
return x;