From 41bd02256f5a2348d2de3d6e5fdfcaeb2fcaaebc Mon Sep 17 00:00:00 2001
From: Raymond Hettinger
Date: Tue, 1 Jun 2004 06:36:24 +0000
Subject: [PATCH] SF bug #942952: Weakness in tuple hash (Basic approach and test concept by Tim Peters.)

* Improved the hash to reduce collisions.
* Added the torture test to the test suite.
---
 Lib/test/test_tuple.py | 19 +++++++++++++++++++
 Misc/NEWS              |  3 +++
 Objects/tupleobject.c  |  5 +++--
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/Lib/test/test_tuple.py b/Lib/test/test_tuple.py
index 4fe299f1826..a3f40dd07d6 100644
--- a/Lib/test/test_tuple.py
+++ b/Lib/test/test_tuple.py
@@ -41,6 +41,25 @@ class TupleTest(seq_tests.CommonTest):
                 yield i
         self.assertEqual(list(tuple(f())), range(1000))
 
+    def test_hash(self):
+        # See SF bug 942952: Weakness in tuple hash
+        # The hash should:
+        #     be non-commutative
+        #     should spread-out closely spaced values
+        #     should not exhibit cancellation in tuples like (x,(x,y))
+        #     should be distinct from element hashes: hash(x)!=hash((x,))
+        # This test exercises those cases.
+        # For a pure random hash and N=50, the expected number of collisions
+        #     is 7.3. Here we allow twice that number.
+        #     Any worse and the hash function is sorely suspect.
+
+        N=50
+        base = range(N)
+        xp = [(i, j) for i in base for j in base]
+        inps = base + [(i, j) for i in base for j in xp] + \
+                     [(i, j) for i in xp for j in base] + xp + zip(base)
+        collisions = len(inps) - len(set(map(hash, inps)))
+        self.assert_(collisions <= 15)
 
 def test_main():
     test_support.run_unittest(TupleTest)
diff --git a/Misc/NEWS b/Misc/NEWS
index ab705af7bdc..2c5dfd9b95f 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 2.4 alpha 1?
 Core and builtins
 -----------------
 
+- Improved the tuple hashing algorithm to give fewer collisions in
+  common cases. Fixes bug #942952.
+
 - Implemented generator expressions (PEP 289). Coded by Jiwon Seo.
 
 - Enabled the profiling of C extension functions (and builtins) - check
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index 159dc44fd07..4cb80f0ed50 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -262,15 +262,16 @@ tuplehash(PyTupleObject *v)
 	register long x, y;
 	register int len = v->ob_size;
 	register PyObject **p;
+	long mult = 1000003L;
 	x = 0x345678L;
 	p = v->ob_item;
 	while (--len >= 0) {
 		y = PyObject_Hash(*p++);
 		if (y == -1)
 			return -1;
-		x = (1000003*x) ^ y;
+		x = (x ^ y) * mult;
+		mult += 69068L + len + len;
 	}
-	x ^= v->ob_size;
 	if (x == -1)
 		x = -2;
 	return x;