* Simplify hash function and add test to show effectiveness of the hash

function.

* Add a better test for deepcopying.

* Add tests to show the __init__() function works like it does for list
  and tuple.  Add related test.

* Have shallow copies of frozensets return self.  Add related test.

* Have frozenset(f) return f if f is already a frozenset. Add related test.

* Beefed-up some existing tests.
This commit is contained in:
Raymond Hettinger 2003-11-23 02:49:05 +00:00
parent baf0f8f24d
commit 49ba4c39c4
2 changed files with 98 additions and 27 deletions

View File

@ -38,15 +38,11 @@ class TestJointOps(unittest.TestCase):
s = self.thetype([frozenset(self.letters)])
self.assert_(self.thetype(self.letters) in s)
def test_copy(self):
dup = self.s.copy()
self.assertEqual(self.s, dup)
self.assertNotEqual(id(self.s), id(dup))
def test_union(self):
u = self.s.union(self.otherword)
for c in self.letters:
self.assertEqual(c in u, c in self.d or c in self.otherword)
self.assertEqual(self.s, self.thetype(self.word))
self.assertEqual(type(u), self.thetype)
self.assertRaises(PassThru, self.s.union, check_pass_thru())
self.assertRaises(TypeError, self.s.union, [[]])
@ -66,6 +62,7 @@ class TestJointOps(unittest.TestCase):
i = self.s.intersection(self.otherword)
for c in self.letters:
self.assertEqual(c in i, c in self.d and c in self.otherword)
self.assertEqual(self.s, self.thetype(self.word))
self.assertEqual(type(i), self.thetype)
self.assertRaises(PassThru, self.s.intersection, check_pass_thru())
@ -84,6 +81,7 @@ class TestJointOps(unittest.TestCase):
i = self.s.difference(self.otherword)
for c in self.letters:
self.assertEqual(c in i, c in self.d and c not in self.otherword)
self.assertEqual(self.s, self.thetype(self.word))
self.assertEqual(type(i), self.thetype)
self.assertRaises(PassThru, self.s.difference, check_pass_thru())
self.assertRaises(TypeError, self.s.difference, [[]])
@ -103,6 +101,7 @@ class TestJointOps(unittest.TestCase):
i = self.s.symmetric_difference(self.otherword)
for c in self.letters:
self.assertEqual(c in i, (c in self.d) ^ (c in self.otherword))
self.assertEqual(self.s, self.thetype(self.word))
self.assertEqual(type(i), self.thetype)
self.assertRaises(PassThru, self.s.symmetric_difference, check_pass_thru())
self.assertRaises(TypeError, self.s.symmetric_difference, [[]])
@ -155,16 +154,38 @@ class TestJointOps(unittest.TestCase):
dup = pickle.loads(p)
self.assertEqual(self.s, dup, "%s != %s" % (self.s, dup))
def test_deepcopy(self):
class Tracer:
def __init__(self, value):
self.value = value
def __hash__(self):
return self.value
def __deepcopy__(self, memo=None):
return Tracer(self.value + 1)
t = Tracer(10)
s = self.thetype([t])
dup = copy.deepcopy(s)
self.assertNotEqual(id(s), id(dup))
for elem in dup:
newt = elem
self.assertNotEqual(id(t), id(newt))
self.assertEqual(t.value + 1, newt.value)
class TestSet(TestJointOps):
thetype = set
def test_init(self):
s = set()
s = self.thetype()
s.__init__(self.word)
self.assertEqual(s, set(self.word))
s.__init__(self.otherword)
self.assertEqual(s, set(self.otherword))
def test_constructor_identity(self):
s = self.thetype(range(3))
t = self.thetype(s)
self.assertNotEqual(id(s), id(t))
def test_hash(self):
self.assertRaises(TypeError, hash, self.s)
@ -172,6 +193,11 @@ class TestSet(TestJointOps):
self.s.clear()
self.assertEqual(self.s, set([]))
def test_copy(self):
dup = self.s.copy()
self.assertEqual(self.s, dup)
self.assertNotEqual(id(self.s), id(dup))
def test_add(self):
self.s.add('Q')
self.assert_('Q' in self.s)
@ -285,17 +311,27 @@ class TestFrozenSet(TestJointOps):
thetype = frozenset
def test_init(self):
s = frozenset()
s.__init__(self.word)
self.assertEqual(s, frozenset())
s = self.thetype(self.word)
s.__init__(self.otherword)
self.assertEqual(s, set(self.word))
def test_constructor_identity(self):
s = self.thetype(range(3))
t = self.thetype(s)
self.assertEqual(id(s), id(t))
def test_hash(self):
self.assertEqual(hash(frozenset('abcdeb')), hash(frozenset('ebecda')))
self.assertEqual(hash(self.thetype('abcdeb')),
hash(self.thetype('ebecda')))
def test_copy(self):
dup = self.s.copy()
self.assertEqual(id(self.s), id(dup))
def test_frozen_as_dictkey(self):
seq = range(10) + list('abcdefg') + ['apple']
key1 = frozenset(seq)
key2 = frozenset(reversed(seq))
key1 = self.thetype(seq)
key2 = self.thetype(reversed(seq))
self.assertEqual(key1, key2)
self.assertNotEqual(id(key1), id(key2))
d = {}
@ -303,15 +339,38 @@ class TestFrozenSet(TestJointOps):
self.assertEqual(d[key2], 42)
def test_hash_caching(self):
f = frozenset('abcdcda')
f = self.thetype('abcdcda')
self.assertEqual(hash(f), hash(f))
def test_hash_effectiveness(self):
n = 13
rng = range(n)
hashvalues = set()
for i in xrange(2**n):
combination = [j for j in rng if (1<<j)&i]
hashvalues.add(hash(self.thetype(combination)))
self.assert_(len(hashvalues) >= 2**(n-2))
class FrozenSetSubclass(frozenset):
pass
class TestFrozenSetSubclass(TestFrozenSet):
thetype = FrozenSetSubclass
def test_constructor_identity(self):
s = self.thetype(range(3))
t = self.thetype(s)
self.assertNotEqual(id(s), id(t))
def test_copy(self):
dup = self.s.copy()
self.assertNotEqual(id(self.s), id(dup))
def test_nested_empty_constructor(self):
s = self.thetype()
t = self.thetype(s)
self.assertEqual(s, t)
# Tests taken from test_sets.py =============================================
empty_set = set()

View File

@ -64,6 +64,10 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable))
return NULL;
if (iterable != NULL && iterable->ob_type == &PyFrozenSet_Type) {
Py_INCREF(iterable);
return iterable;
}
return make_new_set(type, iterable);
}
@ -154,6 +158,16 @@ set_copy(PySetObject *so)
return (PyObject *)newso;
}
static PyObject *
frozenset_copy(PySetObject *so)
{
if (so->ob_type == &PyFrozenSet_Type) {
Py_INCREF(so);
return (PyObject *)so;
}
return set_copy(so);
}
PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
static PyObject *
@ -686,7 +700,7 @@ frozenset_hash(PyObject *self)
{
PyObject *it, *item;
PySetObject *so = (PySetObject *)self;
long hash = 0, x;
long hash = 0;
if (so->hash != -1)
return so->hash;
@ -696,14 +710,12 @@ frozenset_hash(PyObject *self)
return -1;
while ((item = PyIter_Next(it)) != NULL) {
x = PyObject_Hash(item);
/* Applying x*(x+1) breaks-up linear relationships so that
h(1) ^ h(2) will be less likely to coincide with hash(3).
Multiplying by a large prime increases the dispersion
between consecutive hashes. Adding one bit from the
original restores the one bit lost during the multiply
(all the products are even numbers). */
hash ^= (x * (x+1) * 3644798167) | (x&1);
/* Multiplying by a large prime increases the bit dispersion for
closely spaced hash values. The is important because some
use cases have many combinations of a small number of
elements with nearby hashes so that many distinct combinations
collapse to only a handful of distinct hash values. */
hash ^= PyObject_Hash(item) * 3644798167;
Py_DECREF(item);
}
Py_DECREF(it);
@ -1096,17 +1108,17 @@ PyTypeObject PySet_Type = {
static PyMethodDef frozenset_methods[] = {
{"copy", (PyCFunction)set_copy, METH_NOARGS,
{"copy", (PyCFunction)frozenset_copy, METH_NOARGS,
copy_doc},
{"__copy__", (PyCFunction)set_copy, METH_NOARGS,
{"__copy__", (PyCFunction)frozenset_copy, METH_NOARGS,
copy_doc},
{"difference",(PyCFunction)set_difference, METH_O,
{"difference", (PyCFunction)set_difference, METH_O,
difference_doc},
{"intersection",(PyCFunction)set_intersection, METH_O,
intersection_doc},
{"issubset",(PyCFunction)set_issubset, METH_O,
{"issubset", (PyCFunction)set_issubset, METH_O,
issubset_doc},
{"issuperset",(PyCFunction)set_issuperset, METH_O,
{"issuperset", (PyCFunction)set_issuperset, METH_O,
issuperset_doc},
{"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
reduce_doc},