From 1bff7969832877398b129a97ad8d307c27c80fba Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 19 Feb 2007 03:04:45 +0000 Subject: [PATCH] Extend work on revision 52962: Eliminate redundant calls to PyObject_Hash(). --- Include/dictobject.h | 3 +++ Lib/test/test_set.py | 20 ++++++++++++++++++++ Objects/dictobject.c | 39 +++++++++++++++++++++++++++++++++++++++ Objects/setobject.c | 18 +++++++++++------- 4 files changed, 73 insertions(+), 7 deletions(-) diff --git a/Include/dictobject.h b/Include/dictobject.h index fd3d1fc3529..44b0838e042 100644 --- a/Include/dictobject.h +++ b/Include/dictobject.h @@ -100,12 +100,15 @@ PyAPI_FUNC(int) PyDict_DelItem(PyObject *mp, PyObject *key); PyAPI_FUNC(void) PyDict_Clear(PyObject *mp); PyAPI_FUNC(int) PyDict_Next( PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value); +PyAPI_FUNC(int) _PyDict_Next( + PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value, long *hash); PyAPI_FUNC(PyObject *) PyDict_Keys(PyObject *mp); PyAPI_FUNC(PyObject *) PyDict_Values(PyObject *mp); PyAPI_FUNC(PyObject *) PyDict_Items(PyObject *mp); PyAPI_FUNC(Py_ssize_t) PyDict_Size(PyObject *mp); PyAPI_FUNC(PyObject *) PyDict_Copy(PyObject *mp); PyAPI_FUNC(int) PyDict_Contains(PyObject *mp, PyObject *key); +PyAPI_FUNC(int) _PyDict_Contains(PyObject *mp, PyObject *key, long hash); /* PyDict_Update(mp, other) is equivalent to PyDict_Merge(mp, other, 1). */ PyAPI_FUNC(int) PyDict_Update(PyObject *mp, PyObject *other); diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py index 49bdec34454..45f61b2e8d7 100644 --- a/Lib/test/test_set.py +++ b/Lib/test/test_set.py @@ -26,6 +26,14 @@ class ReprWrapper: def __repr__(self): return repr(self.value) +class HashCountingInt(int): + 'int-like object that counts the number of times __hash__ is called' + def __init__(self, *args): + self.hash_count = 0 + def __hash__(self): + self.hash_count += 1 + return int.__hash__(self) + class TestJointOps(unittest.TestCase): # Tests common to both set and frozenset @@ -270,6 +278,18 @@ class TestJointOps(unittest.TestCase): fo.close() os.remove(test_support.TESTFN) + def test_do_not_rehash_dict_keys(self): + n = 10 + d = dict.fromkeys(map(HashCountingInt, xrange(n))) + self.assertEqual(sum(elem.hash_count for elem in d), n) + s = self.thetype(d) + self.assertEqual(sum(elem.hash_count for elem in d), n) + s.difference(d) + self.assertEqual(sum(elem.hash_count for elem in d), n) + if hasattr(s, 'symmetric_difference_update'): + s.symmetric_difference_update(d) + self.assertEqual(sum(elem.hash_count for elem in d), n) + class TestSet(TestJointOps): thetype = set diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 901e33383f2..1cb3ee6ad86 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -803,6 +803,34 @@ PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue) return 1; } +/* Internal version of PyDict_Next that returns a hash value in addition to the key and value.*/ +int +_PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue, long *phash) +{ + register Py_ssize_t i; + register Py_ssize_t mask; + register dictentry *ep; + + if (!PyDict_Check(op)) + return 0; + i = *ppos; + if (i < 0) + return 0; + ep = ((dictobject *)op)->ma_table; + mask = ((dictobject *)op)->ma_mask; + while (i <= mask && ep[i].me_value == NULL) + i++; + *ppos = i+1; + if (i > mask) + return 0; + *phash = (long)(ep[i].me_hash); + if (pkey) + *pkey = ep[i].me_key; + if (pvalue) + *pvalue = ep[i].me_value; + return 1; +} + /* Methods */ static void @@ -1987,6 +2015,17 @@ PyDict_Contains(PyObject *op, PyObject *key) return ep == NULL ? -1 : (ep->me_value != NULL); } +/* Internal version of PyDict_Contains used when the hash value is already known */ +int +_PyDict_Contains(PyObject *op, PyObject *key, long hash) +{ + dictobject *mp = (dictobject *)op; + dictentry *ep; + + ep = (mp->ma_lookup)(mp, key, hash); + return ep == NULL ? -1 : (ep->me_value != NULL); +} + /* Hack to implement "key in dict" */ static PySequenceMethods dict_as_sequence = { 0, /* sq_length */ diff --git a/Objects/setobject.c b/Objects/setobject.c index fc9d8234f43..1f06cee5a11 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -918,8 +918,14 @@ set_update_internal(PySetObject *so, PyObject *other) if (PyDict_CheckExact(other)) { PyObject *value; Py_ssize_t pos = 0; - while (PyDict_Next(other, &pos, &key, &value)) { - if (set_add_key(so, key) == -1) + long hash; + + while (_PyDict_Next(other, &pos, &key, &value, &hash)) { + setentry an_entry; + + an_entry.hash = hash; + an_entry.key = key; + if (set_add_entry(so, &an_entry) == -1) return -1; } return 0; @@ -1382,7 +1388,7 @@ set_difference(PySetObject *so, PyObject *other) setentry entrycopy; entrycopy.hash = entry->hash; entrycopy.key = entry->key; - if (!PyDict_Contains(other, entry->key)) { + if (!_PyDict_Contains(other, entry->key, entry->hash)) { if (set_add_entry((PySetObject *)result, &entrycopy) == -1) { Py_DECREF(result); return NULL; @@ -1453,12 +1459,10 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other) if (PyDict_CheckExact(other)) { PyObject *value; int rv; - while (PyDict_Next(other, &pos, &key, &value)) { + long hash; + while (_PyDict_Next(other, &pos, &key, &value, &hash)) { setentry an_entry; - long hash = PyObject_Hash(key); - if (hash == -1) - return NULL; an_entry.hash = hash; an_entry.key = key; rv = set_discard_entry(so, &an_entry);