From 82d73dd459d911dcb12a48db12b7f8b3eb0f6b1b Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 20 Nov 2003 22:54:33 +0000 Subject: [PATCH] Three minor performance improvements: * Improve the hash function to increase the chance that distinct sets will have distinct xor'd hash totals. * Use PyDict_Merge where possible (it is faster than an equivalent iter/set pair). * Don't rebuild dictionaries where the input already has one. --- Objects/setobject.c | 53 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/Objects/setobject.c b/Objects/setobject.c index 2a53cbad5e2..d7190b59b0c 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -138,6 +138,15 @@ set_union(PySetObject *so, PyObject *other) result = (PySetObject *)set_copy(so); if (result == NULL) return NULL; + + if (PyAnySet_Check(other)) { + if (PyDict_Merge(result->data, ((PySetObject *)other)->data, 0) == -1) { + Py_DECREF(result); + return NULL; + } + return (PyObject *)result; + } + it = PyObject_GetIter(other); if (it == NULL) { Py_DECREF(result); @@ -171,6 +180,13 @@ set_union_update(PySetObject *so, PyObject *other) { PyObject *item, *data, *it; + if (PyAnySet_Check(other)) { + if (PyDict_Merge(so->data, ((PySetObject *)other)->data, 0) == -1) + return NULL; + Py_INCREF(so); + return (PyObject *)so; + } + it = PyObject_GetIter(other); if (it == NULL) return NULL; @@ -434,7 +450,7 @@ set_isub(PySetObject *so, PyObject *other) static PyObject * set_symmetric_difference(PySetObject *so, PyObject *other) { - PySetObject *result, *otherset; + PySetObject *result, *otherset=NULL; PyObject *item, *selfdata, *otherdata, *tgtdata, *it; selfdata = so->data; @@ -444,16 +460,22 @@ set_symmetric_difference(PySetObject *so, PyObject *other) return NULL; tgtdata = result->data; - otherset = (PySetObject *)make_new_set(so->ob_type, other); - if (otherset == NULL) { - Py_DECREF(result); - return NULL; - } - otherdata = otherset->data; + if (PyDict_Check(other)) + otherdata = other; + else if (PyAnySet_Check(other)) + otherdata = ((PySetObject *)other)->data; + else { + otherset = (PySetObject *)make_new_set(so->ob_type, other); + if (otherset == NULL) { + Py_DECREF(result); + return NULL; + } + otherdata = otherset->data; + } it = PyObject_GetIter(otherdata); if (it == NULL) { - Py_DECREF(otherset); + Py_XDECREF(otherset); Py_DECREF(result); return NULL; } @@ -463,7 +485,7 @@ set_symmetric_difference(PySetObject *so, PyObject *other) PyErr_Clear(); if (PyDict_SetItem(tgtdata, item, Py_True) == -1) { Py_DECREF(it); - Py_DECREF(otherset); + Py_XDECREF(otherset); Py_DECREF(result); Py_DECREF(item); return NULL; @@ -472,7 +494,7 @@ set_symmetric_difference(PySetObject *so, PyObject *other) Py_DECREF(item); } Py_DECREF(it); - Py_DECREF(otherset); + Py_XDECREF(otherset); if (PyErr_Occurred()) { Py_DECREF(result); return NULL; @@ -627,7 +649,7 @@ frozenset_hash(PyObject *self) { PyObject *it, *item; PySetObject *so = (PySetObject *)self; - long hash = 0; + long hash = 0, x; if (so->hash != -1) return so->hash; @@ -637,7 +659,14 @@ frozenset_hash(PyObject *self) return -1; while ((item = PyIter_Next(it)) != NULL) { - hash ^= PyObject_Hash(item); + x = PyObject_Hash(item); + /* Applying x*(x+1) breaks-up linear relationships so that + h(1) ^ h(2) will be less likely to coincide with hash(3). + Multiplying by a large prime increases the dispersion + between consecutive hashes. Adding one bit from the + original restores the one bit lost during the multiply + (all the products are even numbers). */ + hash ^= (x * (x+1) * 3644798167) | (x&1); Py_DECREF(item); } Py_DECREF(it);