Three minor performance improvements:

* Improve the hash function to increase the chance that distinct sets will
  have distinct xor'd hash totals (a short standalone demo follows this list).

* Use PyDict_Merge where possible (it is faster than an equivalent iter/set
  pair).

* Don't rebuild dictionaries where the input already has one.
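
A quick standalone sketch of the weakness the first item fixes (illustrative,
not part of the commit): CPython hashes small integers to themselves, so a
plain XOR of element hashes lets the frozenset {1, 2} collide with {3},
since 1 ^ 2 == 3.

    #include <stdio.h>

    /* Old scheme in miniature: the set hash is just the XOR of the
       element hashes, and small ints hash to themselves. */
    int main(void)
    {
        long h_set12 = 1L ^ 2L;   /* xor'd hash total of {1, 2} */
        long h_set3  = 3L;        /* xor'd hash total of {3} */
        printf("h({1,2}) = %ld, h({3}) = %ld -> %s\n",
               h_set12, h_set3,
               h_set12 == h_set3 ? "collision" : "distinct");
        return 0;
    }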
Raymond Hettinger 2003-11-20 22:54:33 +00:00
parent dff9dbdb38
commit 82d73dd459
1 changed file with 41 additions and 12 deletions

Objects/setobject.c

@@ -138,6 +138,15 @@ set_union(PySetObject *so, PyObject *other)
     result = (PySetObject *)set_copy(so);
     if (result == NULL)
         return NULL;
+
+    if (PyAnySet_Check(other)) {
+        if (PyDict_Merge(result->data, ((PySetObject *)other)->data, 0) == -1) {
+            Py_DECREF(result);
+            return NULL;
+        }
+        return (PyObject *)result;
+    }
+
     it = PyObject_GetIter(other);
     if (it == NULL) {
         Py_DECREF(result);
@@ -171,6 +180,13 @@ set_union_update(PySetObject *so, PyObject *other)
 {
     PyObject *item, *data, *it;
 
+    if (PyAnySet_Check(other)) {
+        if (PyDict_Merge(so->data, ((PySetObject *)other)->data, 0) == -1)
+            return NULL;
+        Py_INCREF(so);
+        return (PyObject *)so;
+    }
+
     it = PyObject_GetIter(other);
     if (it == NULL)
         return NULL;
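
Both hunks above rely on PyDict_Merge(), a C-API function with the signature
int PyDict_Merge(PyObject *a, PyObject *b, int override). It folds b's
entries into a in one pass and, when b is a real dict, can reuse the hash
values already cached in b's entries rather than building an iterator and
rehashing every key. Passing override == 0 leaves keys already present in a
untouched, which is immaterial here because a set's backing dict maps every
key to Py_True. A hedged usage sketch (the helper name and the
embedded-interpreter scaffolding are illustrative, not from the commit):

    #include <Python.h>

    /* Union of two "sets" stored the way 2003-era setobject.c stored
       them: dicts whose keys all map to Py_True. */
    static int merge_set_dicts(PyObject *a, PyObject *b)
    {
        /* One call replaces the PyObject_GetIter / PyDict_SetItem
           loop; override == 0 keeps a's existing entries. */
        return PyDict_Merge(a, b, 0);
    }

    int main(void)
    {
        int status = -1;
        PyObject *a, *b;

        Py_Initialize();
        a = PyDict_New();
        b = PyDict_New();
        if (a != NULL && b != NULL &&
            PyDict_SetItemString(a, "x", Py_True) == 0 &&
            PyDict_SetItemString(b, "y", Py_True) == 0 &&
            merge_set_dicts(a, b) == 0)
            status = PyDict_Size(a) == 2 ? 0 : -1;   /* keys: x, y */
        Py_XDECREF(a);
        Py_XDECREF(b);
        Py_Finalize();
        return status;
    }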
@@ -434,7 +450,7 @@ set_isub(PySetObject *so, PyObject *other)
 static PyObject *
 set_symmetric_difference(PySetObject *so, PyObject *other)
 {
-    PySetObject *result, *otherset;
+    PySetObject *result, *otherset=NULL;
     PyObject *item, *selfdata, *otherdata, *tgtdata, *it;
 
     selfdata = so->data;
@@ -444,16 +460,22 @@ set_symmetric_difference(PySetObject *so, PyObject *other)
         return NULL;
     tgtdata = result->data;
 
-    otherset = (PySetObject *)make_new_set(so->ob_type, other);
-    if (otherset == NULL) {
-        Py_DECREF(result);
-        return NULL;
-    }
-    otherdata = otherset->data;
+    if (PyDict_Check(other))
+        otherdata = other;
+    else if (PyAnySet_Check(other))
+        otherdata = ((PySetObject *)other)->data;
+    else {
+        otherset = (PySetObject *)make_new_set(so->ob_type, other);
+        if (otherset == NULL) {
+            Py_DECREF(result);
+            return NULL;
+        }
+        otherdata = otherset->data;
+    }
 
     it = PyObject_GetIter(otherdata);
     if (it == NULL) {
-        Py_DECREF(otherset);
+        Py_XDECREF(otherset);
         Py_DECREF(result);
         return NULL;
     }
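
This hunk is the third bullet in action: if other is already a dict, or a
set whose backing store is a dict, that dictionary is used as-is and the
temporary otherset is only built for other iterables. Since otherset may now
legitimately remain NULL, it is initialized to NULL in the declaration and
each cleanup site below switches from Py_DECREF to Py_XDECREF, which is a
documented no-op on NULL. A minimal sketch of the same idiom (hypothetical
function; assumes a mapping argument on the slow path):

    #include <Python.h>

    /* Count an object's distinct keys, borrowing its dict when it
       already is one and building a temporary only when necessary. */
    static Py_ssize_t distinct_keys(PyObject *other)
    {
        PyObject *temp = NULL;        /* stays NULL on the fast path */
        PyObject *data = other;
        Py_ssize_t n;

        if (!PyDict_Check(other)) {   /* slow path: copy into a dict */
            temp = PyDict_New();
            if (temp == NULL || PyDict_Merge(temp, other, 1) == -1) {
                Py_XDECREF(temp);     /* safe even if PyDict_New() failed */
                return -1;
            }
            data = temp;
        }
        n = PyDict_Size(data);
        Py_XDECREF(temp);             /* no-op when data was borrowed */
        return n;
    }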
@@ -463,7 +485,7 @@ set_symmetric_difference(PySetObject *so, PyObject *other)
             PyErr_Clear();
             if (PyDict_SetItem(tgtdata, item, Py_True) == -1) {
                 Py_DECREF(it);
-                Py_DECREF(otherset);
+                Py_XDECREF(otherset);
                 Py_DECREF(result);
                 Py_DECREF(item);
                 return NULL;
@@ -472,7 +494,7 @@ set_symmetric_difference(PySetObject *so, PyObject *other)
         Py_DECREF(item);
     }
     Py_DECREF(it);
-    Py_DECREF(otherset);
+    Py_XDECREF(otherset);
     if (PyErr_Occurred()) {
         Py_DECREF(result);
         return NULL;
@@ -627,7 +649,7 @@ frozenset_hash(PyObject *self)
 {
     PyObject *it, *item;
     PySetObject *so = (PySetObject *)self;
-    long hash = 0;
+    long hash = 0, x;
 
     if (so->hash != -1)
         return so->hash;
@@ -637,7 +659,14 @@ frozenset_hash(PyObject *self)
         return -1;
 
     while ((item = PyIter_Next(it)) != NULL) {
-        hash ^= PyObject_Hash(item);
+        x = PyObject_Hash(item);
+        /* Applying x*(x+1) breaks up linear relationships so that
+           h(1) ^ h(2) will be less likely to coincide with h(3).
+           Multiplying by a large prime increases the dispersion
+           between consecutive hashes.  Adding one bit from the
+           original restores the one bit lost during the multiply
+           (all the products are even numbers). */
+        hash ^= (x * (x+1) * 3644798167) | (x&1);
         Py_DECREF(item);
     }
     Py_DECREF(it);
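
A worked check of the comment's arithmetic: x*(x+1) multiplies two
consecutive integers, so it is always even, and an even number times the odd
prime 3644798167 stays even; bit 0 of the product is therefore always 0, and
| (x & 1) writes one bit of the original hash back into that dead position.
For x = 3: 3*4 = 12, and 12 * 3644798167 = 43737578004 (even); OR-ing in
3 & 1 = 1 gives 43737578005. A standalone verification (not part of the
commit; unsigned arithmetic sidesteps the signed overflow the 2003 code
tolerated):

    #include <stdio.h>

    /* Confirm the product's low bit is 0 before the final OR. */
    int main(void)
    {
        unsigned long x;
        for (x = 1; x <= 4; x++) {
            unsigned long product = x * (x + 1) * 3644798167UL;
            printf("x=%lu  product=%lu  low bit=%lu  mixed=%lu\n",
                   x, product, product & 1UL, product | (x & 1UL));
        }
        return 0;
    }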