From 88ea166dadb8aeb34541a0a464662dea222629e5 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 29 Aug 2019 09:02:58 -0700 Subject: [PATCH] bpo-8425: Fast path for set inplace difference when the second set is large (GH-15590) --- .../2019-08-29-01-55-38.bpo-8425.FTq4A8.rst | 3 +++ Objects/setobject.c | 20 +++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2019-08-29-01-55-38.bpo-8425.FTq4A8.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-08-29-01-55-38.bpo-8425.FTq4A8.rst b/Misc/NEWS.d/next/Core and Builtins/2019-08-29-01-55-38.bpo-8425.FTq4A8.rst new file mode 100644 index 00000000000..8e5ec0bfe87 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2019-08-29-01-55-38.bpo-8425.FTq4A8.rst @@ -0,0 +1,3 @@ +Optimize set difference_update for the case when the other set is much +larger than the base set. (Suggested by Evgeny Kapun with code contributed +by Michele Orrù). diff --git a/Objects/setobject.c b/Objects/setobject.c index 56858dbccfe..fafc2fa9e46 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -1463,9 +1463,25 @@ set_difference_update_internal(PySetObject *so, PyObject *other) setentry *entry; Py_ssize_t pos = 0; - while (set_next((PySetObject *)other, &pos, &entry)) - if (set_discard_entry(so, entry->key, entry->hash) < 0) + /* Optimization: When the other set is more than 8 times + larger than the base set, replace the other set with + intersection of the two sets. + */ + if ((PySet_GET_SIZE(other) >> 3) > PySet_GET_SIZE(so)) { + other = set_intersection(so, other); + if (other == NULL) return -1; + } else { + Py_INCREF(other); + } + + while (set_next((PySetObject *)other, &pos, &entry)) + if (set_discard_entry(so, entry->key, entry->hash) < 0) { + Py_DECREF(other); + return -1; + } + + Py_DECREF(other); } else { PyObject *key, *it; it = PyObject_GetIter(other);