Issue #23119: Simplify setobject by inlining the special case for unicode equality testing.

This commit is contained in:
Raymond Hettinger 2015-01-25 16:12:49 -08:00
parent bbd3aa8ece
commit 93035c44fd
3 changed files with 13 additions and 73 deletions

View File

@ -35,7 +35,7 @@ Invariants for frozensets:
*/
typedef struct _setobject {
typedef struct {
PyObject_HEAD
Py_ssize_t fill; /* Number active and dummy entries*/
@ -53,7 +53,6 @@ typedef struct _setobject {
* runtime null-tests.
*/
setentry *table;
setentry *(*lookup)(struct _setobject *so, PyObject *key, Py_hash_t hash);
Py_hash_t hash; /* Only used by frozenset objects */
setentry smalltable[PySet_MINSIZE];

View File

@ -994,7 +994,7 @@ class SizeofTest(unittest.TestCase):
# frozenset
PySet_MINSIZE = 8
samples = [[], range(10), range(50)]
s = size('3n2P' + PySet_MINSIZE*'nP' + '2nP')
s = size('3nP' + PySet_MINSIZE*'nP' + '2nP')
for sample in samples:
minused = len(sample)
if minused == 0: tmp = 1

View File

@ -69,6 +69,10 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
PyObject *startkey = entry->key;
if (startkey == key)
return entry;
if (PyUnicode_CheckExact(startkey)
&& PyUnicode_CheckExact(key)
&& unicode_eq(startkey, key))
return entry;
Py_INCREF(startkey);
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
@ -90,6 +94,10 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
PyObject *startkey = entry->key;
if (startkey == key)
return entry;
if (PyUnicode_CheckExact(startkey)
&& PyUnicode_CheckExact(key)
&& unicode_eq(startkey, key))
return entry;
Py_INCREF(startkey);
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
@ -115,68 +123,6 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
return freeslot == NULL ? entry : freeslot;
}
/*
 * Variant of set_lookkey specialised for the case where every key can be
 * assumed to be an exact unicode object.  Keys are compared with unicode_eq
 * directly, so there is no need to check whether a comparison altered the
 * table.
 *
 * Returns the entry whose key matches; otherwise returns the first dummy
 * entry met along the probe sequence if there was one, or else the
 * NULL-key entry that ended the search.
 */
static setentry *
set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash)
{
    setentry *slots = so->table;
    setentry *reusable = NULL;      /* first dummy entry seen while probing */
    setentry *cur;
    size_t perturb = hash;
    size_t mask = so->mask;
    size_t i = (size_t)hash;
    size_t step;

    /* This function must never see a non-unicode key, and that includes
       subclasses of str (a common reason to subclass str is to override
       __eq__, which is deliberately not supported here for speed).  On
       such a key, switch the set over to the general lookup routine for
       good and delegate to it. */
    if (!PyUnicode_CheckExact(key)) {   /* unlikely */
        so->lookup = set_lookkey;
        return set_lookkey(so, key, hash);
    }

    for (;;) {
        /* step == 0 examines the primary slot for the current i; steps
           1..LINEAR_PROBES examine the slots that follow it. */
        for (step = 0; step <= LINEAR_PROBES; step++) {
            cur = &slots[(i + step) & mask];
            if (cur->key == NULL)
                goto done;
            if (cur->hash == hash) {
                if (cur->key == key)
                    return cur;
                if (cur->key != dummy               /* unlikely */
                    && unicode_eq(cur->key, key))   /* likely */
                    return cur;
            }
            if (reusable == NULL && cur->key == dummy)
                reusable = cur;
        }
        /* Nothing found in this run of slots: jump to the next position
           in the perturbed probe sequence. */
        perturb >>= PERTURB_SHIFT;
        i = i * 5 + 1 + perturb;
    }

  done:
    return reusable != NULL ? reusable : cur;
}
/*
Internal routine used by set_table_resize() to insert an item which is
known to be absent from the set. This routine also assumes that
@ -225,8 +171,7 @@ set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash)
{
setentry *entry;
assert(so->lookup != NULL);
entry = so->lookup(so, key, hash);
entry = set_lookkey(so, key, hash);
if (entry == NULL)
return -1;
if (entry->key == NULL) {
@ -385,7 +330,7 @@ set_discard_entry(PySetObject *so, setentry *oldentry)
setentry *entry;
PyObject *old_key;
entry = (so->lookup)(so, oldentry->key, oldentry->hash);
entry = set_lookkey(so, oldentry->key, oldentry->hash);
if (entry == NULL)
return -1;
if (entry->key == NULL || entry->key == dummy)
@ -631,7 +576,7 @@ set_contains_entry(PySetObject *so, setentry *entry)
PyObject *key;
setentry *lu_entry;
lu_entry = (so->lookup)(so, entry->key, entry->hash);
lu_entry = set_lookkey(so, entry->key, entry->hash);
if (lu_entry == NULL)
return -1;
key = lu_entry->key;
@ -994,7 +939,6 @@ make_new_set(PyTypeObject *type, PyObject *iterable)
so->used = 0;
so->mask = PySet_MINSIZE - 1;
so->table = so->smalltable;
so->lookup = set_lookkey_unicode;
so->hash = -1;
so->finger = 0;
so->weakreflist = NULL;
@ -1095,7 +1039,6 @@ set_swap_bodies(PySetObject *a, PySetObject *b)
{
Py_ssize_t t;
setentry *u;
setentry *(*f)(PySetObject *so, PyObject *key, Py_ssize_t hash);
setentry tab[PySet_MINSIZE];
Py_hash_t h;
@ -1111,8 +1054,6 @@ set_swap_bodies(PySetObject *a, PySetObject *b)
a->table = a->smalltable;
b->table = u;
f = a->lookup; a->lookup = b->lookup; b->lookup = f;
if (a->table == a->smalltable || b->table == b->smalltable) {
memcpy(tab, a->smalltable, sizeof(tab));
memcpy(a->smalltable, b->smalltable, sizeof(tab));