From 775ebe29a50d500e3a0710486bcf64574982b826 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 8 Dec 2006 18:12:24 +0000 Subject: [PATCH] Backport fixes to set objects: rev 52964 sf 1576657 KeyError unpacks tuple arguments rev 52963 sf 1456209 obscure resizing vulnerability rev 52962 redundant calls to PyObject_Hash() --- Lib/test/test_set.py | 11 ++++++ Misc/NEWS | 10 +++++- Objects/setobject.c | 80 ++++++++++++++++++++++++++++++++++++++------ 3 files changed, 89 insertions(+), 12 deletions(-) diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py index 0268be272d8..422cc41c256 100644 --- a/Lib/test/test_set.py +++ b/Lib/test/test_set.py @@ -293,6 +293,17 @@ class TestSet(TestJointOps): self.assert_(self.thetype(self.word) not in s) self.assertRaises(KeyError, self.s.remove, self.thetype(self.word)) + def test_remove_keyerror_unpacking(self): + # bug: www.python.org/sf/1576657 + for v1 in ['Q', (1,)]: + try: + self.s.remove(v1) + except KeyError, e: + v2 = e.args[0] + self.assertEqual(v1, v2) + else: + self.fail() + def test_discard(self): self.s.discard('a') self.assert_('a' not in self.s) diff --git a/Misc/NEWS b/Misc/NEWS index c9ca09567c1..91e2bcfb95a 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,12 +12,20 @@ What's New in Python 2.5.1c1? Core and builtins ----------------- +- Bug #1456209: In some obscure cases it was possible for a class with a + custom ``__eq__()`` method to confuse set internals when class instances + were used as a set's elements and the ``__eq__()`` method mutated the set. + +- Eliminated unnecessary repeated calls to hash() by set.intersection() and + set.symmetric_difference_update(). + - Bug #1591996: Correctly forward exception in instance_contains(). - Bug #1588287: fix invalid assertion for `1,2` in debug builds. - Bug #1576657: when setting a KeyError for a tuple key, make sure that - the tuple isn't used as the "exception arguments tuple". + the tuple isn't used as the "exception arguments tuple". Applied to + both sets and dictionaries. - Bug #1565514, SystemError not raised on too many nested blocks. diff --git a/Objects/setobject.c b/Objects/setobject.c index 9d72b33c0d3..d33fff93fe2 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -10,6 +10,20 @@ #include "Python.h" #include "structmember.h" +/* Set a key error with the specified argument, wrapping it in a + * tuple automatically so that tuple keys are not unpacked as the + * exception arguments. */ +static void +set_key_error(PyObject *arg) +{ + PyObject *tup; + tup = PyTuple_Pack(1, arg); + if (!tup) + return; /* caller will expect error to be set anyway */ + PyErr_SetObject(PyExc_KeyError, tup); + Py_DECREF(tup); +} + /* This must be >= 1. */ #define PERTURB_SHIFT 5 @@ -185,7 +199,7 @@ set_lookkey_string(PySetObject *so, PyObject *key, register long hash) /* Internal routine to insert a new key into the table. -Used both by the internal resize routine and by the public insert routine. +Used by the public insert routine. Eats a reference to key. */ static int @@ -217,6 +231,35 @@ set_insert_key(register PySetObject *so, PyObject *key, long hash) return 0; } +/* +Internal routine used by set_table_resize() to insert an item which is +known to be absent from the set. This routine also assumes that +the set contains no deleted entries. Besides the performance benefit, +using set_insert_clean() in set_table_resize() is dangerous (SF bug #1456209). +Note that no refcounts are changed by this routine; if needed, the caller +is responsible for incref'ing `key`. +*/ +static void +set_insert_clean(register PySetObject *so, PyObject *key, long hash) +{ + register size_t i; + register size_t perturb; + register size_t mask = (size_t)so->mask; + setentry *table = so->table; + register setentry *entry; + + i = hash & mask; + entry = &table[i]; + for (perturb = hash; entry->key != NULL; perturb >>= PERTURB_SHIFT) { + i = (i << 2) + i + perturb + 1; + entry = &table[i & mask]; + } + so->fill++; + entry->key = key; + entry->hash = hash; + so->used++; +} + /* Restructure the table by allocating a new table and reinserting all keys again. When entries have been deleted, the new table may @@ -298,11 +341,7 @@ set_table_resize(PySetObject *so, Py_ssize_t minused) } else { /* ACTIVE */ --i; - if(set_insert_key(so, entry->key, entry->hash) == -1) { - if (is_oldtable_malloced) - PyMem_DEL(oldtable); - return -1; - } + set_insert_clean(so, entry->key, entry->hash); } } @@ -1164,7 +1203,19 @@ set_intersection(PySetObject *so, PyObject *other) } while ((key = PyIter_Next(it)) != NULL) { - int rv = set_contains_key(so, key); + int rv; + setentry entry; + long hash = PyObject_Hash(key); + + if (hash == -1) { + Py_DECREF(it); + Py_DECREF(result); + Py_DECREF(key); + return NULL; + } + entry.hash = hash; + entry.key = key; + rv = set_contains_entry(so, &entry); if (rv == -1) { Py_DECREF(it); Py_DECREF(result); @@ -1172,7 +1223,7 @@ set_intersection(PySetObject *so, PyObject *other) return NULL; } if (rv) { - if (set_add_key(result, key) == -1) { + if (set_add_entry(result, &entry) == -1) { Py_DECREF(it); Py_DECREF(result); Py_DECREF(key); @@ -1383,11 +1434,18 @@ set_symmetric_difference_update(PySetObject *so, PyObject *other) PyObject *value; int rv; while (PyDict_Next(other, &pos, &key, &value)) { - rv = set_discard_key(so, key); + setentry an_entry; + long hash = PyObject_Hash(key); + + if (hash == -1) + return NULL; + an_entry.hash = hash; + an_entry.key = key; + rv = set_discard_entry(so, &an_entry); if (rv == -1) return NULL; if (rv == DISCARD_NOTFOUND) { - if (set_add_key(so, key) == -1) + if (set_add_entry(so, &an_entry) == -1) return NULL; } } @@ -1640,7 +1698,7 @@ set_remove(PySetObject *so, PyObject *key) Py_DECREF(tmpkey); return result; } else if (rv == DISCARD_NOTFOUND) { - PyErr_SetObject(PyExc_KeyError, key); + set_key_error(key); return NULL; } Py_RETURN_NONE;