[ 1497053 ] Let dicts propagate the exceptions in user __eq__().

[ 1456209 ] dictresize() vulnerability ( <- backport candidate ).
This commit is contained in:
Armin Rigo 2006-06-01 13:19:12 +00:00
parent e08940ef6c
commit 35f6d36951
6 changed files with 183 additions and 101 deletions

View File

@ -1,32 +0,0 @@
# http://www.python.org/sf/1456209
# A dictresize() attack. If oldtable == mp->ma_smalltable then pure
# Python code can mangle with mp->ma_smalltable while it is being walked
# over.
class X(object):
def __hash__(self):
return 5
def __eq__(self, other):
if resizing:
d.clear()
return False
d = {}
resizing = False
d[X()] = 1
d[X()] = 2
d[X()] = 3
d[X()] = 4
d[X()] = 5
# now trigger a resize
resizing = True
d[9] = 6
# ^^^ I get Segmentation fault or Illegal instruction here.

View File

@ -1,6 +1,21 @@
test_operations test_operations
3. Operations 3. Operations
XXX Mostly not yet implemented XXX Mostly not yet implemented
3.1 Dictionary lookups succeed even if __cmp__() raises an exception 3.1 Dictionary lookups fail if __cmp__() raises an exception
raising error raising error
No exception passed through. d[x2] = 2: caught the RuntimeError outside
raising error
z = d[x2]: caught the RuntimeError outside
raising error
x2 in d: caught the RuntimeError outside
raising error
d.has_key(x2): caught the RuntimeError outside
raising error
d.get(x2): caught the RuntimeError outside
raising error
d.setdefault(x2, 42): caught the RuntimeError outside
raising error
d.pop(x2): caught the RuntimeError outside
raising error
d.update({x2: 2}): caught the RuntimeError outside
resize bugs not triggered.

View File

@ -5,27 +5,16 @@ print '3. Operations'
print 'XXX Mostly not yet implemented' print 'XXX Mostly not yet implemented'
print '3.1 Dictionary lookups succeed even if __cmp__() raises an exception' print '3.1 Dictionary lookups fail if __cmp__() raises an exception'
# SourceForge bug #112558:
# http://sourceforge.net/bugs/?func=detailbug&bug_id=112558&group_id=5470
class BadDictKey: class BadDictKey:
already_printed_raising_error = 0
def __hash__(self): def __hash__(self):
return hash(self.__class__) return hash(self.__class__)
def __cmp__(self, other): def __cmp__(self, other):
if isinstance(other, self.__class__): if isinstance(other, self.__class__):
if not BadDictKey.already_printed_raising_error: print "raising error"
# How many times __cmp__ gets called depends on the hash
# code and the internals of the dict implementation; we
# know it will be called at least once, but that's it.
# already_printed_raising_error makes sure the expected-
# output file prints the msg at most once.
BadDictKey.already_printed_raising_error = 1
print "raising error"
raise RuntimeError, "gotcha" raise RuntimeError, "gotcha"
return other return other
@ -33,8 +22,21 @@ d = {}
x1 = BadDictKey() x1 = BadDictKey()
x2 = BadDictKey() x2 = BadDictKey()
d[x1] = 1 d[x1] = 1
d[x2] = 2 for stmt in ['d[x2] = 2',
print "No exception passed through." 'z = d[x2]',
'x2 in d',
'd.has_key(x2)',
'd.get(x2)',
'd.setdefault(x2, 42)',
'd.pop(x2)',
'd.update({x2: 2})']:
try:
exec stmt
except RuntimeError:
print "%s: caught the RuntimeError outside" % (stmt,)
else:
print "%s: No exception passed through!" # old CPython behavior
# Dict resizing bug, found by Jack Jansen in 2.2 CVS development. # Dict resizing bug, found by Jack Jansen in 2.2 CVS development.
# This version got an assert failure in debug build, infinite loop in # This version got an assert failure in debug build, infinite loop in
@ -50,3 +52,27 @@ for i in range(5):
del d[i] del d[i]
for i in range(5, 9): # i==8 was the problem for i in range(5, 9): # i==8 was the problem
d[i] = i d[i] = i
# Another dict resizing bug (SF bug #1456209).
# This caused Segmentation faults or Illegal instructions.
class X(object):
def __hash__(self):
return 5
def __eq__(self, other):
if resizing:
d.clear()
return False
d = {}
resizing = False
d[X()] = 1
d[X()] = 2
d[X()] = 3
d[X()] = 4
d[X()] = 5
# now trigger a resize
resizing = True
d[9] = 6
print 'resize bugs not triggered.'

View File

@ -59,6 +59,11 @@ Core and builtins
- Patch #1479181: split open() and file() from being aliases for each other. - Patch #1479181: split open() and file() from being aliases for each other.
- Patch #1497053: Exceptions occurring in __eq__() methods were always
silently ignored by dictionaries when comparing keys. They are now
passed through (except when using the C API function PyDict_GetItem(),
whose semantics did not change).
Extension Modules Extension Modules
----------------- -----------------

View File

@ -241,10 +241,7 @@ lookdict(dictobject *mp, PyObject *key, register long hash)
register Py_ssize_t mask = mp->ma_mask; register Py_ssize_t mask = mp->ma_mask;
dictentry *ep0 = mp->ma_table; dictentry *ep0 = mp->ma_table;
register dictentry *ep; register dictentry *ep;
register int restore_error;
register int checked_error;
register int cmp; register int cmp;
PyObject *err_type, *err_value, *err_tb;
PyObject *startkey; PyObject *startkey;
i = hash & mask; i = hash & mask;
@ -252,24 +249,17 @@ lookdict(dictobject *mp, PyObject *key, register long hash)
if (ep->me_key == NULL || ep->me_key == key) if (ep->me_key == NULL || ep->me_key == key)
return ep; return ep;
restore_error = checked_error = 0;
if (ep->me_key == dummy) if (ep->me_key == dummy)
freeslot = ep; freeslot = ep;
else { else {
if (ep->me_hash == hash) { if (ep->me_hash == hash) {
/* error can't have been checked yet */
checked_error = 1;
if (PyErr_Occurred()) {
restore_error = 1;
PyErr_Fetch(&err_type, &err_value, &err_tb);
}
startkey = ep->me_key; startkey = ep->me_key;
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
if (cmp < 0) if (cmp < 0)
PyErr_Clear(); return NULL;
if (ep0 == mp->ma_table && ep->me_key == startkey) { if (ep0 == mp->ma_table && ep->me_key == startkey) {
if (cmp > 0) if (cmp > 0)
goto Done; return ep;
} }
else { else {
/* The compare did major nasty stuff to the /* The compare did major nasty stuff to the
@ -277,8 +267,7 @@ lookdict(dictobject *mp, PyObject *key, register long hash)
* XXX A clever adversary could prevent this * XXX A clever adversary could prevent this
* XXX from terminating. * XXX from terminating.
*/ */
ep = lookdict(mp, key, hash); return lookdict(mp, key, hash);
goto Done;
} }
} }
freeslot = NULL; freeslot = NULL;
@ -289,29 +278,18 @@ lookdict(dictobject *mp, PyObject *key, register long hash)
for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
i = (i << 2) + i + perturb + 1; i = (i << 2) + i + perturb + 1;
ep = &ep0[i & mask]; ep = &ep0[i & mask];
if (ep->me_key == NULL) { if (ep->me_key == NULL)
if (freeslot != NULL) return freeslot == NULL ? ep : freeslot;
ep = freeslot;
break;
}
if (ep->me_key == key) if (ep->me_key == key)
break; return ep;
if (ep->me_hash == hash && ep->me_key != dummy) { if (ep->me_hash == hash && ep->me_key != dummy) {
if (!checked_error) {
checked_error = 1;
if (PyErr_Occurred()) {
restore_error = 1;
PyErr_Fetch(&err_type, &err_value,
&err_tb);
}
}
startkey = ep->me_key; startkey = ep->me_key;
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
if (cmp < 0) if (cmp < 0)
PyErr_Clear(); return NULL;
if (ep0 == mp->ma_table && ep->me_key == startkey) { if (ep0 == mp->ma_table && ep->me_key == startkey) {
if (cmp > 0) if (cmp > 0)
break; return ep;
} }
else { else {
/* The compare did major nasty stuff to the /* The compare did major nasty stuff to the
@ -319,18 +297,12 @@ lookdict(dictobject *mp, PyObject *key, register long hash)
* XXX A clever adversary could prevent this * XXX A clever adversary could prevent this
* XXX from terminating. * XXX from terminating.
*/ */
ep = lookdict(mp, key, hash); return lookdict(mp, key, hash);
break;
} }
} }
else if (ep->me_key == dummy && freeslot == NULL) else if (ep->me_key == dummy && freeslot == NULL)
freeslot = ep; freeslot = ep;
} }
Done:
if (restore_error)
PyErr_Restore(err_type, err_value, err_tb);
return ep;
} }
/* /*
@ -400,7 +372,7 @@ Internal routine to insert a new item into the table.
Used both by the internal resize routine and by the public insert routine. Used both by the internal resize routine and by the public insert routine.
Eats a reference to key and one to value. Eats a reference to key and one to value.
*/ */
static void static int
insertdict(register dictobject *mp, PyObject *key, long hash, PyObject *value) insertdict(register dictobject *mp, PyObject *key, long hash, PyObject *value)
{ {
PyObject *old_value; PyObject *old_value;
@ -409,6 +381,11 @@ insertdict(register dictobject *mp, PyObject *key, long hash, PyObject *value)
assert(mp->ma_lookup != NULL); assert(mp->ma_lookup != NULL);
ep = mp->ma_lookup(mp, key, hash); ep = mp->ma_lookup(mp, key, hash);
if (ep == NULL) {
Py_DECREF(key);
Py_DECREF(value);
return -1;
}
if (ep->me_value != NULL) { if (ep->me_value != NULL) {
old_value = ep->me_value; old_value = ep->me_value;
ep->me_value = value; ep->me_value = value;
@ -427,6 +404,36 @@ insertdict(register dictobject *mp, PyObject *key, long hash, PyObject *value)
ep->me_value = value; ep->me_value = value;
mp->ma_used++; mp->ma_used++;
} }
return 0;
}
/*
Internal routine used by dictresize() to insert an item which is
known to be absent from the dict. This routine also assumes that
the dict contains no deleted entries. Besides the performance benefit,
using insertdict() in dictresize() is dangerous (SF bug #1456209).
*/
static void
insertdict_clean(register dictobject *mp, PyObject *key, long hash,
PyObject *value)
{
register Py_ssize_t i;
register size_t perturb;
register unsigned int mask = mp->ma_mask;
dictentry *ep0 = mp->ma_table;
register dictentry *ep;
i = hash & mask;
ep = &ep0[i];
for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
i = (i << 2) + i + perturb + 1;
ep = &ep0[i & mask];
}
mp->ma_fill++;
ep->me_key = key;
ep->me_hash = (Py_ssize_t)hash;
ep->me_value = value;
mp->ma_used++;
} }
/* /*
@ -501,7 +508,8 @@ dictresize(dictobject *mp, Py_ssize_t minused)
for (ep = oldtable; i > 0; ep++) { for (ep = oldtable; i > 0; ep++) {
if (ep->me_value != NULL) { /* active entry */ if (ep->me_value != NULL) { /* active entry */
--i; --i;
insertdict(mp, ep->me_key, ep->me_hash, ep->me_value); insertdict_clean(mp, ep->me_key, (long)ep->me_hash,
ep->me_value);
} }
else if (ep->me_key != NULL) { /* dummy entry */ else if (ep->me_key != NULL) { /* dummy entry */
--i; --i;
@ -521,6 +529,8 @@ PyDict_GetItem(PyObject *op, PyObject *key)
{ {
long hash; long hash;
dictobject *mp = (dictobject *)op; dictobject *mp = (dictobject *)op;
dictentry *ep;
PyThreadState *tstate;
if (!PyDict_Check(op)) { if (!PyDict_Check(op)) {
return NULL; return NULL;
} }
@ -533,7 +543,29 @@ PyDict_GetItem(PyObject *op, PyObject *key)
return NULL; return NULL;
} }
} }
return (mp->ma_lookup)(mp, key, hash)->me_value;
/* We can arrive here with a NULL tstate during initialization:
try running "python -Wi" for an example related to string
interning. Let's just hope that no exception occurs then... */
tstate = PyThreadState_GET();
if (tstate != NULL && tstate->curexc_type != NULL) {
/* preserve the existing exception */
PyObject *err_type, *err_value, *err_tb;
PyErr_Fetch(&err_type, &err_value, &err_tb);
ep = (mp->ma_lookup)(mp, key, hash);
/* ignore errors */
PyErr_Restore(err_type, err_value, err_tb);
if (ep == NULL)
return NULL;
}
else {
ep = (mp->ma_lookup)(mp, key, hash);
if (ep == NULL) {
PyErr_Clear();
return NULL;
}
}
return ep->me_value;
} }
/* CAUTION: PyDict_SetItem() must guarantee that it won't resize the /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
@ -568,7 +600,8 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
n_used = mp->ma_used; n_used = mp->ma_used;
Py_INCREF(value); Py_INCREF(value);
Py_INCREF(key); Py_INCREF(key);
insertdict(mp, key, hash, value); if (insertdict(mp, key, hash, value) != 0)
return -1;
/* If we added a key, we can safely resize. Otherwise just return! /* If we added a key, we can safely resize. Otherwise just return!
* If fill >= 2/3 size, adjust size. Normally, this doubles or * If fill >= 2/3 size, adjust size. Normally, this doubles or
* quaduples the size, but it's also possible for the dict to shrink * quaduples the size, but it's also possible for the dict to shrink
@ -608,6 +641,8 @@ PyDict_DelItem(PyObject *op, PyObject *key)
} }
mp = (dictobject *)op; mp = (dictobject *)op;
ep = (mp->ma_lookup)(mp, key, hash); ep = (mp->ma_lookup)(mp, key, hash);
if (ep == NULL)
return -1;
if (ep->me_value == NULL) { if (ep->me_value == NULL) {
PyErr_SetObject(PyExc_KeyError, key); PyErr_SetObject(PyExc_KeyError, key);
return -1; return -1;
@ -893,6 +928,7 @@ dict_subscript(dictobject *mp, register PyObject *key)
{ {
PyObject *v; PyObject *v;
long hash; long hash;
dictentry *ep;
assert(mp->ma_table != NULL); assert(mp->ma_table != NULL);
if (!PyString_CheckExact(key) || if (!PyString_CheckExact(key) ||
(hash = ((PyStringObject *) key)->ob_shash) == -1) { (hash = ((PyStringObject *) key)->ob_shash) == -1) {
@ -900,7 +936,10 @@ dict_subscript(dictobject *mp, register PyObject *key)
if (hash == -1) if (hash == -1)
return NULL; return NULL;
} }
v = (mp->ma_lookup)(mp, key, hash) -> me_value; ep = (mp->ma_lookup)(mp, key, hash);
if (ep == NULL)
return NULL;
v = ep->me_value;
if (v == NULL) { if (v == NULL) {
if (!PyDict_CheckExact(mp)) { if (!PyDict_CheckExact(mp)) {
/* Look up __missing__ method if we're a subclass. */ /* Look up __missing__ method if we're a subclass. */
@ -1258,9 +1297,10 @@ PyDict_Merge(PyObject *a, PyObject *b, int override)
PyDict_GetItem(a, entry->me_key) == NULL)) { PyDict_GetItem(a, entry->me_key) == NULL)) {
Py_INCREF(entry->me_key); Py_INCREF(entry->me_key);
Py_INCREF(entry->me_value); Py_INCREF(entry->me_value);
insertdict(mp, entry->me_key, if (insertdict(mp, entry->me_key,
(long)entry->me_hash, (long)entry->me_hash,
entry->me_value); entry->me_value) != 0)
return -1;
} }
} }
} }
@ -1568,13 +1608,17 @@ dict_has_key(register dictobject *mp, PyObject *key)
{ {
long hash; long hash;
register long ok; register long ok;
dictentry *ep;
if (!PyString_CheckExact(key) || if (!PyString_CheckExact(key) ||
(hash = ((PyStringObject *) key)->ob_shash) == -1) { (hash = ((PyStringObject *) key)->ob_shash) == -1) {
hash = PyObject_Hash(key); hash = PyObject_Hash(key);
if (hash == -1) if (hash == -1)
return NULL; return NULL;
} }
ok = (mp->ma_lookup)(mp, key, hash)->me_value != NULL; ep = (mp->ma_lookup)(mp, key, hash);
if (ep == NULL)
return NULL;
ok = ep->me_value != NULL;
return PyBool_FromLong(ok); return PyBool_FromLong(ok);
} }
@ -1585,6 +1629,7 @@ dict_get(register dictobject *mp, PyObject *args)
PyObject *failobj = Py_None; PyObject *failobj = Py_None;
PyObject *val = NULL; PyObject *val = NULL;
long hash; long hash;
dictentry *ep;
if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &failobj)) if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &failobj))
return NULL; return NULL;
@ -1595,8 +1640,10 @@ dict_get(register dictobject *mp, PyObject *args)
if (hash == -1) if (hash == -1)
return NULL; return NULL;
} }
val = (mp->ma_lookup)(mp, key, hash)->me_value; ep = (mp->ma_lookup)(mp, key, hash);
if (ep == NULL)
return NULL;
val = ep->me_value;
if (val == NULL) if (val == NULL)
val = failobj; val = failobj;
Py_INCREF(val); Py_INCREF(val);
@ -1611,6 +1658,7 @@ dict_setdefault(register dictobject *mp, PyObject *args)
PyObject *failobj = Py_None; PyObject *failobj = Py_None;
PyObject *val = NULL; PyObject *val = NULL;
long hash; long hash;
dictentry *ep;
if (!PyArg_UnpackTuple(args, "setdefault", 1, 2, &key, &failobj)) if (!PyArg_UnpackTuple(args, "setdefault", 1, 2, &key, &failobj))
return NULL; return NULL;
@ -1621,7 +1669,10 @@ dict_setdefault(register dictobject *mp, PyObject *args)
if (hash == -1) if (hash == -1)
return NULL; return NULL;
} }
val = (mp->ma_lookup)(mp, key, hash)->me_value; ep = (mp->ma_lookup)(mp, key, hash);
if (ep == NULL)
return NULL;
val = ep->me_value;
if (val == NULL) { if (val == NULL) {
val = failobj; val = failobj;
if (PyDict_SetItem((PyObject*)mp, key, failobj)) if (PyDict_SetItem((PyObject*)mp, key, failobj))
@ -1665,6 +1716,8 @@ dict_pop(dictobject *mp, PyObject *args)
return NULL; return NULL;
} }
ep = (mp->ma_lookup)(mp, key, hash); ep = (mp->ma_lookup)(mp, key, hash);
if (ep == NULL)
return NULL;
if (ep->me_value == NULL) { if (ep->me_value == NULL) {
if (deflt) { if (deflt) {
Py_INCREF(deflt); Py_INCREF(deflt);
@ -1884,6 +1937,7 @@ PyDict_Contains(PyObject *op, PyObject *key)
{ {
long hash; long hash;
dictobject *mp = (dictobject *)op; dictobject *mp = (dictobject *)op;
dictentry *ep;
if (!PyString_CheckExact(key) || if (!PyString_CheckExact(key) ||
(hash = ((PyStringObject *) key)->ob_shash) == -1) { (hash = ((PyStringObject *) key)->ob_shash) == -1) {
@ -1891,7 +1945,10 @@ PyDict_Contains(PyObject *op, PyObject *key)
if (hash == -1) if (hash == -1)
return -1; return -1;
} }
return (mp->ma_lookup)(mp, key, hash)->me_value != NULL; ep = (mp->ma_lookup)(mp, key, hash);
if (ep == NULL)
return -1;
return ep->me_value != NULL;
} }
/* Hack to implement "key in dict" */ /* Hack to implement "key in dict" */

View File

@ -1855,15 +1855,26 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
long hash = ((PyStringObject *)w)->ob_shash; long hash = ((PyStringObject *)w)->ob_shash;
if (hash != -1) { if (hash != -1) {
PyDictObject *d; PyDictObject *d;
PyDictEntry *e;
d = (PyDictObject *)(f->f_globals); d = (PyDictObject *)(f->f_globals);
x = d->ma_lookup(d, w, hash)->me_value; e = d->ma_lookup(d, w, hash);
if (e == NULL) {
x = NULL;
break;
}
x = e->me_value;
if (x != NULL) { if (x != NULL) {
Py_INCREF(x); Py_INCREF(x);
PUSH(x); PUSH(x);
continue; continue;
} }
d = (PyDictObject *)(f->f_builtins); d = (PyDictObject *)(f->f_builtins);
x = d->ma_lookup(d, w, hash)->me_value; e = d->ma_lookup(d, w, hash);
if (e == NULL) {
x = NULL;
break;
}
x = e->me_value;
if (x != NULL) { if (x != NULL) {
Py_INCREF(x); Py_INCREF(x);
PUSH(x); PUSH(x);