SF bug #422121 Insecurities in dict comparison.

Fixed a half dozen ways in which general dict comparison could crash
Python (even cause Win98SE to reboot) in the presence of kay and/or
value comparison routines that mutate the dict during dict comparison.
Bugfix candidate.
This commit is contained in:
Tim Peters 2001-05-10 08:32:44 +00:00
parent 66aaaae54c
commit 95bf9390a4
4 changed files with 239 additions and 34 deletions

View File

@ -0,0 +1 @@
test_mutants

138
Lib/test/test_mutants.py Normal file
View File

@ -0,0 +1,138 @@
from test_support import verbose
import random
# From SF bug #422121: Insecurities in dict comparison.
# Safety of code doing comparisons has been an historical Python waak spot.
# The problem is that comparison of structures in written in C *naturally*
# wants to hold on to things like the size of the container, or "the
# biggest" containee so far, across a traversal of the container; but
# code to do containee comparisons can call back into Python and mutate
# the container in arbitrary ways while the C loop is in midstream. If the
# C code isn't extremely paranoid about digging things out of memory on
# each trip, and artificially boosting refcounts for the duration, anything
# from infinite loops to OS crashes can result (yes, I use Windows <wink>).
#
# The other problem is that code designed to provoke a weakness is usually
# white-box code, and so catches only the particular vulnerabilities the
# author knew to protect against. For example, Python's list.sort() code
# went thru many iterations as one "new" vulnerability after another was
# discovered.
#
# So the dict comparison test here uses a black-box approach instead,
# generating dicts of various sizes at random, and performing random
# mutations on them at random times. This proved very effective,
# triggering at least six distinct failure modes the first 20 times I
# ran it. Indeed, at the start, the driver never got beyond 6 iterations
# before the test died.
# The dicts are global to make it easy to mutate tham from within functions.
dict1 = {}
dict2 = {}
# The current set of keys in dict1 and dict2. These are materialized as
# lists to make it easy to pick a dict key at random.
dict1keys = []
dict2keys = []
# Global flag telling maybe_mutate() wether to *consider* mutating.
mutate = 0
# If global mutate is true, consider mutating a dict. May or may not
# mutate a dict even if mutate is true. If it does decide to mutate a
# dict, it picks one of {dict1, dict2} at random, and deletes a random
# entry from it.
def maybe_mutate():
if not mutate:
return
if random.random() < 0.5:
return
if random.random() < 0.5:
target, keys = dict1, dict1keys
else:
target, keys = dict2, dict2keys
if keys:
i = random.randrange(len(keys))
key = keys[i]
del target[key]
# CAUTION: don't use keys.remove(key) here. Or do <wink>. The
# point is that .remove() would trigger more comparisons, and so
# also more calls to this routine. We're mutating often enough
# without that.
del keys[i]
# A horrid class that triggers random mutations of dict1 and dict2 when
# instances are compared.
class Horrid:
def __init__(self, i):
# Comparison outcomes are determined by the value of i.
self.i = i
# An artificial hashcode is selected at random so that we don't
# have any systematic relationship between comparsion outcomes
# (based on self.i and other.i) and relative position within the
# hawh vector (based on hashcode).
self.hashcode = random.randrange(1000000000)
def __hash__(self):
return self.hashcode
def __cmp__(self, other):
maybe_mutate() # The point of the test.
return cmp(self.i, other.i)
def __repr__(self):
return "Horrid(%d)" % self.i
# Fill dict d with numentries (Horrid(i), Horrid(j)) key-value pairs,
# where i and j are selected at random from the candidates list.
# Return d.keys() after filling.
def fill_dict(d, candidates, numentries):
d.clear()
for i in xrange(numentries):
d[Horrid(random.choice(candidates))] = \
Horrid(random.choice(candidates))
return d.keys()
# Test one pair of randomly generated dicts, each with n entries.
# Note that dict comparison is trivial if they don't have the same number
# of entires (then the "shorter" dict is instantly considered to be the
# smaller one, without even looking at the entries).
def test_one(n):
global mutate, dict1, dict2, dict1keys, dict2keys
# Fill the dicts without mutating them.
mutate = 0
dict1keys = fill_dict(dict1, range(n), n)
dict2keys = fill_dict(dict2, range(n), n)
# Enable mutation, then compare the dicts so long as they have the
# same size.
mutate = 1
if verbose:
print "trying w/ lengths", len(dict1), len(dict2),
while dict1 and len(dict1) == len(dict2):
if verbose:
print ".",
c = cmp(dict1, dict2)
if verbose:
print
# Run test_one n times. At the start (before the bugs were fixed), 20
# consecutive runs of this test each blew up on or before the sixth time
# test_one was run. So n doesn't have to be large to get an interesting
# test.
# OTOH, calling with large n is also interesting, to ensure that the fixed
# code doesn't hold on to refcounts *too* long (in which case memory would
# leak).
def test(n):
for i in xrange(n):
test_one(random.randrange(1, 100))
# See last comment block for clues about good values for n.
test(100)

View File

@ -47,6 +47,11 @@ Core
- Comparing dictionary objects via == and != is faster, and now works even - Comparing dictionary objects via == and != is faster, and now works even
if the keys and values don't support comparisons other than ==. if the keys and values don't support comparisons other than ==.
- Comparing dictionaries in ways other than == and != is slower: there were
insecurities in the dict comparison implementation that could cause Python
to crash if the element comparison routines for the dict keys and/or
values mutated the dicts. Making the code bulletproof slowed it down.
What's New in Python 2.1 (final)? What's New in Python 2.1 (final)?
================================= =================================

View File

@ -981,43 +981,83 @@ PyDict_Items(PyObject *mp)
/* Subroutine which returns the smallest key in a for which b's value /* Subroutine which returns the smallest key in a for which b's value
is different or absent. The value is returned too, through the is different or absent. The value is returned too, through the
pval argument. No reference counts are incremented. */ pval argument. Both are NULL if no key in a is found for which b's status
differs. The refcounts on (and only on) non-NULL *pval and function return
values must be decremented by the caller (characterize() increments them
to ensure that mutating comparison and PyDict_GetItem calls can't delete
them before the caller is done looking at them). */
static PyObject * static PyObject *
characterize(dictobject *a, dictobject *b, PyObject **pval) characterize(dictobject *a, dictobject *b, PyObject **pval)
{ {
PyObject *diff = NULL; PyObject *akey = NULL; /* smallest key in a s.t. a[akey] != b[akey] */
PyObject *aval = NULL; /* a[akey] */
int i, cmp; int i, cmp;
*pval = NULL;
for (i = 0; i < a->ma_size; i++) { for (i = 0; i < a->ma_size; i++) {
if (a->ma_table[i].me_value != NULL) { PyObject *thiskey, *thisaval, *thisbval;
PyObject *key = a->ma_table[i].me_key; if (a->ma_table[i].me_value == NULL)
PyObject *aval, *bval; continue;
if (diff != NULL) { thiskey = a->ma_table[i].me_key;
cmp = PyObject_RichCompareBool(diff, key, Py_LT); Py_INCREF(thiskey); /* keep alive across compares */
if (cmp < 0) if (akey != NULL) {
return NULL; cmp = PyObject_RichCompareBool(akey, thiskey, Py_LT);
if (cmp > 0) if (cmp < 0) {
Py_DECREF(thiskey);
goto Fail;
}
if (cmp > 0 ||
i >= a->ma_size ||
a->ma_table[i].me_value == NULL)
{
/* Not the *smallest* a key; or maybe it is
* but the compare shrunk the dict so we can't
* find its associated value anymore; or
* maybe it is but the compare deleted the
* a[thiskey] entry.
*/
Py_DECREF(thiskey);
continue; continue;
} }
aval = a->ma_table[i].me_value; }
bval = PyDict_GetItem((PyObject *)b, key);
if (bval == NULL) /* Compare a[thiskey] to b[thiskey]; cmp <- true iff equal. */
thisaval = a->ma_table[i].me_value;
assert(thisaval);
Py_INCREF(thisaval); /* keep alive */
thisbval = PyDict_GetItem((PyObject *)b, thiskey);
if (thisbval == NULL)
cmp = 0; cmp = 0;
else { else {
cmp = PyObject_RichCompareBool(aval, bval, Py_EQ); /* both dicts have thiskey: same values? */
if (cmp < 0) cmp = PyObject_RichCompareBool(
return NULL; thisaval, thisbval, Py_EQ);
if (cmp < 0) {
Py_DECREF(thiskey);
Py_DECREF(thisaval);
goto Fail;
}
}
if (cmp == 0) {
/* New winner. */
Py_XDECREF(akey);
Py_XDECREF(aval);
akey = thiskey;
aval = thisaval;
}
else {
Py_DECREF(thiskey);
Py_DECREF(thisaval);
}
} }
if (cmp == 0)
{
diff = key;
*pval = aval; *pval = aval;
} return akey;
}
} Fail:
return diff; Py_XDECREF(akey);
Py_XDECREF(aval);
*pval = NULL;
return NULL;
} }
static int static int
@ -1031,19 +1071,40 @@ dict_compare(dictobject *a, dictobject *b)
return -1; /* a is shorter */ return -1; /* a is shorter */
else if (a->ma_used > b->ma_used) else if (a->ma_used > b->ma_used)
return 1; /* b is shorter */ return 1; /* b is shorter */
/* Same length -- check all keys */ /* Same length -- check all keys */
bdiff = bval = NULL;
adiff = characterize(a, b, &aval); adiff = characterize(a, b, &aval);
if (adiff == NULL && PyErr_Occurred()) if (adiff == NULL) {
return -1; assert(!aval);
if (adiff == NULL) /* Either an error, or a is a subst with the same length so
return 0; /* a is a subset with the same length */ * must be equal.
*/
res = PyErr_Occurred() ? -1 : 0;
goto Finished;
}
bdiff = characterize(b, a, &bval); bdiff = characterize(b, a, &bval);
if (bdiff == NULL && PyErr_Occurred()) if (bdiff == NULL && PyErr_Occurred()) {
return -1; assert(!bval);
/* bdiff == NULL would be impossible now */ res = -1;
goto Finished;
}
res = 0;
if (bdiff) {
/* bdiff == NULL "should be" impossible now, but perhaps
* the last comparison done by the characterize() on a had
* the side effect of making the dicts equal!
*/
res = PyObject_Compare(adiff, bdiff); res = PyObject_Compare(adiff, bdiff);
if (res == 0) }
if (res == 0 && bval != NULL)
res = PyObject_Compare(aval, bval); res = PyObject_Compare(aval, bval);
Finished:
Py_XDECREF(adiff);
Py_XDECREF(bdiff);
Py_XDECREF(aval);
Py_XDECREF(bval);
return res; return res;
} }