diff --git a/Lib/test/output/test_mutants b/Lib/test/output/test_mutants new file mode 100644 index 00000000000..259c4f56bc5 --- /dev/null +++ b/Lib/test/output/test_mutants @@ -0,0 +1 @@ +test_mutants diff --git a/Lib/test/test_mutants.py b/Lib/test/test_mutants.py new file mode 100644 index 00000000000..42efb6c88da --- /dev/null +++ b/Lib/test/test_mutants.py @@ -0,0 +1,138 @@ +from test_support import verbose +import random + +# From SF bug #422121: Insecurities in dict comparison. + +# Safety of code doing comparisons has been an historical Python waak spot. +# The problem is that comparison of structures in written in C *naturally* +# wants to hold on to things like the size of the container, or "the +# biggest" containee so far, across a traversal of the container; but +# code to do containee comparisons can call back into Python and mutate +# the container in arbitrary ways while the C loop is in midstream. If the +# C code isn't extremely paranoid about digging things out of memory on +# each trip, and artificially boosting refcounts for the duration, anything +# from infinite loops to OS crashes can result (yes, I use Windows ). +# +# The other problem is that code designed to provoke a weakness is usually +# white-box code, and so catches only the particular vulnerabilities the +# author knew to protect against. For example, Python's list.sort() code +# went thru many iterations as one "new" vulnerability after another was +# discovered. +# +# So the dict comparison test here uses a black-box approach instead, +# generating dicts of various sizes at random, and performing random +# mutations on them at random times. This proved very effective, +# triggering at least six distinct failure modes the first 20 times I +# ran it. Indeed, at the start, the driver never got beyond 6 iterations +# before the test died. + +# The dicts are global to make it easy to mutate tham from within functions. +dict1 = {} +dict2 = {} + +# The current set of keys in dict1 and dict2. These are materialized as +# lists to make it easy to pick a dict key at random. +dict1keys = [] +dict2keys = [] + +# Global flag telling maybe_mutate() wether to *consider* mutating. +mutate = 0 + +# If global mutate is true, consider mutating a dict. May or may not +# mutate a dict even if mutate is true. If it does decide to mutate a +# dict, it picks one of {dict1, dict2} at random, and deletes a random +# entry from it. + +def maybe_mutate(): + if not mutate: + return + if random.random() < 0.5: + return + if random.random() < 0.5: + target, keys = dict1, dict1keys + else: + target, keys = dict2, dict2keys + if keys: + i = random.randrange(len(keys)) + key = keys[i] + del target[key] + # CAUTION: don't use keys.remove(key) here. Or do . The + # point is that .remove() would trigger more comparisons, and so + # also more calls to this routine. We're mutating often enough + # without that. + del keys[i] + +# A horrid class that triggers random mutations of dict1 and dict2 when +# instances are compared. + +class Horrid: + def __init__(self, i): + # Comparison outcomes are determined by the value of i. + self.i = i + + # An artificial hashcode is selected at random so that we don't + # have any systematic relationship between comparsion outcomes + # (based on self.i and other.i) and relative position within the + # hawh vector (based on hashcode). + self.hashcode = random.randrange(1000000000) + + def __hash__(self): + return self.hashcode + + def __cmp__(self, other): + maybe_mutate() # The point of the test. + return cmp(self.i, other.i) + + def __repr__(self): + return "Horrid(%d)" % self.i + +# Fill dict d with numentries (Horrid(i), Horrid(j)) key-value pairs, +# where i and j are selected at random from the candidates list. +# Return d.keys() after filling. + +def fill_dict(d, candidates, numentries): + d.clear() + for i in xrange(numentries): + d[Horrid(random.choice(candidates))] = \ + Horrid(random.choice(candidates)) + return d.keys() + +# Test one pair of randomly generated dicts, each with n entries. +# Note that dict comparison is trivial if they don't have the same number +# of entires (then the "shorter" dict is instantly considered to be the +# smaller one, without even looking at the entries). + +def test_one(n): + global mutate, dict1, dict2, dict1keys, dict2keys + + # Fill the dicts without mutating them. + mutate = 0 + dict1keys = fill_dict(dict1, range(n), n) + dict2keys = fill_dict(dict2, range(n), n) + + # Enable mutation, then compare the dicts so long as they have the + # same size. + mutate = 1 + if verbose: + print "trying w/ lengths", len(dict1), len(dict2), + while dict1 and len(dict1) == len(dict2): + if verbose: + print ".", + c = cmp(dict1, dict2) + if verbose: + print + +# Run test_one n times. At the start (before the bugs were fixed), 20 +# consecutive runs of this test each blew up on or before the sixth time +# test_one was run. So n doesn't have to be large to get an interesting +# test. +# OTOH, calling with large n is also interesting, to ensure that the fixed +# code doesn't hold on to refcounts *too* long (in which case memory would +# leak). + +def test(n): + for i in xrange(n): + test_one(random.randrange(1, 100)) + +# See last comment block for clues about good values for n. +test(100) diff --git a/Misc/NEWS b/Misc/NEWS index 79066131a4a..8b4021f85d5 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -47,6 +47,11 @@ Core - Comparing dictionary objects via == and != is faster, and now works even if the keys and values don't support comparisons other than ==. +- Comparing dictionaries in ways other than == and != is slower: there were + insecurities in the dict comparison implementation that could cause Python + to crash if the element comparison routines for the dict keys and/or + values mutated the dicts. Making the code bulletproof slowed it down. + What's New in Python 2.1 (final)? ================================= diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 56cc08f35c4..3f77427aca5 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -981,43 +981,83 @@ PyDict_Items(PyObject *mp) /* Subroutine which returns the smallest key in a for which b's value is different or absent. The value is returned too, through the - pval argument. No reference counts are incremented. */ + pval argument. Both are NULL if no key in a is found for which b's status + differs. The refcounts on (and only on) non-NULL *pval and function return + values must be decremented by the caller (characterize() increments them + to ensure that mutating comparison and PyDict_GetItem calls can't delete + them before the caller is done looking at them). */ static PyObject * characterize(dictobject *a, dictobject *b, PyObject **pval) { - PyObject *diff = NULL; + PyObject *akey = NULL; /* smallest key in a s.t. a[akey] != b[akey] */ + PyObject *aval = NULL; /* a[akey] */ int i, cmp; - *pval = NULL; for (i = 0; i < a->ma_size; i++) { - if (a->ma_table[i].me_value != NULL) { - PyObject *key = a->ma_table[i].me_key; - PyObject *aval, *bval; - if (diff != NULL) { - cmp = PyObject_RichCompareBool(diff, key, Py_LT); - if (cmp < 0) - return NULL; - if (cmp > 0) + PyObject *thiskey, *thisaval, *thisbval; + if (a->ma_table[i].me_value == NULL) + continue; + thiskey = a->ma_table[i].me_key; + Py_INCREF(thiskey); /* keep alive across compares */ + if (akey != NULL) { + cmp = PyObject_RichCompareBool(akey, thiskey, Py_LT); + if (cmp < 0) { + Py_DECREF(thiskey); + goto Fail; + } + if (cmp > 0 || + i >= a->ma_size || + a->ma_table[i].me_value == NULL) + { + /* Not the *smallest* a key; or maybe it is + * but the compare shrunk the dict so we can't + * find its associated value anymore; or + * maybe it is but the compare deleted the + * a[thiskey] entry. + */ + Py_DECREF(thiskey); continue; } - aval = a->ma_table[i].me_value; - bval = PyDict_GetItem((PyObject *)b, key); - if (bval == NULL) - cmp = 0; - else { - cmp = PyObject_RichCompareBool(aval, bval, Py_EQ); - if (cmp < 0) - return NULL; - } - if (cmp == 0) - { - diff = key; - *pval = aval; + } + + /* Compare a[thiskey] to b[thiskey]; cmp <- true iff equal. */ + thisaval = a->ma_table[i].me_value; + assert(thisaval); + Py_INCREF(thisaval); /* keep alive */ + thisbval = PyDict_GetItem((PyObject *)b, thiskey); + if (thisbval == NULL) + cmp = 0; + else { + /* both dicts have thiskey: same values? */ + cmp = PyObject_RichCompareBool( + thisaval, thisbval, Py_EQ); + if (cmp < 0) { + Py_DECREF(thiskey); + Py_DECREF(thisaval); + goto Fail; } } + if (cmp == 0) { + /* New winner. */ + Py_XDECREF(akey); + Py_XDECREF(aval); + akey = thiskey; + aval = thisaval; + } + else { + Py_DECREF(thiskey); + Py_DECREF(thisaval); + } } - return diff; + *pval = aval; + return akey; + +Fail: + Py_XDECREF(akey); + Py_XDECREF(aval); + *pval = NULL; + return NULL; } static int @@ -1031,19 +1071,40 @@ dict_compare(dictobject *a, dictobject *b) return -1; /* a is shorter */ else if (a->ma_used > b->ma_used) return 1; /* b is shorter */ + /* Same length -- check all keys */ + bdiff = bval = NULL; adiff = characterize(a, b, &aval); - if (adiff == NULL && PyErr_Occurred()) - return -1; - if (adiff == NULL) - return 0; /* a is a subset with the same length */ + if (adiff == NULL) { + assert(!aval); + /* Either an error, or a is a subst with the same length so + * must be equal. + */ + res = PyErr_Occurred() ? -1 : 0; + goto Finished; + } bdiff = characterize(b, a, &bval); - if (bdiff == NULL && PyErr_Occurred()) - return -1; - /* bdiff == NULL would be impossible now */ - res = PyObject_Compare(adiff, bdiff); - if (res == 0) + if (bdiff == NULL && PyErr_Occurred()) { + assert(!bval); + res = -1; + goto Finished; + } + res = 0; + if (bdiff) { + /* bdiff == NULL "should be" impossible now, but perhaps + * the last comparison done by the characterize() on a had + * the side effect of making the dicts equal! + */ + res = PyObject_Compare(adiff, bdiff); + } + if (res == 0 && bval != NULL) res = PyObject_Compare(aval, bval); + +Finished: + Py_XDECREF(adiff); + Py_XDECREF(bdiff); + Py_XDECREF(aval); + Py_XDECREF(bval); return res; }