mirror of https://github.com/python/cpython
SF bug #422121 Insecurities in dict comparison.
Fixed a half dozen ways in which general dict comparison could crash Python (even cause Win98SE to reboot) in the presence of kay and/or value comparison routines that mutate the dict during dict comparison. Bugfix candidate.
This commit is contained in:
parent
66aaaae54c
commit
95bf9390a4
|
@ -0,0 +1 @@
|
||||||
|
test_mutants
|
|
@ -0,0 +1,138 @@
|
||||||
|
from test_support import verbose
|
||||||
|
import random
|
||||||
|
|
||||||
|
# From SF bug #422121: Insecurities in dict comparison.
|
||||||
|
|
||||||
|
# Safety of code doing comparisons has been an historical Python waak spot.
|
||||||
|
# The problem is that comparison of structures in written in C *naturally*
|
||||||
|
# wants to hold on to things like the size of the container, or "the
|
||||||
|
# biggest" containee so far, across a traversal of the container; but
|
||||||
|
# code to do containee comparisons can call back into Python and mutate
|
||||||
|
# the container in arbitrary ways while the C loop is in midstream. If the
|
||||||
|
# C code isn't extremely paranoid about digging things out of memory on
|
||||||
|
# each trip, and artificially boosting refcounts for the duration, anything
|
||||||
|
# from infinite loops to OS crashes can result (yes, I use Windows <wink>).
|
||||||
|
#
|
||||||
|
# The other problem is that code designed to provoke a weakness is usually
|
||||||
|
# white-box code, and so catches only the particular vulnerabilities the
|
||||||
|
# author knew to protect against. For example, Python's list.sort() code
|
||||||
|
# went thru many iterations as one "new" vulnerability after another was
|
||||||
|
# discovered.
|
||||||
|
#
|
||||||
|
# So the dict comparison test here uses a black-box approach instead,
|
||||||
|
# generating dicts of various sizes at random, and performing random
|
||||||
|
# mutations on them at random times. This proved very effective,
|
||||||
|
# triggering at least six distinct failure modes the first 20 times I
|
||||||
|
# ran it. Indeed, at the start, the driver never got beyond 6 iterations
|
||||||
|
# before the test died.
|
||||||
|
|
||||||
|
# The dicts are global to make it easy to mutate tham from within functions.
|
||||||
|
dict1 = {}
|
||||||
|
dict2 = {}
|
||||||
|
|
||||||
|
# The current set of keys in dict1 and dict2. These are materialized as
|
||||||
|
# lists to make it easy to pick a dict key at random.
|
||||||
|
dict1keys = []
|
||||||
|
dict2keys = []
|
||||||
|
|
||||||
|
# Global flag telling maybe_mutate() wether to *consider* mutating.
|
||||||
|
mutate = 0
|
||||||
|
|
||||||
|
# If global mutate is true, consider mutating a dict. May or may not
|
||||||
|
# mutate a dict even if mutate is true. If it does decide to mutate a
|
||||||
|
# dict, it picks one of {dict1, dict2} at random, and deletes a random
|
||||||
|
# entry from it.
|
||||||
|
|
||||||
|
def maybe_mutate():
|
||||||
|
if not mutate:
|
||||||
|
return
|
||||||
|
if random.random() < 0.5:
|
||||||
|
return
|
||||||
|
if random.random() < 0.5:
|
||||||
|
target, keys = dict1, dict1keys
|
||||||
|
else:
|
||||||
|
target, keys = dict2, dict2keys
|
||||||
|
if keys:
|
||||||
|
i = random.randrange(len(keys))
|
||||||
|
key = keys[i]
|
||||||
|
del target[key]
|
||||||
|
# CAUTION: don't use keys.remove(key) here. Or do <wink>. The
|
||||||
|
# point is that .remove() would trigger more comparisons, and so
|
||||||
|
# also more calls to this routine. We're mutating often enough
|
||||||
|
# without that.
|
||||||
|
del keys[i]
|
||||||
|
|
||||||
|
# A horrid class that triggers random mutations of dict1 and dict2 when
|
||||||
|
# instances are compared.
|
||||||
|
|
||||||
|
class Horrid:
|
||||||
|
def __init__(self, i):
|
||||||
|
# Comparison outcomes are determined by the value of i.
|
||||||
|
self.i = i
|
||||||
|
|
||||||
|
# An artificial hashcode is selected at random so that we don't
|
||||||
|
# have any systematic relationship between comparsion outcomes
|
||||||
|
# (based on self.i and other.i) and relative position within the
|
||||||
|
# hawh vector (based on hashcode).
|
||||||
|
self.hashcode = random.randrange(1000000000)
|
||||||
|
|
||||||
|
def __hash__(self):
|
||||||
|
return self.hashcode
|
||||||
|
|
||||||
|
def __cmp__(self, other):
|
||||||
|
maybe_mutate() # The point of the test.
|
||||||
|
return cmp(self.i, other.i)
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return "Horrid(%d)" % self.i
|
||||||
|
|
||||||
|
# Fill dict d with numentries (Horrid(i), Horrid(j)) key-value pairs,
|
||||||
|
# where i and j are selected at random from the candidates list.
|
||||||
|
# Return d.keys() after filling.
|
||||||
|
|
||||||
|
def fill_dict(d, candidates, numentries):
|
||||||
|
d.clear()
|
||||||
|
for i in xrange(numentries):
|
||||||
|
d[Horrid(random.choice(candidates))] = \
|
||||||
|
Horrid(random.choice(candidates))
|
||||||
|
return d.keys()
|
||||||
|
|
||||||
|
# Test one pair of randomly generated dicts, each with n entries.
|
||||||
|
# Note that dict comparison is trivial if they don't have the same number
|
||||||
|
# of entires (then the "shorter" dict is instantly considered to be the
|
||||||
|
# smaller one, without even looking at the entries).
|
||||||
|
|
||||||
|
def test_one(n):
|
||||||
|
global mutate, dict1, dict2, dict1keys, dict2keys
|
||||||
|
|
||||||
|
# Fill the dicts without mutating them.
|
||||||
|
mutate = 0
|
||||||
|
dict1keys = fill_dict(dict1, range(n), n)
|
||||||
|
dict2keys = fill_dict(dict2, range(n), n)
|
||||||
|
|
||||||
|
# Enable mutation, then compare the dicts so long as they have the
|
||||||
|
# same size.
|
||||||
|
mutate = 1
|
||||||
|
if verbose:
|
||||||
|
print "trying w/ lengths", len(dict1), len(dict2),
|
||||||
|
while dict1 and len(dict1) == len(dict2):
|
||||||
|
if verbose:
|
||||||
|
print ".",
|
||||||
|
c = cmp(dict1, dict2)
|
||||||
|
if verbose:
|
||||||
|
print
|
||||||
|
|
||||||
|
# Run test_one n times. At the start (before the bugs were fixed), 20
|
||||||
|
# consecutive runs of this test each blew up on or before the sixth time
|
||||||
|
# test_one was run. So n doesn't have to be large to get an interesting
|
||||||
|
# test.
|
||||||
|
# OTOH, calling with large n is also interesting, to ensure that the fixed
|
||||||
|
# code doesn't hold on to refcounts *too* long (in which case memory would
|
||||||
|
# leak).
|
||||||
|
|
||||||
|
def test(n):
|
||||||
|
for i in xrange(n):
|
||||||
|
test_one(random.randrange(1, 100))
|
||||||
|
|
||||||
|
# See last comment block for clues about good values for n.
|
||||||
|
test(100)
|
|
@ -47,6 +47,11 @@ Core
|
||||||
- Comparing dictionary objects via == and != is faster, and now works even
|
- Comparing dictionary objects via == and != is faster, and now works even
|
||||||
if the keys and values don't support comparisons other than ==.
|
if the keys and values don't support comparisons other than ==.
|
||||||
|
|
||||||
|
- Comparing dictionaries in ways other than == and != is slower: there were
|
||||||
|
insecurities in the dict comparison implementation that could cause Python
|
||||||
|
to crash if the element comparison routines for the dict keys and/or
|
||||||
|
values mutated the dicts. Making the code bulletproof slowed it down.
|
||||||
|
|
||||||
|
|
||||||
What's New in Python 2.1 (final)?
|
What's New in Python 2.1 (final)?
|
||||||
=================================
|
=================================
|
||||||
|
|
|
@ -981,43 +981,83 @@ PyDict_Items(PyObject *mp)
|
||||||
|
|
||||||
/* Subroutine which returns the smallest key in a for which b's value
|
/* Subroutine which returns the smallest key in a for which b's value
|
||||||
is different or absent. The value is returned too, through the
|
is different or absent. The value is returned too, through the
|
||||||
pval argument. No reference counts are incremented. */
|
pval argument. Both are NULL if no key in a is found for which b's status
|
||||||
|
differs. The refcounts on (and only on) non-NULL *pval and function return
|
||||||
|
values must be decremented by the caller (characterize() increments them
|
||||||
|
to ensure that mutating comparison and PyDict_GetItem calls can't delete
|
||||||
|
them before the caller is done looking at them). */
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
characterize(dictobject *a, dictobject *b, PyObject **pval)
|
characterize(dictobject *a, dictobject *b, PyObject **pval)
|
||||||
{
|
{
|
||||||
PyObject *diff = NULL;
|
PyObject *akey = NULL; /* smallest key in a s.t. a[akey] != b[akey] */
|
||||||
|
PyObject *aval = NULL; /* a[akey] */
|
||||||
int i, cmp;
|
int i, cmp;
|
||||||
|
|
||||||
*pval = NULL;
|
|
||||||
for (i = 0; i < a->ma_size; i++) {
|
for (i = 0; i < a->ma_size; i++) {
|
||||||
if (a->ma_table[i].me_value != NULL) {
|
PyObject *thiskey, *thisaval, *thisbval;
|
||||||
PyObject *key = a->ma_table[i].me_key;
|
if (a->ma_table[i].me_value == NULL)
|
||||||
PyObject *aval, *bval;
|
continue;
|
||||||
if (diff != NULL) {
|
thiskey = a->ma_table[i].me_key;
|
||||||
cmp = PyObject_RichCompareBool(diff, key, Py_LT);
|
Py_INCREF(thiskey); /* keep alive across compares */
|
||||||
if (cmp < 0)
|
if (akey != NULL) {
|
||||||
return NULL;
|
cmp = PyObject_RichCompareBool(akey, thiskey, Py_LT);
|
||||||
if (cmp > 0)
|
if (cmp < 0) {
|
||||||
|
Py_DECREF(thiskey);
|
||||||
|
goto Fail;
|
||||||
|
}
|
||||||
|
if (cmp > 0 ||
|
||||||
|
i >= a->ma_size ||
|
||||||
|
a->ma_table[i].me_value == NULL)
|
||||||
|
{
|
||||||
|
/* Not the *smallest* a key; or maybe it is
|
||||||
|
* but the compare shrunk the dict so we can't
|
||||||
|
* find its associated value anymore; or
|
||||||
|
* maybe it is but the compare deleted the
|
||||||
|
* a[thiskey] entry.
|
||||||
|
*/
|
||||||
|
Py_DECREF(thiskey);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
aval = a->ma_table[i].me_value;
|
}
|
||||||
bval = PyDict_GetItem((PyObject *)b, key);
|
|
||||||
if (bval == NULL)
|
/* Compare a[thiskey] to b[thiskey]; cmp <- true iff equal. */
|
||||||
|
thisaval = a->ma_table[i].me_value;
|
||||||
|
assert(thisaval);
|
||||||
|
Py_INCREF(thisaval); /* keep alive */
|
||||||
|
thisbval = PyDict_GetItem((PyObject *)b, thiskey);
|
||||||
|
if (thisbval == NULL)
|
||||||
cmp = 0;
|
cmp = 0;
|
||||||
else {
|
else {
|
||||||
cmp = PyObject_RichCompareBool(aval, bval, Py_EQ);
|
/* both dicts have thiskey: same values? */
|
||||||
if (cmp < 0)
|
cmp = PyObject_RichCompareBool(
|
||||||
return NULL;
|
thisaval, thisbval, Py_EQ);
|
||||||
|
if (cmp < 0) {
|
||||||
|
Py_DECREF(thiskey);
|
||||||
|
Py_DECREF(thisaval);
|
||||||
|
goto Fail;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (cmp == 0) {
|
||||||
|
/* New winner. */
|
||||||
|
Py_XDECREF(akey);
|
||||||
|
Py_XDECREF(aval);
|
||||||
|
akey = thiskey;
|
||||||
|
aval = thisaval;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
Py_DECREF(thiskey);
|
||||||
|
Py_DECREF(thisaval);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (cmp == 0)
|
|
||||||
{
|
|
||||||
diff = key;
|
|
||||||
*pval = aval;
|
*pval = aval;
|
||||||
}
|
return akey;
|
||||||
}
|
|
||||||
}
|
Fail:
|
||||||
return diff;
|
Py_XDECREF(akey);
|
||||||
|
Py_XDECREF(aval);
|
||||||
|
*pval = NULL;
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
@ -1031,19 +1071,40 @@ dict_compare(dictobject *a, dictobject *b)
|
||||||
return -1; /* a is shorter */
|
return -1; /* a is shorter */
|
||||||
else if (a->ma_used > b->ma_used)
|
else if (a->ma_used > b->ma_used)
|
||||||
return 1; /* b is shorter */
|
return 1; /* b is shorter */
|
||||||
|
|
||||||
/* Same length -- check all keys */
|
/* Same length -- check all keys */
|
||||||
|
bdiff = bval = NULL;
|
||||||
adiff = characterize(a, b, &aval);
|
adiff = characterize(a, b, &aval);
|
||||||
if (adiff == NULL && PyErr_Occurred())
|
if (adiff == NULL) {
|
||||||
return -1;
|
assert(!aval);
|
||||||
if (adiff == NULL)
|
/* Either an error, or a is a subst with the same length so
|
||||||
return 0; /* a is a subset with the same length */
|
* must be equal.
|
||||||
|
*/
|
||||||
|
res = PyErr_Occurred() ? -1 : 0;
|
||||||
|
goto Finished;
|
||||||
|
}
|
||||||
bdiff = characterize(b, a, &bval);
|
bdiff = characterize(b, a, &bval);
|
||||||
if (bdiff == NULL && PyErr_Occurred())
|
if (bdiff == NULL && PyErr_Occurred()) {
|
||||||
return -1;
|
assert(!bval);
|
||||||
/* bdiff == NULL would be impossible now */
|
res = -1;
|
||||||
|
goto Finished;
|
||||||
|
}
|
||||||
|
res = 0;
|
||||||
|
if (bdiff) {
|
||||||
|
/* bdiff == NULL "should be" impossible now, but perhaps
|
||||||
|
* the last comparison done by the characterize() on a had
|
||||||
|
* the side effect of making the dicts equal!
|
||||||
|
*/
|
||||||
res = PyObject_Compare(adiff, bdiff);
|
res = PyObject_Compare(adiff, bdiff);
|
||||||
if (res == 0)
|
}
|
||||||
|
if (res == 0 && bval != NULL)
|
||||||
res = PyObject_Compare(aval, bval);
|
res = PyObject_Compare(aval, bval);
|
||||||
|
|
||||||
|
Finished:
|
||||||
|
Py_XDECREF(adiff);
|
||||||
|
Py_XDECREF(bdiff);
|
||||||
|
Py_XDECREF(aval);
|
||||||
|
Py_XDECREF(bval);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue