Cleanup yielding a small speed boost: before rich comparisons were
introduced, list.sort() was rewritten to use only the "< or not <?"
distinction.  After rich comparisons were introduced, docompare() was
fiddled to translate a Py_LT Boolean result into the old "-1 for <,
0 for ==, 1 for >" flavor of outcome, and the sorting code was left
alone.  This left things more obscure than they should be, and turns
out it also cost measurable cycles.

So:  The old CMPERROR novelty is gone.  docompare() is renamed to islt(),
and now has the same return conditions as PyObject_RichCompareBool.  The
SETK macro is renamed to IFLT, and is even weirder than before (don't
complain unless you want to maintain the sort code <wink>).

Overall, this yields a 1-2% speedup in the usual (no explicit function
passed to list.sort()) case when sorting arrays of floats (as sortperf.py
does).  The boost is higher for arrays of ints.
This commit is contained in:
Tim Peters 2002-07-19 03:30:57 +00:00
parent 3b01a1217f
commit a8c974c157
1 changed files with 32 additions and 50 deletions

View File

@ -758,39 +758,28 @@ reverse_slice(PyObject **lo, PyObject **hi)
/* New quicksort implementation for arrays of object pointers.
Thanks to discussions with Tim Peters. */
/* CMPERROR is returned by our comparison function when an error
occurred. This is the largest negative integer (0x80000000 on a
32-bit system). */
#define CMPERROR ( (int) ((unsigned int)1 << (8*sizeof(int) - 1)) )
/* Comparison function. Takes care of calling a user-supplied
comparison function (any callable Python object). Calls the
standard comparison function, PyObject_Compare(), if the user-
supplied function is NULL. */
standard comparison function, PyObject_RichCompareBool(), if the user-
supplied function is NULL.
Returns <0 on error, >0 if x < y, 0 if x >= y. */
static int
docompare(PyObject *x, PyObject *y, PyObject *compare)
islt(PyObject *x, PyObject *y, PyObject *compare)
{
PyObject *res;
PyObject *args;
int i;
if (compare == NULL) {
/* NOTE: we rely on the fact here that the sorting algorithm
only ever checks whether k<0, i.e., whether x<y. So we
invoke the rich comparison function with Py_LT ('<'), and
return -1 when it returns true and 0 when it returns
false. */
i = PyObject_RichCompareBool(x, y, Py_LT);
if (i < 0)
return CMPERROR;
else
return -i;
}
if (compare == NULL)
return PyObject_RichCompareBool(x, y, Py_LT);
/* Call the user's comparison function and translate the 3-way
* result into true or false (or error).
*/
args = PyTuple_New(2);
if (args == NULL)
return CMPERROR;
return -1;
Py_INCREF(x);
Py_INCREF(y);
PyTuple_SET_ITEM(args, 0, x);
@ -798,20 +787,16 @@ docompare(PyObject *x, PyObject *y, PyObject *compare)
res = PyObject_Call(compare, args, NULL);
Py_DECREF(args);
if (res == NULL)
return CMPERROR;
return -1;
if (!PyInt_Check(res)) {
Py_DECREF(res);
PyErr_SetString(PyExc_TypeError,
"comparison function must return int");
return CMPERROR;
return -1;
}
i = PyInt_AsLong(res);
Py_DECREF(res);
if (i < 0)
return -1;
if (i > 0)
return 1;
return 0;
return i < 0;
}
/* MINSIZE is the smallest array that will get a full-blown samplesort
@ -850,17 +835,21 @@ docompare(PyObject *x, PyObject *y, PyObject *compare)
exactly in two. */
#define STACKSIZE 60
#define SETK(X,Y) if ((k = docompare(X,Y,compare))==CMPERROR) goto fail
/* Compare X to Y via islt(). Goto "fail" if the comparison raises an
error. Else "k" is set to true iff X<Y, and an "if (k)" block is
started. It makes more sense in context <wink>. X and Y are PyObject*s.
*/
#define IFLT(X, Y) if ((k = islt(X, Y, compare)) < 0) goto fail; \
if (k)
/* binarysort is the best method for sorting small arrays: it does
few compares, but can do data movement quadratic in the number of
elements.
[lo, hi) is a contiguous slice of a list, and is sorted via
binary insertion.
binary insertion. This sort is stable.
On entry, must have lo <= start <= hi, and that [lo, start) is already
sorted (pass start == lo if you don't know!).
If docompare complains (returns CMPERROR) return -1, else 0.
If islt() complains return -1, else 0.
Even in case of error, the output slice will be some permutation of
the input (nothing is lost or duplicated).
*/
@ -869,12 +858,12 @@ static int
binarysort(PyObject **lo, PyObject **hi, PyObject **start, PyObject *compare)
/* compare -- comparison function object, or NULL for default */
{
/* assert lo <= start <= hi
assert [lo, start) is sorted */
register int k;
register PyObject **l, **p, **r;
register PyObject *pivot;
assert(lo <= start && start <= hi);
/* assert [lo, start) is sorted */
if (lo == start)
++start;
for (; start < hi; ++start) {
@ -884,8 +873,7 @@ binarysort(PyObject **lo, PyObject **hi, PyObject **start, PyObject *compare)
pivot = *r;
do {
p = l + ((r - l) >> 1);
SETK(pivot, *p);
if (k < 0)
IFLT(pivot, *p)
r = p;
else
l = p + 1;
@ -906,7 +894,7 @@ binarysort(PyObject **lo, PyObject **hi, PyObject **start, PyObject *compare)
/* samplesortslice is the sorting workhorse.
[lo, hi) is a contiguous slice of a list, to be sorted in place.
On entry, must have lo <= hi,
If docompare complains (returns CMPERROR) return -1, else 0.
If islt() complains return -1, else 0.
Even in case of error, the output slice will be some permutation of
the input (nothing is lost or duplicated).
@ -1023,8 +1011,7 @@ samplesortslice(PyObject **lo, PyObject **hi, PyObject *compare)
*/
/* assert lo < hi */
for (r = lo+1; r < hi; ++r) {
SETK(*r, *(r-1));
if (k < 0)
IFLT(*r, *(r-1))
break;
}
/* [lo,r) is sorted, [r,hi) unknown. Get out cheap if there are
@ -1036,8 +1023,7 @@ samplesortslice(PyObject **lo, PyObject **hi, PyObject *compare)
benchmark-driven silliness <wink>. */
/* assert lo < hi */
for (r = lo+1; r < hi; ++r) {
SETK(*(r-1), *r);
if (k < 0)
IFLT(*(r-1), *r)
break;
}
if (hi - r <= MAXMERGE) {
@ -1192,8 +1178,7 @@ samplesortslice(PyObject **lo, PyObject **hi, PyObject *compare)
do {
/* slide l right, looking for key >= pivot */
do {
SETK(*l, pivot);
if (k < 0)
IFLT(*l, pivot)
++l;
else
break;
@ -1202,8 +1187,7 @@ samplesortslice(PyObject **lo, PyObject **hi, PyObject *compare)
/* slide r left, looking for key < pivot */
while (l < r) {
register PyObject *rval = *r--;
SETK(rval, pivot);
if (k < 0) {
IFLT(rval, pivot) {
/* swap and advance */
r[1] = *l;
*l++ = rval;
@ -1219,8 +1203,7 @@ samplesortslice(PyObject **lo, PyObject **hi, PyObject *compare)
everything to the right of r is >= pivot */
if (l == r) {
SETK(*r, pivot);
if (k < 0)
IFLT(*r, pivot)
++l;
else
--r;
@ -1249,8 +1232,7 @@ samplesortslice(PyObject **lo, PyObject **hi, PyObject *compare)
*/
while (l < hi) {
/* pivot <= *l known */
SETK(pivot, *l);
if (k < 0)
IFLT(pivot, *l)
break;
else
/* <= and not < implies == */
@ -1290,7 +1272,7 @@ samplesortslice(PyObject **lo, PyObject **hi, PyObject *compare)
return -1;
}
#undef SETK
#undef IFLT
static PyTypeObject immutable_list_type;