bpo-45530: speed listobject.c's unsafe_tuple_compare() (GH-29076)

Keep track of whether unsafe_tuple_compare() calls are resolved by the very
first tuple elements, and adjust strategy accordingly. This can significantly
cut the number of calls made to the full-blown PyObject_RichCompareBool(),
and especially when duplicates are rare.

Co-authored-by: Łukasz Langa <lukasz@langa.pl>
This commit is contained in:
Tim Peters 2021-10-24 22:27:24 -05:00 committed by GitHub
parent 07236d562e
commit 51ed2c56a1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 81 additions and 13 deletions

View File

@ -1424,6 +1424,8 @@ Note that ``a op1 b op2 c`` doesn't imply any kind of comparison between *a* and
*c*, so that, e.g., ``x < y > z`` is perfectly legal (though perhaps not
pretty).
.. _expressions-value-comparisons:
Value comparisons
-----------------

View File

@ -470,6 +470,13 @@ Changes in the Python API
the ``'utf-8'`` encoding.
(Contributed by Srinivas Reddy Thatiparthy (శ్రీనివాస్ రెడ్డి తాటిపర్తి) in :issue:`41137`.)
* When sorting using tuples as keys, the order of the result may differ
from earlier releases if the tuple elements don't define a total
ordering (see :ref:`expressions-value-comparisons` for
information on total ordering). It's generally true that the result
of sorting simply isn't well-defined in the absence of a total ordering
on list elements.
Build Changes
=============

View File

@ -0,0 +1,8 @@
Cases of sorting using tuples as keys may now be significantly faster
in some cases. Patch by Tim Peters.
The order of the result may differ from earlier releases if the tuple
elements don't define a total ordering (see
:ref:`expressions-value-comparisons` for information on
total ordering). It's generally true that the result of sorting simply
isn't well-defined in the absence of a total ordering on list elements.

View File

@ -1220,6 +1220,13 @@ struct s_MergeState {
* of tuples. It may be set to safe_object_compare, but the idea is that hopefully
* we can assume more, and use one of the special-case compares. */
int (*tuple_elem_compare)(PyObject *, PyObject *, MergeState *);
/* Used by unsafe_tuple_compare to record whether the very first tuple
* elements resolved the last comparison attempt. If so, next time a
* method that may avoid PyObject_RichCompareBool() entirely is tried.
* 0 for false, 1 for true.
*/
int first_tuple_items_resolved_it;
};
/* binarysort is the best method for sorting small arrays: it does
@ -2190,7 +2197,24 @@ unsafe_float_compare(PyObject *v, PyObject *w, MergeState *ms)
* using the same pre-sort check as we use for ms->key_compare,
* but run on the list [x[0] for x in L]. This allows us to optimize compares
* on two levels (as long as [x[0] for x in L] is type-homogeneous.) The idea is
* that most tuple compares don't involve x[1:]. */
* that most tuple compares don't involve x[1:].
* However, that may not be right. When it is right, we can win by calling the
* relatively cheap ms->tuple_elem_compare on the first pair of elements, to
* see whether v[0] < w[0] or w[0] < v[0]. If either are so, we're done.
* Else we proceed as in the tuple compare, comparing the remaining pairs via
* the probably more expensive PyObject_RichCompareBool(..., Py_EQ) until (if
* ever) that says "no, not equal!". Then, if we're still on the first pair,
* ms->tuple_elem_compare can resolve it, else PyObject_RichCompareBool(...,
* Py_LT) finishes the job.
* In any case, ms->first_tuple_items_resolved_it keeps track of whether the
* most recent tuple comparison was resolved by the first pair. If so, the
* next attempt starts by trying the cheap tests on the first pair again, else
* PyObject_RichCompareBool(..., Py_EQ) is used from the start.
* There are cases where PyObject_RichCompareBool(..., Py_EQ) is much cheaper!
* For example, that can return "almost immediately" if passed the same
* object twice (it special-cases object identity for Py_EQ), which can,
* potentially, be unboundedly faster than ms->tuple_elem_compare.
*/
static int
unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms)
{
@ -2206,26 +2230,52 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms)
vt = (PyTupleObject *)v;
wt = (PyTupleObject *)w;
i = 0;
if (ms->first_tuple_items_resolved_it) {
/* See whether fast compares of the first elements settle it. */
k = ms->tuple_elem_compare(vt->ob_item[0], wt->ob_item[0], ms);
if (k) /* error, or v < w */
return k;
k = ms->tuple_elem_compare(wt->ob_item[0], vt->ob_item[0], ms);
if (k > 0) /* w < v */
return 0;
if (k < 0) /* error */
return -1;
/* We have
* not (v[0] < w[0]) and not (w[0] < v[0])
* which implies, for a total order, that the first elements are
* equal. So skip them in the loop.
*/
i = 1;
ms->first_tuple_items_resolved_it = 0;
}
/* Now first_tuple_items_resolved_it was 0 on entry, or was forced to 0
* at the end of the `if` block just above.
*/
assert(! ms->first_tuple_items_resolved_it);
vlen = Py_SIZE(vt);
wlen = Py_SIZE(wt);
for (i = 0; i < vlen && i < wlen; i++) {
for (; i < vlen && i < wlen; i++) {
k = PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_EQ);
if (!k) { /* not equal */
if (i) {
return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i],
Py_LT);
}
else {
ms->first_tuple_items_resolved_it = 1;
return ms->tuple_elem_compare(vt->ob_item[0], wt->ob_item[0],
ms);
}
}
if (k < 0)
return -1;
if (!k)
break;
}
/* all equal until we fell off the end */
return vlen < wlen;
if (i >= vlen || i >= wlen)
return vlen < wlen;
if (i == 0)
return ms->tuple_elem_compare(vt->ob_item[i], wt->ob_item[i], ms);
else
return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT);
}
}
/* An adaptive, stable, natural mergesort. See listsort.txt.
* Returns Py_None on success, NULL on error. Even in case of error, the
@ -2408,6 +2458,7 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
}
ms.key_compare = unsafe_tuple_compare;
ms.first_tuple_items_resolved_it = 1; /* be optimistic */
}
}
/* End of pre-sort check: ms is now set properly! */