mirror of https://github.com/python/cpython
bpo-45530: speed listobject.c's unsafe_tuple_compare() (GH-29076)
Keep track of whether unsafe_tuple_compare() calls are resolved by the very first tuple elements, and adjust strategy accordingly. This can significantly cut the number of calls made to the full-blown PyObject_RichCompareBool(), and especially when duplicates are rare. Co-authored-by: Łukasz Langa <lukasz@langa.pl>
This commit is contained in:
parent
07236d562e
commit
51ed2c56a1
|
@ -1424,6 +1424,8 @@ Note that ``a op1 b op2 c`` doesn't imply any kind of comparison between *a* and
|
|||
*c*, so that, e.g., ``x < y > z`` is perfectly legal (though perhaps not
|
||||
pretty).
|
||||
|
||||
.. _expressions-value-comparisons:
|
||||
|
||||
Value comparisons
|
||||
-----------------
|
||||
|
||||
|
|
|
@ -470,6 +470,13 @@ Changes in the Python API
|
|||
the ``'utf-8'`` encoding.
|
||||
(Contributed by Srinivas Reddy Thatiparthy (శ్రీనివాస్ రెడ్డి తాటిపర్తి) in :issue:`41137`.)
|
||||
|
||||
* When sorting using tuples as keys, the order of the result may differ
|
||||
from earlier releases if the tuple elements don't define a total
|
||||
ordering (see :ref:`expressions-value-comparisons` for
|
||||
information on total ordering). It's generally true that the result
|
||||
of sorting simply isn't well-defined in the absence of a total ordering
|
||||
on list elements.
|
||||
|
||||
|
||||
Build Changes
|
||||
=============
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
Cases of sorting using tuples as keys may now be significantly faster
|
||||
in some cases. Patch by Tim Peters.
|
||||
|
||||
The order of the result may differ from earlier releases if the tuple
|
||||
elements don't define a total ordering (see
|
||||
:ref:`expressions-value-comparisons` for information on
|
||||
total ordering). It's generally true that the result of sorting simply
|
||||
isn't well-defined in the absence of a total ordering on list elements.
|
|
@ -1220,6 +1220,13 @@ struct s_MergeState {
|
|||
* of tuples. It may be set to safe_object_compare, but the idea is that hopefully
|
||||
* we can assume more, and use one of the special-case compares. */
|
||||
int (*tuple_elem_compare)(PyObject *, PyObject *, MergeState *);
|
||||
|
||||
/* Used by unsafe_tuple_compare to record whether the very first tuple
|
||||
* elements resolved the last comparison attempt. If so, next time a
|
||||
* method that may avoid PyObject_RichCompareBool() entirely is tried.
|
||||
* 0 for false, 1 for true.
|
||||
*/
|
||||
int first_tuple_items_resolved_it;
|
||||
};
|
||||
|
||||
/* binarysort is the best method for sorting small arrays: it does
|
||||
|
@ -2190,7 +2197,24 @@ unsafe_float_compare(PyObject *v, PyObject *w, MergeState *ms)
|
|||
* using the same pre-sort check as we use for ms->key_compare,
|
||||
* but run on the list [x[0] for x in L]. This allows us to optimize compares
|
||||
* on two levels (as long as [x[0] for x in L] is type-homogeneous.) The idea is
|
||||
* that most tuple compares don't involve x[1:]. */
|
||||
* that most tuple compares don't involve x[1:].
|
||||
* However, that may not be right. When it is right, we can win by calling the
|
||||
* relatively cheap ms->tuple_elem_compare on the first pair of elements, to
|
||||
* see whether v[0] < w[0] or w[0] < v[0]. If either are so, we're done.
|
||||
* Else we proceed as in the tuple compare, comparing the remaining pairs via
|
||||
* the probably more expensive PyObject_RichCompareBool(..., Py_EQ) until (if
|
||||
* ever) that says "no, not equal!". Then, if we're still on the first pair,
|
||||
* ms->tuple_elem_compare can resolve it, else PyObject_RichCompareBool(...,
|
||||
* Py_LT) finishes the job.
|
||||
* In any case, ms->first_tuple_items_resolved_it keeps track of whether the
|
||||
* most recent tuple comparison was resolved by the first pair. If so, the
|
||||
* next attempt starts by trying the cheap tests on the first pair again, else
|
||||
* PyObject_RichCompareBool(..., Py_EQ) is used from the start.
|
||||
* There are cases where PyObject_RichCompareBool(..., Py_EQ) is much cheaper!
|
||||
* For example, that can return "almost immediately" if passed the same
|
||||
* object twice (it special-cases object identity for Py_EQ), which can,
|
||||
* potentially, be unboundedly faster than ms->tuple_elem_compare.
|
||||
*/
|
||||
static int
|
||||
unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms)
|
||||
{
|
||||
|
@ -2206,26 +2230,52 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms)
|
|||
|
||||
vt = (PyTupleObject *)v;
|
||||
wt = (PyTupleObject *)w;
|
||||
i = 0;
|
||||
if (ms->first_tuple_items_resolved_it) {
|
||||
/* See whether fast compares of the first elements settle it. */
|
||||
k = ms->tuple_elem_compare(vt->ob_item[0], wt->ob_item[0], ms);
|
||||
if (k) /* error, or v < w */
|
||||
return k;
|
||||
k = ms->tuple_elem_compare(wt->ob_item[0], vt->ob_item[0], ms);
|
||||
if (k > 0) /* w < v */
|
||||
return 0;
|
||||
if (k < 0) /* error */
|
||||
return -1;
|
||||
/* We have
|
||||
* not (v[0] < w[0]) and not (w[0] < v[0])
|
||||
* which implies, for a total order, that the first elements are
|
||||
* equal. So skip them in the loop.
|
||||
*/
|
||||
i = 1;
|
||||
ms->first_tuple_items_resolved_it = 0;
|
||||
}
|
||||
/* Now first_tuple_items_resolved_it was 0 on entry, or was forced to 0
|
||||
* at the end of the `if` block just above.
|
||||
*/
|
||||
assert(! ms->first_tuple_items_resolved_it);
|
||||
|
||||
vlen = Py_SIZE(vt);
|
||||
wlen = Py_SIZE(wt);
|
||||
|
||||
for (i = 0; i < vlen && i < wlen; i++) {
|
||||
for (; i < vlen && i < wlen; i++) {
|
||||
k = PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_EQ);
|
||||
if (!k) { /* not equal */
|
||||
if (i) {
|
||||
return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i],
|
||||
Py_LT);
|
||||
}
|
||||
else {
|
||||
ms->first_tuple_items_resolved_it = 1;
|
||||
return ms->tuple_elem_compare(vt->ob_item[0], wt->ob_item[0],
|
||||
ms);
|
||||
}
|
||||
}
|
||||
if (k < 0)
|
||||
return -1;
|
||||
if (!k)
|
||||
break;
|
||||
}
|
||||
/* all equal until we fell off the end */
|
||||
return vlen < wlen;
|
||||
|
||||
if (i >= vlen || i >= wlen)
|
||||
return vlen < wlen;
|
||||
|
||||
if (i == 0)
|
||||
return ms->tuple_elem_compare(vt->ob_item[i], wt->ob_item[i], ms);
|
||||
else
|
||||
return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT);
|
||||
}
|
||||
}
|
||||
|
||||
/* An adaptive, stable, natural mergesort. See listsort.txt.
|
||||
* Returns Py_None on success, NULL on error. Even in case of error, the
|
||||
|
@ -2408,6 +2458,7 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse)
|
|||
}
|
||||
|
||||
ms.key_compare = unsafe_tuple_compare;
|
||||
ms.first_tuple_items_resolved_it = 1; /* be optimistic */
|
||||
}
|
||||
}
|
||||
/* End of pre-sort check: ms is now set properly! */
|
||||
|
|
Loading…
Reference in New Issue