Factor-out common code. Also, optimize common cases by preallocating space on the stack. GH-8738

Improves speed by 9 to 10ns per call.
This commit is contained in:
Raymond Hettinger 2018-08-11 18:39:05 -07:00 committed by GitHub
parent 1399074535
commit c630e10440
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 56 additions and 41 deletions

View File

@ -2032,10 +2032,10 @@ math_fmod_impl(PyObject *module, double x, double y)
} }
/* /*
Given an *n* length *vec* of non-negative, non-nan, non-inf values Given an *n* length *vec* of non-negative values
where *max* is the largest value in the vector, compute: where *max* is the largest value in the vector, compute:
sum((x / max) ** 2 for x in vec) max * sqrt(sum((x / max) ** 2 for x in vec))
When a maximum value is found, it is swapped to the end. This When a maximum value is found, it is swapped to the end. This
lets us skip one loop iteration and just add 1.0 at the end. lets us skip one loop iteration and just add 1.0 at the end.
@ -2045,19 +2045,31 @@ Kahan summation is used to improve accuracy. The *csum*
variable tracks the cumulative sum and *frac* tracks variable tracks the cumulative sum and *frac* tracks
fractional round-off error for the most recent addition. fractional round-off error for the most recent addition.
The value of the *max* variable must be present in *vec*
or should equal to 0.0 when n==0. Likewise, *max* will
be INF if an infinity is present in the vec.
The *found_nan* variable indicates whether some member of
the *vec* is a NaN.
*/ */
static inline double static inline double
scaled_vector_squared(Py_ssize_t n, double *vec, double max) vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
{ {
double x, csum = 0.0, oldcsum, frac = 0.0; double x, csum = 0.0, oldcsum, frac = 0.0;
Py_ssize_t i; Py_ssize_t i;
if (Py_IS_INFINITY(max)) {
return max;
}
if (found_nan) {
return Py_NAN;
}
if (max == 0.0) { if (max == 0.0) {
return 0.0; return 0.0;
} }
assert(n > 0); assert(n > 0);
for (i=0 ; i<n-1 ; i++) { for (i=0 ; i < n-1 ; i++) {
x = vec[i]; x = vec[i];
if (x == max) { if (x == max) {
x = vec[n-1]; x = vec[n-1];
@ -2071,9 +2083,11 @@ scaled_vector_squared(Py_ssize_t n, double *vec, double max)
} }
assert(vec[n-1] == max); assert(vec[n-1] == max);
csum += 1.0 - frac; csum += 1.0 - frac;
return csum; return max * sqrt(csum);
} }
#define NUM_STACK_ELEMS 16
/*[clinic input] /*[clinic input]
math.dist math.dist
@ -2095,11 +2109,12 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
/*[clinic end generated code: output=56bd9538d06bbcfe input=937122eaa5f19272]*/ /*[clinic end generated code: output=56bd9538d06bbcfe input=937122eaa5f19272]*/
{ {
PyObject *item; PyObject *item;
double *diffs;
double max = 0.0; double max = 0.0;
double x, px, qx, result; double x, px, qx, result;
Py_ssize_t i, m, n; Py_ssize_t i, m, n;
int found_nan = 0; int found_nan = 0;
double diffs_on_stack[NUM_STACK_ELEMS];
double *diffs = diffs_on_stack;
m = PyTuple_GET_SIZE(p); m = PyTuple_GET_SIZE(p);
n = PyTuple_GET_SIZE(q); n = PyTuple_GET_SIZE(q);
@ -2109,22 +2124,22 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
return NULL; return NULL;
} }
diffs = (double *) PyObject_Malloc(n * sizeof(double)); if (n > NUM_STACK_ELEMS) {
if (diffs == NULL) { diffs = (double *) PyObject_Malloc(n * sizeof(double));
return NULL; if (diffs == NULL) {
return NULL;
}
} }
for (i=0 ; i<n ; i++) { for (i=0 ; i<n ; i++) {
item = PyTuple_GET_ITEM(p, i); item = PyTuple_GET_ITEM(p, i);
px = PyFloat_AsDouble(item); px = PyFloat_AsDouble(item);
if (px == -1.0 && PyErr_Occurred()) { if (px == -1.0 && PyErr_Occurred()) {
PyObject_Free(diffs); goto error_exit;
return NULL;
} }
item = PyTuple_GET_ITEM(q, i); item = PyTuple_GET_ITEM(q, i);
qx = PyFloat_AsDouble(item); qx = PyFloat_AsDouble(item);
if (qx == -1.0 && PyErr_Occurred()) { if (qx == -1.0 && PyErr_Occurred()) {
PyObject_Free(diffs); goto error_exit;
return NULL;
} }
x = fabs(px - qx); x = fabs(px - qx);
diffs[i] = x; diffs[i] = x;
@ -2133,19 +2148,17 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
max = x; max = x;
} }
} }
if (Py_IS_INFINITY(max)) { result = vector_norm(n, diffs, max, found_nan);
result = max; if (diffs != diffs_on_stack) {
goto done; PyObject_Free(diffs);
} }
if (found_nan) {
result = Py_NAN;
goto done;
}
result = max * sqrt(scaled_vector_squared(n, diffs, max));
done:
PyObject_Free(diffs);
return PyFloat_FromDouble(result); return PyFloat_FromDouble(result);
error_exit:
if (diffs != diffs_on_stack) {
PyObject_Free(diffs);
}
return NULL;
} }
/* AC: cannot convert yet, waiting for *args support */ /* AC: cannot convert yet, waiting for *args support */
@ -2154,21 +2167,23 @@ math_hypot(PyObject *self, PyObject *args)
{ {
Py_ssize_t i, n; Py_ssize_t i, n;
PyObject *item; PyObject *item;
double *coordinates;
double max = 0.0; double max = 0.0;
double x, result; double x, result;
int found_nan = 0; int found_nan = 0;
double coord_on_stack[NUM_STACK_ELEMS];
double *coordinates = coord_on_stack;
n = PyTuple_GET_SIZE(args); n = PyTuple_GET_SIZE(args);
coordinates = (double *) PyObject_Malloc(n * sizeof(double)); if (n > NUM_STACK_ELEMS) {
if (coordinates == NULL) coordinates = (double *) PyObject_Malloc(n * sizeof(double));
return NULL; if (coordinates == NULL)
return NULL;
}
for (i=0 ; i<n ; i++) { for (i=0 ; i<n ; i++) {
item = PyTuple_GET_ITEM(args, i); item = PyTuple_GET_ITEM(args, i);
x = PyFloat_AsDouble(item); x = PyFloat_AsDouble(item);
if (x == -1.0 && PyErr_Occurred()) { if (x == -1.0 && PyErr_Occurred()) {
PyObject_Free(coordinates); goto error_exit;
return NULL;
} }
x = fabs(x); x = fabs(x);
coordinates[i] = x; coordinates[i] = x;
@ -2177,21 +2192,21 @@ math_hypot(PyObject *self, PyObject *args)
max = x; max = x;
} }
} }
if (Py_IS_INFINITY(max)) { result = vector_norm(n, coordinates, max, found_nan);
result = max; if (coordinates != coord_on_stack) {
goto done; PyObject_Free(coordinates);
} }
if (found_nan) {
result = Py_NAN;
goto done;
}
result = max * sqrt(scaled_vector_squared(n, coordinates, max));
done:
PyObject_Free(coordinates);
return PyFloat_FromDouble(result); return PyFloat_FromDouble(result);
error_exit:
if (coordinates != coord_on_stack) {
PyObject_Free(coordinates);
}
return NULL;
} }
#undef NUM_STACK_ELEMS
PyDoc_STRVAR(math_hypot_doc, PyDoc_STRVAR(math_hypot_doc,
"hypot(*coordinates) -> value\n\n\ "hypot(*coordinates) -> value\n\n\
Multidimensional Euclidean distance from the origin to a point.\n\ Multidimensional Euclidean distance from the origin to a point.\n\