Factor out common code. Also, optimize common cases by preallocating space on the stack. GH-8738
Improves speed by 9 to 10 ns per call.
This commit is contained in:
parent
1399074535
commit
c630e10440
|
@ -2032,10 +2032,10 @@ math_fmod_impl(PyObject *module, double x, double y)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Given an *n* length *vec* of non-negative, non-nan, non-inf values
|
Given an *n* length *vec* of non-negative values
|
||||||
where *max* is the largest value in the vector, compute:
|
where *max* is the largest value in the vector, compute:
|
||||||
|
|
||||||
sum((x / max) ** 2 for x in vec)
|
max * sqrt(sum((x / max) ** 2 for x in vec))
|
||||||
|
|
||||||
When a maximum value is found, it is swapped to the end. This
|
When a maximum value is found, it is swapped to the end. This
|
||||||
lets us skip one loop iteration and just add 1.0 at the end.
|
lets us skip one loop iteration and just add 1.0 at the end.
|
||||||
|
@ -2045,19 +2045,31 @@ Kahan summation is used to improve accuracy. The *csum*
|
||||||
variable tracks the cumulative sum and *frac* tracks
|
variable tracks the cumulative sum and *frac* tracks
|
||||||
fractional round-off error for the most recent addition.
|
fractional round-off error for the most recent addition.
|
||||||
|
|
||||||
|
The value of the *max* variable must be present in *vec*
|
||||||
|
or should be equal to 0.0 when n==0. Likewise, *max* will
|
||||||
|
be INF if an infinity is present in the vec.
|
||||||
|
|
||||||
|
The *found_nan* variable indicates whether some member of
|
||||||
|
the *vec* is a NaN.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static inline double
|
static inline double
|
||||||
scaled_vector_squared(Py_ssize_t n, double *vec, double max)
|
vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
|
||||||
{
|
{
|
||||||
double x, csum = 0.0, oldcsum, frac = 0.0;
|
double x, csum = 0.0, oldcsum, frac = 0.0;
|
||||||
Py_ssize_t i;
|
Py_ssize_t i;
|
||||||
|
|
||||||
|
if (Py_IS_INFINITY(max)) {
|
||||||
|
return max;
|
||||||
|
}
|
||||||
|
if (found_nan) {
|
||||||
|
return Py_NAN;
|
||||||
|
}
|
||||||
if (max == 0.0) {
|
if (max == 0.0) {
|
||||||
return 0.0;
|
return 0.0;
|
||||||
}
|
}
|
||||||
assert(n > 0);
|
assert(n > 0);
|
||||||
for (i=0 ; i<n-1 ; i++) {
|
for (i=0 ; i < n-1 ; i++) {
|
||||||
x = vec[i];
|
x = vec[i];
|
||||||
if (x == max) {
|
if (x == max) {
|
||||||
x = vec[n-1];
|
x = vec[n-1];
|
||||||
|
@ -2071,9 +2083,11 @@ scaled_vector_squared(Py_ssize_t n, double *vec, double max)
|
||||||
}
|
}
|
||||||
assert(vec[n-1] == max);
|
assert(vec[n-1] == max);
|
||||||
csum += 1.0 - frac;
|
csum += 1.0 - frac;
|
||||||
return csum;
|
return max * sqrt(csum);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define NUM_STACK_ELEMS 16
|
||||||
|
|
||||||
/*[clinic input]
|
/*[clinic input]
|
||||||
math.dist
|
math.dist
|
||||||
|
|
||||||
|
@ -2095,11 +2109,12 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
|
||||||
/*[clinic end generated code: output=56bd9538d06bbcfe input=937122eaa5f19272]*/
|
/*[clinic end generated code: output=56bd9538d06bbcfe input=937122eaa5f19272]*/
|
||||||
{
|
{
|
||||||
PyObject *item;
|
PyObject *item;
|
||||||
double *diffs;
|
|
||||||
double max = 0.0;
|
double max = 0.0;
|
||||||
double x, px, qx, result;
|
double x, px, qx, result;
|
||||||
Py_ssize_t i, m, n;
|
Py_ssize_t i, m, n;
|
||||||
int found_nan = 0;
|
int found_nan = 0;
|
||||||
|
double diffs_on_stack[NUM_STACK_ELEMS];
|
||||||
|
double *diffs = diffs_on_stack;
|
||||||
|
|
||||||
m = PyTuple_GET_SIZE(p);
|
m = PyTuple_GET_SIZE(p);
|
||||||
n = PyTuple_GET_SIZE(q);
|
n = PyTuple_GET_SIZE(q);
|
||||||
|
@ -2109,22 +2124,22 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
}
|
}
|
||||||
diffs = (double *) PyObject_Malloc(n * sizeof(double));
|
if (n > NUM_STACK_ELEMS) {
|
||||||
if (diffs == NULL) {
|
diffs = (double *) PyObject_Malloc(n * sizeof(double));
|
||||||
return NULL;
|
if (diffs == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
for (i=0 ; i<n ; i++) {
|
for (i=0 ; i<n ; i++) {
|
||||||
item = PyTuple_GET_ITEM(p, i);
|
item = PyTuple_GET_ITEM(p, i);
|
||||||
px = PyFloat_AsDouble(item);
|
px = PyFloat_AsDouble(item);
|
||||||
if (px == -1.0 && PyErr_Occurred()) {
|
if (px == -1.0 && PyErr_Occurred()) {
|
||||||
PyObject_Free(diffs);
|
goto error_exit;
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
item = PyTuple_GET_ITEM(q, i);
|
item = PyTuple_GET_ITEM(q, i);
|
||||||
qx = PyFloat_AsDouble(item);
|
qx = PyFloat_AsDouble(item);
|
||||||
if (qx == -1.0 && PyErr_Occurred()) {
|
if (qx == -1.0 && PyErr_Occurred()) {
|
||||||
PyObject_Free(diffs);
|
goto error_exit;
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
x = fabs(px - qx);
|
x = fabs(px - qx);
|
||||||
diffs[i] = x;
|
diffs[i] = x;
|
||||||
|
@ -2133,19 +2148,17 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
|
||||||
max = x;
|
max = x;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (Py_IS_INFINITY(max)) {
|
result = vector_norm(n, diffs, max, found_nan);
|
||||||
result = max;
|
if (diffs != diffs_on_stack) {
|
||||||
goto done;
|
PyObject_Free(diffs);
|
||||||
}
|
}
|
||||||
if (found_nan) {
|
|
||||||
result = Py_NAN;
|
|
||||||
goto done;
|
|
||||||
}
|
|
||||||
result = max * sqrt(scaled_vector_squared(n, diffs, max));
|
|
||||||
|
|
||||||
done:
|
|
||||||
PyObject_Free(diffs);
|
|
||||||
return PyFloat_FromDouble(result);
|
return PyFloat_FromDouble(result);
|
||||||
|
|
||||||
|
error_exit:
|
||||||
|
if (diffs != diffs_on_stack) {
|
||||||
|
PyObject_Free(diffs);
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* AC: cannot convert yet, waiting for *args support */
|
/* AC: cannot convert yet, waiting for *args support */
|
||||||
|
@ -2154,21 +2167,23 @@ math_hypot(PyObject *self, PyObject *args)
|
||||||
{
|
{
|
||||||
Py_ssize_t i, n;
|
Py_ssize_t i, n;
|
||||||
PyObject *item;
|
PyObject *item;
|
||||||
double *coordinates;
|
|
||||||
double max = 0.0;
|
double max = 0.0;
|
||||||
double x, result;
|
double x, result;
|
||||||
int found_nan = 0;
|
int found_nan = 0;
|
||||||
|
double coord_on_stack[NUM_STACK_ELEMS];
|
||||||
|
double *coordinates = coord_on_stack;
|
||||||
|
|
||||||
n = PyTuple_GET_SIZE(args);
|
n = PyTuple_GET_SIZE(args);
|
||||||
coordinates = (double *) PyObject_Malloc(n * sizeof(double));
|
if (n > NUM_STACK_ELEMS) {
|
||||||
if (coordinates == NULL)
|
coordinates = (double *) PyObject_Malloc(n * sizeof(double));
|
||||||
return NULL;
|
if (coordinates == NULL)
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
for (i=0 ; i<n ; i++) {
|
for (i=0 ; i<n ; i++) {
|
||||||
item = PyTuple_GET_ITEM(args, i);
|
item = PyTuple_GET_ITEM(args, i);
|
||||||
x = PyFloat_AsDouble(item);
|
x = PyFloat_AsDouble(item);
|
||||||
if (x == -1.0 && PyErr_Occurred()) {
|
if (x == -1.0 && PyErr_Occurred()) {
|
||||||
PyObject_Free(coordinates);
|
goto error_exit;
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
x = fabs(x);
|
x = fabs(x);
|
||||||
coordinates[i] = x;
|
coordinates[i] = x;
|
||||||
|
@ -2177,21 +2192,21 @@ math_hypot(PyObject *self, PyObject *args)
|
||||||
max = x;
|
max = x;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (Py_IS_INFINITY(max)) {
|
result = vector_norm(n, coordinates, max, found_nan);
|
||||||
result = max;
|
if (coordinates != coord_on_stack) {
|
||||||
goto done;
|
PyObject_Free(coordinates);
|
||||||
}
|
}
|
||||||
if (found_nan) {
|
|
||||||
result = Py_NAN;
|
|
||||||
goto done;
|
|
||||||
}
|
|
||||||
result = max * sqrt(scaled_vector_squared(n, coordinates, max));
|
|
||||||
|
|
||||||
done:
|
|
||||||
PyObject_Free(coordinates);
|
|
||||||
return PyFloat_FromDouble(result);
|
return PyFloat_FromDouble(result);
|
||||||
|
|
||||||
|
error_exit:
|
||||||
|
if (coordinates != coord_on_stack) {
|
||||||
|
PyObject_Free(coordinates);
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#undef NUM_STACK_ELEMS
|
||||||
|
|
||||||
PyDoc_STRVAR(math_hypot_doc,
|
PyDoc_STRVAR(math_hypot_doc,
|
||||||
"hypot(*coordinates) -> value\n\n\
|
"hypot(*coordinates) -> value\n\n\
|
||||||
Multidimensional Euclidean distance from the origin to a point.\n\
|
Multidimensional Euclidean distance from the origin to a point.\n\
|
||||||
|
|
Loading…
Reference in New Issue