bpo-41513: Improve speed and accuracy of math.hypot() (GH-21803)
parent 39dab24621
commit fff3c28052
@@ -795,7 +795,8 @@ class MathTests(unittest.TestCase):
         # Verify scaling for extremely large values
         fourthmax = FLOAT_MAX / 4.0
         for n in range(32):
-            self.assertEqual(hypot(*([fourthmax]*n)), fourthmax * math.sqrt(n))
+            self.assertTrue(math.isclose(hypot(*([fourthmax]*n)),
+                                         fourthmax * math.sqrt(n)))
 
         # Verify scaling for extremely small values
         for exp in range(32):
@@ -904,8 +905,8 @@ class MathTests(unittest.TestCase):
         for n in range(32):
             p = (fourthmax,) * n
             q = (0.0,) * n
-            self.assertEqual(dist(p, q), fourthmax * math.sqrt(n))
-            self.assertEqual(dist(q, p), fourthmax * math.sqrt(n))
+            self.assertTrue(math.isclose(dist(p, q), fourthmax * math.sqrt(n)))
+            self.assertTrue(math.isclose(dist(q, p), fourthmax * math.sqrt(n)))
 
         # Verify scaling for extremely small values
         for exp in range(32):
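The two test hunks above replace exact equality with math.isclose(): after the power-of-two rescaling, hypot() and dist() stay within about 1 ulp of the true result, but bit-for-bit equality with fourthmax * math.sqrt(n) is no longer guaranteed. A minimal sketch of the behaviour the tests exercise (illustrative only, not part of the commit):

    import math
    import sys

    FLOAT_MAX = sys.float_info.max       # largest finite double
    fourthmax = FLOAT_MAX / 4.0

    # Internal scaling keeps the intermediate squares finite, so the results
    # agree with the analytic value to well within isclose()'s default
    # relative tolerance of 1e-09.  (n <= 16 keeps the expected value finite.)
    for n in range(17):
        expected = fourthmax * math.sqrt(n)
        assert math.isclose(math.hypot(*([fourthmax] * n)), expected)
        assert math.isclose(math.dist((fourthmax,) * n, (0.0,) * n), expected)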
@@ -0,0 +1,2 @@
+Minor algorithmic improvement to math.hypot() and math.dist() giving small
+gains in speed and accuracy.
@@ -2406,6 +2406,13 @@ math_fmod_impl(PyObject *module, double x, double y)
 /*
 Given an *n* length *vec* of values and a value *max*, compute:
 
+    sqrt(sum((x * scale) ** 2 for x in vec)) / scale
+
+   where scale is the first power of two
+   greater than max.
+
+or compute:
+
     max * sqrt(sum((x / max) ** 2 for x in vec))
 
 The value of the *max* variable must be non-negative and
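The expanded comment above gives two mathematically equivalent formulations of the norm. A rough Python sketch of why the first form is preferred (illustrative only; norm_scaled and norm_divided are made-up names, not CPython APIs): scale is a power of two, so multiplying by it only adjusts the exponent and, barring underflow, introduces no rounding error, whereas dividing every element by an arbitrary max rounds each quotient.

    import math

    def norm_scaled(vec):
        # scale = 2.0**-max_e, chosen so that max(vec) * scale lands in [0.5, 1.0)
        _, max_e = math.frexp(max(map(abs, vec)))
        scale = math.ldexp(1.0, -max_e)
        return math.sqrt(sum((x * scale) ** 2 for x in vec)) / scale

    def norm_divided(vec):
        biggest = max(map(abs, vec))
        return biggest * math.sqrt(sum((x / biggest) ** 2 for x in vec))

    vec = [3e200, 4e200]
    print(norm_scaled(vec), norm_divided(vec))   # both approximately 5e200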
@@ -2425,19 +2432,25 @@ The *csum* variable tracks the cumulative sum and *frac* tracks
 the cumulative fractional errors at each step. Since this
 variant assumes that |csum| >= |x| at each step, we establish
 the precondition by starting the accumulation from 1.0 which
-represents the largest possible value of (x/max)**2.
+represents the largest possible value of (x*scale)**2 or (x/max)**2.
 
 After the loop is finished, the initial 1.0 is subtracted out
 for a net zero effect on the final sum. Since *csum* will be
 greater than 1.0, the subtraction of 1.0 will not cause
 fractional digits to be dropped from *csum*.
 
+To get the full benefit from compensated summation, the
+largest addend should be in the range: 0.5 <= x <= 1.0.
+Accordingly, scaling or division by *max* should not be skipped
+even if not otherwise needed to prevent overflow or loss of precision.
+
 */
 
 static inline double
 vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
 {
-    double x, csum = 1.0, oldcsum, frac = 0.0;
+    double x, csum = 1.0, oldcsum, frac = 0.0, scale;
+    int max_e;
     Py_ssize_t i;
 
     if (Py_IS_INFINITY(max)) {
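The comment describes Neumaier-style compensated summation seeded with 1.0. A small Python sketch of that loop (illustrative only; compensated_sum_of_squares is a made-up helper, not CPython code):

    def compensated_sum_of_squares(scaled_vec):
        # Assumes every |v| <= 1.0, so each addend x = v*v is in [0.0, 1.0]
        # and the 1.0 seed guarantees csum >= x throughout the loop.
        csum = 1.0      # seed; subtracted back out at the end
        frac = 0.0      # low-order bits lost by the csum += x additions
        for v in scaled_vec:
            x = v * v
            oldcsum = csum
            csum += x
            frac += (oldcsum - csum) + x   # exact rounding error of csum += x
        return csum - 1.0 + frac

Because csum >= x always holds, the cheap "fast two-sum" error term (oldcsum - csum) + x captures the rounding error of each addition exactly, without the extra comparisons a general two-sum would need.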
@@ -2449,14 +2462,36 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
     if (max == 0.0 || n <= 1) {
         return max;
     }
+    frexp(max, &max_e);
+    if (max_e >= -1023) {
+        scale = ldexp(1.0, -max_e);
+        assert(max * scale >= 0.5);
+        assert(max * scale < 1.0);
+        for (i=0 ; i < n ; i++) {
+            x = vec[i];
+            assert(Py_IS_FINITE(x) && fabs(x) <= max);
+            x *= scale;
+            x = x*x;
+            assert(x <= 1.0);
+            assert(csum >= x);
+            oldcsum = csum;
+            csum += x;
+            frac += (oldcsum - csum) + x;
+        }
+        return sqrt(csum - 1.0 + frac) / scale;
+    }
+    /* When max_e < -1023, ldexp(1.0, -max_e) overflows.
+       So instead of multiplying by a scale, we just divide by *max*.
+    */
     for (i=0 ; i < n ; i++) {
         x = vec[i];
         assert(Py_IS_FINITE(x) && fabs(x) <= max);
         x /= max;
         x = x*x;
+        assert(x <= 1.0);
+        assert(csum >= x);
         oldcsum = csum;
         csum += x;
-        assert(csum >= x);
         frac += (oldcsum - csum) + x;
     }
     return max * sqrt(csum - 1.0 + frac);
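The max_e >= -1023 guard in the new branch exists because frexp() reports exponents as low as -1073 for subnormal doubles, and the corresponding power-of-two scale would then exceed the largest finite double. A quick illustration (not part of the commit):

    import math

    tiny = math.ldexp(1.0, -1074)      # smallest positive (subnormal) double
    m, e = math.frexp(tiny)            # m == 0.5, e == -1073

    try:
        math.ldexp(1.0, -e)            # 2.0**1073 is larger than any finite double
    except OverflowError:
        print("scale would overflow; divide by max instead")

    # The fallback loop that divides by *max* still produces the right answer:
    print(math.hypot(tiny, tiny, tiny, tiny))   # 1e-323, i.e. exactly 2*tiny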