Simplify and improve accuracy for subnormals in hypot() (GH-102785)

This commit is contained in:
Raymond Hettinger 2023-03-17 14:06:52 -05:00 committed by GitHub
parent 174c4bfd0f
commit 72186aa637
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 30 additions and 37 deletions

View File

@ -2498,7 +2498,7 @@ References:
static inline double
vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
{
double x, h, scale, oldcsum, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
double x, h, scale, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
DoubleLength pr, sm;
int max_e;
Py_ssize_t i;
@ -2513,49 +2513,42 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
return max;
}
frexp(max, &max_e);
if (max_e >= -1023) {
scale = ldexp(1.0, -max_e);
assert(max * scale >= 0.5);
assert(max * scale < 1.0);
if (max_e < -1023) {
/* When max_e < -1023, ldexp(1.0, -max_e) would overflow.
So we first perform lossless scaling from subnormals back to normals,
then recurse back to vector_norm(), and then finally undo the scaling.
*/
for (i=0 ; i < n ; i++) {
x = vec[i];
assert(Py_IS_FINITE(x) && fabs(x) <= max);
x *= scale;
assert(fabs(x) < 1.0);
pr = dl_mul(x, x);
assert(pr.hi <= 1.0);
sm = dl_fast_sum(csum, pr.hi);
csum = sm.hi;
frac1 += pr.lo;
frac2 += sm.lo;
vec[i] /= DBL_MIN;
}
h = sqrt(csum - 1.0 + (frac1 + frac2));
pr = dl_mul(-h, h);
return DBL_MIN * vector_norm(n, vec, max / DBL_MIN, found_nan);
}
scale = ldexp(1.0, -max_e);
assert(max * scale >= 0.5);
assert(max * scale < 1.0);
for (i=0 ; i < n ; i++) {
x = vec[i];
assert(Py_IS_FINITE(x) && fabs(x) <= max);
x *= scale;
assert(fabs(x) < 1.0);
pr = dl_mul(x, x);
assert(pr.hi <= 1.0);
sm = dl_fast_sum(csum, pr.hi);
csum = sm.hi;
frac1 += pr.lo;
frac2 += sm.lo;
x = csum - 1.0 + (frac1 + frac2);
return (h + x / (2.0 * h)) / scale;
}
/* When max_e < -1023, ldexp(1.0, -max_e) overflows.
So instead of multiplying by a scale, we just divide by *max*.
*/
for (i=0 ; i < n ; i++) {
x = vec[i];
assert(Py_IS_FINITE(x) && fabs(x) <= max);
x /= max;
x = x*x;
assert(x <= 1.0);
assert(fabs(csum) >= fabs(x));
oldcsum = csum;
csum += x;
frac1 += (oldcsum - csum) + x;
}
return max * sqrt(csum - 1.0 + frac1);
h = sqrt(csum - 1.0 + (frac1 + frac2));
pr = dl_mul(-h, h);
sm = dl_fast_sum(csum, pr.hi);
csum = sm.hi;
frac1 += pr.lo;
frac2 += sm.lo;
x = csum - 1.0 + (frac1 + frac2);
return (h + x / (2.0 * h)) / scale;
}
#define NUM_STACK_ELEMS 16