mirror of https://github.com/python/cpython
Simplify and improve accuracy for subnormals in hypot() (GH-102785)
This commit is contained in:
parent
174c4bfd0f
commit
72186aa637
|
@ -2498,7 +2498,7 @@ References:
|
|||
static inline double
|
||||
vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
|
||||
{
|
||||
double x, h, scale, oldcsum, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
|
||||
double x, h, scale, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
|
||||
DoubleLength pr, sm;
|
||||
int max_e;
|
||||
Py_ssize_t i;
|
||||
|
@ -2513,49 +2513,42 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
|
|||
return max;
|
||||
}
|
||||
frexp(max, &max_e);
|
||||
if (max_e >= -1023) {
|
||||
scale = ldexp(1.0, -max_e);
|
||||
assert(max * scale >= 0.5);
|
||||
assert(max * scale < 1.0);
|
||||
if (max_e < -1023) {
|
||||
/* When max_e < -1023, ldexp(1.0, -max_e) would overflow.
|
||||
So we first perform lossless scaling from subnormals back to normals,
|
||||
then recurse back to vector_norm(), and then finally undo the scaling.
|
||||
*/
|
||||
for (i=0 ; i < n ; i++) {
|
||||
x = vec[i];
|
||||
assert(Py_IS_FINITE(x) && fabs(x) <= max);
|
||||
|
||||
x *= scale;
|
||||
assert(fabs(x) < 1.0);
|
||||
|
||||
pr = dl_mul(x, x);
|
||||
assert(pr.hi <= 1.0);
|
||||
|
||||
sm = dl_fast_sum(csum, pr.hi);
|
||||
csum = sm.hi;
|
||||
frac1 += pr.lo;
|
||||
frac2 += sm.lo;
|
||||
vec[i] /= DBL_MIN;
|
||||
}
|
||||
h = sqrt(csum - 1.0 + (frac1 + frac2));
|
||||
pr = dl_mul(-h, h);
|
||||
return DBL_MIN * vector_norm(n, vec, max / DBL_MIN, found_nan);
|
||||
}
|
||||
scale = ldexp(1.0, -max_e);
|
||||
assert(max * scale >= 0.5);
|
||||
assert(max * scale < 1.0);
|
||||
for (i=0 ; i < n ; i++) {
|
||||
x = vec[i];
|
||||
assert(Py_IS_FINITE(x) && fabs(x) <= max);
|
||||
|
||||
x *= scale;
|
||||
assert(fabs(x) < 1.0);
|
||||
|
||||
pr = dl_mul(x, x);
|
||||
assert(pr.hi <= 1.0);
|
||||
|
||||
sm = dl_fast_sum(csum, pr.hi);
|
||||
csum = sm.hi;
|
||||
frac1 += pr.lo;
|
||||
frac2 += sm.lo;
|
||||
x = csum - 1.0 + (frac1 + frac2);
|
||||
return (h + x / (2.0 * h)) / scale;
|
||||
}
|
||||
/* When max_e < -1023, ldexp(1.0, -max_e) overflows.
|
||||
So instead of multiplying by a scale, we just divide by *max*.
|
||||
*/
|
||||
for (i=0 ; i < n ; i++) {
|
||||
x = vec[i];
|
||||
assert(Py_IS_FINITE(x) && fabs(x) <= max);
|
||||
x /= max;
|
||||
x = x*x;
|
||||
assert(x <= 1.0);
|
||||
assert(fabs(csum) >= fabs(x));
|
||||
oldcsum = csum;
|
||||
csum += x;
|
||||
frac1 += (oldcsum - csum) + x;
|
||||
}
|
||||
return max * sqrt(csum - 1.0 + frac1);
|
||||
h = sqrt(csum - 1.0 + (frac1 + frac2));
|
||||
pr = dl_mul(-h, h);
|
||||
sm = dl_fast_sum(csum, pr.hi);
|
||||
csum = sm.hi;
|
||||
frac1 += pr.lo;
|
||||
frac2 += sm.lo;
|
||||
x = csum - 1.0 + (frac1 + frac2);
|
||||
return (h + x / (2.0 * h)) / scale;
|
||||
}
|
||||
|
||||
#define NUM_STACK_ELEMS 16
|
||||
|
|
Loading…
Reference in New Issue