mirror of https://github.com/python/cpython
GH-100485: Create an alternative code path when an accurate fma() implementation is not available (#101567)
This commit is contained in:
parent
a89e6713c4
commit
5a2b984568
|
@ -1450,6 +1450,11 @@ class MathTests(unittest.TestCase):
|
|||
n = 20 # Length of vectors
|
||||
c = 1e30 # Target condition number
|
||||
|
||||
# If the following test fails, it means that the C math library
|
||||
# implementation of fma() is not compliant with the C99 standard
|
||||
# and is inaccurate. To solve this problem, make a new build
|
||||
# with the symbol UNRELIABLE_FMA defined. That will enable a
|
||||
# slower but accurate code path that avoids the fma() call.
|
||||
relative_err = median(Trial(math.sumprod, c, n) for i in range(times))
|
||||
self.assertLess(relative_err, 1e-16)
|
||||
|
||||
|
|
|
@ -2851,6 +2851,8 @@ dl_sum(double a, double b)
|
|||
return (DoubleLength) {x, y};
|
||||
}
|
||||
|
||||
#ifndef UNRELIABLE_FMA
|
||||
|
||||
static DoubleLength
|
||||
dl_mul(double x, double y)
|
||||
{
|
||||
|
@ -2860,6 +2862,47 @@ dl_mul(double x, double y)
|
|||
return (DoubleLength) {z, zz};
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/*
|
||||
The default implementation of dl_mul() depends on the C math library
|
||||
having an accurate fma() function as required by § 7.12.13.1 of the
|
||||
C99 standard.
|
||||
|
||||
The UNRELIABLE_FMA option is provided as a slower but accurate
|
||||
alternative for builds where the fma() function is found wanting.
|
||||
The speed penalty may be modest (17% slower on an Apple M1 Max),
|
||||
so don't hesitate to enable this build option.
|
||||
|
||||
The algorithms are from the T. J. Dekker paper:
|
||||
A Floating-Point Technique for Extending the Available Precision
|
||||
https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
|
||||
*/
|
||||
|
||||
static DoubleLength
|
||||
dl_split(double x) {
|
||||
// Dekker (5.5) and (5.6).
|
||||
double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1
|
||||
double hi = t - (t - x);
|
||||
double lo = x - hi;
|
||||
return (DoubleLength) {hi, lo};
|
||||
}
|
||||
|
||||
static DoubleLength
|
||||
dl_mul(double x, double y)
|
||||
{
|
||||
// Dekker (5.12) and mul12()
|
||||
DoubleLength xx = dl_split(x);
|
||||
DoubleLength yy = dl_split(y);
|
||||
double p = xx.hi * yy.hi;
|
||||
double q = xx.hi * yy.lo + xx.lo * yy.hi;
|
||||
double z = p + q;
|
||||
double zz = p - z + q + xx.lo * yy.lo;
|
||||
return (DoubleLength) {z, zz};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
typedef struct { double hi; double lo; double tiny; } TripleLength;
|
||||
|
||||
static const TripleLength tl_zero = {0.0, 0.0, 0.0};
|
||||
|
|
Loading…
Reference in New Issue