diff --git a/Doc/library/math.rst b/Doc/library/math.rst index c8090b5afd2..df4ec1b69f5 100644 --- a/Doc/library/math.rst +++ b/Doc/library/math.rst @@ -76,6 +76,42 @@ Number-theoretic and representation functions: apart" the internal representation of a float in a portable way. +.. function:: fsum(iterable) + + Return an accurate floating point sum of values in the iterable. Avoids + loss of precision by tracking multiple intermediate partial sums. The + algorithm's accuracy depends on IEEE-754 arithmetic guarantees and the + typical case where the rounding mode is half-even. + + .. note:: + + On platforms where arithmetic results are not correctly rounded, + :func:`fsum` may occasionally produce incorrect results; these + results should be no less accurate than those from the builtin + :func:`sum` function, but nevertheless may have arbitrarily + large relative error. + + In particular, this affects some older Intel hardware (for + example Pentium and earlier x86 processors) that makes use of + 'extended precision' floating-point registers with 64 bits of + precision instead of the 53 bits of precision provided by a C + double. Arithmetic operations using these registers may be + doubly rounded (rounded first to 64 bits, and then rerounded to + 53 bits), leading to incorrectly rounded results. To test + whether your machine is one of those affected, try the following + at a Python prompt:: + + >>> 1e16 + 2.9999 + 10000000000000002.0 + + Machines subject to the double-rounding problem described above + are likely to print ``10000000000000004.0`` instead of + ``10000000000000002.0``. + + + .. versionadded:: 2.6 + + .. function:: isinf(x) Checks if the float *x* is positive or negative infinite. @@ -100,12 +136,6 @@ Number-theoretic and representation functions: Return the fractional and integer parts of *x*. Both results carry the sign of *x*, and both are floats. -.. function:: sum(iterable) - - Return an accurate floating point sum of values in the iterable. 
Avoids - loss of precision by tracking multiple intermediate partial sums. The - algorithm's accuracy depends on IEEE-754 arithmetic guarantees and the - typical case where the rounding mode is half-even. .. function:: trunc(x) diff --git a/Doc/whatsnew/2.6.rst b/Doc/whatsnew/2.6.rst index a8d89cb1a9e..9959ecd74cc 100644 --- a/Doc/whatsnew/2.6.rst +++ b/Doc/whatsnew/2.6.rst @@ -1537,7 +1537,7 @@ Here are all of the changes that Python 2.6 makes to the core Python language. * :func:`~math.factorial` computes the factorial of a number. (Contributed by Raymond Hettinger; :issue:`2138`.) - * :func:`~math.sum` adds up the stream of numbers from an iterable, + * :func:`~math.fsum` adds up the stream of numbers from an iterable, and is careful to avoid loss of precision by calculating partial sums. (Contributed by Jean Brouwers, Raymond Hettinger, and Mark Dickinson; :issue:`2819`.) diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index 1eafebad222..9e646ac29ce 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -359,6 +359,102 @@ class MathTests(unittest.TestCase): self.assertEquals(math.frexp(NINF)[0], NINF) self.assert_(math.isnan(math.frexp(NAN)[0])) + def testFsum(self): + # math.fsum relies on exact rounding for correct operation. + # There's a known problem with IA32 floating-point that causes + # inexact rounding in some situations, and will cause the + # math.fsum tests below to fail; see issue #2937. On non IEEE + # 754 platforms, and on IEEE 754 platforms that exhibit the + # problem described in issue #2937, we simply skip the whole + # test. + + if not float.__getformat__("double").startswith("IEEE"): + return + + # on IEEE 754 compliant machines, both of the expressions + # below should round to 10000000000000002.0. + if 1e16+2.0 != 1e16+2.9999: + return + + # Python version of math.fsum, for comparison. Uses a + # different algorithm based on frexp, ldexp and integer + # arithmetic. 
+ from sys import float_info + mant_dig = float_info.mant_dig + etiny = float_info.min_exp - mant_dig + + def msum(iterable): + """Full precision summation. Compute sum(iterable) without any + intermediate accumulation of error. Based on the 'lsum' function + at http://code.activestate.com/recipes/393090/ + + """ + tmant, texp = 0, 0 + for x in iterable: + mant, exp = math.frexp(x) + mant, exp = int(math.ldexp(mant, mant_dig)), exp - mant_dig + if texp > exp: + tmant <<= texp-exp + texp = exp + else: + mant <<= exp-texp + tmant += mant + # Round tmant * 2**texp to a float. The original recipe + # used float(str(tmant)) * 2.0**texp for this, but that's + # a little unsafe because str -> float conversion can't be + # relied upon to do correct rounding on all platforms. + tail = max(len(bin(abs(tmant)))-2 - mant_dig, etiny - texp) + if tail > 0: + h = 1 << (tail-1) + tmant = tmant // (2*h) + bool(tmant & h and tmant & 3*h-1) + texp += tail + return math.ldexp(tmant, texp) + + test_values = [ + ([], 0.0), + ([0.0], 0.0), + ([1e100, 1.0, -1e100, 1e-100, 1e50, -1.0, -1e50], 1e-100), + ([2.0**53, -0.5, -2.0**-54], 2.0**53-1.0), + ([2.0**53, 1.0, 2.0**-100], 2.0**53+2.0), + ([2.0**53+10.0, 1.0, 2.0**-100], 2.0**53+12.0), + ([2.0**53-4.0, 0.5, 2.0**-54], 2.0**53-3.0), + ([1./n for n in range(1, 1001)], + float.fromhex('0x1.df11f45f4e61ap+2')), + ([(-1.)**n/n for n in range(1, 1001)], + float.fromhex('-0x1.62a2af1bd3624p-1')), + ([1.7**(i+1)-1.7**i for i in range(1000)] + [-1.7**1000], -1.0), + ([1e16, 1., 1e-16], 10000000000000002.0), + ([1e16-2., 1.-2.**-53, -(1e16-2.), -(1.-2.**-53)], 0.0), + # exercise code for resizing partials array + ([2.**n - 2.**(n+50) + 2.**(n+52) for n in range(-1074, 972, 2)] + + [-2.**1022], + float.fromhex('0x1.5555555555555p+970')), + ] + + for i, (vals, expected) in enumerate(test_values): + try: + actual = math.fsum(vals) + except OverflowError: + self.fail("test %d failed: got OverflowError, expected %r " + "for math.fsum(%.100r)" % (i, 
expected, vals)) + except ValueError: + self.fail("test %d failed: got ValueError, expected %r " + "for math.fsum(%.100r)" % (i, expected, vals)) + self.assertEqual(actual, expected) + + from random import random, gauss, shuffle + for j in range(1000): + vals = [7, 1e100, -7, -1e100, -9e-20, 8e-20] * 10 + s = 0 + for i in range(200): + v = gauss(0, random()) ** 7 - s + s += v + vals.append(v) + shuffle(vals) + + s = msum(vals) + self.assertEqual(msum(vals), math.fsum(vals)) + def testHypot(self): self.assertRaises(TypeError, math.hypot) self.ftest('hypot(0,0)', math.hypot(0,0), 0) @@ -641,158 +737,6 @@ class MathTests(unittest.TestCase): self.assertRaises(ValueError, math.sqrt, NINF) self.assert_(math.isnan(math.sqrt(NAN))) - def testSum(self): - # math.sum relies on exact rounding for correct operation. - # There's a known problem with IA32 floating-point that causes - # inexact rounding in some situations, and will cause the - # math.sum tests below to fail; see issue #2937. On non IEEE - # 754 platforms, and on IEEE 754 platforms that exhibit the - # problem described in issue #2937, we simply skip the whole - # test. - - if not float.__getformat__("double").startswith("IEEE"): - return - - # on IEEE 754 compliant machines, both of the expressions - # below should round to 10000000000000002.0. - if 1e16+2.999 != 1e16+2.9999: - return - - # Python version of math.sum algorithm, for comparison - def msum(iterable): - """Full precision sum of values in iterable. Returns the value of - the sum, rounded to the nearest representable floating-point number - using the round-half-to-even rule. - - """ - # Stage 1: accumulate partials - partials = [] - for x in iterable: - i = 0 - for y in partials: - if abs(x) < abs(y): - x, y = y, x - hi = x + y - lo = y - (hi - x) - if lo: - partials[i] = lo - i += 1 - x = hi - partials[i:] = [x] if x else [] - - # Stage 2: sum partials - if not partials: - return 0.0 - - # sum from the top, stopping as soon as the sum is inexact. 
- total = partials.pop() - while partials: - x = partials.pop() - old_total, total = total, total + x - error = x - (total - old_total) - if error != 0.0: - # adjust for correct rounding if necessary - if partials and (partials[-1] > 0.0) == (error > 0.0) and \ - total + 2*error - total == 2*error: - total += 2*error - break - return total - - from sys import float_info - maxfloat = float_info.max - twopow = 2.**(float_info.max_exp - 1) - - test_values = [ - ([], 0.0), - ([0.0], 0.0), - ([1e100, 1.0, -1e100, 1e-100, 1e50, -1.0, -1e50], 1e-100), - ([1e308, 1e308, -1e308], OverflowError), - ([-1e308, 1e308, 1e308], 1e308), - ([1e308, -1e308, 1e308], 1e308), - ([2.0**1023, 2.0**1023, -2.0**1000], OverflowError), - ([twopow, twopow, twopow, twopow, -twopow, -twopow, -twopow], - OverflowError), - ([2.0**53, -0.5, -2.0**-54], 2.0**53-1.0), - ([2.0**53, 1.0, 2.0**-100], 2.0**53+2.0), - ([2.0**53+10.0, 1.0, 2.0**-100], 2.0**53+12.0), - - ([2.0**53-4.0, 0.5, 2.0**-54], 2.0**53-3.0), - ([2.0**1023-2.0**970, -1.0, 2.0**1023], OverflowError), - ([maxfloat, maxfloat*2.**-54], maxfloat), - ([maxfloat, maxfloat*2.**-53], OverflowError), - ([1./n for n in range(1, 1001)], 7.4854708605503451), - ([(-1.)**n/n for n in range(1, 1001)], -0.69264743055982025), - ([1.7**(i+1)-1.7**i for i in range(1000)] + [-1.7**1000], -1.0), - ([INF, -INF, NAN], ValueError), - ([NAN, INF, -INF], ValueError), - ([INF, NAN, INF], ValueError), - - ([INF, INF], OverflowError), - ([INF, -INF], ValueError), - ([-INF, 1e308, 1e308, -INF], OverflowError), - ([2.0**1023-2.0**970, 0.0, 2.0**1023], OverflowError), - ([2.0**1023-2.0**970, 1.0, 2.0**1023], OverflowError), - ([2.0**1023, 2.0**1023], OverflowError), - ([2.0**1023, 2.0**1023, -1.0], OverflowError), - ([twopow, twopow, twopow, twopow, -twopow, -twopow], - OverflowError), - ([twopow, twopow, twopow, twopow, -twopow, twopow], OverflowError), - ([-twopow, -twopow, -twopow, -twopow], OverflowError), - - ([2.**1023, 2.**1023, -2.**971], OverflowError), - 
([2.**1023, 2.**1023, -2.**970], OverflowError), - ([-2.**970, 2.**1023, 2.**1023, -2.**-1074], OverflowError), - ([ 2.**1023, 2.**1023, -2.**970, 2.**-1074], OverflowError), - ([-2.**1023, 2.**971, -2.**1023], -maxfloat), - ([-2.**1023, -2.**1023, 2.**970], OverflowError), - ([-2.**1023, -2.**1023, 2.**970, 2.**-1074], OverflowError), - ([-2.**-1074, -2.**1023, -2.**1023, 2.**970], OverflowError), - ([2.**930, -2.**980, 2.**1023, 2.**1023, twopow, -twopow], - OverflowError), - ([2.**1023, 2.**1023, -1e307], OverflowError), - ([1e16, 1., 1e-16], 10000000000000002.0), - ([1e16-2., 1.-2.**-53, -(1e16-2.), -(1.-2.**-53)], 0.0), - ] - - for i, (vals, s) in enumerate(test_values): - if isinstance(s, type) and issubclass(s, Exception): - try: - m = math.sum(vals) - except s: - pass - else: - self.fail("test %d failed: got %r, expected %r " - "for math.sum(%.100r)" % - (i, m, s.__name__, vals)) - else: - try: - self.assertEqual(math.sum(vals), s) - except OverflowError: - self.fail("test %d failed: got OverflowError, expected %r " - "for math.sum(%.100r)" % (i, s, vals)) - except ValueError: - self.fail("test %d failed: got ValueError, expected %r " - "for math.sum(%.100r)" % (i, s, vals)) - - # compare with output of msum above, but only when - # result isn't an IEEE special or an exception - if not math.isinf(s) and not math.isnan(s): - self.assertEqual(msum(vals), s) - - from random import random, gauss, shuffle - for j in range(1000): - vals = [7, 1e100, -7, -1e100, -9e-20, 8e-20] * 10 - s = 0 - for i in range(200): - v = gauss(0, random()) ** 7 - s - s += v - vals.append(v) - shuffle(vals) - - s = msum(vals) - self.assertEqual(msum(vals), math.sum(vals)) - - def testTan(self): self.assertRaises(TypeError, math.tan) self.ftest('tan(0)', math.tan(0), 0) diff --git a/Lib/test/test_random.py b/Lib/test/test_random.py index da62a4fd4ef..14e9fcac2f7 100644 --- a/Lib/test/test_random.py +++ b/Lib/test/test_random.py @@ -5,7 +5,7 @@ import random import time import pickle 
import warnings -from math import log, exp, sqrt, pi, sum as msum +from math import log, exp, sqrt, pi, fsum as msum from test import support class TestBasicOps(unittest.TestCase): diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index a196834de4a..d23d2ffcb5d 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -396,7 +396,7 @@ FUNC1(tanh, tanh, 0, Note 4: A similar implementation is in Modules/cmathmodule.c. Be sure to update both when making changes. - Note 5: The signature of math.sum() differs from __builtin__.sum() + Note 5: The signature of math.fsum() differs from __builtin__.sum() because the start argument doesn't make sense in the context of accurate summation. Since the partials table is collapsed before returning a result, sum(seq2, start=sum(seq1)) may not equal the @@ -407,7 +407,7 @@ FUNC1(tanh, tanh, 0, /* Extend the partials array p[] by doubling its size. */ static int /* non-zero on error */ -_sum_realloc(double **p_ptr, Py_ssize_t n, +_fsum_realloc(double **p_ptr, Py_ssize_t n, double *ps, Py_ssize_t *m_ptr) { void *v = NULL; @@ -425,7 +425,7 @@ _sum_realloc(double **p_ptr, Py_ssize_t n, v = PyMem_Realloc(p, sizeof(double) * m); } if (v == NULL) { /* size overflow or no memory */ - PyErr_SetString(PyExc_MemoryError, "math sum partials"); + PyErr_SetString(PyExc_MemoryError, "math.fsum partials"); return 1; } *p_ptr = (double*) v; @@ -464,18 +464,19 @@ _sum_realloc(double **p_ptr, Py_ssize_t n, */ static PyObject* -math_sum(PyObject *self, PyObject *seq) +math_fsum(PyObject *self, PyObject *seq) { PyObject *item, *iter, *sum = NULL; Py_ssize_t i, j, n = 0, m = NUM_PARTIALS; double x, y, t, ps[NUM_PARTIALS], *p = ps; + double xsave, special_sum = 0.0, inf_sum = 0.0; volatile double hi, yr, lo; iter = PyObject_GetIter(seq); if (iter == NULL) return NULL; - PyFPE_START_PROTECT("sum", Py_DECREF(iter); return NULL) + PyFPE_START_PROTECT("fsum", Py_DECREF(iter); return NULL) for(;;) { /* for x in iterable */ assert(0 <= n && n <= 
m); @@ -485,18 +486,19 @@ math_sum(PyObject *self, PyObject *seq) item = PyIter_Next(iter); if (item == NULL) { if (PyErr_Occurred()) - goto _sum_error; + goto _fsum_error; break; } x = PyFloat_AsDouble(item); Py_DECREF(item); if (PyErr_Occurred()) - goto _sum_error; + goto _fsum_error; + xsave = x; for (i = j = 0; j < n; j++) { /* for y in partials */ y = p[j]; if (fabs(x) < fabs(y)) { - t = x; x = y; y = t; + t = x; x = y; y = t; } hi = x + y; yr = hi - x; @@ -505,59 +507,73 @@ math_sum(PyObject *self, PyObject *seq) p[i++] = lo; x = hi; } - - n = i; /* ps[i:] = [x] */ + + n = i; /* ps[i:] = [x] */ if (x != 0.0) { - /* If non-finite, reset partials, effectively - adding subsequent items without roundoff - and yielding correct non-finite results, - provided IEEE 754 rules are observed */ - if (! Py_IS_FINITE(x)) + if (! Py_IS_FINITE(x)) { + /* a nonfinite x could arise either as + a result of intermediate overflow, or + as a result of a nan or inf in the + summands */ + if (Py_IS_FINITE(xsave)) { + PyErr_SetString(PyExc_OverflowError, + "intermediate overflow in fsum"); + goto _fsum_error; + } + if (Py_IS_INFINITY(xsave)) + inf_sum += xsave; + special_sum += xsave; + /* reset partials */ n = 0; - else if (n >= m && _sum_realloc(&p, n, ps, &m)) - goto _sum_error; - p[n++] = x; + } + else if (n >= m && _fsum_realloc(&p, n, ps, &m)) + goto _fsum_error; + else + p[n++] = x; } } + if (special_sum != 0.0) { + if (Py_IS_NAN(inf_sum)) + PyErr_SetString(PyExc_ValueError, + "-inf + inf in fsum"); + else + sum = PyFloat_FromDouble(special_sum); + goto _fsum_error; + } + hi = 0.0; if (n > 0) { hi = p[--n]; - if (Py_IS_FINITE(hi)) { - /* sum_exact(ps, hi) from the top, stop when the sum becomes inexact. */ - while (n > 0) { - x = hi; - y = p[--n]; - assert(fabs(y) < fabs(x)); - hi = x + y; - yr = hi - x; - lo = y - yr; - if (lo != 0.0) - break; - } - /* Make half-even rounding work across multiple partials. 
Needed - so that sum([1e-16, 1, 1e16]) will round-up the last digit to - two instead of down to zero (the 1e-16 makes the 1 slightly - closer to two). With a potential 1 ULP rounding error fixed-up, - math.sum() can guarantee commutativity. */ - if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) || - (lo > 0.0 && p[n-1] > 0.0))) { - y = lo * 2.0; - x = hi + y; - yr = x - hi; - if (y == yr) - hi = x; - } + /* sum_exact(ps, hi) from the top, stop when the sum becomes + inexact. */ + while (n > 0) { + x = hi; + y = p[--n]; + assert(fabs(y) < fabs(x)); + hi = x + y; + yr = hi - x; + lo = y - yr; + if (lo != 0.0) + break; } - else { /* raise exception corresponding to a special value */ - errno = Py_IS_NAN(hi) ? EDOM : ERANGE; - if (is_error(hi)) - goto _sum_error; + /* Make half-even rounding work across multiple partials. + Needed so that sum([1e-16, 1, 1e16]) will round-up the last + digit to two instead of down to zero (the 1e-16 makes the 1 + slightly closer to two). With a potential 1 ULP rounding + error fixed-up, math.fsum() can guarantee commutativity. 
*/ + if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) || + (lo > 0.0 && p[n-1] > 0.0))) { + y = lo * 2.0; + x = hi + y; + yr = x - hi; + if (y == yr) + hi = x; } } sum = PyFloat_FromDouble(hi); -_sum_error: +_fsum_error: PyFPE_END_PROTECT(hi) Py_DECREF(iter); if (p != ps) @@ -567,7 +583,7 @@ _sum_error: #undef NUM_PARTIALS -PyDoc_STRVAR(math_sum_doc, -"sum(iterable)\n\n\ +PyDoc_STRVAR(math_fsum_doc, +"fsum(iterable)\n\n\ Return an accurate floating point sum of values in the iterable.\n\ Assumes IEEE-754 floating point arithmetic."); @@ -1078,6 +1094,7 @@ static PyMethodDef math_methods[] = { {"floor", math_floor, METH_O, math_floor_doc}, {"fmod", math_fmod, METH_VARARGS, math_fmod_doc}, {"frexp", math_frexp, METH_O, math_frexp_doc}, + {"fsum", math_fsum, METH_O, math_fsum_doc}, {"hypot", math_hypot, METH_VARARGS, math_hypot_doc}, {"isinf", math_isinf, METH_O, math_isinf_doc}, {"isnan", math_isnan, METH_O, math_isnan_doc}, @@ -1091,10 +1108,9 @@ static PyMethodDef math_methods[] = { {"sin", math_sin, METH_O, math_sin_doc}, {"sinh", math_sinh, METH_O, math_sinh_doc}, {"sqrt", math_sqrt, METH_O, math_sqrt_doc}, - {"sum", math_sum, METH_O, math_sum_doc}, {"tan", math_tan, METH_O, math_tan_doc}, {"tanh", math_tanh, METH_O, math_tanh_doc}, - {"trunc", math_trunc, METH_O, math_trunc_doc}, + {"trunc", math_trunc, METH_O, math_trunc_doc}, {NULL, NULL} /* sentinel */ };