diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 2110990d188..817c1f858aa 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -1733,6 +1733,10 @@ are always available. They are listed here in alphabetical order. .. versionchanged:: 3.8 The *start* parameter can be specified as a keyword argument. + .. versionchanged:: 3.12 Summation of floats switched to an algorithm + that gives higher accuracy on most builds. + + .. class:: super() super(type, object_or_type=None) diff --git a/Doc/library/math.rst b/Doc/library/math.rst index 559c6ec5dd9..aeebcaf6ab0 100644 --- a/Doc/library/math.rst +++ b/Doc/library/math.rst @@ -108,12 +108,7 @@ Number-theoretic and representation functions .. function:: fsum(iterable) Return an accurate floating point sum of values in the iterable. Avoids - loss of precision by tracking multiple intermediate partial sums: - - >>> sum([.1, .1, .1, .1, .1, .1, .1, .1, .1, .1]) - 0.9999999999999999 - >>> fsum([.1, .1, .1, .1, .1, .1, .1, .1, .1, .1]) - 1.0 + loss of precision by tracking multiple intermediate partial sums. The algorithm's accuracy depends on IEEE-754 arithmetic guarantees and the typical case where the rounding mode is half-even. On some non-Windows diff --git a/Doc/tutorial/floatingpoint.rst b/Doc/tutorial/floatingpoint.rst index e1cd7f9ece7..cedade6e336 100644 --- a/Doc/tutorial/floatingpoint.rst +++ b/Doc/tutorial/floatingpoint.rst @@ -192,7 +192,7 @@ added onto a running total. That can make a difference in overall accuracy so that the errors do not accumulate to the point where they affect the final total: - >>> sum([0.1] * 10) == 1.0 + >>> 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 == 1.0 False >>> math.fsum([0.1] * 10) == 1.0 True diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index eb1c389257c..c6560048325 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -9,6 +9,7 @@ import fractions import gc import io import locale +import math import os import pickle import platform @@ -31,6 +32,7 @@ from test.support import (swap_attr, maybe_get_event_loop_policy) from test.support.os_helper import (EnvironmentVarGuard, TESTFN, unlink) from test.support.script_helper import assert_python_ok from test.support.warnings_helper import check_warnings +from test.support import requires_IEEE_754 from unittest.mock import MagicMock, patch try: import pty, signal @@ -38,6 +40,12 @@ except ImportError: pty = signal = None +# Detect evidence of double-rounding: sum() does not always +# get improved accuracy on machines that suffer from double rounding. +x, y = 1e16, 2.9999 # use temporary values to defeat peephole optimizer +HAVE_DOUBLE_ROUNDING = (x + y == 1e16 + 4) + + class Squares: def __init__(self, max): @@ -1617,6 +1625,8 @@ class BuiltinTest(unittest.TestCase): self.assertEqual(repr(sum([-0.0])), '0.0') self.assertEqual(repr(sum([-0.0], -0.0)), '-0.0') self.assertEqual(repr(sum([], -0.0)), '-0.0') + self.assertTrue(math.isinf(sum([float("inf"), float("inf")]))) + self.assertTrue(math.isinf(sum([1e308, 1e308]))) self.assertRaises(TypeError, sum) self.assertRaises(TypeError, sum, 42) @@ -1641,6 +1651,14 @@ class BuiltinTest(unittest.TestCase): sum(([x] for x in range(10)), empty) self.assertEqual(empty, []) + @requires_IEEE_754 + @unittest.skipIf(HAVE_DOUBLE_ROUNDING, + "sum accuracy not guaranteed on machines with double rounding") + @support.cpython_only # Other implementations may choose a different algorithm + def test_sum_accuracy(self): + self.assertEqual(sum([0.1] * 10), 1.0) + self.assertEqual(sum([1.0, 10E100, 1.0, -10E100]), 2.0) + def test_type(self): self.assertEqual(type(''), type('123')) self.assertNotEqual(type(''), type(())) diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-12-21-22-48-41.gh-issue-100425.U64yLu.rst b/Misc/NEWS.d/next/Core and Builtins/2022-12-21-22-48-41.gh-issue-100425.U64yLu.rst new file mode 100644 index 00000000000..5559020b11d --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-12-21-22-48-41.gh-issue-100425.U64yLu.rst @@ -0,0 +1 @@ +Improve the accuracy of ``sum()`` with compensated summation. diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index ff96c25da5e..2d4822e6d46 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -2532,6 +2532,7 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start) if (PyFloat_CheckExact(result)) { double f_result = PyFloat_AS_DOUBLE(result); + double c = 0.0; Py_SETREF(result, NULL); while(result == NULL) { item = PyIter_Next(iter); @@ -2539,10 +2540,25 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start) Py_DECREF(iter); if (PyErr_Occurred()) return NULL; + /* Avoid losing the sign on a negative result, + and don't let adding the compensation convert + an infinite or overflowed sum to a NaN. */ + if (c && Py_IS_FINITE(c)) { + f_result += c; + } return PyFloat_FromDouble(f_result); } if (PyFloat_CheckExact(item)) { - f_result += PyFloat_AS_DOUBLE(item); + // Improved Kahan–Babuška algorithm by Arnold Neumaier + // https://www.mat.univie.ac.at/~neum/scan/01.pdf + double x = PyFloat_AS_DOUBLE(item); + double t = f_result + x; + if (fabs(f_result) >= fabs(x)) { + c += (f_result - t) + x; + } else { + c += (x - t) + f_result; + } + f_result = t; _Py_DECREF_SPECIALIZED(item, _PyFloat_ExactDealloc); continue; } @@ -2556,6 +2572,9 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start) continue; } } + if (c && Py_IS_FINITE(c)) { + f_result += c; + } result = PyFloat_FromDouble(f_result); if (result == NULL) { Py_DECREF(item);