Minor accuracy improvement for statistics.correlation() (GH-107781)

This commit is contained in:
Raymond Hettinger 2023-08-08 18:12:52 +02:00 committed by GitHub
parent f9e3ff1ea4
commit d4ac094cf9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 9 additions and 1 deletion

View File

@ -1004,6 +1004,14 @@ def _mean_stdev(data):
# Handle Nans and Infs gracefully
return float(xbar), float(xbar) / float(ss)
def _sqrtprod(x: float, y: float) -> float:
"Return sqrt(x * y) computed with high accuracy."
# Square root differential correction:
# https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0
h = sqrt(x * y)
x = sumprod((x, h), (y, -h))
return h + x / (2.0 * h)
# === Statistics for relations between two inputs ===
@ -1083,7 +1091,7 @@ def correlation(x, y, /, *, method='linear'):
sxx = sumprod(x, x)
syy = sumprod(y, y)
try:
return sxy / sqrt(sxx * syy)
return sxy / _sqrtprod(sxx, syy)
except ZeroDivisionError:
raise StatisticsError('at least one of the inputs is constant')