From d4ac094cf9d15ec5705ec0fe8771df9e6ba915b9 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger
Date: Tue, 8 Aug 2023 18:12:52 +0200
Subject: [PATCH] Minor accuracy improvement for statistics.correlation() (GH-107781)

---
 Lib/statistics.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index 6bd214bbfe2..066669d25dd 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -1004,6 +1004,14 @@ def _mean_stdev(data):
         # Handle Nans and Infs gracefully
         return float(xbar), float(xbar) / float(ss)
 
+def _sqrtprod(x: float, y: float) -> float:
+    "Return sqrt(x * y) computed with high accuracy."
+    # Square root differential correction:
+    # https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0
+    h = sqrt(x * y)
+    x = sumprod((x, h), (y, -h))
+    return h + x / (2.0 * h)
+
 
 # === Statistics for relations between two inputs ===
 
@@ -1083,7 +1091,7 @@ def correlation(x, y, /, *, method='linear'):
     sxx = sumprod(x, x)
     syy = sumprod(y, y)
     try:
-        return sxy / sqrt(sxx * syy)
+        return sxy / _sqrtprod(sxx, syy)
     except ZeroDivisionError:
         raise StatisticsError('at least one of the inputs is constant')
 