Replace straight addition with Kahan summation and move max to the end (GH-8727)

2024-12-12 03:04:15 +08:00 · 2018-08-11 11:26:36 -07:00 · 2018-08-11 11:26:36 -07:00 · 1399074535
commit 1399074535
parent 2fc46979b8
1 changed files with 45 additions and 20 deletions
--- a/Modules/mathmodule.c
+++ b/Modules/mathmodule.c
@ -2031,6 +2031,49 @@ math_fmod_impl(PyObject *module, double x, double y)
        return PyFloat_FromDouble(r);
 }

+/*
+Given an *n* length *vec* of non-negative, non-nan, non-inf values
+where *max* is the largest value in the vector, compute:
+
+    sum((x / max) ** 2 for x in vec)
+
+When a maximum value is found, it is swapped to the end.  This
+lets us skip one loop iteration and just add 1.0 at the end.
+Saving the largest value for last also helps improve accuracy.
+
+Kahan summation is used to improve accuracy.  The *csum*
+variable tracks the cumulative sum and *frac* tracks
+fractional round-off error for the most recent addition.
+
+*/
+
+static inline double
+scaled_vector_squared(Py_ssize_t n, double *vec, double max)
+{
+    double x, csum = 0.0, oldcsum, frac = 0.0;
+    Py_ssize_t i;
+
+    if (max == 0.0) {
+        return 0.0;
+    }
+    assert(n > 0);
+    for (i=0 ; i<n-1 ; i++) {
+        x = vec[i];
+        if (x == max) {
+            x = vec[n-1];
+            vec[n-1] = max;
+        }
+        x /= max;
+        x = x*x - frac;
+        oldcsum = csum;
+        csum += x;
+        frac = (csum - oldcsum) - x;
+    }
+    assert(vec[n-1] == max);
+    csum += 1.0 - frac;
+    return csum;
+}
+
 /*[clinic input]
 math.dist

@ -2054,7 +2097,6 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
    PyObject *item;
    double *diffs;
    double max = 0.0;
-    double csum = 0.0;
    double x, px, qx, result;
    Py_ssize_t i, m, n;
    int found_nan = 0;
@ -2099,15 +2141,7 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q)
        result = Py_NAN;
        goto done;
    }
-    if (max == 0.0) {
-        result = 0.0;
-        goto done;
-    }
-    for (i=0 ; i<n ; i++) {
-        x = diffs[i] / max;
-        csum += x * x;
-    }
-    result = max * sqrt(csum);
+    result = max * sqrt(scaled_vector_squared(n, diffs, max));

  done:
    PyObject_Free(diffs);
@ -2122,7 +2156,6 @@ math_hypot(PyObject *self, PyObject *args)
    PyObject *item;
    double *coordinates;
    double max = 0.0;
-    double csum = 0.0;
    double x, result;
    int found_nan = 0;

@ -2152,15 +2185,7 @@ math_hypot(PyObject *self, PyObject *args)
        result = Py_NAN;
        goto done;
    }
-    if (max == 0.0) {
-        result = 0.0;
-        goto done;
-    }
-    for (i=0 ; i<n ; i++) {
-        x = coordinates[i] / max;
-        csum += x * x;
-    }
-    result = max * sqrt(csum);
+    result = max * sqrt(scaled_vector_squared(n, coordinates, max));

  done:
    PyObject_Free(coordinates);