bpo-44150: Support optional weights parameter for fmean() (GH-26175)

2024-11-27 03:45:08 +08:00 · 2021-05-20 20:22:26 -07:00 · 2021-05-20 20:22:26 -07:00 · be4dd7fcd9
commit be4dd7fcd9
parent 18f41c04ff
4 changed files with 59 additions and 9 deletions
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -43,7 +43,7 @@ or sample.

 =======================  ===============================================================
 :func:`mean`             Arithmetic mean ("average") of data.
-:func:`fmean`            Fast, floating point arithmetic mean.
+:func:`fmean`            Fast, floating point arithmetic mean, with optional weighting.
 :func:`geometric_mean`   Geometric mean of data.
 :func:`harmonic_mean`    Harmonic mean of data.
 :func:`median`           Median (middle value) of data.
@@ -128,7 +128,7 @@ However, for reading convenience, most of the examples show sorted sequences.
      ``mean(data)`` is equivalent to calculating the true population mean μ.


-.. function:: fmean(data)
+.. function:: fmean(data, weights=None)

   Convert *data* to floats and compute the arithmetic mean.

@@ -141,8 +141,25 @@ However, for reading convenience, most of the examples show sorted sequences.
      >>> fmean([3.5, 4.0, 5.25])
      4.25

+   Optional weighting is supported.  For example, a professor assigns a
+   grade for a course by weighting quizzes at 20%, homework at 20%, a
+   midterm exam at 30%, and a final exam at 30%:
+
+   .. doctest::
+
+      >>> grades = [85, 92, 83, 91]
+      >>> weights = [0.20, 0.20, 0.30, 0.30]
+      >>> fmean(grades, weights)
+      87.6
+
+   If *weights* is supplied, it must be the same length as the *data* or
+   a :exc:`ValueError` will be raised.
+
   .. versionadded:: 3.8

+   .. versionchanged:: 3.11
+      Added support for *weights*.
+

 .. function:: geometric_mean(data)

--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -136,7 +136,7 @@ from decimal import Decimal
 from itertools import groupby, repeat
 from bisect import bisect_left, bisect_right
 from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum
-from operator import itemgetter
+from operator import itemgetter, mul
 from collections import Counter, namedtuple

 # === Exceptions ===
@@ -345,7 +345,7 @@ def mean(data):
    return _convert(total / n, T)


-def fmean(data):
+def fmean(data, weights=None):
    """Convert data to floats and compute the arithmetic mean.

    This runs faster than the mean() function and it always returns a float.
@@ -363,13 +363,24 @@ def fmean(data):
            nonlocal n
            for n, x in enumerate(iterable, start=1):
                yield x
-        total = fsum(count(data))
-    else:
+        data = count(data)
+    if weights is None:
        total = fsum(data)
-    try:
+        if not n:
+            raise StatisticsError('fmean requires at least one data point')
        return total / n
-    except ZeroDivisionError:
-        raise StatisticsError('fmean requires at least one data point') from None
+    try:
+        num_weights = len(weights)
+    except TypeError:
+        weights = list(weights)
+        num_weights = len(weights)
+    num = fsum(map(mul, data, weights))
+    if n != num_weights:
+        raise StatisticsError('data and weights must be the same length')
+    den = fsum(weights)
+    if not den:
+        raise StatisticsError('sum of weights must be non-zero')
+    return num / den


 def geometric_mean(data):
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -1972,6 +1972,27 @@ class TestFMean(unittest.TestCase):
        with self.assertRaises(ValueError):
            fmean([Inf, -Inf])

+    def test_weights(self):
+        fmean = statistics.fmean
+        StatisticsError = statistics.StatisticsError
+        self.assertEqual(
+            fmean([10, 10, 10, 50], [0.25] * 4),
+            fmean([10, 10, 10, 50]))
+        self.assertEqual(
+            fmean([10, 10, 20], [0.25, 0.25, 0.50]),
+            fmean([10, 10, 20, 20]))
+        self.assertEqual(                           # inputs are iterators
+            fmean(iter([10, 10, 20]), iter([0.25, 0.25, 0.50])),
+            fmean([10, 10, 20, 20]))
+        with self.assertRaises(StatisticsError):
+            fmean([10, 20, 30], [1, 2])             # unequal lengths
+        with self.assertRaises(StatisticsError):
+            fmean(iter([10, 20, 30]), iter([1, 2])) # unequal lengths
+        with self.assertRaises(StatisticsError):
+            fmean([10, 20], [-1, 1])                # sum of weights is zero
+        with self.assertRaises(StatisticsError):
+            fmean(iter([10, 20]), iter([-1, 1]))    # sum of weights is zero
+

 # === Tests for variances and standard deviations ===

--- a/Misc/NEWS.d/next/Library/2021-05-16-11-57-38.bpo-44150.xAhhik.rst
+++ b/Misc/NEWS.d/next/Library/2021-05-16-11-57-38.bpo-44150.xAhhik.rst
@@ -0,0 +1 @@
+Add optional *weights* argument to statistics.fmean().
				`@ -0,0 +1 @@`
				`Add optional weights argument to statistics.fmean().`