From 6463ba3061bd311413d2951dc83c565907e10459 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 7 Apr 2019 09:20:03 -0700 Subject: [PATCH] bpo-27181: Add statistics.geometric_mean() (GH-12638) --- Doc/library/statistics.rst | 19 ++++ Doc/whatsnew/3.8.rst | 3 + Lib/statistics.py | 22 ++++- Lib/test/test_statistics.py | 88 +++++++++++++++++++ .../2019-03-31-01-18-52.bpo-27181.LVUWcc.rst | 1 + 5 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2019-03-31-01-18-52.bpo-27181.LVUWcc.rst diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst index 1d52d98b299..8bb2bdf7b69 100644 --- a/Doc/library/statistics.rst +++ b/Doc/library/statistics.rst @@ -40,6 +40,7 @@ or sample. ======================= =============================================================== :func:`mean` Arithmetic mean ("average") of data. :func:`fmean` Fast, floating point arithmetic mean. +:func:`geometric_mean` Geometric mean of data. :func:`harmonic_mean` Harmonic mean of data. :func:`median` Median (middle value) of data. :func:`median_low` Low median of data. @@ -130,6 +131,24 @@ However, for reading convenience, most of the examples show sorted sequences. .. versionadded:: 3.8 +.. function:: geometric_mean(data) + + Convert *data* to floats and compute the geometric mean. + + Raises a :exc:`StatisticsError` if the input dataset is empty, + if it contains a zero, or if it contains a negative value. + + No special efforts are made to achieve exact results. + (However, this may change in the future.) + + .. doctest:: + + >>> round(geometric_mean([54, 24, 36]), 9) + 36.0 + + .. versionadded:: 3.8 + + .. function:: harmonic_mean(data) Return the harmonic mean of *data*, a sequence or iterator of diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index ac20ee3aa57..4347b3ee411 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -322,6 +322,9 @@ Added :func:`statistics.fmean` as a faster, floating point variant of :func:`statistics.mean()`. (Contributed by Raymond Hettinger and Steven D'Aprano in :issue:`35904`.) +Added :func:`statistics.geometric_mean()` +(Contributed by Raymond Hettinger in :issue:`27181`.) + Added :func:`statistics.multimode` that returns a list of the most common values. (Contributed by Raymond Hettinger in :issue:`35892`.) diff --git a/Lib/statistics.py b/Lib/statistics.py index bd8a6f96381..262ad976b65 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -11,13 +11,14 @@ Calculating averages Function Description ================== ============================================= mean Arithmetic mean (average) of data. +geometric_mean Geometric mean of data. harmonic_mean Harmonic mean of data. median Median (middle value) of data. median_low Low median of data. median_high High median of data. median_grouped Median, or 50th percentile, of grouped data. mode Mode (most common value) of data. -multimode List of modes (most common values of data) +multimode List of modes (most common values of data). ================== ============================================= Calculate the arithmetic mean ("the average") of data: @@ -81,6 +82,7 @@ __all__ = [ 'StatisticsError', 'NormalDist', 'pstdev', 'pvariance', 'stdev', 'variance', 'median', 'median_low', 'median_high', 'median_grouped', 'mean', 'mode', 'multimode', 'harmonic_mean', 'fmean', + 'geometric_mean', ] import math @@ -328,6 +330,24 @@ def fmean(data): except ZeroDivisionError: raise StatisticsError('fmean requires at least one data point') from None +def geometric_mean(data): + """Convert data to floats and compute the geometric mean. + + Raises a StatisticsError if the input dataset is empty, + if it contains a zero, or if it contains a negative value. + + No special efforts are made to achieve exact results. + (However, this may change in the future.) + + >>> round(geometric_mean([54, 24, 36]), 9) + 36.0 + """ + try: + return exp(fmean(map(log, data))) + except ValueError: + raise StatisticsError('geometric mean requires a non-empty dataset ' + ' containing positive numbers') from None + def harmonic_mean(data): """Return the harmonic mean of data. diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index 7f7839de460..4d397eb1265 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -2038,6 +2038,94 @@ class TestStdev(VarianceStdevMixin, NumericTestCase): expected = math.sqrt(statistics.variance(data)) self.assertEqual(self.func(data), expected) +class TestGeometricMean(unittest.TestCase): + + def test_basics(self): + geometric_mean = statistics.geometric_mean + self.assertAlmostEqual(geometric_mean([54, 24, 36]), 36.0) + self.assertAlmostEqual(geometric_mean([4.0, 9.0]), 6.0) + self.assertAlmostEqual(geometric_mean([17.625]), 17.625) + + random.seed(86753095551212) + for rng in [ + range(1, 100), + range(1, 1_000), + range(1, 10_000), + range(500, 10_000, 3), + range(10_000, 500, -3), + [12, 17, 13, 5, 120, 7], + [random.expovariate(50.0) for i in range(1_000)], + [random.lognormvariate(20.0, 3.0) for i in range(2_000)], + [random.triangular(2000, 3000, 2200) for i in range(3_000)], + ]: + gm_decimal = math.prod(map(Decimal, rng)) ** (Decimal(1) / len(rng)) + gm_float = geometric_mean(rng) + self.assertTrue(math.isclose(gm_float, float(gm_decimal))) + + def test_various_input_types(self): + geometric_mean = statistics.geometric_mean + D = Decimal + F = Fraction + # https://www.wolframalpha.com/input/?i=geometric+mean+3.5,+4.0,+5.25 + expected_mean = 4.18886 + for data, kind in [ + ([3.5, 4.0, 5.25], 'floats'), + ([D('3.5'), D('4.0'), D('5.25')], 'decimals'), + ([F(7, 2), F(4, 1), F(21, 4)], 'fractions'), + ([3.5, 4, F(21, 4)], 'mixed types'), + ((3.5, 4.0, 5.25), 'tuple'), + (iter([3.5, 4.0, 5.25]), 'iterator'), + ]: + actual_mean = geometric_mean(data) + self.assertIs(type(actual_mean), float, kind) + self.assertAlmostEqual(actual_mean, expected_mean, places=5) + + def test_big_and_small(self): + geometric_mean = statistics.geometric_mean + + # Avoid overflow to infinity + large = 2.0 ** 1000 + big_gm = geometric_mean([54.0 * large, 24.0 * large, 36.0 * large]) + self.assertTrue(math.isclose(big_gm, 36.0 * large)) + self.assertFalse(math.isinf(big_gm)) + + # Avoid underflow to zero + small = 2.0 ** -1000 + small_gm = geometric_mean([54.0 * small, 24.0 * small, 36.0 * small]) + self.assertTrue(math.isclose(small_gm, 36.0 * small)) + self.assertNotEqual(small_gm, 0.0) + + def test_error_cases(self): + geometric_mean = statistics.geometric_mean + StatisticsError = statistics.StatisticsError + with self.assertRaises(StatisticsError): + geometric_mean([]) # empty input + with self.assertRaises(StatisticsError): + geometric_mean([3.5, 0.0, 5.25]) # zero input + with self.assertRaises(StatisticsError): + geometric_mean([3.5, -4.0, 5.25]) # negative input + with self.assertRaises(StatisticsError): + geometric_mean(iter([])) # empty iterator + with self.assertRaises(TypeError): + geometric_mean(None) # non-iterable input + with self.assertRaises(TypeError): + geometric_mean([10, None, 20]) # non-numeric input + with self.assertRaises(TypeError): + geometric_mean() # missing data argument + with self.assertRaises(TypeError): + geometric_mean([10, 20, 60], 70) # too many arguments + + def test_special_values(self): + # Rules for special values are inherited from math.fsum() + geometric_mean = statistics.geometric_mean + NaN = float('Nan') + Inf = float('Inf') + self.assertTrue(math.isnan(geometric_mean([10, NaN])), 'nan') + self.assertTrue(math.isnan(geometric_mean([NaN, Inf])), 'nan and infinity') + self.assertTrue(math.isinf(geometric_mean([10, Inf])), 'infinity') + with self.assertRaises(ValueError): + geometric_mean([Inf, -Inf]) + class TestNormalDist(unittest.TestCase): # General note on precision: The pdf(), cdf(), and overlap() methods diff --git a/Misc/NEWS.d/next/Library/2019-03-31-01-18-52.bpo-27181.LVUWcc.rst b/Misc/NEWS.d/next/Library/2019-03-31-01-18-52.bpo-27181.LVUWcc.rst new file mode 100644 index 00000000000..3ce41c55798 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-03-31-01-18-52.bpo-27181.LVUWcc.rst @@ -0,0 +1 @@ +Add statistics.geometric_mean().