bpo-27181: Add statistics.geometric_mean() (GH-12638)
This commit is contained in:
parent
9d7b2c0909
commit
6463ba3061
|
@ -40,6 +40,7 @@ or sample.
|
|||
======================= ===============================================================
|
||||
:func:`mean` Arithmetic mean ("average") of data.
|
||||
:func:`fmean` Fast, floating point arithmetic mean.
|
||||
:func:`geometric_mean` Geometric mean of data.
|
||||
:func:`harmonic_mean` Harmonic mean of data.
|
||||
:func:`median` Median (middle value) of data.
|
||||
:func:`median_low` Low median of data.
|
||||
|
@ -130,6 +131,24 @@ However, for reading convenience, most of the examples show sorted sequences.
|
|||
.. versionadded:: 3.8
|
||||
|
||||
|
||||
.. function:: geometric_mean(data)
|
||||
|
||||
Convert *data* to floats and compute the geometric mean.
|
||||
|
||||
Raises a :exc:`StatisticsError` if the input dataset is empty,
|
||||
if it contains a zero, or if it contains a negative value.
|
||||
|
||||
No special efforts are made to achieve exact results.
|
||||
(However, this may change in the future.)
|
||||
|
||||
.. doctest::
|
||||
|
||||
>>> round(geometric_mean([54, 24, 36]), 9)
|
||||
36.0
|
||||
|
||||
.. versionadded:: 3.8
|
||||
|
||||
|
||||
.. function:: harmonic_mean(data)
|
||||
|
||||
Return the harmonic mean of *data*, a sequence or iterator of
|
||||
|
|
|
@ -322,6 +322,9 @@ Added :func:`statistics.fmean` as a faster, floating point variant of
|
|||
:func:`statistics.mean()`. (Contributed by Raymond Hettinger and
|
||||
Steven D'Aprano in :issue:`35904`.)
|
||||
|
||||
Added :func:`statistics.geometric_mean()`
|
||||
(Contributed by Raymond Hettinger in :issue:`27181`.)
|
||||
|
||||
Added :func:`statistics.multimode` that returns a list of the most
|
||||
common values. (Contributed by Raymond Hettinger in :issue:`35892`.)
|
||||
|
||||
|
|
|
@ -11,13 +11,14 @@ Calculating averages
|
|||
Function Description
|
||||
================== =============================================
|
||||
mean Arithmetic mean (average) of data.
|
||||
geometric_mean Geometric mean of data.
|
||||
harmonic_mean Harmonic mean of data.
|
||||
median Median (middle value) of data.
|
||||
median_low Low median of data.
|
||||
median_high High median of data.
|
||||
median_grouped Median, or 50th percentile, of grouped data.
|
||||
mode Mode (most common value) of data.
|
||||
multimode List of modes (most common values of data)
|
||||
multimode List of modes (most common values of data).
|
||||
================== =============================================
|
||||
|
||||
Calculate the arithmetic mean ("the average") of data:
|
||||
|
@ -81,6 +82,7 @@ __all__ = [ 'StatisticsError', 'NormalDist',
|
|||
'pstdev', 'pvariance', 'stdev', 'variance',
|
||||
'median', 'median_low', 'median_high', 'median_grouped',
|
||||
'mean', 'mode', 'multimode', 'harmonic_mean', 'fmean',
|
||||
'geometric_mean',
|
||||
]
|
||||
|
||||
import math
|
||||
|
@ -328,6 +330,24 @@ def fmean(data):
|
|||
except ZeroDivisionError:
|
||||
raise StatisticsError('fmean requires at least one data point') from None
|
||||
|
||||
def geometric_mean(data):
|
||||
"""Convert data to floats and compute the geometric mean.
|
||||
|
||||
Raises a StatisticsError if the input dataset is empty,
|
||||
if it contains a zero, or if it contains a negative value.
|
||||
|
||||
No special efforts are made to achieve exact results.
|
||||
(However, this may change in the future.)
|
||||
|
||||
>>> round(geometric_mean([54, 24, 36]), 9)
|
||||
36.0
|
||||
"""
|
||||
try:
|
||||
return exp(fmean(map(log, data)))
|
||||
except ValueError:
|
||||
raise StatisticsError('geometric mean requires a non-empty dataset '
|
||||
' containing positive numbers') from None
|
||||
|
||||
def harmonic_mean(data):
|
||||
"""Return the harmonic mean of data.
|
||||
|
||||
|
|
|
@ -2038,6 +2038,94 @@ class TestStdev(VarianceStdevMixin, NumericTestCase):
|
|||
expected = math.sqrt(statistics.variance(data))
|
||||
self.assertEqual(self.func(data), expected)
|
||||
|
||||
class TestGeometricMean(unittest.TestCase):
|
||||
|
||||
def test_basics(self):
|
||||
geometric_mean = statistics.geometric_mean
|
||||
self.assertAlmostEqual(geometric_mean([54, 24, 36]), 36.0)
|
||||
self.assertAlmostEqual(geometric_mean([4.0, 9.0]), 6.0)
|
||||
self.assertAlmostEqual(geometric_mean([17.625]), 17.625)
|
||||
|
||||
random.seed(86753095551212)
|
||||
for rng in [
|
||||
range(1, 100),
|
||||
range(1, 1_000),
|
||||
range(1, 10_000),
|
||||
range(500, 10_000, 3),
|
||||
range(10_000, 500, -3),
|
||||
[12, 17, 13, 5, 120, 7],
|
||||
[random.expovariate(50.0) for i in range(1_000)],
|
||||
[random.lognormvariate(20.0, 3.0) for i in range(2_000)],
|
||||
[random.triangular(2000, 3000, 2200) for i in range(3_000)],
|
||||
]:
|
||||
gm_decimal = math.prod(map(Decimal, rng)) ** (Decimal(1) / len(rng))
|
||||
gm_float = geometric_mean(rng)
|
||||
self.assertTrue(math.isclose(gm_float, float(gm_decimal)))
|
||||
|
||||
def test_various_input_types(self):
|
||||
geometric_mean = statistics.geometric_mean
|
||||
D = Decimal
|
||||
F = Fraction
|
||||
# https://www.wolframalpha.com/input/?i=geometric+mean+3.5,+4.0,+5.25
|
||||
expected_mean = 4.18886
|
||||
for data, kind in [
|
||||
([3.5, 4.0, 5.25], 'floats'),
|
||||
([D('3.5'), D('4.0'), D('5.25')], 'decimals'),
|
||||
([F(7, 2), F(4, 1), F(21, 4)], 'fractions'),
|
||||
([3.5, 4, F(21, 4)], 'mixed types'),
|
||||
((3.5, 4.0, 5.25), 'tuple'),
|
||||
(iter([3.5, 4.0, 5.25]), 'iterator'),
|
||||
]:
|
||||
actual_mean = geometric_mean(data)
|
||||
self.assertIs(type(actual_mean), float, kind)
|
||||
self.assertAlmostEqual(actual_mean, expected_mean, places=5)
|
||||
|
||||
def test_big_and_small(self):
|
||||
geometric_mean = statistics.geometric_mean
|
||||
|
||||
# Avoid overflow to infinity
|
||||
large = 2.0 ** 1000
|
||||
big_gm = geometric_mean([54.0 * large, 24.0 * large, 36.0 * large])
|
||||
self.assertTrue(math.isclose(big_gm, 36.0 * large))
|
||||
self.assertFalse(math.isinf(big_gm))
|
||||
|
||||
# Avoid underflow to zero
|
||||
small = 2.0 ** -1000
|
||||
small_gm = geometric_mean([54.0 * small, 24.0 * small, 36.0 * small])
|
||||
self.assertTrue(math.isclose(small_gm, 36.0 * small))
|
||||
self.assertNotEqual(small_gm, 0.0)
|
||||
|
||||
def test_error_cases(self):
|
||||
geometric_mean = statistics.geometric_mean
|
||||
StatisticsError = statistics.StatisticsError
|
||||
with self.assertRaises(StatisticsError):
|
||||
geometric_mean([]) # empty input
|
||||
with self.assertRaises(StatisticsError):
|
||||
geometric_mean([3.5, 0.0, 5.25]) # zero input
|
||||
with self.assertRaises(StatisticsError):
|
||||
geometric_mean([3.5, -4.0, 5.25]) # negative input
|
||||
with self.assertRaises(StatisticsError):
|
||||
geometric_mean(iter([])) # empty iterator
|
||||
with self.assertRaises(TypeError):
|
||||
geometric_mean(None) # non-iterable input
|
||||
with self.assertRaises(TypeError):
|
||||
geometric_mean([10, None, 20]) # non-numeric input
|
||||
with self.assertRaises(TypeError):
|
||||
geometric_mean() # missing data argument
|
||||
with self.assertRaises(TypeError):
|
||||
geometric_mean([10, 20, 60], 70) # too many arguments
|
||||
|
||||
def test_special_values(self):
|
||||
# Rules for special values are inherited from math.fsum()
|
||||
geometric_mean = statistics.geometric_mean
|
||||
NaN = float('Nan')
|
||||
Inf = float('Inf')
|
||||
self.assertTrue(math.isnan(geometric_mean([10, NaN])), 'nan')
|
||||
self.assertTrue(math.isnan(geometric_mean([NaN, Inf])), 'nan and infinity')
|
||||
self.assertTrue(math.isinf(geometric_mean([10, Inf])), 'infinity')
|
||||
with self.assertRaises(ValueError):
|
||||
geometric_mean([Inf, -Inf])
|
||||
|
||||
class TestNormalDist(unittest.TestCase):
|
||||
|
||||
# General note on precision: The pdf(), cdf(), and overlap() methods
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Add statistics.geometric_mean().
|
Loading…
Reference in New Issue