bpo-38308: Add optional weighting to statistics.harmonic_mean() (GH-23914)
This commit is contained in:
parent
6dd3da3cf4
commit
cc3467a57b
|
@ -156,10 +156,11 @@ However, for reading convenience, most of the examples show sorted sequences.
|
||||||
.. versionadded:: 3.8
|
.. versionadded:: 3.8
|
||||||
|
|
||||||
|
|
||||||
.. function:: harmonic_mean(data)
|
.. function:: harmonic_mean(data, weights=None)
|
||||||
|
|
||||||
Return the harmonic mean of *data*, a sequence or iterable of
|
Return the harmonic mean of *data*, a sequence or iterable of
|
||||||
real-valued numbers.
|
real-valued numbers. If *weights* is omitted or *None*, then
|
||||||
|
equal weighting is assumed.
|
||||||
|
|
||||||
The harmonic mean, sometimes called the subcontrary mean, is the
|
The harmonic mean, sometimes called the subcontrary mean, is the
|
||||||
reciprocal of the arithmetic :func:`mean` of the reciprocals of the
|
reciprocal of the arithmetic :func:`mean` of the reciprocals of the
|
||||||
|
@ -179,17 +180,17 @@ However, for reading convenience, most of the examples show sorted sequences.
|
||||||
>>> harmonic_mean([40, 60])
|
>>> harmonic_mean([40, 60])
|
||||||
48.0
|
48.0
|
||||||
|
|
||||||
Suppose an investor purchases an equal value of shares in each of
|
Suppose a car travels 40 km/hr for 5 km, and when traffic clears,
|
||||||
three companies, with P/E (price/earning) ratios of 2.5, 3 and 10.
|
speeds up to 60 km/hr for the remaining 30 km of the journey. What
|
||||||
What is the average P/E ratio for the investor's portfolio?
|
is the average speed?
|
||||||
|
|
||||||
.. doctest::
|
.. doctest::
|
||||||
|
|
||||||
>>> harmonic_mean([2.5, 3, 10]) # For an equal investment portfolio.
|
>>> harmonic_mean([40, 60], weights=[5, 30])
|
||||||
3.6
|
56.0
|
||||||
|
|
||||||
:exc:`StatisticsError` is raised if *data* is empty, or any element
|
:exc:`StatisticsError` is raised if *data* is empty, any element
|
||||||
is less than zero.
|
is less than zero, or if the weighted sum isn't positive.
|
||||||
|
|
||||||
The current algorithm has an early-out when it encounters a zero
|
The current algorithm has an early-out when it encounters a zero
|
||||||
in the input. This means that the subsequent inputs are not tested
|
in the input. This means that the subsequent inputs are not tested
|
||||||
|
@ -197,6 +198,8 @@ However, for reading convenience, most of the examples show sorted sequences.
|
||||||
|
|
||||||
.. versionadded:: 3.6
|
.. versionadded:: 3.6
|
||||||
|
|
||||||
|
.. versionchanged:: 3.10
|
||||||
|
Added support for *weights*.
|
||||||
|
|
||||||
.. function:: median(data)
|
.. function:: median(data)
|
||||||
|
|
||||||
|
|
|
@ -106,7 +106,7 @@ import random
|
||||||
|
|
||||||
from fractions import Fraction
|
from fractions import Fraction
|
||||||
from decimal import Decimal
|
from decimal import Decimal
|
||||||
from itertools import groupby
|
from itertools import groupby, repeat
|
||||||
from bisect import bisect_left, bisect_right
|
from bisect import bisect_left, bisect_right
|
||||||
from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum
|
from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
|
@ -364,37 +364,37 @@ def geometric_mean(data):
|
||||||
' containing positive numbers') from None
|
' containing positive numbers') from None
|
||||||
|
|
||||||
|
|
||||||
def harmonic_mean(data):
|
def harmonic_mean(data, weights=None):
|
||||||
"""Return the harmonic mean of data.
|
"""Return the harmonic mean of data.
|
||||||
|
|
||||||
The harmonic mean, sometimes called the subcontrary mean, is the
|
The harmonic mean, sometimes called the subcontrary mean, is the
|
||||||
reciprocal of the arithmetic mean of the reciprocals of the data,
|
reciprocal of the arithmetic mean of the reciprocals of the data,
|
||||||
and is often appropriate when averaging quantities which are rates
|
and is often appropriate when averaging quantities which are rates
|
||||||
or ratios, for example speeds. Example:
|
or ratios, for example speeds.
|
||||||
|
|
||||||
Suppose an investor purchases an equal value of shares in each of
|
Suppose a car travels 40 km/hr for 5 km and then speeds up to
|
||||||
three companies, with P/E (price/earning) ratios of 2.5, 3 and 10.
|
60 km/hr for another 5 km. What is the average speed?
|
||||||
What is the average P/E ratio for the investor's portfolio?
|
|
||||||
|
|
||||||
>>> harmonic_mean([2.5, 3, 10]) # For an equal investment portfolio.
|
>>> harmonic_mean([40, 60])
|
||||||
3.6
|
48.0
|
||||||
|
|
||||||
Using the arithmetic mean would give an average of about 5.167, which
|
Suppose a car travels 40 km/hr for 5 km, and when traffic clears,
|
||||||
is too high.
|
speeds up to 60 km/hr for the remaining 30 km of the journey. What
|
||||||
|
is the average speed?
|
||||||
|
|
||||||
|
>>> harmonic_mean([40, 60], weights=[5, 30])
|
||||||
|
56.0
|
||||||
|
|
||||||
If ``data`` is empty, or any element is less than zero,
|
If ``data`` is empty, or any element is less than zero,
|
||||||
``harmonic_mean`` will raise ``StatisticsError``.
|
``harmonic_mean`` will raise ``StatisticsError``.
|
||||||
"""
|
"""
|
||||||
# For a justification for using harmonic mean for P/E ratios, see
|
|
||||||
# http://fixthepitch.pellucid.com/comps-analysis-the-missing-harmony-of-summary-statistics/
|
|
||||||
# http://papers.ssrn.com/sol3/papers.cfm?abstract_id=2621087
|
|
||||||
if iter(data) is data:
|
if iter(data) is data:
|
||||||
data = list(data)
|
data = list(data)
|
||||||
errmsg = 'harmonic mean does not support negative values'
|
errmsg = 'harmonic mean does not support negative values'
|
||||||
n = len(data)
|
n = len(data)
|
||||||
if n < 1:
|
if n < 1:
|
||||||
raise StatisticsError('harmonic_mean requires at least one data point')
|
raise StatisticsError('harmonic_mean requires at least one data point')
|
||||||
elif n == 1:
|
elif n == 1 and weights is None:
|
||||||
x = data[0]
|
x = data[0]
|
||||||
if isinstance(x, (numbers.Real, Decimal)):
|
if isinstance(x, (numbers.Real, Decimal)):
|
||||||
if x < 0:
|
if x < 0:
|
||||||
|
@ -402,13 +402,23 @@ def harmonic_mean(data):
|
||||||
return x
|
return x
|
||||||
else:
|
else:
|
||||||
raise TypeError('unsupported type')
|
raise TypeError('unsupported type')
|
||||||
|
if weights is None:
|
||||||
|
weights = repeat(1, n)
|
||||||
|
sum_weights = n
|
||||||
|
else:
|
||||||
|
if iter(weights) is weights:
|
||||||
|
weights = list(weights)
|
||||||
|
if len(weights) != n:
|
||||||
|
raise StatisticsError('Number of weights does not match data size')
|
||||||
|
_, sum_weights, _ = _sum(w for w in _fail_neg(weights, errmsg))
|
||||||
try:
|
try:
|
||||||
T, total, count = _sum(1 / x for x in _fail_neg(data, errmsg))
|
data = _fail_neg(data, errmsg)
|
||||||
|
T, total, count = _sum(w / x if w else 0 for w, x in zip(weights, data))
|
||||||
except ZeroDivisionError:
|
except ZeroDivisionError:
|
||||||
return 0
|
return 0
|
||||||
assert count == n
|
if total <= 0:
|
||||||
return _convert(n / total, T)
|
raise StatisticsError('Weighted sum must be positive')
|
||||||
|
return _convert(sum_weights / total, T)
|
||||||
|
|
||||||
# FIXME: investigate ways to calculate medians without sorting? Quickselect?
|
# FIXME: investigate ways to calculate medians without sorting? Quickselect?
|
||||||
def median(data):
|
def median(data):
|
||||||
|
|
|
@ -1599,6 +1599,27 @@ class TestHarmonicMean(NumericTestCase, AverageMixin, UnivariateTypeMixin):
|
||||||
actual = self.func(data*2)
|
actual = self.func(data*2)
|
||||||
self.assertApproxEqual(actual, expected)
|
self.assertApproxEqual(actual, expected)
|
||||||
|
|
||||||
|
def test_with_weights(self):
|
||||||
|
self.assertEqual(self.func([40, 60], [5, 30]), 56.0) # common case
|
||||||
|
self.assertEqual(self.func([40, 60],
|
||||||
|
weights=[5, 30]), 56.0) # keyword argument
|
||||||
|
self.assertEqual(self.func(iter([40, 60]),
|
||||||
|
iter([5, 30])), 56.0) # iterator inputs
|
||||||
|
self.assertEqual(
|
||||||
|
self.func([Fraction(10, 3), Fraction(23, 5), Fraction(7, 2)], [5, 2, 10]),
|
||||||
|
self.func([Fraction(10, 3)] * 5 +
|
||||||
|
[Fraction(23, 5)] * 2 +
|
||||||
|
[Fraction(7, 2)] * 10))
|
||||||
|
self.assertEqual(self.func([10], [7]), 10) # single data point with a weight (general path, not the n=1 fast path)
|
||||||
|
with self.assertRaises(TypeError):
|
||||||
|
self.func([1, 2, 3], [1, (), 3]) # non-numeric weight
|
||||||
|
with self.assertRaises(statistics.StatisticsError):
|
||||||
|
self.func([1, 2, 3], [1, 2]) # wrong number of weights
|
||||||
|
with self.assertRaises(statistics.StatisticsError):
|
||||||
|
self.func([10], [0]) # no non-zero weights
|
||||||
|
with self.assertRaises(statistics.StatisticsError):
|
||||||
|
self.func([10, 20], [0, 0]) # no non-zero weights
|
||||||
|
|
||||||
|
|
||||||
class TestMedian(NumericTestCase, AverageMixin):
|
class TestMedian(NumericTestCase, AverageMixin):
|
||||||
# Common tests for median and all median.* functions.
|
# Common tests for median and all median.* functions.
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Add optional *weights* to *statistics.harmonic_mean()*.
|
Loading…
Reference in New Issue