bpo-38308: Add optional weighting to statistics.harmonic_mean() (GH-23914)
This commit is contained in:
parent
6dd3da3cf4
commit
cc3467a57b
|
@ -156,10 +156,11 @@ However, for reading convenience, most of the examples show sorted sequences.
|
|||
.. versionadded:: 3.8
|
||||
|
||||
|
||||
.. function:: harmonic_mean(data)
|
||||
.. function:: harmonic_mean(data, weights=None)
|
||||
|
||||
Return the harmonic mean of *data*, a sequence or iterable of
|
||||
real-valued numbers.
|
||||
real-valued numbers. If *weights* is omitted or *None*, then
|
||||
equal weighting is assumed.
|
||||
|
||||
The harmonic mean, sometimes called the subcontrary mean, is the
|
||||
reciprocal of the arithmetic :func:`mean` of the reciprocals of the
|
||||
|
@ -179,17 +180,17 @@ However, for reading convenience, most of the examples show sorted sequences.
|
|||
>>> harmonic_mean([40, 60])
|
||||
48.0
|
||||
|
||||
Suppose an investor purchases an equal value of shares in each of
|
||||
three companies, with P/E (price/earning) ratios of 2.5, 3 and 10.
|
||||
What is the average P/E ratio for the investor's portfolio?
|
||||
Suppose a car travels 40 km/hr for 5 km, and when traffic clears,
|
||||
speeds up to 60 km/hr for the remaining 30 km of the journey. What
|
||||
is the average speed?
|
||||
|
||||
.. doctest::
|
||||
|
||||
>>> harmonic_mean([2.5, 3, 10]) # For an equal investment portfolio.
|
||||
3.6
|
||||
>>> harmonic_mean([40, 60], weights=[5, 30])
|
||||
56.0
|
||||
|
||||
:exc:`StatisticsError` is raised if *data* is empty, or any element
|
||||
is less than zero.
|
||||
:exc:`StatisticsError` is raised if *data* is empty, any element
|
||||
is less than zero, or if the weighted sum isn't positive.
|
||||
|
||||
The current algorithm has an early-out when it encounters a zero
|
||||
in the input. This means that the subsequent inputs are not tested
|
||||
|
@ -197,6 +198,8 @@ However, for reading convenience, most of the examples show sorted sequences.
|
|||
|
||||
.. versionadded:: 3.6
|
||||
|
||||
.. versionchanged:: 3.10
|
||||
Added support for *weights*.
|
||||
|
||||
.. function:: median(data)
|
||||
|
||||
|
|
|
@ -106,7 +106,7 @@ import random
|
|||
|
||||
from fractions import Fraction
|
||||
from decimal import Decimal
|
||||
from itertools import groupby
|
||||
from itertools import groupby, repeat
|
||||
from bisect import bisect_left, bisect_right
|
||||
from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum
|
||||
from operator import itemgetter
|
||||
|
@ -364,37 +364,37 @@ def geometric_mean(data):
|
|||
' containing positive numbers') from None
|
||||
|
||||
|
||||
def harmonic_mean(data):
|
||||
def harmonic_mean(data, weights=None):
|
||||
"""Return the harmonic mean of data.
|
||||
|
||||
The harmonic mean, sometimes called the subcontrary mean, is the
|
||||
reciprocal of the arithmetic mean of the reciprocals of the data,
|
||||
and is often appropriate when averaging quantities which are rates
|
||||
or ratios, for example speeds. Example:
|
||||
or ratios, for example speeds.
|
||||
|
||||
Suppose an investor purchases an equal value of shares in each of
|
||||
three companies, with P/E (price/earning) ratios of 2.5, 3 and 10.
|
||||
What is the average P/E ratio for the investor's portfolio?
|
||||
Suppose a car travels 40 km/hr for 5 km and then speeds-up to
|
||||
60 km/hr for another 5 km. What is the average speed?
|
||||
|
||||
>>> harmonic_mean([2.5, 3, 10]) # For an equal investment portfolio.
|
||||
3.6
|
||||
>>> harmonic_mean([40, 60])
|
||||
48.0
|
||||
|
||||
Using the arithmetic mean would give an average of about 5.167, which
|
||||
is too high.
|
||||
Suppose a car travels 40 km/hr for 5 km, and when traffic clears,
|
||||
speeds up to 60 km/hr for the remaining 30 km of the journey. What
|
||||
is the average speed?
|
||||
|
||||
>>> harmonic_mean([40, 60], weights=[5, 30])
|
||||
56.0
|
||||
|
||||
If ``data`` is empty, or any element is less than zero,
|
||||
``harmonic_mean`` will raise ``StatisticsError``.
|
||||
"""
|
||||
# For a justification for using harmonic mean for P/E ratios, see
|
||||
# http://fixthepitch.pellucid.com/comps-analysis-the-missing-harmony-of-summary-statistics/
|
||||
# http://papers.ssrn.com/sol3/papers.cfm?abstract_id=2621087
|
||||
if iter(data) is data:
|
||||
data = list(data)
|
||||
errmsg = 'harmonic mean does not support negative values'
|
||||
n = len(data)
|
||||
if n < 1:
|
||||
raise StatisticsError('harmonic_mean requires at least one data point')
|
||||
elif n == 1:
|
||||
elif n == 1 and weights is None:
|
||||
x = data[0]
|
||||
if isinstance(x, (numbers.Real, Decimal)):
|
||||
if x < 0:
|
||||
|
@ -402,13 +402,23 @@ def harmonic_mean(data):
|
|||
return x
|
||||
else:
|
||||
raise TypeError('unsupported type')
|
||||
if weights is None:
|
||||
weights = repeat(1, n)
|
||||
sum_weights = n
|
||||
else:
|
||||
if iter(weights) is weights:
|
||||
weights = list(weights)
|
||||
if len(weights) != n:
|
||||
raise StatisticsError('Number of weights does not match data size')
|
||||
_, sum_weights, _ = _sum(w for w in _fail_neg(weights, errmsg))
|
||||
try:
|
||||
T, total, count = _sum(1 / x for x in _fail_neg(data, errmsg))
|
||||
data = _fail_neg(data, errmsg)
|
||||
T, total, count = _sum(w / x if w else 0 for w, x in zip(weights, data))
|
||||
except ZeroDivisionError:
|
||||
return 0
|
||||
assert count == n
|
||||
return _convert(n / total, T)
|
||||
|
||||
if total <= 0:
|
||||
raise StatisticsError('Weighted sum must be positive')
|
||||
return _convert(sum_weights / total, T)
|
||||
|
||||
# FIXME: investigate ways to calculate medians without sorting? Quickselect?
|
||||
def median(data):
|
||||
|
|
|
@ -1599,6 +1599,27 @@ class TestHarmonicMean(NumericTestCase, AverageMixin, UnivariateTypeMixin):
|
|||
actual = self.func(data*2)
|
||||
self.assertApproxEqual(actual, expected)
|
||||
|
||||
def test_with_weights(self):
|
||||
self.assertEqual(self.func([40, 60], [5, 30]), 56.0) # common case
|
||||
self.assertEqual(self.func([40, 60],
|
||||
weights=[5, 30]), 56.0) # keyword argument
|
||||
self.assertEqual(self.func(iter([40, 60]),
|
||||
iter([5, 30])), 56.0) # iterator inputs
|
||||
self.assertEqual(
|
||||
self.func([Fraction(10, 3), Fraction(23, 5), Fraction(7, 2)], [5, 2, 10]),
|
||||
self.func([Fraction(10, 3)] * 5 +
|
||||
[Fraction(23, 5)] * 2 +
|
||||
[Fraction(7, 2)] * 10))
|
||||
self.assertEqual(self.func([10], [7]), 10) # n=1 fast path
|
||||
with self.assertRaises(TypeError):
|
||||
self.func([1, 2, 3], [1, (), 3]) # non-numeric weight
|
||||
with self.assertRaises(statistics.StatisticsError):
|
||||
self.func([1, 2, 3], [1, 2]) # wrong number of weights
|
||||
with self.assertRaises(statistics.StatisticsError):
|
||||
self.func([10], [0]) # no non-zero weights
|
||||
with self.assertRaises(statistics.StatisticsError):
|
||||
self.func([10, 20], [0, 0]) # no non-zero weights
|
||||
|
||||
|
||||
class TestMedian(NumericTestCase, AverageMixin):
|
||||
# Common tests for median and all median.* functions.
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Add an optional *weights* argument to :func:`statistics.harmonic_mean`.
|
Loading…
Reference in New Issue