Minor whitespace, indentation, and quoting changes to improve internal consistency and appease linters (GH-14888)
This commit is contained in:
parent
22f0483d44
commit
1c0e9bb94b
|
@ -80,12 +80,25 @@ A single exception is defined: StatisticsError is a subclass of ValueError.
|
|||
|
||||
"""
|
||||
|
||||
__all__ = [ 'StatisticsError', 'NormalDist', 'quantiles',
|
||||
'pstdev', 'pvariance', 'stdev', 'variance',
|
||||
'median', 'median_low', 'median_high', 'median_grouped',
|
||||
'mean', 'mode', 'multimode', 'harmonic_mean', 'fmean',
|
||||
'geometric_mean',
|
||||
]
|
||||
__all__ = [
|
||||
'NormalDist',
|
||||
'StatisticsError',
|
||||
'fmean',
|
||||
'geometric_mean',
|
||||
'harmonic_mean',
|
||||
'mean',
|
||||
'median',
|
||||
'median_grouped',
|
||||
'median_high',
|
||||
'median_low',
|
||||
'mode',
|
||||
'multimode',
|
||||
'pstdev',
|
||||
'pvariance',
|
||||
'quantiles',
|
||||
'stdev',
|
||||
'variance',
|
||||
]
|
||||
|
||||
import math
|
||||
import numbers
|
||||
|
@ -304,8 +317,9 @@ def mean(data):
|
|||
assert count == n
|
||||
return _convert(total/n, T)
|
||||
|
||||
|
||||
def fmean(data):
|
||||
""" Convert data to floats and compute the arithmetic mean.
|
||||
"""Convert data to floats and compute the arithmetic mean.
|
||||
|
||||
This runs faster than the mean() function and it always returns a float.
|
||||
The result is highly accurate but not as perfect as mean().
|
||||
|
@ -313,7 +327,6 @@ def fmean(data):
|
|||
|
||||
>>> fmean([3.5, 4.0, 5.25])
|
||||
4.25
|
||||
|
||||
"""
|
||||
try:
|
||||
n = len(data)
|
||||
|
@ -332,6 +345,7 @@ def fmean(data):
|
|||
except ZeroDivisionError:
|
||||
raise StatisticsError('fmean requires at least one data point') from None
|
||||
|
||||
|
||||
def geometric_mean(data):
|
||||
"""Convert data to floats and compute the geometric mean.
|
||||
|
||||
|
@ -350,6 +364,7 @@ def geometric_mean(data):
|
|||
raise StatisticsError('geometric mean requires a non-empty dataset '
|
||||
' containing positive numbers') from None
|
||||
|
||||
|
||||
def harmonic_mean(data):
|
||||
"""Return the harmonic mean of data.
|
||||
|
||||
|
@ -547,23 +562,23 @@ def mode(data):
|
|||
|
||||
|
||||
def multimode(data):
    """Return a list of the most frequently occurring values.

    Will return more than one result if there are multiple modes
    or an empty list if *data* is empty.  When several values tie for
    the highest count they are listed in the order they were first
    encountered in *data*.

    >>> multimode('aabbbbbbbbcc')
    ['b']
    >>> multimode('aabbbbccddddeeffffgg')
    ['b', 'd', 'f']
    >>> multimode('')
    []
    """
    counts = Counter(iter(data))
    if not counts:
        return []
    # Counter keeps keys in first-seen order, so filtering on the maximum
    # count yields the tied modes in encounter order.
    maxcount = max(counts.values())
    return [value for value, count in counts.items() if count == maxcount]
|
||||
|
||||
|
||||
# Notes on methods for computing quantiles
|
||||
# ----------------------------------------
|
||||
#
|
||||
|
@ -601,7 +616,7 @@ def multimode(data):
|
|||
# external packages can be used for anything more advanced.
|
||||
|
||||
def quantiles(dist, /, *, n=4, method='exclusive'):
|
||||
'''Divide *dist* into *n* continuous intervals with equal probability.
|
||||
"""Divide *dist* into *n* continuous intervals with equal probability.
|
||||
|
||||
Returns a list of (n - 1) cut points separating the intervals.
|
||||
|
||||
|
@ -616,7 +631,7 @@ def quantiles(dist, /, *, n=4, method='exclusive'):
|
|||
If *method* is set to *inclusive*, *dist* is treated as population
|
||||
data. The minimum value is treated as the 0th percentile and the
|
||||
maximum value is treated as the 100th percentile.
|
||||
'''
|
||||
"""
|
||||
if n < 1:
|
||||
raise StatisticsError('n must be at least 1')
|
||||
if hasattr(dist, 'inv_cdf'):
|
||||
|
@ -646,6 +661,7 @@ def quantiles(dist, /, *, n=4, method='exclusive'):
|
|||
return result
|
||||
raise ValueError(f'Unknown method: {method!r}')
|
||||
|
||||
|
||||
# === Measures of spread ===
|
||||
|
||||
# See http://mathworld.wolfram.com/Variance.html
|
||||
|
@ -805,18 +821,21 @@ def pstdev(data, mu=None):
|
|||
except AttributeError:
|
||||
return math.sqrt(var)
|
||||
|
||||
|
||||
## Normal Distribution #####################################################
|
||||
|
||||
class NormalDist:
|
||||
'Normal distribution of a random variable'
|
||||
"Normal distribution of a random variable"
|
||||
# https://en.wikipedia.org/wiki/Normal_distribution
|
||||
# https://en.wikipedia.org/wiki/Variance#Properties
|
||||
|
||||
__slots__ = {'_mu': 'Arithmetic mean of a normal distribution',
|
||||
'_sigma': 'Standard deviation of a normal distribution'}
|
||||
__slots__ = {
|
||||
'_mu': 'Arithmetic mean of a normal distribution',
|
||||
'_sigma': 'Standard deviation of a normal distribution',
|
||||
}
|
||||
|
||||
def __init__(self, mu=0.0, sigma=1.0):
|
||||
'NormalDist where mu is the mean and sigma is the standard deviation.'
|
||||
"NormalDist where mu is the mean and sigma is the standard deviation."
|
||||
if sigma < 0.0:
|
||||
raise StatisticsError('sigma must be non-negative')
|
||||
self._mu = mu
|
||||
|
@ -824,40 +843,42 @@ class NormalDist:
|
|||
|
||||
@classmethod
def from_samples(cls, data):
    """Make a normal distribution instance from sample data.

    *data* is passed first to fmean() and then to stdev(), with the
    computed mean supplied to stdev() as its second argument.  The
    result is ``cls(mean, standard_deviation)``.
    """
    if not isinstance(data, (list, tuple)):
        # The data is traversed twice (mean, then stdev), so anything that
        # is not already a list or tuple is materialized first.
        data = list(data)
    xbar = fmean(data)
    return cls(xbar, stdev(data, xbar))
|
||||
|
||||
def samples(self, n, *, seed=None):
    """Generate *n* samples for a given mean and standard deviation.

    When *seed* is None the module-level ``random.gauss`` is used.
    Otherwise a dedicated ``random.Random(seed)`` instance supplies the
    values, so the same seed reproduces the same list.

    Returns a list of *n* values drawn with this instance's mu and sigma.
    """
    gauss = random.gauss if seed is None else random.Random(seed).gauss
    mu, sigma = self._mu, self._sigma
    return [gauss(mu, sigma) for _ in range(n)]
|
||||
|
||||
def pdf(self, x):
    """Probability density function.  P(x <= X < x+dx) / dx

    Raises StatisticsError when the variance (sigma squared) evaluates
    to zero, since the density is undefined there.
    """
    variance = self._sigma ** 2.0
    if not variance:
        raise StatisticsError('pdf() not defined when sigma is zero')
    return exp((x - self._mu)**2.0 / (-2.0*variance)) / sqrt(tau*variance)
|
||||
|
||||
def cdf(self, x):
    """Cumulative distribution function.  P(X <= x)

    Raises StatisticsError when sigma is zero, because the expression
    below would divide by zero.
    """
    if not self._sigma:
        raise StatisticsError('cdf() not defined when sigma is zero')
    return 0.5 * (1.0 + erf((x - self._mu) / (self._sigma * sqrt(2.0))))
|
||||
|
||||
def inv_cdf(self, p):
|
||||
'''Inverse cumulative distribution function. x : P(X <= x) = p
|
||||
"""Inverse cumulative distribution function. x : P(X <= x) = p
|
||||
|
||||
Finds the value of the random variable such that the probability of the
|
||||
variable being less than or equal to that value equals the given probability.
|
||||
Finds the value of the random variable such that the probability of
|
||||
the variable being less than or equal to that value equals the given
|
||||
probability.
|
||||
|
||||
This function is also called the percent point function or quantile function.
|
||||
'''
|
||||
if (p <= 0.0 or p >= 1.0):
|
||||
This function is also called the percent point function or quantile
|
||||
function.
|
||||
"""
|
||||
if p <= 0.0 or p >= 1.0:
|
||||
raise StatisticsError('p must be in the range 0.0 < p < 1.0')
|
||||
if self._sigma <= 0.0:
|
||||
raise StatisticsError('cdf() not defined when sigma at or below zero')
|
||||
|
@ -933,7 +954,7 @@ class NormalDist:
|
|||
return self._mu + (x * self._sigma)
|
||||
|
||||
def overlap(self, other):
|
||||
'''Compute the overlapping coefficient (OVL) between two normal distributions.
|
||||
"""Compute the overlapping coefficient (OVL) between two normal distributions.
|
||||
|
||||
Measures the agreement between two normal probability distributions.
|
||||
Returns a value between 0.0 and 1.0 giving the overlapping area in
|
||||
|
@ -943,7 +964,7 @@ class NormalDist:
|
|||
>>> N2 = NormalDist(3.2, 2.0)
|
||||
>>> N1.overlap(N2)
|
||||
0.8035050657330205
|
||||
'''
|
||||
"""
|
||||
# See: "The overlapping coefficient as a measure of agreement between
|
||||
# probability distributions and point estimation of the overlap of two
|
||||
# normal densities" -- Henry F. Inman and Edwin L. Bradley Jr
|
||||
|
@ -968,21 +989,21 @@ class NormalDist:
|
|||
|
||||
@property
def mean(self):
    """Arithmetic mean of the normal distribution.

    Read-only view of the stored ``_mu`` value.
    """
    return self._mu
|
||||
|
||||
@property
def stdev(self):
    """Standard deviation of the normal distribution.

    Read-only view of the stored ``_sigma`` value.
    """
    return self._sigma
|
||||
|
||||
@property
def variance(self):
    """Square of the standard deviation.

    Computed on each access as ``_sigma ** 2.0``, so the result is a float.
    """
    return self._sigma ** 2.0
|
||||
|
||||
def __add__(x1, x2):
|
||||
'''Add a constant or another NormalDist instance.
|
||||
"""Add a constant or another NormalDist instance.
|
||||
|
||||
If *other* is a constant, translate mu by the constant,
|
||||
leaving sigma unchanged.
|
||||
|
@ -990,13 +1011,13 @@ class NormalDist:
|
|||
If *other* is a NormalDist, add both the means and the variances.
|
||||
Mathematically, this works only if the two distributions are
|
||||
independent or if they are jointly normally distributed.
|
||||
'''
|
||||
"""
|
||||
if isinstance(x2, NormalDist):
|
||||
return NormalDist(x1._mu + x2._mu, hypot(x1._sigma, x2._sigma))
|
||||
return NormalDist(x1._mu + x2, x1._sigma)
|
||||
|
||||
def __sub__(x1, x2):
|
||||
'''Subtract a constant or another NormalDist instance.
|
||||
"""Subtract a constant or another NormalDist instance.
|
||||
|
||||
If *other* is a constant, translate by the constant mu,
|
||||
leaving sigma unchanged.
|
||||
|
@ -1004,51 +1025,51 @@ class NormalDist:
|
|||
If *other* is a NormalDist, subtract the means and add the variances.
|
||||
Mathematically, this works only if the two distributions are
|
||||
independent or if they are jointly normally distributed.
|
||||
'''
|
||||
"""
|
||||
if isinstance(x2, NormalDist):
|
||||
return NormalDist(x1._mu - x2._mu, hypot(x1._sigma, x2._sigma))
|
||||
return NormalDist(x1._mu - x2, x1._sigma)
|
||||
|
||||
def __mul__(x1, x2):
    """Multiply both mu and sigma by a constant.

    Used for rescaling, perhaps to change measurement units.
    Sigma is scaled with the absolute value of the constant.

    Returns a new NormalDist; *x1* is not modified.
    """
    return NormalDist(x1._mu * x2, x1._sigma * fabs(x2))
|
||||
|
||||
def __truediv__(x1, x2):
    """Divide both mu and sigma by a constant.

    Used for rescaling, perhaps to change measurement units.
    Sigma is scaled with the absolute value of the constant.

    Returns a new NormalDist; *x1* is not modified.
    """
    return NormalDist(x1._mu / x2, x1._sigma / fabs(x2))
|
||||
|
||||
def __pos__(x1):
    """Return a copy of the instance.

    The copy is a new NormalDist built from the same mu and sigma.
    """
    return NormalDist(x1._mu, x1._sigma)
|
||||
|
||||
def __neg__(x1):
    """Negates mu while keeping sigma the same.

    Returns a new NormalDist; *x1* is not modified.
    """
    return NormalDist(-x1._mu, x1._sigma)
|
||||
|
||||
__radd__ = __add__
|
||||
|
||||
def __rsub__(x1, x2):
    """Subtract a NormalDist from a constant or another NormalDist.

    Handles the reflected form ``x2 - x1`` by computing ``x1 - x2`` and
    negating the result.
    """
    return -(x1 - x2)
|
||||
|
||||
__rmul__ = __mul__
|
||||
|
||||
def __eq__(x1, x2):
|
||||
'Two NormalDist objects are equal if their mu and sigma are both equal.'
|
||||
"Two NormalDist objects are equal if their mu and sigma are both equal."
|
||||
if not isinstance(x2, NormalDist):
|
||||
return NotImplemented
|
||||
return (x1._mu, x2._sigma) == (x2._mu, x2._sigma)
|
||||
|
||||
def __hash__(self):
|
||||
'NormalDist objects hash equal if their mu and sigma are both equal.'
|
||||
"NormalDist objects hash equal if their mu and sigma are both equal."
|
||||
return hash((self._mu, self._sigma))
|
||||
|
||||
def __repr__(self):
|
||||
|
|
Loading…
Reference in New Issue