Minor whitespace, indentation, and quoting changes to improve internal consistency and appease linters (GH-14888)

This commit is contained in:
Raymond Hettinger 2019-07-21 12:13:07 -07:00 committed by GitHub
parent 22f0483d44
commit 1c0e9bb94b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 74 additions and 53 deletions

View File

@ -80,12 +80,25 @@ A single exception is defined: StatisticsError is a subclass of ValueError.
"""
# Public names exported by a star-import of this module.
# NOTE(review): this first, hand-grouped list is rebound right away by the
# alphabetized one-name-per-line list that follows; both contain the same
# 17 names.
__all__ = [ 'StatisticsError', 'NormalDist', 'quantiles',
'pstdev', 'pvariance', 'stdev', 'variance',
'median', 'median_low', 'median_high', 'median_grouped',
'mean', 'mode', 'multimode', 'harmonic_mean', 'fmean',
'geometric_mean',
]
__all__ = [
'NormalDist',
'StatisticsError',
'fmean',
'geometric_mean',
'harmonic_mean',
'mean',
'median',
'median_grouped',
'median_high',
'median_low',
'mode',
'multimode',
'pstdev',
'pvariance',
'quantiles',
'stdev',
'variance',
]
import math
import numbers
@ -304,8 +317,9 @@ def mean(data):
assert count == n
return _convert(total/n, T)
def fmean(data):
""" Convert data to floats and compute the arithmetic mean.
"""Convert data to floats and compute the arithmetic mean.
This runs faster than the mean() function and it always returns a float.
The result is highly accurate but not as perfect as mean().
@ -313,7 +327,6 @@ def fmean(data):
>>> fmean([3.5, 4.0, 5.25])
4.25
"""
try:
n = len(data)
@ -332,6 +345,7 @@ def fmean(data):
except ZeroDivisionError:
raise StatisticsError('fmean requires at least one data point') from None
def geometric_mean(data):
"""Convert data to floats and compute the geometric mean.
@ -350,6 +364,7 @@ def geometric_mean(data):
raise StatisticsError('geometric mean requires a non-empty dataset '
' containing positive numbers') from None
def harmonic_mean(data):
"""Return the harmonic mean of data.
@ -547,23 +562,23 @@ def mode(data):
def multimode(data):
    """Return a list of the most frequently occurring values.

    Several values are returned when they tie for the highest count.
    An empty *data* yields an empty list.

    >>> multimode('aabbbbbbbbcc')
    ['b']
    >>> multimode('aabbbbccddddeeffffgg')
    ['b', 'd', 'f']
    >>> multimode('')
    []
    """
    # most_common() lists (value, count) pairs by descending count, so the
    # first run of equal counts produced by groupby() holds the modes.
    ranked = Counter(iter(data)).most_common()
    _, leaders = next(groupby(ranked, key=itemgetter(1)), (0, []))
    return [value for value, _ in leaders]
# Notes on methods for computing quantiles
# ----------------------------------------
#
@ -601,7 +616,7 @@ def multimode(data):
# external packages can be used for anything more advanced.
def quantiles(dist, /, *, n=4, method='exclusive'):
'''Divide *dist* into *n* continuous intervals with equal probability.
"""Divide *dist* into *n* continuous intervals with equal probability.
Returns a list of (n - 1) cut points separating the intervals.
@ -616,7 +631,7 @@ def quantiles(dist, /, *, n=4, method='exclusive'):
If *method* is set to *inclusive*, *dist* is treated as population
data. The minimum value is treated as the 0th percentile and the
maximum value is treated as the 100th percentile.
'''
"""
if n < 1:
raise StatisticsError('n must be at least 1')
if hasattr(dist, 'inv_cdf'):
@ -646,6 +661,7 @@ def quantiles(dist, /, *, n=4, method='exclusive'):
return result
raise ValueError(f'Unknown method: {method!r}')
# === Measures of spread ===
# See http://mathworld.wolfram.com/Variance.html
@ -805,18 +821,21 @@ def pstdev(data, mu=None):
except AttributeError:
return math.sqrt(var)
## Normal Distribution #####################################################
class NormalDist:
'Normal distribution of a random variable'
"Normal distribution of a random variable"
# https://en.wikipedia.org/wiki/Normal_distribution
# https://en.wikipedia.org/wiki/Variance#Properties
__slots__ = {'_mu': 'Arithmetic mean of a normal distribution',
'_sigma': 'Standard deviation of a normal distribution'}
__slots__ = {
'_mu': 'Arithmetic mean of a normal distribution',
'_sigma': 'Standard deviation of a normal distribution',
}
def __init__(self, mu=0.0, sigma=1.0):
'NormalDist where mu is the mean and sigma is the standard deviation.'
"NormalDist where mu is the mean and sigma is the standard deviation."
if sigma < 0.0:
raise StatisticsError('sigma must be non-negative')
self._mu = mu
@ -824,40 +843,42 @@ class NormalDist:
@classmethod
def from_samples(cls, data):
    """Make a normal distribution instance from sample data.

    The mean is estimated with fmean() and then handed to stdev() as
    its precomputed center.
    """
    # The data is traversed twice (fmean, then stdev), so anything that
    # is not already a list or tuple is materialized first.
    sample = data if isinstance(data, (list, tuple)) else list(data)
    center = fmean(sample)
    return cls(center, stdev(sample, center))
def samples(self, n, *, seed=None):
    """Generate *n* samples for a given mean and standard deviation.

    With *seed* left as None the module-level random.gauss() is used;
    otherwise a private random.Random(seed) generator supplies the
    values, making the result reproducible for a given seed.
    """
    if seed is None:
        draw = random.gauss
    else:
        draw = random.Random(seed).gauss
    center, spread = self._mu, self._sigma
    return [draw(center, spread) for _ in range(n)]
def pdf(self, x):
    """Probability density function.  P(x <= X < x+dx) / dx

    Raises StatisticsError when the squared sigma is zero.
    """
    variance = self._sigma ** 2.0
    if variance:
        offset = x - self._mu
        return exp(offset**2.0 / (-2.0*variance)) / sqrt(tau*variance)
    raise StatisticsError('pdf() not defined when sigma is zero')
def cdf(self, x):
    """Cumulative distribution function.  P(X <= x)

    Raises StatisticsError when sigma is zero.
    """
    sigma = self._sigma
    if sigma:
        z = (x - self._mu) / (sigma * sqrt(2.0))
        return 0.5 * (1.0 + erf(z))
    raise StatisticsError('cdf() not defined when sigma is zero')
def inv_cdf(self, p):
'''Inverse cumulative distribution function. x : P(X <= x) = p
"""Inverse cumulative distribution function. x : P(X <= x) = p
Finds the value of the random variable such that the probability of the
variable being less than or equal to that value equals the given probability.
Finds the value of the random variable such that the probability of
the variable being less than or equal to that value equals the given
probability.
This function is also called the percent point function or quantile function.
'''
if (p <= 0.0 or p >= 1.0):
This function is also called the percent point function or quantile
function.
"""
if p <= 0.0 or p >= 1.0:
raise StatisticsError('p must be in the range 0.0 < p < 1.0')
if self._sigma <= 0.0:
raise StatisticsError('cdf() not defined when sigma at or below zero')
@ -933,7 +954,7 @@ class NormalDist:
return self._mu + (x * self._sigma)
def overlap(self, other):
'''Compute the overlapping coefficient (OVL) between two normal distributions.
"""Compute the overlapping coefficient (OVL) between two normal distributions.
Measures the agreement between two normal probability distributions.
Returns a value between 0.0 and 1.0 giving the overlapping area in
@ -943,7 +964,7 @@ class NormalDist:
>>> N2 = NormalDist(3.2, 2.0)
>>> N1.overlap(N2)
0.8035050657330205
'''
"""
# See: "The overlapping coefficient as a measure of agreement between
# probability distributions and point estimation of the overlap of two
# normal densities" -- Henry F. Inman and Edwin L. Bradley Jr
@ -968,21 +989,21 @@ class NormalDist:
@property
def mean(self):
    """Arithmetic mean (mu) of the normal distribution."""
    mu = self._mu
    return mu
@property
def stdev(self):
    """Standard deviation (sigma) of the normal distribution."""
    sigma = self._sigma
    return sigma
@property
def variance(self):
    """Square of the standard deviation."""
    sigma = self._sigma
    return sigma ** 2.0
def __add__(x1, x2):
    """Add a constant or another NormalDist instance.

    If *x2* is a NormalDist, the means are summed and the sigmas are
    combined with hypot(), i.e. the variances add.  Mathematically, this
    works only if the two distributions are independent or if they are
    jointly normally distributed.

    Any other *x2* is treated as a constant: mu is translated by it and
    sigma is left unchanged.
    """
    if not isinstance(x2, NormalDist):
        return NormalDist(x1._mu + x2, x1._sigma)
    return NormalDist(x1._mu + x2._mu, hypot(x1._sigma, x2._sigma))
def __sub__(x1, x2):
    """Subtract a constant or another NormalDist instance.

    If *x2* is a NormalDist, the means are subtracted and the sigmas are
    combined with hypot(), i.e. the variances add.  Mathematically, this
    works only if the two distributions are independent or if they are
    jointly normally distributed.

    Any other *x2* is treated as a constant: mu is translated by it and
    sigma is left unchanged.
    """
    if not isinstance(x2, NormalDist):
        return NormalDist(x1._mu - x2, x1._sigma)
    return NormalDist(x1._mu - x2._mu, hypot(x1._sigma, x2._sigma))
def __mul__(x1, x2):
    """Multiply both mu and sigma by a constant.

    Used for rescaling, perhaps to change measurement units.
    Sigma is scaled with the absolute value of the constant.
    """
    scaled_mu = x1._mu * x2
    scaled_sigma = x1._sigma * fabs(x2)
    return NormalDist(scaled_mu, scaled_sigma)
def __truediv__(x1, x2):
    """Divide both mu and sigma by a constant.

    Used for rescaling, perhaps to change measurement units.
    Sigma is scaled with the absolute value of the constant.
    """
    scaled_mu = x1._mu / x2
    scaled_sigma = x1._sigma / fabs(x2)
    return NormalDist(scaled_mu, scaled_sigma)
def __pos__(x1):
    """Return a copy of the instance."""
    mu, sigma = x1._mu, x1._sigma
    return NormalDist(mu, sigma)
def __neg__(x1):
    """Negate mu while keeping sigma the same."""
    flipped_mu = -x1._mu
    return NormalDist(flipped_mu, x1._sigma)
__radd__ = __add__
def __rsub__(x1, x2):
    """Subtract a NormalDist from a constant or another NormalDist."""
    # x2 - x1 is computed as the negation of x1 - x2.
    difference = x1 - x2
    return -difference
__rmul__ = __mul__
def __eq__(x1, x2):
'Two NormalDist objects are equal if their mu and sigma are both equal.'
"Two NormalDist objects are equal if their mu and sigma are both equal."
if not isinstance(x2, NormalDist):
return NotImplemented
return (x1._mu, x2._sigma) == (x2._mu, x2._sigma)
def __hash__(self):
'NormalDist objects hash equal if their mu and sigma are both equal.'
"NormalDist objects hash equal if their mu and sigma are both equal."
return hash((self._mu, self._sigma))
def __repr__(self):