Reviewer note: formatting-only cleanup of Lib/statistics.py (sorted __all__,
PEP 8 blank lines, double-quoted docstrings), plus one behavioural fix folded
into the last hunk: NormalDist.__eq__ compared x2._sigma with itself, so two
distributions with equal mu but different sigma compared equal while hashing
differently.  Hunk line counts are unchanged by that fix.

diff --git a/Lib/statistics.py b/Lib/statistics.py
index ff07dc4a6b5..8a6be7c7590 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -80,12 +80,25 @@
 A single exception is defined: StatisticsError is a subclass of ValueError.
 """
 
-__all__ = [ 'StatisticsError', 'NormalDist', 'quantiles',
-            'pstdev', 'pvariance', 'stdev', 'variance',
-            'median', 'median_low', 'median_high', 'median_grouped',
-            'mean', 'mode', 'multimode', 'harmonic_mean', 'fmean',
-            'geometric_mean',
-          ]
+__all__ = [
+    'NormalDist',
+    'StatisticsError',
+    'fmean',
+    'geometric_mean',
+    'harmonic_mean',
+    'mean',
+    'median',
+    'median_grouped',
+    'median_high',
+    'median_low',
+    'mode',
+    'multimode',
+    'pstdev',
+    'pvariance',
+    'quantiles',
+    'stdev',
+    'variance',
+]
 
 import math
 import numbers
@@ -304,8 +317,9 @@ def mean(data):
     assert count == n
     return _convert(total/n, T)
 
+
 def fmean(data):
-    """ Convert data to floats and compute the arithmetic mean.
+    """Convert data to floats and compute the arithmetic mean.
 
     This runs faster than the mean() function and it always returns a float.
     The result is highly accurate but not as perfect as mean().
@@ -313,7 +327,6 @@ def fmean(data):
 
     >>> fmean([3.5, 4.0, 5.25])
     4.25
-
     """
     try:
         n = len(data)
@@ -332,6 +345,7 @@ def fmean(data):
     except ZeroDivisionError:
         raise StatisticsError('fmean requires at least one data point') from None
 
+
 def geometric_mean(data):
     """Convert data to floats and compute the geometric mean.
 
@@ -350,6 +364,7 @@ def geometric_mean(data):
         raise StatisticsError('geometric mean requires a non-empty dataset '
                               ' containing positive numbers') from None
 
+
 def harmonic_mean(data):
     """Return the harmonic mean of data.
 
@@ -547,23 +562,23 @@ def mode(data):
 
 
 def multimode(data):
-    """ Return a list of the most frequently occurring values.
+    """Return a list of the most frequently occurring values.
 
-        Will return more than one result if there are multiple modes
-        or an empty list if *data* is empty.
-
-        >>> multimode('aabbbbbbbbcc')
-        ['b']
-        >>> multimode('aabbbbccddddeeffffgg')
-        ['b', 'd', 'f']
-        >>> multimode('')
-        []
+    Will return more than one result if there are multiple modes
+    or an empty list if *data* is empty.
 
+    >>> multimode('aabbbbbbbbcc')
+    ['b']
+    >>> multimode('aabbbbccddddeeffffgg')
+    ['b', 'd', 'f']
+    >>> multimode('')
+    []
     """
     counts = Counter(iter(data)).most_common()
     maxcount, mode_items = next(groupby(counts, key=itemgetter(1)), (0, []))
     return list(map(itemgetter(0), mode_items))
 
+
 # Notes on methods for computing quantiles
 # ----------------------------------------
 #
@@ -601,7 +616,7 @@ def multimode(data):
 # external packages can be used for anything more advanced.
 
 def quantiles(dist, /, *, n=4, method='exclusive'):
-    '''Divide *dist* into *n* continuous intervals with equal probability.
+    """Divide *dist* into *n* continuous intervals with equal probability.
 
     Returns a list of (n - 1) cut points separating the intervals.
 
@@ -616,7 +631,7 @@ def quantiles(dist, /, *, n=4, method='exclusive'):
     If *method* is set to *inclusive*, *dist* is treated as population
     data. The minimum value is treated as the 0th percentile and the
     maximum value is treated as the 100th percentile.
-    '''
+    """
     if n < 1:
         raise StatisticsError('n must be at least 1')
     if hasattr(dist, 'inv_cdf'):
@@ -646,6 +661,7 @@ def quantiles(dist, /, *, n=4, method='exclusive'):
         return result
     raise ValueError(f'Unknown method: {method!r}')
 
+
 # === Measures of spread ===
 
 # See http://mathworld.wolfram.com/Variance.html
@@ -805,18 +821,21 @@ def pstdev(data, mu=None):
     except AttributeError:
         return math.sqrt(var)
 
+
 ## Normal Distribution #####################################################
 
 class NormalDist:
-    'Normal distribution of a random variable'
+    "Normal distribution of a random variable"
     # https://en.wikipedia.org/wiki/Normal_distribution
     # https://en.wikipedia.org/wiki/Variance#Properties
 
-    __slots__ = {'_mu': 'Arithmetic mean of a normal distribution',
-                 '_sigma': 'Standard deviation of a normal distribution'}
+    __slots__ = {
+        '_mu': 'Arithmetic mean of a normal distribution',
+        '_sigma': 'Standard deviation of a normal distribution',
+    }
 
     def __init__(self, mu=0.0, sigma=1.0):
-        'NormalDist where mu is the mean and sigma is the standard deviation.'
+        "NormalDist where mu is the mean and sigma is the standard deviation."
         if sigma < 0.0:
             raise StatisticsError('sigma must be non-negative')
         self._mu = mu
@@ -824,40 +843,42 @@ class NormalDist:
 
     @classmethod
     def from_samples(cls, data):
-        'Make a normal distribution instance from sample data.'
+        "Make a normal distribution instance from sample data."
         if not isinstance(data, (list, tuple)):
             data = list(data)
         xbar = fmean(data)
         return cls(xbar, stdev(data, xbar))
 
     def samples(self, n, *, seed=None):
-        'Generate *n* samples for a given mean and standard deviation.'
+        "Generate *n* samples for a given mean and standard deviation."
         gauss = random.gauss if seed is None else random.Random(seed).gauss
         mu, sigma = self._mu, self._sigma
         return [gauss(mu, sigma) for i in range(n)]
 
     def pdf(self, x):
-        'Probability density function. P(x <= X < x+dx) / dx'
+        "Probability density function. P(x <= X < x+dx) / dx"
         variance = self._sigma ** 2.0
         if not variance:
             raise StatisticsError('pdf() not defined when sigma is zero')
-        return exp((x - self._mu)**2.0 / (-2.0*variance)) / sqrt(tau * variance)
+        return exp((x - self._mu)**2.0 / (-2.0*variance)) / sqrt(tau*variance)
 
     def cdf(self, x):
-        'Cumulative distribution function. P(X <= x)'
+        "Cumulative distribution function. P(X <= x)"
         if not self._sigma:
             raise StatisticsError('cdf() not defined when sigma is zero')
         return 0.5 * (1.0 + erf((x - self._mu) / (self._sigma * sqrt(2.0))))
 
     def inv_cdf(self, p):
-        '''Inverse cumulative distribution function. x : P(X <= x) = p
+        """Inverse cumulative distribution function. x : P(X <= x) = p
 
-        Finds the value of the random variable such that the probability of the
-        variable being less than or equal to that value equals the given probability.
+        Finds the value of the random variable such that the probability of
+        the variable being less than or equal to that value equals the given
+        probability.
 
-        This function is also called the percent point function or quantile function.
-        '''
-        if (p <= 0.0 or p >= 1.0):
+        This function is also called the percent point function or quantile
+        function.
+        """
+        if p <= 0.0 or p >= 1.0:
             raise StatisticsError('p must be in the range 0.0 < p < 1.0')
         if self._sigma <= 0.0:
             raise StatisticsError('cdf() not defined when sigma at or below zero')
@@ -933,7 +954,7 @@ class NormalDist:
         return self._mu + (x * self._sigma)
 
     def overlap(self, other):
-        '''Compute the overlapping coefficient (OVL) between two normal distributions.
+        """Compute the overlapping coefficient (OVL) between two normal distributions.
 
         Measures the agreement between two normal probability distributions.
         Returns a value between 0.0 and 1.0 giving the overlapping area in
@@ -943,7 +964,7 @@ class NormalDist:
             >>> N2 = NormalDist(3.2, 2.0)
             >>> N1.overlap(N2)
             0.8035050657330205
-        '''
+        """
         # See: "The overlapping coefficient as a measure of agreement between
         # probability distributions and point estimation of the overlap of two
         # normal densities" -- Henry F. Inman and Edwin L. Bradley Jr
@@ -968,21 +989,21 @@ class NormalDist:
 
     @property
     def mean(self):
-        'Arithmetic mean of the normal distribution.'
+        "Arithmetic mean of the normal distribution."
         return self._mu
 
     @property
     def stdev(self):
-        'Standard deviation of the normal distribution.'
+        "Standard deviation of the normal distribution."
         return self._sigma
 
     @property
     def variance(self):
-        'Square of the standard deviation.'
+        "Square of the standard deviation."
         return self._sigma ** 2.0
 
     def __add__(x1, x2):
-        '''Add a constant or another NormalDist instance.
+        """Add a constant or another NormalDist instance.
 
         If *other* is a constant, translate mu by the constant,
         leaving sigma unchanged.
@@ -990,13 +1011,13 @@ class NormalDist:
         If *other* is a NormalDist, add both the means and the variances.
         Mathematically, this works only if the two distributions are
         independent or if they are jointly normally distributed.
-        '''
+        """
         if isinstance(x2, NormalDist):
             return NormalDist(x1._mu + x2._mu, hypot(x1._sigma, x2._sigma))
         return NormalDist(x1._mu + x2, x1._sigma)
 
     def __sub__(x1, x2):
-        '''Subtract a constant or another NormalDist instance.
+        """Subtract a constant or another NormalDist instance.
 
         If *other* is a constant, translate by the constant mu,
         leaving sigma unchanged.
@@ -1004,51 +1025,51 @@ class NormalDist:
         If *other* is a NormalDist, subtract the means and add the variances.
         Mathematically, this works only if the two distributions are
         independent or if they are jointly normally distributed.
-        '''
+        """
         if isinstance(x2, NormalDist):
             return NormalDist(x1._mu - x2._mu, hypot(x1._sigma, x2._sigma))
         return NormalDist(x1._mu - x2, x1._sigma)
 
     def __mul__(x1, x2):
-        '''Multiply both mu and sigma by a constant.
+        """Multiply both mu and sigma by a constant.
 
         Used for rescaling, perhaps to change measurement units.
         Sigma is scaled with the absolute value of the constant.
-        '''
+        """
         return NormalDist(x1._mu * x2, x1._sigma * fabs(x2))
 
     def __truediv__(x1, x2):
-        '''Divide both mu and sigma by a constant.
+        """Divide both mu and sigma by a constant.
 
         Used for rescaling, perhaps to change measurement units.
         Sigma is scaled with the absolute value of the constant.
-        '''
+        """
         return NormalDist(x1._mu / x2, x1._sigma / fabs(x2))
 
     def __pos__(x1):
-        'Return a copy of the instance.'
+        "Return a copy of the instance."
         return NormalDist(x1._mu, x1._sigma)
 
     def __neg__(x1):
-        'Negates mu while keeping sigma the same.'
+        "Negates mu while keeping sigma the same."
         return NormalDist(-x1._mu, x1._sigma)
 
     __radd__ = __add__
 
     def __rsub__(x1, x2):
-        'Subtract a NormalDist from a constant or another NormalDist.'
+        "Subtract a NormalDist from a constant or another NormalDist."
         return -(x1 - x2)
 
     __rmul__ = __mul__
 
     def __eq__(x1, x2):
-        'Two NormalDist objects are equal if their mu and sigma are both equal.'
+        "Two NormalDist objects are equal if their mu and sigma are both equal."
         if not isinstance(x2, NormalDist):
             return NotImplemented
-        return (x1._mu, x2._sigma) == (x2._mu, x2._sigma)
+        return (x1._mu, x1._sigma) == (x2._mu, x2._sigma)
 
     def __hash__(self):
-        'NormalDist objects hash equal if their mu and sigma are both equal.'
+        "NormalDist objects hash equal if their mu and sigma are both equal."
         return hash((self._mu, self._sigma))
 
     def __repr__(self):