From 4db25d5c39e369f4b55eab52dc8f87f390233892 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 8 Sep 2019 16:57:58 -0700 Subject: [PATCH] bpo-36018: Address more reviewer feedback (GH-15733) --- Doc/library/statistics.rst | 41 ++++++++++++++++++++++++------------- Lib/statistics.py | 32 ++++++++++++++++++++++++----- Lib/test/test_statistics.py | 35 ++++++++++++++----------------- 3 files changed, 69 insertions(+), 39 deletions(-) diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst index 0798ae29118..bdd706d0a93 100644 --- a/Doc/library/statistics.rst +++ b/Doc/library/statistics.rst @@ -514,15 +514,14 @@ However, for reading convenience, most of the examples show sorted sequences. Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles. Set *n* to 100 for percentiles which gives the 99 cuts points that separate - *data* in to 100 equal sized groups. Raises :exc:`StatisticsError` if *n* + *data* into 100 equal sized groups. Raises :exc:`StatisticsError` if *n* is not least 1. - The *data* can be any iterable containing sample data or it can be an - instance of a class that defines an :meth:`~inv_cdf` method. For meaningful + The *data* can be any iterable containing sample data. For meaningful results, the number of data points in *data* should be larger than *n*. Raises :exc:`StatisticsError` if there are not at least two data points. - For sample data, the cut points are linearly interpolated from the + The cut points are linearly interpolated from the two nearest data points. For example, if a cut point falls one-third of the distance between two sample values, ``100`` and ``112``, the cut-point will evaluate to ``104``. @@ -547,9 +546,6 @@ However, for reading convenience, most of the examples show sorted sequences. values, the method sorts them and assigns the following percentiles: 0%, 10%, 20%, 30%, 40%, 50%, 60%, 70%, 80%, 90%, 100%. 
- If *data* is an instance of a class that defines an - :meth:`~inv_cdf` method, setting *method* has no effect. - .. doctest:: # Decile cut points for empirically sampled data @@ -561,11 +557,6 @@ However, for reading convenience, most of the examples show sorted sequences. >>> [round(q, 1) for q in quantiles(data, n=10)] [81.0, 86.2, 89.0, 99.4, 102.5, 103.6, 106.0, 109.8, 111.0] - >>> # Quartile cut points for the standard normal distribution - >>> Z = NormalDist() - >>> [round(q, 4) for q in quantiles(Z, n=4)] - [-0.6745, 0.0, 0.6745] - .. versionadded:: 3.8 @@ -607,6 +598,18 @@ of applications in statistics. `_ of a normal distribution. + .. attribute:: median + + A read-only property for the `median + <https://en.wikipedia.org/wiki/Median>`_ of a normal + distribution. + + .. attribute:: mode + + A read-only property for the `mode + <https://en.wikipedia.org/wiki/Mode_(statistics)>`_ of a normal + distribution. + .. attribute:: stdev A read-only property for the `standard deviation @@ -678,6 +681,16 @@ of applications in statistics. the two probability density functions `_. + .. method:: NormalDist.quantiles(n=4) + + Divide the normal distribution into *n* continuous intervals with + equal probability. Returns a list of (n - 1) cut points separating + the intervals. + + Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles. + Set *n* to 100 for percentiles which gives the 99 cut points that + separate the normal distribution into 100 equal sized groups. + Instances of :class:`NormalDist` support addition, subtraction, multiplication and division by a constant. These operations are used for translation and scaling. For example: @@ -733,9 +746,9 @@ Find the `quartiles `_ and `deciles .. 
doctest:: - >>> list(map(round, quantiles(sat))) + >>> list(map(round, sat.quantiles())) [928, 1060, 1192] - >>> list(map(round, quantiles(sat, n=10))) + >>> list(map(round, sat.quantiles(n=10))) [810, 896, 958, 1011, 1060, 1109, 1162, 1224, 1310] To estimate the distribution for a model than isn't easy to solve diff --git a/Lib/statistics.py b/Lib/statistics.py index 4b172662770..70c48d605d1 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -624,9 +624,8 @@ def quantiles(data, /, *, n=4, method='exclusive'): Set *n* to 100 for percentiles which gives the 99 cuts points that separate *data* in to 100 equal sized groups. - The *data* can be any iterable containing sample data or it can be - an instance of a class that defines an inv_cdf() method. For sample - data, the cut points are linearly interpolated between data points. + The *data* can be any iterable containing sample data. + The cut points are linearly interpolated between data points. If *method* is set to *inclusive*, *data* is treated as population data. The minimum value is treated as the 0th percentile and the @@ -634,8 +633,6 @@ def quantiles(data, /, *, n=4, method='exclusive'): """ if n < 1: raise StatisticsError('n must be at least 1') - if hasattr(data, 'inv_cdf'): - return [data.inv_cdf(i / n) for i in range(1, n)] data = sorted(data) ld = len(data) if ld < 2: @@ -955,6 +952,17 @@ class NormalDist: raise StatisticsError('cdf() not defined when sigma at or below zero') return _normal_dist_inv_cdf(p, self._mu, self._sigma) + def quantiles(self, n=4): + """Divide into *n* continuous intervals with equal probability. + + Returns a list of (n - 1) cut points separating the intervals. + + Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles. + Set *n* to 100 for percentiles which gives the 99 cut points that + separate the normal distribution into 100 equal sized groups. 
+ """ + return [self.inv_cdf(i / n) for i in range(1, n)] + def overlap(self, other): """Compute the overlapping coefficient (OVL) between two normal distributions. @@ -994,6 +1002,20 @@ class NormalDist: "Arithmetic mean of the normal distribution." return self._mu + @property + def median(self): + "Return the median of the normal distribution" + return self._mu + + @property + def mode(self): + """Return the mode of the normal distribution + + The mode is the value x at which the probability density + function (pdf) takes its maximum value. + """ + return self._mu + @property def stdev(self): "Standard deviation of the normal distribution." diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index 01b317c3281..af26473e8fd 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -2198,16 +2198,6 @@ class TestQuantiles(unittest.TestCase): exp = list(map(f, expected)) act = quantiles(map(f, data), n=n) self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act))) - # Quartiles of a standard normal distribution - for n, expected in [ - (1, []), - (2, [0.0]), - (3, [-0.4307, 0.4307]), - (4 ,[-0.6745, 0.0, 0.6745]), - ]: - actual = quantiles(statistics.NormalDist(), n=n) - self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001) - for e, a in zip(expected, actual))) # Q2 agrees with median() for k in range(2, 60): data = random.choices(range(100), k=k) @@ -2248,16 +2238,6 @@ class TestQuantiles(unittest.TestCase): exp = list(map(f, expected)) act = quantiles(map(f, data), n=n, method="inclusive") self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act))) - # Quartiles of a standard normal distribution - for n, expected in [ - (1, []), - (2, [0.0]), - (3, [-0.4307, 0.4307]), - (4 ,[-0.6745, 0.0, 0.6745]), - ]: - actual = quantiles(statistics.NormalDist(), n=n, method="inclusive") - self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001) - for e, a in zip(expected, actual))) # Natural deciles 
self.assertEqual(quantiles([0, 100], n=10, method='inclusive'), [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0]) @@ -2546,6 +2526,19 @@ class TestNormalDist: # Special values self.assertTrue(math.isnan(Z.inv_cdf(float('NaN')))) + def test_quantiles(self): + # Quartiles of a standard normal distribution + Z = self.module.NormalDist() + for n, expected in [ + (1, []), + (2, [0.0]), + (3, [-0.4307, 0.4307]), + (4 ,[-0.6745, 0.0, 0.6745]), + ]: + actual = Z.quantiles(n=n) + self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001) + for e, a in zip(expected, actual))) + def test_overlap(self): NormalDist = self.module.NormalDist @@ -2612,6 +2605,8 @@ class TestNormalDist: def test_properties(self): X = self.module.NormalDist(100, 15) self.assertEqual(X.mean, 100) + self.assertEqual(X.median, 100) + self.assertEqual(X.mode, 100) self.assertEqual(X.stdev, 15) self.assertEqual(X.variance, 225)