bpo-36018: Address more reviewer feedback (GH-15733)

This commit is contained in:
Raymond Hettinger 2019-09-08 16:57:58 -07:00 committed by GitHub
parent 3c87a667bb
commit 4db25d5c39
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 69 additions and 39 deletions

View File

@ -514,15 +514,14 @@ However, for reading convenience, most of the examples show sorted sequences.
Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles. Set Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles. Set
*n* to 100 for percentiles which gives the 99 cut points that separate *n* to 100 for percentiles which gives the 99 cut points that separate
*data* in to 100 equal sized groups. Raises :exc:`StatisticsError` if *n* *data* into 100 equal sized groups. Raises :exc:`StatisticsError` if *n*
is not at least 1. is not at least 1.
The *data* can be any iterable containing sample data or it can be an The *data* can be any iterable containing sample data. For meaningful
instance of a class that defines an :meth:`~inv_cdf` method. For meaningful
results, the number of data points in *data* should be larger than *n*. results, the number of data points in *data* should be larger than *n*.
Raises :exc:`StatisticsError` if there are not at least two data points. Raises :exc:`StatisticsError` if there are not at least two data points.
For sample data, the cut points are linearly interpolated from the The cut points are linearly interpolated from the
two nearest data points. For example, if a cut point falls one-third two nearest data points. For example, if a cut point falls one-third
of the distance between two sample values, ``100`` and ``112``, the of the distance between two sample values, ``100`` and ``112``, the
cut-point will evaluate to ``104``. cut-point will evaluate to ``104``.
@ -547,9 +546,6 @@ However, for reading convenience, most of the examples show sorted sequences.
values, the method sorts them and assigns the following percentiles: values, the method sorts them and assigns the following percentiles:
0%, 10%, 20%, 30%, 40%, 50%, 60%, 70%, 80%, 90%, 100%. 0%, 10%, 20%, 30%, 40%, 50%, 60%, 70%, 80%, 90%, 100%.
If *data* is an instance of a class that defines an
:meth:`~inv_cdf` method, setting *method* has no effect.
.. doctest:: .. doctest::
# Decile cut points for empirically sampled data # Decile cut points for empirically sampled data
@ -561,11 +557,6 @@ However, for reading convenience, most of the examples show sorted sequences.
>>> [round(q, 1) for q in quantiles(data, n=10)] >>> [round(q, 1) for q in quantiles(data, n=10)]
[81.0, 86.2, 89.0, 99.4, 102.5, 103.6, 106.0, 109.8, 111.0] [81.0, 86.2, 89.0, 99.4, 102.5, 103.6, 106.0, 109.8, 111.0]
>>> # Quartile cut points for the standard normal distribution
>>> Z = NormalDist()
>>> [round(q, 4) for q in quantiles(Z, n=4)]
[-0.6745, 0.0, 0.6745]
.. versionadded:: 3.8 .. versionadded:: 3.8
@ -607,6 +598,18 @@ of applications in statistics.
<https://en.wikipedia.org/wiki/Arithmetic_mean>`_ of a normal <https://en.wikipedia.org/wiki/Arithmetic_mean>`_ of a normal
distribution. distribution.
.. attribute:: median
A read-only property for the `median
<https://en.wikipedia.org/wiki/Median>`_ of a normal
distribution.
.. attribute:: mode
A read-only property for the `mode
<https://en.wikipedia.org/wiki/Mode_(statistics)>`_ of a normal
distribution.
.. attribute:: stdev .. attribute:: stdev
A read-only property for the `standard deviation A read-only property for the `standard deviation
@ -678,6 +681,16 @@ of applications in statistics.
the two probability density functions the two probability density functions
<https://www.rasch.org/rmt/rmt101r.htm>`_. <https://www.rasch.org/rmt/rmt101r.htm>`_.
.. method:: NormalDist.quantiles(n=4)
Divide the normal distribution into *n* continuous intervals with
equal probability. Returns a list of (n - 1) cut points separating
the intervals.
Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles.
Set *n* to 100 for percentiles which gives the 99 cut points that
separate the normal distribution into 100 equal sized groups.
Instances of :class:`NormalDist` support addition, subtraction, Instances of :class:`NormalDist` support addition, subtraction,
multiplication and division by a constant. These operations multiplication and division by a constant. These operations
are used for translation and scaling. For example: are used for translation and scaling. For example:
@ -733,9 +746,9 @@ Find the `quartiles <https://en.wikipedia.org/wiki/Quartile>`_ and `deciles
.. doctest:: .. doctest::
>>> list(map(round, quantiles(sat))) >>> list(map(round, sat.quantiles()))
[928, 1060, 1192] [928, 1060, 1192]
>>> list(map(round, quantiles(sat, n=10))) >>> list(map(round, sat.quantiles(n=10)))
[810, 896, 958, 1011, 1060, 1109, 1162, 1224, 1310] [810, 896, 958, 1011, 1060, 1109, 1162, 1224, 1310]
To estimate the distribution for a model that isn't easy to solve To estimate the distribution for a model that isn't easy to solve

View File

@ -624,9 +624,8 @@ def quantiles(data, /, *, n=4, method='exclusive'):
Set *n* to 100 for percentiles which gives the 99 cut points that Set *n* to 100 for percentiles which gives the 99 cut points that
separate *data* into 100 equal sized groups. separate *data* into 100 equal sized groups.
The *data* can be any iterable containing sample data or it can be The *data* can be any iterable containing sample data.
an instance of a class that defines an inv_cdf() method. For sample The cut points are linearly interpolated between data points.
data, the cut points are linearly interpolated between data points.
If *method* is set to *inclusive*, *data* is treated as population If *method* is set to *inclusive*, *data* is treated as population
data. The minimum value is treated as the 0th percentile and the data. The minimum value is treated as the 0th percentile and the
@ -634,8 +633,6 @@ def quantiles(data, /, *, n=4, method='exclusive'):
""" """
if n < 1: if n < 1:
raise StatisticsError('n must be at least 1') raise StatisticsError('n must be at least 1')
if hasattr(data, 'inv_cdf'):
return [data.inv_cdf(i / n) for i in range(1, n)]
data = sorted(data) data = sorted(data)
ld = len(data) ld = len(data)
if ld < 2: if ld < 2:
@ -955,6 +952,17 @@ class NormalDist:
raise StatisticsError('cdf() not defined when sigma at or below zero') raise StatisticsError('cdf() not defined when sigma at or below zero')
return _normal_dist_inv_cdf(p, self._mu, self._sigma) return _normal_dist_inv_cdf(p, self._mu, self._sigma)
def quantiles(self, n=4):
"""Divide into *n* continuous intervals with equal probability.
Returns a list of (n - 1) cut points separating the intervals.
Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles.
Set *n* to 100 for percentiles which gives the 99 cut points that
separate the normal distribution into 100 equal sized groups.
"""
return [self.inv_cdf(i / n) for i in range(1, n)]
def overlap(self, other): def overlap(self, other):
"""Compute the overlapping coefficient (OVL) between two normal distributions. """Compute the overlapping coefficient (OVL) between two normal distributions.
@ -994,6 +1002,20 @@ class NormalDist:
"Arithmetic mean of the normal distribution." "Arithmetic mean of the normal distribution."
return self._mu return self._mu
@property
def median(self):
"Return the median of the normal distribution"
return self._mu
@property
def mode(self):
"""Return the mode of the normal distribution
The mode is the value x at which the probability density
function (pdf) takes its maximum value.
"""
return self._mu
@property @property
def stdev(self): def stdev(self):
"Standard deviation of the normal distribution." "Standard deviation of the normal distribution."

View File

@ -2198,16 +2198,6 @@ class TestQuantiles(unittest.TestCase):
exp = list(map(f, expected)) exp = list(map(f, expected))
act = quantiles(map(f, data), n=n) act = quantiles(map(f, data), n=n)
self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act))) self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act)))
# Quartiles of a standard normal distribution
for n, expected in [
(1, []),
(2, [0.0]),
(3, [-0.4307, 0.4307]),
(4 ,[-0.6745, 0.0, 0.6745]),
]:
actual = quantiles(statistics.NormalDist(), n=n)
self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
for e, a in zip(expected, actual)))
# Q2 agrees with median() # Q2 agrees with median()
for k in range(2, 60): for k in range(2, 60):
data = random.choices(range(100), k=k) data = random.choices(range(100), k=k)
@ -2248,16 +2238,6 @@ class TestQuantiles(unittest.TestCase):
exp = list(map(f, expected)) exp = list(map(f, expected))
act = quantiles(map(f, data), n=n, method="inclusive") act = quantiles(map(f, data), n=n, method="inclusive")
self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act))) self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act)))
# Quartiles of a standard normal distribution
for n, expected in [
(1, []),
(2, [0.0]),
(3, [-0.4307, 0.4307]),
(4 ,[-0.6745, 0.0, 0.6745]),
]:
actual = quantiles(statistics.NormalDist(), n=n, method="inclusive")
self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
for e, a in zip(expected, actual)))
# Natural deciles # Natural deciles
self.assertEqual(quantiles([0, 100], n=10, method='inclusive'), self.assertEqual(quantiles([0, 100], n=10, method='inclusive'),
[10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0]) [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0])
@ -2546,6 +2526,19 @@ class TestNormalDist:
# Special values # Special values
self.assertTrue(math.isnan(Z.inv_cdf(float('NaN')))) self.assertTrue(math.isnan(Z.inv_cdf(float('NaN'))))
def test_quantiles(self):
# Quartiles of a standard normal distribution
Z = self.module.NormalDist()
for n, expected in [
(1, []),
(2, [0.0]),
(3, [-0.4307, 0.4307]),
(4 ,[-0.6745, 0.0, 0.6745]),
]:
actual = Z.quantiles(n=n)
self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
for e, a in zip(expected, actual)))
def test_overlap(self): def test_overlap(self):
NormalDist = self.module.NormalDist NormalDist = self.module.NormalDist
@ -2612,6 +2605,8 @@ class TestNormalDist:
def test_properties(self): def test_properties(self):
X = self.module.NormalDist(100, 15) X = self.module.NormalDist(100, 15)
self.assertEqual(X.mean, 100) self.assertEqual(X.mean, 100)
self.assertEqual(X.median, 100)
self.assertEqual(X.mode, 100)
self.assertEqual(X.stdev, 15) self.assertEqual(X.stdev, 15)
self.assertEqual(X.variance, 225) self.assertEqual(X.variance, 225)