mirror of https://github.com/python/cpython
bpo-36018: Address more reviewer feedback (GH-15733)
This commit is contained in:
parent
3c87a667bb
commit
4db25d5c39
|
@ -514,15 +514,14 @@ However, for reading convenience, most of the examples show sorted sequences.
|
||||||
|
|
||||||
Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles. Set
|
Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles. Set
|
||||||
*n* to 100 for percentiles which gives the 99 cut points that separate
|
*n* to 100 for percentiles which gives the 99 cut points that separate
|
||||||
*data* in to 100 equal sized groups. Raises :exc:`StatisticsError` if *n*
|
*data* into 100 equal sized groups. Raises :exc:`StatisticsError` if *n*
|
||||||
is not at least 1.
|
is not at least 1.
|
||||||
|
|
||||||
The *data* can be any iterable containing sample data or it can be an
|
The *data* can be any iterable containing sample data. For meaningful
|
||||||
instance of a class that defines an :meth:`~inv_cdf` method. For meaningful
|
|
||||||
results, the number of data points in *data* should be larger than *n*.
|
results, the number of data points in *data* should be larger than *n*.
|
||||||
Raises :exc:`StatisticsError` if there are not at least two data points.
|
Raises :exc:`StatisticsError` if there are not at least two data points.
|
||||||
|
|
||||||
For sample data, the cut points are linearly interpolated from the
|
The cut points are linearly interpolated from the
|
||||||
two nearest data points. For example, if a cut point falls one-third
|
two nearest data points. For example, if a cut point falls one-third
|
||||||
of the distance between two sample values, ``100`` and ``112``, the
|
of the distance between two sample values, ``100`` and ``112``, the
|
||||||
cut-point will evaluate to ``104``.
|
cut-point will evaluate to ``104``.
|
||||||
|
@ -547,9 +546,6 @@ However, for reading convenience, most of the examples show sorted sequences.
|
||||||
values, the method sorts them and assigns the following percentiles:
|
values, the method sorts them and assigns the following percentiles:
|
||||||
0%, 10%, 20%, 30%, 40%, 50%, 60%, 70%, 80%, 90%, 100%.
|
0%, 10%, 20%, 30%, 40%, 50%, 60%, 70%, 80%, 90%, 100%.
|
||||||
|
|
||||||
If *data* is an instance of a class that defines an
|
|
||||||
:meth:`~inv_cdf` method, setting *method* has no effect.
|
|
||||||
|
|
||||||
.. doctest::
|
.. doctest::
|
||||||
|
|
||||||
# Decile cut points for empirically sampled data
|
# Decile cut points for empirically sampled data
|
||||||
|
@ -561,11 +557,6 @@ However, for reading convenience, most of the examples show sorted sequences.
|
||||||
>>> [round(q, 1) for q in quantiles(data, n=10)]
|
>>> [round(q, 1) for q in quantiles(data, n=10)]
|
||||||
[81.0, 86.2, 89.0, 99.4, 102.5, 103.6, 106.0, 109.8, 111.0]
|
[81.0, 86.2, 89.0, 99.4, 102.5, 103.6, 106.0, 109.8, 111.0]
|
||||||
|
|
||||||
>>> # Quartile cut points for the standard normal distribution
|
|
||||||
>>> Z = NormalDist()
|
|
||||||
>>> [round(q, 4) for q in quantiles(Z, n=4)]
|
|
||||||
[-0.6745, 0.0, 0.6745]
|
|
||||||
|
|
||||||
.. versionadded:: 3.8
|
.. versionadded:: 3.8
|
||||||
|
|
||||||
|
|
||||||
|
@ -607,6 +598,18 @@ of applications in statistics.
|
||||||
<https://en.wikipedia.org/wiki/Arithmetic_mean>`_ of a normal
|
<https://en.wikipedia.org/wiki/Arithmetic_mean>`_ of a normal
|
||||||
distribution.
|
distribution.
|
||||||
|
|
||||||
|
.. attribute:: median
|
||||||
|
|
||||||
|
A read-only property for the `median
|
||||||
|
<https://en.wikipedia.org/wiki/Median>`_ of a normal
|
||||||
|
distribution.
|
||||||
|
|
||||||
|
.. attribute:: mode
|
||||||
|
|
||||||
|
A read-only property for the `mode
|
||||||
|
<https://en.wikipedia.org/wiki/Mode_(statistics)>`_ of a normal
|
||||||
|
distribution.
|
||||||
|
|
||||||
.. attribute:: stdev
|
.. attribute:: stdev
|
||||||
|
|
||||||
A read-only property for the `standard deviation
|
A read-only property for the `standard deviation
|
||||||
|
@ -678,6 +681,16 @@ of applications in statistics.
|
||||||
the two probability density functions
|
the two probability density functions
|
||||||
<https://www.rasch.org/rmt/rmt101r.htm>`_.
|
<https://www.rasch.org/rmt/rmt101r.htm>`_.
|
||||||
|
|
||||||
|
.. method:: NormalDist.quantiles()
|
||||||
|
|
||||||
|
Divide the normal distribution into *n* continuous intervals with
|
||||||
|
equal probability. Returns a list of (n - 1) cut points separating
|
||||||
|
the intervals.
|
||||||
|
|
||||||
|
Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles.
|
||||||
|
Set *n* to 100 for percentiles which gives the 99 cut points that
|
||||||
|
separate the normal distribution into 100 equal sized groups.
|
||||||
|
|
||||||
Instances of :class:`NormalDist` support addition, subtraction,
|
Instances of :class:`NormalDist` support addition, subtraction,
|
||||||
multiplication and division by a constant. These operations
|
multiplication and division by a constant. These operations
|
||||||
are used for translation and scaling. For example:
|
are used for translation and scaling. For example:
|
||||||
|
@ -733,9 +746,9 @@ Find the `quartiles <https://en.wikipedia.org/wiki/Quartile>`_ and `deciles
|
||||||
|
|
||||||
.. doctest::
|
.. doctest::
|
||||||
|
|
||||||
>>> list(map(round, quantiles(sat)))
|
>>> list(map(round, sat.quantiles()))
|
||||||
[928, 1060, 1192]
|
[928, 1060, 1192]
|
||||||
>>> list(map(round, quantiles(sat, n=10)))
|
>>> list(map(round, sat.quantiles(n=10)))
|
||||||
[810, 896, 958, 1011, 1060, 1109, 1162, 1224, 1310]
|
[810, 896, 958, 1011, 1060, 1109, 1162, 1224, 1310]
|
||||||
|
|
||||||
To estimate the distribution for a model that isn't easy to solve
|
To estimate the distribution for a model that isn't easy to solve
|
||||||
|
|
|
@ -624,9 +624,8 @@ def quantiles(data, /, *, n=4, method='exclusive'):
|
||||||
Set *n* to 100 for percentiles which gives the 99 cut points that
|
Set *n* to 100 for percentiles which gives the 99 cut points that
|
||||||
separate *data* into 100 equal sized groups.
|
separate *data* into 100 equal sized groups.
|
||||||
|
|
||||||
The *data* can be any iterable containing sample data or it can be
|
The *data* can be any iterable containing sample data.
|
||||||
an instance of a class that defines an inv_cdf() method. For sample
|
The cut points are linearly interpolated between data points.
|
||||||
data, the cut points are linearly interpolated between data points.
|
|
||||||
|
|
||||||
If *method* is set to *inclusive*, *data* is treated as population
|
If *method* is set to *inclusive*, *data* is treated as population
|
||||||
data. The minimum value is treated as the 0th percentile and the
|
data. The minimum value is treated as the 0th percentile and the
|
||||||
|
@ -634,8 +633,6 @@ def quantiles(data, /, *, n=4, method='exclusive'):
|
||||||
"""
|
"""
|
||||||
if n < 1:
|
if n < 1:
|
||||||
raise StatisticsError('n must be at least 1')
|
raise StatisticsError('n must be at least 1')
|
||||||
if hasattr(data, 'inv_cdf'):
|
|
||||||
return [data.inv_cdf(i / n) for i in range(1, n)]
|
|
||||||
data = sorted(data)
|
data = sorted(data)
|
||||||
ld = len(data)
|
ld = len(data)
|
||||||
if ld < 2:
|
if ld < 2:
|
||||||
|
@ -955,6 +952,17 @@ class NormalDist:
|
||||||
raise StatisticsError('cdf() not defined when sigma at or below zero')
|
raise StatisticsError('cdf() not defined when sigma at or below zero')
|
||||||
return _normal_dist_inv_cdf(p, self._mu, self._sigma)
|
return _normal_dist_inv_cdf(p, self._mu, self._sigma)
|
||||||
|
|
||||||
|
def quantiles(self, n=4):
|
||||||
|
"""Divide into *n* continuous intervals with equal probability.
|
||||||
|
|
||||||
|
Returns a list of (n - 1) cut points separating the intervals.
|
||||||
|
|
||||||
|
Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles.
|
||||||
|
Set *n* to 100 for percentiles which gives the 99 cut points that
|
||||||
|
separate the normal distribution into 100 equal sized groups.
|
||||||
|
"""
|
||||||
|
return [self.inv_cdf(i / n) for i in range(1, n)]
|
||||||
|
|
||||||
def overlap(self, other):
|
def overlap(self, other):
|
||||||
"""Compute the overlapping coefficient (OVL) between two normal distributions.
|
"""Compute the overlapping coefficient (OVL) between two normal distributions.
|
||||||
|
|
||||||
|
@ -994,6 +1002,20 @@ class NormalDist:
|
||||||
"Arithmetic mean of the normal distribution."
|
"Arithmetic mean of the normal distribution."
|
||||||
return self._mu
|
return self._mu
|
||||||
|
|
||||||
|
@property
|
||||||
|
def median(self):
|
||||||
|
"Return the median of the normal distribution"
|
||||||
|
return self._mu
|
||||||
|
|
||||||
|
@property
|
||||||
|
def mode(self):
|
||||||
|
"""Return the mode of the normal distribution
|
||||||
|
|
||||||
|
The mode is the value x at which the probability density
|
||||||
|
function (pdf) takes its maximum value.
|
||||||
|
"""
|
||||||
|
return self._mu
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def stdev(self):
|
def stdev(self):
|
||||||
"Standard deviation of the normal distribution."
|
"Standard deviation of the normal distribution."
|
||||||
|
|
|
@ -2198,16 +2198,6 @@ class TestQuantiles(unittest.TestCase):
|
||||||
exp = list(map(f, expected))
|
exp = list(map(f, expected))
|
||||||
act = quantiles(map(f, data), n=n)
|
act = quantiles(map(f, data), n=n)
|
||||||
self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act)))
|
self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act)))
|
||||||
# Quartiles of a standard normal distribution
|
|
||||||
for n, expected in [
|
|
||||||
(1, []),
|
|
||||||
(2, [0.0]),
|
|
||||||
(3, [-0.4307, 0.4307]),
|
|
||||||
(4 ,[-0.6745, 0.0, 0.6745]),
|
|
||||||
]:
|
|
||||||
actual = quantiles(statistics.NormalDist(), n=n)
|
|
||||||
self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
|
|
||||||
for e, a in zip(expected, actual)))
|
|
||||||
# Q2 agrees with median()
|
# Q2 agrees with median()
|
||||||
for k in range(2, 60):
|
for k in range(2, 60):
|
||||||
data = random.choices(range(100), k=k)
|
data = random.choices(range(100), k=k)
|
||||||
|
@ -2248,16 +2238,6 @@ class TestQuantiles(unittest.TestCase):
|
||||||
exp = list(map(f, expected))
|
exp = list(map(f, expected))
|
||||||
act = quantiles(map(f, data), n=n, method="inclusive")
|
act = quantiles(map(f, data), n=n, method="inclusive")
|
||||||
self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act)))
|
self.assertTrue(all(math.isclose(e, a) for e, a in zip(exp, act)))
|
||||||
# Quartiles of a standard normal distribution
|
|
||||||
for n, expected in [
|
|
||||||
(1, []),
|
|
||||||
(2, [0.0]),
|
|
||||||
(3, [-0.4307, 0.4307]),
|
|
||||||
(4 ,[-0.6745, 0.0, 0.6745]),
|
|
||||||
]:
|
|
||||||
actual = quantiles(statistics.NormalDist(), n=n, method="inclusive")
|
|
||||||
self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
|
|
||||||
for e, a in zip(expected, actual)))
|
|
||||||
# Natural deciles
|
# Natural deciles
|
||||||
self.assertEqual(quantiles([0, 100], n=10, method='inclusive'),
|
self.assertEqual(quantiles([0, 100], n=10, method='inclusive'),
|
||||||
[10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0])
|
[10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0])
|
||||||
|
@ -2546,6 +2526,19 @@ class TestNormalDist:
|
||||||
# Special values
|
# Special values
|
||||||
self.assertTrue(math.isnan(Z.inv_cdf(float('NaN'))))
|
self.assertTrue(math.isnan(Z.inv_cdf(float('NaN'))))
|
||||||
|
|
||||||
|
def test_quantiles(self):
|
||||||
|
# Quartiles of a standard normal distribution
|
||||||
|
Z = self.module.NormalDist()
|
||||||
|
for n, expected in [
|
||||||
|
(1, []),
|
||||||
|
(2, [0.0]),
|
||||||
|
(3, [-0.4307, 0.4307]),
|
||||||
|
(4 ,[-0.6745, 0.0, 0.6745]),
|
||||||
|
]:
|
||||||
|
actual = Z.quantiles(n=n)
|
||||||
|
self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
|
||||||
|
for e, a in zip(expected, actual)))
|
||||||
|
|
||||||
def test_overlap(self):
|
def test_overlap(self):
|
||||||
NormalDist = self.module.NormalDist
|
NormalDist = self.module.NormalDist
|
||||||
|
|
||||||
|
@ -2612,6 +2605,8 @@ class TestNormalDist:
|
||||||
def test_properties(self):
|
def test_properties(self):
|
||||||
X = self.module.NormalDist(100, 15)
|
X = self.module.NormalDist(100, 15)
|
||||||
self.assertEqual(X.mean, 100)
|
self.assertEqual(X.mean, 100)
|
||||||
|
self.assertEqual(X.median, 100)
|
||||||
|
self.assertEqual(X.mode, 100)
|
||||||
self.assertEqual(X.stdev, 15)
|
self.assertEqual(X.stdev, 15)
|
||||||
self.assertEqual(X.variance, 225)
|
self.assertEqual(X.variance, 225)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue