Fix inconsistent return type for statistics median_grouped() gh-92531 (#92533)

This commit is contained in:
Raymond Hettinger 2022-05-09 02:08:41 -05:00 committed by GitHub
parent 5bc2390229
commit e01eeb7b4b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 22 additions and 14 deletions

View File

@ -611,7 +611,7 @@ def median_high(data):
return data[n // 2]
def median_grouped(data, interval=1):
def median_grouped(data, interval=1.0):
"""Estimates the median for numeric data binned around the midpoints
of consecutive, fixed-width intervals.
@ -650,35 +650,34 @@ def median_grouped(data, interval=1):
by exact multiples of *interval*. This is essential for getting a
correct result. The function does not check this precondition.
Inputs may be any numeric type that can be coerced to a float during
the interpolation step.
"""
data = sorted(data)
n = len(data)
if n == 0:
if not n:
raise StatisticsError("no median for empty data")
elif n == 1:
return data[0]
# Find the value at the midpoint. Remember this corresponds to the
# midpoint of the class interval.
x = data[n // 2]
# Generate a clear error message for non-numeric data
for obj in (x, interval):
if isinstance(obj, (str, bytes)):
raise TypeError(f'expected a number but got {obj!r}')
# Using O(log n) bisection, find where all the x values occur in the data.
# All x will lie within data[i:j].
i = bisect_left(data, x)
j = bisect_right(data, x, lo=i)
# Coerce to floats, raising a TypeError if not possible
try:
interval = float(interval)
x = float(x)
except ValueError:
raise TypeError(f'Value cannot be converted to a float')
# Interpolate the median using the formula found at:
# https://www.cuemath.com/data/median-of-grouped-data/
try:
L = x - interval / 2 # The lower limit of the median interval.
except TypeError:
# Coerce mixed types to float.
L = float(x) - float(interval) / 2
L = x - interval / 2.0 # Lower limit of the median interval
cf = i # Cumulative frequency of the preceding interval
f = j - i # Number of elements in the median interval
return L + interval * (n / 2 - cf) / f

View File

@ -1742,6 +1742,12 @@ class TestMedianGrouped(TestMedian):
data = [x]*count
self.assertEqual(self.func(data), float(x))
def test_single_value(self):
# Override method from AverageMixin.
# Average of a single value is the value as a float.
for x in (23, 42.5, 1.3e15, Fraction(15, 19), Decimal('0.28')):
self.assertEqual(self.func([x]), float(x))
def test_odd_fractions(self):
# Test median_grouped works with an odd number of Fractions.
F = Fraction

View File

@ -0,0 +1,3 @@
The statistics.median_grouped() function now always returns a float.
Formerly, it did not convert the input type for sequences of length
one.