mirror of https://github.com/python/cpython
Close #20481: Disallow mixed type input in statistics
The most appropriate coercion rules are not yet clear, so simply disallowing mixed type input for 3.4. (Committed on Steven's behalf)
This commit is contained in:
parent
57e4127781
commit
73afe2a972
|
@ -20,6 +20,16 @@
|
|||
This module provides functions for calculating mathematical statistics of
|
||||
numeric (:class:`Real`-valued) data.
|
||||
|
||||
.. note::
|
||||
|
||||
Unless explicitly noted otherwise, these functions support :class:`int`,
|
||||
:class:`float`, :class:`decimal.Decimal` and :class:`fractions.Fraction`.
|
||||
Behaviour with other types (whether in the numeric tower or not) is
|
||||
currently unsupported. Behaviour with mixed types is also undefined and
|
||||
implementation-dependent. If your input data consists of mixed types,
|
||||
you may be able to use :func:`map` to ensure a consistent result, e.g.
|
||||
``map(float, input_data)``.
|
||||
|
||||
Averages and measures of central location
|
||||
-----------------------------------------
|
||||
|
||||
|
|
|
@ -144,19 +144,31 @@ def _sum(data, start=0):
|
|||
>>> _sum(data)
|
||||
Decimal('0.6963')
|
||||
|
||||
Mixed types are currently treated as an error, except that int is
|
||||
allowed.
|
||||
"""
|
||||
# We fail as soon as we reach a value that is not an int or the type of
|
||||
# the first value which is not an int. E.g. _sum([int, int, float, int])
|
||||
# is okay, but _sum([int, int, float, Fraction]) is not.
|
||||
allowed_types = set([int, type(start)])
|
||||
n, d = _exact_ratio(start)
|
||||
T = type(start)
|
||||
partials = {d: n} # map {denominator: sum of numerators}
|
||||
# Micro-optimizations.
|
||||
coerce_types = _coerce_types
|
||||
exact_ratio = _exact_ratio
|
||||
partials_get = partials.get
|
||||
# Add numerators for each denominator, and track the "current" type.
|
||||
# Add numerators for each denominator.
|
||||
for x in data:
|
||||
T = _coerce_types(T, type(x))
|
||||
_check_type(type(x), allowed_types)
|
||||
n, d = exact_ratio(x)
|
||||
partials[d] = partials_get(d, 0) + n
|
||||
# Find the expected result type. If allowed_types has only one item, it
|
||||
# will be int; if it has two, use the one which isn't int.
|
||||
assert len(allowed_types) in (1, 2)
|
||||
if len(allowed_types) == 1:
|
||||
assert allowed_types.pop() is int
|
||||
T = int
|
||||
else:
|
||||
T = (allowed_types - set([int])).pop()
|
||||
if None in partials:
|
||||
assert issubclass(T, (float, Decimal))
|
||||
assert not math.isfinite(partials[None])
|
||||
|
@ -172,6 +184,15 @@ def _sum(data, start=0):
|
|||
return T(total)
|
||||
|
||||
|
||||
def _check_type(T, allowed):
|
||||
if T not in allowed:
|
||||
if len(allowed) == 1:
|
||||
allowed.add(T)
|
||||
else:
|
||||
types = ', '.join([t.__name__ for t in allowed] + [T.__name__])
|
||||
raise TypeError("unsupported mixed types: %s" % types)
|
||||
|
||||
|
||||
def _exact_ratio(x):
|
||||
"""Convert Real number x exactly to (numerator, denominator) pair.
|
||||
|
||||
|
@ -228,44 +249,6 @@ def _decimal_to_ratio(d):
|
|||
return (num, den)
|
||||
|
||||
|
||||
def _coerce_types(T1, T2):
|
||||
"""Coerce types T1 and T2 to a common type.
|
||||
|
||||
>>> _coerce_types(int, float)
|
||||
<class 'float'>
|
||||
|
||||
Coercion is performed according to this table, where "N/A" means
|
||||
that a TypeError exception is raised.
|
||||
|
||||
+----------+-----------+-----------+-----------+----------+
|
||||
| | int | Fraction | Decimal | float |
|
||||
+----------+-----------+-----------+-----------+----------+
|
||||
| int | int | Fraction | Decimal | float |
|
||||
| Fraction | Fraction | Fraction | N/A | float |
|
||||
| Decimal | Decimal | N/A | Decimal | float |
|
||||
| float | float | float | float | float |
|
||||
+----------+-----------+-----------+-----------+----------+
|
||||
|
||||
Subclasses trump their parent class; two subclasses of the same
|
||||
base class will be coerced to the second of the two.
|
||||
|
||||
"""
|
||||
# Get the common/fast cases out of the way first.
|
||||
if T1 is T2: return T1
|
||||
if T1 is int: return T2
|
||||
if T2 is int: return T1
|
||||
# Subclasses trump their parent class.
|
||||
if issubclass(T2, T1): return T2
|
||||
if issubclass(T1, T2): return T1
|
||||
# Floats trump everything else.
|
||||
if issubclass(T2, float): return T2
|
||||
if issubclass(T1, float): return T1
|
||||
# Subclasses of the same base class give priority to the second.
|
||||
if T1.__base__ is T2.__base__: return T2
|
||||
# Otherwise, just give up.
|
||||
raise TypeError('cannot coerce types %r and %r' % (T1, T2))
|
||||
|
||||
|
||||
def _counts(data):
|
||||
# Generate a table of sorted (value, frequency) pairs.
|
||||
table = collections.Counter(iter(data)).most_common()
|
||||
|
|
|
@ -687,6 +687,26 @@ class DecimalToRatioTest(unittest.TestCase):
|
|||
self.assertRaises(ValueError, statistics._decimal_to_ratio, d)
|
||||
|
||||
|
||||
class CheckTypeTest(unittest.TestCase):
    # Exercise the private helper statistics._check_type.

    def test_allowed(self):
        # Types already present in the allowed set pass without error.
        permitted = {int, float}
        for kind in (int, float):
            statistics._check_type(kind, permitted)

    def test_not_allowed(self):
        # A third type, when the set already holds two, raises TypeError.
        permitted = {int, float}
        with self.assertRaises(TypeError):
            statistics._check_type(Decimal, permitted)

    def test_add_to_allowed(self):
        # With a single type in the set, a new type is added to it.
        permitted = {int}
        statistics._check_type(float, permitted)
        self.assertEqual(permitted, {int, float})
|
||||
|
||||
|
||||
# === Tests for public functions ===
|
||||
|
||||
|
@ -881,40 +901,11 @@ class TestSum(NumericTestCase, UnivariateCommonMixin, UnivariateTypeMixin):
|
|||
self.assertRaises(TypeError, self.func, [1, 2, 3, b'999'])
|
||||
|
||||
def test_mixed_sum(self):
|
||||
# Mixed sums are allowed.
|
||||
|
||||
# Careful here: order matters. Can't mix Fraction and Decimal directly,
|
||||
# only after they're converted to float.
|
||||
data = [1, 2, Fraction(1, 2), 3.0, Decimal("0.25")]
|
||||
self.assertEqual(self.func(data), 6.75)
|
||||
|
||||
|
||||
class SumInternalsTest(NumericTestCase):
|
||||
# Test internals of the sum function.
|
||||
|
||||
def test_ignore_instance_float_method(self):
|
||||
# Test that __float__ methods on data instances are ignored.
|
||||
|
||||
# Python typically calls __dunder__ methods on the class, not the
|
||||
# instance. The ``sum`` implementation calls __float__ directly. To
|
||||
# better match the behaviour of Python, we call it only on the class,
|
||||
# not the instance. This test will fail if somebody "fixes" that code.
|
||||
|
||||
# Create a fake __float__ method.
|
||||
def __float__(self):
|
||||
raise AssertionError('test fails')
|
||||
|
||||
# Inject it into an instance.
|
||||
class MyNumber(Fraction):
|
||||
pass
|
||||
x = MyNumber(3)
|
||||
x.__float__ = types.MethodType(__float__, x)
|
||||
|
||||
# Check it works as expected.
|
||||
self.assertRaises(AssertionError, x.__float__)
|
||||
self.assertEqual(float(x), 3.0)
|
||||
# And now test the function.
|
||||
self.assertEqual(statistics._sum([1.0, 2.0, x, 4.0]), 10.0)
|
||||
# Mixed input types are not (currently) allowed.
|
||||
# Check that mixed data types fail.
|
||||
self.assertRaises(TypeError, self.func, [1, 2.0, Fraction(1, 2)])
|
||||
# And so does mixed start argument.
|
||||
self.assertRaises(TypeError, self.func, [1, 2.0], Decimal(1))
|
||||
|
||||
|
||||
class SumTortureTest(NumericTestCase):
|
||||
|
|
|
@ -24,6 +24,12 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #20481: For at least Python 3.4, the statistics module will require
|
||||
that all inputs for a single operation be of a single consistent type, or
|
||||
else a mix of ints and a single other consistent type. This avoids
|
||||
some interoperability issues that arose with the previous approach of
|
||||
coercing to a suitable common type.
|
||||
|
||||
- Issue #20478: the statistics module now treats collections.Counter inputs
|
||||
like any other iterable.
|
||||
|
||||
|
|
Loading…
Reference in New Issue