Close #20481: Disallow mixed type input in statistics

The most appropriate coercion rules are not yet clear, so simply
disallowing mixed type input for 3.4.

(Committed on Steven's behalf)
This commit is contained in:
Nick Coghlan 2014-02-08 19:58:04 +10:00
parent 57e4127781
commit 73afe2a972
4 changed files with 66 additions and 76 deletions

View File

@ -20,6 +20,16 @@
This module provides functions for calculating mathematical statistics of
numeric (:class:`Real`-valued) data.
.. note::
Unless explicitly noted otherwise, these functions support :class:`int`,
:class:`float`, :class:`decimal.Decimal` and :class:`fractions.Fraction`.
Behaviour with other types (whether in the numeric tower or not) is
currently unsupported. Behaviour with mixed types is also undefined and
implementation-dependent. If your input data consists of mixed types,
you may be able to use :func:`map` to ensure a consistent result, e.g.
``map(float, input_data)``.
Averages and measures of central location
-----------------------------------------

View File

@ -144,19 +144,31 @@ def _sum(data, start=0):
>>> _sum(data)
Decimal('0.6963')
Mixed types are currently treated as an error, except that int is
allowed.
"""
# We fail as soon as we reach a value that is not an int or the type of
# the first value which is not an int. E.g. _sum([int, int, float, int])
# is okay, but _sum([int, int, float, Fraction]) is not.
allowed_types = set([int, type(start)])
n, d = _exact_ratio(start)
T = type(start)
partials = {d: n} # map {denominator: sum of numerators}
# Micro-optimizations.
coerce_types = _coerce_types
exact_ratio = _exact_ratio
partials_get = partials.get
# Add numerators for each denominator, and track the "current" type.
# Add numerators for each denominator.
for x in data:
T = _coerce_types(T, type(x))
_check_type(type(x), allowed_types)
n, d = exact_ratio(x)
partials[d] = partials_get(d, 0) + n
# Find the expected result type. If allowed_types has only one item, it
# will be int; if it has two, use the one which isn't int.
assert len(allowed_types) in (1, 2)
if len(allowed_types) == 1:
assert allowed_types.pop() is int
T = int
else:
T = (allowed_types - set([int])).pop()
if None in partials:
assert issubclass(T, (float, Decimal))
assert not math.isfinite(partials[None])
@ -172,6 +184,15 @@ def _sum(data, start=0):
return T(total)
def _check_type(T, allowed):
if T not in allowed:
if len(allowed) == 1:
allowed.add(T)
else:
types = ', '.join([t.__name__ for t in allowed] + [T.__name__])
raise TypeError("unsupported mixed types: %s" % types)
def _exact_ratio(x):
"""Convert Real number x exactly to (numerator, denominator) pair.
@ -228,44 +249,6 @@ def _decimal_to_ratio(d):
return (num, den)
def _coerce_types(T1, T2):
"""Coerce types T1 and T2 to a common type.
>>> _coerce_types(int, float)
<class 'float'>
Coercion is performed according to this table, where "N/A" means
that a TypeError exception is raised.
+----------+-----------+-----------+-----------+----------+
| | int | Fraction | Decimal | float |
+----------+-----------+-----------+-----------+----------+
| int | int | Fraction | Decimal | float |
| Fraction | Fraction | Fraction | N/A | float |
| Decimal | Decimal | N/A | Decimal | float |
| float | float | float | float | float |
+----------+-----------+-----------+-----------+----------+
Subclasses trump their parent class; two subclasses of the same
base class will be coerced to the second of the two.
"""
# Get the common/fast cases out of the way first.
if T1 is T2: return T1
if T1 is int: return T2
if T2 is int: return T1
# Subclasses trump their parent class.
if issubclass(T2, T1): return T2
if issubclass(T1, T2): return T1
# Floats trump everything else.
if issubclass(T2, float): return T2
if issubclass(T1, float): return T1
# Subclasses of the same base class give priority to the second.
if T1.__base__ is T2.__base__: return T2
# Otherwise, just give up.
raise TypeError('cannot coerce types %r and %r' % (T1, T2))
def _counts(data):
# Generate a table of sorted (value, frequency) pairs.
table = collections.Counter(iter(data)).most_common()

View File

@ -687,6 +687,26 @@ class DecimalToRatioTest(unittest.TestCase):
self.assertRaises(ValueError, statistics._decimal_to_ratio, d)
class CheckTypeTest(unittest.TestCase):
# Test _check_type private function.
def test_allowed(self):
# Test that a type which should be allowed is allowed.
allowed = set([int, float])
statistics._check_type(int, allowed)
statistics._check_type(float, allowed)
def test_not_allowed(self):
# Test that a type which should not be allowed raises.
allowed = set([int, float])
self.assertRaises(TypeError, statistics._check_type, Decimal, allowed)
def test_add_to_allowed(self):
# Test that a second type will be added to the allowed set.
allowed = set([int])
statistics._check_type(float, allowed)
self.assertEqual(allowed, set([int, float]))
# === Tests for public functions ===
@ -881,40 +901,11 @@ class TestSum(NumericTestCase, UnivariateCommonMixin, UnivariateTypeMixin):
self.assertRaises(TypeError, self.func, [1, 2, 3, b'999'])
def test_mixed_sum(self):
# Mixed sums are allowed.
# Careful here: order matters. Can't mix Fraction and Decimal directly,
# only after they're converted to float.
data = [1, 2, Fraction(1, 2), 3.0, Decimal("0.25")]
self.assertEqual(self.func(data), 6.75)
class SumInternalsTest(NumericTestCase):
# Test internals of the sum function.
def test_ignore_instance_float_method(self):
# Test that __float__ methods on data instances are ignored.
# Python typically calls __dunder__ methods on the class, not the
# instance. The ``sum`` implementation calls __float__ directly. To
# better match the behaviour of Python, we call it only on the class,
# not the instance. This test will fail if somebody "fixes" that code.
# Create a fake __float__ method.
def __float__(self):
raise AssertionError('test fails')
# Inject it into an instance.
class MyNumber(Fraction):
pass
x = MyNumber(3)
x.__float__ = types.MethodType(__float__, x)
# Check it works as expected.
self.assertRaises(AssertionError, x.__float__)
self.assertEqual(float(x), 3.0)
# And now test the function.
self.assertEqual(statistics._sum([1.0, 2.0, x, 4.0]), 10.0)
# Mixed input types are not (currently) allowed.
# Check that mixed data types fail.
self.assertRaises(TypeError, self.func, [1, 2.0, Fraction(1, 2)])
# And so does mixed start argument.
self.assertRaises(TypeError, self.func, [1, 2.0], Decimal(1))
class SumTortureTest(NumericTestCase):

View File

@ -24,6 +24,12 @@ Core and Builtins
Library
-------
- Issue #20481: For at least Python 3.4, the statistics module will require
that all inputs for a single operation be of a single consistent type, or
else a mix of ints and a single other consistent type.  This avoids
some interoperability issues that arose with the previous approach of
coercing to a suitable common type.
- Issue #20478: the statistics module now treats collections.Counter inputs
like any other iterable.