From 73afe2a972e30a3e0f87401be2fa38c67e2cb964 Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Sat, 8 Feb 2014 19:58:04 +1000 Subject: [PATCH] Close #20481: Disallow mixed type input in statistics The most appropriate coercion rules are not yet clear, so simply disallowing mixed type input for 3.4. (Committed on Steven's behalf) --- Doc/library/statistics.rst | 10 ++++++ Lib/statistics.py | 67 ++++++++++++++----------------------- Lib/test/test_statistics.py | 59 ++++++++++++++------------------ Misc/NEWS | 6 ++++ 4 files changed, 66 insertions(+), 76 deletions(-) diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst index e6c5959d2c5..4e7783872a4 100644 --- a/Doc/library/statistics.rst +++ b/Doc/library/statistics.rst @@ -20,6 +20,16 @@ This module provides functions for calculating mathematical statistics of numeric (:class:`Real`-valued) data. +.. note:: + + Unless explicitly noted otherwise, these functions support :class:`int`, + :class:`float`, :class:`decimal.Decimal` and :class:`fractions.Fraction`. + Behaviour with other types (whether in the numeric tower or not) is + currently unsupported. Mixed types are also undefined and + implementation-dependent. If your input data consists of mixed types, + you may be able to use :func:`map` to ensure a consistent result, e.g. + ``map(float, input_data)``. + Averages and measures of central location ----------------------------------------- diff --git a/Lib/statistics.py b/Lib/statistics.py index 9359ed71e51..e1dfbd49317 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -144,19 +144,31 @@ def _sum(data, start=0): >>> _sum(data) Decimal('0.6963') + Mixed types are currently treated as an error, except that int is + allowed. """ + # We fail as soon as we reach a value that is not an int or the type of + # the first value which is not an int. E.g. _sum([int, int, float, int]) + # is okay, but _sum([int, int, float, Fraction]) is not. 
+ allowed_types = set([int, type(start)]) n, d = _exact_ratio(start) - T = type(start) partials = {d: n} # map {denominator: sum of numerators} # Micro-optimizations. - coerce_types = _coerce_types exact_ratio = _exact_ratio partials_get = partials.get - # Add numerators for each denominator, and track the "current" type. + # Add numerators for each denominator. for x in data: - T = _coerce_types(T, type(x)) + _check_type(type(x), allowed_types) n, d = exact_ratio(x) partials[d] = partials_get(d, 0) + n + # Find the expected result type. If allowed_types has only one item, it + # will be int; if it has two, use the one which isn't int. + assert len(allowed_types) in (1, 2) + if len(allowed_types) == 1: + assert allowed_types.pop() is int + T = int + else: + T = (allowed_types - set([int])).pop() if None in partials: assert issubclass(T, (float, Decimal)) assert not math.isfinite(partials[None]) @@ -172,6 +184,15 @@ def _sum(data, start=0): return T(total) +def _check_type(T, allowed): + if T not in allowed: + if len(allowed) == 1: + allowed.add(T) + else: + types = ', '.join([t.__name__ for t in allowed] + [T.__name__]) + raise TypeError("unsupported mixed types: %s" % types) + + def _exact_ratio(x): """Convert Real number x exactly to (numerator, denominator) pair. @@ -228,44 +249,6 @@ def _decimal_to_ratio(d): return (num, den) -def _coerce_types(T1, T2): - """Coerce types T1 and T2 to a common type. - - >>> _coerce_types(int, float) - <class 'float'> - - Coercion is performed according to this table, where "N/A" means - that a TypeError exception is raised. 
- - +----------+-----------+-----------+-----------+----------+ - | | int | Fraction | Decimal | float | - +----------+-----------+-----------+-----------+----------+ - | int | int | Fraction | Decimal | float | - | Fraction | Fraction | Fraction | N/A | float | - | Decimal | Decimal | N/A | Decimal | float | - | float | float | float | float | float | - +----------+-----------+-----------+-----------+----------+ - - Subclasses trump their parent class; two subclasses of the same - base class will be coerced to the second of the two. - - """ - # Get the common/fast cases out of the way first. - if T1 is T2: return T1 - if T1 is int: return T2 - if T2 is int: return T1 - # Subclasses trump their parent class. - if issubclass(T2, T1): return T2 - if issubclass(T1, T2): return T1 - # Floats trump everything else. - if issubclass(T2, float): return T2 - if issubclass(T1, float): return T1 - # Subclasses of the same base class give priority to the second. - if T1.__base__ is T2.__base__: return T2 - # Otherwise, just give up. - raise TypeError('cannot coerce types %r and %r' % (T1, T2)) - - def _counts(data): # Generate a table of sorted (value, frequency) pairs. table = collections.Counter(iter(data)).most_common() diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index 6db821fc6e3..49b8597a91b 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -687,6 +687,26 @@ class DecimalToRatioTest(unittest.TestCase): self.assertRaises(ValueError, statistics._decimal_to_ratio, d) +class CheckTypeTest(unittest.TestCase): + # Test _check_type private function. + + def test_allowed(self): + # Test that a type which should be allowed is allowed. + allowed = set([int, float]) + statistics._check_type(int, allowed) + statistics._check_type(float, allowed) + + def test_not_allowed(self): + # Test that a type which should not be allowed raises. 
+ allowed = set([int, float]) + self.assertRaises(TypeError, statistics._check_type, Decimal, allowed) + + def test_add_to_allowed(self): + # Test that a second type will be added to the allowed set. + allowed = set([int]) + statistics._check_type(float, allowed) + self.assertEqual(allowed, set([int, float])) + # === Tests for public functions === @@ -881,40 +901,11 @@ class TestSum(NumericTestCase, UnivariateCommonMixin, UnivariateTypeMixin): self.assertRaises(TypeError, self.func, [1, 2, 3, b'999']) def test_mixed_sum(self): - # Mixed sums are allowed. - - # Careful here: order matters. Can't mix Fraction and Decimal directly, - # only after they're converted to float. - data = [1, 2, Fraction(1, 2), 3.0, Decimal("0.25")] - self.assertEqual(self.func(data), 6.75) - - -class SumInternalsTest(NumericTestCase): - # Test internals of the sum function. - - def test_ignore_instance_float_method(self): - # Test that __float__ methods on data instances are ignored. - - # Python typically calls __dunder__ methods on the class, not the - # instance. The ``sum`` implementation calls __float__ directly. To - # better match the behaviour of Python, we call it only on the class, - # not the instance. This test will fail if somebody "fixes" that code. - - # Create a fake __float__ method. - def __float__(self): - raise AssertionError('test fails') - - # Inject it into an instance. - class MyNumber(Fraction): - pass - x = MyNumber(3) - x.__float__ = types.MethodType(__float__, x) - - # Check it works as expected. - self.assertRaises(AssertionError, x.__float__) - self.assertEqual(float(x), 3.0) - # And now test the function. - self.assertEqual(statistics._sum([1.0, 2.0, x, 4.0]), 10.0) + # Mixed input types are not (currently) allowed. + # Check that mixed data types fail. + self.assertRaises(TypeError, self.func, [1, 2.0, Fraction(1, 2)]) + # And so does mixed start argument. 
+ self.assertRaises(TypeError, self.func, [1, 2.0], Decimal(1)) class SumTortureTest(NumericTestCase): diff --git a/Misc/NEWS b/Misc/NEWS index 9f8f43c7cbc..c0da6aa5517 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -24,6 +24,12 @@ Core and Builtins Library ------- +- Issue #20481: For at least Python 3.4, the statistics module will require + that all inputs for a single operation be of a single consistent type, or + else a mix of ints and a single other consistent type. This avoids + some interoperability issues that arose with the previous approach of + coercing to a suitable common type. + - Issue #20478: the statistics module now treats collections.Counter inputs like any other iterable.