Fixed issue #25177, problems with the mean of very small and very large numbers.
This commit is contained in:
parent
ee1a0e4b8c
commit
40a841bcb9
|
@ -104,6 +104,8 @@ import math
|
|||
|
||||
from fractions import Fraction
|
||||
from decimal import Decimal
|
||||
from itertools import groupby
|
||||
|
||||
|
||||
|
||||
# === Exceptions ===
|
||||
|
@ -115,86 +117,102 @@ class StatisticsError(ValueError):
|
|||
# === Private utilities ===
|
||||
|
||||
def _sum(data, start=0):
|
||||
"""_sum(data [, start]) -> value
|
||||
"""_sum(data [, start]) -> (type, sum, count)
|
||||
|
||||
Return a high-precision sum of the given numeric data. If optional
|
||||
argument ``start`` is given, it is added to the total. If ``data`` is
|
||||
empty, ``start`` (defaulting to 0) is returned.
|
||||
Return a high-precision sum of the given numeric data as a fraction,
|
||||
together with the type to be converted to and the count of items.
|
||||
|
||||
If optional argument ``start`` is given, it is added to the total.
|
||||
If ``data`` is empty, ``start`` (defaulting to 0) is returned.
|
||||
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
>>> _sum([3, 2.25, 4.5, -0.5, 1.0], 0.75)
|
||||
11.0
|
||||
(<class 'float'>, Fraction(11, 1), 5)
|
||||
|
||||
Some sources of round-off error will be avoided:
|
||||
|
||||
>>> _sum([1e50, 1, -1e50] * 1000) # Built-in sum returns zero.
|
||||
1000.0
|
||||
(<class 'float'>, Fraction(1000, 1), 3000)
|
||||
|
||||
Fractions and Decimals are also supported:
|
||||
|
||||
>>> from fractions import Fraction as F
|
||||
>>> _sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)])
|
||||
Fraction(63, 20)
|
||||
(<class 'fractions.Fraction'>, Fraction(63, 20), 4)
|
||||
|
||||
>>> from decimal import Decimal as D
|
||||
>>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")]
|
||||
>>> _sum(data)
|
||||
Decimal('0.6963')
|
||||
(<class 'decimal.Decimal'>, Fraction(6963, 10000), 4)
|
||||
|
||||
Mixed types are currently treated as an error, except that int is
|
||||
allowed.
|
||||
"""
|
||||
# We fail as soon as we reach a value that is not an int or the type of
|
||||
# the first value which is not an int. E.g. _sum([int, int, float, int])
|
||||
# is okay, but sum([int, int, float, Fraction]) is not.
|
||||
allowed_types = {int, type(start)}
|
||||
count = 0
|
||||
n, d = _exact_ratio(start)
|
||||
partials = {d: n} # map {denominator: sum of numerators}
|
||||
# Micro-optimizations.
|
||||
exact_ratio = _exact_ratio
|
||||
partials = {d: n}
|
||||
partials_get = partials.get
|
||||
# Add numerators for each denominator.
|
||||
for x in data:
|
||||
_check_type(type(x), allowed_types)
|
||||
n, d = exact_ratio(x)
|
||||
partials[d] = partials_get(d, 0) + n
|
||||
# Find the expected result type. If allowed_types has only one item, it
|
||||
# will be int; if it has two, use the one which isn't int.
|
||||
assert len(allowed_types) in (1, 2)
|
||||
if len(allowed_types) == 1:
|
||||
assert allowed_types.pop() is int
|
||||
T = int
|
||||
else:
|
||||
T = (allowed_types - {int}).pop()
|
||||
T = _coerce(int, type(start))
|
||||
for typ, values in groupby(data, type):
|
||||
T = _coerce(T, typ) # or raise TypeError
|
||||
for n,d in map(_exact_ratio, values):
|
||||
count += 1
|
||||
partials[d] = partials_get(d, 0) + n
|
||||
if None in partials:
|
||||
assert issubclass(T, (float, Decimal))
|
||||
assert not math.isfinite(partials[None])
|
||||
return T(partials[None])
|
||||
total = Fraction()
|
||||
for d, n in sorted(partials.items()):
|
||||
total += Fraction(n, d)
|
||||
if issubclass(T, int):
|
||||
assert total.denominator == 1
|
||||
return T(total.numerator)
|
||||
if issubclass(T, Decimal):
|
||||
return T(total.numerator)/total.denominator
|
||||
return T(total)
|
||||
# The sum will be a NAN or INF. We can ignore all the finite
|
||||
# partials, and just look at this special one.
|
||||
total = partials[None]
|
||||
assert not _isfinite(total)
|
||||
else:
|
||||
# Sum all the partial sums using builtin sum.
|
||||
# FIXME is this faster if we sum them in order of the denominator?
|
||||
total = sum(Fraction(n, d) for d, n in sorted(partials.items()))
|
||||
return (T, total, count)
|
||||
|
||||
|
||||
def _check_type(T, allowed):
|
||||
if T not in allowed:
|
||||
if len(allowed) == 1:
|
||||
allowed.add(T)
|
||||
else:
|
||||
types = ', '.join([t.__name__ for t in allowed] + [T.__name__])
|
||||
raise TypeError("unsupported mixed types: %s" % types)
|
||||
def _isfinite(x):
|
||||
try:
|
||||
return x.is_finite() # Likely a Decimal.
|
||||
except AttributeError:
|
||||
return math.isfinite(x) # Coerces to float first.
|
||||
|
||||
|
||||
def _coerce(T, S):
|
||||
"""Coerce types T and S to a common type, or raise TypeError.
|
||||
|
||||
Coercion rules are currently an implementation detail. See the CoerceTest
|
||||
test class in test_statistics for details.
|
||||
"""
|
||||
# See http://bugs.python.org/issue24068.
|
||||
assert T is not bool, "initial type T is bool"
|
||||
# If the types are the same, no need to coerce anything. Put this
|
||||
# first, so that the usual case (no coercion needed) happens as soon
|
||||
# as possible.
|
||||
if T is S: return T
|
||||
# Mixed int & other coerce to the other type.
|
||||
if S is int or S is bool: return T
|
||||
if T is int: return S
|
||||
# If one is a (strict) subclass of the other, coerce to the subclass.
|
||||
if issubclass(S, T): return S
|
||||
if issubclass(T, S): return T
|
||||
# Ints coerce to the other type.
|
||||
if issubclass(T, int): return S
|
||||
if issubclass(S, int): return T
|
||||
# Mixed fraction & float coerces to float (or float subclass).
|
||||
if issubclass(T, Fraction) and issubclass(S, float):
|
||||
return S
|
||||
if issubclass(T, float) and issubclass(S, Fraction):
|
||||
return T
|
||||
# Any other combination is disallowed.
|
||||
msg = "don't know how to coerce %s and %s"
|
||||
raise TypeError(msg % (T.__name__, S.__name__))
|
||||
|
||||
|
||||
def _exact_ratio(x):
|
||||
"""Convert Real number x exactly to (numerator, denominator) pair.
|
||||
"""Return Real number x to exact (numerator, denominator) pair.
|
||||
|
||||
>>> _exact_ratio(0.25)
|
||||
(1, 4)
|
||||
|
@ -202,29 +220,31 @@ def _exact_ratio(x):
|
|||
x is expected to be an int, Fraction, Decimal or float.
|
||||
"""
|
||||
try:
|
||||
# Optimise the common case of floats. We expect that the most often
|
||||
# used numeric type will be builtin floats, so try to make this as
|
||||
# fast as possible.
|
||||
if type(x) is float:
|
||||
return x.as_integer_ratio()
|
||||
try:
|
||||
# int, Fraction
|
||||
# x may be an int, Fraction, or Integral ABC.
|
||||
return (x.numerator, x.denominator)
|
||||
except AttributeError:
|
||||
# float
|
||||
try:
|
||||
# x may be a float subclass.
|
||||
return x.as_integer_ratio()
|
||||
except AttributeError:
|
||||
# Decimal
|
||||
try:
|
||||
# x may be a Decimal.
|
||||
return _decimal_to_ratio(x)
|
||||
except AttributeError:
|
||||
msg = "can't convert type '{}' to numerator/denominator"
|
||||
raise TypeError(msg.format(type(x).__name__)) from None
|
||||
# Just give up?
|
||||
pass
|
||||
except (OverflowError, ValueError):
|
||||
# INF or NAN
|
||||
if __debug__:
|
||||
# Decimal signalling NANs cannot be converted to float :-(
|
||||
if isinstance(x, Decimal):
|
||||
assert not x.is_finite()
|
||||
else:
|
||||
assert not math.isfinite(x)
|
||||
# float NAN or INF.
|
||||
assert not math.isfinite(x)
|
||||
return (x, None)
|
||||
msg = "can't convert type '{}' to numerator/denominator"
|
||||
raise TypeError(msg.format(type(x).__name__))
|
||||
|
||||
|
||||
# FIXME This is faster than Fraction.from_decimal, but still too slow.
|
||||
|
@ -239,7 +259,7 @@ def _decimal_to_ratio(d):
|
|||
sign, digits, exp = d.as_tuple()
|
||||
if exp in ('F', 'n', 'N'): # INF, NAN, sNAN
|
||||
assert not d.is_finite()
|
||||
raise ValueError
|
||||
return (d, None)
|
||||
num = 0
|
||||
for digit in digits:
|
||||
num = num*10 + digit
|
||||
|
@ -253,6 +273,24 @@ def _decimal_to_ratio(d):
|
|||
return (num, den)
|
||||
|
||||
|
||||
def _convert(value, T):
|
||||
"""Convert value to given numeric type T."""
|
||||
if type(value) is T:
|
||||
# This covers the cases where T is Fraction, or where value is
|
||||
# a NAN or INF (Decimal or float).
|
||||
return value
|
||||
if issubclass(T, int) and value.denominator != 1:
|
||||
T = float
|
||||
try:
|
||||
# FIXME: what do we do if this overflows?
|
||||
return T(value)
|
||||
except TypeError:
|
||||
if issubclass(T, Decimal):
|
||||
return T(value.numerator)/T(value.denominator)
|
||||
else:
|
||||
raise
|
||||
|
||||
|
||||
def _counts(data):
|
||||
# Generate a table of sorted (value, frequency) pairs.
|
||||
table = collections.Counter(iter(data)).most_common()
|
||||
|
@ -290,7 +328,9 @@ def mean(data):
|
|||
n = len(data)
|
||||
if n < 1:
|
||||
raise StatisticsError('mean requires at least one data point')
|
||||
return _sum(data)/n
|
||||
T, total, count = _sum(data)
|
||||
assert count == n
|
||||
return _convert(total/n, T)
|
||||
|
||||
|
||||
# FIXME: investigate ways to calculate medians without sorting? Quickselect?
|
||||
|
@ -460,12 +500,14 @@ def _ss(data, c=None):
|
|||
"""
|
||||
if c is None:
|
||||
c = mean(data)
|
||||
ss = _sum((x-c)**2 for x in data)
|
||||
T, total, count = _sum((x-c)**2 for x in data)
|
||||
# The following sum should mathematically equal zero, but due to rounding
|
||||
# error may not.
|
||||
ss -= _sum((x-c) for x in data)**2/len(data)
|
||||
assert not ss < 0, 'negative sum of square deviations: %f' % ss
|
||||
return ss
|
||||
U, total2, count2 = _sum((x-c) for x in data)
|
||||
assert T == U and count == count2
|
||||
total -= total2**2/len(data)
|
||||
assert not total < 0, 'negative sum of square deviations: %f' % total
|
||||
return (T, total)
|
||||
|
||||
|
||||
def variance(data, xbar=None):
|
||||
|
@ -511,8 +553,8 @@ def variance(data, xbar=None):
|
|||
n = len(data)
|
||||
if n < 2:
|
||||
raise StatisticsError('variance requires at least two data points')
|
||||
ss = _ss(data, xbar)
|
||||
return ss/(n-1)
|
||||
T, ss = _ss(data, xbar)
|
||||
return _convert(ss/(n-1), T)
|
||||
|
||||
|
||||
def pvariance(data, mu=None):
|
||||
|
@ -560,7 +602,8 @@ def pvariance(data, mu=None):
|
|||
if n < 1:
|
||||
raise StatisticsError('pvariance requires at least one data point')
|
||||
ss = _ss(data, mu)
|
||||
return ss/n
|
||||
T, ss = _ss(data, mu)
|
||||
return _convert(ss/n, T)
|
||||
|
||||
|
||||
def stdev(data, xbar=None):
|
||||
|
|
|
@ -21,6 +21,37 @@ import statistics
|
|||
|
||||
# === Helper functions and class ===
|
||||
|
||||
def _nan_equal(a, b):
|
||||
"""Return True if a and b are both the same kind of NAN.
|
||||
|
||||
>>> _nan_equal(Decimal('NAN'), Decimal('NAN'))
|
||||
True
|
||||
>>> _nan_equal(Decimal('sNAN'), Decimal('sNAN'))
|
||||
True
|
||||
>>> _nan_equal(Decimal('NAN'), Decimal('sNAN'))
|
||||
False
|
||||
>>> _nan_equal(Decimal(42), Decimal('NAN'))
|
||||
False
|
||||
|
||||
>>> _nan_equal(float('NAN'), float('NAN'))
|
||||
True
|
||||
>>> _nan_equal(float('NAN'), 0.5)
|
||||
False
|
||||
|
||||
>>> _nan_equal(float('NAN'), Decimal('NAN'))
|
||||
False
|
||||
|
||||
NAN payloads are not compared.
|
||||
"""
|
||||
if type(a) is not type(b):
|
||||
return False
|
||||
if isinstance(a, float):
|
||||
return math.isnan(a) and math.isnan(b)
|
||||
aexp = a.as_tuple()[2]
|
||||
bexp = b.as_tuple()[2]
|
||||
return (aexp == bexp) and (aexp in ('n', 'N')) # Both NAN or both sNAN.
|
||||
|
||||
|
||||
def _calc_errors(actual, expected):
|
||||
"""Return the absolute and relative errors between two numbers.
|
||||
|
||||
|
@ -675,15 +706,60 @@ class ExactRatioTest(unittest.TestCase):
|
|||
self.assertEqual(_exact_ratio(D("12.345")), (12345, 1000))
|
||||
self.assertEqual(_exact_ratio(D("-1.98")), (-198, 100))
|
||||
|
||||
def test_inf(self):
|
||||
INF = float("INF")
|
||||
class MyFloat(float):
|
||||
pass
|
||||
class MyDecimal(Decimal):
|
||||
pass
|
||||
for inf in (INF, -INF):
|
||||
for type_ in (float, MyFloat, Decimal, MyDecimal):
|
||||
x = type_(inf)
|
||||
ratio = statistics._exact_ratio(x)
|
||||
self.assertEqual(ratio, (x, None))
|
||||
self.assertEqual(type(ratio[0]), type_)
|
||||
self.assertTrue(math.isinf(ratio[0]))
|
||||
|
||||
def test_float_nan(self):
|
||||
NAN = float("NAN")
|
||||
class MyFloat(float):
|
||||
pass
|
||||
for nan in (NAN, MyFloat(NAN)):
|
||||
ratio = statistics._exact_ratio(nan)
|
||||
self.assertTrue(math.isnan(ratio[0]))
|
||||
self.assertIs(ratio[1], None)
|
||||
self.assertEqual(type(ratio[0]), type(nan))
|
||||
|
||||
def test_decimal_nan(self):
|
||||
NAN = Decimal("NAN")
|
||||
sNAN = Decimal("sNAN")
|
||||
class MyDecimal(Decimal):
|
||||
pass
|
||||
for nan in (NAN, MyDecimal(NAN), sNAN, MyDecimal(sNAN)):
|
||||
ratio = statistics._exact_ratio(nan)
|
||||
self.assertTrue(_nan_equal(ratio[0], nan))
|
||||
self.assertIs(ratio[1], None)
|
||||
self.assertEqual(type(ratio[0]), type(nan))
|
||||
|
||||
|
||||
class DecimalToRatioTest(unittest.TestCase):
|
||||
# Test _decimal_to_ratio private function.
|
||||
|
||||
def testSpecialsRaise(self):
|
||||
# Test that NANs and INFs raise ValueError.
|
||||
# Non-special values are covered by _exact_ratio above.
|
||||
for d in (Decimal('NAN'), Decimal('sNAN'), Decimal('INF')):
|
||||
self.assertRaises(ValueError, statistics._decimal_to_ratio, d)
|
||||
def test_infinity(self):
|
||||
# Test that INFs are handled correctly.
|
||||
inf = Decimal('INF')
|
||||
self.assertEqual(statistics._decimal_to_ratio(inf), (inf, None))
|
||||
self.assertEqual(statistics._decimal_to_ratio(-inf), (-inf, None))
|
||||
|
||||
def test_nan(self):
|
||||
# Test that NANs are handled correctly.
|
||||
for nan in (Decimal('NAN'), Decimal('sNAN')):
|
||||
num, den = statistics._decimal_to_ratio(nan)
|
||||
# Because NANs always compare non-equal, we cannot use assertEqual.
|
||||
# Nor can we use an identity test, as we don't guarantee anything
|
||||
# about the object identity.
|
||||
self.assertTrue(_nan_equal(num, nan))
|
||||
self.assertIs(den, None)
|
||||
|
||||
def test_sign(self):
|
||||
# Test sign is calculated correctly.
|
||||
|
@ -718,25 +794,181 @@ class DecimalToRatioTest(unittest.TestCase):
|
|||
self.assertEqual(t, (147000, 1))
|
||||
|
||||
|
||||
class CheckTypeTest(unittest.TestCase):
|
||||
# Test _check_type private function.
|
||||
class IsFiniteTest(unittest.TestCase):
|
||||
# Test _isfinite private function.
|
||||
|
||||
def test_allowed(self):
|
||||
# Test that a type which should be allowed is allowed.
|
||||
allowed = set([int, float])
|
||||
statistics._check_type(int, allowed)
|
||||
statistics._check_type(float, allowed)
|
||||
def test_finite(self):
|
||||
# Test that finite numbers are recognised as finite.
|
||||
for x in (5, Fraction(1, 3), 2.5, Decimal("5.5")):
|
||||
self.assertTrue(statistics._isfinite(x))
|
||||
|
||||
def test_not_allowed(self):
|
||||
# Test that a type which should not be allowed raises.
|
||||
allowed = set([int, float])
|
||||
self.assertRaises(TypeError, statistics._check_type, Decimal, allowed)
|
||||
def test_infinity(self):
|
||||
# Test that INFs are not recognised as finite.
|
||||
for x in (float("inf"), Decimal("inf")):
|
||||
self.assertFalse(statistics._isfinite(x))
|
||||
|
||||
def test_add_to_allowed(self):
|
||||
# Test that a second type will be added to the allowed set.
|
||||
allowed = set([int])
|
||||
statistics._check_type(float, allowed)
|
||||
self.assertEqual(allowed, set([int, float]))
|
||||
def test_nan(self):
|
||||
# Test that NANs are not recognised as finite.
|
||||
for x in (float("nan"), Decimal("NAN"), Decimal("sNAN")):
|
||||
self.assertFalse(statistics._isfinite(x))
|
||||
|
||||
|
||||
class CoerceTest(unittest.TestCase):
|
||||
# Test that private function _coerce correctly deals with types.
|
||||
|
||||
# The coercion rules are currently an implementation detail, although at
|
||||
# some point that should change. The tests and comments here define the
|
||||
# correct implementation.
|
||||
|
||||
# Pre-conditions of _coerce:
|
||||
#
|
||||
# - The first time _sum calls _coerce, the
|
||||
# - coerce(T, S) will never be called with bool as the first argument;
|
||||
# this is a pre-condition, guarded with an assertion.
|
||||
|
||||
#
|
||||
# - coerce(T, T) will always return T; we assume T is a valid numeric
|
||||
# type. Violate this assumption at your own risk.
|
||||
#
|
||||
# - Apart from as above, bool is treated as if it were actually int.
|
||||
#
|
||||
# - coerce(int, X) and coerce(X, int) return X.
|
||||
# -
|
||||
def test_bool(self):
|
||||
# bool is somewhat special, due to the pre-condition that it is
|
||||
# never given as the first argument to _coerce, and that it cannot
|
||||
# be subclassed. So we test it specially.
|
||||
for T in (int, float, Fraction, Decimal):
|
||||
self.assertIs(statistics._coerce(T, bool), T)
|
||||
class MyClass(T): pass
|
||||
self.assertIs(statistics._coerce(MyClass, bool), MyClass)
|
||||
|
||||
def assertCoerceTo(self, A, B):
|
||||
"""Assert that type A coerces to B."""
|
||||
self.assertIs(statistics._coerce(A, B), B)
|
||||
self.assertIs(statistics._coerce(B, A), B)
|
||||
|
||||
def check_coerce_to(self, A, B):
|
||||
"""Checks that type A coerces to B, including subclasses."""
|
||||
# Assert that type A is coerced to B.
|
||||
self.assertCoerceTo(A, B)
|
||||
# Subclasses of A are also coerced to B.
|
||||
class SubclassOfA(A): pass
|
||||
self.assertCoerceTo(SubclassOfA, B)
|
||||
# A, and subclasses of A, are coerced to subclasses of B.
|
||||
class SubclassOfB(B): pass
|
||||
self.assertCoerceTo(A, SubclassOfB)
|
||||
self.assertCoerceTo(SubclassOfA, SubclassOfB)
|
||||
|
||||
def assertCoerceRaises(self, A, B):
|
||||
"""Assert that coercing A to B, or vice versa, raises TypeError."""
|
||||
self.assertRaises(TypeError, statistics._coerce, (A, B))
|
||||
self.assertRaises(TypeError, statistics._coerce, (B, A))
|
||||
|
||||
def check_type_coercions(self, T):
|
||||
"""Check that type T coerces correctly with subclasses of itself."""
|
||||
assert T is not bool
|
||||
# Coercing a type with itself returns the same type.
|
||||
self.assertIs(statistics._coerce(T, T), T)
|
||||
# Coercing a type with a subclass of itself returns the subclass.
|
||||
class U(T): pass
|
||||
class V(T): pass
|
||||
class W(U): pass
|
||||
for typ in (U, V, W):
|
||||
self.assertCoerceTo(T, typ)
|
||||
self.assertCoerceTo(U, W)
|
||||
# Coercing two subclasses that aren't parent/child is an error.
|
||||
self.assertCoerceRaises(U, V)
|
||||
self.assertCoerceRaises(V, W)
|
||||
|
||||
def test_int(self):
|
||||
# Check that int coerces correctly.
|
||||
self.check_type_coercions(int)
|
||||
for typ in (float, Fraction, Decimal):
|
||||
self.check_coerce_to(int, typ)
|
||||
|
||||
def test_fraction(self):
|
||||
# Check that Fraction coerces correctly.
|
||||
self.check_type_coercions(Fraction)
|
||||
self.check_coerce_to(Fraction, float)
|
||||
|
||||
def test_decimal(self):
|
||||
# Check that Decimal coerces correctly.
|
||||
self.check_type_coercions(Decimal)
|
||||
|
||||
def test_float(self):
|
||||
# Check that float coerces correctly.
|
||||
self.check_type_coercions(float)
|
||||
|
||||
def test_non_numeric_types(self):
|
||||
for bad_type in (str, list, type(None), tuple, dict):
|
||||
for good_type in (int, float, Fraction, Decimal):
|
||||
self.assertCoerceRaises(good_type, bad_type)
|
||||
|
||||
def test_incompatible_types(self):
|
||||
# Test that incompatible types raise.
|
||||
for T in (float, Fraction):
|
||||
class MySubclass(T): pass
|
||||
self.assertCoerceRaises(T, Decimal)
|
||||
self.assertCoerceRaises(MySubclass, Decimal)
|
||||
|
||||
|
||||
class ConvertTest(unittest.TestCase):
|
||||
# Test private _convert function.
|
||||
|
||||
def check_exact_equal(self, x, y):
|
||||
"""Check that x equals y, and has the same type as well."""
|
||||
self.assertEqual(x, y)
|
||||
self.assertIs(type(x), type(y))
|
||||
|
||||
def test_int(self):
|
||||
# Test conversions to int.
|
||||
x = statistics._convert(Fraction(71), int)
|
||||
self.check_exact_equal(x, 71)
|
||||
class MyInt(int): pass
|
||||
x = statistics._convert(Fraction(17), MyInt)
|
||||
self.check_exact_equal(x, MyInt(17))
|
||||
|
||||
def test_fraction(self):
|
||||
# Test conversions to Fraction.
|
||||
x = statistics._convert(Fraction(95, 99), Fraction)
|
||||
self.check_exact_equal(x, Fraction(95, 99))
|
||||
class MyFraction(Fraction):
|
||||
def __truediv__(self, other):
|
||||
return self.__class__(super().__truediv__(other))
|
||||
x = statistics._convert(Fraction(71, 13), MyFraction)
|
||||
self.check_exact_equal(x, MyFraction(71, 13))
|
||||
|
||||
def test_float(self):
|
||||
# Test conversions to float.
|
||||
x = statistics._convert(Fraction(-1, 2), float)
|
||||
self.check_exact_equal(x, -0.5)
|
||||
class MyFloat(float):
|
||||
def __truediv__(self, other):
|
||||
return self.__class__(super().__truediv__(other))
|
||||
x = statistics._convert(Fraction(9, 8), MyFloat)
|
||||
self.check_exact_equal(x, MyFloat(1.125))
|
||||
|
||||
def test_decimal(self):
|
||||
# Test conversions to Decimal.
|
||||
x = statistics._convert(Fraction(1, 40), Decimal)
|
||||
self.check_exact_equal(x, Decimal("0.025"))
|
||||
class MyDecimal(Decimal):
|
||||
def __truediv__(self, other):
|
||||
return self.__class__(super().__truediv__(other))
|
||||
x = statistics._convert(Fraction(-15, 16), MyDecimal)
|
||||
self.check_exact_equal(x, MyDecimal("-0.9375"))
|
||||
|
||||
def test_inf(self):
|
||||
for INF in (float('inf'), Decimal('inf')):
|
||||
for inf in (INF, -INF):
|
||||
x = statistics._convert(inf, type(inf))
|
||||
self.check_exact_equal(x, inf)
|
||||
|
||||
def test_nan(self):
|
||||
for nan in (float('nan'), Decimal('NAN'), Decimal('sNAN')):
|
||||
x = statistics._convert(nan, type(nan))
|
||||
self.assertTrue(_nan_equal(x, nan))
|
||||
|
||||
|
||||
# === Tests for public functions ===
|
||||
|
@ -874,52 +1106,71 @@ class UnivariateTypeMixin:
|
|||
self.assertIs(type(result), kind)
|
||||
|
||||
|
||||
class TestSum(NumericTestCase, UnivariateCommonMixin, UnivariateTypeMixin):
|
||||
class TestSumCommon(UnivariateCommonMixin, UnivariateTypeMixin):
|
||||
# Common test cases for statistics._sum() function.
|
||||
|
||||
# This test suite looks only at the numeric value returned by _sum,
|
||||
# after conversion to the appropriate type.
|
||||
def setUp(self):
|
||||
def simplified_sum(*args):
|
||||
T, value, n = statistics._sum(*args)
|
||||
return statistics._coerce(value, T)
|
||||
self.func = simplified_sum
|
||||
|
||||
|
||||
class TestSum(NumericTestCase):
|
||||
# Test cases for statistics._sum() function.
|
||||
|
||||
# These tests look at the entire three value tuple returned by _sum.
|
||||
|
||||
def setUp(self):
|
||||
self.func = statistics._sum
|
||||
|
||||
def test_empty_data(self):
|
||||
# Override test for empty data.
|
||||
for data in ([], (), iter([])):
|
||||
self.assertEqual(self.func(data), 0)
|
||||
self.assertEqual(self.func(data, 23), 23)
|
||||
self.assertEqual(self.func(data, 2.3), 2.3)
|
||||
self.assertEqual(self.func(data), (int, Fraction(0), 0))
|
||||
self.assertEqual(self.func(data, 23), (int, Fraction(23), 0))
|
||||
self.assertEqual(self.func(data, 2.3), (float, Fraction(2.3), 0))
|
||||
|
||||
def test_ints(self):
|
||||
self.assertEqual(self.func([1, 5, 3, -4, -8, 20, 42, 1]), 60)
|
||||
self.assertEqual(self.func([4, 2, 3, -8, 7], 1000), 1008)
|
||||
self.assertEqual(self.func([1, 5, 3, -4, -8, 20, 42, 1]),
|
||||
(int, Fraction(60), 8))
|
||||
self.assertEqual(self.func([4, 2, 3, -8, 7], 1000),
|
||||
(int, Fraction(1008), 5))
|
||||
|
||||
def test_floats(self):
|
||||
self.assertEqual(self.func([0.25]*20), 5.0)
|
||||
self.assertEqual(self.func([0.125, 0.25, 0.5, 0.75], 1.5), 3.125)
|
||||
self.assertEqual(self.func([0.25]*20),
|
||||
(float, Fraction(5.0), 20))
|
||||
self.assertEqual(self.func([0.125, 0.25, 0.5, 0.75], 1.5),
|
||||
(float, Fraction(3.125), 4))
|
||||
|
||||
def test_fractions(self):
|
||||
F = Fraction
|
||||
self.assertEqual(self.func([Fraction(1, 1000)]*500), Fraction(1, 2))
|
||||
self.assertEqual(self.func([Fraction(1, 1000)]*500),
|
||||
(Fraction, Fraction(1, 2), 500))
|
||||
|
||||
def test_decimals(self):
|
||||
D = Decimal
|
||||
data = [D("0.001"), D("5.246"), D("1.702"), D("-0.025"),
|
||||
D("3.974"), D("2.328"), D("4.617"), D("2.843"),
|
||||
]
|
||||
self.assertEqual(self.func(data), Decimal("20.686"))
|
||||
self.assertEqual(self.func(data),
|
||||
(Decimal, Decimal("20.686"), 8))
|
||||
|
||||
def test_compare_with_math_fsum(self):
|
||||
# Compare with the math.fsum function.
|
||||
# Ideally we ought to get the exact same result, but sometimes
|
||||
# we differ by a very slight amount :-(
|
||||
data = [random.uniform(-100, 1000) for _ in range(1000)]
|
||||
self.assertApproxEqual(self.func(data), math.fsum(data), rel=2e-16)
|
||||
self.assertApproxEqual(float(self.func(data)[1]), math.fsum(data), rel=2e-16)
|
||||
|
||||
def test_start_argument(self):
|
||||
# Test that the optional start argument works correctly.
|
||||
data = [random.uniform(1, 1000) for _ in range(100)]
|
||||
t = self.func(data)
|
||||
self.assertEqual(t+42, self.func(data, 42))
|
||||
self.assertEqual(t-23, self.func(data, -23))
|
||||
self.assertEqual(t+1e20, self.func(data, 1e20))
|
||||
t = self.func(data)[1]
|
||||
self.assertEqual(t+42, self.func(data, 42)[1])
|
||||
self.assertEqual(t-23, self.func(data, -23)[1])
|
||||
self.assertEqual(t+Fraction(1e20), self.func(data, 1e20)[1])
|
||||
|
||||
def test_strings_fail(self):
|
||||
# Sum of strings should fail.
|
||||
|
@ -934,7 +1185,7 @@ class TestSum(NumericTestCase, UnivariateCommonMixin, UnivariateTypeMixin):
|
|||
def test_mixed_sum(self):
|
||||
# Mixed input types are not (currently) allowed.
|
||||
# Check that mixed data types fail.
|
||||
self.assertRaises(TypeError, self.func, [1, 2.0, Fraction(1, 2)])
|
||||
self.assertRaises(TypeError, self.func, [1, 2.0, Decimal(1)])
|
||||
# And so does mixed start argument.
|
||||
self.assertRaises(TypeError, self.func, [1, 2.0], Decimal(1))
|
||||
|
||||
|
@ -942,11 +1193,14 @@ class TestSum(NumericTestCase, UnivariateCommonMixin, UnivariateTypeMixin):
|
|||
class SumTortureTest(NumericTestCase):
|
||||
def test_torture(self):
|
||||
# Tim Peters' torture test for sum, and variants of same.
|
||||
self.assertEqual(statistics._sum([1, 1e100, 1, -1e100]*10000), 20000.0)
|
||||
self.assertEqual(statistics._sum([1e100, 1, 1, -1e100]*10000), 20000.0)
|
||||
self.assertApproxEqual(
|
||||
statistics._sum([1e-100, 1, 1e-100, -1]*10000), 2.0e-96, rel=5e-16
|
||||
)
|
||||
self.assertEqual(statistics._sum([1, 1e100, 1, -1e100]*10000),
|
||||
(float, Fraction(20000.0), 40000))
|
||||
self.assertEqual(statistics._sum([1e100, 1, 1, -1e100]*10000),
|
||||
(float, Fraction(20000.0), 40000))
|
||||
T, num, count = statistics._sum([1e-100, 1, 1e-100, -1]*10000)
|
||||
self.assertIs(T, float)
|
||||
self.assertEqual(count, 40000)
|
||||
self.assertApproxEqual(float(num), 2.0e-96, rel=5e-16)
|
||||
|
||||
|
||||
class SumSpecialValues(NumericTestCase):
|
||||
|
@ -955,7 +1209,7 @@ class SumSpecialValues(NumericTestCase):
|
|||
def test_nan(self):
|
||||
for type_ in (float, Decimal):
|
||||
nan = type_('nan')
|
||||
result = statistics._sum([1, nan, 2])
|
||||
result = statistics._sum([1, nan, 2])[1]
|
||||
self.assertIs(type(result), type_)
|
||||
self.assertTrue(math.isnan(result))
|
||||
|
||||
|
@ -968,10 +1222,10 @@ class SumSpecialValues(NumericTestCase):
|
|||
|
||||
def do_test_inf(self, inf):
|
||||
# Adding a single infinity gives infinity.
|
||||
result = statistics._sum([1, 2, inf, 3])
|
||||
result = statistics._sum([1, 2, inf, 3])[1]
|
||||
self.check_infinity(result, inf)
|
||||
# Adding two infinities of the same sign also gives infinity.
|
||||
result = statistics._sum([1, 2, inf, 3, inf, 4])
|
||||
result = statistics._sum([1, 2, inf, 3, inf, 4])[1]
|
||||
self.check_infinity(result, inf)
|
||||
|
||||
def test_float_inf(self):
|
||||
|
@ -987,7 +1241,7 @@ class SumSpecialValues(NumericTestCase):
|
|||
def test_float_mismatched_infs(self):
|
||||
# Test that adding two infinities of opposite sign gives a NAN.
|
||||
inf = float('inf')
|
||||
result = statistics._sum([1, 2, inf, 3, -inf, 4])
|
||||
result = statistics._sum([1, 2, inf, 3, -inf, 4])[1]
|
||||
self.assertTrue(math.isnan(result))
|
||||
|
||||
def test_decimal_extendedcontext_mismatched_infs_to_nan(self):
|
||||
|
@ -995,7 +1249,7 @@ class SumSpecialValues(NumericTestCase):
|
|||
inf = Decimal('inf')
|
||||
data = [1, 2, inf, 3, -inf, 4]
|
||||
with decimal.localcontext(decimal.ExtendedContext):
|
||||
self.assertTrue(math.isnan(statistics._sum(data)))
|
||||
self.assertTrue(math.isnan(statistics._sum(data)[1]))
|
||||
|
||||
def test_decimal_basiccontext_mismatched_infs_to_nan(self):
|
||||
# Test adding Decimal INFs with opposite sign raises InvalidOperation.
|
||||
|
@ -1111,6 +1365,19 @@ class TestMean(NumericTestCase, AverageMixin, UnivariateTypeMixin):
|
|||
d = Decimal('1e4')
|
||||
self.assertEqual(statistics.mean([d]), d)
|
||||
|
||||
def test_regression_25177(self):
|
||||
# Regression test for issue 25177.
|
||||
# Ensure very big and very small floats don't overflow.
|
||||
# See http://bugs.python.org/issue25177.
|
||||
self.assertEqual(statistics.mean(
|
||||
[8.988465674311579e+307, 8.98846567431158e+307]),
|
||||
8.98846567431158e+307)
|
||||
big = 8.98846567431158e+307
|
||||
tiny = 5e-324
|
||||
for n in (2, 3, 5, 200):
|
||||
self.assertEqual(statistics.mean([big]*n), big)
|
||||
self.assertEqual(statistics.mean([tiny]*n), tiny)
|
||||
|
||||
|
||||
class TestMedian(NumericTestCase, AverageMixin):
|
||||
# Common tests for median and all median.* functions.
|
||||
|
|
|
@ -20,6 +20,10 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #25177: Fixed problem with the mean of very small and very large
|
||||
numbers. As a side effect, statistics.mean and statistics.variance should
|
||||
be significantly faster.
|
||||
|
||||
- Issue #25718: Fixed copying object with state with boolean value is false.
|
||||
|
||||
- Issue #10131: Fixed deep copying of minidom documents. Based on patch
|
||||
|
|
Loading…
Reference in New Issue