mirror of https://github.com/python/cpython
Issue 26002 and 25974
patches by Upendra Kumar and Stefan Krah speed up median by using bisect, and general speedup for Decimals using as_integer_ratio
This commit is contained in:
parent
ad039f7548
commit
3b06e24352
|
@ -105,6 +105,7 @@ import math
|
|||
from fractions import Fraction
|
||||
from decimal import Decimal
|
||||
from itertools import groupby
|
||||
from bisect import bisect_left, bisect_right
|
||||
|
||||
|
||||
|
||||
|
@ -223,56 +224,26 @@ def _exact_ratio(x):
|
|||
# Optimise the common case of floats. We expect that the most often
|
||||
# used numeric type will be builtin floats, so try to make this as
|
||||
# fast as possible.
|
||||
if type(x) is float:
|
||||
if type(x) is float or type(x) is Decimal:
|
||||
return x.as_integer_ratio()
|
||||
try:
|
||||
# x may be an int, Fraction, or Integral ABC.
|
||||
return (x.numerator, x.denominator)
|
||||
except AttributeError:
|
||||
try:
|
||||
# x may be a float subclass.
|
||||
# x may be a float or Decimal subclass.
|
||||
return x.as_integer_ratio()
|
||||
except AttributeError:
|
||||
try:
|
||||
# x may be a Decimal.
|
||||
return _decimal_to_ratio(x)
|
||||
except AttributeError:
|
||||
# Just give up?
|
||||
pass
|
||||
# Just give up?
|
||||
pass
|
||||
except (OverflowError, ValueError):
|
||||
# float NAN or INF.
|
||||
assert not math.isfinite(x)
|
||||
assert not _isfinite(x)
|
||||
return (x, None)
|
||||
msg = "can't convert type '{}' to numerator/denominator"
|
||||
raise TypeError(msg.format(type(x).__name__))
|
||||
|
||||
|
||||
# FIXME This is faster than Fraction.from_decimal, but still too slow.
|
||||
def _decimal_to_ratio(d):
|
||||
"""Convert Decimal d to exact integer ratio (numerator, denominator).
|
||||
|
||||
>>> from decimal import Decimal
|
||||
>>> _decimal_to_ratio(Decimal("2.6"))
|
||||
(26, 10)
|
||||
|
||||
"""
|
||||
sign, digits, exp = d.as_tuple()
|
||||
if exp in ('F', 'n', 'N'): # INF, NAN, sNAN
|
||||
assert not d.is_finite()
|
||||
return (d, None)
|
||||
num = 0
|
||||
for digit in digits:
|
||||
num = num*10 + digit
|
||||
if exp < 0:
|
||||
den = 10**-exp
|
||||
else:
|
||||
num *= 10**exp
|
||||
den = 1
|
||||
if sign:
|
||||
num = -num
|
||||
return (num, den)
|
||||
|
||||
|
||||
def _convert(value, T):
|
||||
"""Convert value to given numeric type T."""
|
||||
if type(value) is T:
|
||||
|
@ -305,6 +276,21 @@ def _counts(data):
|
|||
return table
|
||||
|
||||
|
||||
def _find_lteq(a, x):
|
||||
'Locate the leftmost value exactly equal to x'
|
||||
i = bisect_left(a, x)
|
||||
if i != len(a) and a[i] == x:
|
||||
return i
|
||||
raise ValueError
|
||||
|
||||
|
||||
def _find_rteq(a, l, x):
|
||||
'Locate the rightmost value exactly equal to x'
|
||||
i = bisect_right(a, x, lo=l)
|
||||
if i != (len(a)+1) and a[i-1] == x:
|
||||
return i-1
|
||||
raise ValueError
|
||||
|
||||
# === Measures of central tendency (averages) ===
|
||||
|
||||
def mean(data):
|
||||
|
@ -442,9 +428,15 @@ def median_grouped(data, interval=1):
|
|||
except TypeError:
|
||||
# Mixed type. For now we just coerce to float.
|
||||
L = float(x) - float(interval)/2
|
||||
cf = data.index(x) # Number of values below the median interval.
|
||||
# FIXME The following line could be more efficient for big lists.
|
||||
f = data.count(x) # Number of data points in the median interval.
|
||||
|
||||
# Uses bisection search to search for x in data with log(n) time complexity
|
||||
# Find the position of leftmost occurence of x in data
|
||||
l1 = _find_lteq(data, x)
|
||||
# Find the position of rightmost occurence of x in data[l1...len(data)]
|
||||
# Assuming always l1 <= l2
|
||||
l2 = _find_rteq(data, l1, x)
|
||||
cf = l1
|
||||
f = l2 - l1 + 1
|
||||
return L + interval*(n/2 - cf)/f
|
||||
|
||||
|
||||
|
|
|
@ -699,13 +699,12 @@ class ExactRatioTest(unittest.TestCase):
|
|||
num, den = statistics._exact_ratio(x)
|
||||
self.assertEqual(x, num/den)
|
||||
|
||||
@unittest.skipIf(True, "temporarily disabled: see #25928")
|
||||
def test_decimal(self):
|
||||
D = Decimal
|
||||
_exact_ratio = statistics._exact_ratio
|
||||
self.assertEqual(_exact_ratio(D("0.125")), (125, 1000))
|
||||
self.assertEqual(_exact_ratio(D("12.345")), (12345, 1000))
|
||||
self.assertEqual(_exact_ratio(D("-1.98")), (-198, 100))
|
||||
self.assertEqual(_exact_ratio(D("0.125")), (1, 8))
|
||||
self.assertEqual(_exact_ratio(D("12.345")), (2469, 200))
|
||||
self.assertEqual(_exact_ratio(D("-1.98")), (-99, 50))
|
||||
|
||||
def test_inf(self):
|
||||
INF = float("INF")
|
||||
|
@ -731,7 +730,6 @@ class ExactRatioTest(unittest.TestCase):
|
|||
self.assertIs(ratio[1], None)
|
||||
self.assertEqual(type(ratio[0]), type(nan))
|
||||
|
||||
@unittest.skipIf(True, "temporarily disabled: see #25928")
|
||||
def test_decimal_nan(self):
|
||||
NAN = Decimal("NAN")
|
||||
sNAN = Decimal("sNAN")
|
||||
|
@ -745,18 +743,18 @@ class ExactRatioTest(unittest.TestCase):
|
|||
|
||||
|
||||
class DecimalToRatioTest(unittest.TestCase):
|
||||
# Test _decimal_to_ratio private function.
|
||||
# Test _exact_ratio private function.
|
||||
|
||||
def test_infinity(self):
|
||||
# Test that INFs are handled correctly.
|
||||
inf = Decimal('INF')
|
||||
self.assertEqual(statistics._decimal_to_ratio(inf), (inf, None))
|
||||
self.assertEqual(statistics._decimal_to_ratio(-inf), (-inf, None))
|
||||
self.assertEqual(statistics._exact_ratio(inf), (inf, None))
|
||||
self.assertEqual(statistics._exact_ratio(-inf), (-inf, None))
|
||||
|
||||
def test_nan(self):
|
||||
# Test that NANs are handled correctly.
|
||||
for nan in (Decimal('NAN'), Decimal('sNAN')):
|
||||
num, den = statistics._decimal_to_ratio(nan)
|
||||
num, den = statistics._exact_ratio(nan)
|
||||
# Because NANs always compare non-equal, we cannot use assertEqual.
|
||||
# Nor can we use an identity test, as we don't guarantee anything
|
||||
# about the object identity.
|
||||
|
@ -769,30 +767,30 @@ class DecimalToRatioTest(unittest.TestCase):
|
|||
for d in numbers:
|
||||
# First test positive decimals.
|
||||
assert d > 0
|
||||
num, den = statistics._decimal_to_ratio(d)
|
||||
num, den = statistics._exact_ratio(d)
|
||||
self.assertGreaterEqual(num, 0)
|
||||
self.assertGreater(den, 0)
|
||||
# Then test negative decimals.
|
||||
num, den = statistics._decimal_to_ratio(-d)
|
||||
num, den = statistics._exact_ratio(-d)
|
||||
self.assertLessEqual(num, 0)
|
||||
self.assertGreater(den, 0)
|
||||
|
||||
def test_negative_exponent(self):
|
||||
# Test result when the exponent is negative.
|
||||
t = statistics._decimal_to_ratio(Decimal("0.1234"))
|
||||
self.assertEqual(t, (1234, 10000))
|
||||
t = statistics._exact_ratio(Decimal("0.1234"))
|
||||
self.assertEqual(t, (617, 5000))
|
||||
|
||||
def test_positive_exponent(self):
|
||||
# Test results when the exponent is positive.
|
||||
t = statistics._decimal_to_ratio(Decimal("1.234e7"))
|
||||
t = statistics._exact_ratio(Decimal("1.234e7"))
|
||||
self.assertEqual(t, (12340000, 1))
|
||||
|
||||
def test_regression_20536(self):
|
||||
# Regression test for issue 20536.
|
||||
# See http://bugs.python.org/issue20536
|
||||
t = statistics._decimal_to_ratio(Decimal("1e2"))
|
||||
t = statistics._exact_ratio(Decimal("1e2"))
|
||||
self.assertEqual(t, (100, 1))
|
||||
t = statistics._decimal_to_ratio(Decimal("1.47e5"))
|
||||
t = statistics._exact_ratio(Decimal("1.47e5"))
|
||||
self.assertEqual(t, (147000, 1))
|
||||
|
||||
|
||||
|
@ -1260,7 +1258,6 @@ class SumSpecialValues(NumericTestCase):
|
|||
with decimal.localcontext(decimal.BasicContext):
|
||||
self.assertRaises(decimal.InvalidOperation, statistics._sum, data)
|
||||
|
||||
@unittest.skipIf(True, "temporarily disabled: see #25928")
|
||||
def test_decimal_snan_raises(self):
|
||||
# Adding sNAN should raise InvalidOperation.
|
||||
sNAN = Decimal('sNAN')
|
||||
|
|
Loading…
Reference in New Issue