diff --git a/Lib/test/floating_points.txt b/Lib/test/floating_points.txt index 70d21ea7140..539073d19d8 100644 --- a/Lib/test/floating_points.txt +++ b/Lib/test/floating_points.txt @@ -1019,3 +1019,10 @@ +43723334984997307E-26 +10182419849537963E-24 -93501703572661982E-26 + +# A value that caused a crash in debug builds for Python >= 2.7, 3.1 +# See http://bugs.python.org/issue7632 +2183167012312112312312.23538020374420446192e-370 + +# Another value designed to test a corner case of Python's strtod code. +0.99999999999999999999999999999999999999999e+23 diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index e2d1ea0a7cd..fd0f410f918 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -7,6 +7,7 @@ import math from math import isinf, isnan, copysign, ldexp import operator import random, fractions +import re INF = float("inf") NAN = float("nan") @@ -20,6 +21,74 @@ requires_IEEE_754 = unittest.skipUnless( test_dir = os.path.dirname(__file__) or os.curdir format_testfile = os.path.join(test_dir, 'formatfloat_testcases.txt') +finite_decimal_parser = re.compile(r""" # A numeric string consists of: + (?P[-+])? # an optional sign, followed by + (?=\d|\.\d) # a number with at least one digit + (?P\d*) # having a (possibly empty) integer part + (?:\.(?P\d*))? # followed by an optional fractional part + (?:E(?P[-+]?\d+))? # and an optional exponent + \Z +""", re.VERBOSE | re.IGNORECASE | re.UNICODE).match + +# Pure Python version of correctly rounded string->float conversion. +# Avoids any use of floating-point by returning the result as a hex string. +def strtod(s, mant_dig=53, min_exp = -1021, max_exp = 1024): + """Convert a finite decimal string to a hex string representing an + IEEE 754 binary64 float. Return 'inf' or '-inf' on overflow. + This function makes no use of floating-point arithmetic at any + stage.""" + + # parse string into a pair of integers 'a' and 'b' such that + # abs(decimal value) = a/b, and a boolean 'negative'. + m = finite_decimal_parser(s) + if m is None: + raise ValueError('invalid numeric string') + fraction = m.group('frac') or '' + intpart = int(m.group('int') + fraction) + exp = int(m.group('exp') or '0') - len(fraction) + negative = m.group('sign') == '-' + a, b = intpart*10**max(exp, 0), 10**max(0, -exp) + + # quick return for zeros + if not a: + return '-0x0.0p+0' if negative else '0x0.0p+0' + + # compute exponent e for result; may be one too small in the case + # that the rounded value of a/b lies in a different binade from a/b + d = a.bit_length() - b.bit_length() + d += (a >> d if d >= 0 else a << -d) >= b + e = max(d, min_exp) - mant_dig + + # approximate a/b by number of the form q * 2**e; adjust e if necessary + a, b = a << max(-e, 0), b << max(e, 0) + q, r = divmod(a, b) + if 2*r > b or 2*r == b and q & 1: + q += 1 + if q.bit_length() == mant_dig+1: + q //= 2 + e += 1 + + # double check that (q, e) has the right form + assert q.bit_length() <= mant_dig and e >= min_exp - mant_dig + assert q.bit_length() == mant_dig or e == min_exp - mant_dig + + # check for overflow and underflow + if e + q.bit_length() > max_exp: + return '-inf' if negative else 'inf' + if not q: + return '-0x0.0p+0' if negative else '0x0.0p+0' + + # for hex representation, shift so # bits after point is a multiple of 4 + hexdigs = 1 + (mant_dig-2)//4 + shift = 3 - (mant_dig-2)%4 + q, e = q << shift, e - shift + return '{}0x{:x}.{:0{}x}p{:+d}'.format( + '-' if negative else '', + q // 16**hexdigs, + q % 16**hexdigs, + hexdigs, + e + 4*hexdigs) + class GeneralFloatCases(unittest.TestCase): def test_float(self): @@ -1263,6 +1332,38 @@ class HexFloatTestCase(unittest.TestCase): else: self.identical(x, fromHex(toHex(x))) +class StrtodTestCase(unittest.TestCase): + def check_string(self, s): + expected = strtod(s) + try: + fs = float(s) + except OverflowError: + got = '-inf' if s[0] == '-' else 'inf' + else: + got = fs.hex() + self.assertEqual(expected, got, + "Incorrectly rounded str->float conversion for " + "{}: expected {}, got {}".format(s, expected, got)) + + @unittest.skipUnless(getattr(sys, 'float_repr_style', '') == 'short', + "applies only when using short float repr style") + def test_bug7632(self): + # check a few particular values that gave incorrectly rounded + # results with previous versions of dtoa.c + test_strings = [ + '94393431193180696942841837085033647913224148539854e-358', + '12579816049008305546974391768996369464963024663104e-357', + '17489628565202117263145367596028389348922981857013e-357', + '18487398785991994634182916638542680759613590482273e-357', + '32002864200581033134358724675198044527469366773928e-358', + '73608278998966969345824653500136787876436005957953e-358', + '64774478836417299491718435234611299336288082136054e-358', + '13704940134126574534878641876947980878824688451169e-357', + '46697445774047060960624497964425416610480524760471e-358', + ] + for s in test_strings: + self.check_string(s) + def test_main(): support.run_unittest( @@ -1275,6 +1376,7 @@ def test_main(): RoundTestCase, InfNanTest, HexFloatTestCase, + StrtodTestCase, ) if __name__ == '__main__': diff --git a/Misc/NEWS b/Misc/NEWS index db895b0f0e2..c7c943f8f63 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,11 @@ What's New in Python 3.2 Alpha 1? Core and Builtins ----------------- +- Issue #7632: Fix a crash in dtoa.c that occurred in debug builds + when parsing certain long numeric strings corresponding to subnormal + values. Also fix a number of bugs in dtoa.c that could lead to + incorrectly rounded results when converting strings to floats. + - The __complex__ method is now looked up on the class of instances to make it consistent with other special methods. diff --git a/Python/dtoa.c b/Python/dtoa.c index 4f8bab78609..1fe20f4f53b 100644 --- a/Python/dtoa.c +++ b/Python/dtoa.c @@ -200,10 +200,11 @@ typedef union { double d; ULong L[2]; } U; #define STRTOD_DIGLIM 40 #endif -#ifdef DIGLIM_DEBUG -extern int strtod_diglim; -#else -#define strtod_diglim STRTOD_DIGLIM +/* maximum permitted exponent value for strtod; exponents larger than + MAX_ABS_EXP in absolute value get truncated to +-MAX_ABS_EXP. MAX_ABS_EXP + should fit into an int. */ +#ifndef MAX_ABS_EXP +#define MAX_ABS_EXP 19999U #endif /* The following definition of Storeinc is appropriate for MIPS processors. @@ -269,8 +270,7 @@ extern int strtod_diglim; typedef struct BCinfo BCinfo; struct BCinfo { - int dp0, dp1, dplen, dsign, e0, inexact; - int nd, nd0, rounding, scale, uflchk; + int dp0, dp1, dplen, dsign, e0, nd, nd0, scale; }; #define FFFFFFFF 0xffffffffUL @@ -1130,6 +1130,26 @@ quorem(Bigint *b, Bigint *S) return q; } +/* version of ulp(x) that takes bc.scale into account. + + Assuming that x is finite and nonzero, and x / 2^bc.scale is exactly + representable as a double, sulp(x) is equivalent to 2^bc.scale * ulp(x / + 2^bc.scale). */ + +static double +sulp(U *x, BCinfo *bc) +{ + U u; + + if (bc->scale && 2*P + 1 - ((word0(x) & Exp_mask) >> Exp_shift) > 0) { + /* rv/2^bc->scale is subnormal */ + word0(&u) = (P+2)*Exp_msk1; + word1(&u) = 0; + return u.d; + } + else + return ulp(x); +} /* return 0 on success, -1 on failure */ @@ -1142,7 +1162,7 @@ bigcomp(U *rv, const char *s0, BCinfo *bc) dsign = bc->dsign; nd = bc->nd; nd0 = bc->nd0; - p5 = nd + bc->e0 - 1; + p5 = nd + bc->e0; speccase = 0; if (rv->d == 0.) { /* special case: value near underflow-to-zero */ /* threshold was rounded to zero */ @@ -1227,17 +1247,21 @@ bigcomp(U *rv, const char *s0, BCinfo *bc) } } - /* Now b/d = exactly half-way between the two floating-point values */ - /* on either side of the input string. Compute first digit of b/d. */ - - if (!(dig = quorem(b,d))) { - b = multadd(b, 10, 0); /* very unlikely */ - if (b == NULL) { - Bfree(d); - return -1; - } - dig = quorem(b,d); + /* Now 10*b/d = exactly half-way between the two floating-point values + on either side of the input string. If b >= d, round down. */ + if (cmp(b, d) >= 0) { + dd = -1; + goto ret; } + + /* Compute first digit of 10*b/d. */ + b = multadd(b, 10, 0); + if (b == NULL) { + Bfree(d); + return -1; + } + dig = quorem(b, d); + assert(dig < 10); /* Compare b/d with s0 */ @@ -1285,12 +1309,12 @@ bigcomp(U *rv, const char *s0, BCinfo *bc) else if (dd < 0) { if (!dsign) /* does not happen for round-near */ retlow1: - dval(rv) -= ulp(rv); + dval(rv) -= sulp(rv, bc); } else if (dd > 0) { if (dsign) { rethi1: - dval(rv) += ulp(rv); + dval(rv) += sulp(rv, bc); } } else { @@ -1312,13 +1336,12 @@ _Py_dg_strtod(const char *s00, char **se) int esign, i, j, k, nd, nd0, nf, nz, nz0, sign; const char *s, *s0, *s1; double aadj, aadj1; - Long L; U aadj2, adj, rv, rv0; - ULong y, z; + ULong y, z, L; BCinfo bc; Bigint *bb, *bb1, *bd, *bd0, *bs, *delta; - sign = nz0 = nz = bc.dplen = bc.uflchk = 0; + sign = nz0 = nz = bc.dplen = 0; dval(&rv) = 0.; for(s = s00;;s++) switch(*s) { case '-': @@ -1413,11 +1436,11 @@ _Py_dg_strtod(const char *s00, char **se) s1 = s; while((c = *++s) >= '0' && c <= '9') L = 10*L + c - '0'; - if (s - s1 > 8 || L > 19999) + if (s - s1 > 8 || L > MAX_ABS_EXP) /* Avoid confusion from exponents * so large that e might overflow. */ - e = 19999; /* safe for 16 bit ints */ + e = (int)MAX_ABS_EXP; /* safe for 16 bit ints */ else e = (int)L; if (esign) @@ -1555,11 +1578,11 @@ _Py_dg_strtod(const char *s00, char **se) /* Put digits into bd: true value = bd * 10^e */ bc.nd = nd; - bc.nd0 = nd0; /* Only needed if nd > strtod_diglim, but done here */ + bc.nd0 = nd0; /* Only needed if nd > STRTOD_DIGLIM, but done here */ /* to silence an erroneous warning about bc.nd0 */ /* possibly not being initialized. */ - if (nd > strtod_diglim) { - /* ASSERT(strtod_diglim >= 18); 18 == one more than the */ + if (nd > STRTOD_DIGLIM) { + /* ASSERT(STRTOD_DIGLIM >= 18); 18 == one more than the */ /* minimum number of decimal digits to distinguish double values */ /* in IEEE arithmetic. */ i = j = 18; @@ -1767,10 +1790,8 @@ _Py_dg_strtod(const char *s00, char **se) /* accept rv */ break; /* rv = smallest denormal */ - if (bc.nd >nd) { - bc.uflchk = 1; + if (bc.nd >nd) break; - } goto undfl; } } @@ -1786,10 +1807,8 @@ _Py_dg_strtod(const char *s00, char **se) else { dval(&rv) -= ulp(&rv); if (!dval(&rv)) { - if (bc.nd >nd) { - bc.uflchk = 1; + if (bc.nd >nd) break; - } goto undfl; } } @@ -1801,10 +1820,8 @@ _Py_dg_strtod(const char *s00, char **se) aadj = aadj1 = 1.; else if (word1(&rv) || word0(&rv) & Bndry_mask) { if (word1(&rv) == Tiny1 && !word0(&rv)) { - if (bc.nd >nd) { - bc.uflchk = 1; + if (bc.nd >nd) break; - } goto undfl; } aadj = 1.;