From 45b6365974b6611c3dc1c2e48c55d36ca0e500d9 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Sat, 16 Jan 2010 18:10:25 +0000 Subject: [PATCH] Merged revisions 77519,77530,77533 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r77519 | mark.dickinson | 2010-01-16 10:44:00 +0000 (Sat, 16 Jan 2010) | 5 lines Issue #7632: Fix a serious wrong output bug for string -> float conversion. Also remove some now unused variables, and add comments clarifying the possible outputs of the parsing section of _Py_dg_strtod. Thanks Eric Smith for reviewing. ........ r77530 | mark.dickinson | 2010-01-16 17:57:49 +0000 (Sat, 16 Jan 2010) | 3 lines Issue #7632: Fix one more case of incorrect rounding for str -> float conversion (see bug 5 in the issue tracker). ........ r77533 | mark.dickinson | 2010-01-16 18:06:17 +0000 (Sat, 16 Jan 2010) | 1 line Fix multiple uses of variable 'L' in _Py_dg_strtod, where one use requires an unsigned long and the other a signed long. See also r77421. ........ 
--- Lib/test/test_strtod.py | 11 ++-- Misc/NEWS | 10 ++-- Python/dtoa.c | 126 ++++++++++++++++++++++++++++------------ 3 files changed, 100 insertions(+), 47 deletions(-) diff --git a/Lib/test/test_strtod.py b/Lib/test/test_strtod.py index 79cfc88e8df..b5a630edaf3 100644 --- a/Lib/test/test_strtod.py +++ b/Lib/test/test_strtod.py @@ -123,10 +123,6 @@ class StrtodTests(unittest.TestCase): digits = m * 5**-e exponent = e s = '{}e{}'.format(digits, exponent) - - # for the moment, ignore errors from trailing zeros - if digits % 10 == 0: - continue self.check_strtod(s) # get expected answer via struct, to triple check @@ -175,7 +171,8 @@ class StrtodTests(unittest.TestCase): self.check_strtod(s) def test_parsing(self): - digits = tuple(map(str, range(10))) + # make '0' more likely to be chosen than other digits + digits = '000000123456789' signs = ('+', '-', '') # put together random short valid strings @@ -257,7 +254,9 @@ class StrtodTests(unittest.TestCase): '247032822920623295e-341', # issue 7632 bug 5: the following 2 strings convert differently '1000000000000000000000000000000000000000e-16', - #'10000000000000000000000000000000000000000e-17', + '10000000000000000000000000000000000000000e-17', + # issue 7632 bug 8: the following produced 10.0 + '10.900000000000000012345678912345678912345', ] for s in test_strings: self.check_strtod(s) diff --git a/Misc/NEWS b/Misc/NEWS index 07b150347b1..1339e612210 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -16,10 +16,12 @@ Core and Builtins methods of bytes, bytearray and unicode objects by using a common implementation based on stringlib's fast search. Patch by Florent Xicluna. -- Issue #7632: Fix a crash in dtoa.c that occurred in debug builds - when parsing certain long numeric strings corresponding to subnormal - values. Also fix a number of bugs in dtoa.c that could lead to - incorrectly rounded results when converting strings to floats. 
+- Issue #7632: Fix various str -> float conversion bugs present in 2.7 + alpha 2, including: (1) a serious 'wrong output' bug that could + occur for long (> 40 digit) input strings, (2) a crash in dtoa.c + that occurred in debug builds when parsing certain long numeric + strings corresponding to subnormal values, and (3) a number of flaws + that could lead to incorrectly rounded results. - The __complex__ method is now looked up on the class of instances to make it consistent with other special methods. diff --git a/Python/dtoa.c b/Python/dtoa.c index 51895c77898..9eb8cdba895 100644 --- a/Python/dtoa.c +++ b/Python/dtoa.c @@ -1340,16 +1340,17 @@ bigcomp(U *rv, const char *s0, BCinfo *bc) double _Py_dg_strtod(const char *s00, char **se) { - int bb2, bb5, bbe, bd2, bd5, bbbits, bs2, c, dp0, dp1, dplen, e, e1, error; + int bb2, bb5, bbe, bd2, bd5, bbbits, bs2, c, e, e1, error; int esign, i, j, k, nd, nd0, nf, nz, nz0, sign; const char *s, *s0, *s1; double aadj, aadj1; U aadj2, adj, rv, rv0; - ULong y, z, L; + ULong y, z, abse; + Long L; BCinfo bc; Bigint *bb, *bb1, *bd, *bd0, *bs, *delta; - sign = nz0 = nz = dplen = 0; + sign = nz0 = nz = 0; dval(&rv) = 0.; for(s = s00;;s++) switch(*s) { case '-': @@ -1381,18 +1382,11 @@ _Py_dg_strtod(const char *s00, char **se) goto ret; } s0 = s; - y = z = 0; for(nd = nf = 0; (c = *s) >= '0' && c <= '9'; nd++, s++) - if (nd < 9) - y = 10*y + c - '0'; - else if (nd < 16) - z = 10*z + c - '0'; + ; nd0 = nd; - dp0 = dp1 = s - s0; if (c == '.') { c = *++s; - dp1 = s - s0; - dplen = 1; if (!nd) { for(; c == '0'; c = *++s) nz++; @@ -1409,15 +1403,7 @@ _Py_dg_strtod(const char *s00, char **se) nz++; if (c -= '0') { nf += nz; - for(i = 1; i < nz; i++) - if (nd++ < 9) - y *= 10; - else if (nd <= DBL_DIG + 1) - z *= 10; - if (nd++ < 9) - y = 10*y + c; - else if (nd <= DBL_DIG + 1) - z = 10*z + c; + nd += nz; nz = 0; } } @@ -1440,17 +1426,17 @@ _Py_dg_strtod(const char *s00, char **se) while(c == '0') c = *++s; if (c > '0' && c <= '9') { - L 
= c - '0'; + abse = c - '0'; s1 = s; while((c = *++s) >= '0' && c <= '9') - L = 10*L + c - '0'; - if (s - s1 > 8 || L > MAX_ABS_EXP) + abse = 10*abse + c - '0'; + if (s - s1 > 8 || abse > MAX_ABS_EXP) /* Avoid confusion from exponents * so large that e might overflow. */ e = (int)MAX_ABS_EXP; /* safe for 16 bit ints */ else - e = (int)L; + e = (int)abse; if (esign) e = -e; } @@ -1468,15 +1454,78 @@ _Py_dg_strtod(const char *s00, char **se) } goto ret; } - bc.e0 = e1 = e -= nf; + e -= nf; + if (!nd0) + nd0 = nd; + + /* strip trailing zeros */ + for (i = nd; i > 0; ) { + /* scan back until we hit a nonzero digit. significant digit 'i' + is s0[i] if i < nd0, s0[i+1] if i >= nd0. */ + --i; + if (s0[i < nd0 ? i : i+1] != '0') { + ++i; + break; + } + } + e += nd - i; + nd = i; + if (nd0 > nd) + nd0 = nd; /* Now we have nd0 digits, starting at s0, followed by a * decimal point, followed by nd-nd0 digits. The number we're * after is the integer represented by those digits times * 10**e */ - if (!nd0) - nd0 = nd; + bc.e0 = e1 = e; + + /* Summary of parsing results. The parsing stage gives values + * s0, nd0, nd, e, sign, where: + * + * - s0 points to the first significant digit of the input string s00; + * + * - nd is the total number of significant digits (here, and + * below, 'significant digits' means the set of digits of the + * significand of the input that remain after ignoring leading + * and trailing zeros). + * + * - nd0 indicates the position of the decimal point (if + * present): so the nd significant digits are in s0[0:nd0] and + * s0[nd0+1:nd+1] using the usual Python half-open slice + * notation. (If nd0 < nd, then s0[nd0] necessarily contains + * a '.' character; if nd0 == nd, then it could be anything.) 
+ * + * - e is the adjusted exponent: the absolute value of the number + * represented by the original input string is n * 10**e, where + * n is the integer represented by the concatenation of + * s0[0:nd0] and s0[nd0+1:nd+1] + * + * - sign gives the sign of the input: 1 for negative, 0 for positive + * + * - the first and last significant digits are nonzero + */ + + /* put first DBL_DIG+1 digits into integers y and z. + * + * - y contains the value represented by the first min(9, nd) + * significant digits + * + * - if nd > 9, z contains the value represented by significant digits + * with indices in [9, min(16, nd)). So y * 10**(min(16, nd) - 9) + z + * gives the value represented by the first min(16, nd) sig. digits. + */ + + y = z = 0; + for (i = 0; i < nd; i++) { + if (i < 9) + y = 10*y + s0[i < nd0 ? i : i+1] - '0'; + else if (i < DBL_DIG+1) + z = 10*z + s0[i < nd0 ? i : i+1] - '0'; + else + break; + } + k = nd < DBL_DIG + 1 ? nd : DBL_DIG + 1; dval(&rv) = y; if (k > 9) { @@ -1593,15 +1642,18 @@ _Py_dg_strtod(const char *s00, char **se) /* ASSERT(STRTOD_DIGLIM >= 18); 18 == one more than the */ /* minimum number of decimal digits to distinguish double values */ /* in IEEE arithmetic. */ - i = j = 18; - if (i > nd0) - j += dplen; - for(;;) { - if (--j <= dp1 && j >= dp0) - j = dp0 - 1; - if (s0[j] != '0') - break; + + /* Truncate input to 18 significant digits, then discard any trailing + zeros on the result by updating nd, nd0, e and y suitably. (There's + no need to update z; it's not reused beyond this point.) */ + for (i = 18; i > 0; ) { + /* scan back until we hit a nonzero digit. significant digit 'i' + is s0[i] if i < nd0, s0[i+1] if i >= nd0. */ --i; + if (s0[i < nd0 ? 
i : i+1] != '0') { + ++i; + break; + } } e += nd - i; nd = i; @@ -1611,8 +1663,8 @@ _Py_dg_strtod(const char *s00, char **se) y = 0; for(i = 0; i < nd0; ++i) y = 10*y + s0[i] - '0'; - for(j = dp1; i < nd; ++i) - y = 10*y + s0[j++] - '0'; + for(; i < nd; ++i) + y = 10*y + s0[i+1] - '0'; } } bd0 = s2b(s0, nd0, nd, y);