Internal plumbing changes for float parsing:

- check for nans and infs within PyOS_ascii_strtod
- simplify parsing in PyFloat_FromString, and handle out-of-memory errors properly
2009-04-26 15:30:47 +00:00 · 2009-04-26 15:30:47 +00:00 · 6d65df1e8a
parent 35f1c9470a
commit 6d65df1e8a
2 changed files with 78 additions and 87 deletions
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -162,7 +162,7 @@ PyFloat_FromDouble(double fval)
 PyObject *
 PyFloat_FromString(PyObject *v)
 {
-	const char *s, *last, *end, *sp;
+	const char *s, *last, *end;
 	double x;
 	char buffer[256]; /* for errors */
 	char *s_buffer = NULL;
@@ -186,76 +186,40 @@ PyFloat_FromString(PyObject *v)
 				"float() argument must be a string or a number");
 		return NULL;
 	}
-
 	last = s + len;
+
 	while (*s && isspace(Py_CHARMASK(*s)))
 		s++;
-	if (*s == '\0') {
-		PyErr_SetString(PyExc_ValueError, "empty string for float()");
-		goto error;
-	}
-	sp = s;
-	/* We don't care about overflow or underflow.  If the platform supports
-	 * them, infinities and signed zeroes (on underflow) are fine.
-	 * However, strtod can return 0 for denormalized numbers.  Note that
-	 * whether strtod sets errno on underflow is not defined, so we can't
-	 * key off errno.
-         */
+	/* We don't care about overflow or underflow.  If the platform
+	 * supports them, infinities and signed zeroes (on underflow) are
+	 * fine. */
+	errno = 0;
 	PyFPE_START_PROTECT("strtod", goto error)
 	x = PyOS_ascii_strtod(s, (char **)&end);
 	PyFPE_END_PROTECT(x)
-	errno = 0;
-	/* Believe it or not, Solaris 2.6 can move end *beyond* the null
-	   byte at the end of the string, when the input is inf(inity). */
-	if (end > last)
-		end = last;
-	/* Check for inf and nan. This is done late because it rarely happens. */
 	if (end == s) {
-		char *p = (char*)sp;
-		int sign = 1;
-
-		if (*p == '-') {
-			sign = -1;
-			p++;
+		if (errno == ENOMEM)
+			PyErr_NoMemory();
+		else {
+			PyOS_snprintf(buffer, sizeof(buffer),
+				"invalid literal for float(): %.200s", s);
+			PyErr_SetString(PyExc_ValueError, buffer);
 		}
-		if (*p == '+') {
-			p++;
-		}
-		if (PyOS_strnicmp(p, "inf", 4) == 0) {
-			if (s_buffer != NULL)
-				PyMem_FREE(s_buffer);
-			Py_RETURN_INF(sign);
-		}
-		if (PyOS_strnicmp(p, "infinity", 9) == 0) {
-			if (s_buffer != NULL)
-				PyMem_FREE(s_buffer);
-			Py_RETURN_INF(sign);
-		}
-#ifdef Py_NAN
-		if(PyOS_strnicmp(p, "nan", 4) == 0) {
-			if (s_buffer != NULL)
-				PyMem_FREE(s_buffer);
-			Py_RETURN_NAN;
-		}
-#endif
-		PyOS_snprintf(buffer, sizeof(buffer),
-			      "invalid literal for float(): %.200s", s);
-		PyErr_SetString(PyExc_ValueError, buffer);
 		goto error;
 	}
 	/* Since end != s, the platform made *some* kind of sense out
 	   of the input.  Trust it. */
 	while (*end && isspace(Py_CHARMASK(*end)))
 		end++;
-	if (*end != '\0') {
-		PyOS_snprintf(buffer, sizeof(buffer),
-			      "invalid literal for float(): %.200s", s);
-		PyErr_SetString(PyExc_ValueError, buffer);
-		goto error;
-	}
-	else if (end != last) {
-		PyErr_SetString(PyExc_ValueError,
-				"null byte in argument for float()");
+	if (end != last) {
+		if (*end == '\0')
+			PyErr_SetString(PyExc_ValueError,
+					"null byte in argument for float()");
+		else {
+			PyOS_snprintf(buffer, sizeof(buffer),
+				"invalid literal for float(): %.200s", s);
+			PyErr_SetString(PyExc_ValueError, buffer);
+		}
 		goto error;
 	}
 	result = PyFloat_FromDouble(x);
--- a/Python/pystrtod.c
+++ b/Python/pystrtod.c
@@ -94,6 +94,10 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)

 	decimal_point_pos = NULL;

+	/* Set errno to zero, so that we can distinguish zero results
+	   and underflows */
+	errno = 0;
+
 	/* We process any leading whitespace and the optional sign manually,
 	   then pass the remainder to the system strtod.  This ensures that
 	   the result of an underflow has the correct sign. (bug #1725)  */
@@ -107,27 +111,53 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
 	if (*p == '-') {
 		negate = 1;
 		p++;
-	} else if (*p == '+') {
+	}
+	else if (*p == '+') {
 		p++;
 	}

-	/* What's left should begin with a digit, a decimal point, or one of
-	   the letters i, I, n, N. It should not begin with 0x or 0X */
-	if ((!ISDIGIT(*p) &&
-	     *p != '.' && *p != 'i' && *p != 'I' && *p != 'n' && *p != 'N')
-	    ||
-	    (*p == '0' && (p[1] == 'x' || p[1] == 'X')))
-	{
-		if (endptr)
-			*endptr = (char*)nptr;
-		errno = EINVAL;
-		return val;
+	/* Parse infinities and nans */
+	if (*p == 'i' || *p == 'I') {
+		if (PyOS_strnicmp(p, "inf", 3) == 0) {
+			val = Py_HUGE_VAL;
+			if (PyOS_strnicmp(p+3, "inity", 5) == 0)
+				fail_pos = (char *)p+8;
+			else
+				fail_pos = (char *)p+3;
+			goto got_val;
+		}
+		else
+			goto invalid_string;
 	}
-	digits_pos = p;
+#ifdef Py_NAN
+	if (*p == 'n' || *p == 'N') {
+		if (PyOS_strnicmp(p, "nan", 3) == 0) {
+			val = Py_NAN;
+			fail_pos = (char *)p+3;
+			goto got_val;
+		}
+		else
+			goto invalid_string;
+	}
+#endif

-	if (decimal_point[0] != '.' || 
+	/* Some platform strtods accept hex floats; Python shouldn't (at the
+	   moment), so we check explicitly for strings starting with '0x'. */
+	if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
+		goto invalid_string;
+
+	/* Check that what's left begins with a digit or decimal point */
+	if (!ISDIGIT(*p) && *p != '.')
+		goto invalid_string;
+
+	digits_pos = p;
+	if (decimal_point[0] != '.' ||
 	    decimal_point[1] != 0)
 	{
+		/* Look for a '.' in the input; if present, it'll need to be
+		   swapped for the current locale's decimal point before we
+		   call strtod.  On the other hand, if we find the current
+		   locale's decimal point then the input is invalid. */
 		while (ISDIGIT(*p))
 			p++;

@@ -135,6 +165,7 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
 		{
 			decimal_point_pos = p++;

+			/* locate end of number */
 			while (ISDIGIT(*p))
 				p++;

@@ -147,27 +178,16 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
 			end = p;
 		}
 		else if (strncmp(p, decimal_point, decimal_point_len) == 0)
-		{
 			/* Python bug #1417699 */
-			if (endptr)
-				*endptr = (char*)nptr;
-			errno = EINVAL;
-			return val;
-		}
+			goto invalid_string;
 		/* For the other cases, we need not convert the decimal
 		   point */
 	}

-	/* Set errno to zero, so that we can distinguish zero results
-	   and underflows */
-	errno = 0;
-
-	if (decimal_point_pos)
-	{
+	if (decimal_point_pos) {
 		char *copy, *c;
-
-		/* We need to convert the '.' to the locale specific decimal
-		   point */
+		/* Create a copy of the input, with the '.' converted to the
+		   locale-specific decimal point */
 		copy = (char *)PyMem_MALLOC(end - digits_pos +
 					    1 + decimal_point_len);
 		if (copy == NULL) {
@@ -208,8 +228,9 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
 	}

 	if (fail_pos == digits_pos)
-		fail_pos = (char *)nptr;
+		goto invalid_string;

+  got_val:
 	if (negate && fail_pos != nptr)
 		val = -val;

@@ -217,6 +238,12 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
 		*endptr = fail_pos;

 	return val;
+
+  invalid_string:
+	if (endptr)
+		*endptr = (char*)nptr;
+	errno = EINVAL;
+	return -1.0;
 }

 #endif