Internal plumbing changes for float parsing:

- check for nans and infs within PyOS_ascii_strtod
- simplify parsing in PyFloat_FromString, and handle out-of-memory errors properly
2009-04-26 15:30:47 +00:00 · 2009-04-26 15:30:47 +00:00 · 6d65df1e8a
parent 35f1c9470a
commit 6d65df1e8a
2 changed files with 78 additions and 87 deletions
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@ -162,7 +162,7 @@ PyFloat_FromDouble(double fval)
 PyObject *
 PyFloat_FromString(PyObject *v)
 {
-	const char *s, *last, *end, *sp;
+	const char *s, *last, *end;
 	double x;
 	char buffer[256]; /* for errors */
 	char *s_buffer = NULL;
@ -186,76 +186,40 @@ PyFloat_FromString(PyObject *v)
 				"float() argument must be a string or a number");
 		return NULL;
 	}
 	last = s + len;
 	while (*s && isspace(Py_CHARMASK(*s)))
 		s++;
-	if (*s == '\0') {
+	/* We don't care about overflow or underflow.  If the platform
-		PyErr_SetString(PyExc_ValueError, "empty string for float()");
+	 * supports them, infinities and signed zeroes (on underflow) are
-		goto error;
+	 * fine. */
-	}
+	errno = 0;
 	sp = s;
 	/* We don't care about overflow or underflow.  If the platform supports
 	 * them, infinities and signed zeroes (on underflow) are fine.
 	 * However, strtod can return 0 for denormalized numbers.  Note that
 	 * whether strtod sets errno on underflow is not defined, so we can't
 	 * key off errno.
         */
 	PyFPE_START_PROTECT("strtod", goto error)
 	x = PyOS_ascii_strtod(s, (char **)&end);
 	PyFPE_END_PROTECT(x)
 	errno = 0;
 	/* Believe it or not, Solaris 2.6 can move end *beyond* the null
 	   byte at the end of the string, when the input is inf(inity). */
 	if (end > last)
 		end = last;
 	/* Check for inf and nan. This is done late because it rarely happens. */
 	if (end == s) {
-		char *p = (char*)sp;
+		if (errno == ENOMEM)
-		int sign = 1;
+			PyErr_NoMemory();
-
+		else {
-		if (*p == '-') {
+			PyOS_snprintf(buffer, sizeof(buffer),
-			sign = -1;
+				"invalid literal for float(): %.200s", s);
-			p++;
+			PyErr_SetString(PyExc_ValueError, buffer);
 		}
 		if (*p == '+') {
 			p++;
 		}
 		if (PyOS_strnicmp(p, "inf", 4) == 0) {
 			if (s_buffer != NULL)
 				PyMem_FREE(s_buffer);
 			Py_RETURN_INF(sign);
 		}
 		if (PyOS_strnicmp(p, "infinity", 9) == 0) {
 			if (s_buffer != NULL)
 				PyMem_FREE(s_buffer);
 			Py_RETURN_INF(sign);
 		}
 #ifdef Py_NAN
 		if(PyOS_strnicmp(p, "nan", 4) == 0) {
 			if (s_buffer != NULL)
 				PyMem_FREE(s_buffer);
 			Py_RETURN_NAN;
 		}
 #endif
 		PyOS_snprintf(buffer, sizeof(buffer),
 			      "invalid literal for float(): %.200s", s);
 		PyErr_SetString(PyExc_ValueError, buffer);
 		goto error;
 	}
 	/* Since end != s, the platform made *some* kind of sense out
 	   of the input.  Trust it. */
 	while (*end && isspace(Py_CHARMASK(*end)))
 		end++;
-	if (*end != '\0') {
+	if (end != last) {
-		PyOS_snprintf(buffer, sizeof(buffer),
+		if (*end == '\0')
-			      "invalid literal for float(): %.200s", s);
+			PyErr_SetString(PyExc_ValueError,
-		PyErr_SetString(PyExc_ValueError, buffer);
+					"null byte in argument for float()");
-		goto error;
+		else {
-	}
+			PyOS_snprintf(buffer, sizeof(buffer),
-	else if (end != last) {
+				"invalid literal for float(): %.200s", s);
-		PyErr_SetString(PyExc_ValueError,
+			PyErr_SetString(PyExc_ValueError, buffer);
-				"null byte in argument for float()");
+		}
 		goto error;
 	}
 	result = PyFloat_FromDouble(x);
--- a/Python/pystrtod.c
+++ b/Python/pystrtod.c
@ -94,6 +94,10 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
 	decimal_point_pos = NULL;
 	/* Set errno to zero, so that we can distinguish zero results
 	   and underflows */
 	errno = 0;
 	/* We process any leading whitespace and the optional sign manually,
 	   then pass the remainder to the system strtod.  This ensures that
 	   the result of an underflow has the correct sign. (bug #1725)  */
@ -107,27 +111,53 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
 	if (*p == '-') {
 		negate = 1;
 		p++;
-	} else if (*p == '+') {
+	}
 	else if (*p == '+') {
 		p++;
 	}
-	/* What's left should begin with a digit, a decimal point, or one of
+	/* Parse infinities and nans */
-	   the letters i, I, n, N. It should not begin with 0x or 0X */
+	if (*p == 'i' || *p == 'I') {
-	if ((!ISDIGIT(*p) &&
+		if (PyOS_strnicmp(p, "inf", 3) == 0) {
-	     *p != '.' && *p != 'i' && *p != 'I' && *p != 'n' && *p != 'N')
+			val = Py_HUGE_VAL;
-	    ||
+			if (PyOS_strnicmp(p+3, "inity", 5) == 0)
-	    (*p == '0' && (p[1] == 'x' || p[1] == 'X')))
+				fail_pos = (char *)p+8;
-	{
+			else
-		if (endptr)
+				fail_pos = (char *)p+3;
-			*endptr = (char*)nptr;
+			goto got_val;
-		errno = EINVAL;
+		}
-		return val;
+		else
 			goto invalid_string;
 	}
-	digits_pos = p;
+#ifdef Py_NAN
 	if (*p == 'n' || *p == 'N') {
 		if (PyOS_strnicmp(p, "nan", 3) == 0) {
 			val = Py_NAN;
 			fail_pos = (char *)p+3;
 			goto got_val;
 		}
 		else
 			goto invalid_string;
 	}
 #endif
-	if (decimal_point[0] != '.' || 
+	/* Some platform strtods accept hex floats; Python shouldn't (at the
 	   moment), so we check explicitly for strings starting with '0x'. */
 	if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
 		goto invalid_string;
 	/* Check that what's left begins with a digit or decimal point */
 	if (!ISDIGIT(*p) && *p != '.')
 		goto invalid_string;
 	digits_pos = p;
 	if (decimal_point[0] != '.' ||
 	    decimal_point[1] != 0)
 	{
 		/* Look for a '.' in the input; if present, it'll need to be
 		   swapped for the current locale's decimal point before we
 		   call strtod.  On the other hand, if we find the current
 		   locale's decimal point then the input is invalid. */
 		while (ISDIGIT(*p))
 			p++;
@ -135,6 +165,7 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
 		{
 			decimal_point_pos = p++;
 			/* locate end of number */
 			while (ISDIGIT(*p))
 				p++;
@ -147,27 +178,16 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
 			end = p;
 		}
 		else if (strncmp(p, decimal_point, decimal_point_len) == 0)
 		{
 			/* Python bug #1417699 */
-			if (endptr)
+			goto invalid_string;
 				*endptr = (char*)nptr;
 			errno = EINVAL;
 			return val;
 		}
 		/* For the other cases, we need not convert the decimal
 		   point */
 	}
-	/* Set errno to zero, so that we can distinguish zero results
+	if (decimal_point_pos) {
 	   and underflows */
 	errno = 0;
 	if (decimal_point_pos)
 	{
 		char *copy, *c;
-
+		/* Create a copy of the input, with the '.' converted to the
-		/* We need to convert the '.' to the locale specific decimal
+		   locale-specific decimal point */
 		   point */
 		copy = (char *)PyMem_MALLOC(end - digits_pos +
 					    1 + decimal_point_len);
 		if (copy == NULL) {
@ -208,8 +228,9 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
 	}
 	if (fail_pos == digits_pos)
-		fail_pos = (char *)nptr;
+		goto invalid_string;
  got_val:
 	if (negate && fail_pos != nptr)
 		val = -val;
@ -217,6 +238,12 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
 		*endptr = fail_pos;
 	return val;
  invalid_string:
 	if (endptr)
 		*endptr = (char*)nptr;
 	errno = EINVAL;
 	return -1.0;
 }
 #endif