From 725bfd8489e444aedd8dfd686a27ffc308657155 Mon Sep 17 00:00:00 2001
From: Mark Dickinson <dickinsm@gmail.com>
Date: Sun, 3 May 2009 20:33:40 +0000
Subject: [PATCH] Issue #5914:  Add new C-API function PyOS_string_to_double,
 to complement PyOS_double_to_string, and deprecate PyOS_ascii_strtod and
 PyOS_ascii_atof.

---
 Doc/c-api/conversion.rst  |  40 +++++++++++++
 Include/pystrtod.h        |   3 +
 Modules/_pickle.c         |  14 ++---
 Modules/_testcapimodule.c |  49 ++++++++++++++++
 Objects/complexobject.c   |  32 +++++------
 Objects/floatobject.c     |  36 ++++--------
 Python/ast.c              |  16 +++---
 Python/dtoa.c             |   5 ++
 Python/marshal.c          |  37 ++++++------
 Python/pystrtod.c         | 117 ++++++++++++++++++++++++++++++++------
 10 files changed, 253 insertions(+), 96 deletions(-)

diff --git a/Doc/c-api/conversion.rst b/Doc/c-api/conversion.rst
index 52fe1974655..403c1837373 100644
--- a/Doc/c-api/conversion.rst
+++ b/Doc/c-api/conversion.rst
@@ -62,6 +62,43 @@ The following functions provide locale-independent string to number conversions.
 
    See the Unix man page :manpage:`strtod(2)` for details.
 
+   .. deprecated:: 3.1
+      Use :cfunc:`PyOS_string_to_double` instead.
+
+
+.. cfunction:: double PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception)
+
+   Convert a string ``s`` to a :ctype:`double`, raising a Python
+   exception on failure.  The set of accepted strings corresponds to
+   the set of strings accepted by Python's :func:`float` constructor,
+   except that ``s`` must not have leading or trailing whitespace.
+   The conversion is independent of the current locale.
+
+   If ``endptr`` is ``NULL``, convert the whole string.  Raise
+   :exc:`ValueError` and return ``-1.0`` if the string is not a valid
+   representation of a floating-point number.
+
+   If ``endptr`` is not ``NULL``, convert as much of the string as
+   possible and set ``*endptr`` to point to the first unconverted
+   character.  If no initial segment of the string is a valid
+   representation of a floating-point number, set ``*endptr`` to point
+   to the beginning of the string, raise :exc:`ValueError`, and return
+   ``-1.0``.
+
+   If ``s`` represents a value that is too large to store in a float
+   (for example, ``"1e500"`` is such a string on many platforms) then
+   if ``overflow_exception`` is ``NULL`` return ``Py_HUGE_VAL`` (with
+   an appropriate sign) and don't set any exception.  Otherwise,
+   ``overflow_exception`` must point to a Python exception object;
+   raise that exception and return ``-1.0``.  In both cases, set
+   ``*endptr`` to point to the first character after the converted value.
+
+   If any other error occurs during the conversion (for example an
+   out-of-memory error), set the appropriate Python exception and
+   return ``-1.0``.
+
+   .. versionadded:: 3.1
+
 
 .. cfunction:: char* PyOS_ascii_formatd(char *buffer, size_t buf_len, const char *format, double d)
 
@@ -117,6 +154,9 @@ The following functions provide locale-independent string to number conversions.
 
    See the Unix man page :manpage:`atof(2)` for details.
 
+   .. deprecated:: 3.1
+      Use :cfunc:`PyOS_string_to_double` instead.
+
 
 .. cfunction:: char* PyOS_stricmp(char *s1, char *s2)
 
diff --git a/Include/pystrtod.h b/Include/pystrtod.h
index 1caa7aedfb7..abf9930caf4 100644
--- a/Include/pystrtod.h
+++ b/Include/pystrtod.h
@@ -9,6 +9,9 @@ extern "C" {
 PyAPI_FUNC(double) PyOS_ascii_strtod(const char *str, char **ptr);
 PyAPI_FUNC(double) PyOS_ascii_atof(const char *str);
 PyAPI_FUNC(char *) PyOS_ascii_formatd(char *buffer, size_t buf_len,  const char *format, double d);
+PyAPI_FUNC(double) PyOS_string_to_double(const char *str,
+                                         char **endptr,
+                                         PyObject *overflow_exception);
 
 /* The caller is responsible for calling PyMem_Free to free the buffer
    that's is returned. */
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index 754d13275cc..8adc136b805 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -2971,20 +2971,20 @@ load_float(UnpicklerObject *self)
         return bad_readline();
 
     errno = 0;
-    d = PyOS_ascii_strtod(s, &endptr);
-
-    if ((errno == ERANGE && !(fabs(d) <= 1.0)) ||
-        (endptr[0] != '\n') || (endptr[1] != '\0')) {
+    d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
+    if (d == -1.0 && PyErr_Occurred())
+        return -1;
+    if ((endptr[0] != '\n') || (endptr[1] != '\0')) {
         PyErr_SetString(PyExc_ValueError, "could not convert string to float");
         return -1;
     }
-
-    if ((value = PyFloat_FromDouble(d)) == NULL)
+    value = PyFloat_FromDouble(d);
+    if (value == NULL)
         return -1;
 
     PDATA_PUSH(self->stack, value, -1);
     return 0;
-}
+    }
 
 static int
 load_binfloat(UnpicklerObject *self)
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index 4ba489875eb..1cbb825236c 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -1045,6 +1045,54 @@ test_with_docstring(PyObject *self)
 	Py_RETURN_NONE;
 }
 
+/* Test PyOS_string_to_double. */
+static PyObject *
+test_string_to_double(PyObject *self) {
+	double result;
+	char *msg;
+
+#define CHECK_STRING(STR, expected)				\
+	result = PyOS_string_to_double(STR, NULL, NULL);	\
+	if (result == -1.0 && PyErr_Occurred())			\
+		return NULL;					\
+	if (result != expected) {				\
+		msg = "conversion of " STR " to float failed";	\
+		goto fail;					\
+	}
+
+#define CHECK_INVALID(STR)						\
+	result = PyOS_string_to_double(STR, NULL, NULL);		\
+	if (result == -1.0 && PyErr_Occurred()) {			\
+		if (PyErr_ExceptionMatches(PyExc_ValueError))		\
+			PyErr_Clear();					\
+		else							\
+			return NULL;					\
+	}								\
+	else {								\
+		msg = "conversion of " STR " didn't raise ValueError";	\
+		goto fail;						\
+	}
+
+	CHECK_STRING("0.1", 0.1);
+	CHECK_STRING("1.234", 1.234);
+	CHECK_STRING("-1.35", -1.35);
+	CHECK_STRING(".1e01", 1.0);
+	CHECK_STRING("2.e-2", 0.02);
+
+	CHECK_INVALID(" 0.1");
+	CHECK_INVALID("\t\n-3");
+	CHECK_INVALID(".123 ");
+	CHECK_INVALID("3\n");
+	CHECK_INVALID("123abc");
+
+	Py_RETURN_NONE;
+  fail:
+	return raiseTestError("test_string_to_double", msg);
+#undef CHECK_STRING
+#undef CHECK_INVALID
+}
+
+
 #ifdef HAVE_GETTIMEOFDAY
 /* Profiling of integer performance */
 static void print_delta(int test, struct timeval *s, struct timeval *e)
@@ -1223,6 +1271,7 @@ static PyMethodDef TestMethods[] = {
 	{"test_empty_argparse", (PyCFunction)test_empty_argparse,METH_NOARGS},
 	{"test_null_strings",	(PyCFunction)test_null_strings,	 METH_NOARGS},
 	{"test_string_from_format", (PyCFunction)test_string_from_format, METH_NOARGS},
+	{"test_string_to_double", (PyCFunction)test_string_to_double, METH_NOARGS},
 	{"test_with_docstring", (PyCFunction)test_with_docstring, METH_NOARGS,
 	 PyDoc_STR("This is a pretty normal docstring.")},
 
diff --git a/Objects/complexobject.c b/Objects/complexobject.c
index 691809f1345..4dd6151ef94 100644
--- a/Objects/complexobject.c
+++ b/Objects/complexobject.c
@@ -799,25 +799,26 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
 	*/
 
 	/* first look for forms starting with <float> */
-	errno = 0;
-	z = PyOS_ascii_strtod(s, &end);
-	if (end == s && errno == ENOMEM)
-		return PyErr_NoMemory();
-	if (errno == ERANGE && fabs(z) >= 1.0)
-		goto overflow;
-
+	z = PyOS_string_to_double(s, &end, PyExc_OverflowError);
+	if (z == -1.0 && PyErr_Occurred()) {
+		if (PyErr_ExceptionMatches(PyExc_ValueError))
+			PyErr_Clear();
+		else
+			return NULL;
+	}
 	if (end != s) {
 		/* all 4 forms starting with <float> land here */
 		s = end;
 		if (*s == '+' || *s == '-') {
 			/* <float><signed-float>j | <float><sign>j */
 			x = z;
-			errno = 0;
-			y = PyOS_ascii_strtod(s, &end);
-			if (end == s && errno == ENOMEM)
-				return PyErr_NoMemory();
-			if (errno == ERANGE && fabs(y) >= 1.0)
-				goto overflow;
+			y = PyOS_string_to_double(s, &end, PyExc_OverflowError);
+			if (y == -1.0 && PyErr_Occurred()) {
+				if (PyErr_ExceptionMatches(PyExc_ValueError))
+					PyErr_Clear();
+				else
+					return NULL;
+			}
 			if (end != s)
 				/* <float><signed-float>j */
 				s = end;
@@ -877,11 +878,6 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v)
 	PyErr_SetString(PyExc_ValueError,
 			"complex() arg is a malformed string");
 	return NULL;
-
-  overflow:
-	PyErr_SetString(PyExc_OverflowError,
-			"complex() arg overflow");
-	return NULL;
 }
 
 static PyObject *
diff --git a/Objects/floatobject.c b/Objects/floatobject.c
index fdca3bef364..061987722ce 100644
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -193,36 +193,20 @@ PyFloat_FromString(PyObject *v)
 	/* We don't care about overflow or underflow.  If the platform
 	 * supports them, infinities and signed zeroes (on underflow) are
 	 * fine. */
-	errno = 0;
-	PyFPE_START_PROTECT("strtod", goto error)
-	x = PyOS_ascii_strtod(s, (char **)&end);
-	PyFPE_END_PROTECT(x)
-	if (end == s) {
-		if (errno == ENOMEM)
-			PyErr_NoMemory();
-		else {
-			PyOS_snprintf(buffer, sizeof(buffer),
-				"invalid literal for float(): %.200s", s);
-			PyErr_SetString(PyExc_ValueError, buffer);
-		}
+	x = PyOS_string_to_double(s, (char **)&end, NULL);
+	if (x == -1.0 && PyErr_Occurred())
 		goto error;
-	}
-	/* Since end != s, the platform made *some* kind of sense out
-	   of the input.  Trust it. */
 	while (*end && isspace(Py_CHARMASK(*end)))
 		end++;
-	if (end != last) {
-		if (*end == '\0')
-			PyErr_SetString(PyExc_ValueError,
-					"null byte in argument for float()");
-		else {
-			PyOS_snprintf(buffer, sizeof(buffer),
-				"invalid literal for float(): %.200s", s);
-			PyErr_SetString(PyExc_ValueError, buffer);
-		}
-		goto error;
+	if (end == last)
+		result = PyFloat_FromDouble(x);
+	else {
+		PyOS_snprintf(buffer, sizeof(buffer),
+			      "invalid literal for float(): %.200s", s);
+		PyErr_SetString(PyExc_ValueError, buffer);
+		result = NULL;
 	}
-	result = PyFloat_FromDouble(x);
+
   error:
 	if (s_buffer)
 		PyMem_FREE(s_buffer);
diff --git a/Python/ast.c b/Python/ast.c
index b08cf9b130c..1c79359ad26 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -3162,18 +3162,18 @@ parsenumber(struct compiling *c, const char *s)
 #ifndef WITHOUT_COMPLEX
     if (imflag) {
         compl.real = 0.;
-        PyFPE_START_PROTECT("atof", return 0)
-            compl.imag = PyOS_ascii_atof(s);
-        PyFPE_END_PROTECT(c)
-            return PyComplex_FromCComplex(compl);
+        compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
+        if (compl.imag == -1.0 && PyErr_Occurred())
+            return NULL;
+        return PyComplex_FromCComplex(compl);
     }
     else
 #endif
     {
-        PyFPE_START_PROTECT("atof", return 0)
-            dx = PyOS_ascii_atof(s);
-        PyFPE_END_PROTECT(dx)
-            return PyFloat_FromDouble(dx);
+        dx = PyOS_string_to_double(s, NULL, NULL);
+        if (dx == -1.0 && PyErr_Occurred())
+            return NULL;
+        return PyFloat_FromDouble(dx);
     }
 }
 
diff --git a/Python/dtoa.c b/Python/dtoa.c
index 1d96304ed07..82434bccc2f 100644
--- a/Python/dtoa.c
+++ b/Python/dtoa.c
@@ -61,6 +61,9 @@
  *     that hasn't been MALLOC'ed, private_mem should only be used when k <=
  *     Kmax.
  *
+ *  7. _Py_dg_strtod has been modified so that it doesn't accept strings with
+ *     leading whitespace.
+ *
  ***************************************************************/
 
 /* Please send bug reports for the original dtoa.c code to David M. Gay (dmg
@@ -1355,6 +1358,7 @@ _Py_dg_strtod(const char *s00, char **se)
             /* no break */
         case 0:
             goto ret0;
+        /* modify original dtoa.c so that it doesn't accept leading whitespace
         case '\t':
         case '\n':
         case '\v':
@@ -1362,6 +1366,7 @@ _Py_dg_strtod(const char *s00, char **se)
         case '\r':
         case ' ':
             continue;
+        */
         default:
             goto break2;
         }
diff --git a/Python/marshal.c b/Python/marshal.c
index 4ad873eb77e..4e9c1294635 100644
--- a/Python/marshal.c
+++ b/Python/marshal.c
@@ -670,18 +670,17 @@ r_object(RFILE *p)
 		{
 			char buf[256];
 			double dx;
+			retval = NULL;
 			n = r_byte(p);
 			if (n == EOF || r_string(buf, (int)n, p) != n) {
 				PyErr_SetString(PyExc_EOFError,
 					"EOF read where object expected");
-				retval = NULL;
 				break;
 			}
 			buf[n] = '\0';
-			retval = NULL;
-			PyFPE_START_PROTECT("atof", break)
-			dx = PyOS_ascii_atof(buf);
-			PyFPE_END_PROTECT(dx)
+			dx = PyOS_string_to_double(buf, NULL, NULL);
+			if (dx == -1.0 && PyErr_Occurred())
+				break;
 			retval = PyFloat_FromDouble(dx);
 			break;
 		}
@@ -710,29 +709,27 @@ r_object(RFILE *p)
 		{
 			char buf[256];
 			Py_complex c;
-			n = r_byte(p);
-			if (n == EOF || r_string(buf, (int)n, p) != n) {
-				PyErr_SetString(PyExc_EOFError,
-					"EOF read where object expected");
-				retval = NULL;
-				break;
-			}
-			buf[n] = '\0';
 			retval = NULL;
-			PyFPE_START_PROTECT("atof", break;)
-			c.real = PyOS_ascii_atof(buf);
-			PyFPE_END_PROTECT(c)
 			n = r_byte(p);
 			if (n == EOF || r_string(buf, (int)n, p) != n) {
 				PyErr_SetString(PyExc_EOFError,
 					"EOF read where object expected");
-				retval = NULL;
 				break;
 			}
 			buf[n] = '\0';
-			PyFPE_START_PROTECT("atof", break)
-			c.imag = PyOS_ascii_atof(buf);
-			PyFPE_END_PROTECT(c)
+			c.real = PyOS_string_to_double(buf, NULL, NULL);
+			if (c.real == -1.0 && PyErr_Occurred())
+				break;
+			n = r_byte(p);
+			if (n == EOF || r_string(buf, (int)n, p) != n) {
+				PyErr_SetString(PyExc_EOFError,
+					"EOF read where object expected");
+				break;
+			}
+			buf[n] = '\0';
+			c.imag = PyOS_string_to_double(buf, NULL, NULL);
+			if (c.imag == -1.0 && PyErr_Occurred())
+				break;
 			retval = PyComplex_FromCComplex(c);
 			break;
 		}
diff --git a/Python/pystrtod.c b/Python/pystrtod.c
index 104061056e7..66242d80781 100644
--- a/Python/pystrtod.c
+++ b/Python/pystrtod.c
@@ -35,7 +35,7 @@
 #ifndef PY_NO_SHORT_FLOAT_REPR
 
 double
-PyOS_ascii_strtod(const char *nptr, char **endptr)
+_PyOS_ascii_strtod(const char *nptr, char **endptr)
 {
 	double result;
 	_Py_SET_53BIT_PRECISION_HEADER;
@@ -64,7 +64,7 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
 */
 
 double
-PyOS_ascii_strtod(const char *nptr, char **endptr)
+_PyOS_ascii_strtod(const char *nptr, char **endptr)
 {
 	char *fail_pos;
 	double val = -1.0;
@@ -92,15 +92,10 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
 	   and underflows */
 	errno = 0;
 
-	/* We process any leading whitespace and the optional sign manually,
-	   then pass the remainder to the system strtod.  This ensures that
-	   the result of an underflow has the correct sign. (bug #1725)  */
-
+	/* We process the optional sign manually, then pass the remainder to
+	   the system strtod.  This ensures that the result of an underflow
+	   has the correct sign. (bug #1725)  */
 	p = nptr;
-	/* Skip leading space */
-	while (Py_ISSPACE(*p))
-		p++;
-
 	/* Process leading sign, if present */
 	if (*p == '-') {
 		negate = 1;
@@ -185,8 +180,7 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
 		copy = (char *)PyMem_MALLOC(end - digits_pos +
 					    1 + decimal_point_len);
 		if (copy == NULL) {
-			if (endptr)
-				*endptr = (char *)nptr;
+			*endptr = (char *)nptr;
 			errno = ENOMEM;
 			return val;
 		}
@@ -227,27 +221,116 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
   got_val:
 	if (negate && fail_pos != nptr)
 		val = -val;
-
-	if (endptr)
-		*endptr = fail_pos;
+	*endptr = fail_pos;
 
 	return val;
 
   invalid_string:
-	if (endptr)
-		*endptr = (char*)nptr;
+	*endptr = (char*)nptr;
 	errno = EINVAL;
 	return -1.0;
 }
 
 #endif
 
+/* PyOS_ascii_strtod is DEPRECATED in Python 3.1 */
+
+double
+PyOS_ascii_strtod(const char *nptr, char **endptr)
+{
+	char *fail_pos;
+	const char *p;
+	double x;
+
+	if (PyErr_WarnEx(PyExc_DeprecationWarning,
+			 "PyOS_ascii_strtod and PyOS_ascii_atof are "
+			 "deprecated.  Use PyOS_string_to_double "
+			 "instead.", 1) < 0)
+		return -1.0;
+
+	/* _PyOS_ascii_strtod already does everything that we want,
+	   except that it doesn't parse leading whitespace */
+	p = nptr;
+	while (Py_ISSPACE(*p))
+		p++;
+	x = _PyOS_ascii_strtod(p, &fail_pos);
+	if (fail_pos == p)
+		fail_pos = (char *)nptr;
+	if (endptr)
+		*endptr = (char *)fail_pos;
+	return x;
+}
+
+/* PyOS_ascii_atof is DEPRECATED in Python 3.1 */
+
 double
 PyOS_ascii_atof(const char *nptr)
 {
 	return PyOS_ascii_strtod(nptr, NULL);
 }
 
+/* PyOS_string_to_double is the recommended replacement for the deprecated
+   PyOS_ascii_strtod and PyOS_ascii_atof functions.  It converts a
+   null-terminated byte string s (interpreted as a string of ASCII characters)
+   to a float.  The string should not have leading or trailing whitespace (in
+   contrast, PyOS_ascii_strtod allows leading whitespace but not trailing
+   whitespace).  The conversion is independent of the current locale.
+
+   If endptr is NULL, try to convert the whole string.  Raise ValueError and
+   return -1.0 if the string is not a valid representation of a floating-point
+   number.
+
+   If endptr is non-NULL, try to convert as much of the string as possible.
+   If no initial segment of the string is the valid representation of a
+   floating-point number then *endptr is set to point to the beginning of the
+   string, -1.0 is returned and again ValueError is raised.
+
+   On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
+   if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
+   exception is raised.  Otherwise, overflow_exception should point to
+   a Python exception; this exception will be raised, -1.0 will be returned,
+   and *endptr will point just past the end of the converted value.
+
+   If any other failure occurs (for example lack of memory), -1.0 is returned
+   and the appropriate Python exception will have been set.
+*/
+
+double
+PyOS_string_to_double(const char *s,
+		      char **endptr,
+		      PyObject *overflow_exception)
+{
+	double x, result=-1.0;
+	char *fail_pos;
+
+	errno = 0;
+	PyFPE_START_PROTECT("PyOS_string_to_double", return -1.0)
+	x = _PyOS_ascii_strtod(s, &fail_pos);
+	PyFPE_END_PROTECT(x)
+
+	if (errno == ENOMEM) {
+		PyErr_NoMemory();
+		fail_pos = (char *)s;
+	}
+	else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
+		PyErr_Format(PyExc_ValueError,
+			      "could not convert string to float: "
+			      "%.200s", s);
+	else if (fail_pos == s)
+		PyErr_Format(PyExc_ValueError,
+			      "could not convert string to float: "
+			      "%.200s", s);
+	else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
+		PyErr_Format(overflow_exception,
+			      "value too large to convert to float: "
+			      "%.200s", s);
+	else
+		result = x;
+
+	if (endptr != NULL)
+		*endptr = fail_pos;
+	return result;
+}
 
 /* Given a string that may have a decimal point in the current
    locale, change it back to a dot.  Since the string cannot get