From 725bfd8489e444aedd8dfd686a27ffc308657155 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Sun, 3 May 2009 20:33:40 +0000 Subject: [PATCH] Issue #5914: Add new C-API function PyOS_string_to_double, to complement PyOS_double_to_string, and deprecate PyOS_ascii_strtod and PyOS_ascii_atof. --- Doc/c-api/conversion.rst | 40 +++++++++++++ Include/pystrtod.h | 3 + Modules/_pickle.c | 14 ++--- Modules/_testcapimodule.c | 49 ++++++++++++++++ Objects/complexobject.c | 32 +++++------ Objects/floatobject.c | 36 ++++-------- Python/ast.c | 16 +++--- Python/dtoa.c | 5 ++ Python/marshal.c | 37 ++++++------ Python/pystrtod.c | 117 ++++++++++++++++++++++++++++++++------ 10 files changed, 253 insertions(+), 96 deletions(-) diff --git a/Doc/c-api/conversion.rst b/Doc/c-api/conversion.rst index 52fe1974655..403c1837373 100644 --- a/Doc/c-api/conversion.rst +++ b/Doc/c-api/conversion.rst @@ -62,6 +62,43 @@ The following functions provide locale-independent string to number conversions. See the Unix man page :manpage:`strtod(2)` for details. + .. deprecated:: 3.1 + Use :cfunc:`PyOS_string_to_double` instead. + + +.. cfunction:: double PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception) + + Convert a string ``s`` to a :ctype:`double`, raising a Python + exception on failure. The set of accepted strings corresponds to + the set of strings accepted by Python's :func:`float` constructor, + except that ``s`` must not have leading or trailing whitespace. + The conversion is independent of the current locale. + + If ``endptr`` is ``NULL``, convert the whole string. Raise + ValueError and return ``-1.0`` if the string is not a valid + representation of a floating-point number. + + If endptr is not ``NULL``, convert as much of the string as + possible and set ``*endptr`` to point to the first unconverted + character. If no initial segment of the string is the valid + representation of a floating-point number, set ``*endptr`` to point + to the beginning of the string, raise ValueError, and return + ``-1.0``. + + If ``s`` represents a value that is too large to store in a float + (for example, ``"1e500"`` is such a string on many platforms) then + if ``overflow_exception`` is ``NULL`` return ``Py_HUGE_VAL`` (with + an appropriate sign) and don't set any exception. Otherwise, + ``overflow_exception`` must point to a Python exception object; + raise that exception and return ``-1.0``. In both cases, set + ``*endptr`` to point to the first character after the converted value. + + If any other error occurs during the conversion (for example an + out-of-memory error), set the appropriate Python exception and + return ``-1.0``. + + .. versionadded:: 3.1 + .. cfunction:: char* PyOS_ascii_formatd(char *buffer, size_t buf_len, const char *format, double d) @@ -117,6 +154,9 @@ The following functions provide locale-independent string to number conversions. See the Unix man page :manpage:`atof(2)` for details. + .. deprecated:: 3.1 + Use PyOS_string_to_double instead. + .. cfunction:: char* PyOS_stricmp(char *s1, char *s2) diff --git a/Include/pystrtod.h b/Include/pystrtod.h index 1caa7aedfb7..abf9930caf4 100644 --- a/Include/pystrtod.h +++ b/Include/pystrtod.h @@ -9,6 +9,9 @@ extern "C" { PyAPI_FUNC(double) PyOS_ascii_strtod(const char *str, char **ptr); PyAPI_FUNC(double) PyOS_ascii_atof(const char *str); PyAPI_FUNC(char *) PyOS_ascii_formatd(char *buffer, size_t buf_len, const char *format, double d); +PyAPI_FUNC(double) PyOS_string_to_double(const char *str, + char **endptr, + PyObject *overflow_exception); /* The caller is responsible for calling PyMem_Free to free the buffer that's is returned. */ diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 754d13275cc..8adc136b805 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -2971,20 +2971,20 @@ load_float(UnpicklerObject *self) return bad_readline(); errno = 0; - d = PyOS_ascii_strtod(s, &endptr); - - if ((errno == ERANGE && !(fabs(d) <= 1.0)) || - (endptr[0] != '\n') || (endptr[1] != '\0')) { + d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError); + if (d == -1.0 && PyErr_Occurred()) + return -1; + if ((endptr[0] != '\n') || (endptr[1] != '\0')) { PyErr_SetString(PyExc_ValueError, "could not convert string to float"); return -1; } - - if ((value = PyFloat_FromDouble(d)) == NULL) + value = PyFloat_FromDouble(d); + if (value == NULL) return -1; PDATA_PUSH(self->stack, value, -1); return 0; -} + } static int load_binfloat(UnpicklerObject *self) diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 4ba489875eb..1cbb825236c 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1045,6 +1045,54 @@ test_with_docstring(PyObject *self) Py_RETURN_NONE; } +/* Test PyOS_string_to_double. */ +static PyObject * +test_string_to_double(PyObject *self) { + double result; + char *msg; + +#define CHECK_STRING(STR, expected) \ + result = PyOS_string_to_double(STR, NULL, NULL); \ + if (result == -1.0 && PyErr_Occurred()) \ + return NULL; \ + if (result != expected) { \ + msg = "conversion of " STR " to float failed"; \ + goto fail; \ + } + +#define CHECK_INVALID(STR) \ + result = PyOS_string_to_double(STR, NULL, NULL); \ + if (result == -1.0 && PyErr_Occurred()) { \ + if (PyErr_ExceptionMatches(PyExc_ValueError)) \ + PyErr_Clear(); \ + else \ + return NULL; \ + } \ + else { \ + msg = "conversion of " STR " didn't raise ValueError"; \ + goto fail; \ + } + + CHECK_STRING("0.1", 0.1); + CHECK_STRING("1.234", 1.234); + CHECK_STRING("-1.35", -1.35); + CHECK_STRING(".1e01", 1.0); + CHECK_STRING("2.e-2", 0.02); + + CHECK_INVALID(" 0.1"); + CHECK_INVALID("\t\n-3"); + CHECK_INVALID(".123 "); + CHECK_INVALID("3\n"); + CHECK_INVALID("123abc"); + + Py_RETURN_NONE; + fail: + return raiseTestError("test_string_to_double", msg); +#undef CHECK_STRING +#undef CHECK_INVALID +} + + #ifdef HAVE_GETTIMEOFDAY /* Profiling of integer performance */ static void print_delta(int test, struct timeval *s, struct timeval *e) @@ -1223,6 +1271,7 @@ static PyMethodDef TestMethods[] = { {"test_empty_argparse", (PyCFunction)test_empty_argparse,METH_NOARGS}, {"test_null_strings", (PyCFunction)test_null_strings, METH_NOARGS}, {"test_string_from_format", (PyCFunction)test_string_from_format, METH_NOARGS}, + {"test_string_to_double", (PyCFunction)test_string_to_double, METH_NOARGS}, {"test_with_docstring", (PyCFunction)test_with_docstring, METH_NOARGS, PyDoc_STR("This is a pretty normal docstring.")}, diff --git a/Objects/complexobject.c b/Objects/complexobject.c index 691809f1345..4dd6151ef94 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -799,25 +799,26 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v) */ /* first look for forms starting with */ - errno = 0; - z = PyOS_ascii_strtod(s, &end); - if (end == s && errno == ENOMEM) - return PyErr_NoMemory(); - if (errno == ERANGE && fabs(z) >= 1.0) - goto overflow; - + z = PyOS_string_to_double(s, &end, PyExc_OverflowError); + if (z == -1.0 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_ValueError)) + PyErr_Clear(); + else + return NULL; + } if (end != s) { /* all 4 forms starting with land here */ s = end; if (*s == '+' || *s == '-') { /* j | j */ x = z; - errno = 0; - y = PyOS_ascii_strtod(s, &end); - if (end == s && errno == ENOMEM) - return PyErr_NoMemory(); - if (errno == ERANGE && fabs(y) >= 1.0) - goto overflow; + y = PyOS_string_to_double(s, &end, PyExc_OverflowError); + if (y == -1.0 && PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_ValueError)) + PyErr_Clear(); + else + return NULL; + } if (end != s) /* j */ s = end; @@ -877,11 +878,6 @@ complex_subtype_from_string(PyTypeObject *type, PyObject *v) PyErr_SetString(PyExc_ValueError, "complex() arg is a malformed string"); return NULL; - - overflow: - PyErr_SetString(PyExc_OverflowError, - "complex() arg overflow"); - return NULL; } static PyObject * diff --git a/Objects/floatobject.c b/Objects/floatobject.c index fdca3bef364..061987722ce 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -193,36 +193,20 @@ PyFloat_FromString(PyObject *v) /* We don't care about overflow or underflow. If the platform * supports them, infinities and signed zeroes (on underflow) are * fine. */ - errno = 0; - PyFPE_START_PROTECT("strtod", goto error) - x = PyOS_ascii_strtod(s, (char **)&end); - PyFPE_END_PROTECT(x) - if (end == s) { - if (errno == ENOMEM) - PyErr_NoMemory(); - else { - PyOS_snprintf(buffer, sizeof(buffer), - "invalid literal for float(): %.200s", s); - PyErr_SetString(PyExc_ValueError, buffer); - } + x = PyOS_string_to_double(s, (char **)&end, NULL); + if (x == -1.0 && PyErr_Occurred()) goto error; - } - /* Since end != s, the platform made *some* kind of sense out - of the input. Trust it. */ while (*end && isspace(Py_CHARMASK(*end))) end++; - if (end != last) { - if (*end == '\0') - PyErr_SetString(PyExc_ValueError, - "null byte in argument for float()"); - else { - PyOS_snprintf(buffer, sizeof(buffer), - "invalid literal for float(): %.200s", s); - PyErr_SetString(PyExc_ValueError, buffer); - } - goto error; + if (end == last) + result = PyFloat_FromDouble(x); + else { + PyOS_snprintf(buffer, sizeof(buffer), + "invalid literal for float(): %.200s", s); + PyErr_SetString(PyExc_ValueError, buffer); + result = NULL; } - result = PyFloat_FromDouble(x); + error: if (s_buffer) PyMem_FREE(s_buffer); diff --git a/Python/ast.c b/Python/ast.c index b08cf9b130c..1c79359ad26 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -3162,18 +3162,18 @@ parsenumber(struct compiling *c, const char *s) #ifndef WITHOUT_COMPLEX if (imflag) { compl.real = 0.; - PyFPE_START_PROTECT("atof", return 0) - compl.imag = PyOS_ascii_atof(s); - PyFPE_END_PROTECT(c) - return PyComplex_FromCComplex(compl); + compl.imag = PyOS_string_to_double(s, (char **)&end, NULL); + if (compl.imag == -1.0 && PyErr_Occurred()) + return NULL; + return PyComplex_FromCComplex(compl); } else #endif { - PyFPE_START_PROTECT("atof", return 0) - dx = PyOS_ascii_atof(s); - PyFPE_END_PROTECT(dx) - return PyFloat_FromDouble(dx); + dx = PyOS_string_to_double(s, NULL, NULL); + if (dx == -1.0 && PyErr_Occurred()) + return NULL; + return PyFloat_FromDouble(dx); } } diff --git a/Python/dtoa.c b/Python/dtoa.c index 1d96304ed07..82434bccc2f 100644 --- a/Python/dtoa.c +++ b/Python/dtoa.c @@ -61,6 +61,9 @@ * that hasn't been MALLOC'ed, private_mem should only be used when k <= * Kmax. * + * 7. _Py_dg_strtod has been modified so that it doesn't accept strings with + * leading whitespace. + * ***************************************************************/ /* Please send bug reports for the original dtoa.c code to David M. Gay (dmg @@ -1355,6 +1358,7 @@ _Py_dg_strtod(const char *s00, char **se) /* no break */ case 0: goto ret0; + /* modify original dtoa.c so that it doesn't accept leading whitespace case '\t': case '\n': case '\v': @@ -1362,6 +1366,7 @@ _Py_dg_strtod(const char *s00, char **se) case '\r': case ' ': continue; + */ default: goto break2; } diff --git a/Python/marshal.c b/Python/marshal.c index 4ad873eb77e..4e9c1294635 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -670,18 +670,17 @@ r_object(RFILE *p) { char buf[256]; double dx; + retval = NULL; n = r_byte(p); if (n == EOF || r_string(buf, (int)n, p) != n) { PyErr_SetString(PyExc_EOFError, "EOF read where object expected"); - retval = NULL; break; } buf[n] = '\0'; - retval = NULL; - PyFPE_START_PROTECT("atof", break) - dx = PyOS_ascii_atof(buf); - PyFPE_END_PROTECT(dx) + dx = PyOS_string_to_double(buf, NULL, NULL); + if (dx == -1.0 && PyErr_Occurred()) + break; retval = PyFloat_FromDouble(dx); break; } @@ -710,29 +709,27 @@ r_object(RFILE *p) { char buf[256]; Py_complex c; - n = r_byte(p); - if (n == EOF || r_string(buf, (int)n, p) != n) { - PyErr_SetString(PyExc_EOFError, - "EOF read where object expected"); - retval = NULL; - break; - } - buf[n] = '\0'; retval = NULL; - PyFPE_START_PROTECT("atof", break;) - c.real = PyOS_ascii_atof(buf); - PyFPE_END_PROTECT(c) n = r_byte(p); if (n == EOF || r_string(buf, (int)n, p) != n) { PyErr_SetString(PyExc_EOFError, "EOF read where object expected"); - retval = NULL; break; } buf[n] = '\0'; - PyFPE_START_PROTECT("atof", break) - c.imag = PyOS_ascii_atof(buf); - PyFPE_END_PROTECT(c) + c.real = PyOS_string_to_double(buf, NULL, NULL); + if (c.real == -1.0 && PyErr_Occurred()) + break; + n = r_byte(p); + if (n == EOF || r_string(buf, (int)n, p) != n) { + PyErr_SetString(PyExc_EOFError, + "EOF read where object expected"); + break; + } + buf[n] = '\0'; + c.imag = PyOS_string_to_double(buf, NULL, NULL); + if (c.imag == -1.0 && PyErr_Occurred()) + break; retval = PyComplex_FromCComplex(c); break; } diff --git a/Python/pystrtod.c b/Python/pystrtod.c index 104061056e7..66242d80781 100644 --- a/Python/pystrtod.c +++ b/Python/pystrtod.c @@ -35,7 +35,7 @@ #ifndef PY_NO_SHORT_FLOAT_REPR double -PyOS_ascii_strtod(const char *nptr, char **endptr) +_PyOS_ascii_strtod(const char *nptr, char **endptr) { double result; _Py_SET_53BIT_PRECISION_HEADER; @@ -64,7 +64,7 @@ PyOS_ascii_strtod(const char *nptr, char **endptr) */ double -PyOS_ascii_strtod(const char *nptr, char **endptr) +_PyOS_ascii_strtod(const char *nptr, char **endptr) { char *fail_pos; double val = -1.0; @@ -92,15 +92,10 @@ PyOS_ascii_strtod(const char *nptr, char **endptr) and underflows */ errno = 0; - /* We process any leading whitespace and the optional sign manually, - then pass the remainder to the system strtod. This ensures that - the result of an underflow has the correct sign. (bug #1725) */ - + /* We process the optional sign manually, then pass the remainder to + the system strtod. This ensures that the result of an underflow + has the correct sign. (bug #1725) */ p = nptr; - /* Skip leading space */ - while (Py_ISSPACE(*p)) - p++; - /* Process leading sign, if present */ if (*p == '-') { negate = 1; @@ -185,8 +180,7 @@ PyOS_ascii_strtod(const char *nptr, char **endptr) copy = (char *)PyMem_MALLOC(end - digits_pos + 1 + decimal_point_len); if (copy == NULL) { - if (endptr) - *endptr = (char *)nptr; + *endptr = (char *)nptr; errno = ENOMEM; return val; } @@ -227,27 +221,116 @@ PyOS_ascii_strtod(const char *nptr, char **endptr) got_val: if (negate && fail_pos != nptr) val = -val; - - if (endptr) - *endptr = fail_pos; + *endptr = fail_pos; return val; invalid_string: - if (endptr) - *endptr = (char*)nptr; + *endptr = (char*)nptr; errno = EINVAL; return -1.0; } #endif +/* PyOS_ascii_strtod is DEPRECATED in Python 3.1 */ + +double +PyOS_ascii_strtod(const char *nptr, char **endptr) +{ + char *fail_pos; + const char *p; + double x; + + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "PyOS_ascii_strtod and PyOS_ascii_atof are " + "deprecated. Use PyOS_string_to_double " + "instead.", 1) < 0) + return -1.0; + + /* _PyOS_ascii_strtod already does everything that we want, + except that it doesn't parse leading whitespace */ + p = nptr; + while (Py_ISSPACE(*p)) + p++; + x = _PyOS_ascii_strtod(p, &fail_pos); + if (fail_pos == p) + fail_pos = (char *)nptr; + if (endptr) + *endptr = (char *)fail_pos; + return x; +} + +/* PyOS_ascii_strtod is DEPRECATED in Python 3.1 */ + double PyOS_ascii_atof(const char *nptr) { return PyOS_ascii_strtod(nptr, NULL); } +/* PyOS_string_to_double is the recommended replacement for the deprecated + PyOS_ascii_strtod and PyOS_ascii_atof functions. It converts a + null-terminated byte string s (interpreted as a string of ASCII characters) + to a float. The string should not have leading or trailing whitespace (in + contrast, PyOS_ascii_strtod allows leading whitespace but not trailing + whitespace). The conversion is independent of the current locale. + + If endptr is NULL, try to convert the whole string. Raise ValueError and + return -1.0 if the string is not a valid representation of a floating-point + number. + + If endptr is non-NULL, try to convert as much of the string as possible. + If no initial segment of the string is the valid representation of a + floating-point number then *endptr is set to point to the beginning of the + string, -1.0 is returned and again ValueError is raised. + + On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine), + if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python + exception is raised. Otherwise, overflow_exception should point to a + a Python exception, this exception will be raised, -1.0 will be returned, + and *endptr will point just past the end of the converted value. + + If any other failure occurs (for example lack of memory), -1.0 is returned + and the appropriate Python exception will have been set. +*/ + +double +PyOS_string_to_double(const char *s, + char **endptr, + PyObject *overflow_exception) +{ + double x, result=-1.0; + char *fail_pos; + + errno = 0; + PyFPE_START_PROTECT("PyOS_string_to_double", return -1.0) + x = _PyOS_ascii_strtod(s, &fail_pos); + PyFPE_END_PROTECT(x) + + if (errno == ENOMEM) { + PyErr_NoMemory(); + fail_pos = (char *)s; + } + else if (!endptr && (fail_pos == s || *fail_pos != '\0')) + PyErr_Format(PyExc_ValueError, + "could not convert string to float: " + "%.200s", s); + else if (fail_pos == s) + PyErr_Format(PyExc_ValueError, + "could not convert string to float: " + "%.200s", s); + else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception) + PyErr_Format(overflow_exception, + "value too large to convert to float: " + "%.200s", s); + else + result = x; + + if (endptr != NULL) + *endptr = fail_pos; + return result; +} /* Given a string that may have a decimal point in the current locale, change it back to a dot. Since the string cannot get