Internal plumbing changes for float parsing:

- Check for NaNs and infinities within PyOS_ascii_strtod.
- Simplify parsing in PyFloat_FromString, and handle out-of-memory
  errors properly.
This commit is contained in:
Mark Dickinson 2009-04-26 15:30:47 +00:00
parent 35f1c9470a
commit 6d65df1e8a
2 changed files with 78 additions and 87 deletions

View File

@ -162,7 +162,7 @@ PyFloat_FromDouble(double fval)
PyObject * PyObject *
PyFloat_FromString(PyObject *v) PyFloat_FromString(PyObject *v)
{ {
const char *s, *last, *end, *sp; const char *s, *last, *end;
double x; double x;
char buffer[256]; /* for errors */ char buffer[256]; /* for errors */
char *s_buffer = NULL; char *s_buffer = NULL;
@ -186,76 +186,40 @@ PyFloat_FromString(PyObject *v)
"float() argument must be a string or a number"); "float() argument must be a string or a number");
return NULL; return NULL;
} }
last = s + len; last = s + len;
while (*s && isspace(Py_CHARMASK(*s))) while (*s && isspace(Py_CHARMASK(*s)))
s++; s++;
if (*s == '\0') { /* We don't care about overflow or underflow. If the platform
PyErr_SetString(PyExc_ValueError, "empty string for float()"); * supports them, infinities and signed zeroes (on underflow) are
goto error; * fine. */
} errno = 0;
sp = s;
/* We don't care about overflow or underflow. If the platform supports
* them, infinities and signed zeroes (on underflow) are fine.
* However, strtod can return 0 for denormalized numbers. Note that
* whether strtod sets errno on underflow is not defined, so we can't
* key off errno.
*/
PyFPE_START_PROTECT("strtod", goto error) PyFPE_START_PROTECT("strtod", goto error)
x = PyOS_ascii_strtod(s, (char **)&end); x = PyOS_ascii_strtod(s, (char **)&end);
PyFPE_END_PROTECT(x) PyFPE_END_PROTECT(x)
errno = 0;
/* Believe it or not, Solaris 2.6 can move end *beyond* the null
byte at the end of the string, when the input is inf(inity). */
if (end > last)
end = last;
/* Check for inf and nan. This is done late because it rarely happens. */
if (end == s) { if (end == s) {
char *p = (char*)sp; if (errno == ENOMEM)
int sign = 1; PyErr_NoMemory();
else {
if (*p == '-') { PyOS_snprintf(buffer, sizeof(buffer),
sign = -1; "invalid literal for float(): %.200s", s);
p++; PyErr_SetString(PyExc_ValueError, buffer);
} }
if (*p == '+') {
p++;
}
if (PyOS_strnicmp(p, "inf", 4) == 0) {
if (s_buffer != NULL)
PyMem_FREE(s_buffer);
Py_RETURN_INF(sign);
}
if (PyOS_strnicmp(p, "infinity", 9) == 0) {
if (s_buffer != NULL)
PyMem_FREE(s_buffer);
Py_RETURN_INF(sign);
}
#ifdef Py_NAN
if(PyOS_strnicmp(p, "nan", 4) == 0) {
if (s_buffer != NULL)
PyMem_FREE(s_buffer);
Py_RETURN_NAN;
}
#endif
PyOS_snprintf(buffer, sizeof(buffer),
"invalid literal for float(): %.200s", s);
PyErr_SetString(PyExc_ValueError, buffer);
goto error; goto error;
} }
/* Since end != s, the platform made *some* kind of sense out /* Since end != s, the platform made *some* kind of sense out
of the input. Trust it. */ of the input. Trust it. */
while (*end && isspace(Py_CHARMASK(*end))) while (*end && isspace(Py_CHARMASK(*end)))
end++; end++;
if (*end != '\0') { if (end != last) {
PyOS_snprintf(buffer, sizeof(buffer), if (*end == '\0')
"invalid literal for float(): %.200s", s); PyErr_SetString(PyExc_ValueError,
PyErr_SetString(PyExc_ValueError, buffer); "null byte in argument for float()");
goto error; else {
} PyOS_snprintf(buffer, sizeof(buffer),
else if (end != last) { "invalid literal for float(): %.200s", s);
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError, buffer);
"null byte in argument for float()"); }
goto error; goto error;
} }
result = PyFloat_FromDouble(x); result = PyFloat_FromDouble(x);

View File

@ -94,6 +94,10 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
decimal_point_pos = NULL; decimal_point_pos = NULL;
/* Set errno to zero, so that we can distinguish zero results
and underflows */
errno = 0;
/* We process any leading whitespace and the optional sign manually, /* We process any leading whitespace and the optional sign manually,
then pass the remainder to the system strtod. This ensures that then pass the remainder to the system strtod. This ensures that
the result of an underflow has the correct sign. (bug #1725) */ the result of an underflow has the correct sign. (bug #1725) */
@ -107,27 +111,53 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
if (*p == '-') { if (*p == '-') {
negate = 1; negate = 1;
p++; p++;
} else if (*p == '+') { }
else if (*p == '+') {
p++; p++;
} }
/* What's left should begin with a digit, a decimal point, or one of /* Parse infinities and nans */
the letters i, I, n, N. It should not begin with 0x or 0X */ if (*p == 'i' || *p == 'I') {
if ((!ISDIGIT(*p) && if (PyOS_strnicmp(p, "inf", 3) == 0) {
*p != '.' && *p != 'i' && *p != 'I' && *p != 'n' && *p != 'N') val = Py_HUGE_VAL;
|| if (PyOS_strnicmp(p+3, "inity", 5) == 0)
(*p == '0' && (p[1] == 'x' || p[1] == 'X'))) fail_pos = (char *)p+8;
{ else
if (endptr) fail_pos = (char *)p+3;
*endptr = (char*)nptr; goto got_val;
errno = EINVAL; }
return val; else
goto invalid_string;
} }
digits_pos = p; #ifdef Py_NAN
if (*p == 'n' || *p == 'N') {
if (PyOS_strnicmp(p, "nan", 3) == 0) {
val = Py_NAN;
fail_pos = (char *)p+3;
goto got_val;
}
else
goto invalid_string;
}
#endif
if (decimal_point[0] != '.' || /* Some platform strtods accept hex floats; Python shouldn't (at the
moment), so we check explicitly for strings starting with '0x'. */
if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
goto invalid_string;
/* Check that what's left begins with a digit or decimal point */
if (!ISDIGIT(*p) && *p != '.')
goto invalid_string;
digits_pos = p;
if (decimal_point[0] != '.' ||
decimal_point[1] != 0) decimal_point[1] != 0)
{ {
/* Look for a '.' in the input; if present, it'll need to be
swapped for the current locale's decimal point before we
call strtod. On the other hand, if we find the current
locale's decimal point then the input is invalid. */
while (ISDIGIT(*p)) while (ISDIGIT(*p))
p++; p++;
@ -135,6 +165,7 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
{ {
decimal_point_pos = p++; decimal_point_pos = p++;
/* locate end of number */
while (ISDIGIT(*p)) while (ISDIGIT(*p))
p++; p++;
@ -147,27 +178,16 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
end = p; end = p;
} }
else if (strncmp(p, decimal_point, decimal_point_len) == 0) else if (strncmp(p, decimal_point, decimal_point_len) == 0)
{
/* Python bug #1417699 */ /* Python bug #1417699 */
if (endptr) goto invalid_string;
*endptr = (char*)nptr;
errno = EINVAL;
return val;
}
/* For the other cases, we need not convert the decimal /* For the other cases, we need not convert the decimal
point */ point */
} }
/* Set errno to zero, so that we can distinguish zero results if (decimal_point_pos) {
and underflows */
errno = 0;
if (decimal_point_pos)
{
char *copy, *c; char *copy, *c;
/* Create a copy of the input, with the '.' converted to the
/* We need to convert the '.' to the locale specific decimal locale-specific decimal point */
point */
copy = (char *)PyMem_MALLOC(end - digits_pos + copy = (char *)PyMem_MALLOC(end - digits_pos +
1 + decimal_point_len); 1 + decimal_point_len);
if (copy == NULL) { if (copy == NULL) {
@ -208,8 +228,9 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
} }
if (fail_pos == digits_pos) if (fail_pos == digits_pos)
fail_pos = (char *)nptr; goto invalid_string;
got_val:
if (negate && fail_pos != nptr) if (negate && fail_pos != nptr)
val = -val; val = -val;
@ -217,6 +238,12 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
*endptr = fail_pos; *endptr = fail_pos;
return val; return val;
invalid_string:
if (endptr)
*endptr = (char*)nptr;
errno = EINVAL;
return -1.0;
} }
#endif #endif