Internal plumbing changes for float parsing:
- check for nans and infs within PyOS_ascii_strtod
- simplify parsing in PyFloat_FromString, and handle out-of-memory errors properly
This commit is contained in:
parent
35f1c9470a
commit
6d65df1e8a
|
@ -162,7 +162,7 @@ PyFloat_FromDouble(double fval)
|
||||||
PyObject *
|
PyObject *
|
||||||
PyFloat_FromString(PyObject *v)
|
PyFloat_FromString(PyObject *v)
|
||||||
{
|
{
|
||||||
const char *s, *last, *end, *sp;
|
const char *s, *last, *end;
|
||||||
double x;
|
double x;
|
||||||
char buffer[256]; /* for errors */
|
char buffer[256]; /* for errors */
|
||||||
char *s_buffer = NULL;
|
char *s_buffer = NULL;
|
||||||
|
@ -186,76 +186,40 @@ PyFloat_FromString(PyObject *v)
|
||||||
"float() argument must be a string or a number");
|
"float() argument must be a string or a number");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
last = s + len;
|
last = s + len;
|
||||||
|
|
||||||
while (*s && isspace(Py_CHARMASK(*s)))
|
while (*s && isspace(Py_CHARMASK(*s)))
|
||||||
s++;
|
s++;
|
||||||
if (*s == '\0') {
|
/* We don't care about overflow or underflow. If the platform
|
||||||
PyErr_SetString(PyExc_ValueError, "empty string for float()");
|
* supports them, infinities and signed zeroes (on underflow) are
|
||||||
goto error;
|
* fine. */
|
||||||
}
|
errno = 0;
|
||||||
sp = s;
|
|
||||||
/* We don't care about overflow or underflow. If the platform supports
|
|
||||||
* them, infinities and signed zeroes (on underflow) are fine.
|
|
||||||
* However, strtod can return 0 for denormalized numbers. Note that
|
|
||||||
* whether strtod sets errno on underflow is not defined, so we can't
|
|
||||||
* key off errno.
|
|
||||||
*/
|
|
||||||
PyFPE_START_PROTECT("strtod", goto error)
|
PyFPE_START_PROTECT("strtod", goto error)
|
||||||
x = PyOS_ascii_strtod(s, (char **)&end);
|
x = PyOS_ascii_strtod(s, (char **)&end);
|
||||||
PyFPE_END_PROTECT(x)
|
PyFPE_END_PROTECT(x)
|
||||||
errno = 0;
|
|
||||||
/* Believe it or not, Solaris 2.6 can move end *beyond* the null
|
|
||||||
byte at the end of the string, when the input is inf(inity). */
|
|
||||||
if (end > last)
|
|
||||||
end = last;
|
|
||||||
/* Check for inf and nan. This is done late because it rarely happens. */
|
|
||||||
if (end == s) {
|
if (end == s) {
|
||||||
char *p = (char*)sp;
|
if (errno == ENOMEM)
|
||||||
int sign = 1;
|
PyErr_NoMemory();
|
||||||
|
else {
|
||||||
if (*p == '-') {
|
PyOS_snprintf(buffer, sizeof(buffer),
|
||||||
sign = -1;
|
"invalid literal for float(): %.200s", s);
|
||||||
p++;
|
PyErr_SetString(PyExc_ValueError, buffer);
|
||||||
}
|
}
|
||||||
if (*p == '+') {
|
|
||||||
p++;
|
|
||||||
}
|
|
||||||
if (PyOS_strnicmp(p, "inf", 4) == 0) {
|
|
||||||
if (s_buffer != NULL)
|
|
||||||
PyMem_FREE(s_buffer);
|
|
||||||
Py_RETURN_INF(sign);
|
|
||||||
}
|
|
||||||
if (PyOS_strnicmp(p, "infinity", 9) == 0) {
|
|
||||||
if (s_buffer != NULL)
|
|
||||||
PyMem_FREE(s_buffer);
|
|
||||||
Py_RETURN_INF(sign);
|
|
||||||
}
|
|
||||||
#ifdef Py_NAN
|
|
||||||
if(PyOS_strnicmp(p, "nan", 4) == 0) {
|
|
||||||
if (s_buffer != NULL)
|
|
||||||
PyMem_FREE(s_buffer);
|
|
||||||
Py_RETURN_NAN;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
PyOS_snprintf(buffer, sizeof(buffer),
|
|
||||||
"invalid literal for float(): %.200s", s);
|
|
||||||
PyErr_SetString(PyExc_ValueError, buffer);
|
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
/* Since end != s, the platform made *some* kind of sense out
|
/* Since end != s, the platform made *some* kind of sense out
|
||||||
of the input. Trust it. */
|
of the input. Trust it. */
|
||||||
while (*end && isspace(Py_CHARMASK(*end)))
|
while (*end && isspace(Py_CHARMASK(*end)))
|
||||||
end++;
|
end++;
|
||||||
if (*end != '\0') {
|
if (end != last) {
|
||||||
PyOS_snprintf(buffer, sizeof(buffer),
|
if (*end == '\0')
|
||||||
"invalid literal for float(): %.200s", s);
|
PyErr_SetString(PyExc_ValueError,
|
||||||
PyErr_SetString(PyExc_ValueError, buffer);
|
"null byte in argument for float()");
|
||||||
goto error;
|
else {
|
||||||
}
|
PyOS_snprintf(buffer, sizeof(buffer),
|
||||||
else if (end != last) {
|
"invalid literal for float(): %.200s", s);
|
||||||
PyErr_SetString(PyExc_ValueError,
|
PyErr_SetString(PyExc_ValueError, buffer);
|
||||||
"null byte in argument for float()");
|
}
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
result = PyFloat_FromDouble(x);
|
result = PyFloat_FromDouble(x);
|
||||||
|
|
|
@ -94,6 +94,10 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
|
||||||
|
|
||||||
decimal_point_pos = NULL;
|
decimal_point_pos = NULL;
|
||||||
|
|
||||||
|
/* Set errno to zero, so that we can distinguish zero results
|
||||||
|
and underflows */
|
||||||
|
errno = 0;
|
||||||
|
|
||||||
/* We process any leading whitespace and the optional sign manually,
|
/* We process any leading whitespace and the optional sign manually,
|
||||||
then pass the remainder to the system strtod. This ensures that
|
then pass the remainder to the system strtod. This ensures that
|
||||||
the result of an underflow has the correct sign. (bug #1725) */
|
the result of an underflow has the correct sign. (bug #1725) */
|
||||||
|
@ -107,27 +111,53 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
|
||||||
if (*p == '-') {
|
if (*p == '-') {
|
||||||
negate = 1;
|
negate = 1;
|
||||||
p++;
|
p++;
|
||||||
} else if (*p == '+') {
|
}
|
||||||
|
else if (*p == '+') {
|
||||||
p++;
|
p++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* What's left should begin with a digit, a decimal point, or one of
|
/* Parse infinities and nans */
|
||||||
the letters i, I, n, N. It should not begin with 0x or 0X */
|
if (*p == 'i' || *p == 'I') {
|
||||||
if ((!ISDIGIT(*p) &&
|
if (PyOS_strnicmp(p, "inf", 3) == 0) {
|
||||||
*p != '.' && *p != 'i' && *p != 'I' && *p != 'n' && *p != 'N')
|
val = Py_HUGE_VAL;
|
||||||
||
|
if (PyOS_strnicmp(p+3, "inity", 5) == 0)
|
||||||
(*p == '0' && (p[1] == 'x' || p[1] == 'X')))
|
fail_pos = (char *)p+8;
|
||||||
{
|
else
|
||||||
if (endptr)
|
fail_pos = (char *)p+3;
|
||||||
*endptr = (char*)nptr;
|
goto got_val;
|
||||||
errno = EINVAL;
|
}
|
||||||
return val;
|
else
|
||||||
|
goto invalid_string;
|
||||||
}
|
}
|
||||||
digits_pos = p;
|
#ifdef Py_NAN
|
||||||
|
if (*p == 'n' || *p == 'N') {
|
||||||
|
if (PyOS_strnicmp(p, "nan", 3) == 0) {
|
||||||
|
val = Py_NAN;
|
||||||
|
fail_pos = (char *)p+3;
|
||||||
|
goto got_val;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
goto invalid_string;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (decimal_point[0] != '.' ||
|
/* Some platform strtods accept hex floats; Python shouldn't (at the
|
||||||
|
moment), so we check explicitly for strings starting with '0x'. */
|
||||||
|
if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
|
||||||
|
goto invalid_string;
|
||||||
|
|
||||||
|
/* Check that what's left begins with a digit or decimal point */
|
||||||
|
if (!ISDIGIT(*p) && *p != '.')
|
||||||
|
goto invalid_string;
|
||||||
|
|
||||||
|
digits_pos = p;
|
||||||
|
if (decimal_point[0] != '.' ||
|
||||||
decimal_point[1] != 0)
|
decimal_point[1] != 0)
|
||||||
{
|
{
|
||||||
|
/* Look for a '.' in the input; if present, it'll need to be
|
||||||
|
swapped for the current locale's decimal point before we
|
||||||
|
call strtod. On the other hand, if we find the current
|
||||||
|
locale's decimal point then the input is invalid. */
|
||||||
while (ISDIGIT(*p))
|
while (ISDIGIT(*p))
|
||||||
p++;
|
p++;
|
||||||
|
|
||||||
|
@ -135,6 +165,7 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
|
||||||
{
|
{
|
||||||
decimal_point_pos = p++;
|
decimal_point_pos = p++;
|
||||||
|
|
||||||
|
/* locate end of number */
|
||||||
while (ISDIGIT(*p))
|
while (ISDIGIT(*p))
|
||||||
p++;
|
p++;
|
||||||
|
|
||||||
|
@ -147,27 +178,16 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
|
||||||
end = p;
|
end = p;
|
||||||
}
|
}
|
||||||
else if (strncmp(p, decimal_point, decimal_point_len) == 0)
|
else if (strncmp(p, decimal_point, decimal_point_len) == 0)
|
||||||
{
|
|
||||||
/* Python bug #1417699 */
|
/* Python bug #1417699 */
|
||||||
if (endptr)
|
goto invalid_string;
|
||||||
*endptr = (char*)nptr;
|
|
||||||
errno = EINVAL;
|
|
||||||
return val;
|
|
||||||
}
|
|
||||||
/* For the other cases, we need not convert the decimal
|
/* For the other cases, we need not convert the decimal
|
||||||
point */
|
point */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set errno to zero, so that we can distinguish zero results
|
if (decimal_point_pos) {
|
||||||
and underflows */
|
|
||||||
errno = 0;
|
|
||||||
|
|
||||||
if (decimal_point_pos)
|
|
||||||
{
|
|
||||||
char *copy, *c;
|
char *copy, *c;
|
||||||
|
/* Create a copy of the input, with the '.' converted to the
|
||||||
/* We need to convert the '.' to the locale specific decimal
|
locale-specific decimal point */
|
||||||
point */
|
|
||||||
copy = (char *)PyMem_MALLOC(end - digits_pos +
|
copy = (char *)PyMem_MALLOC(end - digits_pos +
|
||||||
1 + decimal_point_len);
|
1 + decimal_point_len);
|
||||||
if (copy == NULL) {
|
if (copy == NULL) {
|
||||||
|
@ -208,8 +228,9 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fail_pos == digits_pos)
|
if (fail_pos == digits_pos)
|
||||||
fail_pos = (char *)nptr;
|
goto invalid_string;
|
||||||
|
|
||||||
|
got_val:
|
||||||
if (negate && fail_pos != nptr)
|
if (negate && fail_pos != nptr)
|
||||||
val = -val;
|
val = -val;
|
||||||
|
|
||||||
|
@ -217,6 +238,12 @@ PyOS_ascii_strtod(const char *nptr, char **endptr)
|
||||||
*endptr = fail_pos;
|
*endptr = fail_pos;
|
||||||
|
|
||||||
return val;
|
return val;
|
||||||
|
|
||||||
|
invalid_string:
|
||||||
|
if (endptr)
|
||||||
|
*endptr = (char*)nptr;
|
||||||
|
errno = EINVAL;
|
||||||
|
return -1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue