diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 314ec426287..ed2ffddadf4 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -3363,9 +3363,9 @@ If tabsize is not given, a tab size of 8 characters is assumed."); static PyObject* string_expandtabs(PyStringObject *self, PyObject *args) { - const char *e, *p; + const char *e, *p, *qe; char *q; - Py_ssize_t i, j, old_j; + Py_ssize_t i, j, incr; PyObject *u; int tabsize = 8; @@ -3373,63 +3373,70 @@ string_expandtabs(PyStringObject *self, PyObject *args) return NULL; /* First pass: determine size of output string */ - i = j = old_j = 0; - e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); + i = 0; /* chars up to and including most recent \n or \r */ + j = 0; /* chars since most recent \n or \r (use in tab calculations) */ + e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */ for (p = PyString_AS_STRING(self); p < e; p++) if (*p == '\t') { if (tabsize > 0) { - j += tabsize - (j % tabsize); - if (old_j > j) { - PyErr_SetString(PyExc_OverflowError, - "new string is too long"); - return NULL; - } - old_j = j; + incr = tabsize - (j % tabsize); + if (j > PY_SSIZE_T_MAX - incr) + goto overflow1; + j += incr; } } else { + if (j > PY_SSIZE_T_MAX - 1) + goto overflow1; j++; if (*p == '\n' || *p == '\r') { + if (i > PY_SSIZE_T_MAX - j) + goto overflow1; i += j; - old_j = j = 0; - if (i < 0) { - PyErr_SetString(PyExc_OverflowError, - "new string is too long"); - return NULL; - } + j = 0; } } - if ((i + j) < 0) { - PyErr_SetString(PyExc_OverflowError, "new string is too long"); - return NULL; - } + if (i > PY_SSIZE_T_MAX - j) + goto overflow1; /* Second pass: create output string and fill it */ u = PyString_FromStringAndSize(NULL, i + j); if (!u) return NULL; - j = 0; - q = PyString_AS_STRING(u); + j = 0; /* same as in first pass */ + q = PyString_AS_STRING(u); /* next output char */ + qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */ for (p = PyString_AS_STRING(self); p < e; p++) if (*p == '\t') { if (tabsize > 0) { i = tabsize - (j % tabsize); j += i; - while (i--) + while (i--) { + if (q >= qe) + goto overflow2; *q++ = ' '; + } } } else { - j++; + if (q >= qe) + goto overflow2; *q++ = *p; + j++; if (*p == '\n' || *p == '\r') j = 0; } return u; + + overflow2: + Py_DECREF(u); + overflow1: + PyErr_SetString(PyExc_OverflowError, "new string is too long"); + return NULL; } Py_LOCAL_INLINE(PyObject *) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 0470ef4bc5b..0aed3ab1abf 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -6495,7 +6495,8 @@ unicode_expandtabs(PyUnicodeObject *self, PyObject *args) Py_UNICODE *e; Py_UNICODE *p; Py_UNICODE *q; - Py_ssize_t i, j, old_j; + Py_UNICODE *qe; + Py_ssize_t i, j, incr; PyUnicodeObject *u; int tabsize = 8; @@ -6503,63 +6504,70 @@ unicode_expandtabs(PyUnicodeObject *self, PyObject *args) return NULL; /* First pass: determine size of output string */ - i = j = old_j = 0; - e = self->str + self->length; + i = 0; /* chars up to and including most recent \n or \r */ + j = 0; /* chars since most recent \n or \r (use in tab calculations) */ + e = self->str + self->length; /* end of input */ for (p = self->str; p < e; p++) if (*p == '\t') { if (tabsize > 0) { - j += tabsize - (j % tabsize); - if (old_j > j) { - PyErr_SetString(PyExc_OverflowError, - "new string is too long"); - return NULL; - } - old_j = j; - } + incr = tabsize - (j % tabsize); /* cannot overflow */ + if (j > PY_SSIZE_T_MAX - incr) + goto overflow1; + j += incr; + } } else { + if (j > PY_SSIZE_T_MAX - 1) + goto overflow1; j++; if (*p == '\n' || *p == '\r') { + if (i > PY_SSIZE_T_MAX - j) + goto overflow1; i += j; - old_j = j = 0; - if (i < 0) { - PyErr_SetString(PyExc_OverflowError, - "new string is too long"); - return NULL; - } + j = 0; } } - if ((i + j) < 0) { - PyErr_SetString(PyExc_OverflowError, "new string is too long"); - return NULL; - } + if (i > PY_SSIZE_T_MAX - j) + goto overflow1; /* Second pass: create output string and fill it */ u = _PyUnicode_New(i + j); if (!u) return NULL; - j = 0; - q = u->str; + j = 0; /* same as in first pass */ + q = u->str; /* next output char */ + qe = u->str + u->length; /* end of output */ for (p = self->str; p < e; p++) if (*p == '\t') { if (tabsize > 0) { i = tabsize - (j % tabsize); j += i; - while (i--) + while (i--) { + if (q >= qe) + goto overflow2; *q++ = ' '; + } } } else { - j++; + if (q >= qe) + goto overflow2; *q++ = *p; + j++; if (*p == '\n' || *p == '\r') j = 0; } return (PyObject*) u; + + overflow2: + Py_DECREF(u); + overflow1: + PyErr_SetString(PyExc_OverflowError, "new string is too long"); + return NULL; } PyDoc_STRVAR(find__doc__,