diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 4085d22b716..07832bacfa0 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4168,9 +4168,15 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, at the new input position), so we won't have to check space when there are no errors in the rest of the string) */ Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode); - requiredsize = *outpos + replen + insize-newpos; + requiredsize = *outpos; + if (requiredsize > PY_SSIZE_T_MAX - replen) + goto overflow; + requiredsize += replen; + if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos)) + goto overflow; + requiredsize += insize - newpos; if (requiredsize > outsize) { - if (requiredsize<2*outsize) + if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize) requiredsize = 2*outsize; if (unicode_resize(output, requiredsize) < 0) goto onError; @@ -4191,9 +4197,15 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, have+the replacement+the rest of the string (starting at the new input position), so we won't have to check space when there are no errors in the rest of the string) */ - requiredsize = *outpos + repwlen + insize-newpos; + requiredsize = *outpos; + if (requiredsize > PY_SSIZE_T_MAX - repwlen) + goto overflow; + requiredsize += repwlen; + if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos)) + goto overflow; + requiredsize += insize - newpos; if (requiredsize > outsize) { - if (requiredsize < 2*outsize) + if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize) requiredsize = 2*outsize; if (unicode_resize(output, requiredsize) < 0) goto onError; @@ -4210,6 +4222,11 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, onError: Py_XDECREF(restuple); return res; + + overflow: + PyErr_SetString(PyExc_OverflowError, + "decoded result is too long for a Python string"); + goto onError; } /* --- UTF-7 Codec -------------------------------------------------------- */ @@ -6358,7 +6375,7 @@ unicode_encode_ucs1(PyObject *unicode, Py_ssize_t collstart = pos; Py_ssize_t collend = pos; /* find all unecodable characters */ - while ((collend < size) && (PyUnicode_READ(kind, data, collend)>=limit)) + while ((collend < size) && (PyUnicode_READ(kind, data, collend) >= limit)) ++collend; /* cache callback name lookup (if not done yet, i.e. it's the first error) */ if (known_errorHandler==-1) { @@ -6378,36 +6395,43 @@ unicode_encode_ucs1(PyObject *unicode, raise_encode_exception(&exc, encoding, unicode, collstart, collend, reason); goto onError; case 2: /* replace */ - while (collstart++ PY_SSIZE_T_MAX - incr) + goto overflow; + requiredsize += incr; } - requiredsize = respos+repsize+(size-collend); + if (requiredsize > PY_SSIZE_T_MAX - (size - collend)) + goto overflow; + requiredsize += size - collend; if (requiredsize > ressize) { - if (requiredsize<2*ressize) + if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize) requiredsize = 2*ressize; if (_PyBytes_Resize(&res, requiredsize)) goto onError; @@ -6433,6 +6457,10 @@ unicode_encode_ucs1(PyObject *unicode, if (repsize > 1) { /* Make room for all additional bytes. */ respos = str - PyBytes_AS_STRING(res); + if (ressize > PY_SSIZE_T_MAX - repsize - 1) { + Py_DECREF(repunicode); + goto overflow; + } if (_PyBytes_Resize(&res, ressize+repsize-1)) { Py_DECREF(repunicode); goto onError; @@ -6451,9 +6479,15 @@ unicode_encode_ucs1(PyObject *unicode, we won't have to check space for encodable characters) */ respos = str - PyBytes_AS_STRING(res); repsize = PyUnicode_GET_LENGTH(repunicode); - requiredsize = respos+repsize+(size-collend); + requiredsize = respos; + if (requiredsize > PY_SSIZE_T_MAX - repsize) + goto overflow; + requiredsize += repsize; + if (requiredsize > PY_SSIZE_T_MAX - (size - collend)) + goto overflow; + requiredsize += size - collend; if (requiredsize > ressize) { - if (requiredsize<2*ressize) + if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize) requiredsize = 2*ressize; if (_PyBytes_Resize(&res, requiredsize)) { Py_DECREF(repunicode); @@ -6491,6 +6525,10 @@ unicode_encode_ucs1(PyObject *unicode, Py_XDECREF(exc); return res; + overflow: + PyErr_SetString(PyExc_OverflowError, + "encoded result is too long for a Python string"); + onError: Py_XDECREF(res); Py_XDECREF(errorHandler);