mirror of https://github.com/python/cpython
clean up overflow handling in unicode_decode_call_errorhandler and unicode_encode_ucs1 (closes #22518)
This commit is contained in:
parent
e025b52db0
commit
a1c1be4e03
|
@ -4168,9 +4168,15 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
|
||||||
at the new input position), so we won't have to check space
|
at the new input position), so we won't have to check space
|
||||||
when there are no errors in the rest of the string) */
|
when there are no errors in the rest of the string) */
|
||||||
Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode);
|
Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode);
|
||||||
requiredsize = *outpos + replen + insize-newpos;
|
requiredsize = *outpos;
|
||||||
|
if (requiredsize > PY_SSIZE_T_MAX - replen)
|
||||||
|
goto overflow;
|
||||||
|
requiredsize += replen;
|
||||||
|
if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos))
|
||||||
|
goto overflow;
|
||||||
|
requiredsize += insize - newpos;
|
||||||
if (requiredsize > outsize) {
|
if (requiredsize > outsize) {
|
||||||
if (requiredsize<2*outsize)
|
if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize)
|
||||||
requiredsize = 2*outsize;
|
requiredsize = 2*outsize;
|
||||||
if (unicode_resize(output, requiredsize) < 0)
|
if (unicode_resize(output, requiredsize) < 0)
|
||||||
goto onError;
|
goto onError;
|
||||||
|
@ -4191,9 +4197,15 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
|
||||||
have+the replacement+the rest of the string (starting
|
have+the replacement+the rest of the string (starting
|
||||||
at the new input position), so we won't have to check space
|
at the new input position), so we won't have to check space
|
||||||
when there are no errors in the rest of the string) */
|
when there are no errors in the rest of the string) */
|
||||||
requiredsize = *outpos + repwlen + insize-newpos;
|
requiredsize = *outpos;
|
||||||
|
if (requiredsize > PY_SSIZE_T_MAX - repwlen)
|
||||||
|
goto overflow;
|
||||||
|
requiredsize += repwlen;
|
||||||
|
if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos))
|
||||||
|
goto overflow;
|
||||||
|
requiredsize += insize - newpos;
|
||||||
if (requiredsize > outsize) {
|
if (requiredsize > outsize) {
|
||||||
if (requiredsize < 2*outsize)
|
if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize)
|
||||||
requiredsize = 2*outsize;
|
requiredsize = 2*outsize;
|
||||||
if (unicode_resize(output, requiredsize) < 0)
|
if (unicode_resize(output, requiredsize) < 0)
|
||||||
goto onError;
|
goto onError;
|
||||||
|
@ -4210,6 +4222,11 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
|
||||||
onError:
|
onError:
|
||||||
Py_XDECREF(restuple);
|
Py_XDECREF(restuple);
|
||||||
return res;
|
return res;
|
||||||
|
|
||||||
|
overflow:
|
||||||
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
|
"decoded result is too long for a Python string");
|
||||||
|
goto onError;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* --- UTF-7 Codec -------------------------------------------------------- */
|
/* --- UTF-7 Codec -------------------------------------------------------- */
|
||||||
|
@ -6358,7 +6375,7 @@ unicode_encode_ucs1(PyObject *unicode,
|
||||||
Py_ssize_t collstart = pos;
|
Py_ssize_t collstart = pos;
|
||||||
Py_ssize_t collend = pos;
|
Py_ssize_t collend = pos;
|
||||||
/* find all unencodable characters */
|
/* find all unencodable characters */
|
||||||
while ((collend < size) && (PyUnicode_READ(kind, data, collend)>=limit))
|
while ((collend < size) && (PyUnicode_READ(kind, data, collend) >= limit))
|
||||||
++collend;
|
++collend;
|
||||||
/* cache callback name lookup (if not done yet, i.e. it's the first error) */
|
/* cache callback name lookup (if not done yet, i.e. it's the first error) */
|
||||||
if (known_errorHandler==-1) {
|
if (known_errorHandler==-1) {
|
||||||
|
@ -6378,36 +6395,43 @@ unicode_encode_ucs1(PyObject *unicode,
|
||||||
raise_encode_exception(&exc, encoding, unicode, collstart, collend, reason);
|
raise_encode_exception(&exc, encoding, unicode, collstart, collend, reason);
|
||||||
goto onError;
|
goto onError;
|
||||||
case 2: /* replace */
|
case 2: /* replace */
|
||||||
while (collstart++<collend)
|
while (collstart++ < collend)
|
||||||
*str++ = '?'; /* fall through */
|
*str++ = '?'; /* fall through */
|
||||||
case 3: /* ignore */
|
case 3: /* ignore */
|
||||||
pos = collend;
|
pos = collend;
|
||||||
break;
|
break;
|
||||||
case 4: /* xmlcharrefreplace */
|
case 4: /* xmlcharrefreplace */
|
||||||
respos = str - PyBytes_AS_STRING(res);
|
respos = str - PyBytes_AS_STRING(res);
|
||||||
|
requiredsize = respos;
|
||||||
/* determine replacement size */
|
/* determine replacement size */
|
||||||
for (i = collstart, repsize = 0; i < collend; ++i) {
|
for (i = collstart; i < collend; ++i) {
|
||||||
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
|
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
|
||||||
|
Py_ssize_t incr;
|
||||||
if (ch < 10)
|
if (ch < 10)
|
||||||
repsize += 2+1+1;
|
incr = 2+1+1;
|
||||||
else if (ch < 100)
|
else if (ch < 100)
|
||||||
repsize += 2+2+1;
|
incr = 2+2+1;
|
||||||
else if (ch < 1000)
|
else if (ch < 1000)
|
||||||
repsize += 2+3+1;
|
incr = 2+3+1;
|
||||||
else if (ch < 10000)
|
else if (ch < 10000)
|
||||||
repsize += 2+4+1;
|
incr = 2+4+1;
|
||||||
else if (ch < 100000)
|
else if (ch < 100000)
|
||||||
repsize += 2+5+1;
|
incr = 2+5+1;
|
||||||
else if (ch < 1000000)
|
else if (ch < 1000000)
|
||||||
repsize += 2+6+1;
|
incr = 2+6+1;
|
||||||
else {
|
else {
|
||||||
assert(ch <= MAX_UNICODE);
|
assert(ch <= MAX_UNICODE);
|
||||||
repsize += 2+7+1;
|
incr = 2+7+1;
|
||||||
}
|
}
|
||||||
|
if (requiredsize > PY_SSIZE_T_MAX - incr)
|
||||||
|
goto overflow;
|
||||||
|
requiredsize += incr;
|
||||||
}
|
}
|
||||||
requiredsize = respos+repsize+(size-collend);
|
if (requiredsize > PY_SSIZE_T_MAX - (size - collend))
|
||||||
|
goto overflow;
|
||||||
|
requiredsize += size - collend;
|
||||||
if (requiredsize > ressize) {
|
if (requiredsize > ressize) {
|
||||||
if (requiredsize<2*ressize)
|
if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize)
|
||||||
requiredsize = 2*ressize;
|
requiredsize = 2*ressize;
|
||||||
if (_PyBytes_Resize(&res, requiredsize))
|
if (_PyBytes_Resize(&res, requiredsize))
|
||||||
goto onError;
|
goto onError;
|
||||||
|
@ -6433,6 +6457,10 @@ unicode_encode_ucs1(PyObject *unicode,
|
||||||
if (repsize > 1) {
|
if (repsize > 1) {
|
||||||
/* Make room for all additional bytes. */
|
/* Make room for all additional bytes. */
|
||||||
respos = str - PyBytes_AS_STRING(res);
|
respos = str - PyBytes_AS_STRING(res);
|
||||||
|
if (ressize > PY_SSIZE_T_MAX - repsize - 1) {
|
||||||
|
Py_DECREF(repunicode);
|
||||||
|
goto overflow;
|
||||||
|
}
|
||||||
if (_PyBytes_Resize(&res, ressize+repsize-1)) {
|
if (_PyBytes_Resize(&res, ressize+repsize-1)) {
|
||||||
Py_DECREF(repunicode);
|
Py_DECREF(repunicode);
|
||||||
goto onError;
|
goto onError;
|
||||||
|
@ -6451,9 +6479,15 @@ unicode_encode_ucs1(PyObject *unicode,
|
||||||
we won't have to check space for encodable characters) */
|
we won't have to check space for encodable characters) */
|
||||||
respos = str - PyBytes_AS_STRING(res);
|
respos = str - PyBytes_AS_STRING(res);
|
||||||
repsize = PyUnicode_GET_LENGTH(repunicode);
|
repsize = PyUnicode_GET_LENGTH(repunicode);
|
||||||
requiredsize = respos+repsize+(size-collend);
|
requiredsize = respos;
|
||||||
|
if (requiredsize > PY_SSIZE_T_MAX - repsize)
|
||||||
|
goto overflow;
|
||||||
|
requiredsize += repsize;
|
||||||
|
if (requiredsize > PY_SSIZE_T_MAX - (size - collend))
|
||||||
|
goto overflow;
|
||||||
|
requiredsize += size - collend;
|
||||||
if (requiredsize > ressize) {
|
if (requiredsize > ressize) {
|
||||||
if (requiredsize<2*ressize)
|
if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize)
|
||||||
requiredsize = 2*ressize;
|
requiredsize = 2*ressize;
|
||||||
if (_PyBytes_Resize(&res, requiredsize)) {
|
if (_PyBytes_Resize(&res, requiredsize)) {
|
||||||
Py_DECREF(repunicode);
|
Py_DECREF(repunicode);
|
||||||
|
@ -6491,6 +6525,10 @@ unicode_encode_ucs1(PyObject *unicode,
|
||||||
Py_XDECREF(exc);
|
Py_XDECREF(exc);
|
||||||
return res;
|
return res;
|
||||||
|
|
||||||
|
overflow:
|
||||||
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
|
"encoded result is too long for a Python string");
|
||||||
|
|
||||||
onError:
|
onError:
|
||||||
Py_XDECREF(res);
|
Py_XDECREF(res);
|
||||||
Py_XDECREF(errorHandler);
|
Py_XDECREF(errorHandler);
|
||||||
|
|
Loading…
Reference in New Issue