clean up overflow handling in unicode_decode_call_errorhandler and unicode_encode_ucs1 (closes #22518)

This commit is contained in:
Benjamin Peterson 2014-09-29 18:18:57 -04:00
parent e025b52db0
commit a1c1be4e03
1 changed files with 56 additions and 18 deletions

View File

@ -4168,9 +4168,15 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
at the new input position), so we won't have to check space at the new input position), so we won't have to check space
when there are no errors in the rest of the string) */ when there are no errors in the rest of the string) */
Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode); Py_ssize_t replen = PyUnicode_GET_LENGTH(repunicode);
requiredsize = *outpos + replen + insize-newpos; requiredsize = *outpos;
if (requiredsize > PY_SSIZE_T_MAX - replen)
goto overflow;
requiredsize += replen;
if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos))
goto overflow;
requiredsize += insize - newpos;
if (requiredsize > outsize) { if (requiredsize > outsize) {
if (requiredsize<2*outsize) if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize)
requiredsize = 2*outsize; requiredsize = 2*outsize;
if (unicode_resize(output, requiredsize) < 0) if (unicode_resize(output, requiredsize) < 0)
goto onError; goto onError;
@ -4191,9 +4197,15 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
have+the replacement+the rest of the string (starting have+the replacement+the rest of the string (starting
at the new input position), so we won't have to check space at the new input position), so we won't have to check space
when there are no errors in the rest of the string) */ when there are no errors in the rest of the string) */
requiredsize = *outpos + repwlen + insize-newpos; requiredsize = *outpos;
if (requiredsize > PY_SSIZE_T_MAX - repwlen)
goto overflow;
requiredsize += repwlen;
if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos))
goto overflow;
requiredsize += insize - newpos;
if (requiredsize > outsize) { if (requiredsize > outsize) {
if (requiredsize < 2*outsize) if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize)
requiredsize = 2*outsize; requiredsize = 2*outsize;
if (unicode_resize(output, requiredsize) < 0) if (unicode_resize(output, requiredsize) < 0)
goto onError; goto onError;
@ -4210,6 +4222,11 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
onError: onError:
Py_XDECREF(restuple); Py_XDECREF(restuple);
return res; return res;
overflow:
PyErr_SetString(PyExc_OverflowError,
"decoded result is too long for a Python string");
goto onError;
} }
/* --- UTF-7 Codec -------------------------------------------------------- */ /* --- UTF-7 Codec -------------------------------------------------------- */
@ -6358,7 +6375,7 @@ unicode_encode_ucs1(PyObject *unicode,
Py_ssize_t collstart = pos; Py_ssize_t collstart = pos;
Py_ssize_t collend = pos; Py_ssize_t collend = pos;
/* find all unencodable characters */ /* find all unencodable characters */
while ((collend < size) && (PyUnicode_READ(kind, data, collend)>=limit)) while ((collend < size) && (PyUnicode_READ(kind, data, collend) >= limit))
++collend; ++collend;
/* cache callback name lookup (if not done yet, i.e. it's the first error) */ /* cache callback name lookup (if not done yet, i.e. it's the first error) */
if (known_errorHandler==-1) { if (known_errorHandler==-1) {
@ -6378,36 +6395,43 @@ unicode_encode_ucs1(PyObject *unicode,
raise_encode_exception(&exc, encoding, unicode, collstart, collend, reason); raise_encode_exception(&exc, encoding, unicode, collstart, collend, reason);
goto onError; goto onError;
case 2: /* replace */ case 2: /* replace */
while (collstart++<collend) while (collstart++ < collend)
*str++ = '?'; /* fall through */ *str++ = '?'; /* fall through */
case 3: /* ignore */ case 3: /* ignore */
pos = collend; pos = collend;
break; break;
case 4: /* xmlcharrefreplace */ case 4: /* xmlcharrefreplace */
respos = str - PyBytes_AS_STRING(res); respos = str - PyBytes_AS_STRING(res);
requiredsize = respos;
/* determine replacement size */ /* determine replacement size */
for (i = collstart, repsize = 0; i < collend; ++i) { for (i = collstart; i < collend; ++i) {
Py_UCS4 ch = PyUnicode_READ(kind, data, i); Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Py_ssize_t incr;
if (ch < 10) if (ch < 10)
repsize += 2+1+1; incr = 2+1+1;
else if (ch < 100) else if (ch < 100)
repsize += 2+2+1; incr = 2+2+1;
else if (ch < 1000) else if (ch < 1000)
repsize += 2+3+1; incr = 2+3+1;
else if (ch < 10000) else if (ch < 10000)
repsize += 2+4+1; incr = 2+4+1;
else if (ch < 100000) else if (ch < 100000)
repsize += 2+5+1; incr = 2+5+1;
else if (ch < 1000000) else if (ch < 1000000)
repsize += 2+6+1; incr = 2+6+1;
else { else {
assert(ch <= MAX_UNICODE); assert(ch <= MAX_UNICODE);
repsize += 2+7+1; incr = 2+7+1;
} }
if (requiredsize > PY_SSIZE_T_MAX - incr)
goto overflow;
requiredsize += incr;
} }
requiredsize = respos+repsize+(size-collend); if (requiredsize > PY_SSIZE_T_MAX - (size - collend))
goto overflow;
requiredsize += size - collend;
if (requiredsize > ressize) { if (requiredsize > ressize) {
if (requiredsize<2*ressize) if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize)
requiredsize = 2*ressize; requiredsize = 2*ressize;
if (_PyBytes_Resize(&res, requiredsize)) if (_PyBytes_Resize(&res, requiredsize))
goto onError; goto onError;
@ -6433,6 +6457,10 @@ unicode_encode_ucs1(PyObject *unicode,
if (repsize > 1) { if (repsize > 1) {
/* Make room for all additional bytes. */ /* Make room for all additional bytes. */
respos = str - PyBytes_AS_STRING(res); respos = str - PyBytes_AS_STRING(res);
if (ressize > PY_SSIZE_T_MAX - repsize - 1) {
Py_DECREF(repunicode);
goto overflow;
}
if (_PyBytes_Resize(&res, ressize+repsize-1)) { if (_PyBytes_Resize(&res, ressize+repsize-1)) {
Py_DECREF(repunicode); Py_DECREF(repunicode);
goto onError; goto onError;
@ -6451,9 +6479,15 @@ unicode_encode_ucs1(PyObject *unicode,
we won't have to check space for encodable characters) */ we won't have to check space for encodable characters) */
respos = str - PyBytes_AS_STRING(res); respos = str - PyBytes_AS_STRING(res);
repsize = PyUnicode_GET_LENGTH(repunicode); repsize = PyUnicode_GET_LENGTH(repunicode);
requiredsize = respos+repsize+(size-collend); requiredsize = respos;
if (requiredsize > PY_SSIZE_T_MAX - repsize)
goto overflow;
requiredsize += repsize;
if (requiredsize > PY_SSIZE_T_MAX - (size - collend))
goto overflow;
requiredsize += size - collend;
if (requiredsize > ressize) { if (requiredsize > ressize) {
if (requiredsize<2*ressize) if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize)
requiredsize = 2*ressize; requiredsize = 2*ressize;
if (_PyBytes_Resize(&res, requiredsize)) { if (_PyBytes_Resize(&res, requiredsize)) {
Py_DECREF(repunicode); Py_DECREF(repunicode);
@ -6491,6 +6525,10 @@ unicode_encode_ucs1(PyObject *unicode,
Py_XDECREF(exc); Py_XDECREF(exc);
return res; return res;
overflow:
PyErr_SetString(PyExc_OverflowError,
"encoded result is too long for a Python string");
onError: onError:
Py_XDECREF(res); Py_XDECREF(res);
Py_XDECREF(errorHandler); Py_XDECREF(errorHandler);