Issue #28561: Clean up UTF-8 encoder: remove dead code, update comments, etc.

Patch by Xiang Zhang.
This commit is contained in:
Serhiy Storchaka 2016-10-30 18:25:27 +02:00
parent b7d14a09c2
commit 998c9cdd42
1 changed file with 4 additions and 10 deletions

View File

@@ -262,9 +262,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
Py_ssize_t size, Py_ssize_t size,
const char *errors) const char *errors)
{ {
#define MAX_SHORT_UNICHARS 300 /* largest size we'll do on the stack */ Py_ssize_t i; /* index into data of next input character */
Py_ssize_t i; /* index into s of next input byte */
char *p; /* next free byte in output buffer */ char *p; /* next free byte in output buffer */
#if STRINGLIB_SIZEOF_CHAR > 1 #if STRINGLIB_SIZEOF_CHAR > 1
PyObject *error_handler_obj = NULL; PyObject *error_handler_obj = NULL;
@@ -389,7 +387,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
goto error; goto error;
/* subtract preallocated bytes */ /* subtract preallocated bytes */
writer.min_size -= max_char_size; writer.min_size -= max_char_size * (newpos - startpos);
if (PyBytes_Check(rep)) { if (PyBytes_Check(rep)) {
p = _PyBytesWriter_WriteBytes(&writer, p, p = _PyBytesWriter_WriteBytes(&writer, p,
@@ -402,14 +400,12 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
goto error; goto error;
if (!PyUnicode_IS_ASCII(rep)) { if (!PyUnicode_IS_ASCII(rep)) {
raise_encode_exception(&exc, "utf-8", raise_encode_exception(&exc, "utf-8", unicode,
unicode, startpos, endpos,
i-1, i,
"surrogates not allowed"); "surrogates not allowed");
goto error; goto error;
} }
assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
p = _PyBytesWriter_WriteBytes(&writer, p, p = _PyBytesWriter_WriteBytes(&writer, p,
PyUnicode_DATA(rep), PyUnicode_DATA(rep),
PyUnicode_GET_LENGTH(rep)); PyUnicode_GET_LENGTH(rep));
@@ -463,8 +459,6 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
_PyBytesWriter_Dealloc(&writer); _PyBytesWriter_Dealloc(&writer);
return NULL; return NULL;
#endif #endif
#undef MAX_SHORT_UNICHARS
} }
/* The pattern for constructing UCS2-repeated masks. */ /* The pattern for constructing UCS2-repeated masks. */