diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py index 8dd713056ef..320011bd0eb 100644 --- a/Lib/encodings/__init__.py +++ b/Lib/encodings/__init__.py @@ -54,6 +54,7 @@ def normalize_encoding(encoding): """ if isinstance(encoding, bytes): encoding = str(encoding, "ascii") + chars = [] punct = False for c in encoding: diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h index 749e7652fe0..a9d0a349d96 100644 --- a/Objects/stringlib/codecs.h +++ b/Objects/stringlib/codecs.h @@ -314,8 +314,9 @@ STRINGLIB(utf8_encoder)(PyObject *unicode, else if (Py_UNICODE_IS_SURROGATE(ch)) { Py_ssize_t startpos, endpos, newpos; Py_ssize_t k; - if (error_handler == _Py_ERROR_UNKNOWN) + if (error_handler == _Py_ERROR_UNKNOWN) { error_handler = get_error_handler(errors); + } startpos = i-1; endpos = startpos+1; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 0226e429c3a..e9e703f2789 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -316,20 +316,27 @@ typedef enum { static _Py_error_handler get_error_handler(const char *errors) { - if (errors == NULL || strcmp(errors, "strict") == 0) + if (errors == NULL || strcmp(errors, "strict") == 0) { return _Py_ERROR_STRICT; - if (strcmp(errors, "surrogateescape") == 0) + } + if (strcmp(errors, "surrogateescape") == 0) { return _Py_ERROR_SURROGATEESCAPE; - if (strcmp(errors, "replace") == 0) + } + if (strcmp(errors, "replace") == 0) { return _Py_ERROR_REPLACE; - if (strcmp(errors, "ignore") == 0) + } + if (strcmp(errors, "ignore") == 0) { return _Py_ERROR_IGNORE; - if (strcmp(errors, "backslashreplace") == 0) + } + if (strcmp(errors, "backslashreplace") == 0) { return _Py_ERROR_BACKSLASHREPLACE; - if (strcmp(errors, "surrogatepass") == 0) + } + if (strcmp(errors, "surrogatepass") == 0) { return _Py_ERROR_SURROGATEPASS; - if (strcmp(errors, "xmlcharrefreplace") == 0) + } + if (strcmp(errors, "xmlcharrefreplace") == 0) { return _Py_ERROR_XMLCHARREFREPLACE; + } return _Py_ERROR_OTHER; } @@ -5636,36 +5643,45 @@ _PyUnicode_EncodeUTF16(PyObject *str, if (kind == PyUnicode_4BYTE_KIND) { const Py_UCS4 *in = (const Py_UCS4 *)data; const Py_UCS4 *end = in + len; - while (in < end) - if (*in++ >= 0x10000) + while (in < end) { + if (*in++ >= 0x10000) { pairs++; + } + } } - if (len > PY_SSIZE_T_MAX / 2 - pairs - (byteorder == 0)) + if (len > PY_SSIZE_T_MAX / 2 - pairs - (byteorder == 0)) { return PyErr_NoMemory(); + } nsize = len + pairs + (byteorder == 0); v = PyBytes_FromStringAndSize(NULL, nsize * 2); - if (v == NULL) + if (v == NULL) { return NULL; + } /* output buffer is 2-bytes aligned */ assert(_Py_IS_ALIGNED(PyBytes_AS_STRING(v), 2)); out = (unsigned short *)PyBytes_AS_STRING(v); - if (byteorder == 0) + if (byteorder == 0) { *out++ = 0xFEFF; - if (len == 0) + } + if (len == 0) { goto done; + } if (kind == PyUnicode_1BYTE_KIND) { ucs1lib_utf16_encode((const Py_UCS1 *)data, len, &out, native_ordering); goto done; } - if (byteorder < 0) + if (byteorder < 0) { encoding = "utf-16-le"; - else if (byteorder > 0) + } + else if (byteorder > 0) { encoding = "utf-16-be"; - else + } + else { encoding = "utf-16"; + } pos = 0; while (pos < len) {