bpo-29240: Fix locale encodings in UTF-8 Mode (#5170)

Modify locale.localeconv(), time.tzname, os.strerror() and other functions to ignore the UTF-8 Mode: always use the current locale encoding. Changes: * Add _Py_DecodeLocaleEx() and _Py_EncodeLocaleEx(). On decoding or encoding error, they return the position of the error and an error message which are used to raise Unicode errors in PyUnicode_DecodeLocale() and PyUnicode_EncodeLocale(). * Replace _Py_DecodeCurrentLocale() with _Py_DecodeLocaleEx(). * PyUnicode_DecodeLocale() now uses _Py_DecodeLocaleEx() for all cases, especially for the strict error handler. * Add _Py_DecodeUTF8Ex(): returns more information on decoding error and supports the strict error handler. * Rename _Py_EncodeUTF8_surrogateescape() to _Py_EncodeUTF8Ex(). * Replace _Py_EncodeCurrentLocale() with _Py_EncodeLocaleEx(). * Ignore the UTF-8 mode to encode/decode localeconv(), strerror() and time zone name. * PyUnicode_DecodeLocale(), PyUnicode_DecodeLocaleAndSize() and PyUnicode_EncodeLocale() now ignore the UTF-8 mode: always use the "current" locale. * Remove _PyUnicode_DecodeCurrentLocale(), _PyUnicode_DecodeCurrentLocaleAndSize() and _PyUnicode_EncodeCurrentLocale().
2018-01-15 10:45:49 +01:00 · 2018-01-15 10:45:49 +01:00 · 7ed7aead95
parent ee3b83547c
commit 7ed7aead95
12 changed files with 484 additions and 517 deletions
--- a/Doc/c-api/sys.rst
+++ b/Doc/c-api/sys.rst
@ -106,6 +106,16 @@ Operating System Utilities
   surrogate character, escape the bytes using the surrogateescape error
   handler instead of decoding them.
   Encoding, highest priority to lowest priority:
   * ``UTF-8`` on macOS and Android;
   * ``UTF-8`` if the Python UTF-8 mode is enabled;
   * ``ASCII`` if the ``LC_CTYPE`` locale is ``"C"``,
     ``nl_langinfo(CODESET)`` returns the ``ASCII`` encoding (or an alias),
     and :c:func:`mbstowcs` and :c:func:`wcstombs` functions use the
     ``ISO-8859-1`` encoding.
   * the current locale encoding.
   Return a pointer to a newly allocated wide character string, use
   :c:func:`PyMem_RawFree` to free the memory. If size is not ``NULL``, write
   the number of wide characters excluding the null character into ``*size``
@ -137,6 +147,18 @@ Operating System Utilities
   :ref:`surrogateescape error handler <surrogateescape>`: surrogate characters
   in the range U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
   Encoding, highest priority to lowest priority:
   * ``UTF-8`` on macOS and Android;
   * ``UTF-8`` if the Python UTF-8 mode is enabled;
   * ``ASCII`` if the ``LC_CTYPE`` locale is ``"C"``,
     ``nl_langinfo(CODESET)`` returns the ``ASCII`` encoding (or an alias),
     and :c:func:`mbstowcs` and :c:func:`wcstombs` functions use the
     ``ISO-8859-1`` encoding.
   * the current locale encoding.
   The function uses the UTF-8 encoding in the Python UTF-8 mode.
   Return a pointer to a newly allocated byte string, use :c:func:`PyMem_Free`
   to free the memory. Return ``NULL`` on encoding error or memory allocation
   error
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@ -770,12 +770,20 @@ system.
   :c:data:`Py_FileSystemDefaultEncoding` (the locale encoding read at
   Python startup).
   This function ignores the Python UTF-8 mode.
   .. seealso::
      The :c:func:`Py_DecodeLocale` function.
   .. versionadded:: 3.3
   .. versionchanged:: 3.7
      The function now also uses the current locale encoding for the
      ``surrogateescape`` error handler. Previously, :c:func:`Py_DecodeLocale`
      was used for the ``surrogateescape``, and the current locale encoding was
      used for ``strict``.
 .. c:function:: PyObject* PyUnicode_DecodeLocale(const char *str, const char *errors)
@ -797,12 +805,20 @@ system.
   :c:data:`Py_FileSystemDefaultEncoding` (the locale encoding read at
   Python startup).
   This function ignores the Python UTF-8 mode.
   .. seealso::
      The :c:func:`Py_EncodeLocale` function.
   .. versionadded:: 3.3
   .. versionchanged:: 3.7
      The function now also uses the current locale encoding for the
      ``surrogateescape`` error handler. Previously, :c:func:`Py_EncodeLocale`
      was used for the ``surrogateescape``, and the current locale encoding was
      used for ``strict``.
 File System Encoding
 """"""""""""""""""""
--- a/Include/fileutils.h
+++ b/Include/fileutils.h
@ -20,18 +20,41 @@ PyAPI_FUNC(char*) _Py_EncodeLocaleRaw(
 #endif
 #ifdef Py_BUILD_CORE
-PyAPI_FUNC(wchar_t*) _Py_DecodeUTF8_surrogateescape(
+PyAPI_FUNC(int) _Py_DecodeUTF8Ex(
    const char *s,
    Py_ssize_t size,
    size_t *p_wlen);
 PyAPI_FUNC(wchar_t *) _Py_DecodeCurrentLocale(
    const char *arg,
-    size_t *size);
+    Py_ssize_t arglen,
    wchar_t **wstr,
    size_t *wlen,
    const char **reason,
    int surrogateescape);
-PyAPI_FUNC(char*) _Py_EncodeCurrentLocale(
+PyAPI_FUNC(int) _Py_EncodeUTF8Ex(
    const wchar_t *text,
-    size_t *error_pos);
+    char **str,
    size_t *error_pos,
    const char **reason,
    int raw_malloc,
    int surrogateescape);
 PyAPI_FUNC(wchar_t*) _Py_DecodeUTF8_surrogateescape(
    const char *arg,
    Py_ssize_t arglen);
 PyAPI_FUNC(int) _Py_DecodeLocaleEx(
    const char *arg,
    wchar_t **wstr,
    size_t *wlen,
    const char **reason,
    int current_locale,
    int surrogateescape);
 PyAPI_FUNC(int) _Py_EncodeLocaleEx(
    const wchar_t *text,
    char **str,
    size_t *error_pos,
    const char **reason,
    int current_locale,
    int surrogateescape);
 #endif
 #ifndef Py_LIMITED_API
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@ -1810,20 +1810,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
    PyObject *unicode,
    const char *errors
    );
 PyAPI_FUNC(PyObject*) _PyUnicode_DecodeCurrentLocale(
    const char *str,
    const char *errors);
 PyAPI_FUNC(PyObject*) _PyUnicode_DecodeCurrentLocaleAndSize(
    const char *str,
    Py_ssize_t len,
    const char *errors);
 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCurrentLocale(
    PyObject *unicode,
    const char *errors
    );
 #endif
 /* --- File system encoding ---------------------------------------------- */
--- a/Modules/_datetimemodule.c
+++ b/Modules/_datetimemodule.c
@ -696,7 +696,7 @@ static int parse_isoformat_date(const char *dtstr,
    if (NULL == p) {
        return -1;
    }
-    
+
    if (*(p++) != '-') {
        return -2;
    }
--- a/Modules/_localemodule.c
+++ b/Modules/_localemodule.c
@ -572,8 +572,9 @@ PyIntl_bind_textdomain_codeset(PyObject* self,PyObject*args)
    if (!PyArg_ParseTuple(args, "sz", &domain, &codeset))
        return NULL;
    codeset = bind_textdomain_codeset(domain, codeset);
-    if (codeset)
+    if (codeset) {
        return PyUnicode_DecodeLocale(codeset, NULL);
    }
    Py_RETURN_NONE;
 }
 #endif
--- a/Modules/getpath.c
+++ b/Modules/getpath.c
@ -449,8 +449,8 @@ search_for_exec_prefix(const _PyCoreConfig *core_config,
            n = fread(buf, 1, MAXPATHLEN, f);
            buf[n] = '\0';
            fclose(f);
-            rel_builddir_path = _Py_DecodeUTF8_surrogateescape(buf, n, NULL);
+            rel_builddir_path = _Py_DecodeUTF8_surrogateescape(buf, n);
-            if (rel_builddir_path != NULL) {
+            if (rel_builddir_path) {
                wcsncpy(exec_prefix, calculate->argv0_path, MAXPATHLEN);
                exec_prefix[MAXPATHLEN] = L'\0';
                joinpath(exec_prefix, rel_builddir_path);
--- a/Modules/readline.c
+++ b/Modules/readline.c
@ -132,13 +132,13 @@ static PyModuleDef readlinemodule;
 static PyObject *
 encode(PyObject *b)
 {
-    return _PyUnicode_EncodeCurrentLocale(b, "surrogateescape");
+    return PyUnicode_EncodeLocale(b, "surrogateescape");
 }
 static PyObject *
 decode(const char *s)
 {
-    return _PyUnicode_DecodeCurrentLocale(s, "surrogateescape");
+    return PyUnicode_DecodeLocale(s, "surrogateescape");
 }
--- a/Modules/timemodule.c
+++ b/Modules/timemodule.c
@ -418,11 +418,11 @@ tmtotuple(struct tm *p
    SET(8, p->tm_isdst);
 #ifdef HAVE_STRUCT_TM_TM_ZONE
    PyStructSequence_SET_ITEM(v, 9,
-        _PyUnicode_DecodeCurrentLocale(p->tm_zone, "surrogateescape"));
+        PyUnicode_DecodeLocale(p->tm_zone, "surrogateescape"));
    SET(10, p->tm_gmtoff);
 #else
    PyStructSequence_SET_ITEM(v, 9,
-        _PyUnicode_DecodeCurrentLocale(zone, "surrogateescape"));
+        PyUnicode_DecodeLocale(zone, "surrogateescape"));
    PyStructSequence_SET_ITEM(v, 10, _PyLong_FromTime_t(gmtoff));
 #endif /* HAVE_STRUCT_TM_TM_ZONE */
 #undef SET
@ -809,8 +809,7 @@ time_strftime(PyObject *self, PyObject *args)
 #ifdef HAVE_WCSFTIME
            ret = PyUnicode_FromWideChar(outbuf, buflen);
 #else
-            ret = _PyUnicode_DecodeCurrentLocaleAndSize(outbuf, buflen,
+            ret = PyUnicode_DecodeLocaleAndSize(outbuf, buflen, "surrogateescape");
                                                        "surrogateescape");
 #endif
            PyMem_Free(outbuf);
            break;
@ -1541,8 +1540,8 @@ PyInit_timezone(PyObject *m) {
    PyModule_AddIntConstant(m, "altzone", timezone-3600);
 #endif
    PyModule_AddIntConstant(m, "daylight", daylight);
-    otz0 = _PyUnicode_DecodeCurrentLocale(tzname[0], "surrogateescape");
+    otz0 = PyUnicode_DecodeLocale(tzname[0], "surrogateescape");
-    otz1 = _PyUnicode_DecodeCurrentLocale(tzname[1], "surrogateescape");
+    otz1 = PyUnicode_DecodeLocale(tzname[1], "surrogateescape");
    PyModule_AddObject(m, "tzname", Py_BuildValue("(NN)", otz0, otz1));
 #else /* !HAVE_TZNAME || __GLIBC__ || __CYGWIN__*/
    {
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -3327,53 +3327,6 @@ PyUnicode_AsEncodedObject(PyObject *unicode,
    return NULL;
 }
 static size_t
 wcstombs_errorpos(const wchar_t *wstr)
 {
    size_t len;
 #if SIZEOF_WCHAR_T == 2
    wchar_t buf[3];
 #else
    wchar_t buf[2];
 #endif
    char outbuf[MB_LEN_MAX];
    const wchar_t *start, *previous;
 #if SIZEOF_WCHAR_T == 2
    buf[2] = 0;
 #else
    buf[1] = 0;
 #endif
    start = wstr;
    while (*wstr != L'\0')
    {
        previous = wstr;
 #if SIZEOF_WCHAR_T == 2
        if (Py_UNICODE_IS_HIGH_SURROGATE(wstr[0])
            && Py_UNICODE_IS_LOW_SURROGATE(wstr[1]))
        {
            buf[0] = wstr[0];
            buf[1] = wstr[1];
            wstr += 2;
        }
        else {
            buf[0] = *wstr;
            buf[1] = 0;
            wstr++;
        }
 #else
        buf[0] = *wstr;
        wstr++;
 #endif
        len = wcstombs(outbuf, buf, sizeof(outbuf));
        if (len == (size_t)-1)
            return previous - start;
    }
    /* failed to find the unencodable character */
    return 0;
 }
 static int
 locale_error_handler(const char *errors, int *surrogateescape)
 {
@ -3396,130 +3349,60 @@ locale_error_handler(const char *errors, int *surrogateescape)
 }
 static PyObject *
-unicode_encode_locale(PyObject *unicode, const char *errors, int current_locale)
+unicode_encode_locale(PyObject *unicode, const char *errors,
                      int current_locale)
 {
    Py_ssize_t wlen, wlen2;
    wchar_t *wstr;
    char *errmsg;
    PyObject *bytes, *reason, *exc;
    size_t error_pos, errlen;
    int surrogateescape;
    if (locale_error_handler(errors, &surrogateescape) < 0)
        return NULL;
-    wstr = PyUnicode_AsWideCharString(unicode, &wlen);
+    Py_ssize_t wlen;
-    if (wstr == NULL)
+    wchar_t *wstr = PyUnicode_AsWideCharString(unicode, &wlen);
    if (wstr == NULL) {
        return NULL;
    }
-    wlen2 = wcslen(wstr);
+    Py_ssize_t wlen2 = wcslen(wstr);
    if (wlen2 != wlen) {
        PyMem_Free(wstr);
        PyErr_SetString(PyExc_ValueError, "embedded null character");
        return NULL;
    }
-    if (surrogateescape) {
+    char *str;
-        /* "surrogateescape" error handler */
+    size_t error_pos;
-        char *str;
+    const char *reason;
-
+    int res = _Py_EncodeLocaleEx(wstr, &str, &error_pos, &reason,
-        if (current_locale) {
+                                 current_locale, surrogateescape);
-            str = _Py_EncodeCurrentLocale(wstr, &error_pos);
+    if (res != 0) {
        if (res == -2) {
            PyObject *exc;
            exc = PyObject_CallFunction(PyExc_UnicodeEncodeError, "sOnns",
                    "locale", unicode,
                    (Py_ssize_t)error_pos,
                    (Py_ssize_t)(error_pos+1),
                    reason);
            if (exc != NULL) {
                PyCodec_StrictErrors(exc);
                Py_DECREF(exc);
            }
            return NULL;
        }
        else {
-            str = Py_EncodeLocale(wstr, &error_pos);
+            PyErr_NoMemory();
        }
        if (str == NULL) {
            if (error_pos == (size_t)-1) {
                PyErr_NoMemory();
                PyMem_Free(wstr);
                return NULL;
            }
            else {
                goto encode_error;
            }
        }
        PyMem_Free(wstr);
        bytes = PyBytes_FromString(str);
        if (current_locale) {
            PyMem_RawFree(str);
        }
        else {
            PyMem_Free(str);
        }
    }
    else {
        /* strict mode */
        size_t len, len2;
        len = wcstombs(NULL, wstr, 0);
        if (len == (size_t)-1) {
            error_pos = (size_t)-1;
            goto encode_error;
        }
        bytes = PyBytes_FromStringAndSize(NULL, len);
        if (bytes == NULL) {
            PyMem_Free(wstr);
            return NULL;
        }
        len2 = wcstombs(PyBytes_AS_STRING(bytes), wstr, len+1);
        if (len2 == (size_t)-1 || len2 > len) {
            Py_DECREF(bytes);
            error_pos = (size_t)-1;
            goto encode_error;
        }
        PyMem_Free(wstr);
    }
    return bytes;
 encode_error:
    errmsg = strerror(errno);
    assert(errmsg != NULL);
    if (error_pos == (size_t)-1)
        error_pos = wcstombs_errorpos(wstr);
    PyMem_Free(wstr);
-    wstr = Py_DecodeLocale(errmsg, &errlen);
+    PyObject *bytes = PyBytes_FromString(str);
-    if (wstr != NULL) {
+    PyMem_RawFree(str);
-        reason = PyUnicode_FromWideChar(wstr, errlen);
+    return bytes;
        PyMem_RawFree(wstr);
    } else {
        errmsg = NULL;
    }
    if (errmsg == NULL)
        reason = PyUnicode_FromString(
            "wcstombs() encountered an unencodable "
            "wide character");
    if (reason == NULL)
        return NULL;
    exc = PyObject_CallFunction(PyExc_UnicodeEncodeError, "sOnnO",
                                "locale", unicode,
                                (Py_ssize_t)error_pos,
                                (Py_ssize_t)(error_pos+1),
                                reason);
    Py_DECREF(reason);
    if (exc != NULL) {
        PyCodec_StrictErrors(exc);
        Py_DECREF(exc);
    }
    return NULL;
 }
 PyObject *
 PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
 {
    return unicode_encode_locale(unicode, errors, 0);
 }
 PyObject *
 _PyUnicode_EncodeCurrentLocale(PyObject *unicode, const char *errors)
 {
    return unicode_encode_locale(unicode, errors, 1);
 }
@ -3687,51 +3570,11 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode,
    return NULL;
 }
 static size_t
 mbstowcs_errorpos(const char *str, size_t len)
 {
 #ifdef HAVE_MBRTOWC
    const char *start = str;
    mbstate_t mbs;
    size_t converted;
    wchar_t ch;
    memset(&mbs, 0, sizeof mbs);
    while (len)
    {
        converted = mbrtowc(&ch, str, len, &mbs);
        if (converted == 0)
            /* Reached end of string */
            break;
        if (converted == (size_t)-1 || converted == (size_t)-2) {
            /* Conversion error or incomplete character */
            return str - start;
        }
        else {
            str += converted;
            len -= converted;
        }
    }
    /* failed to find the undecodable byte sequence */
    return 0;
 #endif
    return 0;
 }
 static PyObject*
 unicode_decode_locale(const char *str, Py_ssize_t len, const char *errors,
                      int current_locale)
 {
    wchar_t smallbuf[256];
    size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf);
    wchar_t *wstr;
    size_t wlen, wlen2;
    PyObject *unicode;
    int surrogateescape;
    size_t error_pos, errlen;
    char *errmsg;
    PyObject *exc, *reason = NULL;   /* initialize to prevent gcc warning */
    if (locale_error_handler(errors, &surrogateescape) < 0)
        return NULL;
@ -3740,113 +3583,47 @@ unicode_decode_locale(const char *str, Py_ssize_t len, const char *errors,
        return NULL;
    }
-    if (surrogateescape) {
+    wchar_t *wstr;
-        /* "surrogateescape" error handler */
+    size_t wlen;
-        if (current_locale) {
+    const char *reason;
-            wstr = _Py_DecodeCurrentLocale(str, &wlen);
+    int res = _Py_DecodeLocaleEx(str, &wstr, &wlen, &reason,
                                 current_locale, surrogateescape);
    if (res != 0) {
        if (res == -2) {
            PyObject *exc;
            exc = PyObject_CallFunction(PyExc_UnicodeDecodeError, "sy#nns",
                                        "locale", str, len,
                                        (Py_ssize_t)wlen,
                                        (Py_ssize_t)(wlen + 1),
                                        reason);
            if (exc != NULL) {
                PyCodec_StrictErrors(exc);
                Py_DECREF(exc);
            }
        }
        else {
-            wstr = Py_DecodeLocale(str, &wlen);
+            PyErr_NoMemory();
        }
        if (wstr == NULL) {
            if (wlen == (size_t)-1)
                PyErr_NoMemory();
            else
                PyErr_SetFromErrno(PyExc_OSError);
            return NULL;
        }
        unicode = PyUnicode_FromWideChar(wstr, wlen);
        PyMem_RawFree(wstr);
    }
    else {
        /* strict mode */
 #ifndef HAVE_BROKEN_MBSTOWCS
        wlen = mbstowcs(NULL, str, 0);
 #else
        wlen = len;
 #endif
        if (wlen == (size_t)-1)
            goto decode_error;
        if (wlen+1 <= smallbuf_len) {
            wstr = smallbuf;
        }
        else {
            wstr = PyMem_New(wchar_t, wlen+1);
            if (!wstr)
                return PyErr_NoMemory();
        }
        wlen2 = mbstowcs(wstr, str, wlen+1);
        if (wlen2 == (size_t)-1) {
            if (wstr != smallbuf)
                PyMem_Free(wstr);
            goto decode_error;
        }
 #ifdef HAVE_BROKEN_MBSTOWCS
        assert(wlen2 == wlen);
 #endif
        unicode = PyUnicode_FromWideChar(wstr, wlen2);
        if (wstr != smallbuf)
            PyMem_Free(wstr);
    }
    return unicode;
 decode_error:
    errmsg = strerror(errno);
    assert(errmsg != NULL);
    error_pos = mbstowcs_errorpos(str, len);
    wstr = Py_DecodeLocale(errmsg, &errlen);
    if (wstr != NULL) {
        reason = PyUnicode_FromWideChar(wstr, errlen);
        PyMem_RawFree(wstr);
    }
    if (reason == NULL)
        reason = PyUnicode_FromString(
            "mbstowcs() encountered an invalid multibyte sequence");
    if (reason == NULL)
        return NULL;
    exc = PyObject_CallFunction(PyExc_UnicodeDecodeError, "sy#nnO",
                                "locale", str, len,
                                (Py_ssize_t)error_pos,
                                (Py_ssize_t)(error_pos+1),
                                reason);
    Py_DECREF(reason);
    if (exc != NULL) {
        PyCodec_StrictErrors(exc);
        Py_DECREF(exc);
    }
-    return NULL;
+
    PyObject *unicode = PyUnicode_FromWideChar(wstr, wlen);
    PyMem_RawFree(wstr);
    return unicode;
 }
 PyObject*
 PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
                              const char *errors)
 {
    return unicode_decode_locale(str, len, errors, 0);
 }
 PyObject*
 _PyUnicode_DecodeCurrentLocaleAndSize(const char *str, Py_ssize_t len,
                                      const char *errors)
 {
    return unicode_decode_locale(str, len, errors, 1);
 }
 PyObject*
 _PyUnicode_DecodeCurrentLocale(const char *str, const char *errors)
 {
    return unicode_decode_locale(str, (Py_ssize_t)strlen(str), errors, 1);
 }
 PyObject*
 PyUnicode_DecodeLocale(const char *str, const char *errors)
 {
    Py_ssize_t size = (Py_ssize_t)strlen(str);
-    return unicode_decode_locale(str, size, errors, 0);
+    return unicode_decode_locale(str, size, errors, 1);
 }
@ -3878,7 +3655,8 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
                                Py_FileSystemDefaultEncodeErrors);
    }
    else {
-        return PyUnicode_DecodeLocaleAndSize(s, size, Py_FileSystemDefaultEncodeErrors);
+        return unicode_decode_locale(s, size,
                                     Py_FileSystemDefaultEncodeErrors, 0);
    }
 #endif
 }
@ -5128,17 +4906,23 @@ onError:
 }
-/* UTF-8 decoder using the surrogateescape error handler .
+/* UTF-8 decoder: use surrogateescape error handler if 'surrogateescape' is
   non-zero, use strict error handler otherwise.
-   On success, return a pointer to a newly allocated wide character string (use
+   On success, write a pointer to a newly allocated wide character string into
-   PyMem_RawFree() to free the memory) and write the output length (in number
+   *wstr (use PyMem_RawFree() to free the memory) and write the output length
-   of wchar_t units) into *p_wlen (if p_wlen is set).
+   (in number of wchar_t units) into *wlen (if wlen is set).
-   On memory allocation failure, return -1 and write (size_t)-1 into *p_wlen
+   On memory allocation failure, return -1.
-   (if p_wlen is set). */
+
-wchar_t*
+   On decoding error (if surrogateescape is zero), return -2. If wlen is
-_Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size, size_t *p_wlen)
+   non-NULL, write the start of the illegal byte sequence into *wlen. If reason
   is not NULL, write the decoding error message into *reason. */
 int
 _Py_DecodeUTF8Ex(const char *s, Py_ssize_t size, wchar_t **wstr, size_t *wlen,
                 const char **reason, int surrogateescape)
 {
    const char *orig_s = s;
    const char *e;
    wchar_t *unicode;
    Py_ssize_t outpos;
@ -5146,18 +4930,12 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size, size_t *p_wlen)
    /* Note: size will always be longer than the resulting Unicode
       character count */
    if (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t) < (size + 1)) {
-        if (p_wlen) {
+        return -1;
            *p_wlen = (size_t)-1;
        }
        return NULL;
    }
    unicode = PyMem_RawMalloc((size + 1) * sizeof(wchar_t));
    if (!unicode) {
-        if (p_wlen) {
+        return -1;
            *p_wlen = (size_t)-1;
        }
        return NULL;
    }
    /* Unpack UTF-8 encoded data */
@ -5175,7 +4953,7 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size, size_t *p_wlen)
            Py_UNREACHABLE();
 #else
            assert(ch > 0xFFFF && ch <= MAX_UNICODE);
-            /*  compute and append the two surrogates: */
+            /* write a surrogate pair */
            unicode[outpos++] = (wchar_t)Py_UNICODE_HIGH_SURROGATE(ch);
            unicode[outpos++] = (wchar_t)Py_UNICODE_LOW_SURROGATE(ch);
 #endif
@ -5183,60 +4961,88 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size, size_t *p_wlen)
        else {
            if (!ch && s == e)
                break;
            if (!surrogateescape) {
                PyMem_RawFree(unicode );
                if (reason != NULL) {
                    switch (ch) {
                    case 0:
                        *reason = "unexpected end of data";
                        break;
                    case 1:
                        *reason = "invalid start byte";
                        break;
                    /* 2, 3, 4 */
                    default:
                        *reason = "invalid continuation byte";
                        break;
                    }
                }
                if (wlen != NULL) {
                    *wlen = s - orig_s;
                }
                return -2;
            }
            /* surrogateescape */
            unicode[outpos++] = 0xDC00 + (unsigned char)*s++;
        }
    }
    unicode[outpos] = L'\0';
-    if (p_wlen) {
+    if (wlen) {
-        *p_wlen = outpos;
+        *wlen = outpos;
    }
-    return unicode;
+    *wstr = unicode;
    return 0;
 }
 wchar_t*
 _Py_DecodeUTF8_surrogateescape(const char *arg, Py_ssize_t arglen)
 {
    wchar_t *wstr;
    int res = _Py_DecodeUTF8Ex(arg, arglen, &wstr, NULL, NULL, 1);
    if (res != 0) {
        return NULL;
    }
    return wstr;
 }
 /* UTF-8 encoder using the surrogateescape error handler .
-   On success, return a pointer to a newly allocated character string (use
+   On success, return 0 and write the newly allocated character string (use
-   PyMem_Free() to free the memory).
+   PyMem_Free() to free the memory) into *str.
-   On encoding failure, return NULL and write the position of the invalid
+   On encoding failure, return -2 and write the position of the invalid
-   surrogate character into *error_pos (if error_pos is set).
+   surrogate character into *error_pos (if error_pos is set) and the encoding
   error message into *reason (if reason is set).
-   On memory allocation failure, return NULL and write (size_t)-1 into
+   On memory allocation failure, return -1. */
-   *error_pos (if error_pos is set). */
+int
-char*
+_Py_EncodeUTF8Ex(const wchar_t *text, char **str, size_t *error_pos,
-_Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos,
+                 const char **reason, int raw_malloc, int surrogateescape)
                               int raw_malloc)
 {
    const Py_ssize_t max_char_size = 4;
    Py_ssize_t len = wcslen(text);
    assert(len >= 0);
    if (len > PY_SSIZE_T_MAX / max_char_size - 1) {
        return -1;
    }
    char *bytes;
-    if (len <= PY_SSIZE_T_MAX / max_char_size - 1) {
+    if (raw_malloc) {
-        if (raw_malloc) {
+        bytes = PyMem_RawMalloc((len + 1) * max_char_size);
            bytes = PyMem_RawMalloc((len + 1) * max_char_size);
        }
        else {
            bytes = PyMem_Malloc((len + 1) * max_char_size);
        }
    }
    else {
-        bytes = NULL;
+        bytes = PyMem_Malloc((len + 1) * max_char_size);
    }
    if (bytes == NULL) {
-        if (error_pos != NULL) {
+        return -1;
            *error_pos = (size_t)-1;
        }
        return NULL;
    }
    char *p = bytes;
    Py_ssize_t i;
-    for (i = 0; i < len;) {
+    for (i = 0; i < len; i++) {
-        Py_UCS4 ch = text[i++];
+        Py_UCS4 ch = text[i];
        if (ch < 0x80) {
            /* Encode ASCII */
@ -5250,11 +5056,20 @@ _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos,
        }
        else if (Py_UNICODE_IS_SURROGATE(ch)) {
            /* surrogateescape error handler */
-            if (!(0xDC80 <= ch && ch <= 0xDCFF)) {
+            if (!surrogateescape || !(0xDC80 <= ch && ch <= 0xDCFF)) {
                if (error_pos != NULL) {
-                    *error_pos = (size_t)i - 1;
+                    *error_pos = (size_t)i;
                }
-                goto error;
+                if (reason != NULL) {
                    *reason = "encoding error";
                }
                if (raw_malloc) {
                    PyMem_RawFree(bytes);
                }
                else {
                    PyMem_Free(bytes);
                }
                return -2;
            }
            *p++ = (char)(ch & 0xff);
        }
@ -5286,18 +5101,16 @@ _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos,
        if (error_pos != NULL) {
            *error_pos = (size_t)-1;
        }
-        goto error;
+        if (raw_malloc) {
            PyMem_RawFree(bytes);
        }
        else {
            PyMem_Free(bytes);
        }
        return -1;
    }
-    return bytes2;
+    *str = bytes2;
-
+    return 0;
 error:
    if (raw_malloc) {
        PyMem_RawFree(bytes);
    }
    else {
        PyMem_Free(bytes);
    }
    return NULL;
 }
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@ -20,9 +20,6 @@ extern int winerror_to_errno(int);
 #include <fcntl.h>
 #endif /* HAVE_FCNTL_H */
 extern char* _Py_EncodeUTF8_surrogateescape(const wchar_t *text,
                                            size_t *error_pos, int raw_malloc);
 #ifdef O_CLOEXEC
 /* Does open() support the O_CLOEXEC flag? Possible values:
@ -69,7 +66,10 @@ _Py_device_encoding(int fd)
    Py_RETURN_NONE;
 }
-#if !defined(__APPLE__) && !defined(MS_WINDOWS)
+#if !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS)
 #define USE_FORCE_ASCII
 extern int _Py_normalize_encoding(const char *, char *, size_t);
 /* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale.
@ -90,7 +90,7 @@ extern int _Py_normalize_encoding(const char *, char *, size_t);
       1: the workaround is used: Py_EncodeLocale() uses
          encode_ascii_surrogateescape() and Py_DecodeLocale() uses
-          decode_ascii_surrogateescape()
+          decode_ascii()
       0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
          Py_DecodeLocale() uses mbstowcs()
      -1: unknown, need to call check_force_ascii() to get the value
@ -180,16 +180,15 @@ error:
    return 1;
 }
-static char*
+static int
-encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos, int raw_malloc)
+encode_ascii(const wchar_t *text, char **str,
             size_t *error_pos, const char **reason,
             int raw_malloc, int surrogateescape)
 {
    char *result = NULL, *out;
    size_t len, i;
    wchar_t ch;
    if (error_pos != NULL)
        *error_pos = (size_t)-1;
    len = wcslen(text);
    /* +1 for NULL byte */
@ -199,8 +198,9 @@ encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos, int raw_mal
    else {
        result = PyMem_Malloc(len + 1);
    }
-    if (result == NULL)
+    if (result == NULL) {
-        return NULL;
+        return -1;
    }
    out = result;
    for (i=0; i<len; i++) {
@ -210,60 +210,84 @@ encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos, int raw_mal
            /* ASCII character */
            *out++ = (char)ch;
        }
-        else if (0xdc80 <= ch && ch <= 0xdcff) {
+        else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
            /* UTF-8b surrogate */
            *out++ = (char)(ch - 0xdc00);
        }
        else {
            if (error_pos != NULL) {
                *error_pos = i;
            }
            if (raw_malloc) {
                PyMem_RawFree(result);
            }
            else {
                PyMem_Free(result);
            }
-            return NULL;
+            if (error_pos != NULL) {
                *error_pos = i;
            }
            if (reason) {
                *reason = "encoding error";
            }
            return -2;
        }
    }
    *out = '\0';
-    return result;
+    *str = result;
    return 0;
 }
-#endif   /* !defined(__APPLE__) && !defined(MS_WINDOWS) */
+#endif   /* !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS) */
-#if !defined(__APPLE__) && (!defined(MS_WINDOWS) || !defined(HAVE_MBRTOWC))
+
-static wchar_t*
+#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
-decode_ascii_surrogateescape(const char *arg, size_t *size)
+static int
 decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
             const char **reason, int surrogateescape)
 {
    wchar_t *res;
    unsigned char *in;
    wchar_t *out;
    size_t argsize = strlen(arg) + 1;
-    if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
+    if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
-        return NULL;
+        return -1;
-    res = PyMem_RawMalloc(argsize*sizeof(wchar_t));
+    }
-    if (!res)
+    res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
-        return NULL;
+    if (!res) {
        return -1;
    }
    in = (unsigned char*)arg;
    out = res;
-    while(*in)
+    for (in = (unsigned char*)arg; *in; in++) {
-        if(*in < 128)
+        unsigned char ch = *in;
-            *out++ = *in++;
+        if (ch < 128) {
-        else
+            *out++ = ch;
-            *out++ = 0xdc00 + *in++;
+        }
        else {
            if (!surrogateescape) {
                PyMem_RawFree(res);
                if (wlen) {
                    *wlen = in - (unsigned char*)arg;
                }
                if (reason) {
                    *reason = "decoding error";
                }
                return -2;
            }
            *out++ = 0xdc00 + ch;
        }
    }
    *out = 0;
    if (size != NULL)
        *size = out - res;
    return res;
 }
 #endif
-#if !defined(__APPLE__) && !defined(__ANDROID__)
+    if (wlen != NULL) {
-static wchar_t*
+        *wlen = out - res;
-decode_current_locale(const char* arg, size_t *size)
+    }
    *wstr = res;
    return 0;
 }
 #endif   /* !HAVE_MBRTOWC */
 static int
 decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
                      const char **reason, int surrogateescape)
 {
    wchar_t *res;
    size_t argsize;
@ -284,15 +308,15 @@ decode_current_locale(const char* arg, size_t *size)
    argsize = mbstowcs(NULL, arg, 0);
 #endif
    if (argsize != (size_t)-1) {
-        if (argsize == PY_SSIZE_T_MAX)
+        if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
-            goto oom;
+            return -1;
-        argsize += 1;
+        }
-        if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
+        res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
-            goto oom;
+        if (!res) {
-        res = (wchar_t *)PyMem_RawMalloc(argsize*sizeof(wchar_t));
+            return -1;
-        if (!res)
+        }
-            goto oom;
+
-        count = mbstowcs(res, arg, argsize);
+        count = mbstowcs(res, arg, argsize + 1);
        if (count != (size_t)-1) {
            wchar_t *tmp;
            /* Only use the result if it contains no
@ -301,13 +325,16 @@ decode_current_locale(const char* arg, size_t *size)
                         !Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
                ;
            if (*tmp == 0) {
-                if (size != NULL)
+                if (wlen != NULL) {
-                    *size = count;
+                    *wlen = count;
-                return res;
+                }
                *wstr = res;
                return 0;
            }
        }
        PyMem_RawFree(res);
    }
    /* Conversion failed. Fall back to escaping with surrogateescape. */
 #ifdef HAVE_MBRTOWC
    /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */
@ -315,30 +342,37 @@ decode_current_locale(const char* arg, size_t *size)
    /* Overallocate; as multi-byte characters are in the argument, the
       actual output could use less memory. */
    argsize = strlen(arg) + 1;
-    if (argsize > PY_SSIZE_T_MAX/sizeof(wchar_t))
+    if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
-        goto oom;
+        return -1;
-    res = (wchar_t*)PyMem_RawMalloc(argsize*sizeof(wchar_t));
+    }
-    if (!res)
+    res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
-        goto oom;
+    if (!res) {
        return -1;
    }
    in = (unsigned char*)arg;
    out = res;
    memset(&mbs, 0, sizeof mbs);
    while (argsize) {
        size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
-        if (converted == 0)
+        if (converted == 0) {
            /* Reached end of string; null char stored. */
            break;
        }
        if (converted == (size_t)-2) {
            /* Incomplete character. This should never happen,
               since we provide everything that we have -
               unless there is a bug in the C library, or I
               misunderstood how mbrtowc works. */
-            PyMem_RawFree(res);
+            goto decode_error;
            if (size != NULL)
                *size = (size_t)-2;
            return NULL;
        }
        if (converted == (size_t)-1) {
            if (!surrogateescape) {
                goto decode_error;
            }
            /* Conversion error. Escape as UTF-8b, and start over
               in the initial shift state. */
            *out++ = 0xdc00 + *in++;
@ -346,12 +380,18 @@ decode_current_locale(const char* arg, size_t *size)
            memset(&mbs, 0, sizeof mbs);
            continue;
        }
        if (Py_UNICODE_IS_SURROGATE(*out)) {
            if (!surrogateescape) {
                goto decode_error;
            }
            /* Surrogate character.  Escape the original
               byte sequence with surrogateescape. */
            argsize -= converted;
-            while (converted--)
+            while (converted--) {
                *out++ = 0xdc00 + *in++;
            }
            continue;
        }
        /* successfully converted some bytes */
@ -359,55 +399,80 @@ decode_current_locale(const char* arg, size_t *size)
        argsize -= converted;
        out++;
    }
-    if (size != NULL)
+    if (wlen != NULL) {
-        *size = out - res;
+        *wlen = out - res;
    }
    *wstr = res;
    return 0;
 decode_error:
    PyMem_RawFree(res);
    if (wlen) {
        *wlen = in - (unsigned char*)arg;
    }
    if (reason) {
        *reason = "decoding error";
    }
    return -2;
 #else   /* HAVE_MBRTOWC */
    /* Cannot use C locale for escaping; manually escape as if charset
       is ASCII (i.e. escape all bytes >= 128). This will still roundtrip
       correctly in the locale's charset, which must be an ASCII superset. */
-    res = decode_ascii_surrogateescape(arg, size);
+    return decode_ascii(arg, wstr, wlen, reason, surrogateescape);
    if (res == NULL)
        goto oom;
 #endif   /* HAVE_MBRTOWC */
    return res;
 oom:
    if (size != NULL) {
        *size = (size_t)-1;
    }
    return NULL;
 }
 #endif
-static wchar_t*
+/* Decode a byte string from the locale encoding.
-decode_locale(const char* arg, size_t *size, int ignore_utf8_mode)
+
   Use the strict error handler if 'surrogateescape' is zero.  Use the
   surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
   bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
   can be decoded as a surrogate character, escape the bytes using the
   surrogateescape error handler instead of decoding them.
   On success, return 0 and write the newly allocated wide character string into
   *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
   the number of wide characters excluding the null character into *wlen.
   On memory allocation failure, return -1.
   On decoding error, return -2. If wlen is not NULL, write the start of
   invalid byte sequence in the input string into *wlen. If reason is not NULL,
   write the decoding error message into *reason.
   Use the _Py_EncodeLocaleEx() function to encode the character string back to
   a byte string. */
 int
 _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
                   const char **reason,
                   int current_locale, int surrogateescape)
 {
-#if defined(__APPLE__) || defined(__ANDROID__)
+    if (current_locale) {
-    return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
+        return decode_current_locale(arg, wstr, wlen, reason, surrogateescape);
 #else
    if (!ignore_utf8_mode && Py_UTF8Mode == 1) {
        return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
    }
-#ifndef MS_WINDOWS
+#if defined(__APPLE__) || defined(__ANDROID__)
-    if (force_ascii == -1)
+    return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
                            surrogateescape);
 #else
    if (Py_UTF8Mode == 1) {
        return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
                                surrogateescape);
    }
 #ifdef USE_FORCE_ASCII
    if (force_ascii == -1) {
        force_ascii = check_force_ascii();
    }
    if (force_ascii) {
        /* force ASCII encoding to workaround mbstowcs() issue */
-        wchar_t *wstr = decode_ascii_surrogateescape(arg, size);
+        return decode_ascii(arg, wstr, wlen, reason, surrogateescape);
        if (wstr == NULL) {
            if (size != NULL) {
                *size = (size_t)-1;
            }
            return NULL;
        }
        return wstr;
    }
 #endif
-    return decode_current_locale(arg, size);
+    return decode_current_locale(arg, wstr, wlen, reason, surrogateescape);
 #endif   /* __APPLE__ or __ANDROID__ */
 }
@ -432,23 +497,24 @@ decode_locale(const char* arg, size_t *size, int ignore_utf8_mode)
   Use the Py_EncodeLocale() function to encode the character string back to a
   byte string. */
 wchar_t*
-Py_DecodeLocale(const char* arg, size_t *size)
+Py_DecodeLocale(const char* arg, size_t *wlen)
 {
-    return decode_locale(arg, size, 0);
+    wchar_t *wstr;
    int res = _Py_DecodeLocaleEx(arg, &wstr, wlen, NULL, 0, 1);
    if (res != 0) {
        if (wlen != NULL) {
            *wlen = (size_t)res;
        }
        return NULL;
    }
    return wstr;
 }
-/* Similar to Py_DecodeLocale() but ignore the UTF-8 mode */
+static int
-wchar_t*
+encode_current_locale(const wchar_t *text, char **str,
-_Py_DecodeCurrentLocale(const char* arg, size_t *size)
+                      size_t *error_pos, const char **reason,
-{
+                      int raw_malloc, int surrogateescape)
    return decode_locale(arg, size, 1);
 }
 #if !defined(__APPLE__) && !defined(__ANDROID__)
 static char*
 encode_current_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
 {
    const size_t len = wcslen(text);
    char *result = NULL, *bytes = NULL;
@ -464,38 +530,37 @@ encode_current_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
        for (i=0; i < len; i++) {
            c = text[i];
            if (c >= 0xdc80 && c <= 0xdcff) {
                if (!surrogateescape) {
                    goto encode_error;
                }
                /* UTF-8b surrogate */
                if (bytes != NULL) {
                    *bytes++ = c - 0xdc00;
                    size--;
                }
-                else
+                else {
                    size++;
                }
                continue;
            }
            else {
                buf[0] = c;
-                if (bytes != NULL)
+                if (bytes != NULL) {
                    converted = wcstombs(bytes, buf, size);
-                else
+                }
                else {
                    converted = wcstombs(NULL, buf, 0);
                }
                if (converted == (size_t)-1) {
-                    if (raw_malloc) {
+                    goto encode_error;
                        PyMem_RawFree(result);
                    }
                    else {
                        PyMem_Free(result);
                    }
                    if (error_pos != NULL)
                        *error_pos = i;
                    return NULL;
                }
                if (bytes != NULL) {
                    bytes += converted;
                    size -= converted;
                }
-                else
+                else {
                    size += converted;
                }
            }
        }
        if (result != NULL) {
@ -511,38 +576,78 @@ encode_current_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
            result = PyMem_Malloc(size);
        }
        if (result == NULL) {
-            if (error_pos != NULL) {
+            return -1;
                *error_pos = (size_t)-1;
            }
            return NULL;
        }
        bytes = result;
    }
-    return result;
+    *str = result;
    return 0;
 encode_error:
    if (raw_malloc) {
        PyMem_RawFree(result);
    }
    else {
        PyMem_Free(result);
    }
    if (error_pos != NULL) {
        *error_pos = i;
    }
    if (reason) {
        *reason = "encoding error";
    }
    return -2;
 }
 /* Encode the wide character string 'text' to a byte string, choosing the
    encoder from the first rule below that applies:

    1. current_locale is non-zero: encode_current_locale(), regardless of
       platform and of Py_UTF8Mode;
    2. __APPLE__ or __ANDROID__ is defined: _Py_EncodeUTF8Ex();
    3. Py_UTF8Mode == 1: _Py_EncodeUTF8Ex();
    4. USE_FORCE_ASCII is defined and force_ascii is non-zero: encode_ascii();
    5. otherwise: encode_current_locale().

    text, str, error_pos, reason, raw_malloc and surrogateescape are forwarded
    unchanged to the selected encoder, and its return value is returned as is.

    For encode_current_locale() the contract is: return 0 and store the
    result in *str on success; return -1 on memory allocation failure; return
    -2 on encoding error, storing the index of the offending character in
    *error_pos and a message in *reason when those pointers are not NULL.
    raw_malloc selects between PyMem_RawFree() and PyMem_Free() when the
    result is released on error. If surrogateescape is zero, characters in
    range U+DC80..U+DCFF are an encoding error instead of being written back
    as single bytes.
    NOTE(review): _Py_EncodeUTF8Ex() and encode_ascii() presumably follow the
    same 0 / -1 / -2 convention -- confirm against their definitions. */
 static int
 encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
                 const char **reason,
                 int raw_malloc, int current_locale, int surrogateescape)
 {
    /* An explicit request for the current locale is checked first, so it
       bypasses every UTF-8 / ASCII override below. */
    if (current_locale) {
        return encode_current_locale(text, str, error_pos, reason,
                                     raw_malloc, surrogateescape);
    }
 #if defined(__APPLE__) || defined(__ANDROID__)
    /* On these platforms UTF-8 is used unconditionally. */
    return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
                            raw_malloc, surrogateescape);
 #else   /* __APPLE__ */
    if (Py_UTF8Mode == 1) {
        return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
                                raw_malloc, surrogateescape);
    }
 #ifdef USE_FORCE_ASCII
    /* force_ascii == -1 means "not computed yet": evaluate it once with
       check_force_ascii() and reuse the stored value on later calls. */
    if (force_ascii == -1) {
        force_ascii = check_force_ascii();
    }
    if (force_ascii) {
        return encode_ascii(text, str, error_pos, reason,
                            raw_malloc, surrogateescape);
    }
 #endif
    /* No override applies: use the current locale encoding. */
    return encode_current_locale(text, str, error_pos, reason,
                                 raw_malloc, surrogateescape);
 #endif   /* __APPLE__ or __ANDROID__ */
 }
 static char*
 encode_locale(const wchar_t *text, size_t *error_pos,
-              int raw_malloc, int ignore_utf8_mode)
+              int raw_malloc, int current_locale)
 {
-#if defined(__APPLE__) || defined(__ANDROID__)
+    char *str;
-    return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc);
+    int res = encode_locale_ex(text, &str, error_pos, NULL,
-#else   /* __APPLE__ */
+                               raw_malloc, current_locale, 1);
-    if (!ignore_utf8_mode && Py_UTF8Mode == 1) {
+    if (res != -2 && error_pos) {
-        return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc);
+        *error_pos = (size_t)-1;
    }
-
+    if (res != 0) {
-#ifndef MS_WINDOWS
+        return NULL;
-    if (force_ascii == -1)
+    }
-        force_ascii = check_force_ascii();
+    return str;
    if (force_ascii)
        return encode_ascii_surrogateescape(text, error_pos, raw_malloc);
 #endif
    return encode_current_locale(text, error_pos, raw_malloc);
 #endif   /* __APPLE__ or __ANDROID__ */
 }
 /* Encode a wide character string to the locale encoding with the
@ -573,11 +678,13 @@ _Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
 }
-/* Similar to _Py_EncodeLocaleRaw() but ignore the UTF-8 Mode */
+int
-char*
+_Py_EncodeLocaleEx(const wchar_t *text, char **str,
-_Py_EncodeCurrentLocale(const wchar_t *text, size_t *error_pos)
+                   size_t *error_pos, const char **reason,
                   int current_locale, int surrogateescape)
 {
-    return encode_locale(text, error_pos, 1, 1);
+    return encode_locale_ex(text, str, error_pos, reason, 1,
                            current_locale, surrogateescape);
 }
--- a/Python/pathconfig.c
+++ b/Python/pathconfig.c
@ -382,8 +382,8 @@ _Py_FindEnvConfigValue(FILE *env_file, const wchar_t *key,
            /* Comment - skip */
            continue;
        }
-        tmpbuffer = _Py_DecodeUTF8_surrogateescape(buffer, n, NULL);
+        tmpbuffer = _Py_DecodeUTF8_surrogateescape(buffer, n);
-        if (tmpbuffer != NULL) {
+        if (tmpbuffer) {
            wchar_t * state;
            wchar_t * tok = wcstok(tmpbuffer, L" \t\r\n", &state);
            if ((tok != NULL) && !wcscmp(tok, key)) {