bpo-29240: readline now ignores the UTF-8 Mode (#5145)

Add new functions ignoring the UTF-8 Mode:

* _Py_DecodeCurrentLocale()
* _Py_EncodeCurrentLocale()
* _PyUnicode_DecodeCurrentLocaleAndSize()
* _PyUnicode_EncodeCurrentLocale()

Modify the readline module to use these functions.

Re-enable test_readline.test_nonascii().
2018-01-10 22:46:15 +01:00 · 2018-01-10 22:46:15 +01:00 · 2cba6b8579
parent f80c0ca133
commit 2cba6b8579
6 changed files with 126 additions and 43 deletions
--- a/Include/fileutils.h
+++ b/Include/fileutils.h
@ -24,6 +24,14 @@ PyAPI_FUNC(wchar_t*) _Py_DecodeUTF8_surrogateescape(
    const char *s,
    Py_ssize_t size,
    size_t *p_wlen);
+
+PyAPI_FUNC(wchar_t *) _Py_DecodeCurrentLocale(
+    const char *arg,
+    size_t *size);
+
+PyAPI_FUNC(char*) _Py_EncodeCurrentLocale(
+    const wchar_t *text,
+    size_t *error_pos);
 #endif

 #ifndef Py_LIMITED_API
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@ -1810,6 +1810,16 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
    PyObject *unicode,
    const char *errors
    );
+
+PyAPI_FUNC(PyObject*) _PyUnicode_DecodeCurrentLocaleAndSize(
+    const char *str,
+    Py_ssize_t len,
+    const char *errors);
+
+PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCurrentLocale(
+    PyObject *unicode,
+    const char *errors
+    );
 #endif

 /* --- File system encoding ---------------------------------------------- */
--- a/Lib/test/test_readline.py
+++ b/Lib/test/test_readline.py
@ -152,8 +152,6 @@ print("History length:", readline.get_current_history_length())
        output = run_pty(self.auto_history_script.format(False))
        self.assertIn(b"History length: 0\r\n", output)

-    @unittest.skipIf(True,
-                     "FIXME: test broken by bpo-29240")
    def test_nonascii(self):
        try:
            readline.add_history("\xEB\xEF")
--- a/Modules/readline.c
+++ b/Modules/readline.c
@ -132,13 +132,14 @@ static PyModuleDef readlinemodule;
 static PyObject *
 encode(PyObject *b)
 {
-    return PyUnicode_EncodeLocale(b, "surrogateescape");
+    return _PyUnicode_EncodeCurrentLocale(b, "surrogateescape");
 }

 static PyObject *
 decode(const char *s)
 {
-    return PyUnicode_DecodeLocale(s, "surrogateescape");
+    return _PyUnicode_DecodeCurrentLocaleAndSize(s, strlen(s),
+                                                 "surrogateescape");
 }


--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -3395,8 +3395,8 @@ locale_error_handler(const char *errors, int *surrogateescape)
    }
 }

-PyObject *
-PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
+static PyObject *
+unicode_encode_locale(PyObject *unicode, const char *errors, int current_locale)
 {
    Py_ssize_t wlen, wlen2;
    wchar_t *wstr;
@ -3423,7 +3423,12 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
        /* "surrogateescape" error handler */
        char *str;

-        str = Py_EncodeLocale(wstr, &error_pos);
+        if (current_locale) {
+            str = _Py_EncodeCurrentLocale(wstr, &error_pos);
+        }
+        else {
+            str = Py_EncodeLocale(wstr, &error_pos);
+        }
        if (str == NULL) {
            if (error_pos == (size_t)-1) {
                PyErr_NoMemory();
@ -3437,7 +3442,12 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
        PyMem_Free(wstr);

        bytes = PyBytes_FromString(str);
-        PyMem_Free(str);
+        if (current_locale) {
+            PyMem_RawFree(str);
+        }
+        else {
+            PyMem_Free(str);
+        }
    }
    else {
        /* strict mode */
@ -3502,6 +3512,18 @@ encode_error:
    return NULL;
 }

+PyObject *
+PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
+{
+    return unicode_encode_locale(unicode, errors, 0);
+}
+
+PyObject *
+_PyUnicode_EncodeCurrentLocale(PyObject *unicode, const char *errors)
+{
+    return unicode_encode_locale(unicode, errors, 1);
+}
+
 PyObject *
 PyUnicode_EncodeFSDefault(PyObject *unicode)
 {
@ -3524,7 +3546,8 @@ PyUnicode_EncodeFSDefault(PyObject *unicode)
                                         Py_FileSystemDefaultEncodeErrors);
    }
    else {
-        return PyUnicode_EncodeLocale(unicode, Py_FileSystemDefaultEncodeErrors);
+        return unicode_encode_locale(unicode,
+                                     Py_FileSystemDefaultEncodeErrors, 0);
    }
 #endif
 }
@ -3695,9 +3718,9 @@ mbstowcs_errorpos(const char *str, size_t len)
    return 0;
 }

-PyObject*
-PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
-                              const char *errors)
+static PyObject*
+unicode_decode_locale(const char *str, Py_ssize_t len, const char *errors,
+                      int current_locale)
 {
    wchar_t smallbuf[256];
    size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf);
@ -3719,7 +3742,12 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,

    if (surrogateescape) {
        /* "surrogateescape" error handler */
-        wstr = Py_DecodeLocale(str, &wlen);
+        if (current_locale) {
+            wstr = _Py_DecodeCurrentLocale(str, &wlen);
+        }
+        else {
+            wstr = Py_DecodeLocale(str, &wlen);
+        }
        if (wstr == NULL) {
            if (wlen == (size_t)-1)
                PyErr_NoMemory();
@ -3794,11 +3822,25 @@ decode_error:
    return NULL;
 }

+PyObject*
+PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
+                              const char *errors)
+{
+    return unicode_decode_locale(str, len, errors, 0);
+}
+
+PyObject*
+_PyUnicode_DecodeCurrentLocaleAndSize(const char *str, Py_ssize_t len,
+                                      const char *errors)
+{
+    return unicode_decode_locale(str, len, errors, 1);
+}
+
 PyObject*
 PyUnicode_DecodeLocale(const char *str, const char *errors)
 {
    Py_ssize_t size = (Py_ssize_t)strlen(str);
-    return PyUnicode_DecodeLocaleAndSize(str, size, errors);
+    return unicode_decode_locale(str, size, errors, 0);
 }


--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@ -263,7 +263,7 @@ decode_ascii_surrogateescape(const char *arg, size_t *size)

 #if !defined(__APPLE__) && !defined(__ANDROID__)
 static wchar_t*
-decode_locale(const char* arg, size_t *size)
+decode_current_locale(const char* arg, size_t *size)
 {
    wchar_t *res;
    size_t argsize;
@ -380,6 +380,38 @@ oom:
 #endif


+static wchar_t*
+decode_locale(const char* arg, size_t *size, int ignore_utf8_mode)
+{
+#if defined(__APPLE__) || defined(__ANDROID__)
+    return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
+#else
+    if (!ignore_utf8_mode && Py_UTF8Mode == 1) {
+        return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
+    }
+
+#ifndef MS_WINDOWS
+    if (force_ascii == -1)
+        force_ascii = check_force_ascii();
+
+    if (force_ascii) {
+        /* force ASCII encoding to workaround mbstowcs() issue */
+        wchar_t *wstr = decode_ascii_surrogateescape(arg, size);
+        if (wstr == NULL) {
+            if (size != NULL) {
+                *size = (size_t)-1;
+            }
+            return NULL;
+        }
+        return wstr;
+    }
+#endif
+
+    return decode_current_locale(arg, size);
+#endif   /* __APPLE__ or __ANDROID__ */
+}
+
+
 /* Decode a byte string from the locale encoding with the
   surrogateescape error handler: undecodable bytes are decoded as characters
   in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
@ -402,32 +434,15 @@ oom:
 wchar_t*
 Py_DecodeLocale(const char* arg, size_t *size)
 {
-#if defined(__APPLE__) || defined(__ANDROID__)
-    return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
-#else
-    if (Py_UTF8Mode == 1) {
-        return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
-    }
+    return decode_locale(arg, size, 0);
+}

-#ifndef MS_WINDOWS
-    if (force_ascii == -1)
-        force_ascii = check_force_ascii();

-    if (force_ascii) {
-        /* force ASCII encoding to workaround mbstowcs() issue */
-        wchar_t *wstr = decode_ascii_surrogateescape(arg, size);
-        if (wstr == NULL) {
-            if (size != NULL) {
-                *size = (size_t)-1;
-            }
-            return NULL;
-        }
-        return wstr;
-    }
-#endif
-
-    return decode_locale(arg, size);
-#endif   /* __APPLE__ or __ANDROID__ */
+/* Similar to Py_DecodeLocale(), but ignores the UTF-8 Mode. */
+wchar_t*
+_Py_DecodeCurrentLocale(const char* arg, size_t *size)
+{
+    return decode_locale(arg, size, 1);
 }


@ -508,12 +523,13 @@ encode_current_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
 #endif

 static char*
-encode_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
+encode_locale(const wchar_t *text, size_t *error_pos,
+              int raw_malloc, int ignore_utf8_mode)
 {
 #if defined(__APPLE__) || defined(__ANDROID__)
    return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc);
 #else   /* __APPLE__ */
-    if (Py_UTF8Mode == 1) {
+    if (!ignore_utf8_mode && Py_UTF8Mode == 1) {
        return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc);
    }

@ -544,7 +560,7 @@ encode_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
 char*
 Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
 {
-    return encode_locale(text, error_pos, 0);
+    return encode_locale(text, error_pos, 0, 0);
 }


@ -553,7 +569,15 @@ Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
 char*
 _Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
 {
-    return encode_locale(text, error_pos, 1);
+    return encode_locale(text, error_pos, 1, 0);
+}
+
+
+/* Similar to _Py_EncodeLocaleRaw(), but ignores the UTF-8 Mode. */
+char*
+_Py_EncodeCurrentLocale(const wchar_t *text, size_t *error_pos)
+{
+    return encode_locale(text, error_pos, 1, 1);
 }