diff --git a/Include/fileutils.h b/Include/fileutils.h index d027e18348f..2527d84669d 100644 --- a/Include/fileutils.h +++ b/Include/fileutils.h @@ -24,6 +24,14 @@ PyAPI_FUNC(wchar_t*) _Py_DecodeUTF8_surrogateescape( const char *s, Py_ssize_t size, size_t *p_wlen); + +PyAPI_FUNC(wchar_t *) _Py_DecodeCurrentLocale( + const char *arg, + size_t *size); + +PyAPI_FUNC(char*) _Py_EncodeCurrentLocale( + const wchar_t *text, + size_t *error_pos); #endif #ifndef Py_LIMITED_API diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 0274de6733a..576e7ad8510 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -1810,6 +1810,16 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale( PyObject *unicode, const char *errors ); + +PyAPI_FUNC(PyObject*) _PyUnicode_DecodeCurrentLocaleAndSize( + const char *str, + Py_ssize_t len, + const char *errors); + +PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCurrentLocale( + PyObject *unicode, + const char *errors + ); #endif /* --- File system encoding ---------------------------------------------- */ diff --git a/Lib/test/test_readline.py b/Lib/test/test_readline.py index 28ea38b747e..b4c25dee9d3 100644 --- a/Lib/test/test_readline.py +++ b/Lib/test/test_readline.py @@ -152,8 +152,6 @@ print("History length:", readline.get_current_history_length()) output = run_pty(self.auto_history_script.format(False)) self.assertIn(b"History length: 0\r\n", output) - @unittest.skipIf(True, - "FIXME: test broken by bpo-29240") def test_nonascii(self): try: readline.add_history("\xEB\xEF") diff --git a/Modules/readline.c b/Modules/readline.c index 811fca8cd92..8db4cfd0152 100644 --- a/Modules/readline.c +++ b/Modules/readline.c @@ -132,13 +132,14 @@ static PyModuleDef readlinemodule; static PyObject * encode(PyObject *b) { - return PyUnicode_EncodeLocale(b, "surrogateescape"); + return _PyUnicode_EncodeCurrentLocale(b, "surrogateescape"); } static PyObject * decode(const char *s) { - return PyUnicode_DecodeLocale(s, "surrogateescape"); + return _PyUnicode_DecodeCurrentLocaleAndSize(s, strlen(s), + "surrogateescape"); } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 92a6ad6b979..1a230e03e63 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3395,8 +3395,8 @@ locale_error_handler(const char *errors, int *surrogateescape) } } -PyObject * -PyUnicode_EncodeLocale(PyObject *unicode, const char *errors) +static PyObject * +unicode_encode_locale(PyObject *unicode, const char *errors, int current_locale) { Py_ssize_t wlen, wlen2; wchar_t *wstr; @@ -3423,7 +3423,12 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors) /* "surrogateescape" error handler */ char *str; - str = Py_EncodeLocale(wstr, &error_pos); + if (current_locale) { + str = _Py_EncodeCurrentLocale(wstr, &error_pos); + } + else { + str = Py_EncodeLocale(wstr, &error_pos); + } if (str == NULL) { if (error_pos == (size_t)-1) { PyErr_NoMemory(); @@ -3437,7 +3442,12 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors) PyMem_Free(wstr); bytes = PyBytes_FromString(str); - PyMem_Free(str); + if (current_locale) { + PyMem_RawFree(str); + } + else { + PyMem_Free(str); + } } else { /* strict mode */ @@ -3502,6 +3512,18 @@ encode_error: return NULL; } +PyObject * +PyUnicode_EncodeLocale(PyObject *unicode, const char *errors) +{ + return unicode_encode_locale(unicode, errors, 0); +} + +PyObject * +_PyUnicode_EncodeCurrentLocale(PyObject *unicode, const char *errors) +{ + return unicode_encode_locale(unicode, errors, 1); +} + PyObject * PyUnicode_EncodeFSDefault(PyObject *unicode) { @@ -3524,7 +3546,8 @@ PyUnicode_EncodeFSDefault(PyObject *unicode) Py_FileSystemDefaultEncodeErrors); } else { - return PyUnicode_EncodeLocale(unicode, Py_FileSystemDefaultEncodeErrors); + return unicode_encode_locale(unicode, + Py_FileSystemDefaultEncodeErrors, 0); } #endif } @@ -3695,9 +3718,9 @@ mbstowcs_errorpos(const char *str, size_t len) return 0; } -PyObject* -PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len, - const char *errors) +static PyObject* +unicode_decode_locale(const char *str, Py_ssize_t len, const char *errors, + int current_locale) { wchar_t smallbuf[256]; size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf); @@ -3719,7 +3742,12 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len, if (surrogateescape) { /* "surrogateescape" error handler */ - wstr = Py_DecodeLocale(str, &wlen); + if (current_locale) { + wstr = _Py_DecodeCurrentLocale(str, &wlen); + } + else { + wstr = Py_DecodeLocale(str, &wlen); + } if (wstr == NULL) { if (wlen == (size_t)-1) PyErr_NoMemory(); @@ -3794,11 +3822,25 @@ decode_error: return NULL; } +PyObject* +PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len, + const char *errors) +{ + return unicode_decode_locale(str, len, errors, 0); +} + +PyObject* +_PyUnicode_DecodeCurrentLocaleAndSize(const char *str, Py_ssize_t len, + const char *errors) +{ + return unicode_decode_locale(str, len, errors, 1); +} + PyObject* PyUnicode_DecodeLocale(const char *str, const char *errors) { Py_ssize_t size = (Py_ssize_t)strlen(str); - return PyUnicode_DecodeLocaleAndSize(str, size, errors); + return unicode_decode_locale(str, size, errors, 0); } diff --git a/Python/fileutils.c b/Python/fileutils.c index 645a1793664..9275494e864 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -263,7 +263,7 @@ decode_ascii_surrogateescape(const char *arg, size_t *size) #if !defined(__APPLE__) && !defined(__ANDROID__) static wchar_t* -decode_locale(const char* arg, size_t *size) +decode_current_locale(const char* arg, size_t *size) { wchar_t *res; size_t argsize; @@ -380,6 +380,38 @@ oom: #endif +static wchar_t* +decode_locale(const char* arg, size_t *size, int ignore_utf8_mode) +{ +#if defined(__APPLE__) || defined(__ANDROID__) + return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size); +#else + if (!ignore_utf8_mode && Py_UTF8Mode == 1) { + return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size); + } + +#ifndef MS_WINDOWS + if (force_ascii == -1) + force_ascii = check_force_ascii(); + + if (force_ascii) { + /* force ASCII encoding to workaround mbstowcs() issue */ + wchar_t *wstr = decode_ascii_surrogateescape(arg, size); + if (wstr == NULL) { + if (size != NULL) { + *size = (size_t)-1; + } + return NULL; + } + return wstr; + } +#endif + + return decode_current_locale(arg, size); +#endif /* __APPLE__ or __ANDROID__ */ +} + + /* Decode a byte string from the locale encoding with the surrogateescape error handler: undecodable bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate @@ -402,32 +434,15 @@ oom: wchar_t* Py_DecodeLocale(const char* arg, size_t *size) { -#if defined(__APPLE__) || defined(__ANDROID__) - return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size); -#else - if (Py_UTF8Mode == 1) { - return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size); - } + return decode_locale(arg, size, 0); +} -#ifndef MS_WINDOWS - if (force_ascii == -1) - force_ascii = check_force_ascii(); - if (force_ascii) { - /* force ASCII encoding to workaround mbstowcs() issue */ - wchar_t *wstr = decode_ascii_surrogateescape(arg, size); - if (wstr == NULL) { - if (size != NULL) { - *size = (size_t)-1; - } - return NULL; - } - return wstr; - } -#endif - - return decode_locale(arg, size); -#endif /* __APPLE__ or __ANDROID__ */ +/* Similar to Py_DecodeLocale() but ignore the UTF-8 mode */ +wchar_t* +_Py_DecodeCurrentLocale(const char* arg, size_t *size) +{ + return decode_locale(arg, size, 1); } @@ -508,12 +523,13 @@ encode_current_locale(const wchar_t *text, size_t *error_pos, int raw_malloc) #endif static char* -encode_locale(const wchar_t *text, size_t *error_pos, int raw_malloc) +encode_locale(const wchar_t *text, size_t *error_pos, + int raw_malloc, int ignore_utf8_mode) { #if defined(__APPLE__) || defined(__ANDROID__) return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc); #else /* __APPLE__ */ - if (Py_UTF8Mode == 1) { + if (!ignore_utf8_mode && Py_UTF8Mode == 1) { return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc); } @@ -544,7 +560,7 @@ encode_locale(const wchar_t *text, size_t *error_pos, int raw_malloc) char* Py_EncodeLocale(const wchar_t *text, size_t *error_pos) { - return encode_locale(text, error_pos, 0); + return encode_locale(text, error_pos, 0, 0); } @@ -553,7 +569,15 @@ Py_EncodeLocale(const wchar_t *text, size_t *error_pos) char* _Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos) { - return encode_locale(text, error_pos, 1); + return encode_locale(text, error_pos, 1, 0); +} + + +/* Similar to _Py_EncodeLocaleRaw() but ignore the UTF-8 Mode */ +char* +_Py_EncodeCurrentLocale(const wchar_t *text, size_t *error_pos) +{ + return encode_locale(text, error_pos, 1, 1); }