bpo-29240: readline now ignores the UTF-8 Mode (#5145)

Add new functions ignoring the UTF-8 mode:

* _Py_DecodeCurrentLocale()
* _Py_EncodeCurrentLocale()
* _PyUnicode_DecodeCurrentLocaleAndSize()
* _PyUnicode_EncodeCurrentLocale()

Modify the readline module to use these functions.

Re-enable test_readline.test_nonascii().
This commit is contained in:
Victor Stinner 2018-01-10 22:46:15 +01:00 committed by GitHub
parent f80c0ca133
commit 2cba6b8579
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 126 additions and 43 deletions

View File

@ -24,6 +24,14 @@ PyAPI_FUNC(wchar_t*) _Py_DecodeUTF8_surrogateescape(
const char *s, const char *s,
Py_ssize_t size, Py_ssize_t size,
size_t *p_wlen); size_t *p_wlen);
/* Decode a byte string to a wide character string. Similar to
   Py_DecodeLocale(), but the UTF-8 Mode is ignored. */
PyAPI_FUNC(wchar_t *) _Py_DecodeCurrentLocale(
const char *arg,
size_t *size);
/* Encode a wide character string to a byte string. Similar to
   _Py_EncodeLocaleRaw(), but the UTF-8 Mode is ignored.
   NOTE(review): the unicode layer releases the result with PyMem_RawFree();
   confirm that this is the required deallocator for every caller. */
PyAPI_FUNC(char*) _Py_EncodeCurrentLocale(
const wchar_t *text,
size_t *error_pos);
#endif #endif
#ifndef Py_LIMITED_API #ifndef Py_LIMITED_API

View File

@ -1810,6 +1810,16 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
PyObject *unicode, PyObject *unicode,
const char *errors const char *errors
); );
/* Private variant of PyUnicode_DecodeLocaleAndSize(): same arguments, but the
   shared implementation is asked to use the current locale, so with the
   "surrogateescape" error handler the bytes are decoded through
   _Py_DecodeCurrentLocale(), which ignores the UTF-8 Mode. */
PyAPI_FUNC(PyObject*) _PyUnicode_DecodeCurrentLocaleAndSize(
const char *str,
Py_ssize_t len,
const char *errors);
/* Private variant of PyUnicode_EncodeLocale(): same arguments, but the
   shared implementation is asked to use the current locale, so with the
   "surrogateescape" error handler the string is encoded through
   _Py_EncodeCurrentLocale(), which ignores the UTF-8 Mode. */
PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCurrentLocale(
PyObject *unicode,
const char *errors
);
#endif #endif
/* --- File system encoding ---------------------------------------------- */ /* --- File system encoding ---------------------------------------------- */

View File

@ -152,8 +152,6 @@ print("History length:", readline.get_current_history_length())
output = run_pty(self.auto_history_script.format(False)) output = run_pty(self.auto_history_script.format(False))
self.assertIn(b"History length: 0\r\n", output) self.assertIn(b"History length: 0\r\n", output)
@unittest.skipIf(True,
"FIXME: test broken by bpo-29240")
def test_nonascii(self): def test_nonascii(self):
try: try:
readline.add_history("\xEB\xEF") readline.add_history("\xEB\xEF")

View File

@ -132,13 +132,14 @@ static PyModuleDef readlinemodule;
static PyObject * static PyObject *
encode(PyObject *b) encode(PyObject *b)
{ {
return PyUnicode_EncodeLocale(b, "surrogateescape"); return _PyUnicode_EncodeCurrentLocale(b, "surrogateescape");
} }
static PyObject * static PyObject *
decode(const char *s) decode(const char *s)
{ {
return PyUnicode_DecodeLocale(s, "surrogateescape"); return _PyUnicode_DecodeCurrentLocaleAndSize(s, strlen(s),
"surrogateescape");
} }

View File

@ -3395,8 +3395,8 @@ locale_error_handler(const char *errors, int *surrogateescape)
} }
} }
PyObject * static PyObject *
PyUnicode_EncodeLocale(PyObject *unicode, const char *errors) unicode_encode_locale(PyObject *unicode, const char *errors, int current_locale)
{ {
Py_ssize_t wlen, wlen2; Py_ssize_t wlen, wlen2;
wchar_t *wstr; wchar_t *wstr;
@ -3423,7 +3423,12 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
/* "surrogateescape" error handler */ /* "surrogateescape" error handler */
char *str; char *str;
if (current_locale) {
str = _Py_EncodeCurrentLocale(wstr, &error_pos);
}
else {
str = Py_EncodeLocale(wstr, &error_pos); str = Py_EncodeLocale(wstr, &error_pos);
}
if (str == NULL) { if (str == NULL) {
if (error_pos == (size_t)-1) { if (error_pos == (size_t)-1) {
PyErr_NoMemory(); PyErr_NoMemory();
@ -3437,8 +3442,13 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
PyMem_Free(wstr); PyMem_Free(wstr);
bytes = PyBytes_FromString(str); bytes = PyBytes_FromString(str);
if (current_locale) {
PyMem_RawFree(str);
}
else {
PyMem_Free(str); PyMem_Free(str);
} }
}
else { else {
/* strict mode */ /* strict mode */
size_t len, len2; size_t len, len2;
@ -3502,6 +3512,18 @@ encode_error:
return NULL; return NULL;
} }
/* Public entry point: encode a Unicode object to bytes through the shared
   unicode_encode_locale() helper, without requesting the "current locale"
   variant (the UTF-8 Mode is therefore honoured by the helper). */
PyObject *
PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
{
    const int current_locale = 0;

    return unicode_encode_locale(unicode, errors, current_locale);
}
/* Private entry point: same as PyUnicode_EncodeLocale(), except that the
   shared unicode_encode_locale() helper is asked to use the current locale,
   i.e. to ignore the UTF-8 Mode. */
PyObject *
_PyUnicode_EncodeCurrentLocale(PyObject *unicode, const char *errors)
{
    const int current_locale = 1;

    return unicode_encode_locale(unicode, errors, current_locale);
}
PyObject * PyObject *
PyUnicode_EncodeFSDefault(PyObject *unicode) PyUnicode_EncodeFSDefault(PyObject *unicode)
{ {
@ -3524,7 +3546,8 @@ PyUnicode_EncodeFSDefault(PyObject *unicode)
Py_FileSystemDefaultEncodeErrors); Py_FileSystemDefaultEncodeErrors);
} }
else { else {
return PyUnicode_EncodeLocale(unicode, Py_FileSystemDefaultEncodeErrors); return unicode_encode_locale(unicode,
Py_FileSystemDefaultEncodeErrors, 0);
} }
#endif #endif
} }
@ -3695,9 +3718,9 @@ mbstowcs_errorpos(const char *str, size_t len)
return 0; return 0;
} }
PyObject* static PyObject*
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len, unicode_decode_locale(const char *str, Py_ssize_t len, const char *errors,
const char *errors) int current_locale)
{ {
wchar_t smallbuf[256]; wchar_t smallbuf[256];
size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf); size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf);
@ -3719,7 +3742,12 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
if (surrogateescape) { if (surrogateescape) {
/* "surrogateescape" error handler */ /* "surrogateescape" error handler */
if (current_locale) {
wstr = _Py_DecodeCurrentLocale(str, &wlen);
}
else {
wstr = Py_DecodeLocale(str, &wlen); wstr = Py_DecodeLocale(str, &wlen);
}
if (wstr == NULL) { if (wstr == NULL) {
if (wlen == (size_t)-1) if (wlen == (size_t)-1)
PyErr_NoMemory(); PyErr_NoMemory();
@ -3794,11 +3822,25 @@ decode_error:
return NULL; return NULL;
} }
/* Public entry point: decode `len` bytes of `str` to a Unicode object through
   the shared unicode_decode_locale() helper, without requesting the "current
   locale" variant (the UTF-8 Mode is therefore honoured by the helper). */
PyObject*
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
                              const char *errors)
{
    const int current_locale = 0;

    return unicode_decode_locale(str, len, errors, current_locale);
}
/* Private entry point: same as PyUnicode_DecodeLocaleAndSize(), except that
   the shared unicode_decode_locale() helper is asked to use the current
   locale, i.e. to ignore the UTF-8 Mode. */
PyObject*
_PyUnicode_DecodeCurrentLocaleAndSize(const char *str, Py_ssize_t len,
                                      const char *errors)
{
    const int current_locale = 1;

    return unicode_decode_locale(str, len, errors, current_locale);
}
PyObject* PyObject*
PyUnicode_DecodeLocale(const char *str, const char *errors) PyUnicode_DecodeLocale(const char *str, const char *errors)
{ {
Py_ssize_t size = (Py_ssize_t)strlen(str); Py_ssize_t size = (Py_ssize_t)strlen(str);
return PyUnicode_DecodeLocaleAndSize(str, size, errors); return unicode_decode_locale(str, size, errors, 0);
} }

View File

@ -263,7 +263,7 @@ decode_ascii_surrogateescape(const char *arg, size_t *size)
#if !defined(__APPLE__) && !defined(__ANDROID__) #if !defined(__APPLE__) && !defined(__ANDROID__)
static wchar_t* static wchar_t*
decode_locale(const char* arg, size_t *size) decode_current_locale(const char* arg, size_t *size)
{ {
wchar_t *res; wchar_t *res;
size_t argsize; size_t argsize;
@ -380,6 +380,38 @@ oom:
#endif #endif
/* Shared implementation of Py_DecodeLocale() and _Py_DecodeCurrentLocale().

   Decode the NUL-terminated byte string `arg` to a wide character string and
   hand `size` through to the selected decoder.

   Decoder selection:
   - on Apple and Android builds, always the UTF-8/surrogateescape decoder,
     regardless of `ignore_utf8_mode`;
   - otherwise the UTF-8/surrogateescape decoder when the UTF-8 Mode is
     enabled (Py_UTF8Mode == 1) and `ignore_utf8_mode` is zero;
   - otherwise, on non-Windows builds, the ASCII/surrogateescape decoder when
     force_ascii is set;
   - otherwise decode_current_locale().

   Return NULL on failure. */
static wchar_t*
decode_locale(const char* arg, size_t *size, int ignore_utf8_mode)
{
#if defined(__APPLE__) || defined(__ANDROID__)
    return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
#else
    int use_utf8 = (Py_UTF8Mode == 1) && !ignore_utf8_mode;
    if (use_utf8) {
        return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
    }

#ifndef MS_WINDOWS
    /* force_ascii is computed lazily: -1 means "not checked yet" */
    if (force_ascii == -1) {
        force_ascii = check_force_ascii();
    }

    if (force_ascii) {
        /* force ASCII encoding to workaround mbstowcs() issue */
        wchar_t *decoded = decode_ascii_surrogateescape(arg, size);
        if (decoded == NULL && size != NULL) {
            /* report the failure through *size as well */
            *size = (size_t)-1;
        }
        return decoded;
    }
#endif

    return decode_current_locale(arg, size);
#endif   /* __APPLE__ or __ANDROID__ */
}
/* Decode a byte string from the locale encoding with the /* Decode a byte string from the locale encoding with the
surrogateescape error handler: undecodable bytes are decoded as characters surrogateescape error handler: undecodable bytes are decoded as characters
in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
@ -402,32 +434,15 @@ oom:
wchar_t* wchar_t*
Py_DecodeLocale(const char* arg, size_t *size) Py_DecodeLocale(const char* arg, size_t *size)
{ {
#if defined(__APPLE__) || defined(__ANDROID__) return decode_locale(arg, size, 0);
return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size); }
#else
if (Py_UTF8Mode == 1) {
return _Py_DecodeUTF8_surrogateescape(arg, strlen(arg), size);
}
#ifndef MS_WINDOWS
if (force_ascii == -1)
force_ascii = check_force_ascii();
if (force_ascii) { /* Similar to Py_DecodeLocale() but ignore the UTF-8 mode */
/* force ASCII encoding to workaround mbstowcs() issue */ wchar_t*
wchar_t *wstr = decode_ascii_surrogateescape(arg, size); _Py_DecodeCurrentLocale(const char* arg, size_t *size)
if (wstr == NULL) { {
if (size != NULL) { return decode_locale(arg, size, 1);
*size = (size_t)-1;
}
return NULL;
}
return wstr;
}
#endif
return decode_locale(arg, size);
#endif /* __APPLE__ or __ANDROID__ */
} }
@ -508,12 +523,13 @@ encode_current_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
#endif #endif
static char* static char*
encode_locale(const wchar_t *text, size_t *error_pos, int raw_malloc) encode_locale(const wchar_t *text, size_t *error_pos,
int raw_malloc, int ignore_utf8_mode)
{ {
#if defined(__APPLE__) || defined(__ANDROID__) #if defined(__APPLE__) || defined(__ANDROID__)
return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc); return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc);
#else /* __APPLE__ */ #else /* __APPLE__ */
if (Py_UTF8Mode == 1) { if (!ignore_utf8_mode && Py_UTF8Mode == 1) {
return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc); return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc);
} }
@ -544,7 +560,7 @@ encode_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
char* char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos) Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{ {
return encode_locale(text, error_pos, 0); return encode_locale(text, error_pos, 0, 0);
} }
@ -553,7 +569,15 @@ Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
char* char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos) _Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{ {
return encode_locale(text, error_pos, 1); return encode_locale(text, error_pos, 1, 0);
}
/* Encode `text` like _Py_EncodeLocaleRaw() does (the raw_malloc flag of
   encode_locale() is set), except that the UTF-8 Mode is ignored. */
char*
_Py_EncodeCurrentLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;
    const int ignore_utf8_mode = 1;

    return encode_locale(text, error_pos, raw_malloc, ignore_utf8_mode);
}