[3.6] bpo-32555: Fix locale encodings (#5193)
On FreeBSD and Solaris, os.strerror() now always decodes the byte string from the current locale encoding, rather than using ASCII/surrogateescape in some cases. Changes: * Add _Py_DecodeLocaleEx() and _Py_EncodeLocaleEx(), which have an additional current_locale parameter. * PyUnicode_DecodeLocale(), PyUnicode_DecodeLocaleAndSize() and PyUnicode_EncodeLocale() now always use the current locale encoding, instead of using Py_DecodeLocale()/Py_EncodeLocale(). * Document the encoding used in the Py_DecodeLocale() and Py_EncodeLocale() documentation. * Add USE_FORCE_ASCII define to not define decode_ascii_surrogateescape() on Android.
This commit is contained in:
parent
5f959c4f9e
commit
b92c159efa
|
@ -66,9 +66,18 @@ Operating System Utilities
|
||||||
surrogate character, escape the bytes using the surrogateescape error
|
surrogate character, escape the bytes using the surrogateescape error
|
||||||
handler instead of decoding them.
|
handler instead of decoding them.
|
||||||
|
|
||||||
|
Encoding, highest priority to lowest priority:
|
||||||
|
|
||||||
|
* ``UTF-8`` on macOS and Android;
|
||||||
|
* ``ASCII`` if the ``LC_CTYPE`` locale is ``"C"``,
|
||||||
|
``nl_langinfo(CODESET)`` returns the ``ASCII`` encoding (or an alias),
|
||||||
|
and :c:func:`mbstowcs` and :c:func:`wcstombs` functions use the
|
||||||
|
``ISO-8859-1`` encoding.
|
||||||
|
* the current locale encoding (``LC_CTYPE`` locale).
|
||||||
|
|
||||||
Return a pointer to a newly allocated wide character string, use
|
Return a pointer to a newly allocated wide character string, use
|
||||||
:c:func:`PyMem_RawFree` to free the memory. If size is not ``NULL``, write
|
:c:func:`PyMem_RawFree` to free the memory. If size is not ``NULL``, write
|
||||||
the number of wide characters excluding the null character into ``*size``
|
the number of wide characters excluding the null character into ``*size``.
|
||||||
|
|
||||||
Return ``NULL`` on decoding error or memory allocation error. If *size* is
|
Return ``NULL`` on decoding error or memory allocation error. If *size* is
|
||||||
not ``NULL``, ``*size`` is set to ``(size_t)-1`` on memory error or set to
|
not ``NULL``, ``*size`` is set to ``(size_t)-1`` on memory error or set to
|
||||||
|
@ -94,6 +103,15 @@ Operating System Utilities
|
||||||
:ref:`surrogateescape error handler <surrogateescape>`: surrogate characters
|
:ref:`surrogateescape error handler <surrogateescape>`: surrogate characters
|
||||||
in the range U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
|
in the range U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
|
||||||
|
|
||||||
|
Encoding, highest priority to lowest priority:
|
||||||
|
|
||||||
|
* ``UTF-8`` on macOS and Android;
|
||||||
|
* ``ASCII`` if the ``LC_CTYPE`` locale is ``"C"``,
|
||||||
|
``nl_langinfo(CODESET)`` returns the ``ASCII`` encoding (or an alias),
|
||||||
|
and :c:func:`mbstowcs` and :c:func:`wcstombs` functions use the
|
||||||
|
``ISO-8859-1`` encoding.
|
||||||
|
* the current locale encoding.
|
||||||
|
|
||||||
Return a pointer to a newly allocated byte string, use :c:func:`PyMem_Free`
|
Return a pointer to a newly allocated byte string, use :c:func:`PyMem_Free`
|
||||||
to free the memory. Return ``NULL`` on encoding error or memory allocation
|
to free the memory. Return ``NULL`` on encoding error or memory allocation
|
||||||
error
|
error
|
||||||
|
|
|
@ -773,6 +773,12 @@ system.
|
||||||
|
|
||||||
.. versionadded:: 3.3
|
.. versionadded:: 3.3
|
||||||
|
|
||||||
|
.. versionchanged:: 3.6.5
|
||||||
|
The function now also uses the current locale encoding for the
|
||||||
|
``surrogateescape`` error handler. Previously, :c:func:`Py_DecodeLocale`
|
||||||
|
was used for ``surrogateescape``, and the current locale encoding was
|
||||||
|
used for ``strict``.
|
||||||
|
|
||||||
|
|
||||||
.. c:function:: PyObject* PyUnicode_DecodeLocale(const char *str, const char *errors)
|
.. c:function:: PyObject* PyUnicode_DecodeLocale(const char *str, const char *errors)
|
||||||
|
|
||||||
|
@ -800,6 +806,12 @@ system.
|
||||||
|
|
||||||
.. versionadded:: 3.3
|
.. versionadded:: 3.3
|
||||||
|
|
||||||
|
.. versionchanged:: 3.6.5
|
||||||
|
The function now also uses the current locale encoding for the
|
||||||
|
``surrogateescape`` error handler. Previously, :c:func:`Py_EncodeLocale`
|
||||||
|
was used for ``surrogateescape``, and the current locale encoding was
|
||||||
|
used for ``strict``.
|
||||||
|
|
||||||
|
|
||||||
File System Encoding
|
File System Encoding
|
||||||
""""""""""""""""""""
|
""""""""""""""""""""
|
||||||
|
|
|
@ -17,6 +17,16 @@ PyAPI_FUNC(char*) Py_EncodeLocale(
|
||||||
|
|
||||||
#ifndef Py_LIMITED_API
|
#ifndef Py_LIMITED_API
|
||||||
|
|
||||||
|
PyAPI_FUNC(wchar_t *) _Py_DecodeLocaleEx(
|
||||||
|
const char *arg,
|
||||||
|
size_t *size,
|
||||||
|
int current_locale);
|
||||||
|
|
||||||
|
PyAPI_FUNC(char*) _Py_EncodeLocaleEx(
|
||||||
|
const wchar_t *text,
|
||||||
|
size_t *error_pos,
|
||||||
|
int current_locale);
|
||||||
|
|
||||||
PyAPI_FUNC(PyObject *) _Py_device_encoding(int);
|
PyAPI_FUNC(PyObject *) _Py_device_encoding(int);
|
||||||
|
|
||||||
#ifdef MS_WINDOWS
|
#ifdef MS_WINDOWS
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
On FreeBSD and Solaris, os.strerror() now always decodes the byte string from
|
||||||
|
the current locale encoding, rather than using ASCII/surrogateescape in some
|
||||||
|
cases.
|
|
@ -3439,8 +3439,9 @@ locale_error_handler(const char *errors, int *surrogateescape)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *
|
static PyObject *
|
||||||
PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
|
unicode_encode_locale(PyObject *unicode, const char *errors,
|
||||||
|
int current_locale)
|
||||||
{
|
{
|
||||||
Py_ssize_t wlen, wlen2;
|
Py_ssize_t wlen, wlen2;
|
||||||
wchar_t *wstr;
|
wchar_t *wstr;
|
||||||
|
@ -3469,7 +3470,7 @@ PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
|
||||||
/* "surrogateescape" error handler */
|
/* "surrogateescape" error handler */
|
||||||
char *str;
|
char *str;
|
||||||
|
|
||||||
str = Py_EncodeLocale(wstr, &error_pos);
|
str = _Py_EncodeLocaleEx(wstr, &error_pos, current_locale);
|
||||||
if (str == NULL) {
|
if (str == NULL) {
|
||||||
if (error_pos == (size_t)-1) {
|
if (error_pos == (size_t)-1) {
|
||||||
PyErr_NoMemory();
|
PyErr_NoMemory();
|
||||||
|
@ -3549,6 +3550,12 @@ encode_error:
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PyObject *
|
||||||
|
PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
|
||||||
|
{
|
||||||
|
return unicode_encode_locale(unicode, errors, 1);
|
||||||
|
}
|
||||||
|
|
||||||
PyObject *
|
PyObject *
|
||||||
PyUnicode_EncodeFSDefault(PyObject *unicode)
|
PyUnicode_EncodeFSDefault(PyObject *unicode)
|
||||||
{
|
{
|
||||||
|
@ -3571,7 +3578,8 @@ PyUnicode_EncodeFSDefault(PyObject *unicode)
|
||||||
Py_FileSystemDefaultEncodeErrors);
|
Py_FileSystemDefaultEncodeErrors);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return PyUnicode_EncodeLocale(unicode, Py_FileSystemDefaultEncodeErrors);
|
return unicode_encode_locale(unicode,
|
||||||
|
Py_FileSystemDefaultEncodeErrors, 0);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -3741,9 +3749,9 @@ mbstowcs_errorpos(const char *str, size_t len)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject*
|
static PyObject*
|
||||||
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
|
unicode_decode_locale(const char *str, Py_ssize_t len,
|
||||||
const char *errors)
|
const char *errors, int current_locale)
|
||||||
{
|
{
|
||||||
wchar_t smallbuf[256];
|
wchar_t smallbuf[256];
|
||||||
size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf);
|
size_t smallbuf_len = Py_ARRAY_LENGTH(smallbuf);
|
||||||
|
@ -3766,7 +3774,7 @@ PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
|
||||||
|
|
||||||
if (surrogateescape) {
|
if (surrogateescape) {
|
||||||
/* "surrogateescape" error handler */
|
/* "surrogateescape" error handler */
|
||||||
wstr = Py_DecodeLocale(str, &wlen);
|
wstr = _Py_DecodeLocaleEx(str, &wlen, current_locale);
|
||||||
if (wstr == NULL) {
|
if (wstr == NULL) {
|
||||||
if (wlen == (size_t)-1)
|
if (wlen == (size_t)-1)
|
||||||
PyErr_NoMemory();
|
PyErr_NoMemory();
|
||||||
|
@ -3844,11 +3852,18 @@ decode_error:
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PyObject*
|
||||||
|
PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t size,
|
||||||
|
const char *errors)
|
||||||
|
{
|
||||||
|
return unicode_decode_locale(str, size, errors, 1);
|
||||||
|
}
|
||||||
|
|
||||||
PyObject*
|
PyObject*
|
||||||
PyUnicode_DecodeLocale(const char *str, const char *errors)
|
PyUnicode_DecodeLocale(const char *str, const char *errors)
|
||||||
{
|
{
|
||||||
Py_ssize_t size = (Py_ssize_t)strlen(str);
|
Py_ssize_t size = (Py_ssize_t)strlen(str);
|
||||||
return PyUnicode_DecodeLocaleAndSize(str, size, errors);
|
return unicode_decode_locale(str, size, errors, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -3880,7 +3895,8 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
|
||||||
Py_FileSystemDefaultEncodeErrors);
|
Py_FileSystemDefaultEncodeErrors);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return PyUnicode_DecodeLocaleAndSize(s, size, Py_FileSystemDefaultEncodeErrors);
|
return unicode_decode_locale(s, size,
|
||||||
|
Py_FileSystemDefaultEncodeErrors, 0);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
@ -70,7 +70,10 @@ _Py_device_encoding(int fd)
|
||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !defined(__APPLE__) && !defined(MS_WINDOWS)
|
#if !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS)
|
||||||
|
|
||||||
|
#define USE_FORCE_ASCII
|
||||||
|
|
||||||
extern int _Py_normalize_encoding(const char *, char *, size_t);
|
extern int _Py_normalize_encoding(const char *, char *, size_t);
|
||||||
|
|
||||||
/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale.
|
/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale.
|
||||||
|
@ -221,7 +224,7 @@ encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)
|
||||||
}
|
}
|
||||||
#endif /* !defined(__APPLE__) && !defined(MS_WINDOWS) */
|
#endif /* !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS) */
|
||||||
|
|
||||||
#if !defined(__APPLE__) && (!defined(MS_WINDOWS) || !defined(HAVE_MBRTOWC))
|
#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
|
||||||
static wchar_t*
|
static wchar_t*
|
||||||
decode_ascii_surrogateescape(const char *arg, size_t *size)
|
decode_ascii_surrogateescape(const char *arg, size_t *size)
|
||||||
{
|
{
|
||||||
|
@ -251,39 +254,9 @@ decode_ascii_surrogateescape(const char *arg, size_t *size)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/* Decode a byte string from the locale encoding with the
|
static wchar_t*
|
||||||
surrogateescape error handler: undecodable bytes are decoded as characters
|
decode_current_locale(const char* arg, size_t *size)
|
||||||
in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
|
|
||||||
character, escape the bytes using the surrogateescape error handler instead
|
|
||||||
of decoding them.
|
|
||||||
|
|
||||||
Return a pointer to a newly allocated wide character string, use
|
|
||||||
PyMem_RawFree() to free the memory. If size is not NULL, write the number of
|
|
||||||
wide characters excluding the null character into *size
|
|
||||||
|
|
||||||
Return NULL on decoding error or memory allocation error. If *size* is not
|
|
||||||
NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
|
|
||||||
decoding error.
|
|
||||||
|
|
||||||
Decoding errors should never happen, unless there is a bug in the C
|
|
||||||
library.
|
|
||||||
|
|
||||||
Use the Py_EncodeLocale() function to encode the character string back to a
|
|
||||||
byte string. */
|
|
||||||
wchar_t*
|
|
||||||
Py_DecodeLocale(const char* arg, size_t *size)
|
|
||||||
{
|
{
|
||||||
#if defined(__APPLE__) || defined(__ANDROID__)
|
|
||||||
wchar_t *wstr;
|
|
||||||
wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
|
|
||||||
if (size != NULL) {
|
|
||||||
if (wstr != NULL)
|
|
||||||
*size = wcslen(wstr);
|
|
||||||
else
|
|
||||||
*size = (size_t)-1;
|
|
||||||
}
|
|
||||||
return wstr;
|
|
||||||
#else
|
|
||||||
wchar_t *res;
|
wchar_t *res;
|
||||||
size_t argsize;
|
size_t argsize;
|
||||||
size_t count;
|
size_t count;
|
||||||
|
@ -293,19 +266,6 @@ Py_DecodeLocale(const char* arg, size_t *size)
|
||||||
mbstate_t mbs;
|
mbstate_t mbs;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef MS_WINDOWS
|
|
||||||
if (force_ascii == -1)
|
|
||||||
force_ascii = check_force_ascii();
|
|
||||||
|
|
||||||
if (force_ascii) {
|
|
||||||
/* force ASCII encoding to workaround mbstowcs() issue */
|
|
||||||
res = decode_ascii_surrogateescape(arg, size);
|
|
||||||
if (res == NULL)
|
|
||||||
goto oom;
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef HAVE_BROKEN_MBSTOWCS
|
#ifdef HAVE_BROKEN_MBSTOWCS
|
||||||
/* Some platforms have a broken implementation of
|
/* Some platforms have a broken implementation of
|
||||||
* mbstowcs which does not count the characters that
|
* mbstowcs which does not count the characters that
|
||||||
|
@ -402,72 +362,96 @@ Py_DecodeLocale(const char* arg, size_t *size)
|
||||||
goto oom;
|
goto oom;
|
||||||
#endif /* HAVE_MBRTOWC */
|
#endif /* HAVE_MBRTOWC */
|
||||||
return res;
|
return res;
|
||||||
|
|
||||||
oom:
|
oom:
|
||||||
if (size != NULL)
|
if (size != NULL)
|
||||||
*size = (size_t)-1;
|
*size = (size_t)-1;
|
||||||
return NULL;
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static wchar_t*
|
||||||
|
decode_locale(const char* arg, size_t *size, int current_locale)
|
||||||
|
{
|
||||||
|
if (current_locale) {
|
||||||
|
return decode_current_locale(arg, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(__APPLE__) || defined(__ANDROID__)
|
||||||
|
wchar_t *wstr;
|
||||||
|
wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
|
||||||
|
if (size != NULL) {
|
||||||
|
if (wstr != NULL)
|
||||||
|
*size = wcslen(wstr);
|
||||||
|
else
|
||||||
|
*size = (size_t)-1;
|
||||||
|
}
|
||||||
|
return wstr;
|
||||||
|
#else
|
||||||
|
|
||||||
|
#ifdef USE_FORCE_ASCII
|
||||||
|
if (force_ascii == -1) {
|
||||||
|
force_ascii = check_force_ascii();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (force_ascii) {
|
||||||
|
/* force ASCII encoding to workaround mbstowcs() issue */
|
||||||
|
wchar_t *res = decode_ascii_surrogateescape(arg, size);
|
||||||
|
if (res == NULL) {
|
||||||
|
if (size != NULL)
|
||||||
|
*size = (size_t)-1;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return decode_current_locale(arg, size);
|
||||||
#endif /* __APPLE__ or __ANDROID__ */
|
#endif /* __APPLE__ or __ANDROID__ */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Encode a wide character string to the locale encoding with the
|
|
||||||
surrogateescape error handler: surrogate characters in the range
|
|
||||||
U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
|
|
||||||
|
|
||||||
Return a pointer to a newly allocated byte string, use PyMem_Free() to free
|
/* Decode a byte string from the locale encoding with the
|
||||||
the memory. Return NULL on encoding or memory allocation error.
|
surrogateescape error handler: undecodable bytes are decoded as characters
|
||||||
|
in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
|
||||||
|
character, escape the bytes using the surrogateescape error handler instead
|
||||||
|
of decoding them.
|
||||||
|
|
||||||
If error_pos is not NULL, *error_pos is set to the index of the invalid
|
Return a pointer to a newly allocated wide character string, use
|
||||||
character on encoding error, or set to (size_t)-1 otherwise.
|
PyMem_RawFree() to free the memory. If size is not NULL, write the number of
|
||||||
|
wide characters excluding the null character into *size.
|
||||||
|
|
||||||
Use the Py_DecodeLocale() function to decode the bytes string back to a wide
|
Return NULL on decoding error or memory allocation error. If *size* is not
|
||||||
character string. */
|
NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
|
||||||
char*
|
decoding error.
|
||||||
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
|
|
||||||
|
Decoding errors should never happen, unless there is a bug in the C
|
||||||
|
library.
|
||||||
|
|
||||||
|
Use the Py_EncodeLocale() function to encode the character string back to a
|
||||||
|
byte string. */
|
||||||
|
wchar_t*
|
||||||
|
Py_DecodeLocale(const char* arg, size_t *size)
|
||||||
{
|
{
|
||||||
#if defined(__APPLE__) || defined(__ANDROID__)
|
return decode_locale(arg, size, 0);
|
||||||
Py_ssize_t len;
|
}
|
||||||
PyObject *unicode, *bytes = NULL;
|
|
||||||
char *cpath;
|
|
||||||
|
|
||||||
unicode = PyUnicode_FromWideChar(text, wcslen(text));
|
|
||||||
if (unicode == NULL)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
|
wchar_t*
|
||||||
Py_DECREF(unicode);
|
_Py_DecodeLocaleEx(const char* arg, size_t *size, int current_locale)
|
||||||
if (bytes == NULL) {
|
{
|
||||||
PyErr_Clear();
|
return decode_locale(arg, size, current_locale);
|
||||||
if (error_pos != NULL)
|
}
|
||||||
*error_pos = (size_t)-1;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
len = PyBytes_GET_SIZE(bytes);
|
|
||||||
cpath = PyMem_Malloc(len+1);
|
static char*
|
||||||
if (cpath == NULL) {
|
encode_current_locale(const wchar_t *text, size_t *error_pos)
|
||||||
PyErr_Clear();
|
{
|
||||||
Py_DECREF(bytes);
|
|
||||||
if (error_pos != NULL)
|
|
||||||
*error_pos = (size_t)-1;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
memcpy(cpath, PyBytes_AsString(bytes), len + 1);
|
|
||||||
Py_DECREF(bytes);
|
|
||||||
return cpath;
|
|
||||||
#else /* __APPLE__ */
|
|
||||||
const size_t len = wcslen(text);
|
const size_t len = wcslen(text);
|
||||||
char *result = NULL, *bytes = NULL;
|
char *result = NULL, *bytes = NULL;
|
||||||
size_t i, size, converted;
|
size_t i, size, converted;
|
||||||
wchar_t c, buf[2];
|
wchar_t c, buf[2];
|
||||||
|
|
||||||
#ifndef MS_WINDOWS
|
|
||||||
if (force_ascii == -1)
|
|
||||||
force_ascii = check_force_ascii();
|
|
||||||
|
|
||||||
if (force_ascii)
|
|
||||||
return encode_ascii_surrogateescape(text, error_pos);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* The function works in two steps:
|
/* The function works in two steps:
|
||||||
1. compute the length of the output buffer in bytes (size)
|
1. compute the length of the output buffer in bytes (size)
|
||||||
2. outputs the bytes */
|
2. outputs the bytes */
|
||||||
|
@ -522,10 +506,89 @@ Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
|
||||||
bytes = result;
|
bytes = result;
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static char*
|
||||||
|
encode_locale(const wchar_t *text, size_t *error_pos, int current_locale)
|
||||||
|
{
|
||||||
|
if (current_locale) {
|
||||||
|
return encode_current_locale(text, error_pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(__APPLE__) || defined(__ANDROID__)
|
||||||
|
Py_ssize_t len;
|
||||||
|
PyObject *unicode, *bytes = NULL;
|
||||||
|
char *cpath;
|
||||||
|
|
||||||
|
unicode = PyUnicode_FromWideChar(text, wcslen(text));
|
||||||
|
if (unicode == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
|
||||||
|
Py_DECREF(unicode);
|
||||||
|
if (bytes == NULL) {
|
||||||
|
PyErr_Clear();
|
||||||
|
if (error_pos != NULL)
|
||||||
|
*error_pos = (size_t)-1;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
len = PyBytes_GET_SIZE(bytes);
|
||||||
|
cpath = PyMem_Malloc(len+1);
|
||||||
|
if (cpath == NULL) {
|
||||||
|
PyErr_Clear();
|
||||||
|
Py_DECREF(bytes);
|
||||||
|
if (error_pos != NULL)
|
||||||
|
*error_pos = (size_t)-1;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
memcpy(cpath, PyBytes_AsString(bytes), len + 1);
|
||||||
|
Py_DECREF(bytes);
|
||||||
|
return cpath;
|
||||||
|
#else /* __APPLE__ */
|
||||||
|
|
||||||
|
#ifdef USE_FORCE_ASCII
|
||||||
|
if (force_ascii == -1) {
|
||||||
|
force_ascii = check_force_ascii();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (force_ascii) {
|
||||||
|
return encode_ascii_surrogateescape(text, error_pos);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return encode_current_locale(text, error_pos);
|
||||||
#endif /* __APPLE__ or __ANDROID__ */
|
#endif /* __APPLE__ or __ANDROID__ */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Encode a wide character string to the locale encoding with the
|
||||||
|
surrogateescape error handler: surrogate characters in the range
|
||||||
|
U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
|
||||||
|
|
||||||
|
Return a pointer to a newly allocated byte string, use PyMem_Free() to free
|
||||||
|
the memory. Return NULL on encoding or memory allocation error.
|
||||||
|
|
||||||
|
If error_pos is not NULL, *error_pos is set to the index of the invalid
|
||||||
|
character on encoding error, or set to (size_t)-1 otherwise.
|
||||||
|
|
||||||
|
Use the Py_DecodeLocale() function to decode the byte string back to a wide
|
||||||
|
character string. */
|
||||||
|
char*
|
||||||
|
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
|
||||||
|
{
|
||||||
|
return encode_locale(text, error_pos, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
char*
|
||||||
|
_Py_EncodeLocaleEx(const wchar_t *text, size_t *error_pos, int current_locale)
|
||||||
|
{
|
||||||
|
return encode_locale(text, error_pos, current_locale);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef MS_WINDOWS
|
#ifdef MS_WINDOWS
|
||||||
static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
|
static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue