diff --git a/Include/pyport.h b/Include/pyport.h index 568ab8f757d..97fb5e59f9e 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -819,4 +819,14 @@ extern _invalid_parameter_handler _Py_silent_invalid_parameter_handler; # error "Py_TRACE_REFS ABI is not compatible with release and debug ABI" #endif +#if defined(__ANDROID__) || defined(__VXWORKS__) + /* Ignore the locale encoding: force UTF-8 */ +# define _Py_FORCE_UTF8_LOCALE +#endif + +#if defined(_Py_FORCE_UTF8_LOCALE) || defined(__APPLE__) + /* Use UTF-8 as filesystem encoding */ +# define _Py_FORCE_UTF8_FS_ENCODING +#endif + #endif /* Py_PYPORT_H */ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index f6e68c94df5..9991362a333 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3506,7 +3506,7 @@ PyUnicode_EncodeFSDefault(PyObject *unicode) { PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE(); const _PyCoreConfig *config = &interp->core_config; -#if defined(__APPLE__) +#ifdef _Py_FORCE_UTF8_FS_ENCODING return _PyUnicode_AsUTF8String(unicode, config->filesystem_errors); #else /* Bootstrap check: if the filesystem codec is implemented in Python, we @@ -3730,7 +3730,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) { PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE(); const _PyCoreConfig *config = &interp->core_config; -#if defined(__APPLE__) +#ifdef _Py_FORCE_UTF8_FS_ENCODING return PyUnicode_DecodeUTF8Stateful(s, size, config->filesystem_errors, NULL); #else /* Bootstrap check: if the filesystem codec is implemented in Python, we diff --git a/Python/coreconfig.c b/Python/coreconfig.c index 1cb4b52e600..c40c1f859ec 100644 --- a/Python/coreconfig.c +++ b/Python/coreconfig.c @@ -1313,7 +1313,7 @@ config_get_locale_encoding(char **locale_encoding) #ifdef MS_WINDOWS char encoding[20]; PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP()); -#elif defined(__ANDROID__) || defined(__VXWORKS__) +#elif defined(_Py_FORCE_UTF8_LOCALE) 
const char *encoding = "UTF-8"; #else const char *encoding = nl_langinfo(CODESET); @@ -1450,66 +1450,40 @@ config_init_fs_encoding(_PyCoreConfig *config, const _PyPreConfig *preconfig) { _PyInitError err; -#ifdef MS_WINDOWS - if (preconfig->legacy_windows_fs_encoding) { - /* Legacy Windows filesystem encoding: mbcs/replace */ - if (config->filesystem_encoding == NULL) { - err = _PyCoreConfig_SetString(&config->filesystem_encoding, - "mbcs"); - if (_Py_INIT_FAILED(err)) { - return err; - } - } - if (config->filesystem_errors == NULL) { - err = _PyCoreConfig_SetString(&config->filesystem_errors, - "replace"); - if (_Py_INIT_FAILED(err)) { - return err; - } - } - } - - /* Windows defaults to utf-8/surrogatepass (PEP 529). - - Note: UTF-8 Mode takes the same code path and the Legacy Windows FS - encoding has the priortiy over UTF-8 Mode. */ if (config->filesystem_encoding == NULL) { +#ifdef _Py_FORCE_UTF8_FS_ENCODING err = _PyCoreConfig_SetString(&config->filesystem_encoding, "utf-8"); - if (_Py_INIT_FAILED(err)) { - return err; - } - } - - if (config->filesystem_errors == NULL) { - err = _PyCoreConfig_SetString(&config->filesystem_errors, - "surrogatepass"); - if (_Py_INIT_FAILED(err)) { - return err; - } - } #else - if (config->filesystem_encoding == NULL) { + +#ifdef MS_WINDOWS + if (preconfig->legacy_windows_fs_encoding) { + /* Legacy Windows filesystem encoding: mbcs/replace */ + err = _PyCoreConfig_SetString(&config->filesystem_encoding, + "mbcs"); + } + else +#endif if (preconfig->utf8_mode) { - /* UTF-8 Mode use: utf-8/surrogateescape */ err = _PyCoreConfig_SetString(&config->filesystem_encoding, "utf-8"); - /* errors defaults to surrogateescape above */ } +#ifndef MS_WINDOWS else if (_Py_GetForceASCII()) { err = _PyCoreConfig_SetString(&config->filesystem_encoding, "ascii"); } +#endif else { - /* macOS and Android use UTF-8, - other platforms use the locale encoding. 
*/ -#if defined(__APPLE__) || defined(__ANDROID__) +#ifdef MS_WINDOWS + /* Windows defaults to utf-8/surrogatepass (PEP 529). */ err = _PyCoreConfig_SetString(&config->filesystem_encoding, "utf-8"); #else err = config_get_locale_encoding(&config->filesystem_encoding); #endif } +#endif /* !_Py_FORCE_UTF8_FS_ENCODING */ if (_Py_INIT_FAILED(err)) { return err; @@ -1517,14 +1491,22 @@ config_init_fs_encoding(_PyCoreConfig *config, const _PyPreConfig *preconfig) } if (config->filesystem_errors == NULL) { - /* by default, use the "surrogateescape" error handler */ - err = _PyCoreConfig_SetString(&config->filesystem_errors, - "surrogateescape"); + const char *errors; +#ifdef MS_WINDOWS + if (preconfig->legacy_windows_fs_encoding) { + errors = "replace"; + } + else { + errors = "surrogatepass"; + } +#else + errors = "surrogateescape"; +#endif + err = _PyCoreConfig_SetString(&config->filesystem_errors, errors); if (_Py_INIT_FAILED(err)) { return err; } } -#endif return _Py_INIT_OK(); } diff --git a/Python/fileutils.c b/Python/fileutils.c index b933874193b..dfad48edb81 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -85,7 +85,7 @@ _Py_device_encoding(int fd) Py_RETURN_NONE; } -#if !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS) +#if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) #define USE_FORCE_ASCII @@ -309,7 +309,7 @@ _Py_ResetForceASCII(void) { /* nothing to do */ } -#endif /* !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS) */ +#endif /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */ #if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII) @@ -536,7 +536,7 @@ _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen, int current_locale, _Py_error_handler errors) { if (current_locale) { -#if defined(__ANDROID__) || defined(__VXWORKS__) +#ifdef _Py_FORCE_UTF8_LOCALE return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason, errors); #else @@ -544,7 +544,7 @@ 
_Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen, #endif } -#if defined(__APPLE__) || defined(__ANDROID__) || defined(__VXWORKS__) +#ifdef _Py_FORCE_UTF8_FS_ENCODING return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason, errors); #else @@ -569,7 +569,7 @@ _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen, #endif return decode_current_locale(arg, wstr, wlen, reason, errors); -#endif /* __APPLE__ or __ANDROID__ or __VXWORKS__ */ +#endif /* !_Py_FORCE_UTF8_FS_ENCODING */ } @@ -727,7 +727,7 @@ encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos, int raw_malloc, int current_locale, _Py_error_handler errors) { if (current_locale) { -#ifdef __ANDROID__ +#ifdef _Py_FORCE_UTF8_LOCALE return _Py_EncodeUTF8Ex(text, str, error_pos, reason, raw_malloc, errors); #else @@ -736,7 +736,7 @@ encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos, #endif } -#if defined(__APPLE__) || defined(__ANDROID__) +#ifdef _Py_FORCE_UTF8_FS_ENCODING return _Py_EncodeUTF8Ex(text, str, error_pos, reason, raw_malloc, errors); #else @@ -762,7 +762,7 @@ encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos, return encode_current_locale(text, str, error_pos, reason, raw_malloc, errors); -#endif /* __APPLE__ or __ANDROID__ */ +#endif /* !_Py_FORCE_UTF8_FS_ENCODING */ } static char* diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index afa683b7e84..40eeebdd1a7 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -366,8 +366,7 @@ _Py_CoerceLegacyLocale(int warn) const char *new_locale = setlocale(LC_CTYPE, target->locale_name); if (new_locale != NULL) { -#if !defined(__APPLE__) && !defined(__ANDROID__) && \ -defined(HAVE_LANGINFO_H) && defined(CODESET) +#if !defined(_Py_FORCE_UTF8_LOCALE) && defined(HAVE_LANGINFO_H) && defined(CODESET) /* Also ensure that nl_langinfo works in this locale */ char *codeset = nl_langinfo(CODESET); if (!codeset || *codeset == '\0') {