bpo-36775: Add _Py_FORCE_UTF8_FS_ENCODING macro (GH-13056)

Add _Py_FORCE_UTF8_LOCALE and _Py_FORCE_UTF8_FS_ENCODING macros to
factor out the repeated "#if defined(__ANDROID__) || defined(__VXWORKS__)"
and "#if defined(__APPLE__)" checks.

Also clean up config_init_fs_encoding().
This commit is contained in:
Victor Stinner 2019-05-02 11:28:57 -04:00 committed by GitHub
parent c4e78b116f
commit e251095a3f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 49 additions and 58 deletions

View File

@ -819,4 +819,14 @@ extern _invalid_parameter_handler _Py_silent_invalid_parameter_handler;
# error "Py_TRACE_REFS ABI is not compatible with release and debug ABI" # error "Py_TRACE_REFS ABI is not compatible with release and debug ABI"
#endif #endif
#if defined(__ANDROID__) || defined(__VXWORKS__)
/* Ignore the locale encoding: force UTF-8 */
# define _Py_FORCE_UTF8_LOCALE
#endif
#if defined(_Py_FORCE_UTF8_LOCALE) || defined(__APPLE__)
/* Use UTF-8 as filesystem encoding */
# define _Py_FORCE_UTF8_FS_ENCODING
#endif
#endif /* Py_PYPORT_H */ #endif /* Py_PYPORT_H */

View File

@ -3506,7 +3506,7 @@ PyUnicode_EncodeFSDefault(PyObject *unicode)
{ {
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE(); PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
const _PyCoreConfig *config = &interp->core_config; const _PyCoreConfig *config = &interp->core_config;
#if defined(__APPLE__) #ifdef _Py_FORCE_UTF8_FS_ENCODING
return _PyUnicode_AsUTF8String(unicode, config->filesystem_errors); return _PyUnicode_AsUTF8String(unicode, config->filesystem_errors);
#else #else
/* Bootstrap check: if the filesystem codec is implemented in Python, we /* Bootstrap check: if the filesystem codec is implemented in Python, we
@ -3730,7 +3730,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
{ {
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE(); PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
const _PyCoreConfig *config = &interp->core_config; const _PyCoreConfig *config = &interp->core_config;
#if defined(__APPLE__) #ifdef _Py_FORCE_UTF8_FS_ENCODING
return PyUnicode_DecodeUTF8Stateful(s, size, config->filesystem_errors, NULL); return PyUnicode_DecodeUTF8Stateful(s, size, config->filesystem_errors, NULL);
#else #else
/* Bootstrap check: if the filesystem codec is implemented in Python, we /* Bootstrap check: if the filesystem codec is implemented in Python, we

View File

@ -1313,7 +1313,7 @@ config_get_locale_encoding(char **locale_encoding)
#ifdef MS_WINDOWS #ifdef MS_WINDOWS
char encoding[20]; char encoding[20];
PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP()); PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP());
#elif defined(__ANDROID__) || defined(__VXWORKS__) #elif defined(_Py_FORCE_UTF8_LOCALE)
const char *encoding = "UTF-8"; const char *encoding = "UTF-8";
#else #else
const char *encoding = nl_langinfo(CODESET); const char *encoding = nl_langinfo(CODESET);
@ -1450,66 +1450,40 @@ config_init_fs_encoding(_PyCoreConfig *config, const _PyPreConfig *preconfig)
{ {
_PyInitError err; _PyInitError err;
if (config->filesystem_encoding == NULL) {
#ifdef _Py_FORCE_UTF8_FS_ENCODING
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"utf-8");
#else
#ifdef MS_WINDOWS #ifdef MS_WINDOWS
if (preconfig->legacy_windows_fs_encoding) { if (preconfig->legacy_windows_fs_encoding) {
/* Legacy Windows filesystem encoding: mbcs/replace */ /* Legacy Windows filesystem encoding: mbcs/replace */
if (config->filesystem_encoding == NULL) {
err = _PyCoreConfig_SetString(&config->filesystem_encoding, err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"mbcs"); "mbcs");
if (_Py_INIT_FAILED(err)) {
return err;
} }
} else
if (config->filesystem_errors == NULL) { #endif
err = _PyCoreConfig_SetString(&config->filesystem_errors,
"replace");
if (_Py_INIT_FAILED(err)) {
return err;
}
}
}
/* Windows defaults to utf-8/surrogatepass (PEP 529).
Note: UTF-8 Mode takes the same code path and the Legacy Windows FS
encoding has the priortiy over UTF-8 Mode. */
if (config->filesystem_encoding == NULL) {
err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"utf-8");
if (_Py_INIT_FAILED(err)) {
return err;
}
}
if (config->filesystem_errors == NULL) {
err = _PyCoreConfig_SetString(&config->filesystem_errors,
"surrogatepass");
if (_Py_INIT_FAILED(err)) {
return err;
}
}
#else
if (config->filesystem_encoding == NULL) {
if (preconfig->utf8_mode) { if (preconfig->utf8_mode) {
/* UTF-8 Mode use: utf-8/surrogateescape */
err = _PyCoreConfig_SetString(&config->filesystem_encoding, err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"utf-8"); "utf-8");
/* errors defaults to surrogateescape above */
} }
#ifndef MS_WINDOWS
else if (_Py_GetForceASCII()) { else if (_Py_GetForceASCII()) {
err = _PyCoreConfig_SetString(&config->filesystem_encoding, err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"ascii"); "ascii");
} }
#endif
else { else {
/* macOS and Android use UTF-8, #ifdef MS_WINDOWS
other platforms use the locale encoding. */ /* Windows defaults to utf-8/surrogatepass (PEP 529). */
#if defined(__APPLE__) || defined(__ANDROID__)
err = _PyCoreConfig_SetString(&config->filesystem_encoding, err = _PyCoreConfig_SetString(&config->filesystem_encoding,
"utf-8"); "utf-8");
#else #else
err = config_get_locale_encoding(&config->filesystem_encoding); err = config_get_locale_encoding(&config->filesystem_encoding);
#endif #endif
} }
#endif /* !_Py_FORCE_UTF8_FS_ENCODING */
if (_Py_INIT_FAILED(err)) { if (_Py_INIT_FAILED(err)) {
return err; return err;
@ -1517,14 +1491,22 @@ config_init_fs_encoding(_PyCoreConfig *config, const _PyPreConfig *preconfig)
} }
if (config->filesystem_errors == NULL) { if (config->filesystem_errors == NULL) {
/* by default, use the "surrogateescape" error handler */ const char *errors;
err = _PyCoreConfig_SetString(&config->filesystem_errors, #ifdef MS_WINDOWS
"surrogateescape"); if (preconfig->legacy_windows_fs_encoding) {
errors = "replace";
}
else {
errors = "surrogatepass";
}
#else
errors = "surrogateescape";
#endif
err = _PyCoreConfig_SetString(&config->filesystem_errors, errors);
if (_Py_INIT_FAILED(err)) { if (_Py_INIT_FAILED(err)) {
return err; return err;
} }
} }
#endif
return _Py_INIT_OK(); return _Py_INIT_OK();
} }

View File

@ -85,7 +85,7 @@ _Py_device_encoding(int fd)
Py_RETURN_NONE; Py_RETURN_NONE;
} }
#if !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS) #if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
#define USE_FORCE_ASCII #define USE_FORCE_ASCII
@ -309,7 +309,7 @@ _Py_ResetForceASCII(void)
{ {
/* nothing to do */ /* nothing to do */
} }
#endif /* !defined(__APPLE__) && !defined(__ANDROID__) && !defined(MS_WINDOWS) */ #endif /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII) #if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
@ -536,7 +536,7 @@ _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
int current_locale, _Py_error_handler errors) int current_locale, _Py_error_handler errors)
{ {
if (current_locale) { if (current_locale) {
#if defined(__ANDROID__) || defined(__VXWORKS__) #ifdef _Py_FORCE_UTF8_LOCALE
return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason, return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
errors); errors);
#else #else
@ -544,7 +544,7 @@ _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
#endif #endif
} }
#if defined(__APPLE__) || defined(__ANDROID__) || defined(__VXWORKS__) #ifdef _Py_FORCE_UTF8_FS_ENCODING
return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason, return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
errors); errors);
#else #else
@ -569,7 +569,7 @@ _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
#endif #endif
return decode_current_locale(arg, wstr, wlen, reason, errors); return decode_current_locale(arg, wstr, wlen, reason, errors);
#endif /* __APPLE__ or __ANDROID__ or __VXWORKS__ */ #endif /* !_Py_FORCE_UTF8_FS_ENCODING */
} }
@ -727,7 +727,7 @@ encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
int raw_malloc, int current_locale, _Py_error_handler errors) int raw_malloc, int current_locale, _Py_error_handler errors)
{ {
if (current_locale) { if (current_locale) {
#ifdef __ANDROID__ #ifdef _Py_FORCE_UTF8_LOCALE
return _Py_EncodeUTF8Ex(text, str, error_pos, reason, return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
raw_malloc, errors); raw_malloc, errors);
#else #else
@ -736,7 +736,7 @@ encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
#endif #endif
} }
#if defined(__APPLE__) || defined(__ANDROID__) #ifdef _Py_FORCE_UTF8_FS_ENCODING
return _Py_EncodeUTF8Ex(text, str, error_pos, reason, return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
raw_malloc, errors); raw_malloc, errors);
#else #else
@ -762,7 +762,7 @@ encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
return encode_current_locale(text, str, error_pos, reason, return encode_current_locale(text, str, error_pos, reason,
raw_malloc, errors); raw_malloc, errors);
#endif /* __APPLE__ or __ANDROID__ */ #endif /* _Py_FORCE_UTF8_FS_ENCODING */
} }
static char* static char*

View File

@ -366,8 +366,7 @@ _Py_CoerceLegacyLocale(int warn)
const char *new_locale = setlocale(LC_CTYPE, const char *new_locale = setlocale(LC_CTYPE,
target->locale_name); target->locale_name);
if (new_locale != NULL) { if (new_locale != NULL) {
#if !defined(__APPLE__) && !defined(__ANDROID__) && \ #if !defined(_Py_FORCE_UTF8_LOCALE) && defined(HAVE_LANGINFO_H) && defined(CODESET)
defined(HAVE_LANGINFO_H) && defined(CODESET)
/* Also ensure that nl_langinfo works in this locale */ /* Also ensure that nl_langinfo works in this locale */
char *codeset = nl_langinfo(CODESET); char *codeset = nl_langinfo(CODESET);
if (!codeset || *codeset == '\0') { if (!codeset || *codeset == '\0') {