bpo-42208: Add _Py_GetLocaleEncoding() (GH-23050)

_io.TextIOWrapper no longer calls getpreferredencoding(False) of
_bootlocale to get the locale encoding, but calls
_Py_GetLocaleEncoding() instead.

Add the config_get_fs_encoding() sub-function. Also reorganize the
config_get_locale_encoding() code.
This commit is contained in:
Victor Stinner 2020-10-31 01:02:09 +01:00 committed by GitHub
parent 06f8c3328d
commit 710e826307
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 112 additions and 110 deletions

View File

@ -50,6 +50,8 @@ PyAPI_FUNC(int) _Py_GetLocaleconvNumeric(
PyAPI_FUNC(void) _Py_closerange(int first, int last);
PyAPI_FUNC(PyObject*) _Py_GetLocaleEncoding(void);
#ifdef __cplusplus
}
#endif

View File

@ -593,31 +593,6 @@ _PyIO_get_module_state(void)
return state;
}
/* Return a strong reference to the "_bootlocale" module.

   A weak reference to the module is cached in state->locale_module. When
   the cached weak reference is still alive, its referent is returned
   directly; otherwise the stale entry is cleared, the module is imported
   again and a fresh weak reference is stored.

   Return NULL if the import or the weak reference creation fails. */
PyObject *
_PyIO_get_locale_module(_PyIO_State *state)
{
    PyObject *cached_ref = state->locale_module;
    if (cached_ref != NULL) {
        assert(PyWeakref_CheckRef(cached_ref));
        PyObject *referent = PyWeakref_GET_OBJECT(cached_ref);
        if (referent != Py_None) {
            Py_INCREF(referent);
            return referent;
        }
        /* The weak reference is dead: forget it and import again below. */
        Py_CLEAR(state->locale_module);
    }

    PyObject *module = PyImport_ImportModule("_bootlocale");
    if (module == NULL) {
        return NULL;
    }

    state->locale_module = PyWeakref_NewRef(module, NULL);
    if (state->locale_module != NULL) {
        return module;
    }

    /* Could not create the weak reference: release the imported module. */
    Py_DECREF(module);
    return NULL;
}
static int
iomodule_traverse(PyObject *mod, visitproc visit, void *arg) {
_PyIO_State *state = get_io_state(mod);

View File

@ -150,7 +150,6 @@ typedef struct {
#define IO_STATE() _PyIO_get_module_state()
extern _PyIO_State *_PyIO_get_module_state(void);
extern PyObject *_PyIO_get_locale_module(_PyIO_State *);
#ifdef MS_WINDOWS
extern char _PyIO_get_console_type(PyObject *);

View File

@ -10,6 +10,7 @@
#include "Python.h"
#include "pycore_interp.h" // PyInterpreterState.fs_codec
#include "pycore_long.h" // _PyLong_GetZero()
#include "pycore_fileutils.h" // _Py_GetLocaleEncoding()
#include "pycore_object.h"
#include "pycore_pystate.h" // _PyInterpreterState_GET()
#include "structmember.h" // PyMemberDef
@ -27,7 +28,6 @@ _Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
@ -1155,29 +1155,11 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
}
}
if (encoding == NULL && self->encoding == NULL) {
PyObject *locale_module = _PyIO_get_locale_module(state);
if (locale_module == NULL)
goto catch_ImportError;
self->encoding = _PyObject_CallMethodIdOneArg(
locale_module, &PyId_getpreferredencoding, Py_False);
Py_DECREF(locale_module);
self->encoding = _Py_GetLocaleEncoding();
if (self->encoding == NULL) {
catch_ImportError:
/*
Importing locale can raise an ImportError because of
_functools, and locale.getpreferredencoding can raise an
ImportError if _locale is not available. These will happen
during module building.
*/
if (PyErr_ExceptionMatches(PyExc_ImportError)) {
PyErr_Clear();
self->encoding = PyUnicode_FromString("ascii");
}
else
goto error;
goto error;
}
else if (!PyUnicode_Check(self->encoding))
Py_CLEAR(self->encoding);
assert(PyUnicode_Check(self->encoding));
}
if (self->encoding != NULL) {
encoding = PyUnicode_AsUTF8(self->encoding);

View File

@ -1,5 +1,6 @@
#include "Python.h"
#include "pycore_fileutils.h"
#include "pycore_fileutils.h" // fileutils definitions
#include "pycore_runtime.h" // _PyRuntime
#include "osdefs.h" // SEP
#include <locale.h>
@ -820,6 +821,46 @@ _Py_EncodeLocaleEx(const wchar_t *text, char **str,
}
// Return the current locale encoding as a Python str object:
// the equivalent of locale.getpreferredencoding(False).
//
// Return NULL on failure. When nl_langinfo(CODESET) yields no usable name
// (and no UTF-8 fallback is compiled in), a ValueError is set.
//
// See also config_get_locale_encoding().
PyObject *
_Py_GetLocaleEncoding(void)
{
#ifdef _Py_FORCE_UTF8_LOCALE
    // On Android langinfo.h and CODESET are missing,
    // and UTF-8 is always used in mbstowcs() and wcstombs().
    return PyUnicode_FromString("UTF-8");
#else
    // The UTF-8 Mode takes precedence over the platform locale.
    if (_PyRuntime.preconfig.utf8_mode) {
        return PyUnicode_FromString("UTF-8");
    }

#  if defined(MS_WINDOWS)
    // Name the encoding after the ANSI code page number.
    return PyUnicode_FromFormat("cp%u", GetACP());
#  else
    const char *codeset = nl_langinfo(CODESET);
    if (codeset == NULL || *codeset == '\0') {
#    ifdef _Py_FORCE_UTF8_FS_ENCODING
        // nl_langinfo() can return an empty string when the LC_CTYPE locale
        // is not supported. Default to UTF-8 in that case, because UTF-8 is
        // the default charset on macOS.
        codeset = "UTF-8";
#    else
        PyErr_SetString(PyExc_ValueError,
                        "failed to get the locale encoding: "
                        "nl_langinfo(CODESET) returns an empty string");
        return NULL;
#    endif
    }
    // The codeset name is decoded from UTF-8.
    return PyUnicode_FromString(codeset);
#  endif  // !MS_WINDOWS
#endif  // !_Py_FORCE_UTF8_LOCALE
}
#ifdef MS_WINDOWS
static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */

View File

@ -766,7 +766,7 @@ config_set_bytes_string(PyConfig *config, wchar_t **config_str,
configured. */
PyStatus
PyConfig_SetBytesString(PyConfig *config, wchar_t **config_str,
const char *str)
const char *str)
{
return CONFIG_SET_BYTES_STR(config, config_str, str, "string");
}
@ -1466,8 +1466,13 @@ config_read_complex_options(PyConfig *config)
static const wchar_t *
config_get_stdio_errors(void)
config_get_stdio_errors(const PyPreConfig *preconfig)
{
if (preconfig->utf8_mode) {
/* UTF-8 Mode uses UTF-8/surrogateescape */
return L"surrogateescape";
}
#ifndef MS_WINDOWS
const char *loc = setlocale(LC_CTYPE, NULL);
if (loc != NULL) {
@ -1492,26 +1497,41 @@ config_get_stdio_errors(void)
}
// See also _Py_GetLocaleEncoding() and config_get_fs_encoding()
static PyStatus
config_get_locale_encoding(PyConfig *config, wchar_t **locale_encoding)
config_get_locale_encoding(PyConfig *config, const PyPreConfig *preconfig,
wchar_t **locale_encoding)
{
#ifdef _Py_FORCE_UTF8_LOCALE
return PyConfig_SetString(config, locale_encoding, L"utf-8");
#else
if (preconfig->utf8_mode) {
return PyConfig_SetString(config, locale_encoding, L"utf-8");
}
#ifdef MS_WINDOWS
char encoding[20];
PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP());
return PyConfig_SetBytesString(config, locale_encoding, encoding);
#elif defined(_Py_FORCE_UTF8_LOCALE)
return PyConfig_SetString(config, locale_encoding, L"utf-8");
#else
const char *encoding = nl_langinfo(CODESET);
if (!encoding || encoding[0] == '\0') {
#ifdef _Py_FORCE_UTF8_FS_ENCODING
// nl_langinfo() can return an empty string when the LC_CTYPE locale is
// not supported. Default to UTF-8 in that case, because UTF-8 is the
// default charset on macOS.
encoding = "UTF-8";
#else
return _PyStatus_ERR("failed to get the locale encoding: "
"nl_langinfo(CODESET) failed");
"nl_langinfo(CODESET) returns an empty string");
#endif
}
/* nl_langinfo(CODESET) is decoded by Py_DecodeLocale() */
return CONFIG_SET_BYTES_STR(config,
locale_encoding, encoding,
"nl_langinfo(CODESET)");
#endif
#endif // !MS_WINDOWS
#endif // !_Py_FORCE_UTF8_LOCALE
}
@ -1596,33 +1616,16 @@ config_init_stdio_encoding(PyConfig *config,
PyMem_RawFree(pythonioencoding);
}
/* UTF-8 Mode uses UTF-8/surrogateescape */
if (preconfig->utf8_mode) {
if (config->stdio_encoding == NULL) {
status = PyConfig_SetString(config, &config->stdio_encoding,
L"utf-8");
if (_PyStatus_EXCEPTION(status)) {
return status;
}
}
if (config->stdio_errors == NULL) {
status = PyConfig_SetString(config, &config->stdio_errors,
L"surrogateescape");
if (_PyStatus_EXCEPTION(status)) {
return status;
}
}
}
/* Choose the default error handler based on the current locale. */
if (config->stdio_encoding == NULL) {
status = config_get_locale_encoding(config, &config->stdio_encoding);
status = config_get_locale_encoding(config, preconfig,
&config->stdio_encoding);
if (_PyStatus_EXCEPTION(status)) {
return status;
}
}
if (config->stdio_errors == NULL) {
const wchar_t *errors = config_get_stdio_errors();
const wchar_t *errors = config_get_stdio_errors(preconfig);
assert(errors != NULL);
status = PyConfig_SetString(config, &config->stdio_errors, errors);
@ -1635,46 +1638,46 @@ config_init_stdio_encoding(PyConfig *config,
}
// Select the filesystem encoding and store it in *fs_encoding.
//
// - _Py_FORCE_UTF8_FS_ENCODING builds: always "utf-8".
// - Windows: "mbcs" if preconfig->legacy_windows_fs_encoding is set,
//   "utf-8" otherwise.
// - Other platforms: "utf-8" in UTF-8 Mode, "ascii" if _Py_GetForceASCII()
//   is true, else the result of config_get_locale_encoding().
//
// Return the PyStatus of the call that stores the string.
// See also config_get_locale_encoding().
static PyStatus
config_get_fs_encoding(PyConfig *config, const PyPreConfig *preconfig,
                       wchar_t **fs_encoding)
{
#ifdef _Py_FORCE_UTF8_FS_ENCODING
    return PyConfig_SetString(config, fs_encoding, L"utf-8");
#elif defined(MS_WINDOWS)
    // Legacy Windows filesystem encoding: mbcs/replace.
    // Otherwise Windows defaults to utf-8/surrogatepass (PEP 529).
    const wchar_t *name =
        preconfig->legacy_windows_fs_encoding ? L"mbcs" : L"utf-8";
    return PyConfig_SetString(config, fs_encoding, name);
#else  // !MS_WINDOWS
    if (preconfig->utf8_mode) {
        return PyConfig_SetString(config, fs_encoding, L"utf-8");
    }
    if (_Py_GetForceASCII()) {
        return PyConfig_SetString(config, fs_encoding, L"ascii");
    }
    return config_get_locale_encoding(config, preconfig, fs_encoding);
#endif  // !MS_WINDOWS
}
static PyStatus
config_init_fs_encoding(PyConfig *config, const PyPreConfig *preconfig)
{
PyStatus status;
if (config->filesystem_encoding == NULL) {
#ifdef _Py_FORCE_UTF8_FS_ENCODING
status = PyConfig_SetString(config, &config->filesystem_encoding, L"utf-8");
#else
#ifdef MS_WINDOWS
if (preconfig->legacy_windows_fs_encoding) {
/* Legacy Windows filesystem encoding: mbcs/replace */
status = PyConfig_SetString(config, &config->filesystem_encoding,
L"mbcs");
}
else
#endif
if (preconfig->utf8_mode) {
status = PyConfig_SetString(config, &config->filesystem_encoding,
L"utf-8");
}
#ifndef MS_WINDOWS
else if (_Py_GetForceASCII()) {
status = PyConfig_SetString(config, &config->filesystem_encoding,
L"ascii");
}
#endif
else {
#ifdef MS_WINDOWS
/* Windows defaults to utf-8/surrogatepass (PEP 529). */
status = PyConfig_SetString(config, &config->filesystem_encoding,
L"utf-8");
#else
status = config_get_locale_encoding(config,
&config->filesystem_encoding);
#endif
}
#endif /* !_Py_FORCE_UTF8_FS_ENCODING */
status = config_get_fs_encoding(config, preconfig,
&config->filesystem_encoding);
if (_PyStatus_EXCEPTION(status)) {
return status;
}