bpo-36443: Disable C locale coercion and UTF-8 Mode by default (GH-12589)
bpo-36443, bpo-36202: Since Python 3.7.0, calling Py_DecodeLocale() before Py_Initialize() produces mojibake if the LC_CTYPE locale is coerced and/or if the UTF-8 Mode is enabled by the user configuration. This change fixes the issue by disabling LC_CTYPE coercion and UTF-8 Mode by default. They must now be enabled explicitly (opt-in) using the new _Py_PreInitialize() API with _PyPreConfig. When embedding Python, set the coerce_c_locale and utf8_mode attributes of _PyPreConfig to -1 to automatically enable these parameters depending on the LC_CTYPE locale, environment variables and command line arguments. Alternative: Setting Py_UTF8Mode to 1 always explicitly enables the UTF-8 Mode. Changes: * _PyPreConfig_INIT now sets coerce_c_locale and utf8_mode to 0 by default. * _Py_InitializeFromArgs() and _Py_InitializeFromWideArgs() can now be called with config=NULL.
This commit is contained in:
parent
4a9a505d6f
commit
d929f1838a
|
@ -63,13 +63,20 @@ typedef struct {
|
||||||
set to !Py_IgnoreEnvironmentFlag. */
|
set to !Py_IgnoreEnvironmentFlag. */
|
||||||
int use_environment;
|
int use_environment;
|
||||||
|
|
||||||
/* PYTHONCOERCECLOCALE, -1 means unknown.
|
/* Coerce the LC_CTYPE locale if it's equal to "C"? (PEP 538)
|
||||||
|
|
||||||
|
Set to 0 by PYTHONCOERCECLOCALE=0. Set to 1 by PYTHONCOERCECLOCALE=1.
|
||||||
|
Set to 2 if the user preferred LC_CTYPE locale is "C".
|
||||||
|
|
||||||
If it is equal to 1, LC_CTYPE locale is read to decide if it should be
|
If it is equal to 1, LC_CTYPE locale is read to decide if it should be
|
||||||
coerced or not (ex: PYTHONCOERCECLOCALE=1). Internally, it is set to 2
|
coerced or not (ex: PYTHONCOERCECLOCALE=1). Internally, it is set to 2
|
||||||
if the LC_CTYPE locale must be coerced. */
|
if the LC_CTYPE locale must be coerced. */
|
||||||
int coerce_c_locale;
|
int coerce_c_locale;
|
||||||
int coerce_c_locale_warn; /* PYTHONCOERCECLOCALE=warn */
|
|
||||||
|
/* Emit a warning if the LC_CTYPE locale is coerced?
|
||||||
|
|
||||||
|
Disabled by default. Set to 1 by PYTHONCOERCECLOCALE=warn. */
|
||||||
|
int coerce_c_locale_warn;
|
||||||
|
|
||||||
#ifdef MS_WINDOWS
|
#ifdef MS_WINDOWS
|
||||||
/* If greater than 1, use the "mbcs" encoding instead of the UTF-8
|
/* If greater than 1, use the "mbcs" encoding instead of the UTF-8
|
||||||
|
@ -83,9 +90,17 @@ typedef struct {
|
||||||
int legacy_windows_fs_encoding;
|
int legacy_windows_fs_encoding;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Enable UTF-8 mode?
|
/* Enable UTF-8 mode? (PEP 540)
|
||||||
Set by -X utf8 command line option and PYTHONUTF8 environment variable.
|
|
||||||
If set to -1 (default), inherit Py_UTF8Mode value. */
|
Disabled by default (equal to 0).
|
||||||
|
|
||||||
|
Set to 1 by "-X utf8" and "-X utf8=1" command line options.
|
||||||
|
Set to 1 by PYTHONUTF8=1 environment variable.
|
||||||
|
|
||||||
|
Set to 0 by "-X utf8=0" and PYTHONUTF8=0.
|
||||||
|
|
||||||
|
If equal to -1, it is set to 1 if the LC_CTYPE locale is "C" or
|
||||||
|
"POSIX", otherwise inherit Py_UTF8Mode value. */
|
||||||
int utf8_mode;
|
int utf8_mode;
|
||||||
|
|
||||||
int dev_mode; /* Development mode. PYTHONDEVMODE, -X dev */
|
int dev_mode; /* Development mode. PYTHONDEVMODE, -X dev */
|
||||||
|
@ -104,8 +119,6 @@ typedef struct {
|
||||||
_PyPreConfig_WINDOWS_INIT \
|
_PyPreConfig_WINDOWS_INIT \
|
||||||
.isolated = -1, \
|
.isolated = -1, \
|
||||||
.use_environment = -1, \
|
.use_environment = -1, \
|
||||||
.coerce_c_locale = -1, \
|
|
||||||
.utf8_mode = -1, \
|
|
||||||
.dev_mode = -1, \
|
.dev_mode = -1, \
|
||||||
.allocator = NULL}
|
.allocator = NULL}
|
||||||
|
|
||||||
|
|
|
@ -494,8 +494,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
||||||
if key not in expected_preconfig:
|
if key not in expected_preconfig:
|
||||||
expected_preconfig[key] = expected_config[key]
|
expected_preconfig[key] = expected_config[key]
|
||||||
|
|
||||||
self.check_core_config(config, expected_config)
|
|
||||||
self.check_pre_config(config, expected_preconfig)
|
self.check_pre_config(config, expected_preconfig)
|
||||||
|
self.check_core_config(config, expected_config)
|
||||||
self.check_global_config(config)
|
self.check_global_config(config)
|
||||||
|
|
||||||
def test_init_default_config(self):
|
def test_init_default_config(self):
|
||||||
|
@ -573,7 +573,6 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
||||||
|
|
||||||
INIT_ENV_PRECONFIG = {
|
INIT_ENV_PRECONFIG = {
|
||||||
'allocator': 'malloc',
|
'allocator': 'malloc',
|
||||||
'utf8_mode': 1,
|
|
||||||
}
|
}
|
||||||
INIT_ENV_CONFIG = {
|
INIT_ENV_CONFIG = {
|
||||||
'use_hash_seed': 1,
|
'use_hash_seed': 1,
|
||||||
|
@ -581,8 +580,6 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
||||||
'tracemalloc': 2,
|
'tracemalloc': 2,
|
||||||
'import_time': 1,
|
'import_time': 1,
|
||||||
'malloc_stats': 1,
|
'malloc_stats': 1,
|
||||||
'filesystem_encoding': 'utf-8',
|
|
||||||
'filesystem_errors': UTF8_MODE_ERRORS,
|
|
||||||
'inspect': 1,
|
'inspect': 1,
|
||||||
'optimization_level': 2,
|
'optimization_level': 2,
|
||||||
'pycache_prefix': 'env_pycache_prefix',
|
'pycache_prefix': 'env_pycache_prefix',
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
Since Python 3.7.0, calling :c:func:`Py_DecodeLocale` before
|
||||||
|
:c:func:`Py_Initialize` produces mojibake if the ``LC_CTYPE`` locale is coerced
|
||||||
|
and/or if the UTF-8 Mode is enabled by the user configuration. The LC_CTYPE
|
||||||
|
coercion and UTF-8 Mode are now disabled by default to fix the mojibake issue.
|
||||||
|
They must now be enabled explicitly (opt-in) using the new
|
||||||
|
:c:func:`_Py_PreInitialize` API with ``_PyPreConfig``.
|
|
@ -52,23 +52,30 @@ pymain_init(const _PyArgv *args)
|
||||||
fedisableexcept(FE_OVERFLOW);
|
fedisableexcept(FE_OVERFLOW);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
_PyCoreConfig config = _PyCoreConfig_INIT;
|
_PyPreConfig preconfig = _PyPreConfig_INIT;
|
||||||
|
/* Set to -1 to enable them depending on the LC_CTYPE locale and the
|
||||||
|
environment variables (PYTHONUTF8 and PYTHONCOERCECLOCALE) */
|
||||||
|
preconfig.coerce_c_locale = -1;
|
||||||
|
preconfig.utf8_mode = -1;
|
||||||
if (args->use_bytes_argv) {
|
if (args->use_bytes_argv) {
|
||||||
err = _Py_PreInitializeFromArgs(NULL, args->argc, args->bytes_argv);
|
err = _Py_PreInitializeFromArgs(&preconfig,
|
||||||
|
args->argc, args->bytes_argv);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
err = _Py_PreInitializeFromWideArgs(NULL, args->argc, args->wchar_argv);
|
err = _Py_PreInitializeFromWideArgs(&preconfig,
|
||||||
|
args->argc, args->wchar_argv);
|
||||||
}
|
}
|
||||||
if (_Py_INIT_FAILED(err)) {
|
if (_Py_INIT_FAILED(err)) {
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* pass NULL as the config: config is read from command line arguments,
|
||||||
|
environment variables, configuration files */
|
||||||
if (args->use_bytes_argv) {
|
if (args->use_bytes_argv) {
|
||||||
return _Py_InitializeFromArgs(&config, args->argc, args->bytes_argv);
|
return _Py_InitializeFromArgs(NULL, args->argc, args->bytes_argv);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return _Py_InitializeFromWideArgs(&config, args->argc, args->wchar_argv);
|
return _Py_InitializeFromWideArgs(NULL, args->argc, args->wchar_argv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -441,8 +441,6 @@ static int test_init_from_config(void)
|
||||||
putenv("PYTHONMALLOCSTATS=0");
|
putenv("PYTHONMALLOCSTATS=0");
|
||||||
config.malloc_stats = 1;
|
config.malloc_stats = 1;
|
||||||
|
|
||||||
/* FIXME: test coerce_c_locale and coerce_c_locale_warn */
|
|
||||||
|
|
||||||
putenv("PYTHONPYCACHEPREFIX=env_pycache_prefix");
|
putenv("PYTHONPYCACHEPREFIX=env_pycache_prefix");
|
||||||
config.pycache_prefix = L"conf_pycache_prefix";
|
config.pycache_prefix = L"conf_pycache_prefix";
|
||||||
|
|
||||||
|
@ -617,17 +615,6 @@ static int test_init_isolated(void)
|
||||||
{
|
{
|
||||||
_PyInitError err;
|
_PyInitError err;
|
||||||
|
|
||||||
_PyPreConfig preconfig = _PyPreConfig_INIT;
|
|
||||||
|
|
||||||
/* Set coerce_c_locale and utf8_mode to not depend on the locale */
|
|
||||||
preconfig.coerce_c_locale = 0;
|
|
||||||
preconfig.utf8_mode = 0;
|
|
||||||
|
|
||||||
err = _Py_PreInitialize(&preconfig);
|
|
||||||
if (_Py_INIT_FAILED(err)) {
|
|
||||||
_Py_ExitInitError(err);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Test _PyCoreConfig.isolated=1 */
|
/* Test _PyCoreConfig.isolated=1 */
|
||||||
_PyCoreConfig config = _PyCoreConfig_INIT;
|
_PyCoreConfig config = _PyCoreConfig_INIT;
|
||||||
|
|
||||||
|
@ -654,10 +641,6 @@ static int test_preinit_isolated1(void)
|
||||||
_PyInitError err;
|
_PyInitError err;
|
||||||
|
|
||||||
_PyPreConfig preconfig = _PyPreConfig_INIT;
|
_PyPreConfig preconfig = _PyPreConfig_INIT;
|
||||||
|
|
||||||
/* Set coerce_c_locale and utf8_mode to not depend on the locale */
|
|
||||||
preconfig.coerce_c_locale = 0;
|
|
||||||
preconfig.utf8_mode = 0;
|
|
||||||
preconfig.isolated = 1;
|
preconfig.isolated = 1;
|
||||||
|
|
||||||
err = _Py_PreInitialize(&preconfig);
|
err = _Py_PreInitialize(&preconfig);
|
||||||
|
@ -685,10 +668,6 @@ static int test_preinit_isolated2(void)
|
||||||
_PyInitError err;
|
_PyInitError err;
|
||||||
|
|
||||||
_PyPreConfig preconfig = _PyPreConfig_INIT;
|
_PyPreConfig preconfig = _PyPreConfig_INIT;
|
||||||
|
|
||||||
/* Set coerce_c_locale and utf8_mode to not depend on the locale */
|
|
||||||
preconfig.coerce_c_locale = 0;
|
|
||||||
preconfig.utf8_mode = 0;
|
|
||||||
preconfig.isolated = 0;
|
preconfig.isolated = 0;
|
||||||
|
|
||||||
err = _Py_PreInitialize(&preconfig);
|
err = _Py_PreInitialize(&preconfig);
|
||||||
|
|
|
@ -386,7 +386,9 @@ _PyPreConfig_GetGlobalConfig(_PyPreConfig *config)
|
||||||
#ifdef MS_WINDOWS
|
#ifdef MS_WINDOWS
|
||||||
COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag);
|
COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag);
|
||||||
#endif
|
#endif
|
||||||
COPY_FLAG(utf8_mode, Py_UTF8Mode);
|
if (Py_UTF8Mode > 0) {
|
||||||
|
config->utf8_mode = 1;
|
||||||
|
}
|
||||||
|
|
||||||
#undef COPY_FLAG
|
#undef COPY_FLAG
|
||||||
#undef COPY_NOT_FLAG
|
#undef COPY_NOT_FLAG
|
||||||
|
|
|
@ -485,7 +485,7 @@ _Py_Initialize_ReconfigureCore(PyInterpreterState **interp_p,
|
||||||
_PyCoreConfig_Write(core_config);
|
_PyCoreConfig_Write(core_config);
|
||||||
|
|
||||||
if (_PyCoreConfig_Copy(&interp->core_config, core_config) < 0) {
|
if (_PyCoreConfig_Copy(&interp->core_config, core_config) < 0) {
|
||||||
return _Py_INIT_ERR("failed to copy core config");
|
return _Py_INIT_NO_MEMORY();
|
||||||
}
|
}
|
||||||
core_config = &interp->core_config;
|
core_config = &interp->core_config;
|
||||||
|
|
||||||
|
@ -548,7 +548,7 @@ pycore_create_interpreter(const _PyCoreConfig *core_config,
|
||||||
*interp_p = interp;
|
*interp_p = interp;
|
||||||
|
|
||||||
if (_PyCoreConfig_Copy(&interp->core_config, core_config) < 0) {
|
if (_PyCoreConfig_Copy(&interp->core_config, core_config) < 0) {
|
||||||
return _Py_INIT_ERR("failed to copy core config");
|
return _Py_INIT_NO_MEMORY();
|
||||||
}
|
}
|
||||||
core_config = &interp->core_config;
|
core_config = &interp->core_config;
|
||||||
|
|
||||||
|
@ -785,6 +785,7 @@ _Py_PreInitialize(const _PyPreConfig *src_config)
|
||||||
_PyInitError
|
_PyInitError
|
||||||
_Py_PreInitializeFromCoreConfig(const _PyCoreConfig *coreconfig)
|
_Py_PreInitializeFromCoreConfig(const _PyCoreConfig *coreconfig)
|
||||||
{
|
{
|
||||||
|
assert(coreconfig != NULL);
|
||||||
_PyPreConfig config = _PyPreConfig_INIT;
|
_PyPreConfig config = _PyPreConfig_INIT;
|
||||||
_PyCoreConfig_GetCoreConfig(&config, coreconfig);
|
_PyCoreConfig_GetCoreConfig(&config, coreconfig);
|
||||||
return _Py_PreInitialize(&config);
|
return _Py_PreInitialize(&config);
|
||||||
|
@ -799,8 +800,10 @@ pyinit_coreconfig(_PyCoreConfig *config,
|
||||||
const _PyArgv *args,
|
const _PyArgv *args,
|
||||||
PyInterpreterState **interp_p)
|
PyInterpreterState **interp_p)
|
||||||
{
|
{
|
||||||
if (_PyCoreConfig_Copy(config, src_config) < 0) {
|
if (src_config) {
|
||||||
return _Py_INIT_ERR("failed to copy core config");
|
if (_PyCoreConfig_Copy(config, src_config) < 0) {
|
||||||
|
return _Py_INIT_NO_MEMORY();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
_PyInitError err = _PyCoreConfig_Read(config, args);
|
_PyInitError err = _PyCoreConfig_Read(config, args);
|
||||||
|
@ -839,9 +842,14 @@ _Py_InitializeCore(const _PyCoreConfig *src_config,
|
||||||
const _PyArgv *args,
|
const _PyArgv *args,
|
||||||
PyInterpreterState **interp_p)
|
PyInterpreterState **interp_p)
|
||||||
{
|
{
|
||||||
assert(src_config != NULL);
|
_PyInitError err;
|
||||||
|
|
||||||
_PyInitError err = _Py_PreInitializeFromCoreConfig(src_config);
|
if (src_config) {
|
||||||
|
err = _Py_PreInitializeFromCoreConfig(src_config);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
err = _Py_PreInitialize(NULL);
|
||||||
|
}
|
||||||
if (_Py_INIT_FAILED(err)) {
|
if (_Py_INIT_FAILED(err)) {
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
@ -1395,7 +1403,7 @@ new_interpreter(PyThreadState **tstate_p)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (_PyCoreConfig_Copy(&interp->core_config, core_config) < 0) {
|
if (_PyCoreConfig_Copy(&interp->core_config, core_config) < 0) {
|
||||||
return _Py_INIT_ERR("failed to copy core config");
|
return _Py_INIT_NO_MEMORY();
|
||||||
}
|
}
|
||||||
core_config = &interp->core_config;
|
core_config = &interp->core_config;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue