mirror of https://github.com/python/cpython
bpo-34589: Add -X coerce_c_locale command line option (GH-9378)
Add a new -X coerce_c_locale command line option to control C locale coercion (PEP 538).
This commit is contained in:
parent
7a0791b699
commit
dbdee0073c
|
@ -438,13 +438,22 @@ Miscellaneous options
|
|||
* Set the :attr:`~sys.flags.dev_mode` attribute of :attr:`sys.flags` to
|
||||
``True``
|
||||
|
||||
* ``-X utf8`` enables UTF-8 mode for operating system interfaces, overriding
|
||||
* ``-X utf8`` enables UTF-8 mode (:pep:`540`) for operating system interfaces, overriding
|
||||
the default locale-aware mode. ``-X utf8=0`` explicitly disables UTF-8
|
||||
mode (even when it would otherwise activate automatically).
|
||||
See :envvar:`PYTHONUTF8` for more details.
|
||||
* ``-X pycache_prefix=PATH`` enables writing ``.pyc`` files to a parallel
|
||||
tree rooted at the given directory instead of to the code tree. See also
|
||||
:envvar:`PYTHONPYCACHEPREFIX`.
|
||||
* ``-X coerce_c_locale`` or ``-X coerce_c_locale=1`` tries to coerce the C
|
||||
locale (:pep:`538`).
|
||||
``-X coerce_c_locale=0`` skips coercing the legacy ASCII-based C and POSIX
|
||||
locales to a more capable UTF-8 based alternative.
|
||||
``-X coerce_c_locale=warn`` will cause Python to emit warning messages on
|
||||
``stderr`` if either the locale coercion activates, or else if a locale
|
||||
that *would* have triggered coercion is still active when the Python
|
||||
runtime is initialized.
|
||||
See :envvar:`PYTHONCOERCECLOCALE` for more details.
|
||||
|
||||
It also allows passing arbitrary values and retrieving them through the
|
||||
:data:`sys._xoptions` dictionary.
|
||||
|
@ -464,6 +473,9 @@ Miscellaneous options
|
|||
.. versionadded:: 3.7
|
||||
The ``-X importtime``, ``-X dev`` and ``-X utf8`` options.
|
||||
|
||||
.. versionadded:: 3.7.1
|
||||
The ``-X coerce_c_locale`` option.
|
||||
|
||||
.. versionadded:: 3.8
|
||||
The ``-X pycache_prefix`` option.
|
||||
|
||||
|
@ -850,6 +862,8 @@ conflict.
|
|||
order to force the interpreter to use ``ASCII`` instead of ``UTF-8`` for
|
||||
system interfaces.
|
||||
|
||||
Also available as the :option:`-X` ``coerce_c_locale`` option.
|
||||
|
||||
Availability: \*nix
|
||||
|
||||
.. versionadded:: 3.7
|
||||
|
|
|
@ -2494,3 +2494,10 @@ versions, it respected an ill-defined subset of those environment variables,
|
|||
while in Python 3.7.0 it didn't read any of them due to :issue:`34247`). If
|
||||
this behavior is unwanted, set :c:data:`Py_IgnoreEnvironmentFlag` to 1 before
|
||||
calling :c:func:`Py_Initialize`.
|
||||
|
||||
:c:func:`Py_Initialize` and :c:func:`Py_Main` cannot enable the C locale
|
||||
coercion (:pep:`538`) anymore: it is always disabled. It can now only be
|
||||
enabled by the Python program ("python3").
|
||||
|
||||
New :option:`-X` ``coerce_c_locale`` command line option to control C locale
|
||||
coercion (:pep:`538`).
|
||||
|
|
|
@ -139,7 +139,7 @@ class EncodingDetails(_EncodingDetails):
|
|||
return data
|
||||
|
||||
@classmethod
|
||||
def get_child_details(cls, env_vars):
|
||||
def get_child_details(cls, env_vars, xoption=None):
|
||||
"""Retrieves fsencoding and standard stream details from a child process
|
||||
|
||||
Returns (encoding_details, stderr_lines):
|
||||
|
@ -150,10 +150,11 @@ class EncodingDetails(_EncodingDetails):
|
|||
The child is run in isolated mode if the current interpreter supports
|
||||
that.
|
||||
"""
|
||||
result, py_cmd = run_python_until_end(
|
||||
"-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT,
|
||||
**env_vars
|
||||
)
|
||||
args = []
|
||||
if xoption:
|
||||
args.extend(("-X", f"coerce_c_locale={xoption}"))
|
||||
args.extend(("-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT))
|
||||
result, py_cmd = run_python_until_end(*args, **env_vars)
|
||||
if not result.rc == 0:
|
||||
result.fail(py_cmd)
|
||||
# All subprocess outputs in this test case should be pure ASCII
|
||||
|
@ -212,7 +213,8 @@ class _LocaleHandlingTestCase(unittest.TestCase):
|
|||
expected_fs_encoding,
|
||||
expected_stream_encoding,
|
||||
expected_warnings,
|
||||
coercion_expected):
|
||||
coercion_expected,
|
||||
xoption=None):
|
||||
"""Check the C locale handling for the given process environment
|
||||
|
||||
Parameters:
|
||||
|
@ -220,7 +222,7 @@ class _LocaleHandlingTestCase(unittest.TestCase):
|
|||
expected_stream_encoding: expected encoding for standard streams
|
||||
expected_warnings: stderr output to expect (if any)
|
||||
"""
|
||||
result = EncodingDetails.get_child_details(env_vars)
|
||||
result = EncodingDetails.get_child_details(env_vars, xoption)
|
||||
encoding_details, stderr_lines = result
|
||||
expected_details = EncodingDetails.get_expected_details(
|
||||
coercion_expected,
|
||||
|
@ -290,6 +292,7 @@ class LocaleCoercionTests(_LocaleHandlingTestCase):
|
|||
coerce_c_locale,
|
||||
expected_warnings=None,
|
||||
coercion_expected=True,
|
||||
use_xoption=False,
|
||||
**extra_vars):
|
||||
"""Check the C locale handling for various configurations
|
||||
|
||||
|
@ -319,8 +322,12 @@ class LocaleCoercionTests(_LocaleHandlingTestCase):
|
|||
"PYTHONCOERCECLOCALE": "",
|
||||
}
|
||||
base_var_dict.update(extra_vars)
|
||||
xoption = None
|
||||
if coerce_c_locale is not None:
|
||||
base_var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale
|
||||
if use_xoption:
|
||||
xoption = coerce_c_locale
|
||||
else:
|
||||
base_var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale
|
||||
|
||||
# Check behaviour for the default locale
|
||||
with self.subTest(default_locale=True,
|
||||
|
@ -342,7 +349,8 @@ class LocaleCoercionTests(_LocaleHandlingTestCase):
|
|||
fs_encoding,
|
||||
stream_encoding,
|
||||
_expected_warnings,
|
||||
_coercion_expected)
|
||||
_coercion_expected,
|
||||
xoption=xoption)
|
||||
|
||||
# Check behaviour for explicitly configured locales
|
||||
for locale_to_set in EXPECTED_C_LOCALE_EQUIVALENTS:
|
||||
|
@ -357,7 +365,8 @@ class LocaleCoercionTests(_LocaleHandlingTestCase):
|
|||
fs_encoding,
|
||||
stream_encoding,
|
||||
expected_warnings,
|
||||
coercion_expected)
|
||||
coercion_expected,
|
||||
xoption=xoption)
|
||||
|
||||
def test_PYTHONCOERCECLOCALE_not_set(self):
|
||||
# This should coerce to the first available target locale by default
|
||||
|
@ -404,6 +413,32 @@ class LocaleCoercionTests(_LocaleHandlingTestCase):
|
|||
expected_warnings=[LEGACY_LOCALE_WARNING],
|
||||
coercion_expected=False)
|
||||
|
||||
def test_xoption_set_to_1(self):
|
||||
self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale="1",
|
||||
use_xoption=True)
|
||||
|
||||
def test_xoption_set_to_zero(self):
|
||||
# The setting "0" should result in the locale coercion being disabled
|
||||
self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
|
||||
EXPECTED_C_LOCALE_STREAM_ENCODING,
|
||||
coerce_c_locale="0",
|
||||
coercion_expected=False,
|
||||
use_xoption=True)
|
||||
# Setting LC_ALL=C shouldn't make any difference to the behaviour
|
||||
self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
|
||||
EXPECTED_C_LOCALE_STREAM_ENCODING,
|
||||
coerce_c_locale="0",
|
||||
LC_ALL="C",
|
||||
coercion_expected=False,
|
||||
use_xoption=True)
|
||||
|
||||
def test_xoption_set_to_warn(self):
|
||||
# -X coerce_c_locale=warn enables runtime warnings for legacy locales
|
||||
self._check_c_locale_coercion("utf-8", "utf-8",
|
||||
coerce_c_locale="warn",
|
||||
expected_warnings=[CLI_COERCION_WARNING],
|
||||
use_xoption=True)
|
||||
|
||||
def test_main():
|
||||
test.support.run_unittest(
|
||||
LocaleConfigurationTests,
|
||||
|
|
|
@ -159,13 +159,16 @@ class CmdLineTest(unittest.TestCase):
|
|||
env = os.environ.copy()
|
||||
# Use C locale to get ascii for the locale encoding
|
||||
env['LC_ALL'] = 'C'
|
||||
env['PYTHONCOERCECLOCALE'] = '0'
|
||||
code = (
|
||||
b'import locale; '
|
||||
b'print(ascii("' + undecodable + b'"), '
|
||||
b'locale.getpreferredencoding())')
|
||||
p = subprocess.Popen(
|
||||
[sys.executable, "-c", code],
|
||||
[sys.executable,
|
||||
# Disable C locale coercion and UTF-8 Mode to not use UTF-8
|
||||
"-X", "coerce_c_locale=0",
|
||||
"-X", "utf8=0",
|
||||
"-c", code],
|
||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||
env=env)
|
||||
stdout, stderr = p.communicate()
|
||||
|
|
|
@ -656,9 +656,8 @@ class SysModuleTest(unittest.TestCase):
|
|||
|
||||
def c_locale_get_error_handler(self, locale, isolated=False, encoding=None):
|
||||
# Force the POSIX locale
|
||||
env = os.environ.copy()
|
||||
env = dict(os.environ)
|
||||
env["LC_ALL"] = locale
|
||||
env["PYTHONCOERCECLOCALE"] = "0"
|
||||
code = '\n'.join((
|
||||
'import sys',
|
||||
'def dump(name):',
|
||||
|
@ -668,7 +667,10 @@ class SysModuleTest(unittest.TestCase):
|
|||
'dump("stdout")',
|
||||
'dump("stderr")',
|
||||
))
|
||||
args = [sys.executable, "-X", "utf8=0", "-c", code]
|
||||
args = [sys.executable,
|
||||
"-X", "utf8=0",
|
||||
"-X", "coerce_c_locale=0",
|
||||
"-c", code]
|
||||
if isolated:
|
||||
args.append("-I")
|
||||
if encoding is not None:
|
||||
|
|
|
@ -27,6 +27,8 @@ class UTF8ModeTests(unittest.TestCase):
|
|||
return (loc in POSIX_LOCALES)
|
||||
|
||||
def get_output(self, *args, failure=False, **kw):
|
||||
# Always disable the C locale coercion (PEP 538)
|
||||
args = ('-X', 'coerce_c_locale=0', *args)
|
||||
kw = dict(self.DEFAULT_ENV, **kw)
|
||||
if failure:
|
||||
out = assert_python_failure(*args, **kw)
|
||||
|
@ -116,7 +118,6 @@ class UTF8ModeTests(unittest.TestCase):
|
|||
# PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
|
||||
# and has the priority over -X utf8 and PYTHONUTF8
|
||||
out = self.get_output('-X', 'utf8', '-c', code,
|
||||
PYTHONUTF8='strict',
|
||||
PYTHONLEGACYWINDOWSFSENCODING='1')
|
||||
self.assertEqual(out, 'mbcs/replace')
|
||||
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Add a new :option:`-X` ``coerce_c_locale`` command line option to control C
|
||||
locale coercion (:pep:`538`).
|
|
@ -705,6 +705,17 @@ config_init_utf8_mode(_PyCoreConfig *config)
|
|||
return _Py_INIT_OK();
|
||||
}
|
||||
|
||||
#ifndef MS_WINDOWS
|
||||
/* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
|
||||
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
|
||||
if (ctype_loc != NULL
|
||||
&& (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0))
|
||||
{
|
||||
config->utf8_mode = 1;
|
||||
return _Py_INIT_OK();
|
||||
}
|
||||
#endif
|
||||
|
||||
return _Py_INIT_OK();
|
||||
}
|
||||
|
||||
|
@ -808,25 +819,6 @@ config_read_env_vars(_PyCoreConfig *config)
|
|||
config->malloc_stats = 1;
|
||||
}
|
||||
|
||||
const char *env = _PyCoreConfig_GetEnv(config, "PYTHONCOERCECLOCALE");
|
||||
if (env) {
|
||||
if (strcmp(env, "0") == 0) {
|
||||
if (config->_coerce_c_locale < 0) {
|
||||
config->_coerce_c_locale = 0;
|
||||
}
|
||||
}
|
||||
else if (strcmp(env, "warn") == 0) {
|
||||
if (config->_coerce_c_locale_warn < 0) {
|
||||
config->_coerce_c_locale_warn = 1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (config->_coerce_c_locale < 0) {
|
||||
config->_coerce_c_locale = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
wchar_t *path;
|
||||
int res = _PyCoreConfig_GetEnvDup(config, &path,
|
||||
L"PYTHONPATH", "PYTHONPATH");
|
||||
|
@ -966,28 +958,76 @@ config_read_complex_options(_PyCoreConfig *config)
|
|||
}
|
||||
|
||||
|
||||
static void
|
||||
config_init_locale(_PyCoreConfig *config)
|
||||
static _PyInitError
|
||||
config_init_coerce_c_locale(_PyCoreConfig *config)
|
||||
{
|
||||
const wchar_t *xopt = config_get_xoption(config, L"coerce_c_locale");
|
||||
if (xopt) {
|
||||
wchar_t *sep = wcschr(xopt, L'=');
|
||||
if (sep) {
|
||||
xopt = sep + 1;
|
||||
if (wcscmp(xopt, L"1") == 0) {
|
||||
if (config->_coerce_c_locale < 0) {
|
||||
config->_coerce_c_locale = 1;
|
||||
}
|
||||
}
|
||||
else if (wcscmp(xopt, L"0") == 0) {
|
||||
if (config->_coerce_c_locale < 0) {
|
||||
config->_coerce_c_locale = 0;
|
||||
}
|
||||
}
|
||||
else if (wcscmp(xopt, L"warn") == 0) {
|
||||
if (config->_coerce_c_locale_warn < 0) {
|
||||
config->_coerce_c_locale_warn = 1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
return _Py_INIT_USER_ERR("invalid -X coerce_c_locale option value");
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (config->_coerce_c_locale < 0) {
|
||||
config->_coerce_c_locale = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (config->_coerce_c_locale_warn < 0) {
|
||||
config->_coerce_c_locale_warn = 0;
|
||||
}
|
||||
}
|
||||
|
||||
const char *env = _PyCoreConfig_GetEnv(config, "PYTHONCOERCECLOCALE");
|
||||
if (env) {
|
||||
if (strcmp(env, "0") == 0) {
|
||||
if (config->_coerce_c_locale < 0) {
|
||||
config->_coerce_c_locale = 0;
|
||||
}
|
||||
}
|
||||
else if (strcmp(env, "warn") == 0) {
|
||||
if (config->_coerce_c_locale_warn < 0) {
|
||||
config->_coerce_c_locale_warn = 1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (config->_coerce_c_locale < 0) {
|
||||
config->_coerce_c_locale = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (config->_coerce_c_locale_warn < 0) {
|
||||
config->_coerce_c_locale_warn = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (config->_coerce_c_locale < 0) {
|
||||
/* The C locale enables the C locale coercion (PEP 538) */
|
||||
if (_Py_LegacyLocaleDetected()) {
|
||||
config->_coerce_c_locale = 1;
|
||||
return _Py_INIT_OK();
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef MS_WINDOWS
|
||||
if (config->utf8_mode < 0) {
|
||||
/* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
|
||||
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
|
||||
if (ctype_loc != NULL
|
||||
&& (strcmp(ctype_loc, "C") == 0
|
||||
|| strcmp(ctype_loc, "POSIX") == 0))
|
||||
{
|
||||
config->utf8_mode = 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return _Py_INIT_OK();
|
||||
}
|
||||
|
||||
|
||||
|
@ -1293,8 +1333,11 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
|
|||
}
|
||||
}
|
||||
|
||||
if (config->utf8_mode < 0 || config->_coerce_c_locale < 0) {
|
||||
config_init_locale(config);
|
||||
if (config->_coerce_c_locale < 0 || config->_coerce_c_locale_warn < 0) {
|
||||
err = config_init_coerce_c_locale(config);
|
||||
if (_Py_INIT_FAILED(err)) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
if (config->_install_importlib) {
|
||||
|
@ -1349,6 +1392,7 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
|
|||
}
|
||||
|
||||
assert(config->_coerce_c_locale >= 0);
|
||||
assert(config->_coerce_c_locale_warn >= 0);
|
||||
assert(config->use_environment >= 0);
|
||||
assert(config->filesystem_encoding != NULL);
|
||||
assert(config->filesystem_errors != NULL);
|
||||
|
|
Loading…
Reference in New Issue