bpo-34589: Add -X coerce_c_locale command line option (GH-9378)

Add a new -X coerce_c_locale command line option to control C locale
coercion (PEP 538).
This commit is contained in:
Victor Stinner 2018-09-17 17:19:26 -07:00 committed by GitHub
parent 7a0791b699
commit dbdee0073c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 160 additions and 52 deletions

View File

@ -438,13 +438,22 @@ Miscellaneous options
* Set the :attr:`~sys.flags.dev_mode` attribute of :attr:`sys.flags` to * Set the :attr:`~sys.flags.dev_mode` attribute of :attr:`sys.flags` to
``True`` ``True``
* ``-X utf8`` enables UTF-8 mode for operating system interfaces, overriding * ``-X utf8`` enables UTF-8 mode (:pep:`540`) for operating system interfaces, overriding
the default locale-aware mode. ``-X utf8=0`` explicitly disables UTF-8 the default locale-aware mode. ``-X utf8=0`` explicitly disables UTF-8
mode (even when it would otherwise activate automatically). mode (even when it would otherwise activate automatically).
See :envvar:`PYTHONUTF8` for more details. See :envvar:`PYTHONUTF8` for more details.
* ``-X pycache_prefix=PATH`` enables writing ``.pyc`` files to a parallel * ``-X pycache_prefix=PATH`` enables writing ``.pyc`` files to a parallel
tree rooted at the given directory instead of to the code tree. See also tree rooted at the given directory instead of to the code tree. See also
:envvar:`PYTHONPYCACHEPREFIX`. :envvar:`PYTHONPYCACHEPREFIX`.
* ``-X coerce_c_locale`` or ``-X coerce_c_locale=1`` tries to coerce the C
locale (:pep:`538`).
``-X coerce_c_locale=0`` skips coercing the legacy ASCII-based C and POSIX
locales to a more capable UTF-8 based alternative.
``-X coerce_c_locale=warn`` will cause Python to emit warning messages on
``stderr`` if either the locale coercion activates, or else if a locale
that *would* have triggered coercion is still active when the Python
runtime is initialized.
See :envvar:`PYTHONCOERCECLOCALE` for more details.
It also allows passing arbitrary values and retrieving them through the It also allows passing arbitrary values and retrieving them through the
:data:`sys._xoptions` dictionary. :data:`sys._xoptions` dictionary.
@ -464,6 +473,9 @@ Miscellaneous options
.. versionadded:: 3.7 .. versionadded:: 3.7
The ``-X importtime``, ``-X dev`` and ``-X utf8`` options. The ``-X importtime``, ``-X dev`` and ``-X utf8`` options.
.. versionadded:: 3.7.1
The ``-X coerce_c_locale`` option.
.. versionadded:: 3.8 .. versionadded:: 3.8
The ``-X pycache_prefix`` option. The ``-X pycache_prefix`` option.
@ -850,6 +862,8 @@ conflict.
order to force the interpreter to use ``ASCII`` instead of ``UTF-8`` for order to force the interpreter to use ``ASCII`` instead of ``UTF-8`` for
system interfaces. system interfaces.
Also available as the :option:`-X` ``coerce_c_locale`` option.
Availability: \*nix Availability: \*nix
.. versionadded:: 3.7 .. versionadded:: 3.7

View File

@ -2494,3 +2494,10 @@ versions, it respected an ill-defined subset of those environment variables,
while in Python 3.7.0 it didn't read any of them due to :issue:`34247`). If while in Python 3.7.0 it didn't read any of them due to :issue:`34247`). If
this behavior is unwanted, set :c:data:`Py_IgnoreEnvironmentFlag` to 1 before this behavior is unwanted, set :c:data:`Py_IgnoreEnvironmentFlag` to 1 before
calling :c:func:`Py_Initialize`. calling :c:func:`Py_Initialize`.
:c:func:`Py_Initialize` and :c:func:`Py_Main` cannot enable the C locale
coercion (:pep:`538`) anymore: it is always disabled. It can now only be
enabled by the Python program ("python3").
New :option:`-X` ``coerce_c_locale`` command line option to control C locale
coercion (:pep:`538`).

View File

@ -139,7 +139,7 @@ class EncodingDetails(_EncodingDetails):
return data return data
@classmethod @classmethod
def get_child_details(cls, env_vars): def get_child_details(cls, env_vars, xoption=None):
"""Retrieves fsencoding and standard stream details from a child process """Retrieves fsencoding and standard stream details from a child process
Returns (encoding_details, stderr_lines): Returns (encoding_details, stderr_lines):
@ -150,10 +150,11 @@ class EncodingDetails(_EncodingDetails):
The child is run in isolated mode if the current interpreter supports The child is run in isolated mode if the current interpreter supports
that. that.
""" """
result, py_cmd = run_python_until_end( args = []
"-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT, if xoption:
**env_vars args.extend(("-X", f"coerce_c_locale={xoption}"))
) args.extend(("-X", "utf8=0", "-c", cls.CHILD_PROCESS_SCRIPT))
result, py_cmd = run_python_until_end(*args, **env_vars)
if not result.rc == 0: if not result.rc == 0:
result.fail(py_cmd) result.fail(py_cmd)
# All subprocess outputs in this test case should be pure ASCII # All subprocess outputs in this test case should be pure ASCII
@ -212,7 +213,8 @@ class _LocaleHandlingTestCase(unittest.TestCase):
expected_fs_encoding, expected_fs_encoding,
expected_stream_encoding, expected_stream_encoding,
expected_warnings, expected_warnings,
coercion_expected): coercion_expected,
xoption=None):
"""Check the C locale handling for the given process environment """Check the C locale handling for the given process environment
Parameters: Parameters:
@ -220,7 +222,7 @@ class _LocaleHandlingTestCase(unittest.TestCase):
expected_stream_encoding: expected encoding for standard streams expected_stream_encoding: expected encoding for standard streams
expected_warning: stderr output to expect (if any) expected_warning: stderr output to expect (if any)
""" """
result = EncodingDetails.get_child_details(env_vars) result = EncodingDetails.get_child_details(env_vars, xoption)
encoding_details, stderr_lines = result encoding_details, stderr_lines = result
expected_details = EncodingDetails.get_expected_details( expected_details = EncodingDetails.get_expected_details(
coercion_expected, coercion_expected,
@ -290,6 +292,7 @@ class LocaleCoercionTests(_LocaleHandlingTestCase):
coerce_c_locale, coerce_c_locale,
expected_warnings=None, expected_warnings=None,
coercion_expected=True, coercion_expected=True,
use_xoption=False,
**extra_vars): **extra_vars):
"""Check the C locale handling for various configurations """Check the C locale handling for various configurations
@ -319,7 +322,11 @@ class LocaleCoercionTests(_LocaleHandlingTestCase):
"PYTHONCOERCECLOCALE": "", "PYTHONCOERCECLOCALE": "",
} }
base_var_dict.update(extra_vars) base_var_dict.update(extra_vars)
xoption = None
if coerce_c_locale is not None: if coerce_c_locale is not None:
if use_xoption:
xoption = coerce_c_locale
else:
base_var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale base_var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale
# Check behaviour for the default locale # Check behaviour for the default locale
@ -342,7 +349,8 @@ class LocaleCoercionTests(_LocaleHandlingTestCase):
fs_encoding, fs_encoding,
stream_encoding, stream_encoding,
_expected_warnings, _expected_warnings,
_coercion_expected) _coercion_expected,
xoption=xoption)
# Check behaviour for explicitly configured locales # Check behaviour for explicitly configured locales
for locale_to_set in EXPECTED_C_LOCALE_EQUIVALENTS: for locale_to_set in EXPECTED_C_LOCALE_EQUIVALENTS:
@ -357,7 +365,8 @@ class LocaleCoercionTests(_LocaleHandlingTestCase):
fs_encoding, fs_encoding,
stream_encoding, stream_encoding,
expected_warnings, expected_warnings,
coercion_expected) coercion_expected,
xoption=xoption)
def test_PYTHONCOERCECLOCALE_not_set(self): def test_PYTHONCOERCECLOCALE_not_set(self):
# This should coerce to the first available target locale by default # This should coerce to the first available target locale by default
@ -404,6 +413,32 @@ class LocaleCoercionTests(_LocaleHandlingTestCase):
expected_warnings=[LEGACY_LOCALE_WARNING], expected_warnings=[LEGACY_LOCALE_WARNING],
coercion_expected=False) coercion_expected=False)
def test_xoption_set_to_1(self):
self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale="1",
use_xoption=True)
def test_xoption_set_to_zero(self):
# The setting "0" should result in the locale coercion being disabled
self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
EXPECTED_C_LOCALE_STREAM_ENCODING,
coerce_c_locale="0",
coercion_expected=False,
use_xoption=True)
# Setting LC_ALL=C shouldn't make any difference to the behaviour
self._check_c_locale_coercion(EXPECTED_C_LOCALE_FS_ENCODING,
EXPECTED_C_LOCALE_STREAM_ENCODING,
coerce_c_locale="0",
LC_ALL="C",
coercion_expected=False,
use_xoption=True)
def test_xoption_set_to_warn(self):
# -X coerce_c_locale=warn enables runtime warnings for legacy locales
self._check_c_locale_coercion("utf-8", "utf-8",
coerce_c_locale="warn",
expected_warnings=[CLI_COERCION_WARNING],
use_xoption=True)
def test_main(): def test_main():
test.support.run_unittest( test.support.run_unittest(
LocaleConfigurationTests, LocaleConfigurationTests,

View File

@ -159,13 +159,16 @@ class CmdLineTest(unittest.TestCase):
env = os.environ.copy() env = os.environ.copy()
# Use C locale to get ascii for the locale encoding # Use C locale to get ascii for the locale encoding
env['LC_ALL'] = 'C' env['LC_ALL'] = 'C'
env['PYTHONCOERCECLOCALE'] = '0'
code = ( code = (
b'import locale; ' b'import locale; '
b'print(ascii("' + undecodable + b'"), ' b'print(ascii("' + undecodable + b'"), '
b'locale.getpreferredencoding())') b'locale.getpreferredencoding())')
p = subprocess.Popen( p = subprocess.Popen(
[sys.executable, "-c", code], [sys.executable,
# Disable C locale coercion and UTF-8 Mode to not use UTF-8
"-X", "coerce_c_locale=0",
"-X", "utf8=0",
"-c", code],
stdout=subprocess.PIPE, stderr=subprocess.STDOUT, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
env=env) env=env)
stdout, stderr = p.communicate() stdout, stderr = p.communicate()

View File

@ -656,9 +656,8 @@ class SysModuleTest(unittest.TestCase):
def c_locale_get_error_handler(self, locale, isolated=False, encoding=None): def c_locale_get_error_handler(self, locale, isolated=False, encoding=None):
# Force the POSIX locale # Force the POSIX locale
env = os.environ.copy() env = dict(os.environ)
env["LC_ALL"] = locale env["LC_ALL"] = locale
env["PYTHONCOERCECLOCALE"] = "0"
code = '\n'.join(( code = '\n'.join((
'import sys', 'import sys',
'def dump(name):', 'def dump(name):',
@ -668,7 +667,10 @@ class SysModuleTest(unittest.TestCase):
'dump("stdout")', 'dump("stdout")',
'dump("stderr")', 'dump("stderr")',
)) ))
args = [sys.executable, "-X", "utf8=0", "-c", code] args = [sys.executable,
"-X", "utf8=0",
"-X", "coerce_c_locale=0",
"-c", code]
if isolated: if isolated:
args.append("-I") args.append("-I")
if encoding is not None: if encoding is not None:

View File

@ -27,6 +27,8 @@ class UTF8ModeTests(unittest.TestCase):
return (loc in POSIX_LOCALES) return (loc in POSIX_LOCALES)
def get_output(self, *args, failure=False, **kw): def get_output(self, *args, failure=False, **kw):
# Always disable the C locale coercion (PEP 538)
args = ('-X', 'coerce_c_locale=0', *args)
kw = dict(self.DEFAULT_ENV, **kw) kw = dict(self.DEFAULT_ENV, **kw)
if failure: if failure:
out = assert_python_failure(*args, **kw) out = assert_python_failure(*args, **kw)
@ -116,7 +118,6 @@ class UTF8ModeTests(unittest.TestCase):
# PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
# and has the priority over -X utf8 and PYTHONUTF8 # and has the priority over -X utf8 and PYTHONUTF8
out = self.get_output('-X', 'utf8', '-c', code, out = self.get_output('-X', 'utf8', '-c', code,
PYTHONUTF8='strict',
PYTHONLEGACYWINDOWSFSENCODING='1') PYTHONLEGACYWINDOWSFSENCODING='1')
self.assertEqual(out, 'mbcs/replace') self.assertEqual(out, 'mbcs/replace')

View File

@ -0,0 +1,2 @@
Add a new :option:`-X` ``coerce_c_locale`` command line option to control C
locale coercion (:pep:`538`).

View File

@ -705,6 +705,17 @@ config_init_utf8_mode(_PyCoreConfig *config)
return _Py_INIT_OK(); return _Py_INIT_OK();
} }
#ifndef MS_WINDOWS
/* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
if (ctype_loc != NULL
&& (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0))
{
config->utf8_mode = 1;
return _Py_INIT_OK();
}
#endif
return _Py_INIT_OK(); return _Py_INIT_OK();
} }
@ -808,25 +819,6 @@ config_read_env_vars(_PyCoreConfig *config)
config->malloc_stats = 1; config->malloc_stats = 1;
} }
const char *env = _PyCoreConfig_GetEnv(config, "PYTHONCOERCECLOCALE");
if (env) {
if (strcmp(env, "0") == 0) {
if (config->_coerce_c_locale < 0) {
config->_coerce_c_locale = 0;
}
}
else if (strcmp(env, "warn") == 0) {
if (config->_coerce_c_locale_warn < 0) {
config->_coerce_c_locale_warn = 1;
}
}
else {
if (config->_coerce_c_locale < 0) {
config->_coerce_c_locale = 1;
}
}
}
wchar_t *path; wchar_t *path;
int res = _PyCoreConfig_GetEnvDup(config, &path, int res = _PyCoreConfig_GetEnvDup(config, &path,
L"PYTHONPATH", "PYTHONPATH"); L"PYTHONPATH", "PYTHONPATH");
@ -966,28 +958,76 @@ config_read_complex_options(_PyCoreConfig *config)
} }
static void static _PyInitError
config_init_locale(_PyCoreConfig *config) config_init_coerce_c_locale(_PyCoreConfig *config)
{ {
const wchar_t *xopt = config_get_xoption(config, L"coerce_c_locale");
if (xopt) {
wchar_t *sep = wcschr(xopt, L'=');
if (sep) {
xopt = sep + 1;
if (wcscmp(xopt, L"1") == 0) {
if (config->_coerce_c_locale < 0) {
config->_coerce_c_locale = 1;
}
}
else if (wcscmp(xopt, L"0") == 0) {
if (config->_coerce_c_locale < 0) {
config->_coerce_c_locale = 0;
}
}
else if (wcscmp(xopt, L"warn") == 0) {
if (config->_coerce_c_locale_warn < 0) {
config->_coerce_c_locale_warn = 1;
}
}
else {
return _Py_INIT_USER_ERR("invalid -X coerce_c_locale option value");
}
}
else {
if (config->_coerce_c_locale < 0) { if (config->_coerce_c_locale < 0) {
/* The C locale enables the C locale coercion (PEP 538) */
if (_Py_LegacyLocaleDetected()) {
config->_coerce_c_locale = 1; config->_coerce_c_locale = 1;
} }
} }
#ifndef MS_WINDOWS if (config->_coerce_c_locale_warn < 0) {
if (config->utf8_mode < 0) { config->_coerce_c_locale_warn = 0;
/* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
if (ctype_loc != NULL
&& (strcmp(ctype_loc, "C") == 0
|| strcmp(ctype_loc, "POSIX") == 0))
{
config->utf8_mode = 1;
} }
} }
#endif
const char *env = _PyCoreConfig_GetEnv(config, "PYTHONCOERCECLOCALE");
if (env) {
if (strcmp(env, "0") == 0) {
if (config->_coerce_c_locale < 0) {
config->_coerce_c_locale = 0;
}
}
else if (strcmp(env, "warn") == 0) {
if (config->_coerce_c_locale_warn < 0) {
config->_coerce_c_locale_warn = 1;
}
}
else {
if (config->_coerce_c_locale < 0) {
config->_coerce_c_locale = 1;
}
}
if (config->_coerce_c_locale_warn < 0) {
config->_coerce_c_locale_warn = 0;
}
}
if (config->_coerce_c_locale < 0) {
/* The C locale enables the C locale coercion (PEP 538) */
if (_Py_LegacyLocaleDetected()) {
config->_coerce_c_locale = 1;
return _Py_INIT_OK();
}
}
return _Py_INIT_OK();
} }
@ -1293,8 +1333,11 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
} }
} }
if (config->utf8_mode < 0 || config->_coerce_c_locale < 0) { if (config->_coerce_c_locale < 0 || config->_coerce_c_locale_warn < 0) {
config_init_locale(config); err = config_init_coerce_c_locale(config);
if (_Py_INIT_FAILED(err)) {
return err;
}
} }
if (config->_install_importlib) { if (config->_install_importlib) {
@ -1349,6 +1392,7 @@ _PyCoreConfig_Read(_PyCoreConfig *config)
} }
assert(config->_coerce_c_locale >= 0); assert(config->_coerce_c_locale >= 0);
assert(config->_coerce_c_locale_warn >= 0);
assert(config->use_environment >= 0); assert(config->use_environment >= 0);
assert(config->filesystem_encoding != NULL); assert(config->filesystem_encoding != NULL);
assert(config->filesystem_errors != NULL); assert(config->filesystem_errors != NULL);