bpo-30565: Add PYTHONCOERCECLOCALE=warn runtime flag (GH-2260)

- removes PY_WARN_ON_C_LOCALE build time flag
- locale coercion and compatibility warnings are now always compiled
  in, but are off by default
- adds PYTHONCOERCECLOCALE=warn runtime option to aid in
  debugging potentially locale related compatibility problems

Due to not-yet-resolved test failures on *BSD systems (including
Mac OS X), this also temporarily disables UTF-8 as a locale coercion
target, and skips testing the interpreter's behavior in the POSIX locale.
This commit is contained in:
Nick Coghlan 2017-06-18 12:29:42 +10:00 committed by GitHub
parent 6a98a04e21
commit eb81795d7d
5 changed files with 184 additions and 122 deletions

View File

@ -744,6 +744,11 @@ conflict.
:data:`sys.stdin` and :data:`sys.stdout` to ``surrogateescape``. This :data:`sys.stdin` and :data:`sys.stdout` to ``surrogateescape``. This
behavior can be overridden using :envvar:`PYTHONIOENCODING` as usual. behavior can be overridden using :envvar:`PYTHONIOENCODING` as usual.
For debugging purposes, setting ``PYTHONCOERCECLOCALE=warn`` will cause
Python to emit warning messages on ``stderr`` if either the locale coercion
activates, or else if a locale that *would* have triggered coercion is
still active when the Python runtime is initialized.
Availability: \*nix Availability: \*nix
.. versionadded:: 3.7 .. versionadded:: 3.7

View File

@ -96,20 +96,11 @@ defined coercion target locales (currently ``C.UTF-8``, ``C.utf8``, and
``UTF-8``). The default error handler for ``stderr`` continues to be ``UTF-8``). The default error handler for ``stderr`` continues to be
``backslashreplace``, regardless of locale. ``backslashreplace``, regardless of locale.
.. note:: Locale coercion is silent by default, but to assist in debugging potentially
locale related integration problems, explicit warnings (emitted directly on
In the current implementation, a warning message is printed directly to ``stderr``) can be requested by setting ``PYTHONCOERCECLOCALE=warn``. This
``stderr`` even for successful implicit locale coercion. This gives setting will also cause the Python runtime to emit a warning if the legacy C
redistributors and system integrators the opportunity to determine if they locale remains active when the core interpreter is initialized.
should be making an environmental change to avoid the need for implicit
coercion at the Python interpreter level.
However, it's not clear that this is going to be the best approach for
the final 3.7.0 release, and we may end up deciding to disable the warning
by default and provide some way of opting into it at runtime or build time.
Concrete examples of use cases where it would be preferable to disable the
warning by default can be noted on :issue:`30565`.
.. seealso:: .. seealso::

View File

@ -22,13 +22,23 @@ if sys.platform == "darwin":
else: else:
C_LOCALE_FS_ENCODING = C_LOCALE_STREAM_ENCODING C_LOCALE_FS_ENCODING = C_LOCALE_STREAM_ENCODING
# XXX (ncoghlan): The above is probably still wrong for: # Note that the above is probably still wrong in some cases, such as:
# * Windows when PYTHONLEGACYWINDOWSFSENCODING is set # * Windows when PYTHONLEGACYWINDOWSFSENCODING is set
# * AIX and any other platforms that use latin-1 in the C locale # * AIX and any other platforms that use latin-1 in the C locale
#
# Options for dealing with this:
# * Don't set PYTHON_COERCE_C_LOCALE on such platforms (e.g. Windows doesn't)
# * Fix the test expectations to match the actual platform behaviour
# In order to get the warning messages to match up as expected, the candidate # In order to get the warning messages to match up as expected, the candidate
# order here must match the target locale order in Python/pylifecycle.c # order here must match the target locale order in Python/pylifecycle.c
_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8", "UTF-8") _C_UTF8_LOCALES = ("C.UTF-8", "C.utf8") #, "UTF-8")
# XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to
# problems encountered on *BSD systems with those test cases
# For additional details see:
# nl_langinfo CODESET error: https://bugs.python.org/issue30647
# locale handling differences: https://bugs.python.org/issue30672
# There's no reliable cross-platform way of checking locale alias # There's no reliable cross-platform way of checking locale alias
# lists, so the only way of knowing which of these locales will work # lists, so the only way of knowing which of these locales will work
@ -40,20 +50,24 @@ def _set_locale_in_subprocess(locale_name):
result, py_cmd = run_python_until_end("-c", cmd, __isolated=True) result, py_cmd = run_python_until_end("-c", cmd, __isolated=True)
return result.rc == 0 return result.rc == 0
_EncodingDetails = namedtuple("EncodingDetails", _fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all"
"fsencoding stdin_info stdout_info stderr_info") _EncodingDetails = namedtuple("EncodingDetails", _fields)
class EncodingDetails(_EncodingDetails): class EncodingDetails(_EncodingDetails):
# XXX (ncoghlan): Using JSON for child state reporting may be less fragile
CHILD_PROCESS_SCRIPT = ";".join([ CHILD_PROCESS_SCRIPT = ";".join([
"import sys", "import sys, os",
"print(sys.getfilesystemencoding())", "print(sys.getfilesystemencoding())",
"print(sys.stdin.encoding + ':' + sys.stdin.errors)", "print(sys.stdin.encoding + ':' + sys.stdin.errors)",
"print(sys.stdout.encoding + ':' + sys.stdout.errors)", "print(sys.stdout.encoding + ':' + sys.stdout.errors)",
"print(sys.stderr.encoding + ':' + sys.stderr.errors)", "print(sys.stderr.encoding + ':' + sys.stderr.errors)",
"print(os.environ.get('LANG', 'not set'))",
"print(os.environ.get('LC_CTYPE', 'not set'))",
"print(os.environ.get('LC_ALL', 'not set'))",
]) ])
@classmethod @classmethod
def get_expected_details(cls, fs_encoding, stream_encoding): def get_expected_details(cls, coercion_expected, fs_encoding, stream_encoding, env_vars):
"""Returns expected child process details for a given encoding""" """Returns expected child process details for a given encoding"""
_stream = stream_encoding + ":{}" _stream = stream_encoding + ":{}"
# stdin and stdout should use surrogateescape either because the # stdin and stdout should use surrogateescape either because the
@ -61,7 +75,14 @@ class EncodingDetails(_EncodingDetails):
stream_info = 2*[_stream.format("surrogateescape")] stream_info = 2*[_stream.format("surrogateescape")]
# stderr should always use backslashreplace # stderr should always use backslashreplace
stream_info.append(_stream.format("backslashreplace")) stream_info.append(_stream.format("backslashreplace"))
return dict(cls(fs_encoding, *stream_info)._asdict()) expected_lang = env_vars.get("LANG", "not set").lower()
if coercion_expected:
expected_lc_ctype = CLI_COERCION_TARGET.lower()
else:
expected_lc_ctype = env_vars.get("LC_CTYPE", "not set").lower()
expected_lc_all = env_vars.get("LC_ALL", "not set").lower()
env_info = expected_lang, expected_lc_ctype, expected_lc_all
return dict(cls(fs_encoding, *stream_info, *env_info)._asdict())
@staticmethod @staticmethod
def _handle_output_variations(data): def _handle_output_variations(data):
@ -97,64 +118,20 @@ class EncodingDetails(_EncodingDetails):
result.fail(py_cmd) result.fail(py_cmd)
# All subprocess outputs in this test case should be pure ASCII # All subprocess outputs in this test case should be pure ASCII
adjusted_output = cls._handle_output_variations(result.out) adjusted_output = cls._handle_output_variations(result.out)
stdout_lines = adjusted_output.decode("ascii").rstrip().splitlines() stdout_lines = adjusted_output.decode("ascii").splitlines()
child_encoding_details = dict(cls(*stdout_lines)._asdict()) child_encoding_details = dict(cls(*stdout_lines)._asdict())
stderr_lines = result.err.decode("ascii").rstrip().splitlines() stderr_lines = result.err.decode("ascii").rstrip().splitlines()
return child_encoding_details, stderr_lines return child_encoding_details, stderr_lines
class _ChildProcessEncodingTestCase(unittest.TestCase):
# Base class to check for expected encoding details in a child process
def _check_child_encoding_details(self,
env_vars,
expected_fs_encoding,
expected_stream_encoding,
expected_warning):
"""Check the C locale handling for the given process environment
Parameters:
expected_fs_encoding: expected sys.getfilesystemencoding() result
expected_stream_encoding: expected encoding for standard streams
expected_warning: stderr output to expect (if any)
"""
result = EncodingDetails.get_child_details(env_vars)
encoding_details, stderr_lines = result
self.assertEqual(encoding_details,
EncodingDetails.get_expected_details(
expected_fs_encoding,
expected_stream_encoding))
self.assertEqual(stderr_lines, expected_warning)
# Details of the shared library warning emitted at runtime # Details of the shared library warning emitted at runtime
LIBRARY_C_LOCALE_WARNING = ( LEGACY_LOCALE_WARNING = (
"Python runtime initialized with LC_CTYPE=C (a locale with default ASCII " "Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
"encoding), which may cause Unicode compatibility problems. Using C.UTF-8, " "encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
"C.utf8, or UTF-8 (if available) as alternative Unicode-compatible " "C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
"locales is recommended." "locales is recommended."
) )
@unittest.skipUnless(sysconfig.get_config_var("PY_WARN_ON_C_LOCALE"),
"C locale runtime warning disabled at build time")
class LocaleWarningTests(_ChildProcessEncodingTestCase):
# Test warning emitted when running in the C locale
def test_library_c_locale_warning(self):
self.maxDiff = None
for locale_to_set in ("C", "POSIX", "invalid.ascii"):
# XXX (ncoghlan): Mac OS X doesn't behave as expected in the
# POSIX locale, so we skip that for now
if sys.platform == "darwin" and locale_to_set == "POSIX":
continue
var_dict = {
"LC_ALL": locale_to_set
}
with self.subTest(forced_locale=locale_to_set):
self._check_child_encoding_details(var_dict,
C_LOCALE_FS_ENCODING,
C_LOCALE_STREAM_ENCODING,
[LIBRARY_C_LOCALE_WARNING])
# Details of the CLI locale coercion warning emitted at runtime # Details of the CLI locale coercion warning emitted at runtime
CLI_COERCION_WARNING_FMT = ( CLI_COERCION_WARNING_FMT = (
"Python detected LC_CTYPE=C: LC_CTYPE coerced to {} (set another locale " "Python detected LC_CTYPE=C: LC_CTYPE coerced to {} (set another locale "
@ -163,9 +140,13 @@ CLI_COERCION_WARNING_FMT = (
AVAILABLE_TARGETS = None AVAILABLE_TARGETS = None
CLI_COERCION_TARGET = None
CLI_COERCION_WARNING = None
def setUpModule(): def setUpModule():
global AVAILABLE_TARGETS global AVAILABLE_TARGETS
global CLI_COERCION_TARGET
global CLI_COERCION_WARNING
if AVAILABLE_TARGETS is not None: if AVAILABLE_TARGETS is not None:
# initialization already done # initialization already done
@ -177,26 +158,57 @@ def setUpModule():
if _set_locale_in_subprocess(target_locale): if _set_locale_in_subprocess(target_locale):
AVAILABLE_TARGETS.append(target_locale) AVAILABLE_TARGETS.append(target_locale)
if AVAILABLE_TARGETS:
# Coercion is expected to use the first available target locale
CLI_COERCION_TARGET = AVAILABLE_TARGETS[0]
CLI_COERCION_WARNING = CLI_COERCION_WARNING_FMT.format(CLI_COERCION_TARGET)
class _LocaleCoercionTargetsTestCase(_ChildProcessEncodingTestCase): class _LocaleHandlingTestCase(unittest.TestCase):
# Base class for test cases that rely on coercion targets being defined # Base class to check expected locale handling behaviour
@classmethod def _check_child_encoding_details(self,
def setUpClass(cls): env_vars,
expected_fs_encoding,
expected_stream_encoding,
expected_warnings,
coercion_expected):
"""Check the C locale handling for the given process environment
Parameters:
expected_fs_encoding: expected sys.getfilesystemencoding() result
expected_stream_encoding: expected encoding for standard streams
expected_warnings: stderr lines to expect (None means no output expected)
"""
result = EncodingDetails.get_child_details(env_vars)
encoding_details, stderr_lines = result
expected_details = EncodingDetails.get_expected_details(
coercion_expected,
expected_fs_encoding,
expected_stream_encoding,
env_vars
)
self.assertEqual(encoding_details, expected_details)
if expected_warnings is None:
expected_warnings = []
self.assertEqual(stderr_lines, expected_warnings)
class LocaleConfigurationTests(_LocaleHandlingTestCase):
# Test explicit external configuration via the process environment
def setUpClass():
# This relies on setUpModule() having been run, so it can't be
# handled via the @unittest.skipUnless decorator
if not AVAILABLE_TARGETS: if not AVAILABLE_TARGETS:
raise unittest.SkipTest("No C-with-UTF-8 locale available") raise unittest.SkipTest("No C-with-UTF-8 locale available")
class LocaleConfigurationTests(_LocaleCoercionTargetsTestCase):
# Test explicit external configuration via the process environment
def test_external_target_locale_configuration(self): def test_external_target_locale_configuration(self):
# Explicitly setting a target locale should give the same behaviour as # Explicitly setting a target locale should give the same behaviour as
# is seen when implicitly coercing to that target locale # is seen when implicitly coercing to that target locale
self.maxDiff = None self.maxDiff = None
expected_warning = []
expected_fs_encoding = "utf-8" expected_fs_encoding = "utf-8"
expected_stream_encoding = "utf-8" expected_stream_encoding = "utf-8"
@ -209,6 +221,7 @@ class LocaleConfigurationTests(_LocaleCoercionTargetsTestCase):
for locale_to_set in AVAILABLE_TARGETS: for locale_to_set in AVAILABLE_TARGETS:
# XXX (ncoghlan): LANG=UTF-8 doesn't appear to work as # XXX (ncoghlan): LANG=UTF-8 doesn't appear to work as
# expected, so skip that combination for now # expected, so skip that combination for now
# See https://bugs.python.org/issue30672 for discussion
if env_var == "LANG" and locale_to_set == "UTF-8": if env_var == "LANG" and locale_to_set == "UTF-8":
continue continue
@ -219,17 +232,23 @@ class LocaleConfigurationTests(_LocaleCoercionTargetsTestCase):
self._check_child_encoding_details(var_dict, self._check_child_encoding_details(var_dict,
expected_fs_encoding, expected_fs_encoding,
expected_stream_encoding, expected_stream_encoding,
expected_warning) expected_warnings=None,
coercion_expected=False)
@test.support.cpython_only @test.support.cpython_only
@unittest.skipUnless(sysconfig.get_config_var("PY_COERCE_C_LOCALE"), @unittest.skipUnless(sysconfig.get_config_var("PY_COERCE_C_LOCALE"),
"C locale coercion disabled at build time") "C locale coercion disabled at build time")
class LocaleCoercionTests(_LocaleCoercionTargetsTestCase): class LocaleCoercionTests(_LocaleHandlingTestCase):
# Test implicit reconfiguration of the environment during CLI startup # Test implicit reconfiguration of the environment during CLI startup
def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale): def _check_c_locale_coercion(self,
fs_encoding, stream_encoding,
coerce_c_locale,
expected_warnings=None,
coercion_expected=True,
**extra_vars):
"""Check the C locale handling for various configurations """Check the C locale handling for various configurations
Parameters: Parameters:
@ -238,27 +257,31 @@ class LocaleCoercionTests(_LocaleCoercionTargetsTestCase):
coerce_c_locale: setting to use for PYTHONCOERCECLOCALE coerce_c_locale: setting to use for PYTHONCOERCECLOCALE
None: don't set the variable at all None: don't set the variable at all
str: the value set in the child's environment str: the value set in the child's environment
expected_warnings: expected warning lines on stderr
extra_vars: additional environment variables to set in subprocess
""" """
# Check for expected warning on stderr if C locale is coerced
self.maxDiff = None self.maxDiff = None
expected_warning = [] if not AVAILABLE_TARGETS:
if coerce_c_locale != "0": # Locale coercion is disabled when there aren't any target locales
# Expect coercion to use the first available locale fs_encoding = C_LOCALE_FS_ENCODING
warning_msg = CLI_COERCION_WARNING_FMT.format(AVAILABLE_TARGETS[0]) stream_encoding = C_LOCALE_STREAM_ENCODING
expected_warning.append(warning_msg) coercion_expected = False
if expected_warnings:
expected_warnings = [LEGACY_LOCALE_WARNING]
base_var_dict = { base_var_dict = {
"LANG": "", "LANG": "",
"LC_CTYPE": "", "LC_CTYPE": "",
"LC_ALL": "", "LC_ALL": "",
} }
base_var_dict.update(extra_vars)
for env_var in ("LANG", "LC_CTYPE"): for env_var in ("LANG", "LC_CTYPE"):
for locale_to_set in ("", "C", "POSIX", "invalid.ascii"): for locale_to_set in ("", "C", "POSIX", "invalid.ascii"):
# XXX (ncoghlan): Mac OS X doesn't behave as expected in the # XXX (ncoghlan): *BSD platforms don't behave as expected in the
# POSIX locale, so we skip that for now # POSIX locale, so we skip that for now
if sys.platform == "darwin" and locale_to_set == "POSIX": # See https://bugs.python.org/issue30672 for discussion
if locale_to_set == "POSIX":
continue continue
with self.subTest(env_var=env_var, with self.subTest(env_var=env_var,
nominal_locale=locale_to_set, nominal_locale=locale_to_set,
@ -267,33 +290,62 @@ class LocaleCoercionTests(_LocaleCoercionTargetsTestCase):
var_dict[env_var] = locale_to_set var_dict[env_var] = locale_to_set
if coerce_c_locale is not None: if coerce_c_locale is not None:
var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale
# Check behaviour on successful coercion
self._check_child_encoding_details(var_dict, self._check_child_encoding_details(var_dict,
fs_encoding, fs_encoding,
stream_encoding, stream_encoding,
expected_warning) expected_warnings,
coercion_expected)
def test_test_PYTHONCOERCECLOCALE_not_set(self): def test_test_PYTHONCOERCECLOCALE_not_set(self):
# This should coerce to the first available target locale by default # This should coerce to the first available target locale by default
self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=None) self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=None)
def test_PYTHONCOERCECLOCALE_not_zero(self): def test_PYTHONCOERCECLOCALE_not_zero(self):
# *Any* string other that "0" is considered "set" for our purposes # *Any* string other than "0" is considered "set" for our purposes
# and hence should result in the locale coercion being enabled # and hence should result in the locale coercion being enabled
for setting in ("", "1", "true", "false"): for setting in ("", "1", "true", "false"):
self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=setting) self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=setting)
def test_PYTHONCOERCECLOCALE_set_to_warn(self):
# PYTHONCOERCECLOCALE=warn enables runtime warnings for legacy locales
self._check_c_locale_coercion("utf-8", "utf-8",
coerce_c_locale="warn",
expected_warnings=[CLI_COERCION_WARNING])
def test_PYTHONCOERCECLOCALE_set_to_zero(self): def test_PYTHONCOERCECLOCALE_set_to_zero(self):
# The setting "0" should result in the locale coercion being disabled # The setting "0" should result in the locale coercion being disabled
self._check_c_locale_coercion(C_LOCALE_FS_ENCODING, self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
C_LOCALE_STREAM_ENCODING, C_LOCALE_STREAM_ENCODING,
coerce_c_locale="0") coerce_c_locale="0",
coercion_expected=False)
# Setting LC_ALL=C shouldn't make any difference to the behaviour
self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
C_LOCALE_STREAM_ENCODING,
coerce_c_locale="0",
LC_ALL="C",
coercion_expected=False)
def test_LC_ALL_set_to_C(self):
# Setting LC_ALL should render the locale coercion ineffective
self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
C_LOCALE_STREAM_ENCODING,
coerce_c_locale=None,
LC_ALL="C",
coercion_expected=False)
# And result in a warning about a lack of locale compatibility
self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
C_LOCALE_STREAM_ENCODING,
coerce_c_locale="warn",
LC_ALL="C",
expected_warnings=[LEGACY_LOCALE_WARNING],
coercion_expected=False)
def test_main(): def test_main():
test.support.run_unittest( test.support.run_unittest(
LocaleConfigurationTests, LocaleConfigurationTests,
LocaleCoercionTests, LocaleCoercionTests
LocaleWarningTests
) )
test.support.reap_children() test.support.reap_children()

View File

@ -105,10 +105,10 @@ static const char usage_6[] =
" predictable seed.\n" " predictable seed.\n"
"PYTHONMALLOC: set the Python memory allocators and/or install debug hooks\n" "PYTHONMALLOC: set the Python memory allocators and/or install debug hooks\n"
" on Python memory allocators. Use PYTHONMALLOC=debug to install debug\n" " on Python memory allocators. Use PYTHONMALLOC=debug to install debug\n"
" hooks.\n"; " hooks.\n"
static const char usage_7[] =
"PYTHONCOERCECLOCALE: if this variable is set to 0, it disables the locale\n" "PYTHONCOERCECLOCALE: if this variable is set to 0, it disables the locale\n"
" coercion behavior\n"; " coercion behavior. Use PYTHONCOERCECLOCALE=warn to request display of\n"
" locale coercion and locale compatibility warnings on stderr.\n";
static int static int
usage(int exitcode, const wchar_t* program) usage(int exitcode, const wchar_t* program)
@ -125,7 +125,6 @@ usage(int exitcode, const wchar_t* program)
fprintf(f, usage_4, (wint_t)DELIM); fprintf(f, usage_4, (wint_t)DELIM);
fprintf(f, usage_5, (wint_t)DELIM, PYTHONHOMEHELP); fprintf(f, usage_5, (wint_t)DELIM, PYTHONHOMEHELP);
fputs(usage_6, f); fputs(usage_6, f);
fputs(usage_7, f);
} }
return exitcode; return exitcode;
} }

View File

@ -356,6 +356,10 @@ _Py_LegacyLocaleDetected(void)
{ {
#ifndef MS_WINDOWS #ifndef MS_WINDOWS
/* On non-Windows systems, the C locale is considered a legacy locale */ /* On non-Windows systems, the C locale is considered a legacy locale */
/* XXX (ncoghlan): some platforms (notably Mac OS X) don't appear to treat
* the POSIX locale as a simple alias for the C locale, so
* we may also want to check for that explicitly.
*/
const char *ctype_loc = setlocale(LC_CTYPE, NULL); const char *ctype_loc = setlocale(LC_CTYPE, NULL);
return ctype_loc != NULL && strcmp(ctype_loc, "C") == 0; return ctype_loc != NULL && strcmp(ctype_loc, "C") == 0;
#else #else
@ -364,6 +368,30 @@ _Py_LegacyLocaleDetected(void)
#endif #endif
} }
static const char *_C_LOCALE_WARNING =
"Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
"encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
"C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
"locales is recommended.\n";
static int
_legacy_locale_warnings_enabled(void)
{
const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
return (coerce_c_locale != NULL &&
strncmp(coerce_c_locale, "warn", 5) == 0);
}
static void
_emit_stderr_warning_for_legacy_locale(void)
{
if (_legacy_locale_warnings_enabled()) {
if (_Py_LegacyLocaleDetected()) {
fprintf(stderr, "%s", _C_LOCALE_WARNING);
}
}
}
typedef struct _CandidateLocale { typedef struct _CandidateLocale {
const char *locale_name; /* The locale to try as a coercion target */ const char *locale_name; /* The locale to try as a coercion target */
} _LocaleCoercionTarget; } _LocaleCoercionTarget;
@ -371,10 +399,17 @@ typedef struct _CandidateLocale {
static _LocaleCoercionTarget _TARGET_LOCALES[] = { static _LocaleCoercionTarget _TARGET_LOCALES[] = {
{"C.UTF-8"}, {"C.UTF-8"},
{"C.utf8"}, {"C.utf8"},
{"UTF-8"}, /* {"UTF-8"}, */
{NULL} {NULL}
}; };
/* XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to
* problems encountered on *BSD systems with those test cases
* For additional details see:
* nl_langinfo CODESET error: https://bugs.python.org/issue30647
* locale handling differences: https://bugs.python.org/issue30672
*/
static char * static char *
get_default_standard_stream_error_handler(void) get_default_standard_stream_error_handler(void)
{ {
@ -419,7 +454,9 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target)
"Error setting LC_CTYPE, skipping C locale coercion\n"); "Error setting LC_CTYPE, skipping C locale coercion\n");
return; return;
} }
if (_legacy_locale_warnings_enabled()) {
fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc); fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc);
}
/* Reconfigure with the overridden environment variables */ /* Reconfigure with the overridden environment variables */
setlocale(LC_ALL, ""); setlocale(LC_ALL, "");
@ -465,26 +502,6 @@ _Py_CoerceLegacyLocale(void)
} }
#ifdef PY_WARN_ON_C_LOCALE
static const char *_C_LOCALE_WARNING =
"Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
"encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
"C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
"locales is recommended.\n";
static void
_emit_stderr_warning_for_c_locale(void)
{
const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) {
if (_Py_LegacyLocaleDetected()) {
fprintf(stderr, "%s", _C_LOCALE_WARNING);
}
}
}
#endif
/* Global initializations. Can be undone by Py_Finalize(). Don't /* Global initializations. Can be undone by Py_Finalize(). Don't
call this twice without an intervening Py_Finalize() call. call this twice without an intervening Py_Finalize() call.
@ -561,9 +578,7 @@ void _Py_InitializeCore(const _PyCoreConfig *config)
the locale's charset without having to switch the locale's charset without having to switch
locales. */ locales. */
setlocale(LC_CTYPE, ""); setlocale(LC_CTYPE, "");
#ifdef PY_WARN_ON_C_LOCALE _emit_stderr_warning_for_legacy_locale();
_emit_stderr_warning_for_c_locale();
#endif
#endif #endif
#endif #endif