mirror of https://github.com/python/cpython
bpo-30565: Add PYTHONCOERCECLOCALE=warn runtime flag (GH-2260)
- removes the PY_WARN_ON_C_LOCALE build time flag
- locale coercion and compatibility warnings are now always compiled in, but are off by default
- adds the PYTHONCOERCECLOCALE=warn runtime option to aid in debugging potentially locale related compatibility problems

Due to not-yet-resolved test failures on *BSD systems (including Mac OS X), this also temporarily disables UTF-8 as a locale coercion target, and skips testing the interpreter's behavior in the POSIX locale.
This commit is contained in:
parent
6a98a04e21
commit
eb81795d7d
|
@ -744,6 +744,11 @@ conflict.
|
||||||
:data:`sys.stdin` and :data:`sys.stdout` to ``surrogateescape``. This
|
:data:`sys.stdin` and :data:`sys.stdout` to ``surrogateescape``. This
|
||||||
behavior can be overridden using :envvar:`PYTHONIOENCODING` as usual.
|
behavior can be overridden using :envvar:`PYTHONIOENCODING` as usual.
|
||||||
|
|
||||||
|
For debugging purposes, setting ``PYTHONCOERCECLOCALE=warn`` will cause
|
||||||
|
Python to emit warning messages on ``stderr`` if either the locale coercion
|
||||||
|
activates, or else if a locale that *would* have triggered coercion is
|
||||||
|
still active when the Python runtime is initialized.
|
||||||
|
|
||||||
Availability: \*nix
|
Availability: \*nix
|
||||||
|
|
||||||
.. versionadded:: 3.7
|
.. versionadded:: 3.7
|
||||||
|
|
|
@ -96,20 +96,11 @@ defined coercion target locales (currently ``C.UTF-8``, ``C.utf8``, and
|
||||||
``UTF-8``). The default error handler for ``stderr`` continues to be
|
``UTF-8``). The default error handler for ``stderr`` continues to be
|
||||||
``backslashreplace``, regardless of locale.
|
``backslashreplace``, regardless of locale.
|
||||||
|
|
||||||
.. note::
|
Locale coercion is silent by default, but to assist in debugging potentially
|
||||||
|
locale related integration problems, explicit warnings (emitted directly on
|
||||||
In the current implementation, a warning message is printed directly to
|
``stderr``) can be requested by setting ``PYTHONCOERCECLOCALE=warn``. This
|
||||||
``stderr`` even for successful implicit locale coercion. This gives
|
setting will also cause the Python runtime to emit a warning if the legacy C
|
||||||
redistributors and system integrators the opportunity to determine if they
|
locale remains active when the core interpreter is initialized.
|
||||||
should be making an environmental change to avoid the need for implicit
|
|
||||||
coercion at the Python interpreter level.
|
|
||||||
|
|
||||||
However, it's not clear that this is going to be the best approach for
|
|
||||||
the final 3.7.0 release, and we may end up deciding to disable the warning
|
|
||||||
by default and provide some way of opting into it at runtime or build time.
|
|
||||||
|
|
||||||
Concrete examples of use cases where it would be preferrable to disable the
|
|
||||||
warning by default can be noted on :issue:`30565`.
|
|
||||||
|
|
||||||
.. seealso::
|
.. seealso::
|
||||||
|
|
||||||
|
|
|
@ -22,13 +22,23 @@ if sys.platform == "darwin":
|
||||||
else:
|
else:
|
||||||
C_LOCALE_FS_ENCODING = C_LOCALE_STREAM_ENCODING
|
C_LOCALE_FS_ENCODING = C_LOCALE_STREAM_ENCODING
|
||||||
|
|
||||||
# XXX (ncoghlan): The above is probably still wrong for:
|
# Note that the above is probably still wrong in some cases, such as:
|
||||||
# * Windows when PYTHONLEGACYWINDOWSFSENCODING is set
|
# * Windows when PYTHONLEGACYWINDOWSFSENCODING is set
|
||||||
# * AIX and any other platforms that use latin-1 in the C locale
|
# * AIX and any other platforms that use latin-1 in the C locale
|
||||||
|
#
|
||||||
|
# Options for dealing with this:
|
||||||
|
# * Don't set PYTHON_COERCE_C_LOCALE on such platforms (e.g. Windows doesn't)
|
||||||
|
# * Fix the test expectations to match the actual platform behaviour
|
||||||
|
|
||||||
# In order to get the warning messages to match up as expected, the candidate
|
# In order to get the warning messages to match up as expected, the candidate
|
||||||
# order here must match the target locale order in Python/pylifecycle.c
|
# order here must match the target locale order in Python/pylifecycle.c
|
||||||
_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8", "UTF-8")
|
_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8") #, "UTF-8")
|
||||||
|
|
||||||
|
# XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to
|
||||||
|
# problems encountered on *BSD systems with those test cases
|
||||||
|
# For additional details see:
|
||||||
|
# nl_langinfo CODESET error: https://bugs.python.org/issue30647
|
||||||
|
# locale handling differences: https://bugs.python.org/issue30672
|
||||||
|
|
||||||
# There's no reliable cross-platform way of checking locale alias
|
# There's no reliable cross-platform way of checking locale alias
|
||||||
# lists, so the only way of knowing which of these locales will work
|
# lists, so the only way of knowing which of these locales will work
|
||||||
|
@ -40,20 +50,24 @@ def _set_locale_in_subprocess(locale_name):
|
||||||
result, py_cmd = run_python_until_end("-c", cmd, __isolated=True)
|
result, py_cmd = run_python_until_end("-c", cmd, __isolated=True)
|
||||||
return result.rc == 0
|
return result.rc == 0
|
||||||
|
|
||||||
_EncodingDetails = namedtuple("EncodingDetails",
|
_fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all"
|
||||||
"fsencoding stdin_info stdout_info stderr_info")
|
_EncodingDetails = namedtuple("EncodingDetails", _fields)
|
||||||
|
|
||||||
class EncodingDetails(_EncodingDetails):
|
class EncodingDetails(_EncodingDetails):
|
||||||
|
# XXX (ncoghlan): Using JSON for child state reporting may be less fragile
|
||||||
CHILD_PROCESS_SCRIPT = ";".join([
|
CHILD_PROCESS_SCRIPT = ";".join([
|
||||||
"import sys",
|
"import sys, os",
|
||||||
"print(sys.getfilesystemencoding())",
|
"print(sys.getfilesystemencoding())",
|
||||||
"print(sys.stdin.encoding + ':' + sys.stdin.errors)",
|
"print(sys.stdin.encoding + ':' + sys.stdin.errors)",
|
||||||
"print(sys.stdout.encoding + ':' + sys.stdout.errors)",
|
"print(sys.stdout.encoding + ':' + sys.stdout.errors)",
|
||||||
"print(sys.stderr.encoding + ':' + sys.stderr.errors)",
|
"print(sys.stderr.encoding + ':' + sys.stderr.errors)",
|
||||||
|
"print(os.environ.get('LANG', 'not set'))",
|
||||||
|
"print(os.environ.get('LC_CTYPE', 'not set'))",
|
||||||
|
"print(os.environ.get('LC_ALL', 'not set'))",
|
||||||
])
|
])
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_expected_details(cls, fs_encoding, stream_encoding):
|
def get_expected_details(cls, coercion_expected, fs_encoding, stream_encoding, env_vars):
|
||||||
"""Returns expected child process details for a given encoding"""
|
"""Returns expected child process details for a given encoding"""
|
||||||
_stream = stream_encoding + ":{}"
|
_stream = stream_encoding + ":{}"
|
||||||
# stdin and stdout should use surrogateescape either because the
|
# stdin and stdout should use surrogateescape either because the
|
||||||
|
@ -61,7 +75,14 @@ class EncodingDetails(_EncodingDetails):
|
||||||
stream_info = 2*[_stream.format("surrogateescape")]
|
stream_info = 2*[_stream.format("surrogateescape")]
|
||||||
# stderr should always use backslashreplace
|
# stderr should always use backslashreplace
|
||||||
stream_info.append(_stream.format("backslashreplace"))
|
stream_info.append(_stream.format("backslashreplace"))
|
||||||
return dict(cls(fs_encoding, *stream_info)._asdict())
|
expected_lang = env_vars.get("LANG", "not set").lower()
|
||||||
|
if coercion_expected:
|
||||||
|
expected_lc_ctype = CLI_COERCION_TARGET.lower()
|
||||||
|
else:
|
||||||
|
expected_lc_ctype = env_vars.get("LC_CTYPE", "not set").lower()
|
||||||
|
expected_lc_all = env_vars.get("LC_ALL", "not set").lower()
|
||||||
|
env_info = expected_lang, expected_lc_ctype, expected_lc_all
|
||||||
|
return dict(cls(fs_encoding, *stream_info, *env_info)._asdict())
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _handle_output_variations(data):
|
def _handle_output_variations(data):
|
||||||
|
@ -97,64 +118,20 @@ class EncodingDetails(_EncodingDetails):
|
||||||
result.fail(py_cmd)
|
result.fail(py_cmd)
|
||||||
# All subprocess outputs in this test case should be pure ASCII
|
# All subprocess outputs in this test case should be pure ASCII
|
||||||
adjusted_output = cls._handle_output_variations(result.out)
|
adjusted_output = cls._handle_output_variations(result.out)
|
||||||
stdout_lines = adjusted_output.decode("ascii").rstrip().splitlines()
|
stdout_lines = adjusted_output.decode("ascii").splitlines()
|
||||||
child_encoding_details = dict(cls(*stdout_lines)._asdict())
|
child_encoding_details = dict(cls(*stdout_lines)._asdict())
|
||||||
stderr_lines = result.err.decode("ascii").rstrip().splitlines()
|
stderr_lines = result.err.decode("ascii").rstrip().splitlines()
|
||||||
return child_encoding_details, stderr_lines
|
return child_encoding_details, stderr_lines
|
||||||
|
|
||||||
|
|
||||||
class _ChildProcessEncodingTestCase(unittest.TestCase):
|
|
||||||
# Base class to check for expected encoding details in a child process
|
|
||||||
|
|
||||||
def _check_child_encoding_details(self,
|
|
||||||
env_vars,
|
|
||||||
expected_fs_encoding,
|
|
||||||
expected_stream_encoding,
|
|
||||||
expected_warning):
|
|
||||||
"""Check the C locale handling for the given process environment
|
|
||||||
|
|
||||||
Parameters:
|
|
||||||
expected_fs_encoding: expected sys.getfilesystemencoding() result
|
|
||||||
expected_stream_encoding: expected encoding for standard streams
|
|
||||||
expected_warning: stderr output to expect (if any)
|
|
||||||
"""
|
|
||||||
result = EncodingDetails.get_child_details(env_vars)
|
|
||||||
encoding_details, stderr_lines = result
|
|
||||||
self.assertEqual(encoding_details,
|
|
||||||
EncodingDetails.get_expected_details(
|
|
||||||
expected_fs_encoding,
|
|
||||||
expected_stream_encoding))
|
|
||||||
self.assertEqual(stderr_lines, expected_warning)
|
|
||||||
|
|
||||||
# Details of the shared library warning emitted at runtime
|
# Details of the shared library warning emitted at runtime
|
||||||
LIBRARY_C_LOCALE_WARNING = (
|
LEGACY_LOCALE_WARNING = (
|
||||||
"Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
|
"Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
|
||||||
"encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
|
"encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
|
||||||
"C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
|
"C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
|
||||||
"locales is recommended."
|
"locales is recommended."
|
||||||
)
|
)
|
||||||
|
|
||||||
@unittest.skipUnless(sysconfig.get_config_var("PY_WARN_ON_C_LOCALE"),
|
|
||||||
"C locale runtime warning disabled at build time")
|
|
||||||
class LocaleWarningTests(_ChildProcessEncodingTestCase):
|
|
||||||
# Test warning emitted when running in the C locale
|
|
||||||
|
|
||||||
def test_library_c_locale_warning(self):
|
|
||||||
self.maxDiff = None
|
|
||||||
for locale_to_set in ("C", "POSIX", "invalid.ascii"):
|
|
||||||
# XXX (ncoghlan): Mac OS X doesn't behave as expected in the
|
|
||||||
# POSIX locale, so we skip that for now
|
|
||||||
if sys.platform == "darwin" and locale_to_set == "POSIX":
|
|
||||||
continue
|
|
||||||
var_dict = {
|
|
||||||
"LC_ALL": locale_to_set
|
|
||||||
}
|
|
||||||
with self.subTest(forced_locale=locale_to_set):
|
|
||||||
self._check_child_encoding_details(var_dict,
|
|
||||||
C_LOCALE_FS_ENCODING,
|
|
||||||
C_LOCALE_STREAM_ENCODING,
|
|
||||||
[LIBRARY_C_LOCALE_WARNING])
|
|
||||||
|
|
||||||
# Details of the CLI locale coercion warning emitted at runtime
|
# Details of the CLI locale coercion warning emitted at runtime
|
||||||
CLI_COERCION_WARNING_FMT = (
|
CLI_COERCION_WARNING_FMT = (
|
||||||
"Python detected LC_CTYPE=C: LC_CTYPE coerced to {} (set another locale "
|
"Python detected LC_CTYPE=C: LC_CTYPE coerced to {} (set another locale "
|
||||||
|
@ -163,9 +140,13 @@ CLI_COERCION_WARNING_FMT = (
|
||||||
|
|
||||||
|
|
||||||
AVAILABLE_TARGETS = None
|
AVAILABLE_TARGETS = None
|
||||||
|
CLI_COERCION_TARGET = None
|
||||||
|
CLI_COERCION_WARNING = None
|
||||||
|
|
||||||
def setUpModule():
|
def setUpModule():
|
||||||
global AVAILABLE_TARGETS
|
global AVAILABLE_TARGETS
|
||||||
|
global CLI_COERCION_TARGET
|
||||||
|
global CLI_COERCION_WARNING
|
||||||
|
|
||||||
if AVAILABLE_TARGETS is not None:
|
if AVAILABLE_TARGETS is not None:
|
||||||
# initialization already done
|
# initialization already done
|
||||||
|
@ -177,26 +158,57 @@ def setUpModule():
|
||||||
if _set_locale_in_subprocess(target_locale):
|
if _set_locale_in_subprocess(target_locale):
|
||||||
AVAILABLE_TARGETS.append(target_locale)
|
AVAILABLE_TARGETS.append(target_locale)
|
||||||
|
|
||||||
|
if AVAILABLE_TARGETS:
|
||||||
|
# Coercion is expected to use the first available target locale
|
||||||
|
CLI_COERCION_TARGET = AVAILABLE_TARGETS[0]
|
||||||
|
CLI_COERCION_WARNING = CLI_COERCION_WARNING_FMT.format(CLI_COERCION_TARGET)
|
||||||
|
|
||||||
|
|
||||||
class _LocaleCoercionTargetsTestCase(_ChildProcessEncodingTestCase):
|
class _LocaleHandlingTestCase(unittest.TestCase):
|
||||||
# Base class for test cases that rely on coercion targets being defined
|
# Base class to check expected locale handling behaviour
|
||||||
|
|
||||||
@classmethod
|
def _check_child_encoding_details(self,
|
||||||
def setUpClass(cls):
|
env_vars,
|
||||||
|
expected_fs_encoding,
|
||||||
|
expected_stream_encoding,
|
||||||
|
expected_warnings,
|
||||||
|
coercion_expected):
|
||||||
|
"""Check the C locale handling for the given process environment
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
expected_fs_encoding: expected sys.getfilesystemencoding() result
|
||||||
|
expected_stream_encoding: expected encoding for standard streams
|
||||||
|
expected_warnings: stderr output lines to expect (if any)
|
||||||
|
"""
|
||||||
|
result = EncodingDetails.get_child_details(env_vars)
|
||||||
|
encoding_details, stderr_lines = result
|
||||||
|
expected_details = EncodingDetails.get_expected_details(
|
||||||
|
coercion_expected,
|
||||||
|
expected_fs_encoding,
|
||||||
|
expected_stream_encoding,
|
||||||
|
env_vars
|
||||||
|
)
|
||||||
|
self.assertEqual(encoding_details, expected_details)
|
||||||
|
if expected_warnings is None:
|
||||||
|
expected_warnings = []
|
||||||
|
self.assertEqual(stderr_lines, expected_warnings)
|
||||||
|
|
||||||
|
|
||||||
|
class LocaleConfigurationTests(_LocaleHandlingTestCase):
|
||||||
|
# Test explicit external configuration via the process environment
|
||||||
|
|
||||||
|
def setUpClass():
|
||||||
|
# This relies on setUpModule() having been run, so it can't be
|
||||||
|
# handled via the @unittest.skipUnless decorator
|
||||||
if not AVAILABLE_TARGETS:
|
if not AVAILABLE_TARGETS:
|
||||||
raise unittest.SkipTest("No C-with-UTF-8 locale available")
|
raise unittest.SkipTest("No C-with-UTF-8 locale available")
|
||||||
|
|
||||||
|
|
||||||
class LocaleConfigurationTests(_LocaleCoercionTargetsTestCase):
|
|
||||||
# Test explicit external configuration via the process environment
|
|
||||||
|
|
||||||
def test_external_target_locale_configuration(self):
|
def test_external_target_locale_configuration(self):
|
||||||
|
|
||||||
# Explicitly setting a target locale should give the same behaviour as
|
# Explicitly setting a target locale should give the same behaviour as
|
||||||
# is seen when implicitly coercing to that target locale
|
# is seen when implicitly coercing to that target locale
|
||||||
self.maxDiff = None
|
self.maxDiff = None
|
||||||
|
|
||||||
expected_warning = []
|
|
||||||
expected_fs_encoding = "utf-8"
|
expected_fs_encoding = "utf-8"
|
||||||
expected_stream_encoding = "utf-8"
|
expected_stream_encoding = "utf-8"
|
||||||
|
|
||||||
|
@ -209,6 +221,7 @@ class LocaleConfigurationTests(_LocaleCoercionTargetsTestCase):
|
||||||
for locale_to_set in AVAILABLE_TARGETS:
|
for locale_to_set in AVAILABLE_TARGETS:
|
||||||
# XXX (ncoghlan): LANG=UTF-8 doesn't appear to work as
|
# XXX (ncoghlan): LANG=UTF-8 doesn't appear to work as
|
||||||
# expected, so skip that combination for now
|
# expected, so skip that combination for now
|
||||||
|
# See https://bugs.python.org/issue30672 for discussion
|
||||||
if env_var == "LANG" and locale_to_set == "UTF-8":
|
if env_var == "LANG" and locale_to_set == "UTF-8":
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -219,17 +232,23 @@ class LocaleConfigurationTests(_LocaleCoercionTargetsTestCase):
|
||||||
self._check_child_encoding_details(var_dict,
|
self._check_child_encoding_details(var_dict,
|
||||||
expected_fs_encoding,
|
expected_fs_encoding,
|
||||||
expected_stream_encoding,
|
expected_stream_encoding,
|
||||||
expected_warning)
|
expected_warnings=None,
|
||||||
|
coercion_expected=False)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@test.support.cpython_only
|
@test.support.cpython_only
|
||||||
@unittest.skipUnless(sysconfig.get_config_var("PY_COERCE_C_LOCALE"),
|
@unittest.skipUnless(sysconfig.get_config_var("PY_COERCE_C_LOCALE"),
|
||||||
"C locale coercion disabled at build time")
|
"C locale coercion disabled at build time")
|
||||||
class LocaleCoercionTests(_LocaleCoercionTargetsTestCase):
|
class LocaleCoercionTests(_LocaleHandlingTestCase):
|
||||||
# Test implicit reconfiguration of the environment during CLI startup
|
# Test implicit reconfiguration of the environment during CLI startup
|
||||||
|
|
||||||
def _check_c_locale_coercion(self, fs_encoding, stream_encoding, coerce_c_locale):
|
def _check_c_locale_coercion(self,
|
||||||
|
fs_encoding, stream_encoding,
|
||||||
|
coerce_c_locale,
|
||||||
|
expected_warnings=None,
|
||||||
|
coercion_expected=True,
|
||||||
|
**extra_vars):
|
||||||
"""Check the C locale handling for various configurations
|
"""Check the C locale handling for various configurations
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
|
@ -238,27 +257,31 @@ class LocaleCoercionTests(_LocaleCoercionTargetsTestCase):
|
||||||
coerce_c_locale: setting to use for PYTHONCOERCECLOCALE
|
coerce_c_locale: setting to use for PYTHONCOERCECLOCALE
|
||||||
None: don't set the variable at all
|
None: don't set the variable at all
|
||||||
str: the value set in the child's environment
|
str: the value set in the child's environment
|
||||||
|
expected_warnings: expected warning lines on stderr
|
||||||
|
extra_vars: additional environment variables to set in subprocess
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Check for expected warning on stderr if C locale is coerced
|
|
||||||
self.maxDiff = None
|
self.maxDiff = None
|
||||||
|
|
||||||
expected_warning = []
|
if not AVAILABLE_TARGETS:
|
||||||
if coerce_c_locale != "0":
|
# Locale coercion is disabled when there aren't any target locales
|
||||||
# Expect coercion to use the first available locale
|
fs_encoding = C_LOCALE_FS_ENCODING
|
||||||
warning_msg = CLI_COERCION_WARNING_FMT.format(AVAILABLE_TARGETS[0])
|
stream_encoding = C_LOCALE_STREAM_ENCODING
|
||||||
expected_warning.append(warning_msg)
|
coercion_expected = False
|
||||||
|
if expected_warnings:
|
||||||
|
expected_warnings = [LEGACY_LOCALE_WARNING]
|
||||||
|
|
||||||
base_var_dict = {
|
base_var_dict = {
|
||||||
"LANG": "",
|
"LANG": "",
|
||||||
"LC_CTYPE": "",
|
"LC_CTYPE": "",
|
||||||
"LC_ALL": "",
|
"LC_ALL": "",
|
||||||
}
|
}
|
||||||
|
base_var_dict.update(extra_vars)
|
||||||
for env_var in ("LANG", "LC_CTYPE"):
|
for env_var in ("LANG", "LC_CTYPE"):
|
||||||
for locale_to_set in ("", "C", "POSIX", "invalid.ascii"):
|
for locale_to_set in ("", "C", "POSIX", "invalid.ascii"):
|
||||||
# XXX (ncoghlan): Mac OS X doesn't behave as expected in the
|
# XXX (ncoghlan): *BSD platforms don't behave as expected in the
|
||||||
# POSIX locale, so we skip that for now
|
# POSIX locale, so we skip that for now
|
||||||
if sys.platform == "darwin" and locale_to_set == "POSIX":
|
# See https://bugs.python.org/issue30672 for discussion
|
||||||
|
if locale_to_set == "POSIX":
|
||||||
continue
|
continue
|
||||||
with self.subTest(env_var=env_var,
|
with self.subTest(env_var=env_var,
|
||||||
nominal_locale=locale_to_set,
|
nominal_locale=locale_to_set,
|
||||||
|
@ -267,33 +290,62 @@ class LocaleCoercionTests(_LocaleCoercionTargetsTestCase):
|
||||||
var_dict[env_var] = locale_to_set
|
var_dict[env_var] = locale_to_set
|
||||||
if coerce_c_locale is not None:
|
if coerce_c_locale is not None:
|
||||||
var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale
|
var_dict["PYTHONCOERCECLOCALE"] = coerce_c_locale
|
||||||
|
# Check behaviour on successful coercion
|
||||||
self._check_child_encoding_details(var_dict,
|
self._check_child_encoding_details(var_dict,
|
||||||
fs_encoding,
|
fs_encoding,
|
||||||
stream_encoding,
|
stream_encoding,
|
||||||
expected_warning)
|
expected_warnings,
|
||||||
|
coercion_expected)
|
||||||
|
|
||||||
def test_test_PYTHONCOERCECLOCALE_not_set(self):
|
def test_test_PYTHONCOERCECLOCALE_not_set(self):
|
||||||
# This should coerce to the first available target locale by default
|
# This should coerce to the first available target locale by default
|
||||||
self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=None)
|
self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=None)
|
||||||
|
|
||||||
def test_PYTHONCOERCECLOCALE_not_zero(self):
|
def test_PYTHONCOERCECLOCALE_not_zero(self):
|
||||||
# *Any* string other that "0" is considered "set" for our purposes
|
# *Any* string other than "0" is considered "set" for our purposes
|
||||||
# and hence should result in the locale coercion being enabled
|
# and hence should result in the locale coercion being enabled
|
||||||
for setting in ("", "1", "true", "false"):
|
for setting in ("", "1", "true", "false"):
|
||||||
self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=setting)
|
self._check_c_locale_coercion("utf-8", "utf-8", coerce_c_locale=setting)
|
||||||
|
|
||||||
|
def test_PYTHONCOERCECLOCALE_set_to_warn(self):
|
||||||
|
# PYTHONCOERCECLOCALE=warn enables runtime warnings for legacy locales
|
||||||
|
self._check_c_locale_coercion("utf-8", "utf-8",
|
||||||
|
coerce_c_locale="warn",
|
||||||
|
expected_warnings=[CLI_COERCION_WARNING])
|
||||||
|
|
||||||
|
|
||||||
def test_PYTHONCOERCECLOCALE_set_to_zero(self):
|
def test_PYTHONCOERCECLOCALE_set_to_zero(self):
|
||||||
# The setting "0" should result in the locale coercion being disabled
|
# The setting "0" should result in the locale coercion being disabled
|
||||||
self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
|
self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
|
||||||
C_LOCALE_STREAM_ENCODING,
|
C_LOCALE_STREAM_ENCODING,
|
||||||
coerce_c_locale="0")
|
coerce_c_locale="0",
|
||||||
|
coercion_expected=False)
|
||||||
|
# Setting LC_ALL=C shouldn't make any difference to the behaviour
|
||||||
|
self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
|
||||||
|
C_LOCALE_STREAM_ENCODING,
|
||||||
|
coerce_c_locale="0",
|
||||||
|
LC_ALL="C",
|
||||||
|
coercion_expected=False)
|
||||||
|
|
||||||
|
def test_LC_ALL_set_to_C(self):
|
||||||
|
# Setting LC_ALL should render the locale coercion ineffective
|
||||||
|
self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
|
||||||
|
C_LOCALE_STREAM_ENCODING,
|
||||||
|
coerce_c_locale=None,
|
||||||
|
LC_ALL="C",
|
||||||
|
coercion_expected=False)
|
||||||
|
# And result in a warning about a lack of locale compatibility
|
||||||
|
self._check_c_locale_coercion(C_LOCALE_FS_ENCODING,
|
||||||
|
C_LOCALE_STREAM_ENCODING,
|
||||||
|
coerce_c_locale="warn",
|
||||||
|
LC_ALL="C",
|
||||||
|
expected_warnings=[LEGACY_LOCALE_WARNING],
|
||||||
|
coercion_expected=False)
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
test.support.run_unittest(
|
test.support.run_unittest(
|
||||||
LocaleConfigurationTests,
|
LocaleConfigurationTests,
|
||||||
LocaleCoercionTests,
|
LocaleCoercionTests
|
||||||
LocaleWarningTests
|
|
||||||
)
|
)
|
||||||
test.support.reap_children()
|
test.support.reap_children()
|
||||||
|
|
||||||
|
|
|
@ -105,10 +105,10 @@ static const char usage_6[] =
|
||||||
" predictable seed.\n"
|
" predictable seed.\n"
|
||||||
"PYTHONMALLOC: set the Python memory allocators and/or install debug hooks\n"
|
"PYTHONMALLOC: set the Python memory allocators and/or install debug hooks\n"
|
||||||
" on Python memory allocators. Use PYTHONMALLOC=debug to install debug\n"
|
" on Python memory allocators. Use PYTHONMALLOC=debug to install debug\n"
|
||||||
" hooks.\n";
|
" hooks.\n"
|
||||||
static const char usage_7[] =
|
|
||||||
"PYTHONCOERCECLOCALE: if this variable is set to 0, it disables the locale\n"
|
"PYTHONCOERCECLOCALE: if this variable is set to 0, it disables the locale\n"
|
||||||
" coercion behavior\n";
|
" coercion behavior. Use PYTHONCOERCECLOCALE=warn to request display of\n"
|
||||||
|
" locale coercion and locale compatibility warnings on stderr.\n";
|
||||||
|
|
||||||
static int
|
static int
|
||||||
usage(int exitcode, const wchar_t* program)
|
usage(int exitcode, const wchar_t* program)
|
||||||
|
@ -125,7 +125,6 @@ usage(int exitcode, const wchar_t* program)
|
||||||
fprintf(f, usage_4, (wint_t)DELIM);
|
fprintf(f, usage_4, (wint_t)DELIM);
|
||||||
fprintf(f, usage_5, (wint_t)DELIM, PYTHONHOMEHELP);
|
fprintf(f, usage_5, (wint_t)DELIM, PYTHONHOMEHELP);
|
||||||
fputs(usage_6, f);
|
fputs(usage_6, f);
|
||||||
fputs(usage_7, f);
|
|
||||||
}
|
}
|
||||||
return exitcode;
|
return exitcode;
|
||||||
}
|
}
|
||||||
|
|
|
@ -356,6 +356,10 @@ _Py_LegacyLocaleDetected(void)
|
||||||
{
|
{
|
||||||
#ifndef MS_WINDOWS
|
#ifndef MS_WINDOWS
|
||||||
/* On non-Windows systems, the C locale is considered a legacy locale */
|
/* On non-Windows systems, the C locale is considered a legacy locale */
|
||||||
|
/* XXX (ncoghlan): some platforms (notably Mac OS X) don't appear to treat
|
||||||
|
* the POSIX locale as a simple alias for the C locale, so
|
||||||
|
* we may also want to check for that explicitly.
|
||||||
|
*/
|
||||||
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
|
const char *ctype_loc = setlocale(LC_CTYPE, NULL);
|
||||||
return ctype_loc != NULL && strcmp(ctype_loc, "C") == 0;
|
return ctype_loc != NULL && strcmp(ctype_loc, "C") == 0;
|
||||||
#else
|
#else
|
||||||
|
@ -364,6 +368,30 @@ _Py_LegacyLocaleDetected(void)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const char *_C_LOCALE_WARNING =
|
||||||
|
"Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
|
||||||
|
"encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
|
||||||
|
"C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
|
||||||
|
"locales is recommended.\n";
|
||||||
|
|
||||||
|
static int
|
||||||
|
_legacy_locale_warnings_enabled(void)
|
||||||
|
{
|
||||||
|
const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
|
||||||
|
return (coerce_c_locale != NULL &&
|
||||||
|
strncmp(coerce_c_locale, "warn", 5) == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
_emit_stderr_warning_for_legacy_locale(void)
|
||||||
|
{
|
||||||
|
if (_legacy_locale_warnings_enabled()) {
|
||||||
|
if (_Py_LegacyLocaleDetected()) {
|
||||||
|
fprintf(stderr, "%s", _C_LOCALE_WARNING);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
typedef struct _CandidateLocale {
|
typedef struct _CandidateLocale {
|
||||||
const char *locale_name; /* The locale to try as a coercion target */
|
const char *locale_name; /* The locale to try as a coercion target */
|
||||||
} _LocaleCoercionTarget;
|
} _LocaleCoercionTarget;
|
||||||
|
@ -371,10 +399,17 @@ typedef struct _CandidateLocale {
|
||||||
static _LocaleCoercionTarget _TARGET_LOCALES[] = {
|
static _LocaleCoercionTarget _TARGET_LOCALES[] = {
|
||||||
{"C.UTF-8"},
|
{"C.UTF-8"},
|
||||||
{"C.utf8"},
|
{"C.utf8"},
|
||||||
{"UTF-8"},
|
/* {"UTF-8"}, */
|
||||||
{NULL}
|
{NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to
|
||||||
|
* problems encountered on *BSD systems with those test cases
|
||||||
|
* For additional details see:
|
||||||
|
* nl_langinfo CODESET error: https://bugs.python.org/issue30647
|
||||||
|
* locale handling differences: https://bugs.python.org/issue30672
|
||||||
|
*/
|
||||||
|
|
||||||
static char *
|
static char *
|
||||||
get_default_standard_stream_error_handler(void)
|
get_default_standard_stream_error_handler(void)
|
||||||
{
|
{
|
||||||
|
@ -419,7 +454,9 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target)
|
||||||
"Error setting LC_CTYPE, skipping C locale coercion\n");
|
"Error setting LC_CTYPE, skipping C locale coercion\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc);
|
if (_legacy_locale_warnings_enabled()) {
|
||||||
|
fprintf(stderr, _C_LOCALE_COERCION_WARNING, newloc);
|
||||||
|
}
|
||||||
|
|
||||||
/* Reconfigure with the overridden environment variables */
|
/* Reconfigure with the overridden environment variables */
|
||||||
setlocale(LC_ALL, "");
|
setlocale(LC_ALL, "");
|
||||||
|
@ -465,26 +502,6 @@ _Py_CoerceLegacyLocale(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef PY_WARN_ON_C_LOCALE
|
|
||||||
static const char *_C_LOCALE_WARNING =
|
|
||||||
"Python runtime initialized with LC_CTYPE=C (a locale with default ASCII "
|
|
||||||
"encoding), which may cause Unicode compatibility problems. Using C.UTF-8, "
|
|
||||||
"C.utf8, or UTF-8 (if available) as alternative Unicode-compatible "
|
|
||||||
"locales is recommended.\n";
|
|
||||||
|
|
||||||
static void
|
|
||||||
_emit_stderr_warning_for_c_locale(void)
|
|
||||||
{
|
|
||||||
const char *coerce_c_locale = getenv("PYTHONCOERCECLOCALE");
|
|
||||||
if (coerce_c_locale == NULL || strncmp(coerce_c_locale, "0", 2) != 0) {
|
|
||||||
if (_Py_LegacyLocaleDetected()) {
|
|
||||||
fprintf(stderr, "%s", _C_LOCALE_WARNING);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/* Global initializations. Can be undone by Py_Finalize(). Don't
|
/* Global initializations. Can be undone by Py_Finalize(). Don't
|
||||||
call this twice without an intervening Py_Finalize() call.
|
call this twice without an intervening Py_Finalize() call.
|
||||||
|
|
||||||
|
@ -561,9 +578,7 @@ void _Py_InitializeCore(const _PyCoreConfig *config)
|
||||||
the locale's charset without having to switch
|
the locale's charset without having to switch
|
||||||
locales. */
|
locales. */
|
||||||
setlocale(LC_CTYPE, "");
|
setlocale(LC_CTYPE, "");
|
||||||
#ifdef PY_WARN_ON_C_LOCALE
|
_emit_stderr_warning_for_legacy_locale();
|
||||||
_emit_stderr_warning_for_c_locale();
|
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue