From b0caf329815120acf50287e29858093d328b0e3c Mon Sep 17 00:00:00 2001 From: Ronald Oussoren Date: Thu, 29 Aug 2019 06:33:52 +0200 Subject: [PATCH] bpo-18378: Recognize "UTF-8" as a valid name in locale._parse_localename (GH-14736) --- Lib/locale.py | 4 +++ Lib/test/test_locale.py | 36 +++++++++++++++++++ .../2019-07-13-13-40-12.bpo-18378.NHcojp.rst | 1 + 3 files changed, 41 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2019-07-13-13-40-12.bpo-18378.NHcojp.rst diff --git a/Lib/locale.py b/Lib/locale.py index f3d3973d038..dd8a08524a0 100644 --- a/Lib/locale.py +++ b/Lib/locale.py @@ -492,6 +492,10 @@ def _parse_localename(localename): return tuple(code.split('.')[:2]) elif code == 'C': return None, None + elif code == 'UTF-8': + # On macOS "LC_CTYPE=UTF-8" is a valid locale setting + # for getting UTF-8 handling for text. + return None, 'UTF-8' raise ValueError('unknown locale: %s' % localename) def _build_localename(localetuple): diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py index 792a15c50f9..c5d8e269d63 100644 --- a/Lib/test/test_locale.py +++ b/Lib/test/test_locale.py @@ -493,6 +493,42 @@ class NormalizeTest(unittest.TestCase): class TestMiscellaneous(unittest.TestCase): + def test_defaults_UTF8(self): + # Issue #18378: on (at least) macOS setting LC_CTYPE to "UTF-8" is + # valid. Furthermore LC_CTYPE=UTF-8 is used by the UTF-8 locale coercion + # during interpreter startup (on macOS).
+ import _locale + import os + + self.assertEqual(locale._parse_localename('UTF-8'), (None, 'UTF-8')) + + if hasattr(_locale, '_getdefaultlocale'): + orig_getlocale = _locale._getdefaultlocale + del _locale._getdefaultlocale + else: + orig_getlocale = None + + orig_env = {} + try: + for key in ('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE'): + if key in os.environ: + orig_env[key] = os.environ[key] + del os.environ[key] + + os.environ['LC_CTYPE'] = 'UTF-8' + + self.assertEqual(locale.getdefaultlocale(), (None, 'UTF-8')) + + finally: + for k in orig_env: + os.environ[k] = orig_env[k] + + if 'LC_CTYPE' not in orig_env: + del os.environ['LC_CTYPE'] + + if orig_getlocale is not None: + _locale._getdefaultlocale = orig_getlocale + def test_getpreferredencoding(self): # Invoke getpreferredencoding to make sure it does not cause exceptions. enc = locale.getpreferredencoding() diff --git a/Misc/NEWS.d/next/Library/2019-07-13-13-40-12.bpo-18378.NHcojp.rst b/Misc/NEWS.d/next/Library/2019-07-13-13-40-12.bpo-18378.NHcojp.rst new file mode 100644 index 00000000000..6dda8abf15d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-07-13-13-40-12.bpo-18378.NHcojp.rst @@ -0,0 +1 @@ +Recognize "UTF-8" as a valid value for LC_CTYPE in locale._parse_localename.