From fd4722cacf4885c29d358b8de6718b51a8149fa3 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou
Date: Sat, 12 Oct 2013 00:13:50 +0200
Subject: [PATCH] Issue #9548: Add a minimal "_bootlocale" module that is imported by the _io module instead of the full locale module.

---
 Lib/_bootlocale.py          | 34 ++++++++++++++++++++++++++++++++++
 Lib/locale.py               | 21 +++++----------------
 Lib/site.py                 |  4 ++--
 Lib/test/test_subprocess.py |  7 ++++---
 Misc/NEWS                   |  3 +++
 Modules/_io/_iomodule.c     |  2 +-
 6 files changed, 49 insertions(+), 22 deletions(-)
 create mode 100644 Lib/_bootlocale.py

diff --git a/Lib/_bootlocale.py b/Lib/_bootlocale.py
new file mode 100644
index 00000000000..efe402375ae
--- /dev/null
+++ b/Lib/_bootlocale.py
@@ -0,0 +1,34 @@
+"""A minimal subset of the locale module used at interpreter startup
+(imported by the _io module), in order to reduce startup time.
+
+Don't import directly from third-party code; use the `locale` module instead!
+"""
+
+import sys
+import _locale
+
+if sys.platform.startswith("win"):
+    def getpreferredencoding(do_setlocale=True):
+        return _locale._getdefaultlocale()[1]
+else:
+    try:
+        _locale.CODESET
+    except AttributeError:  # _locale has no CODESET attribute here
+        def getpreferredencoding(do_setlocale=True):
+            # This path for legacy systems needs the more complex
+            # getdefaultlocale() function, import the full locale module.
+            import locale
+            return locale.getpreferredencoding(do_setlocale)
+    else:
+        def getpreferredencoding(do_setlocale=True):
+            assert not do_setlocale
+            result = _locale.nl_langinfo(_locale.CODESET)
+            if not result and sys.platform == 'darwin':
+                # nl_langinfo can return an empty string
+                # when the setting has an invalid value.
+                # Default to UTF-8 in that case because
+                # UTF-8 is the default charset on OSX and
+                # returning nothing will crash the
+                # interpreter.
+                result = 'UTF-8'
+            return result
diff --git a/Lib/locale.py b/Lib/locale.py
index d2a885d519f..2e82c952ace 100644
--- a/Lib/locale.py
+++ b/Lib/locale.py
@@ -554,8 +554,8 @@ if sys.platform.startswith("win"):
     # On Win32, this will return the ANSI code page
     def getpreferredencoding(do_setlocale = True):
         """Return the charset that the user is likely using."""
-        import _locale
-        return _locale._getdefaultlocale()[1]
+        import _bootlocale
+        return _bootlocale.getpreferredencoding(False)
 else:
     # On Unix, if CODESET is available, use that.
     try:
@@ -574,27 +574,16 @@ else:
         def getpreferredencoding(do_setlocale = True):
             """Return the charset that the user is likely using,
             according to the system configuration."""
+            import _bootlocale
             if do_setlocale:
                 oldloc = setlocale(LC_CTYPE)
                 try:
                     setlocale(LC_CTYPE, "")
                 except Error:
                     pass
-                result = nl_langinfo(CODESET)
-                if not result and sys.platform == 'darwin':
-                    # nl_langinfo can return an empty string
-                    # when the setting has an invalid value.
-                    # Default to UTF-8 in that case because
-                    # UTF-8 is the default charset on OSX and
-                    # returning nothing will crash the
-                    # interpreter.
-                    result = 'UTF-8'
+            result = _bootlocale.getpreferredencoding(False)
+            if do_setlocale:
                 setlocale(LC_CTYPE, oldloc)
-            else:
-                result = nl_langinfo(CODESET)
-                if not result and sys.platform == 'darwin':
-                    # See above for explanation
-                    result = 'UTF-8'
             return result
 
 
diff --git a/Lib/site.py b/Lib/site.py
index e1fa30eacdf..4ac2860003e 100644
--- a/Lib/site.py
+++ b/Lib/site.py
@@ -426,8 +426,8 @@ def aliasmbcs():
     while they are always available as "mbcs" in each locale. Make
     them usable by aliasing to "mbcs" in such a case."""
     if sys.platform == 'win32':
-        import locale, codecs
-        enc = locale.getdefaultlocale()[1]
+        import _bootlocale, codecs
+        enc = _bootlocale.getpreferredencoding(False)
         if enc.startswith('cp'):            # "cp***" ?
             try:
                 codecs.lookup(enc)
diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py
index 637b1bff856..840577dba1e 100644
--- a/Lib/test/test_subprocess.py
+++ b/Lib/test/test_subprocess.py
@@ -883,8 +883,9 @@ class ProcessTestCase(BaseTestCase):
         #
         # UTF-16 and UTF-32-BE are sufficient to check both with BOM and
         # without, and UTF-16 and UTF-32.
+        import _bootlocale
         for encoding in ['utf-16', 'utf-32-be']:
-            old_getpreferredencoding = locale.getpreferredencoding
+            old_getpreferredencoding = _bootlocale.getpreferredencoding
             # Indirectly via io.TextIOWrapper, Popen() defaults to
             # locale.getpreferredencoding(False) and earlier in Python 3.2 to
             # locale.getpreferredencoding().
@@ -895,7 +896,7 @@ class ProcessTestCase(BaseTestCase):
                     encoding)
             args = [sys.executable, '-c', code]
             try:
-                locale.getpreferredencoding = getpreferredencoding
+                _bootlocale.getpreferredencoding = getpreferredencoding
                 # We set stdin to be non-None because, as of this writing,
                 # a different code path is used when the number of pipes is
                 # zero or one.
@@ -904,7 +905,7 @@ class ProcessTestCase(BaseTestCase):
                                          stdout=subprocess.PIPE)
                 stdout, stderr = popen.communicate(input='')
             finally:
-                locale.getpreferredencoding = old_getpreferredencoding
+                _bootlocale.getpreferredencoding = old_getpreferredencoding
             self.assertEqual(stdout, '1\n2\n3\n4')
 
     def test_no_leaking(self):
diff --git a/Misc/NEWS b/Misc/NEWS
index 9fe1fa6566a..18df7bd955e 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -36,6 +36,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #9548: Add a minimal "_bootlocale" module that is imported by the
+  _io module instead of the full locale module.
+
 - Issue #18764: remove the 'print' alias for the PDB 'p' command so that it no
   longer shadows the print function.
 
diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c
index 14457e80cbe..eb701d4400f 100644
--- a/Modules/_io/_iomodule.c
+++ b/Modules/_io/_iomodule.c
@@ -546,7 +546,7 @@ _PyIO_get_locale_module(_PyIO_State *state)
         }
         Py_CLEAR(state->locale_module);
     }
-    mod = PyImport_ImportModule("locale");
+    mod = PyImport_ImportModule("_bootlocale");
     if (mod == NULL)
         return NULL;
     state->locale_module = PyWeakref_NewRef(mod, NULL);