bpo-42208: Add _locale._get_locale_encoding() (GH-23052)

* Add a new _locale._get_locale_encoding() function to get the
  current locale encoding.
* Modify locale.getpreferredencoding() to use it.
* Remove the _bootlocale module.
This commit is contained in:
Victor Stinner 2020-10-31 01:32:11 +01:00 committed by GitHub
parent 710e826307
commit b62bdf71ea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 86 additions and 103 deletions

View File

@ -1,46 +0,0 @@
"""A minimal subset of the locale module used at interpreter startup
(imported by the _io module), in order to reduce startup time.
Don't import directly from third-party code; use the `locale` module instead!
"""
import sys
import _locale
# Pick the implementation of getpreferredencoding() once, at import time,
# based on the platform and on what the _locale extension module provides.
if sys.platform.startswith("win"):
    def getpreferredencoding(do_setlocale=True):
        """Return 'UTF-8' in UTF-8 mode, otherwise the encoding reported by
        _locale._getdefaultlocale(). *do_setlocale* is ignored."""
        if not sys.flags.utf8_mode:
            return _locale._getdefaultlocale()[1]
        return 'UTF-8'
elif hasattr(_locale, 'CODESET'):
    def getpreferredencoding(do_setlocale=True):
        """Return 'UTF-8' in UTF-8 mode, otherwise nl_langinfo(CODESET).

        Callers must pass do_setlocale=False.
        """
        assert not do_setlocale
        if sys.flags.utf8_mode:
            return 'UTF-8'
        codeset = _locale.nl_langinfo(_locale.CODESET)
        if codeset or sys.platform != 'darwin':
            return codeset
        # nl_langinfo can return an empty string when the setting has an
        # invalid value.  Default to UTF-8 in that case because UTF-8 is
        # the default charset on OSX and returning nothing will crash the
        # interpreter.
        return 'UTF-8'
elif hasattr(sys, 'getandroidapilevel'):
    # On Android langinfo.h and CODESET are missing, and UTF-8 is
    # always used in mbstowcs() and wcstombs().
    def getpreferredencoding(do_setlocale=True):
        """Return 'UTF-8' unconditionally."""
        return 'UTF-8'
else:
    def getpreferredencoding(do_setlocale=True):
        """Return 'UTF-8' in UTF-8 mode, otherwise defer to
        locale.getpreferredencoding(do_setlocale)."""
        if sys.flags.utf8_mode:
            return 'UTF-8'
        # This path for legacy systems needs the more complex
        # getdefaultlocale() function, import the full locale module.
        import locale
        return locale.getpreferredencoding(do_setlocale)

View File

@ -619,53 +619,49 @@ def resetlocale(category=LC_ALL):
""" """
_setlocale(category, _build_localename(getdefaultlocale())) _setlocale(category, _build_localename(getdefaultlocale()))
if sys.platform.startswith("win"):
# On Win32, this will return the ANSI code page
def getpreferredencoding(do_setlocale = True):
"""Return the charset that the user is likely using."""
if sys.flags.utf8_mode:
return 'UTF-8'
import _bootlocale
return _bootlocale.getpreferredencoding(False)
else:
# On Unix, if CODESET is available, use that.
try: try:
CODESET from _locale import _get_locale_encoding
except NameError: except ImportError:
def _get_locale_encoding():
if hasattr(sys, 'getandroidapilevel'): if hasattr(sys, 'getandroidapilevel'):
# On Android langinfo.h and CODESET are missing, and UTF-8 is # On Android langinfo.h and CODESET are missing, and UTF-8 is
# always used in mbstowcs() and wcstombs(). # always used in mbstowcs() and wcstombs().
def getpreferredencoding(do_setlocale = True):
return 'UTF-8' return 'UTF-8'
else:
# Fall back to parsing environment variables :-(
def getpreferredencoding(do_setlocale = True):
"""Return the charset that the user is likely using,
by looking at environment variables."""
if sys.flags.utf8_mode: if sys.flags.utf8_mode:
return 'UTF-8' return 'UTF-8'
res = getdefaultlocale()[1] encoding = getdefaultlocale()[1]
if res is None: if encoding is None:
# LANG not set, default conservatively to ASCII # LANG not set, default conservatively to ASCII
res = 'ascii' encoding = 'ascii'
return res return encoding
try:
CODESET
except NameError:
def getpreferredencoding(do_setlocale=True):
"""Return the charset that the user is likely using."""
return _get_locale_encoding()
else: else:
# On Unix, if CODESET is available, use that.
def getpreferredencoding(do_setlocale=True): def getpreferredencoding(do_setlocale=True):
"""Return the charset that the user is likely using, """Return the charset that the user is likely using,
according to the system configuration.""" according to the system configuration."""
if sys.flags.utf8_mode: if sys.flags.utf8_mode:
return 'UTF-8' return 'UTF-8'
import _bootlocale
if do_setlocale: if not do_setlocale:
oldloc = setlocale(LC_CTYPE) return _get_locale_encoding()
old_loc = setlocale(LC_CTYPE)
try:
try: try:
setlocale(LC_CTYPE, "") setlocale(LC_CTYPE, "")
except Error: except Error:
pass pass
result = _bootlocale.getpreferredencoding(False) return _get_locale_encoding()
if do_setlocale: finally:
setlocale(LC_CTYPE, oldloc) setlocale(LC_CTYPE, old_loc)
return result
### Database ### Database

View File

@ -3,7 +3,7 @@ import locale
import mimetypes import mimetypes
import pathlib import pathlib
import sys import sys
import unittest import unittest.mock
from test import support from test import support
from test.support import os_helper from test.support import os_helper
@ -71,13 +71,13 @@ class MimeTypesTestCase(unittest.TestCase):
# bpo-41048: read_mime_types should read the rule file with 'utf-8' encoding. # bpo-41048: read_mime_types should read the rule file with 'utf-8' encoding.
# Not with locale encoding. _bootlocale has been imported because io.open(...) # Not with locale encoding. _bootlocale has been imported because io.open(...)
# uses it. # uses it.
with os_helper.temp_dir() as directory:
data = "application/no-mans-land Fran\u00E7ais" data = "application/no-mans-land Fran\u00E7ais"
file = pathlib.Path(directory, "sample.mimetype") filename = "filename"
file.write_text(data, encoding='utf-8') fp = io.StringIO(data)
import _bootlocale with unittest.mock.patch.object(mimetypes, 'open',
with support.swap_attr(_bootlocale, 'getpreferredencoding', lambda do_setlocale=True: 'ASCII'): return_value=fp) as mock_open:
mime_dict = mimetypes.read_mime_types(file) mime_dict = mimetypes.read_mime_types(filename)
mock_open.assert_called_with(filename, encoding='utf-8')
eq(mime_dict[".Français"], "application/no-mans-land") eq(mime_dict[".Français"], "application/no-mans-land")
def test_non_standard_types(self): def test_non_standard_types(self):

View File

@ -768,9 +768,24 @@ _locale_bind_textdomain_codeset_impl(PyObject *module, const char *domain,
} }
Py_RETURN_NONE; Py_RETURN_NONE;
} }
#endif #endif // HAVE_BIND_TEXTDOMAIN_CODESET
#endif // HAVE_LIBINTL_H
/*[clinic input]
_locale._get_locale_encoding
Get the current locale encoding.
[clinic start generated code]*/
static PyObject *
_locale__get_locale_encoding_impl(PyObject *module)
/*[clinic end generated code: output=e8e2f6f6f184591a input=513d9961d2f45c76]*/
{
    /* Thin wrapper exposing the interpreter-internal helper to Python as
       _locale._get_locale_encoding(): the helper's result is returned
       unchanged and `module` is not used.
       NOTE(review): presumably _Py_GetLocaleEncoding() returns a new str
       reference, or NULL with an exception set -- confirm against its
       definition, which is not visible in this hunk. */
    return _Py_GetLocaleEncoding();
}
#endif
static struct PyMethodDef PyLocale_Methods[] = { static struct PyMethodDef PyLocale_Methods[] = {
_LOCALE_SETLOCALE_METHODDEF _LOCALE_SETLOCALE_METHODDEF
@ -797,6 +812,7 @@ static struct PyMethodDef PyLocale_Methods[] = {
_LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
#endif #endif
#endif #endif
_LOCALE__GET_LOCALE_ENCODING_METHODDEF
{NULL, NULL} {NULL, NULL}
}; };

View File

@ -545,6 +545,24 @@ exit:
#endif /* defined(HAVE_LIBINTL_H) && defined(HAVE_BIND_TEXTDOMAIN_CODESET) */ #endif /* defined(HAVE_LIBINTL_H) && defined(HAVE_BIND_TEXTDOMAIN_CODESET) */
PyDoc_STRVAR(_locale__get_locale_encoding__doc__,
"_get_locale_encoding($module, /)\n"
"--\n"
"\n"
"Get the current locale encoding.");
#define _LOCALE__GET_LOCALE_ENCODING_METHODDEF \
{"_get_locale_encoding", (PyCFunction)_locale__get_locale_encoding, METH_NOARGS, _locale__get_locale_encoding__doc__},
static PyObject *
_locale__get_locale_encoding_impl(PyObject *module);
static PyObject *
_locale__get_locale_encoding(PyObject *module, PyObject *Py_UNUSED(ignored))
{
return _locale__get_locale_encoding_impl(module);
}
#ifndef _LOCALE_STRCOLL_METHODDEF #ifndef _LOCALE_STRCOLL_METHODDEF
#define _LOCALE_STRCOLL_METHODDEF #define _LOCALE_STRCOLL_METHODDEF
#endif /* !defined(_LOCALE_STRCOLL_METHODDEF) */ #endif /* !defined(_LOCALE_STRCOLL_METHODDEF) */
@ -584,4 +602,4 @@ exit:
#ifndef _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF #ifndef _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
#define _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF #define _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
#endif /* !defined(_LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF) */ #endif /* !defined(_LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF) */
/*[clinic end generated code: output=fe944779cd572d8e input=a9049054013a1b77]*/ /*[clinic end generated code: output=cd703c8a3a75fcf4 input=a9049054013a1b77]*/

View File

@ -1572,7 +1572,6 @@
<Compile Include="zoneinfo\__init__.py" /> <Compile Include="zoneinfo\__init__.py" />
<Compile Include="zoneinfo\_tzpath.py" /> <Compile Include="zoneinfo\_tzpath.py" />
<Compile Include="zoneinfo\_zoneinfo.py" /> <Compile Include="zoneinfo\_zoneinfo.py" />
<Compile Include="_bootlocale.py" />
<Compile Include="_collections_abc.py" /> <Compile Include="_collections_abc.py" />
<Compile Include="_compat_pickle.py" /> <Compile Include="_compat_pickle.py" />
<Compile Include="_compression.py" /> <Compile Include="_compression.py" />