bpo-42208: Add _locale._get_locale_encoding() (GH-23052)
* Add a new _locale._get_locale_encoding() function to get the current locale encoding.
* Modify locale.getpreferredencoding() to use it.
* Remove the _bootlocale module.
This commit is contained in:
parent
710e826307
commit
b62bdf71ea
|
@ -1,46 +0,0 @@
|
|||
"""A minimal subset of the locale module used at interpreter startup
|
||||
(imported by the _io module), in order to reduce startup time.
|
||||
|
||||
Don't import directly from third-party code; use the `locale` module instead!
|
||||
"""
|
||||
|
||||
import sys
|
||||
import _locale
|
||||
|
||||
if sys.platform.startswith("win"):
|
||||
def getpreferredencoding(do_setlocale=True):
|
||||
# Windows variant (chosen when sys.platform starts with "win").
# do_setlocale is accepted but never read in this variant.
if sys.flags.utf8_mode:
|
||||
# UTF-8 mode takes precedence over whatever the locale reports.
return 'UTF-8'
|
||||
# Otherwise use item [1] of the value returned by _locale._getdefaultlocale().
return _locale._getdefaultlocale()[1]
|
||||
else:
|
||||
try:
|
||||
_locale.CODESET
|
||||
except AttributeError:
|
||||
if hasattr(sys, 'getandroidapilevel'):
|
||||
# On Android langinfo.h and CODESET are missing, and UTF-8 is
|
||||
# always used in mbstowcs() and wcstombs().
|
||||
def getpreferredencoding(do_setlocale=True):
|
||||
# Android variant: do_setlocale is never read and the result is always 'UTF-8'.
return 'UTF-8'
|
||||
else:
|
||||
def getpreferredencoding(do_setlocale=True):
|
||||
# Fallback variant, defined when _locale has no CODESET attribute and
# sys has no getandroidapilevel: returns 'UTF-8' in UTF-8 mode, otherwise
# forwards do_setlocale to locale.getpreferredencoding().
if sys.flags.utf8_mode:
|
||||
return 'UTF-8'
|
||||
# This path for legacy systems needs the more complex
|
||||
# getdefaultlocale() function, import the full locale module.
|
||||
# The import is done inside the function, so it only runs when this
# variant is actually called.
import locale
|
||||
return locale.getpreferredencoding(do_setlocale)
|
||||
else:
|
||||
def getpreferredencoding(do_setlocale=True):
|
||||
# CODESET variant: returns 'UTF-8' in UTF-8 mode, otherwise the string
# reported by _locale.nl_langinfo(_locale.CODESET).
# This variant never calls setlocale(); the assert below rejects a truthy
# do_setlocale, so callers have to pass do_setlocale=False explicitly.
assert not do_setlocale
|
||||
if sys.flags.utf8_mode:
|
||||
return 'UTF-8'
|
||||
result = _locale.nl_langinfo(_locale.CODESET)
|
||||
# The empty-result fallback below is applied on darwin only; on other
# platforms an empty result is returned as-is.
if not result and sys.platform == 'darwin':
|
||||
# nl_langinfo can return an empty string
|
||||
# when the setting has an invalid value.
|
||||
# Default to UTF-8 in that case because
|
||||
# UTF-8 is the default charset on OSX and
|
||||
# returning nothing will crash the
|
||||
# interpreter.
|
||||
result = 'UTF-8'
|
||||
return result
|
|
@ -619,53 +619,49 @@ def resetlocale(category=LC_ALL):
|
|||
"""
|
||||
_setlocale(category, _build_localename(getdefaultlocale()))
|
||||
|
||||
if sys.platform.startswith("win"):
|
||||
# On Win32, this will return the ANSI code page
|
||||
def getpreferredencoding(do_setlocale = True):
|
||||
"""Return the charset that the user is likely using."""
|
||||
if sys.flags.utf8_mode:
|
||||
return 'UTF-8'
|
||||
import _bootlocale
|
||||
return _bootlocale.getpreferredencoding(False)
|
||||
else:
|
||||
# On Unix, if CODESET is available, use that.
|
||||
try:
|
||||
CODESET
|
||||
except NameError:
|
||||
|
||||
try:
|
||||
from _locale import _get_locale_encoding
|
||||
except ImportError:
|
||||
def _get_locale_encoding():
|
||||
if hasattr(sys, 'getandroidapilevel'):
|
||||
# On Android langinfo.h and CODESET are missing, and UTF-8 is
|
||||
# always used in mbstowcs() and wcstombs().
|
||||
def getpreferredencoding(do_setlocale = True):
|
||||
return 'UTF-8'
|
||||
else:
|
||||
# Fall back to parsing environment variables :-(
|
||||
def getpreferredencoding(do_setlocale = True):
|
||||
"""Return the charset that the user is likely using,
|
||||
by looking at environment variables."""
|
||||
if sys.flags.utf8_mode:
|
||||
return 'UTF-8'
|
||||
res = getdefaultlocale()[1]
|
||||
if res is None:
|
||||
# LANG not set, default conservatively to ASCII
|
||||
res = 'ascii'
|
||||
return res
|
||||
else:
|
||||
def getpreferredencoding(do_setlocale = True):
|
||||
"""Return the charset that the user is likely using,
|
||||
according to the system configuration."""
|
||||
if sys.flags.utf8_mode:
|
||||
return 'UTF-8'
|
||||
import _bootlocale
|
||||
if do_setlocale:
|
||||
oldloc = setlocale(LC_CTYPE)
|
||||
try:
|
||||
setlocale(LC_CTYPE, "")
|
||||
except Error:
|
||||
pass
|
||||
result = _bootlocale.getpreferredencoding(False)
|
||||
if do_setlocale:
|
||||
setlocale(LC_CTYPE, oldloc)
|
||||
return result
|
||||
return 'UTF-8'
|
||||
if sys.flags.utf8_mode:
|
||||
return 'UTF-8'
|
||||
encoding = getdefaultlocale()[1]
|
||||
if encoding is None:
|
||||
# LANG not set, default conservatively to ASCII
|
||||
encoding = 'ascii'
|
||||
return encoding
|
||||
|
||||
try:
|
||||
CODESET
|
||||
except NameError:
|
||||
def getpreferredencoding(do_setlocale=True):
|
||||
"""Return the charset that the user is likely using."""
|
||||
# Variant used when the name CODESET is not defined (NameError branch).
# do_setlocale is never read here; the result comes straight from
# _get_locale_encoding().
return _get_locale_encoding()
|
||||
else:
|
||||
# On Unix, if CODESET is available, use that.
|
||||
def getpreferredencoding(do_setlocale=True):
|
||||
"""Return the charset that the user is likely using,
|
||||
according to the system configuration."""
|
||||
# UTF-8 mode short-circuits before any locale call is made.
if sys.flags.utf8_mode:
|
||||
return 'UTF-8'
|
||||
|
||||
# With do_setlocale false, report the encoding of the locale as it is
# currently set, without calling setlocale() at all.
if not do_setlocale:
|
||||
return _get_locale_encoding()
|
||||
|
||||
# Remember the current LC_CTYPE setting so the finally clause can restore it.
old_loc = setlocale(LC_CTYPE)
|
||||
try:
|
||||
try:
|
||||
# Switch LC_CTYPE to "" before querying the encoding.
setlocale(LC_CTYPE, "")
|
||||
except Error:
|
||||
# Best effort: if the switch fails, query whatever locale is in effect.
pass
|
||||
return _get_locale_encoding()
|
||||
finally:
|
||||
# Runs on both the normal return and on an exception from the query.
setlocale(LC_CTYPE, old_loc)
|
||||
|
||||
|
||||
### Database
|
||||
|
|
|
@ -3,7 +3,7 @@ import locale
|
|||
import mimetypes
|
||||
import pathlib
|
||||
import sys
|
||||
import unittest
|
||||
import unittest.mock
|
||||
|
||||
from test import support
|
||||
from test.support import os_helper
|
||||
|
@ -71,14 +71,14 @@ class MimeTypesTestCase(unittest.TestCase):
|
|||
# bpo-41048: read_mime_types should read the rule file with 'utf-8' encoding.
|
||||
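# Not with locale encoding. The previous version of this test patched
|
||||
# _bootlocale.getpreferredencoding because io.open(...) used it; the test now
# mocks mimetypes.open and checks that it is called with encoding='utf-8'.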
|
||||
with os_helper.temp_dir() as directory:
|
||||
data = "application/no-mans-land Fran\u00E7ais"
|
||||
file = pathlib.Path(directory, "sample.mimetype")
|
||||
file.write_text(data, encoding='utf-8')
|
||||
import _bootlocale
|
||||
with support.swap_attr(_bootlocale, 'getpreferredencoding', lambda do_setlocale=True: 'ASCII'):
|
||||
mime_dict = mimetypes.read_mime_types(file)
|
||||
eq(mime_dict[".Français"], "application/no-mans-land")
|
||||
data = "application/no-mans-land Fran\u00E7ais"
|
||||
filename = "filename"
|
||||
fp = io.StringIO(data)
|
||||
with unittest.mock.patch.object(mimetypes, 'open',
|
||||
return_value=fp) as mock_open:
|
||||
mime_dict = mimetypes.read_mime_types(filename)
|
||||
mock_open.assert_called_with(filename, encoding='utf-8')
|
||||
eq(mime_dict[".Français"], "application/no-mans-land")
|
||||
|
||||
def test_non_standard_types(self):
|
||||
eq = self.assertEqual
|
||||
|
|
|
@ -768,9 +768,24 @@ _locale_bind_textdomain_codeset_impl(PyObject *module, const char *domain,
|
|||
}
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
#endif
|
||||
#endif // HAVE_BIND_TEXTDOMAIN_CODESET
|
||||
|
||||
#endif // HAVE_LIBINTL_H
|
||||
|
||||
|
||||
/*[clinic input]
|
||||
_locale._get_locale_encoding
|
||||
|
||||
Get the current locale encoding.
|
||||
[clinic start generated code]*/
|
||||
|
||||
static PyObject *
|
||||
_locale__get_locale_encoding_impl(PyObject *module)
|
||||
/*[clinic end generated code: output=e8e2f6f6f184591a input=513d9961d2f45c76]*/
|
||||
{
|
||||
// The module argument is unused; the result of _Py_GetLocaleEncoding()
// is returned unchanged.
return _Py_GetLocaleEncoding();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static struct PyMethodDef PyLocale_Methods[] = {
|
||||
_LOCALE_SETLOCALE_METHODDEF
|
||||
|
@ -797,6 +812,7 @@ static struct PyMethodDef PyLocale_Methods[] = {
|
|||
_LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
|
||||
#endif
|
||||
#endif
|
||||
_LOCALE__GET_LOCALE_ENCODING_METHODDEF
|
||||
{NULL, NULL}
|
||||
};
|
||||
|
||||
|
|
|
@ -545,6 +545,24 @@ exit:
|
|||
|
||||
#endif /* defined(HAVE_LIBINTL_H) && defined(HAVE_BIND_TEXTDOMAIN_CODESET) */
|
||||
|
||||
PyDoc_STRVAR(_locale__get_locale_encoding__doc__,
|
||||
"_get_locale_encoding($module, /)\n"
|
||||
"--\n"
|
||||
"\n"
|
||||
"Get the current locale encoding.");
|
||||
|
||||
#define _LOCALE__GET_LOCALE_ENCODING_METHODDEF \
|
||||
{"_get_locale_encoding", (PyCFunction)_locale__get_locale_encoding, METH_NOARGS, _locale__get_locale_encoding__doc__},
|
||||
|
||||
static PyObject *
|
||||
_locale__get_locale_encoding_impl(PyObject *module);
|
||||
|
||||
static PyObject *
|
||||
_locale__get_locale_encoding(PyObject *module, PyObject *Py_UNUSED(ignored))
|
||||
{
|
||||
return _locale__get_locale_encoding_impl(module);
|
||||
}
|
||||
|
||||
#ifndef _LOCALE_STRCOLL_METHODDEF
|
||||
#define _LOCALE_STRCOLL_METHODDEF
|
||||
#endif /* !defined(_LOCALE_STRCOLL_METHODDEF) */
|
||||
|
@ -584,4 +602,4 @@ exit:
|
|||
#ifndef _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
|
||||
#define _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
|
||||
#endif /* !defined(_LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF) */
|
||||
/*[clinic end generated code: output=fe944779cd572d8e input=a9049054013a1b77]*/
|
||||
/*[clinic end generated code: output=cd703c8a3a75fcf4 input=a9049054013a1b77]*/
|
||||
|
|
|
@ -1572,7 +1572,6 @@
|
|||
<Compile Include="zoneinfo\__init__.py" />
|
||||
<Compile Include="zoneinfo\_tzpath.py" />
|
||||
<Compile Include="zoneinfo\_zoneinfo.py" />
|
||||
<Compile Include="_bootlocale.py" />
|
||||
<Compile Include="_collections_abc.py" />
|
||||
<Compile Include="_compat_pickle.py" />
|
||||
<Compile Include="_compression.py" />
|
||||
|
|
Loading…
Reference in New Issue