diff --git a/Lib/_bootlocale.py b/Lib/_bootlocale.py
deleted file mode 100644
index 3273a3b4225..00000000000
--- a/Lib/_bootlocale.py
+++ /dev/null
@@ -1,46 +0,0 @@
-"""A minimal subset of the locale module used at interpreter startup
-(imported by the _io module), in order to reduce startup time.
-
-Don't import directly from third-party code; use the `locale` module instead!
-"""
-
-import sys
-import _locale
-
-if sys.platform.startswith("win"):
- def getpreferredencoding(do_setlocale=True):
- if sys.flags.utf8_mode:
- return 'UTF-8'
- return _locale._getdefaultlocale()[1]
-else:
- try:
- _locale.CODESET
- except AttributeError:
- if hasattr(sys, 'getandroidapilevel'):
- # On Android langinfo.h and CODESET are missing, and UTF-8 is
- # always used in mbstowcs() and wcstombs().
- def getpreferredencoding(do_setlocale=True):
- return 'UTF-8'
- else:
- def getpreferredencoding(do_setlocale=True):
- if sys.flags.utf8_mode:
- return 'UTF-8'
- # This path for legacy systems needs the more complex
- # getdefaultlocale() function, import the full locale module.
- import locale
- return locale.getpreferredencoding(do_setlocale)
- else:
- def getpreferredencoding(do_setlocale=True):
- assert not do_setlocale
- if sys.flags.utf8_mode:
- return 'UTF-8'
- result = _locale.nl_langinfo(_locale.CODESET)
- if not result and sys.platform == 'darwin':
- # nl_langinfo can return an empty string
- # when the setting has an invalid value.
- # Default to UTF-8 in that case because
- # UTF-8 is the default charset on OSX and
- # returning nothing will crash the
- # interpreter.
- result = 'UTF-8'
- return result
diff --git a/Lib/locale.py b/Lib/locale.py
index 1a4e9f694f3..ee841e8b865 100644
--- a/Lib/locale.py
+++ b/Lib/locale.py
@@ -619,53 +619,49 @@ def resetlocale(category=LC_ALL):
"""
_setlocale(category, _build_localename(getdefaultlocale()))
-if sys.platform.startswith("win"):
- # On Win32, this will return the ANSI code page
- def getpreferredencoding(do_setlocale = True):
- """Return the charset that the user is likely using."""
- if sys.flags.utf8_mode:
- return 'UTF-8'
- import _bootlocale
- return _bootlocale.getpreferredencoding(False)
-else:
- # On Unix, if CODESET is available, use that.
- try:
- CODESET
- except NameError:
+
+try:
+ from _locale import _get_locale_encoding
+except ImportError:
+ def _get_locale_encoding():
if hasattr(sys, 'getandroidapilevel'):
# On Android langinfo.h and CODESET are missing, and UTF-8 is
# always used in mbstowcs() and wcstombs().
- def getpreferredencoding(do_setlocale = True):
- return 'UTF-8'
- else:
- # Fall back to parsing environment variables :-(
- def getpreferredencoding(do_setlocale = True):
- """Return the charset that the user is likely using,
- by looking at environment variables."""
- if sys.flags.utf8_mode:
- return 'UTF-8'
- res = getdefaultlocale()[1]
- if res is None:
- # LANG not set, default conservatively to ASCII
- res = 'ascii'
- return res
- else:
- def getpreferredencoding(do_setlocale = True):
- """Return the charset that the user is likely using,
- according to the system configuration."""
- if sys.flags.utf8_mode:
- return 'UTF-8'
- import _bootlocale
- if do_setlocale:
- oldloc = setlocale(LC_CTYPE)
- try:
- setlocale(LC_CTYPE, "")
- except Error:
- pass
- result = _bootlocale.getpreferredencoding(False)
- if do_setlocale:
- setlocale(LC_CTYPE, oldloc)
- return result
+ return 'UTF-8'
+ if sys.flags.utf8_mode:
+ return 'UTF-8'
+ encoding = getdefaultlocale()[1]
+ if encoding is None:
+ # LANG not set, default conservatively to ASCII
+ encoding = 'ascii'
+ return encoding
+
+try:
+ CODESET
+except NameError:
+ def getpreferredencoding(do_setlocale=True):
+ """Return the charset that the user is likely using."""
+ return _get_locale_encoding()
+else:
+ # On Unix, if CODESET is available, use that.
+ def getpreferredencoding(do_setlocale=True):
+ """Return the charset that the user is likely using,
+ according to the system configuration."""
+ if sys.flags.utf8_mode:
+ return 'UTF-8'
+
+ if not do_setlocale:
+ return _get_locale_encoding()
+
+ old_loc = setlocale(LC_CTYPE)
+ try:
+ try:
+ setlocale(LC_CTYPE, "")
+ except Error:
+ pass
+ return _get_locale_encoding()
+ finally:
+ setlocale(LC_CTYPE, old_loc)
### Database
diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py
index ddeae38e137..d63f6b66e10 100644
--- a/Lib/test/test_mimetypes.py
+++ b/Lib/test/test_mimetypes.py
@@ -3,7 +3,7 @@ import locale
import mimetypes
import pathlib
import sys
-import unittest
+import unittest.mock
from test import support
from test.support import os_helper
@@ -71,14 +71,14 @@ class MimeTypesTestCase(unittest.TestCase):
# bpo-41048: read_mime_types should read the rule file with 'utf-8' encoding.
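- # Not with locale encoding. _bootlocale has been imported because io.open(...)
- # uses it.
+ # Not with the locale encoding. open() is mocked so the test can check that
+ # it is called with encoding='utf-8'.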
- with os_helper.temp_dir() as directory:
- data = "application/no-mans-land Fran\u00E7ais"
- file = pathlib.Path(directory, "sample.mimetype")
- file.write_text(data, encoding='utf-8')
- import _bootlocale
- with support.swap_attr(_bootlocale, 'getpreferredencoding', lambda do_setlocale=True: 'ASCII'):
- mime_dict = mimetypes.read_mime_types(file)
- eq(mime_dict[".Français"], "application/no-mans-land")
+ data = "application/no-mans-land Fran\u00E7ais"
+ filename = "filename"
+ fp = io.StringIO(data)
+ with unittest.mock.patch.object(mimetypes, 'open',
+ return_value=fp) as mock_open:
+ mime_dict = mimetypes.read_mime_types(filename)
+ mock_open.assert_called_with(filename, encoding='utf-8')
+ eq(mime_dict[".Français"], "application/no-mans-land")
def test_non_standard_types(self):
eq = self.assertEqual
diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c
index 9c7ce876e40..359deb75440 100644
--- a/Modules/_localemodule.c
+++ b/Modules/_localemodule.c
@@ -768,9 +768,24 @@ _locale_bind_textdomain_codeset_impl(PyObject *module, const char *domain,
}
Py_RETURN_NONE;
}
-#endif
+#endif // HAVE_BIND_TEXTDOMAIN_CODESET
+
+#endif // HAVE_LIBINTL_H
+
+
+/*[clinic input]
+_locale._get_locale_encoding
+
+Get the current locale encoding.
+[clinic start generated code]*/
+
+static PyObject *
+_locale__get_locale_encoding_impl(PyObject *module)
+/*[clinic end generated code: output=e8e2f6f6f184591a input=513d9961d2f45c76]*/
+{
+ return _Py_GetLocaleEncoding();
+}
-#endif
static struct PyMethodDef PyLocale_Methods[] = {
_LOCALE_SETLOCALE_METHODDEF
@@ -797,6 +812,7 @@ static struct PyMethodDef PyLocale_Methods[] = {
_LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
#endif
#endif
+ _LOCALE__GET_LOCALE_ENCODING_METHODDEF
{NULL, NULL}
};
diff --git a/Modules/clinic/_localemodule.c.h b/Modules/clinic/_localemodule.c.h
index 5d1db3ece79..703d034c32e 100644
--- a/Modules/clinic/_localemodule.c.h
+++ b/Modules/clinic/_localemodule.c.h
@@ -545,6 +545,24 @@ exit:
#endif /* defined(HAVE_LIBINTL_H) && defined(HAVE_BIND_TEXTDOMAIN_CODESET) */
+PyDoc_STRVAR(_locale__get_locale_encoding__doc__,
+"_get_locale_encoding($module, /)\n"
+"--\n"
+"\n"
+"Get the current locale encoding.");
+
+#define _LOCALE__GET_LOCALE_ENCODING_METHODDEF \
+ {"_get_locale_encoding", (PyCFunction)_locale__get_locale_encoding, METH_NOARGS, _locale__get_locale_encoding__doc__},
+
+static PyObject *
+_locale__get_locale_encoding_impl(PyObject *module);
+
+static PyObject *
+_locale__get_locale_encoding(PyObject *module, PyObject *Py_UNUSED(ignored))
+{
+ return _locale__get_locale_encoding_impl(module);
+}
+
#ifndef _LOCALE_STRCOLL_METHODDEF
#define _LOCALE_STRCOLL_METHODDEF
#endif /* !defined(_LOCALE_STRCOLL_METHODDEF) */
@@ -584,4 +602,4 @@ exit:
#ifndef _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
#define _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
#endif /* !defined(_LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF) */
-/*[clinic end generated code: output=fe944779cd572d8e input=a9049054013a1b77]*/
+/*[clinic end generated code: output=cd703c8a3a75fcf4 input=a9049054013a1b77]*/
diff --git a/PCbuild/lib.pyproj b/PCbuild/lib.pyproj
index f0c51edb9d1..a15165d92ef 100644
--- a/PCbuild/lib.pyproj
+++ b/PCbuild/lib.pyproj
@@ -1572,7 +1572,6 @@
-