mirror of https://github.com/python/cpython
bpo-34523: Py_DecodeLocale() use UTF-8 on Windows (GH-8998)
Py_DecodeLocale() and Py_EncodeLocale() now use the UTF-8 encoding on Windows if Py_LegacyWindowsFSEncodingFlag is zero. pymain_read_conf() now sets Py_LegacyWindowsFSEncodingFlag in its loop, but restores its value at exit.
This commit is contained in:
parent
70fead25e5
commit
c5989cd876
|
@ -109,6 +109,7 @@ Operating System Utilities
|
|||
Encoding, highest priority to lowest priority:
|
||||
|
||||
* ``UTF-8`` on macOS and Android;
|
||||
* ``UTF-8`` on Windows if :c:data:`Py_LegacyWindowsFSEncodingFlag` is zero;
|
||||
* ``UTF-8`` if the Python UTF-8 mode is enabled;
|
||||
* ``ASCII`` if the ``LC_CTYPE`` locale is ``"C"``,
|
||||
``nl_langinfo(CODESET)`` returns the ``ASCII`` encoding (or an alias),
|
||||
|
@ -140,6 +141,10 @@ Operating System Utilities
|
|||
.. versionchanged:: 3.7
|
||||
The function now uses the UTF-8 encoding in the UTF-8 mode.
|
||||
|
||||
.. versionchanged:: 3.8
|
||||
The function now uses the UTF-8 encoding on Windows if
|
||||
:c:data:`Py_LegacyWindowsFSEncodingFlag` is zero.
|
||||
|
||||
|
||||
.. c:function:: char* Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
|
||||
|
||||
|
@ -150,6 +155,7 @@ Operating System Utilities
|
|||
Encoding, highest priority to lowest priority:
|
||||
|
||||
* ``UTF-8`` on macOS and Android;
|
||||
* ``UTF-8`` on Windows if :c:data:`Py_LegacyWindowsFSEncodingFlag` is zero;
|
||||
* ``UTF-8`` if the Python UTF-8 mode is enabled;
|
||||
* ``ASCII`` if the ``LC_CTYPE`` locale is ``"C"``,
|
||||
``nl_langinfo(CODESET)`` returns the ``ASCII`` encoding (or an alias),
|
||||
|
@ -169,9 +175,6 @@ Operating System Utilities
|
|||
Use the :c:func:`Py_DecodeLocale` function to decode the bytes string back
|
||||
to a wide character string.
|
||||
|
||||
.. versionchanged:: 3.7
|
||||
The function now uses the UTF-8 encoding in the UTF-8 mode.
|
||||
|
||||
.. seealso::
|
||||
|
||||
The :c:func:`PyUnicode_EncodeFSDefault` and
|
||||
|
@ -180,7 +183,11 @@ Operating System Utilities
|
|||
.. versionadded:: 3.5
|
||||
|
||||
.. versionchanged:: 3.7
|
||||
The function now supports the UTF-8 mode.
|
||||
The function now uses the UTF-8 encoding in the UTF-8 mode.
|
||||
|
||||
.. versionchanged:: 3.8
|
||||
The function now uses the UTF-8 encoding on Windows if
|
||||
:c:data:`Py_LegacyWindowsFSEncodingFlag` is zero.
|
||||
|
||||
|
||||
.. _systemfunctions:
|
||||
|
|
|
@ -268,10 +268,10 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
|||
'dump_refs': 0,
|
||||
'malloc_stats': 0,
|
||||
|
||||
# None means that the default encoding is read at runtime:
|
||||
# see get_locale_encoding().
|
||||
# None means that the value is obtained at runtime by get_filesystem_encoding()
|
||||
'filesystem_encoding': None,
|
||||
'filesystem_errors': sys.getfilesystemencodeerrors(),
|
||||
'filesystem_errors': None,
|
||||
|
||||
'utf8_mode': 0,
|
||||
'coerce_c_locale': 0,
|
||||
'coerce_c_locale_warn': 0,
|
||||
|
@ -294,7 +294,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
|||
'quiet': 0,
|
||||
'user_site_directory': 1,
|
||||
'buffered_stdio': 1,
|
||||
# None means that check_config() gets the expected encoding at runtime
|
||||
|
||||
# None means that the value is obtained at runtime by get_stdio_encoding()
|
||||
'stdio_encoding': None,
|
||||
'stdio_errors': None,
|
||||
|
||||
|
@ -303,7 +304,6 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
|||
'_frozen': 0,
|
||||
}
|
||||
|
||||
|
||||
def get_stdio_encoding(self, env):
|
||||
code = 'import sys; print(sys.stdout.encoding, sys.stdout.errors)'
|
||||
args = (sys.executable, '-c', code)
|
||||
|
@ -315,18 +315,12 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
|||
out = proc.stdout.rstrip()
|
||||
return out.split()
|
||||
|
||||
def get_locale_encoding(self, isolated):
|
||||
if sys.platform in ('win32', 'darwin') or support.is_android:
|
||||
# Windows, macOS and Android use UTF-8
|
||||
return "utf-8"
|
||||
|
||||
code = ('import codecs, locale, sys',
|
||||
'locale.setlocale(locale.LC_CTYPE, "")',
|
||||
'enc = locale.nl_langinfo(locale.CODESET)',
|
||||
'enc = codecs.lookup(enc).name',
|
||||
'print(enc)')
|
||||
args = (sys.executable, '-c', '; '.join(code))
|
||||
env = dict(os.environ)
|
||||
def get_filesystem_encoding(self, isolated, env):
|
||||
code = ('import codecs, locale, sys; '
|
||||
'print(sys.getfilesystemencoding(), '
|
||||
'sys.getfilesystemencodeerrors())')
|
||||
args = (sys.executable, '-c', code)
|
||||
env = dict(env)
|
||||
if not isolated:
|
||||
env['PYTHONCOERCECLOCALE'] = '0'
|
||||
env['PYTHONUTF8'] = '0'
|
||||
|
@ -336,7 +330,8 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
|||
if proc.returncode:
|
||||
raise Exception(f"failed to get the locale encoding: "
|
||||
f"stdout={proc.stdout!r} stderr={proc.stderr!r}")
|
||||
return proc.stdout.rstrip()
|
||||
out = proc.stdout.rstrip()
|
||||
return out.split()
|
||||
|
||||
def check_config(self, testname, expected):
|
||||
expected = dict(self.DEFAULT_CONFIG, **expected)
|
||||
|
@ -356,8 +351,12 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
|||
expected['stdio_encoding'] = res[0]
|
||||
if expected['stdio_errors'] is None:
|
||||
expected['stdio_errors'] = res[1]
|
||||
if expected['filesystem_encoding'] is None:
|
||||
expected['filesystem_encoding'] = self.get_locale_encoding(expected['isolated'])
|
||||
if expected['filesystem_encoding'] is None or expected['filesystem_errors'] is None:
|
||||
res = self.get_filesystem_encoding(expected['isolated'], env)
|
||||
if expected['filesystem_encoding'] is None:
|
||||
expected['filesystem_encoding'] = res[0]
|
||||
if expected['filesystem_errors'] is None:
|
||||
expected['filesystem_errors'] = res[1]
|
||||
for key, value in expected.items():
|
||||
expected[key] = str(value)
|
||||
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Py_DecodeLocale() and Py_EncodeLocale() now use the UTF-8 encoding on
|
||||
Windows if Py_LegacyWindowsFSEncodingFlag is zero.
|
|
@ -1287,6 +1287,9 @@ pymain_read_conf(_PyMain *pymain, _PyCoreConfig *config,
|
|||
_PyCmdline *cmdline)
|
||||
{
|
||||
int init_utf8_mode = Py_UTF8Mode;
|
||||
#ifdef MS_WINDOWS
|
||||
int init_legacy_encoding = Py_LegacyWindowsFSEncodingFlag;
|
||||
#endif
|
||||
_PyCoreConfig save_config = _PyCoreConfig_INIT;
|
||||
int res = -1;
|
||||
|
||||
|
@ -1313,9 +1316,12 @@ pymain_read_conf(_PyMain *pymain, _PyCoreConfig *config,
|
|||
goto done;
|
||||
}
|
||||
|
||||
/* bpo-34207: Py_DecodeLocale(), Py_EncodeLocale() and similar
|
||||
functions depend on Py_UTF8Mode. */
|
||||
/* bpo-34207: Py_DecodeLocale() and Py_EncodeLocale() depend
|
||||
on Py_UTF8Mode and Py_LegacyWindowsFSEncodingFlag. */
|
||||
Py_UTF8Mode = config->utf8_mode;
|
||||
#ifdef MS_WINDOWS
|
||||
Py_LegacyWindowsFSEncodingFlag = config->legacy_windows_fs_encoding;
|
||||
#endif
|
||||
|
||||
if (pymain_init_cmdline_argv(pymain, config, cmdline) < 0) {
|
||||
goto done;
|
||||
|
@ -1380,6 +1386,9 @@ pymain_read_conf(_PyMain *pymain, _PyCoreConfig *config,
|
|||
done:
|
||||
_PyCoreConfig_Clear(&save_config);
|
||||
Py_UTF8Mode = init_utf8_mode ;
|
||||
#ifdef MS_WINDOWS
|
||||
Py_LegacyWindowsFSEncodingFlag = init_legacy_encoding;
|
||||
#endif
|
||||
return res;
|
||||
}
|
||||
|
||||
|
|
|
@ -499,9 +499,13 @@ _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
|
|||
return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
|
||||
surrogateescape);
|
||||
#else
|
||||
if (Py_UTF8Mode == 1) {
|
||||
return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
|
||||
surrogateescape);
|
||||
int use_utf8 = (Py_UTF8Mode == 1);
|
||||
#ifdef MS_WINDOWS
|
||||
use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
|
||||
#endif
|
||||
if (use_utf8) {
|
||||
return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen,
|
||||
reason, surrogateescape);
|
||||
}
|
||||
|
||||
#ifdef USE_FORCE_ASCII
|
||||
|
@ -661,7 +665,11 @@ encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
|
|||
return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
|
||||
raw_malloc, surrogateescape);
|
||||
#else /* __APPLE__ */
|
||||
if (Py_UTF8Mode == 1) {
|
||||
int use_utf8 = (Py_UTF8Mode == 1);
|
||||
#ifdef MS_WINDOWS
|
||||
use_utf8 |= !Py_LegacyWindowsFSEncodingFlag;
|
||||
#endif
|
||||
if (use_utf8) {
|
||||
return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
|
||||
raw_malloc, surrogateescape);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue