From 3529718925f40d14ed48d281d809187bc7314a14 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 4 Nov 2020 11:20:10 +0100 Subject: [PATCH] bpo-42236: os.device_encoding() respects UTF-8 Mode (GH-23119) On Unix, the os.device_encoding() function now returns 'UTF-8' rather than the device encoding if the Python UTF-8 Mode is enabled. --- Doc/library/os.rst | 8 ++++++ Lib/test/test_utf8_mode.py | 27 +++++++++++++++++++ .../2020-11-02-23-05-17.bpo-42236.aJ6ZBR.rst | 3 +++ Python/fileutils.c | 18 +++++-------- Python/initconfig.c | 13 ++++----- 5 files changed, 52 insertions(+), 17 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-11-02-23-05-17.bpo-42236.aJ6ZBR.rst diff --git a/Doc/library/os.rst b/Doc/library/os.rst index f9f35b31243..3ffcfa04ffa 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -113,6 +113,8 @@ of the UTF-8 encoding: :ref:`error handler <error-handlers>` being enabled for :data:`sys.stdin` and :data:`sys.stdout` (:data:`sys.stderr` continues to use ``backslashreplace`` as it does in the default locale-aware mode) +* On Unix, :func:`os.device_encoding` returns ``'UTF-8'`` rather than the + device encoding. Note that the standard stream settings in UTF-8 mode can be overridden by :envvar:`PYTHONIOENCODING` (just as they can be in the default locale-aware @@ -808,6 +810,12 @@ as internal buffering of data. Return a string describing the encoding of the device associated with *fd* if it is connected to a terminal; else return :const:`None`. + On Unix, if the :ref:`Python UTF-8 Mode <utf8-mode>` is enabled, return + ``'UTF-8'`` rather than the device encoding. + + .. versionchanged:: 3.10 + On Unix, the function now implements the Python UTF-8 Mode. + .. function:: dup(fd) diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py index bdb93457cfc..8b6332ee227 100644 --- a/Lib/test/test_utf8_mode.py +++ b/Lib/test/test_utf8_mode.py @@ -3,11 +3,13 @@ Test the implementation of the PEP 540: the UTF-8 Mode. 
""" import locale +import subprocess import sys import textwrap import unittest from test import support from test.support.script_helper import assert_python_ok, assert_python_failure +from test.support import os_helper MS_WINDOWS = (sys.platform == 'win32') @@ -250,6 +252,31 @@ class UTF8ModeTests(unittest.TestCase): out = self.get_output('-X', 'utf8', '-E', '-c', code) self.assertEqual(out, '1') + @unittest.skipIf(MS_WINDOWS, + "os.device_encoding() doesn't implement " + "the UTF-8 Mode on Windows") + def test_device_encoding(self): + # Use stdout as TTY + if not sys.stdout.isatty(): + self.skipTest("sys.stdout is not a TTY") + + filename = 'out.txt' + self.addCleanup(os_helper.unlink, filename) + + code = (f'import os, sys; fd = sys.stdout.fileno(); ' + f'out = open({filename!r}, "w", encoding="utf-8"); ' + f'print(os.isatty(fd), os.device_encoding(fd), file=out); ' + f'out.close()') + cmd = [sys.executable, '-X', 'utf8', '-c', code] + # The stdout TTY is inherited by the child process + proc = subprocess.run(cmd, text=True) + self.assertEqual(proc.returncode, 0, proc) + + # In UTF-8 Mode, device_encoding(fd) returns "UTF-8" if fd is a TTY + with open(filename, encoding="utf8") as fp: + out = fp.read().rstrip() + self.assertEqual(out, 'True UTF-8') + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2020-11-02-23-05-17.bpo-42236.aJ6ZBR.rst b/Misc/NEWS.d/next/Library/2020-11-02-23-05-17.bpo-42236.aJ6ZBR.rst new file mode 100644 index 00000000000..15e26203665 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-11-02-23-05-17.bpo-42236.aJ6ZBR.rst @@ -0,0 +1,3 @@ +On Unix, the :func:`os.device_encoding` function now returns ``'UTF-8'`` rather +than the device encoding if the :ref:`Python UTF-8 Mode <utf8-mode>` is +enabled. 
diff --git a/Python/fileutils.c b/Python/fileutils.c index 5177b372882..b589d7390d4 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -55,9 +55,6 @@ get_surrogateescape(_Py_error_handler errors, int *surrogateescape) PyObject * _Py_device_encoding(int fd) { -#if defined(MS_WINDOWS) - UINT cp; -#endif int valid; _Py_BEGIN_SUPPRESS_IPH valid = isatty(fd); @@ -66,6 +63,7 @@ _Py_device_encoding(int fd) Py_RETURN_NONE; #if defined(MS_WINDOWS) + UINT cp; if (fd == 0) cp = GetConsoleCP(); else if (fd == 1 || fd == 2) @@ -74,16 +72,14 @@ _Py_device_encoding(int fd) cp = 0; /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application has no console */ - if (cp != 0) - return PyUnicode_FromFormat("cp%u", (unsigned int)cp); -#elif defined(CODESET) - { - char *codeset = nl_langinfo(CODESET); - if (codeset != NULL && codeset[0] != 0) - return PyUnicode_FromString(codeset); + if (cp == 0) { + Py_RETURN_NONE; } + + return PyUnicode_FromFormat("cp%u", (unsigned int)cp); +#else + return _Py_GetLocaleEncodingObject(); #endif - Py_RETURN_NONE; } #if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) diff --git a/Python/initconfig.c b/Python/initconfig.c index 7bb28ed01f1..15fb3e4d287 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -1515,8 +1515,8 @@ config_init_stdio_encoding(PyConfig *config, { PyStatus status; - /* If Py_SetStandardStreamEncoding() have been called, use these - parameters. */ + /* If Py_SetStandardStreamEncoding() has been called, use its + arguments if they are not NULL. 
*/ if (config->stdio_encoding == NULL && _Py_StandardStreamEncoding != NULL) { status = CONFIG_SET_BYTES_STR(config, &config->stdio_encoding, _Py_StandardStreamEncoding, @@ -1535,6 +1535,7 @@ config_init_stdio_encoding(PyConfig *config, } } + // Exit if encoding and errors are defined if (config->stdio_encoding != NULL && config->stdio_errors != NULL) { return _PyStatus_OK(); } @@ -1634,12 +1635,12 @@ config_get_fs_encoding(PyConfig *config, const PyPreConfig *preconfig, if (preconfig->utf8_mode) { return PyConfig_SetString(config, fs_encoding, L"utf-8"); } - else if (_Py_GetForceASCII()) { + + if (_Py_GetForceASCII()) { return PyConfig_SetString(config, fs_encoding, L"ascii"); } - else { - return config_get_locale_encoding(config, preconfig, fs_encoding); - } + + return config_get_locale_encoding(config, preconfig, fs_encoding); #endif // !MS_WINDOWS }