From 4216dce04b7d3f329beaaafc82a77c4ac6cf4d57 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 4 Apr 2022 11:46:57 +0900 Subject: [PATCH] bpo-47000: Make `io.text_encoding()` respects UTF-8 mode (GH-32003) Co-authored-by: Eric Snow --- Doc/library/io.rst | 11 ++++++++--- Include/internal/pycore_global_strings.h | 1 + Include/internal/pycore_runtime_init.h | 1 + Lib/_pyio.py | 10 +++++++--- Lib/test/test_io.py | 11 +++++++++++ Lib/test/test_utf8_mode.py | 6 +++--- .../2022-03-20-13-00-08.bpo-47000.p8HpG0.rst | 1 + Modules/_io/_iomodule.c | 16 ++++++++++++---- Modules/_io/clinic/_iomodule.c.h | 7 ++++--- Python/sysmodule.c | 5 ++++- 10 files changed, 52 insertions(+), 17 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-03-20-13-00-08.bpo-47000.p8HpG0.rst diff --git a/Doc/library/io.rst b/Doc/library/io.rst index d5123348195..80107d53950 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -198,12 +198,13 @@ High-level Module Interface This is a helper function for callables that use :func:`open` or :class:`TextIOWrapper` and have an ``encoding=None`` parameter. - This function returns *encoding* if it is not ``None`` and ``"locale"`` if - *encoding* is ``None``. + This function returns *encoding* if it is not ``None``. + Otherwise, it returns ``"locale"`` or ``"utf-8"`` depending on + :ref:`UTF-8 Mode `. This function emits an :class:`EncodingWarning` if :data:`sys.flags.warn_default_encoding ` is true and *encoding* - is None. *stacklevel* specifies where the warning is emitted. + is ``None``. *stacklevel* specifies where the warning is emitted. For example:: def read_text(path, encoding=None): @@ -218,6 +219,10 @@ High-level Module Interface .. versionadded:: 3.10 + .. versionchanged:: 3.11 + :func:`text_encoding` returns "utf-8" when UTF-8 mode is enabled and + *encoding* is ``None``. + .. exception:: BlockingIOError diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 3e533fd1650..833ff2710a7 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -48,6 +48,7 @@ struct _Py_global_strings { STRUCT_FOR_STR(newline, "\n") STRUCT_FOR_STR(open_br, "{") STRUCT_FOR_STR(percent, "%") + STRUCT_FOR_STR(utf_8, "utf-8") } literals; struct { diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index d5690d83a04..fd925b3e060 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -672,6 +672,7 @@ extern "C" { INIT_STR(newline, "\n"), \ INIT_STR(open_br, "{"), \ INIT_STR(percent, "%"), \ + INIT_STR(utf_8, "utf-8"), \ }, \ .identifiers = { \ INIT_ID(False), \ diff --git a/Lib/_pyio.py b/Lib/_pyio.py index fd00d6536c0..e3ff59eb1ad 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -44,8 +44,9 @@ def text_encoding(encoding, stacklevel=2): """ A helper function to choose the text encoding. - When encoding is not None, just return it. - Otherwise, return the default text encoding (i.e. "locale"). + When encoding is not None, this function returns it. + Otherwise, this function returns the default text encoding + (i.e. "locale" or "utf-8" depends on UTF-8 mode). This function emits an EncodingWarning if *encoding* is None and sys.flags.warn_default_encoding is true. @@ -55,7 +56,10 @@ def text_encoding(encoding, stacklevel=2): However, please consider using encoding="utf-8" for new APIs. """ if encoding is None: - encoding = "locale" + if sys.flags.utf8_mode: + encoding = "utf-8" + else: + encoding = "locale" if sys.flags.warn_default_encoding: import warnings warnings.warn("'encoding' argument not specified.", diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 2d0ca878788..67be108d252 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -4289,6 +4289,17 @@ class MiscIOTest(unittest.TestCase): self.assertTrue( warnings[1].startswith(b":8: EncodingWarning: ")) + def test_text_encoding(self): + # PEP 597, bpo-47000. io.text_encoding() returns "locale" or "utf-8" + # based on sys.flags.utf8_mode + code = "import io; print(io.text_encoding(None))" + + proc = assert_python_ok('-X', 'utf8=0', '-c', code) + self.assertEqual(b"locale", proc.out.strip()) + + proc = assert_python_ok('-X', 'utf8=1', '-c', code) + self.assertEqual(b"utf-8", proc.out.strip()) + @support.cpython_only # Depending if OpenWrapper was already created or not, the warning is # emitted or not. For example, the attribute is already created when this diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py index 2b96f76df30..308e8e8aea6 100644 --- a/Lib/test/test_utf8_mode.py +++ b/Lib/test/test_utf8_mode.py @@ -161,7 +161,7 @@ class UTF8ModeTests(unittest.TestCase): filename = __file__ out = self.get_output('-c', code, filename, PYTHONUTF8='1') - self.assertEqual(out, 'UTF-8/strict') + self.assertEqual(out.lower(), 'utf-8/strict') def _check_io_encoding(self, module, encoding=None, errors=None): filename = __file__ @@ -183,10 +183,10 @@ class UTF8ModeTests(unittest.TestCase): PYTHONUTF8='1') if not encoding: - encoding = 'UTF-8' + encoding = 'utf-8' if not errors: errors = 'strict' - self.assertEqual(out, f'{encoding}/{errors}') + self.assertEqual(out.lower(), f'{encoding}/{errors}') def check_io_encoding(self, module): self._check_io_encoding(module, encoding="latin1") diff --git a/Misc/NEWS.d/next/Library/2022-03-20-13-00-08.bpo-47000.p8HpG0.rst b/Misc/NEWS.d/next/Library/2022-03-20-13-00-08.bpo-47000.p8HpG0.rst new file mode 100644 index 00000000000..f96b6e627ed --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-03-20-13-00-08.bpo-47000.p8HpG0.rst @@ -0,0 +1 @@ +Make :func:`io.text_encoding` returns "utf-8" when UTF-8 mode is enabled. diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 7f029f26078..065f5e29c31 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -457,8 +457,9 @@ _io.text_encoding A helper function to choose the text encoding. -When encoding is not None, just return it. -Otherwise, return the default text encoding (i.e. "locale"). +When encoding is not None, this function returns it. +Otherwise, this function returns the default text encoding +(i.e. "locale" or "utf-8" depends on UTF-8 mode). This function emits an EncodingWarning if encoding is None and sys.flags.warn_default_encoding is true. @@ -469,7 +470,7 @@ However, please consider using encoding="utf-8" for new APIs. static PyObject * _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel) -/*[clinic end generated code: output=91b2cfea6934cc0c input=bf70231213e2a7b4]*/ +/*[clinic end generated code: output=91b2cfea6934cc0c input=4999aa8b3d90f3d4]*/ { if (encoding == NULL || encoding == Py_None) { PyInterpreterState *interp = _PyInterpreterState_GET(); @@ -479,7 +480,14 @@ _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel) return NULL; } } - return &_Py_ID(locale); + const PyPreConfig *preconfig = &_PyRuntime.preconfig; + if (preconfig->utf8_mode) { + _Py_DECLARE_STR(utf_8, "utf-8"); + encoding = &_Py_STR(utf_8); + } + else { + encoding = &_Py_ID(locale); + } } Py_INCREF(encoding); return encoding; diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h index d5fb176eb66..e4a6b8c42e1 100644 --- a/Modules/_io/clinic/_iomodule.c.h +++ b/Modules/_io/clinic/_iomodule.c.h @@ -273,8 +273,9 @@ PyDoc_STRVAR(_io_text_encoding__doc__, "\n" "A helper function to choose the text encoding.\n" "\n" -"When encoding is not None, just return it.\n" -"Otherwise, return the default text encoding (i.e. \"locale\").\n" +"When encoding is not None, this function returns it.\n" +"Otherwise, this function returns the default text encoding\n" +"(i.e. \"locale\" or \"utf-8\" depends on UTF-8 mode).\n" "\n" "This function emits an EncodingWarning if encoding is None and\n" "sys.flags.warn_default_encoding is true.\n" @@ -354,4 +355,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec exit: return return_value; } -/*[clinic end generated code: output=6ea315343f6a94ba input=a9049054013a1b77]*/ +/*[clinic end generated code: output=1a7fd7755c9a9609 input=a9049054013a1b77]*/ diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 5765e9ef657..de4e10a7e11 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -841,7 +841,10 @@ static PyObject * sys_getdefaultencoding_impl(PyObject *module) /*[clinic end generated code: output=256d19dfcc0711e6 input=d416856ddbef6909]*/ { - return PyUnicode_FromString(PyUnicode_GetDefaultEncoding()); + _Py_DECLARE_STR(utf_8, "utf-8"); + PyObject *ret = &_Py_STR(utf_8); + Py_INCREF(ret); + return ret; } /*[clinic input]