mirror of https://github.com/python/cpython
bpo-47000: Make `io.text_encoding()` respect UTF-8 mode (GH-32003)
Co-authored-by: Eric Snow <ericsnowcurrently@gmail.com>
This commit is contained in:
parent
6db2db91b9
commit
4216dce04b
|
@ -198,12 +198,13 @@ High-level Module Interface
|
|||
This is a helper function for callables that use :func:`open` or
|
||||
:class:`TextIOWrapper` and have an ``encoding=None`` parameter.
|
||||
|
||||
This function returns *encoding* if it is not ``None`` and ``"locale"`` if
|
||||
*encoding* is ``None``.
|
||||
This function returns *encoding* if it is not ``None``.
|
||||
Otherwise, it returns ``"locale"`` or ``"utf-8"`` depending on
|
||||
:ref:`UTF-8 Mode <utf8-mode>`.
|
||||
|
||||
This function emits an :class:`EncodingWarning` if
|
||||
:data:`sys.flags.warn_default_encoding <sys.flags>` is true and *encoding*
|
||||
is None. *stacklevel* specifies where the warning is emitted.
|
||||
is ``None``. *stacklevel* specifies where the warning is emitted.
|
||||
For example::
|
||||
|
||||
def read_text(path, encoding=None):
|
||||
|
@ -218,6 +219,10 @@ High-level Module Interface
|
|||
|
||||
.. versionadded:: 3.10
|
||||
|
||||
.. versionchanged:: 3.11
|
||||
:func:`text_encoding` returns ``"utf-8"`` when UTF-8 mode is enabled and
|
||||
*encoding* is ``None``.
|
||||
|
||||
|
||||
.. exception:: BlockingIOError
|
||||
|
||||
|
|
|
@ -48,6 +48,7 @@ struct _Py_global_strings {
|
|||
STRUCT_FOR_STR(newline, "\n")
|
||||
STRUCT_FOR_STR(open_br, "{")
|
||||
STRUCT_FOR_STR(percent, "%")
|
||||
STRUCT_FOR_STR(utf_8, "utf-8")
|
||||
} literals;
|
||||
|
||||
struct {
|
||||
|
|
|
@ -672,6 +672,7 @@ extern "C" {
|
|||
INIT_STR(newline, "\n"), \
|
||||
INIT_STR(open_br, "{"), \
|
||||
INIT_STR(percent, "%"), \
|
||||
INIT_STR(utf_8, "utf-8"), \
|
||||
}, \
|
||||
.identifiers = { \
|
||||
INIT_ID(False), \
|
||||
|
|
10
Lib/_pyio.py
10
Lib/_pyio.py
|
@ -44,8 +44,9 @@ def text_encoding(encoding, stacklevel=2):
|
|||
"""
|
||||
A helper function to choose the text encoding.
|
||||
|
||||
When encoding is not None, just return it.
|
||||
Otherwise, return the default text encoding (i.e. "locale").
|
||||
When encoding is not None, this function returns it.
|
||||
Otherwise, this function returns the default text encoding
|
||||
(i.e. "locale" or "utf-8", depending on UTF-8 mode).
|
||||
|
||||
This function emits an EncodingWarning if *encoding* is None and
|
||||
sys.flags.warn_default_encoding is true.
|
||||
|
@ -55,7 +56,10 @@ def text_encoding(encoding, stacklevel=2):
|
|||
However, please consider using encoding="utf-8" for new APIs.
|
||||
"""
|
||||
if encoding is None:
|
||||
encoding = "locale"
|
||||
if sys.flags.utf8_mode:
|
||||
encoding = "utf-8"
|
||||
else:
|
||||
encoding = "locale"
|
||||
if sys.flags.warn_default_encoding:
|
||||
import warnings
|
||||
warnings.warn("'encoding' argument not specified.",
|
||||
|
|
|
@ -4289,6 +4289,17 @@ class MiscIOTest(unittest.TestCase):
|
|||
self.assertTrue(
|
||||
warnings[1].startswith(b"<string>:8: EncodingWarning: "))
|
||||
|
||||
def test_text_encoding(self):
|
||||
# PEP 597, bpo-47000. io.text_encoding() returns "locale" or "utf-8"
|
||||
# based on sys.flags.utf8_mode
|
||||
code = "import io; print(io.text_encoding(None))"
|
||||
|
||||
proc = assert_python_ok('-X', 'utf8=0', '-c', code)
|
||||
self.assertEqual(b"locale", proc.out.strip())
|
||||
|
||||
proc = assert_python_ok('-X', 'utf8=1', '-c', code)
|
||||
self.assertEqual(b"utf-8", proc.out.strip())
|
||||
|
||||
@support.cpython_only
|
||||
# Depending on whether OpenWrapper was already created or not, the warning is
|
||||
# emitted or not. For example, the attribute is already created when this
|
||||
|
|
|
@ -161,7 +161,7 @@ class UTF8ModeTests(unittest.TestCase):
|
|||
filename = __file__
|
||||
|
||||
out = self.get_output('-c', code, filename, PYTHONUTF8='1')
|
||||
self.assertEqual(out, 'UTF-8/strict')
|
||||
self.assertEqual(out.lower(), 'utf-8/strict')
|
||||
|
||||
def _check_io_encoding(self, module, encoding=None, errors=None):
|
||||
filename = __file__
|
||||
|
@ -183,10 +183,10 @@ class UTF8ModeTests(unittest.TestCase):
|
|||
PYTHONUTF8='1')
|
||||
|
||||
if not encoding:
|
||||
encoding = 'UTF-8'
|
||||
encoding = 'utf-8'
|
||||
if not errors:
|
||||
errors = 'strict'
|
||||
self.assertEqual(out, f'{encoding}/{errors}')
|
||||
self.assertEqual(out.lower(), f'{encoding}/{errors}')
|
||||
|
||||
def check_io_encoding(self, module):
|
||||
self._check_io_encoding(module, encoding="latin1")
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Make :func:`io.text_encoding` return "utf-8" when UTF-8 mode is enabled.
|
|
@ -457,8 +457,9 @@ _io.text_encoding
|
|||
|
||||
A helper function to choose the text encoding.
|
||||
|
||||
When encoding is not None, just return it.
|
||||
Otherwise, return the default text encoding (i.e. "locale").
|
||||
When encoding is not None, this function returns it.
|
||||
Otherwise, this function returns the default text encoding
|
||||
(i.e. "locale" or "utf-8", depending on UTF-8 mode).
|
||||
|
||||
This function emits an EncodingWarning if encoding is None and
|
||||
sys.flags.warn_default_encoding is true.
|
||||
|
@ -469,7 +470,7 @@ However, please consider using encoding="utf-8" for new APIs.
|
|||
|
||||
static PyObject *
|
||||
_io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel)
|
||||
/*[clinic end generated code: output=91b2cfea6934cc0c input=bf70231213e2a7b4]*/
|
||||
/*[clinic end generated code: output=91b2cfea6934cc0c input=4999aa8b3d90f3d4]*/
|
||||
{
|
||||
if (encoding == NULL || encoding == Py_None) {
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
|
@ -479,7 +480,14 @@ _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel)
|
|||
return NULL;
|
||||
}
|
||||
}
|
||||
return &_Py_ID(locale);
|
||||
const PyPreConfig *preconfig = &_PyRuntime.preconfig;
|
||||
if (preconfig->utf8_mode) {
|
||||
_Py_DECLARE_STR(utf_8, "utf-8");
|
||||
encoding = &_Py_STR(utf_8);
|
||||
}
|
||||
else {
|
||||
encoding = &_Py_ID(locale);
|
||||
}
|
||||
}
|
||||
Py_INCREF(encoding);
|
||||
return encoding;
|
||||
|
|
|
@ -273,8 +273,9 @@ PyDoc_STRVAR(_io_text_encoding__doc__,
|
|||
"\n"
|
||||
"A helper function to choose the text encoding.\n"
|
||||
"\n"
|
||||
"When encoding is not None, just return it.\n"
|
||||
"Otherwise, return the default text encoding (i.e. \"locale\").\n"
|
||||
"When encoding is not None, this function returns it.\n"
|
||||
"Otherwise, this function returns the default text encoding\n"
|
||||
"(i.e. \"locale\" or \"utf-8\" depends on UTF-8 mode).\n"
|
||||
"\n"
|
||||
"This function emits an EncodingWarning if encoding is None and\n"
|
||||
"sys.flags.warn_default_encoding is true.\n"
|
||||
|
@ -354,4 +355,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec
|
|||
exit:
|
||||
return return_value;
|
||||
}
|
||||
/*[clinic end generated code: output=6ea315343f6a94ba input=a9049054013a1b77]*/
|
||||
/*[clinic end generated code: output=1a7fd7755c9a9609 input=a9049054013a1b77]*/
|
||||
|
|
|
@ -841,7 +841,10 @@ static PyObject *
|
|||
sys_getdefaultencoding_impl(PyObject *module)
|
||||
/*[clinic end generated code: output=256d19dfcc0711e6 input=d416856ddbef6909]*/
|
||||
{
|
||||
return PyUnicode_FromString(PyUnicode_GetDefaultEncoding());
|
||||
_Py_DECLARE_STR(utf_8, "utf-8");
|
||||
PyObject *ret = &_Py_STR(utf_8);
|
||||
Py_INCREF(ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
|
|
Loading…
Reference in New Issue