mirror of https://github.com/python/cpython
bpo-47000: Make `io.text_encoding()` respect UTF-8 mode (GH-32003)
Co-authored-by: Eric Snow <ericsnowcurrently@gmail.com>
This commit is contained in:
parent
6db2db91b9
commit
4216dce04b
|
@ -198,12 +198,13 @@ High-level Module Interface
|
||||||
This is a helper function for callables that use :func:`open` or
|
This is a helper function for callables that use :func:`open` or
|
||||||
:class:`TextIOWrapper` and have an ``encoding=None`` parameter.
|
:class:`TextIOWrapper` and have an ``encoding=None`` parameter.
|
||||||
|
|
||||||
This function returns *encoding* if it is not ``None`` and ``"locale"`` if
|
This function returns *encoding* if it is not ``None``.
|
||||||
*encoding* is ``None``.
|
Otherwise, it returns ``"locale"`` or ``"utf-8"`` depending on
|
||||||
|
:ref:`UTF-8 Mode <utf8-mode>`.
|
||||||
|
|
||||||
This function emits an :class:`EncodingWarning` if
|
This function emits an :class:`EncodingWarning` if
|
||||||
:data:`sys.flags.warn_default_encoding <sys.flags>` is true and *encoding*
|
:data:`sys.flags.warn_default_encoding <sys.flags>` is true and *encoding*
|
||||||
is None. *stacklevel* specifies where the warning is emitted.
|
is ``None``. *stacklevel* specifies where the warning is emitted.
|
||||||
For example::
|
For example::
|
||||||
|
|
||||||
def read_text(path, encoding=None):
|
def read_text(path, encoding=None):
|
||||||
|
@ -218,6 +219,10 @@ High-level Module Interface
|
||||||
|
|
||||||
.. versionadded:: 3.10
|
.. versionadded:: 3.10
|
||||||
|
|
||||||
|
.. versionchanged:: 3.11
|
||||||
|
:func:`text_encoding` returns ``"utf-8"`` when UTF-8 mode is enabled and
|
||||||
|
*encoding* is ``None``.
|
||||||
|
|
||||||
|
|
||||||
.. exception:: BlockingIOError
|
.. exception:: BlockingIOError
|
||||||
|
|
||||||
|
|
|
@ -48,6 +48,7 @@ struct _Py_global_strings {
|
||||||
STRUCT_FOR_STR(newline, "\n")
|
STRUCT_FOR_STR(newline, "\n")
|
||||||
STRUCT_FOR_STR(open_br, "{")
|
STRUCT_FOR_STR(open_br, "{")
|
||||||
STRUCT_FOR_STR(percent, "%")
|
STRUCT_FOR_STR(percent, "%")
|
||||||
|
STRUCT_FOR_STR(utf_8, "utf-8")
|
||||||
} literals;
|
} literals;
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
|
|
|
@ -672,6 +672,7 @@ extern "C" {
|
||||||
INIT_STR(newline, "\n"), \
|
INIT_STR(newline, "\n"), \
|
||||||
INIT_STR(open_br, "{"), \
|
INIT_STR(open_br, "{"), \
|
||||||
INIT_STR(percent, "%"), \
|
INIT_STR(percent, "%"), \
|
||||||
|
INIT_STR(utf_8, "utf-8"), \
|
||||||
}, \
|
}, \
|
||||||
.identifiers = { \
|
.identifiers = { \
|
||||||
INIT_ID(False), \
|
INIT_ID(False), \
|
||||||
|
|
10
Lib/_pyio.py
10
Lib/_pyio.py
|
@ -44,8 +44,9 @@ def text_encoding(encoding, stacklevel=2):
|
||||||
"""
|
"""
|
||||||
A helper function to choose the text encoding.
|
A helper function to choose the text encoding.
|
||||||
|
|
||||||
When encoding is not None, just return it.
|
When encoding is not None, this function returns it.
|
||||||
Otherwise, return the default text encoding (i.e. "locale").
|
Otherwise, this function returns the default text encoding
|
||||||
|
(i.e. "locale" or "utf-8" depends on UTF-8 mode).
|
||||||
|
|
||||||
This function emits an EncodingWarning if *encoding* is None and
|
This function emits an EncodingWarning if *encoding* is None and
|
||||||
sys.flags.warn_default_encoding is true.
|
sys.flags.warn_default_encoding is true.
|
||||||
|
@ -55,7 +56,10 @@ def text_encoding(encoding, stacklevel=2):
|
||||||
However, please consider using encoding="utf-8" for new APIs.
|
However, please consider using encoding="utf-8" for new APIs.
|
||||||
"""
|
"""
|
||||||
if encoding is None:
|
if encoding is None:
|
||||||
encoding = "locale"
|
if sys.flags.utf8_mode:
|
||||||
|
encoding = "utf-8"
|
||||||
|
else:
|
||||||
|
encoding = "locale"
|
||||||
if sys.flags.warn_default_encoding:
|
if sys.flags.warn_default_encoding:
|
||||||
import warnings
|
import warnings
|
||||||
warnings.warn("'encoding' argument not specified.",
|
warnings.warn("'encoding' argument not specified.",
|
||||||
|
|
|
@ -4289,6 +4289,17 @@ class MiscIOTest(unittest.TestCase):
|
||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
warnings[1].startswith(b"<string>:8: EncodingWarning: "))
|
warnings[1].startswith(b"<string>:8: EncodingWarning: "))
|
||||||
|
|
||||||
|
def test_text_encoding(self):
|
||||||
|
# PEP 597, bpo-47000. io.text_encoding() returns "locale" or "utf-8"
|
||||||
|
# based on sys.flags.utf8_mode
|
||||||
|
code = "import io; print(io.text_encoding(None))"
|
||||||
|
|
||||||
|
proc = assert_python_ok('-X', 'utf8=0', '-c', code)
|
||||||
|
self.assertEqual(b"locale", proc.out.strip())
|
||||||
|
|
||||||
|
proc = assert_python_ok('-X', 'utf8=1', '-c', code)
|
||||||
|
self.assertEqual(b"utf-8", proc.out.strip())
|
||||||
|
|
||||||
@support.cpython_only
|
@support.cpython_only
|
||||||
# Depending if OpenWrapper was already created or not, the warning is
|
# Depending if OpenWrapper was already created or not, the warning is
|
||||||
# emitted or not. For example, the attribute is already created when this
|
# emitted or not. For example, the attribute is already created when this
|
||||||
|
|
|
@ -161,7 +161,7 @@ class UTF8ModeTests(unittest.TestCase):
|
||||||
filename = __file__
|
filename = __file__
|
||||||
|
|
||||||
out = self.get_output('-c', code, filename, PYTHONUTF8='1')
|
out = self.get_output('-c', code, filename, PYTHONUTF8='1')
|
||||||
self.assertEqual(out, 'UTF-8/strict')
|
self.assertEqual(out.lower(), 'utf-8/strict')
|
||||||
|
|
||||||
def _check_io_encoding(self, module, encoding=None, errors=None):
|
def _check_io_encoding(self, module, encoding=None, errors=None):
|
||||||
filename = __file__
|
filename = __file__
|
||||||
|
@ -183,10 +183,10 @@ class UTF8ModeTests(unittest.TestCase):
|
||||||
PYTHONUTF8='1')
|
PYTHONUTF8='1')
|
||||||
|
|
||||||
if not encoding:
|
if not encoding:
|
||||||
encoding = 'UTF-8'
|
encoding = 'utf-8'
|
||||||
if not errors:
|
if not errors:
|
||||||
errors = 'strict'
|
errors = 'strict'
|
||||||
self.assertEqual(out, f'{encoding}/{errors}')
|
self.assertEqual(out.lower(), f'{encoding}/{errors}')
|
||||||
|
|
||||||
def check_io_encoding(self, module):
|
def check_io_encoding(self, module):
|
||||||
self._check_io_encoding(module, encoding="latin1")
|
self._check_io_encoding(module, encoding="latin1")
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Make :func:`io.text_encoding` return "utf-8" when UTF-8 mode is enabled.
|
|
@ -457,8 +457,9 @@ _io.text_encoding
|
||||||
|
|
||||||
A helper function to choose the text encoding.
|
A helper function to choose the text encoding.
|
||||||
|
|
||||||
When encoding is not None, just return it.
|
When encoding is not None, this function returns it.
|
||||||
Otherwise, return the default text encoding (i.e. "locale").
|
Otherwise, this function returns the default text encoding
|
||||||
|
(i.e. "locale" or "utf-8" depends on UTF-8 mode).
|
||||||
|
|
||||||
This function emits an EncodingWarning if encoding is None and
|
This function emits an EncodingWarning if encoding is None and
|
||||||
sys.flags.warn_default_encoding is true.
|
sys.flags.warn_default_encoding is true.
|
||||||
|
@ -469,7 +470,7 @@ However, please consider using encoding="utf-8" for new APIs.
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
_io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel)
|
_io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel)
|
||||||
/*[clinic end generated code: output=91b2cfea6934cc0c input=bf70231213e2a7b4]*/
|
/*[clinic end generated code: output=91b2cfea6934cc0c input=4999aa8b3d90f3d4]*/
|
||||||
{
|
{
|
||||||
if (encoding == NULL || encoding == Py_None) {
|
if (encoding == NULL || encoding == Py_None) {
|
||||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||||
|
@ -479,7 +480,14 @@ _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return &_Py_ID(locale);
|
const PyPreConfig *preconfig = &_PyRuntime.preconfig;
|
||||||
|
if (preconfig->utf8_mode) {
|
||||||
|
_Py_DECLARE_STR(utf_8, "utf-8");
|
||||||
|
encoding = &_Py_STR(utf_8);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
encoding = &_Py_ID(locale);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Py_INCREF(encoding);
|
Py_INCREF(encoding);
|
||||||
return encoding;
|
return encoding;
|
||||||
|
|
|
@ -273,8 +273,9 @@ PyDoc_STRVAR(_io_text_encoding__doc__,
|
||||||
"\n"
|
"\n"
|
||||||
"A helper function to choose the text encoding.\n"
|
"A helper function to choose the text encoding.\n"
|
||||||
"\n"
|
"\n"
|
||||||
"When encoding is not None, just return it.\n"
|
"When encoding is not None, this function returns it.\n"
|
||||||
"Otherwise, return the default text encoding (i.e. \"locale\").\n"
|
"Otherwise, this function returns the default text encoding\n"
|
||||||
|
"(i.e. \"locale\" or \"utf-8\" depends on UTF-8 mode).\n"
|
||||||
"\n"
|
"\n"
|
||||||
"This function emits an EncodingWarning if encoding is None and\n"
|
"This function emits an EncodingWarning if encoding is None and\n"
|
||||||
"sys.flags.warn_default_encoding is true.\n"
|
"sys.flags.warn_default_encoding is true.\n"
|
||||||
|
@ -354,4 +355,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec
|
||||||
exit:
|
exit:
|
||||||
return return_value;
|
return return_value;
|
||||||
}
|
}
|
||||||
/*[clinic end generated code: output=6ea315343f6a94ba input=a9049054013a1b77]*/
|
/*[clinic end generated code: output=1a7fd7755c9a9609 input=a9049054013a1b77]*/
|
||||||
|
|
|
@ -841,7 +841,10 @@ static PyObject *
|
||||||
sys_getdefaultencoding_impl(PyObject *module)
|
sys_getdefaultencoding_impl(PyObject *module)
|
||||||
/*[clinic end generated code: output=256d19dfcc0711e6 input=d416856ddbef6909]*/
|
/*[clinic end generated code: output=256d19dfcc0711e6 input=d416856ddbef6909]*/
|
||||||
{
|
{
|
||||||
return PyUnicode_FromString(PyUnicode_GetDefaultEncoding());
|
_Py_DECLARE_STR(utf_8, "utf-8");
|
||||||
|
PyObject *ret = &_Py_STR(utf_8);
|
||||||
|
Py_INCREF(ret);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*[clinic input]
|
/*[clinic input]
|
||||||
|
|
Loading…
Reference in New Issue