mirror of https://github.com/python/cpython
gh-91156: Fix `encoding="locale"` in UTF-8 mode (GH-70056)
This commit is contained in:
parent
7b87e8af0c
commit
13b17e2a0a
|
@ -112,7 +112,7 @@ Text Encoding
|
|||
-------------
|
||||
|
||||
The default encoding of :class:`TextIOWrapper` and :func:`open` is
|
||||
locale-specific (:func:`locale.getpreferredencoding(False) <locale.getpreferredencoding>`).
|
||||
locale-specific (:func:`locale.getencoding`).
|
||||
|
||||
However, many developers forget to specify the encoding when opening text files
|
||||
encoded in UTF-8 (e.g. JSON, TOML, Markdown, etc.) since most Unix
|
||||
|
@ -948,8 +948,7 @@ Text I/O
|
|||
:class:`TextIOBase`.
|
||||
|
||||
*encoding* gives the name of the encoding that the stream will be decoded or
|
||||
encoded with. It defaults to
|
||||
:func:`locale.getpreferredencoding(False) <locale.getpreferredencoding>`.
|
||||
encoded with. It defaults to :func:`locale.getencoding()`.
|
||||
``encoding="locale"`` can be used to specify the current locale's encoding
|
||||
explicitly. See :ref:`io-text-encoding` for more information.
|
||||
|
||||
|
|
|
@ -618,7 +618,7 @@ UTF-8 mode
|
|||
|
||||
Windows still uses legacy encodings for the system encoding (the ANSI Code
|
||||
Page). Python uses it for the default encoding of text files (e.g.
|
||||
:func:`locale.getpreferredencoding`).
|
||||
:func:`locale.getencoding`).
|
||||
|
||||
This may cause issues because UTF-8 is widely used on the internet
|
||||
and most Unix systems, including WSL (Windows Subsystem for Linux).
|
||||
|
|
|
@ -1988,7 +1988,7 @@ class TextIOWrapper(TextIOBase):
|
|||
r"""Character and line based layer over a BufferedIOBase object, buffer.
|
||||
|
||||
encoding gives the name of the encoding that the stream will be
|
||||
decoded or encoded with. It defaults to locale.getpreferredencoding(False).
|
||||
decoded or encoded with. It defaults to locale.getencoding().
|
||||
|
||||
errors determines the strictness of encoding and decoding (see the
|
||||
codecs.register) and defaults to "strict".
|
||||
|
@ -2021,7 +2021,9 @@ class TextIOWrapper(TextIOBase):
|
|||
self._check_newline(newline)
|
||||
encoding = text_encoding(encoding)
|
||||
|
||||
if encoding == "locale":
|
||||
if encoding == "locale" and sys.platform == "win32":
|
||||
# On Unix, os.device_encoding() returns "utf-8" instead of the locale encoding
|
||||
# in UTF-8 mode, so we use os.device_encoding() only on Windows.
|
||||
try:
|
||||
encoding = os.device_encoding(buffer.fileno()) or "locale"
|
||||
except (AttributeError, UnsupportedOperation):
|
||||
|
@ -2034,7 +2036,7 @@ class TextIOWrapper(TextIOBase):
|
|||
# Importing locale may fail if Python is being built
|
||||
encoding = "utf-8"
|
||||
else:
|
||||
encoding = locale.getpreferredencoding(False)
|
||||
encoding = locale.getencoding()
|
||||
|
||||
if not isinstance(encoding, str):
|
||||
raise ValueError("invalid encoding: %r" % encoding)
|
||||
|
|
|
@ -557,7 +557,7 @@ def getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')):
|
|||
|
||||
import warnings
|
||||
warnings.warn(
|
||||
"Use setlocale(), getpreferredencoding(False) and getlocale() instead",
|
||||
"Use setlocale(), getencoding() and getlocale() instead",
|
||||
DeprecationWarning, stacklevel=2
|
||||
)
|
||||
|
||||
|
|
|
@ -2737,6 +2737,7 @@ class TextIOWrapperTest(unittest.TestCase):
|
|||
os.environ.update(old_environ)
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(sys.platform != "win32", "Windows-only test")
|
||||
@unittest.skipIf(sys.flags.utf8_mode, "utf-8 mode is enabled")
|
||||
def test_device_encoding(self):
|
||||
# Issue 15989
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Make :class:`TextIOWrapper` use the locale encoding when ``encoding="locale"``
|
||||
is specified even in UTF-8 mode.
|
|
@ -92,9 +92,9 @@ it already exists), 'x' for creating and writing to a new file, and
|
|||
'a' for appending (which on some Unix systems, means that all writes
|
||||
append to the end of the file regardless of the current seek position).
|
||||
In text mode, if encoding is not specified the encoding used is platform
|
||||
dependent: locale.getpreferredencoding(False) is called to get the
|
||||
current locale encoding. (For reading and writing raw bytes use binary
|
||||
mode and leave encoding unspecified.) The available modes are:
|
||||
dependent: locale.getencoding() is called to get the current locale encoding.
|
||||
(For reading and writing raw bytes use binary mode and leave encoding
|
||||
unspecified.) The available modes are:
|
||||
|
||||
========= ===============================================================
|
||||
Character Meaning
|
||||
|
@ -196,7 +196,7 @@ static PyObject *
|
|||
_io_open_impl(PyObject *module, PyObject *file, const char *mode,
|
||||
int buffering, const char *encoding, const char *errors,
|
||||
const char *newline, int closefd, PyObject *opener)
|
||||
/*[clinic end generated code: output=aefafc4ce2b46dc0 input=1543f4511d2356a5]*/
|
||||
/*[clinic end generated code: output=aefafc4ce2b46dc0 input=5bb37f174cb2fb11]*/
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
|
|
|
@ -22,9 +22,9 @@ PyDoc_STRVAR(_io_open__doc__,
|
|||
"\'a\' for appending (which on some Unix systems, means that all writes\n"
|
||||
"append to the end of the file regardless of the current seek position).\n"
|
||||
"In text mode, if encoding is not specified the encoding used is platform\n"
|
||||
"dependent: locale.getpreferredencoding(False) is called to get the\n"
|
||||
"current locale encoding. (For reading and writing raw bytes use binary\n"
|
||||
"mode and leave encoding unspecified.) The available modes are:\n"
|
||||
"dependent: locale.getencoding() is called to get the current locale encoding.\n"
|
||||
"(For reading and writing raw bytes use binary mode and leave encoding\n"
|
||||
"unspecified.) The available modes are:\n"
|
||||
"\n"
|
||||
"========= ===============================================================\n"
|
||||
"Character Meaning\n"
|
||||
|
@ -355,4 +355,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec
|
|||
exit:
|
||||
return return_value;
|
||||
}
|
||||
/*[clinic end generated code: output=1a7fd7755c9a9609 input=a9049054013a1b77]*/
|
||||
/*[clinic end generated code: output=e562f29e3c2533a6 input=a9049054013a1b77]*/
|
||||
|
|
|
@ -146,7 +146,7 @@ PyDoc_STRVAR(_io_TextIOWrapper___init____doc__,
|
|||
"Character and line based layer over a BufferedIOBase object, buffer.\n"
|
||||
"\n"
|
||||
"encoding gives the name of the encoding that the stream will be\n"
|
||||
"decoded or encoded with. It defaults to locale.getpreferredencoding(False).\n"
|
||||
"decoded or encoded with. It defaults to locale.getencoding().\n"
|
||||
"\n"
|
||||
"errors determines the strictness of encoding and decoding (see\n"
|
||||
"help(codecs.Codec) or the documentation for codecs.register) and\n"
|
||||
|
@ -671,4 +671,4 @@ _io_TextIOWrapper_close(textio *self, PyObject *Py_UNUSED(ignored))
|
|||
{
|
||||
return _io_TextIOWrapper_close_impl(self);
|
||||
}
|
||||
/*[clinic end generated code: output=2604c8f3a45b9a03 input=a9049054013a1b77]*/
|
||||
/*[clinic end generated code: output=e88abad34e31c0cb input=a9049054013a1b77]*/
|
||||
|
|
|
@ -1023,7 +1023,7 @@ _io.TextIOWrapper.__init__
|
|||
Character and line based layer over a BufferedIOBase object, buffer.
|
||||
|
||||
encoding gives the name of the encoding that the stream will be
|
||||
decoded or encoded with. It defaults to locale.getpreferredencoding(False).
|
||||
decoded or encoded with. It defaults to locale.getencoding().
|
||||
|
||||
errors determines the strictness of encoding and decoding (see
|
||||
help(codecs.Codec) or the documentation for codecs.register) and
|
||||
|
@ -1055,12 +1055,12 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
|
|||
const char *encoding, PyObject *errors,
|
||||
const char *newline, int line_buffering,
|
||||
int write_through)
|
||||
/*[clinic end generated code: output=72267c0c01032ed2 input=77d8696d1a1f460b]*/
|
||||
/*[clinic end generated code: output=72267c0c01032ed2 input=72590963698f289b]*/
|
||||
{
|
||||
PyObject *raw, *codec_info = NULL;
|
||||
_PyIO_State *state = NULL;
|
||||
PyObject *res;
|
||||
int r;
|
||||
int use_locale_encoding = 0; // Use locale encoding even in UTF-8 mode.
|
||||
|
||||
self->ok = 0;
|
||||
self->detached = 0;
|
||||
|
@ -1076,6 +1076,7 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
|
|||
}
|
||||
else if (strcmp(encoding, "locale") == 0) {
|
||||
encoding = NULL;
|
||||
use_locale_encoding = 1;
|
||||
}
|
||||
|
||||
if (errors == Py_None) {
|
||||
|
@ -1113,10 +1114,15 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
|
|||
self->encodefunc = NULL;
|
||||
self->b2cratio = 0.0;
|
||||
|
||||
#ifdef MS_WINDOWS
|
||||
// os.device_encoding() on Unix is the locale encoding or UTF-8
|
||||
// according to UTF-8 Mode.
|
||||
// Since UTF-8 mode shouldn't affect `encoding="locale"`, we call
|
||||
// os.device_encoding() only on Windows.
|
||||
if (encoding == NULL) {
|
||||
/* Try os.device_encoding(fileno) */
|
||||
PyObject *fileno;
|
||||
state = IO_STATE();
|
||||
_PyIO_State *state = IO_STATE();
|
||||
if (state == NULL)
|
||||
goto error;
|
||||
fileno = PyObject_CallMethodNoArgs(buffer, &_Py_ID(fileno));
|
||||
|
@ -1144,8 +1150,10 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
|
|||
Py_CLEAR(self->encoding);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (encoding == NULL && self->encoding == NULL) {
|
||||
if (_PyRuntime.preconfig.utf8_mode) {
|
||||
if (_PyRuntime.preconfig.utf8_mode && !use_locale_encoding) {
|
||||
_Py_DECLARE_STR(utf_8, "utf-8");
|
||||
self->encoding = Py_NewRef(&_Py_STR(utf_8));
|
||||
}
|
||||
|
|
|
@ -251,7 +251,6 @@ Modules/_io/textio.c:PyId_close _Py_IDENTIFIER(
|
|||
Modules/_io/textio.c:PyId_decode _Py_IDENTIFIER(decode)
|
||||
Modules/_io/textio.c:PyId_fileno _Py_IDENTIFIER(fileno)
|
||||
Modules/_io/textio.c:PyId_flush _Py_IDENTIFIER(flush)
|
||||
Modules/_io/textio.c:PyId_getpreferredencoding _Py_IDENTIFIER(getpreferredencoding)
|
||||
Modules/_io/textio.c:PyId_isatty _Py_IDENTIFIER(isatty)
|
||||
Modules/_io/textio.c:PyId_mode _Py_IDENTIFIER(mode)
|
||||
Modules/_io/textio.c:PyId_name _Py_IDENTIFIER(name)
|
||||
|
|
Loading…
Reference in New Issue