mirror of https://github.com/python/cpython
bpo-43510: Implement PEP 597 opt-in EncodingWarning. (GH-19481)
See [PEP 597](https://www.python.org/dev/peps/pep-0597/). * Add `-X warn_default_encoding` and `PYTHONWARNDEFAULTENCODING`. * Add EncodingWarning * Add io.text_encoding() * open(), TextIOWrapper() emits EncodingWarning when encoding is omitted and warn_default_encoding is enabled. * _pyio.TextIOWrapper() uses UTF-8 as fallback default encoding used when failed to import locale module. (used during building Python) * bz2, configparser, gzip, lzma, pathlib, tempfile modules use io.text_encoding(). * What's new entry
This commit is contained in:
parent
261a452a13
commit
4827483f47
|
@ -583,6 +583,15 @@ PyConfig
|
|||
|
||||
Default: ``0``.
|
||||
|
||||
.. c:member:: int warn_default_encoding
|
||||
|
||||
If non-zero, emit an :exc:`EncodingWarning` warning when :class:`io.TextIOWrapper`
|
||||
uses its default encoding. See :ref:`io-encoding-warning` for details.
|
||||
|
||||
Default: ``0``.
|
||||
|
||||
.. versionadded:: 3.10
|
||||
|
||||
.. c:member:: wchar_t* check_hash_pycs_mode
|
||||
|
||||
Control the validation behavior of hash-based ``.pyc`` files:
|
||||
|
|
|
@ -741,6 +741,15 @@ The following exceptions are used as warning categories; see the
|
|||
Base class for warnings related to Unicode.
|
||||
|
||||
|
||||
.. exception:: EncodingWarning
|
||||
|
||||
Base class for warnings related to encodings.
|
||||
|
||||
See :ref:`io-encoding-warning` for details.
|
||||
|
||||
.. versionadded:: 3.10
|
||||
|
||||
|
||||
.. exception:: BytesWarning
|
||||
|
||||
Base class for warnings related to :class:`bytes` and :class:`bytearray`.
|
||||
|
|
|
@ -106,6 +106,56 @@ stream by opening a file in binary mode with buffering disabled::
|
|||
The raw stream API is described in detail in the docs of :class:`RawIOBase`.
|
||||
|
||||
|
||||
.. _io-text-encoding:
|
||||
|
||||
Text Encoding
|
||||
-------------
|
||||
|
||||
The default encoding of :class:`TextIOWrapper` and :func:`open` is
|
||||
locale-specific (:func:`locale.getpreferredencoding(False) <locale.getpreferredencoding>`).
|
||||
|
||||
However, many developers forget to specify the encoding when opening text files
|
||||
encoded in UTF-8 (e.g. JSON, TOML, Markdown, etc...) since most Unix
|
||||
platforms use UTF-8 locale by default. This causes bugs because the locale
|
||||
encoding is not UTF-8 for most Windows users. For example::
|
||||
|
||||
# May not work on Windows when there are non-ASCII characters in the file.
|
||||
with open("README.md") as f:
|
||||
long_description = f.read()
|
||||
|
||||
Additionally, while there is no concrete plan as of yet, Python may change
|
||||
the default text file encoding to UTF-8 in the future.
|
||||
|
||||
Accordingly, it is highly recommended that you specify the encoding
|
||||
explicitly when opening text files. If you want to use UTF-8, pass
|
||||
``encoding="utf-8"``. To use the current locale encoding,
|
||||
``encoding="locale"`` is supported in Python 3.10.
|
||||
|
||||
When you need to run existing code on Windows that attempts to open
|
||||
UTF-8 files using the default locale encoding, you can enable the UTF-8
|
||||
mode. See :ref:`UTF-8 mode on Windows <win-utf8-mode>`.
|
||||
|
||||
.. _io-encoding-warning:
|
||||
|
||||
Opt-in EncodingWarning
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. versionadded:: 3.10
|
||||
See :pep:`597` for more details.
|
||||
|
||||
To find where the default locale encoding is used, you can enable
|
||||
the ``-X warn_default_encoding`` command line option or set the
|
||||
:envvar:`PYTHONWARNDEFAULTENCODING` environment variable, which will
|
||||
emit an :exc:`EncodingWarning` when the default encoding is used.
|
||||
|
||||
If you are providing an API that uses :func:`open` or
|
||||
:class:`TextIOWrapper` and passes ``encoding=None`` as a parameter, you
|
||||
can use :func:`text_encoding` so that callers of the API will emit an
|
||||
:exc:`EncodingWarning` if they don't pass an ``encoding``. However,
|
||||
please consider using UTF-8 by default (i.e. ``encoding="utf-8"``) for
|
||||
new APIs.
|
||||
|
||||
|
||||
High-level Module Interface
|
||||
---------------------------
|
||||
|
||||
|
@ -143,6 +193,32 @@ High-level Module Interface
|
|||
.. versionadded:: 3.8
|
||||
|
||||
|
||||
.. function:: text_encoding(encoding, stacklevel=2)
|
||||
|
||||
This is a helper function for callables that use :func:`open` or
|
||||
:class:`TextIOWrapper` and have an ``encoding=None`` parameter.
|
||||
|
||||
This function returns *encoding* if it is not ``None`` and ``"locale"`` if
|
||||
*encoding* is ``None``.
|
||||
|
||||
This function emits an :class:`EncodingWarning` if
|
||||
:data:`sys.flags.warn_default_encoding <sys.flags>` is true and *encoding*
|
||||
is None. *stacklevel* specifies where the warning is emitted.
|
||||
For example::
|
||||
|
||||
def read_text(path, encoding=None):
|
||||
encoding = io.text_encoding(encoding) # stacklevel=2
|
||||
with open(path, encoding=encoding) as f:
|
||||
return f.read()
|
||||
|
||||
In this example, an :class:`EncodingWarning` is emitted for the caller of
|
||||
``read_text()``.
|
||||
|
||||
See :ref:`io-text-encoding` for more information.
|
||||
|
||||
.. versionadded:: 3.10
|
||||
|
||||
|
||||
.. exception:: BlockingIOError
|
||||
|
||||
This is a compatibility alias for the builtin :exc:`BlockingIOError`
|
||||
|
@ -869,6 +945,8 @@ Text I/O
|
|||
*encoding* gives the name of the encoding that the stream will be decoded or
|
||||
encoded with. It defaults to
|
||||
:func:`locale.getpreferredencoding(False) <locale.getpreferredencoding>`.
|
||||
``encoding="locale"`` can be used to specify the current locale's encoding
|
||||
explicitly. See :ref:`io-text-encoding` for more information.
|
||||
|
||||
*errors* is an optional string that specifies how encoding and decoding
|
||||
errors are to be handled. Pass ``'strict'`` to raise a :exc:`ValueError`
|
||||
|
@ -920,6 +998,9 @@ Text I/O
|
|||
locale encoding using :func:`locale.setlocale`, use the current locale
|
||||
encoding instead of the user preferred encoding.
|
||||
|
||||
.. versionchanged:: 3.10
|
||||
The *encoding* argument now supports the ``"locale"`` dummy encoding name.
|
||||
|
||||
:class:`TextIOWrapper` provides these data attributes and methods in
|
||||
addition to those from :class:`TextIOBase` and :class:`IOBase`:
|
||||
|
||||
|
|
|
@ -453,6 +453,9 @@ Miscellaneous options
|
|||
* ``-X pycache_prefix=PATH`` enables writing ``.pyc`` files to a parallel
|
||||
tree rooted at the given directory instead of to the code tree. See also
|
||||
:envvar:`PYTHONPYCACHEPREFIX`.
|
||||
* ``-X warn_default_encoding`` issues an :class:`EncodingWarning` when the
|
||||
locale-specific default encoding is used for opening files.
|
||||
See also :envvar:`PYTHONWARNDEFAULTENCODING`.
|
||||
|
||||
It also allows passing arbitrary values and retrieving them through the
|
||||
:data:`sys._xoptions` dictionary.
|
||||
|
@ -482,6 +485,9 @@ Miscellaneous options
|
|||
|
||||
The ``-X showalloccount`` option has been removed.
|
||||
|
||||
.. versionadded:: 3.10
|
||||
The ``-X warn_default_encoding`` option.
|
||||
|
||||
.. deprecated-removed:: 3.9 3.10
|
||||
The ``-X oldparser`` option.
|
||||
|
||||
|
@ -907,6 +913,15 @@ conflict.
|
|||
|
||||
.. versionadded:: 3.7
|
||||
|
||||
.. envvar:: PYTHONWARNDEFAULTENCODING
|
||||
|
||||
If this environment variable is set to a non-empty string, issue an
|
||||
:class:`EncodingWarning` when the locale-specific default encoding is used.
|
||||
|
||||
See :ref:`io-encoding-warning` for details.
|
||||
|
||||
.. versionadded:: 3.10
|
||||
|
||||
|
||||
Debug-mode variables
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
|
|
@ -454,6 +454,30 @@ For the full specification see :pep:`634`. Motivation and rationale
|
|||
are in :pep:`635`, and a longer tutorial is in :pep:`636`.
|
||||
|
||||
|
||||
.. _whatsnew310-pep597:
|
||||
|
||||
Optional ``EncodingWarning`` and ``encoding="locale"`` option
|
||||
-------------------------------------------------------------
|
||||
|
||||
The default encoding of :class:`TextIOWrapper` and :func:`open` is
|
||||
platform and locale dependent. Since UTF-8 is used on most Unix
|
||||
platforms, omitting ``encoding`` option when opening UTF-8 files
|
||||
(e.g. JSON, YAML, TOML, Markdown) is a very common bug. For example::
|
||||
|
||||
# BUG: "rb" mode or encoding="utf-8" should be used.
|
||||
with open("data.json") as f:
|
||||
data = json.load(f)
|
||||
|
||||
To find this type of bug, an optional ``EncodingWarning`` is added.
|
||||
It is emitted when :data:`sys.flags.warn_default_encoding <sys.flags>`
|
||||
is true and the locale-specific default encoding is used.
|
||||
|
||||
``-X warn_default_encoding`` option and :envvar:`PYTHONWARNDEFAULTENCODING`
|
||||
are added to enable the warning.
|
||||
|
||||
See :ref:`io-text-encoding` for more information.
|
||||
|
||||
|
||||
New Features Related to Type Annotations
|
||||
========================================
|
||||
|
||||
|
|
|
@ -153,6 +153,7 @@ typedef struct PyConfig {
|
|||
PyWideStringList warnoptions;
|
||||
int site_import;
|
||||
int bytes_warning;
|
||||
int warn_default_encoding;
|
||||
int inspect;
|
||||
int interactive;
|
||||
int optimization_level;
|
||||
|
|
|
@ -102,6 +102,7 @@ typedef struct {
|
|||
int isolated; /* -I option */
|
||||
int use_environment; /* -E option */
|
||||
int dev_mode; /* -X dev and PYTHONDEVMODE */
|
||||
int warn_default_encoding; /* -X warn_default_encoding and PYTHONWARNDEFAULTENCODING */
|
||||
} _PyPreCmdline;
|
||||
|
||||
#define _PyPreCmdline_INIT \
|
||||
|
|
|
@ -146,6 +146,7 @@ PyAPI_DATA(PyObject *) PyExc_FutureWarning;
|
|||
PyAPI_DATA(PyObject *) PyExc_ImportWarning;
|
||||
PyAPI_DATA(PyObject *) PyExc_UnicodeWarning;
|
||||
PyAPI_DATA(PyObject *) PyExc_BytesWarning;
|
||||
PyAPI_DATA(PyObject *) PyExc_EncodingWarning;
|
||||
PyAPI_DATA(PyObject *) PyExc_ResourceWarning;
|
||||
|
||||
|
||||
|
|
47
Lib/_pyio.py
47
Lib/_pyio.py
|
@ -40,6 +40,29 @@ _IOBASE_EMITS_UNRAISABLE = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mo
|
|||
_CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE
|
||||
|
||||
|
||||
def text_encoding(encoding, stacklevel=2):
|
||||
"""
|
||||
A helper function to choose the text encoding.
|
||||
|
||||
When encoding is not None, just return it.
|
||||
Otherwise, return the default text encoding (i.e. "locale").
|
||||
|
||||
This function emits an EncodingWarning if *encoding* is None and
|
||||
sys.flags.warn_default_encoding is true.
|
||||
|
||||
This can be used in APIs with an encoding=None parameter
|
||||
that pass it to TextIOWrapper or open.
|
||||
However, please consider using encoding="utf-8" for new APIs.
|
||||
"""
|
||||
if encoding is None:
|
||||
encoding = "locale"
|
||||
if sys.flags.warn_default_encoding:
|
||||
# Imported lazily: warnings is only needed when the opt-in flag is set.
import warnings
|
||||
# "+ 1" skips this helper's own frame, so with the default stacklevel=2
# the warning is attributed to the caller of the function that called
# text_encoding().
warnings.warn("'encoding' argument not specified.",
|
||||
EncodingWarning, stacklevel + 1)
|
||||
return encoding
|
||||
|
||||
|
||||
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
|
||||
newline=None, closefd=True, opener=None):
|
||||
|
||||
|
@ -248,6 +271,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None,
|
|||
result = buffer
|
||||
if binary:
|
||||
return result
|
||||
encoding = text_encoding(encoding)
|
||||
text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
|
||||
result = text
|
||||
text.mode = mode
|
||||
|
@ -2004,19 +2028,22 @@ class TextIOWrapper(TextIOBase):
|
|||
def __init__(self, buffer, encoding=None, errors=None, newline=None,
|
||||
line_buffering=False, write_through=False):
|
||||
self._check_newline(newline)
|
||||
if encoding is None:
|
||||
encoding = text_encoding(encoding)
|
||||
|
||||
if encoding == "locale":
|
||||
try:
|
||||
encoding = os.device_encoding(buffer.fileno())
|
||||
encoding = os.device_encoding(buffer.fileno()) or "locale"
|
||||
except (AttributeError, UnsupportedOperation):
|
||||
pass
|
||||
if encoding is None:
|
||||
try:
|
||||
import locale
|
||||
except ImportError:
|
||||
# Importing locale may fail if Python is being built
|
||||
encoding = "ascii"
|
||||
else:
|
||||
encoding = locale.getpreferredencoding(False)
|
||||
|
||||
if encoding == "locale":
|
||||
try:
|
||||
import locale
|
||||
except ImportError:
|
||||
# Importing locale may fail if Python is being built
|
||||
encoding = "utf-8"
|
||||
else:
|
||||
encoding = locale.getpreferredencoding(False)
|
||||
|
||||
if not isinstance(encoding, str):
|
||||
raise ValueError("invalid encoding: %r" % encoding)
|
||||
|
|
|
@ -311,6 +311,7 @@ def open(filename, mode="rb", compresslevel=9,
|
|||
binary_file = BZ2File(filename, bz_mode, compresslevel=compresslevel)
|
||||
|
||||
if "t" in mode:
|
||||
encoding = io.text_encoding(encoding)
|
||||
return io.TextIOWrapper(binary_file, encoding, errors, newline)
|
||||
else:
|
||||
return binary_file
|
||||
|
|
|
@ -690,6 +690,7 @@ class RawConfigParser(MutableMapping):
|
|||
"""
|
||||
if isinstance(filenames, (str, bytes, os.PathLike)):
|
||||
filenames = [filenames]
|
||||
encoding = io.text_encoding(encoding)
|
||||
read_ok = []
|
||||
for filename in filenames:
|
||||
try:
|
||||
|
|
|
@ -62,6 +62,7 @@ def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_BEST,
|
|||
raise TypeError("filename must be a str or bytes object, or a file")
|
||||
|
||||
if "t" in mode:
|
||||
encoding = io.text_encoding(encoding)
|
||||
return io.TextIOWrapper(binary_file, encoding, errors, newline)
|
||||
else:
|
||||
return binary_file
|
||||
|
|
|
@ -54,7 +54,7 @@ import abc
|
|||
from _io import (DEFAULT_BUFFER_SIZE, BlockingIOError, UnsupportedOperation,
|
||||
open, open_code, FileIO, BytesIO, StringIO, BufferedReader,
|
||||
BufferedWriter, BufferedRWPair, BufferedRandom,
|
||||
IncrementalNewlineDecoder, TextIOWrapper)
|
||||
IncrementalNewlineDecoder, text_encoding, TextIOWrapper)
|
||||
|
||||
OpenWrapper = _io.open # for compatibility with _pyio
|
||||
|
||||
|
|
|
@ -302,6 +302,7 @@ def open(filename, mode="rb", *,
|
|||
preset=preset, filters=filters)
|
||||
|
||||
if "t" in mode:
|
||||
encoding = io.text_encoding(encoding)
|
||||
return io.TextIOWrapper(binary_file, encoding, errors, newline)
|
||||
else:
|
||||
return binary_file
|
||||
|
|
|
@ -1241,6 +1241,8 @@ class Path(PurePath):
|
|||
Open the file pointed by this path and return a file object, as
|
||||
the built-in open() function does.
|
||||
"""
|
||||
if "b" not in mode:
|
||||
encoding = io.text_encoding(encoding)
|
||||
return io.open(self, mode, buffering, encoding, errors, newline,
|
||||
opener=self._opener)
|
||||
|
||||
|
@ -1255,6 +1257,7 @@ class Path(PurePath):
|
|||
"""
|
||||
Open the file in text mode, read it, and close the file.
|
||||
"""
|
||||
encoding = io.text_encoding(encoding)
|
||||
with self.open(mode='r', encoding=encoding, errors=errors) as f:
|
||||
return f.read()
|
||||
|
||||
|
@ -1274,6 +1277,7 @@ class Path(PurePath):
|
|||
if not isinstance(data, str):
|
||||
raise TypeError('data must be str, not %s' %
|
||||
data.__class__.__name__)
|
||||
encoding = io.text_encoding(encoding)
|
||||
with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f:
|
||||
return f.write(data)
|
||||
|
||||
|
|
|
@ -170,7 +170,9 @@ def addpackage(sitedir, name, known_paths):
|
|||
fullname = os.path.join(sitedir, name)
|
||||
_trace(f"Processing .pth file: {fullname!r}")
|
||||
try:
|
||||
f = io.TextIOWrapper(io.open_code(fullname))
|
||||
# locale encoding is not ideal especially on Windows. But we have used
|
||||
# it for a long time. setuptools uses the locale encoding too.
|
||||
f = io.TextIOWrapper(io.open_code(fullname), encoding="locale")
|
||||
except OSError:
|
||||
return
|
||||
with f:
|
||||
|
|
|
@ -693,7 +693,7 @@ def _use_posix_spawn():
|
|||
_USE_POSIX_SPAWN = _use_posix_spawn()
|
||||
|
||||
|
||||
class Popen(object):
|
||||
class Popen:
|
||||
""" Execute a child program in a new process.
|
||||
|
||||
For a complete description of the arguments see the Python documentation.
|
||||
|
@ -844,6 +844,13 @@ class Popen(object):
|
|||
|
||||
self.text_mode = encoding or errors or text or universal_newlines
|
||||
|
||||
# PEP 597: We suppress the EncodingWarning in subprocess module
|
||||
# for now (at Python 3.10), because we focus on files for now.
|
||||
# This will be changed to encoding = io.text_encoding(encoding)
|
||||
# in the future.
|
||||
if self.text_mode and encoding is None:
|
||||
self.encoding = encoding = "locale"
|
||||
|
||||
# How long to resume waiting on a child after the first ^C.
|
||||
# There is no right value for this. The purpose is to be polite
|
||||
# yet remain good for interactive users trying to exit a tool.
|
||||
|
|
|
@ -543,6 +543,9 @@ def NamedTemporaryFile(mode='w+b', buffering=-1, encoding=None,
|
|||
if _os.name == 'nt' and delete:
|
||||
flags |= _os.O_TEMPORARY
|
||||
|
||||
if "b" not in mode:
|
||||
encoding = _io.text_encoding(encoding)
|
||||
|
||||
(fd, name) = _mkstemp_inner(dir, prefix, suffix, flags, output_type)
|
||||
try:
|
||||
file = _io.open(fd, mode, buffering=buffering,
|
||||
|
@ -583,6 +586,9 @@ else:
|
|||
"""
|
||||
global _O_TMPFILE_WORKS
|
||||
|
||||
if "b" not in mode:
|
||||
encoding = _io.text_encoding(encoding)
|
||||
|
||||
prefix, suffix, dir, output_type = _sanitize_params(prefix, suffix, dir)
|
||||
|
||||
flags = _bin_openflags
|
||||
|
@ -638,6 +644,7 @@ class SpooledTemporaryFile:
|
|||
if 'b' in mode:
|
||||
self._file = _io.BytesIO()
|
||||
else:
|
||||
encoding = _io.text_encoding(encoding)
|
||||
self._file = _io.TextIOWrapper(_io.BytesIO(),
|
||||
encoding=encoding, errors=errors,
|
||||
newline=newline)
|
||||
|
|
|
@ -61,4 +61,5 @@ BaseException
|
|||
+-- ImportWarning
|
||||
+-- UnicodeWarning
|
||||
+-- BytesWarning
|
||||
+-- EncodingWarning
|
||||
+-- ResourceWarning
|
||||
|
|
|
@ -389,6 +389,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
|
|||
|
||||
'site_import': 1,
|
||||
'bytes_warning': 0,
|
||||
'warn_default_encoding': 0,
|
||||
'inspect': 0,
|
||||
'interactive': 0,
|
||||
'optimization_level': 0,
|
||||
|
|
|
@ -4249,6 +4249,29 @@ class MiscIOTest(unittest.TestCase):
|
|||
proc = assert_python_failure('-X', 'dev', '-c', code)
|
||||
self.assertEqual(proc.rc, 10, proc)
|
||||
|
||||
def test_check_encoding_warning(self):
|
||||
# PEP 597: Raise warning when encoding is not specified
|
||||
# and sys.flags.warn_default_encoding is set.
|
||||
# self.io is the io implementation under test; its module name is
# substituted into the snippet below.
mod = self.io.__name__
|
||||
filename = __file__
|
||||
# The "# line N" markers inside the snippet are the line numbers the
# assertions below expect in the warning output.
code = textwrap.dedent(f'''\
|
||||
import sys
|
||||
from {mod} import open, TextIOWrapper
|
||||
import pathlib
|
||||
|
||||
with open({filename!r}) as f: # line 5
|
||||
pass
|
||||
|
||||
pathlib.Path({filename!r}).read_text() # line 8
|
||||
''')
|
||||
# Run the snippet in a child interpreter with the opt-in flag enabled.
proc = assert_python_ok('-X', 'warn_default_encoding', '-c', code)
|
||||
warnings = proc.err.splitlines()
|
||||
# Exactly two stderr lines are expected, attributed to lines 5 and 8
# of the snippet (the open() call and the read_text() call).
self.assertEqual(len(warnings), 2)
|
||||
self.assertTrue(
|
||||
warnings[0].startswith(b"<string>:5: EncodingWarning: "))
|
||||
self.assertTrue(
|
||||
warnings[1].startswith(b"<string>:8: EncodingWarning: "))
|
||||
|
||||
|
||||
class CMiscIOTest(MiscIOTest):
|
||||
io = io
|
||||
|
|
|
@ -483,7 +483,8 @@ class CompatPickleTests(unittest.TestCase):
|
|||
if exc in (BlockingIOError,
|
||||
ResourceWarning,
|
||||
StopAsyncIteration,
|
||||
RecursionError):
|
||||
RecursionError,
|
||||
EncodingWarning):
|
||||
continue
|
||||
if exc is not OSError and issubclass(exc, OSError):
|
||||
self.assertEqual(reverse_mapping('builtins', name),
|
||||
|
|
|
@ -591,7 +591,8 @@ class SysModuleTest(unittest.TestCase):
|
|||
"inspect", "interactive", "optimize",
|
||||
"dont_write_bytecode", "no_user_site", "no_site",
|
||||
"ignore_environment", "verbose", "bytes_warning", "quiet",
|
||||
"hash_randomization", "isolated", "dev_mode", "utf8_mode")
|
||||
"hash_randomization", "isolated", "dev_mode", "utf8_mode",
|
||||
"warn_default_encoding")
|
||||
for attr in attrs:
|
||||
self.assertTrue(hasattr(sys.flags, attr), attr)
|
||||
attr_type = bool if attr == "dev_mode" else int
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
Implement :pep:`597`: Add ``EncodingWarning`` warning, ``-X
|
||||
warn_default_encoding`` option, :envvar:`PYTHONWARNDEFAULTENCODING`
|
||||
environment variable and ``encoding="locale"`` argument value.
|
|
@ -10,6 +10,7 @@
|
|||
#define PY_SSIZE_T_CLEAN
|
||||
#include "Python.h"
|
||||
#include "_iomodule.h"
|
||||
#include "pycore_pystate.h" // _PyInterpreterState_GET()
|
||||
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
|
@ -33,6 +34,7 @@ PyObject *_PyIO_str_fileno = NULL;
|
|||
PyObject *_PyIO_str_flush = NULL;
|
||||
PyObject *_PyIO_str_getstate = NULL;
|
||||
PyObject *_PyIO_str_isatty = NULL;
|
||||
PyObject *_PyIO_str_locale = NULL;
|
||||
PyObject *_PyIO_str_newlines = NULL;
|
||||
PyObject *_PyIO_str_nl = NULL;
|
||||
PyObject *_PyIO_str_peek = NULL;
|
||||
|
@ -504,6 +506,43 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/*[clinic input]
|
||||
_io.text_encoding
|
||||
encoding: object
|
||||
stacklevel: int = 2
|
||||
/
|
||||
|
||||
A helper function to choose the text encoding.
|
||||
|
||||
When encoding is not None, just return it.
|
||||
Otherwise, return the default text encoding (i.e. "locale").
|
||||
|
||||
This function emits an EncodingWarning if encoding is None and
|
||||
sys.flags.warn_default_encoding is true.
|
||||
|
||||
This can be used in APIs with an encoding=None parameter.
|
||||
However, please consider using encoding="utf-8" for new APIs.
|
||||
[clinic start generated code]*/
|
||||
|
||||
static PyObject *
_io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel)
/*[clinic end generated code: output=91b2cfea6934cc0c input=bf70231213e2a7b4]*/
{
    /* Returns a new reference: "locale" when no encoding was given,
       otherwise the caller's encoding object unchanged.  Returns NULL
       with an exception set if emitting the warning fails. */
    if (encoding == NULL || encoding == Py_None) {
        PyInterpreterState *interp = _PyInterpreterState_GET();
        if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
            /* PyErr_WarnEx() returns -1 with an exception set when the
               warning is turned into an error (e.g. -W error).  That
               failure must be propagated; returning a valid object with
               an exception pending would be an API violation. */
            if (PyErr_WarnEx(PyExc_EncodingWarning,
                             "'encoding' argument not specified",
                             stacklevel) < 0) {
                return NULL;
            }
        }
        Py_INCREF(_PyIO_str_locale);
        return _PyIO_str_locale;
    }
    Py_INCREF(encoding);
    return encoding;
}
|
||||
|
||||
|
||||
/*[clinic input]
|
||||
_io.open_code
|
||||
|
||||
|
@ -629,6 +668,7 @@ iomodule_free(PyObject *mod) {
|
|||
|
||||
static PyMethodDef module_methods[] = {
|
||||
_IO_OPEN_METHODDEF
|
||||
_IO_TEXT_ENCODING_METHODDEF
|
||||
_IO_OPEN_CODE_METHODDEF
|
||||
{NULL, NULL}
|
||||
};
|
||||
|
@ -747,6 +787,7 @@ PyInit__io(void)
|
|||
ADD_INTERNED(flush)
|
||||
ADD_INTERNED(getstate)
|
||||
ADD_INTERNED(isatty)
|
||||
ADD_INTERNED(locale)
|
||||
ADD_INTERNED(newlines)
|
||||
ADD_INTERNED(peek)
|
||||
ADD_INTERNED(read)
|
||||
|
|
|
@ -272,6 +272,52 @@ exit:
|
|||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_io_text_encoding__doc__,
|
||||
"text_encoding($module, encoding, stacklevel=2, /)\n"
|
||||
"--\n"
|
||||
"\n"
|
||||
"A helper function to choose the text encoding.\n"
|
||||
"\n"
|
||||
"When encoding is not None, just return it.\n"
|
||||
"Otherwise, return the default text encoding (i.e. \"locale\").\n"
|
||||
"\n"
|
||||
"This function emits an EncodingWarning if encoding is None and\n"
|
||||
"sys.flags.warn_default_encoding is true.\n"
|
||||
"\n"
|
||||
"This can be used in APIs with an encoding=None parameter.\n"
|
||||
"However, please consider using encoding=\"utf-8\" for new APIs.");
|
||||
|
||||
#define _IO_TEXT_ENCODING_METHODDEF \
|
||||
{"text_encoding", (PyCFunction)(void(*)(void))_io_text_encoding, METH_FASTCALL, _io_text_encoding__doc__},
|
||||
|
||||
static PyObject *
|
||||
_io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel);
|
||||
|
||||
static PyObject *
|
||||
_io_text_encoding(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
|
||||
{
|
||||
PyObject *return_value = NULL;
|
||||
PyObject *encoding;
|
||||
int stacklevel = 2;
|
||||
|
||||
if (!_PyArg_CheckPositional("text_encoding", nargs, 1, 2)) {
|
||||
goto exit;
|
||||
}
|
||||
encoding = args[0];
|
||||
if (nargs < 2) {
|
||||
goto skip_optional;
|
||||
}
|
||||
stacklevel = _PyLong_AsInt(args[1]);
|
||||
if (stacklevel == -1 && PyErr_Occurred()) {
|
||||
goto exit;
|
||||
}
|
||||
skip_optional:
|
||||
return_value = _io_text_encoding_impl(module, encoding, stacklevel);
|
||||
|
||||
exit:
|
||||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_io_open_code__doc__,
|
||||
"open_code($module, /, path)\n"
|
||||
"--\n"
|
||||
|
@ -313,4 +359,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec
|
|||
exit:
|
||||
return return_value;
|
||||
}
|
||||
/*[clinic end generated code: output=5c0dd7a262c30ebc input=a9049054013a1b77]*/
|
||||
/*[clinic end generated code: output=06e055d1d80b835d input=a9049054013a1b77]*/
|
||||
|
|
|
@ -1123,6 +1123,17 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
|
|||
self->encodefunc = NULL;
|
||||
self->b2cratio = 0.0;
|
||||
|
||||
if (encoding == NULL) {
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
|
||||
PyErr_WarnEx(PyExc_EncodingWarning,
|
||||
"'encoding' argument not specified", 1);
|
||||
}
|
||||
}
|
||||
else if (strcmp(encoding, "locale") == 0) {
|
||||
encoding = NULL;
|
||||
}
|
||||
|
||||
if (encoding == NULL) {
|
||||
/* Try os.device_encoding(fileno) */
|
||||
PyObject *fileno;
|
||||
|
|
|
@ -2464,6 +2464,13 @@ SimpleExtendsException(PyExc_Warning, BytesWarning,
|
|||
"related to conversion from str or comparing to str.");
|
||||
|
||||
|
||||
/*
|
||||
* EncodingWarning extends Warning
|
||||
*/
|
||||
SimpleExtendsException(PyExc_Warning, EncodingWarning,
|
||||
"Base class for warnings about encodings.");
|
||||
|
||||
|
||||
/*
|
||||
* ResourceWarning extends Warning
|
||||
*/
|
||||
|
@ -2592,6 +2599,7 @@ _PyExc_Init(PyInterpreterState *interp)
|
|||
PRE_INIT(BufferError);
|
||||
PRE_INIT(Warning);
|
||||
PRE_INIT(UserWarning);
|
||||
PRE_INIT(EncodingWarning);
|
||||
PRE_INIT(DeprecationWarning);
|
||||
PRE_INIT(PendingDeprecationWarning);
|
||||
PRE_INIT(SyntaxWarning);
|
||||
|
@ -2731,6 +2739,7 @@ _PyBuiltins_AddExceptions(PyObject *bltinmod)
|
|||
POST_INIT(BufferError);
|
||||
POST_INIT(Warning);
|
||||
POST_INIT(UserWarning);
|
||||
POST_INIT(EncodingWarning);
|
||||
POST_INIT(DeprecationWarning);
|
||||
POST_INIT(PendingDeprecationWarning);
|
||||
POST_INIT(SyntaxWarning);
|
||||
|
|
|
@ -724,6 +724,7 @@ EXPORT_DATA(PyExc_BlockingIOError)
|
|||
EXPORT_DATA(PyExc_BrokenPipeError)
|
||||
EXPORT_DATA(PyExc_BufferError)
|
||||
EXPORT_DATA(PyExc_BytesWarning)
|
||||
EXPORT_DATA(PyExc_EncodingWarning)
|
||||
EXPORT_DATA(PyExc_ChildProcessError)
|
||||
EXPORT_DATA(PyExc_ConnectionAbortedError)
|
||||
EXPORT_DATA(PyExc_ConnectionError)
|
||||
|
|
|
@ -94,6 +94,7 @@ static const char usage_3[] = "\
|
|||
otherwise activate automatically)\n\
|
||||
-X pycache_prefix=PATH: enable writing .pyc files to a parallel tree rooted at the\n\
|
||||
given directory instead of to the code tree\n\
|
||||
-X warn_default_encoding: enable opt-in EncodingWarning for 'encoding=None'\n\
|
||||
\n\
|
||||
--check-hash-based-pycs always|default|never:\n\
|
||||
control how Python invalidates hash-based .pyc files\n\
|
||||
|
@ -129,7 +130,8 @@ static const char usage_6[] =
|
|||
"PYTHONBREAKPOINT: if this variable is set to 0, it disables the default\n"
|
||||
" debugger. It can be set to the callable of your debugger of choice.\n"
|
||||
"PYTHONDEVMODE: enable the development mode.\n"
|
||||
"PYTHONPYCACHEPREFIX: root directory for bytecode cache (pyc) files.\n";
|
||||
"PYTHONPYCACHEPREFIX: root directory for bytecode cache (pyc) files.\n"
|
||||
"PYTHONWARNDEFAULTENCODING: enable opt-in EncodingWarning for 'encoding=None'.\n";
|
||||
|
||||
#if defined(MS_WINDOWS)
|
||||
# define PYTHONHOMEHELP "<prefix>\\python{major}{minor}"
|
||||
|
@ -600,6 +602,7 @@ config_check_consistency(const PyConfig *config)
|
|||
assert(config->malloc_stats >= 0);
|
||||
assert(config->site_import >= 0);
|
||||
assert(config->bytes_warning >= 0);
|
||||
assert(config->warn_default_encoding >= 0);
|
||||
assert(config->inspect >= 0);
|
||||
assert(config->interactive >= 0);
|
||||
assert(config->optimization_level >= 0);
|
||||
|
@ -698,6 +701,7 @@ _PyConfig_InitCompatConfig(PyConfig *config)
|
|||
config->parse_argv = 0;
|
||||
config->site_import = -1;
|
||||
config->bytes_warning = -1;
|
||||
config->warn_default_encoding = 0;
|
||||
config->inspect = -1;
|
||||
config->interactive = -1;
|
||||
config->optimization_level = -1;
|
||||
|
@ -906,6 +910,7 @@ _PyConfig_Copy(PyConfig *config, const PyConfig *config2)
|
|||
|
||||
COPY_ATTR(site_import);
|
||||
COPY_ATTR(bytes_warning);
|
||||
COPY_ATTR(warn_default_encoding);
|
||||
COPY_ATTR(inspect);
|
||||
COPY_ATTR(interactive);
|
||||
COPY_ATTR(optimization_level);
|
||||
|
@ -1007,6 +1012,7 @@ _PyConfig_AsDict(const PyConfig *config)
|
|||
SET_ITEM_WSTR(platlibdir);
|
||||
SET_ITEM_INT(site_import);
|
||||
SET_ITEM_INT(bytes_warning);
|
||||
SET_ITEM_INT(warn_default_encoding);
|
||||
SET_ITEM_INT(inspect);
|
||||
SET_ITEM_INT(interactive);
|
||||
SET_ITEM_INT(optimization_level);
|
||||
|
@ -1271,6 +1277,7 @@ _PyConfig_FromDict(PyConfig *config, PyObject *dict)
|
|||
GET_WSTRLIST(warnoptions);
|
||||
GET_UINT(site_import);
|
||||
GET_UINT(bytes_warning);
|
||||
GET_UINT(warn_default_encoding);
|
||||
GET_UINT(inspect);
|
||||
GET_UINT(interactive);
|
||||
GET_UINT(optimization_level);
|
||||
|
|
|
@ -169,6 +169,7 @@ _PyPreCmdline_SetConfig(const _PyPreCmdline *cmdline, PyConfig *config)
|
|||
COPY_ATTR(isolated);
|
||||
COPY_ATTR(use_environment);
|
||||
COPY_ATTR(dev_mode);
|
||||
COPY_ATTR(warn_default_encoding);
|
||||
return _PyStatus_OK();
|
||||
|
||||
#undef COPY_ATTR
|
||||
|
@ -257,9 +258,17 @@ _PyPreCmdline_Read(_PyPreCmdline *cmdline, const PyPreConfig *preconfig)
|
|||
cmdline->dev_mode = 0;
|
||||
}
|
||||
|
||||
// warn_default_encoding
|
||||
if (_Py_get_xoption(&cmdline->xoptions, L"warn_default_encoding")
|
||||
|| _Py_GetEnv(cmdline->use_environment, "PYTHONWARNDEFAULTENCODING"))
|
||||
{
|
||||
cmdline->warn_default_encoding = 1;
|
||||
}
|
||||
|
||||
assert(cmdline->use_environment >= 0);
|
||||
assert(cmdline->isolated >= 0);
|
||||
assert(cmdline->dev_mode >= 0);
|
||||
assert(cmdline->warn_default_encoding >= 0);
|
||||
|
||||
return _PyStatus_OK();
|
||||
}
|
||||
|
|
|
@ -2514,6 +2514,7 @@ static PyStructSequence_Field flags_fields[] = {
|
|||
{"isolated", "-I"},
|
||||
{"dev_mode", "-X dev"},
|
||||
{"utf8_mode", "-X utf8"},
|
||||
{"warn_default_encoding", "-X warn_default_encoding"},
|
||||
{0}
|
||||
};
|
||||
|
||||
|
@ -2521,7 +2522,7 @@ static PyStructSequence_Desc flags_desc = {
|
|||
"sys.flags", /* name */
|
||||
flags__doc__, /* doc */
|
||||
flags_fields, /* fields */
|
||||
15
|
||||
16
|
||||
};
|
||||
|
||||
static int
|
||||
|
@ -2560,6 +2561,7 @@ set_flags_from_config(PyInterpreterState *interp, PyObject *flags)
|
|||
SetFlag(config->isolated);
|
||||
SetFlagObj(PyBool_FromLong(config->dev_mode));
|
||||
SetFlag(preconfig->utf8_mode);
|
||||
SetFlag(config->warn_default_encoding);
|
||||
#undef SetFlagObj
|
||||
#undef SetFlag
|
||||
return 0;
|
||||
|
|
Loading…
Reference in New Issue