From 2515a28230b1a011205f30263da6b01c6bd167a3 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 30 Jun 2020 03:18:22 +0300 Subject: [PATCH] bpo-41152: IDLE: always use UTF-8 for standard IO streams (GH-21214) --- Lib/idlelib/NEWS.txt | 3 ++ Lib/idlelib/idle_test/test_outwin.py | 5 -- Lib/idlelib/iomenu.py | 48 ++----------------- Lib/idlelib/outwin.py | 4 +- .../2020-06-29-14-51-15.bpo-41152.d6mV0C.rst | 2 + 5 files changed, 10 insertions(+), 52 deletions(-) create mode 100644 Misc/NEWS.d/next/IDLE/2020-06-29-14-51-15.bpo-41152.d6mV0C.rst diff --git a/Lib/idlelib/NEWS.txt b/Lib/idlelib/NEWS.txt index c270fcbae2b..7ae29af0b30 100644 --- a/Lib/idlelib/NEWS.txt +++ b/Lib/idlelib/NEWS.txt @@ -3,6 +3,9 @@ Released on 2020-10-05? ====================================== +bpo-41152: The encoding of ``stdin``, ``stdout`` and ``stderr`` in IDLE +is now always UTF-8. + bpo-41144: Make Open Module open a special module such as os.path. bpo-40723: Make test_idle pass when run after import. diff --git a/Lib/idlelib/idle_test/test_outwin.py b/Lib/idlelib/idle_test/test_outwin.py index cd099ecd841..e347bfca7f1 100644 --- a/Lib/idlelib/idle_test/test_outwin.py +++ b/Lib/idlelib/idle_test/test_outwin.py @@ -58,11 +58,6 @@ class OutputWindowTest(unittest.TestCase): get = self.text.get write = self.window.write - # Test bytes. - b = b'Test bytes.' - eq(write(b), len(b)) - eq(get('1.0', '1.end'), b.decode()) - # No new line - insert stays on same line. delete('1.0', 'end') test_text = 'test text' diff --git a/Lib/idlelib/iomenu.py b/Lib/idlelib/iomenu.py index 4b2833b8ca5..7f3f656ee28 100644 --- a/Lib/idlelib/iomenu.py +++ b/Lib/idlelib/iomenu.py @@ -13,52 +13,12 @@ from tkinter.simpledialog import askstring import idlelib from idlelib.config import idleConf -if idlelib.testing: # Set True by test.test_idle to avoid setlocale. - encoding = 'utf-8' - errors = 'surrogateescape' +encoding = 'utf-8' +if sys.platform == 'win32': + errors = 'surrogatepass' else: - # Try setting the locale, so that we can find out - # what encoding to use - try: - import locale - locale.setlocale(locale.LC_CTYPE, "") - except (ImportError, locale.Error): - pass + errors = 'surrogateescape' - if sys.platform == 'win32': - encoding = 'utf-8' - errors = 'surrogateescape' - else: - try: - # Different things can fail here: the locale module may not be - # loaded, it may not offer nl_langinfo, or CODESET, or the - # resulting codeset may be unknown to Python. We ignore all - # these problems, falling back to ASCII - locale_encoding = locale.nl_langinfo(locale.CODESET) - if locale_encoding: - codecs.lookup(locale_encoding) - except (NameError, AttributeError, LookupError): - # Try getdefaultlocale: it parses environment variables, - # which may give a clue. Unfortunately, getdefaultlocale has - # bugs that can cause ValueError. - try: - locale_encoding = locale.getdefaultlocale()[1] - if locale_encoding: - codecs.lookup(locale_encoding) - except (ValueError, LookupError): - pass - - if locale_encoding: - encoding = locale_encoding.lower() - errors = 'strict' - else: - # POSIX locale or macOS - encoding = 'ascii' - errors = 'surrogateescape' - # Encoding is used in multiple files; locale_encoding nowhere. - # The only use of 'encoding' below is in _decode as initial value - # of deprecated block asking user for encoding. - # Perhaps use elsewhere should be reviewed. coding_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) diff --git a/Lib/idlelib/outwin.py b/Lib/idlelib/outwin.py index 90272b6feb4..5ab08bbaf4b 100644 --- a/Lib/idlelib/outwin.py +++ b/Lib/idlelib/outwin.py @@ -6,7 +6,6 @@ import re from tkinter import messagebox from idlelib.editor import EditorWindow -from idlelib import iomenu file_line_pats = [ @@ -110,8 +109,7 @@ class OutputWindow(EditorWindow): Return: Length of text inserted. """ - if isinstance(s, bytes): - s = s.decode(iomenu.encoding, "replace") + assert isinstance(s, str) self.text.insert(mark, s, tags) self.text.see(mark) self.text.update() diff --git a/Misc/NEWS.d/next/IDLE/2020-06-29-14-51-15.bpo-41152.d6mV0C.rst b/Misc/NEWS.d/next/IDLE/2020-06-29-14-51-15.bpo-41152.d6mV0C.rst new file mode 100644 index 00000000000..434be10b530 --- /dev/null +++ b/Misc/NEWS.d/next/IDLE/2020-06-29-14-51-15.bpo-41152.d6mV0C.rst @@ -0,0 +1,2 @@ +The encoding of ``stdin``, ``stdout`` and ``stderr`` in IDLE is now always +UTF-8.