From 7143029d4360637aadbd7ddf386ea5c64fb83095 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 18 Mar 2014 01:18:21 +0100 Subject: [PATCH] Issue #19977: When the ``LC_CTYPE`` locale is the POSIX locale (``C`` locale), :py:data:`sys.stdin` and :py:data:`sys.stdout` are now using the ``surrogateescape`` error handler, instead of the ``strict`` error handler. --- Doc/whatsnew/3.5.rst | 5 ++++- Lib/test/test_sys.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ Misc/NEWS | 4 ++++ Python/pythonrun.c | 11 ++++++++++- 4 files changed, 62 insertions(+), 2 deletions(-) diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst index 176160b3ed1..2c044ae6c83 100644 --- a/Doc/whatsnew/3.5.rst +++ b/Doc/whatsnew/3.5.rst @@ -79,7 +79,10 @@ New built-in features: Implementation improvements: -* None yet. +* When the ``LC_CTYPE`` locale is the POSIX locale (``C`` locale), + :py:data:`sys.stdin` and :py:data:`sys.stdout` are now using the + ``surrogateescape`` error handler, instead of the ``strict`` error handler + (:issue:`19977`). 
Significantly Improved Library Modules: diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 5a9699ff204..f3d0b42be24 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -615,6 +615,50 @@ class SysModuleTest(unittest.TestCase): expected = None self.check_fsencoding(fs_encoding, expected) + @unittest.skipIf(sys.platform == 'win32', + 'test specific to UNIX') + def test_c_locale_surrogateescape(self): + # Force the POSIX locale + env = os.environ.copy() + env["LC_ALL"] = "C" + code = '\n'.join(( + 'import codecs, sys', + 'def dump(name):', + ' std = getattr(sys, name)', + ' encoding = codecs.lookup(std.encoding).name', + ' print("%s: %s:%s" % (name, encoding, std.errors))', + 'dump("stdin")', + 'dump("stdout")', + 'dump("stderr")', + )) + p = subprocess.Popen([sys.executable, "-I", "-c", code], + stdout=subprocess.PIPE, env=env) + out = p.communicate()[0] + self.assertEqual(out, + b'stdin: ascii:surrogateescape\n' + b'stdout: ascii:surrogateescape\n' + b'stderr: ascii:backslashreplace\n') + + # replace the default error handler + env['PYTHONIOENCODING'] = ':strict' + p = subprocess.Popen([sys.executable, "-c", code], + stdout=subprocess.PIPE, env=env) + out = p.communicate()[0] + self.assertEqual(out, + b'stdin: ascii:strict\n' + b'stdout: ascii:strict\n' + b'stderr: ascii:backslashreplace\n') + + # force the encoding + env['PYTHONIOENCODING'] = 'iso8859-1' + p = subprocess.Popen([sys.executable, "-c", code], + stdout=subprocess.PIPE, env=env) + out = p.communicate()[0] + self.assertEqual(out, + b'stdin: iso8859-1:surrogateescape\n' + b'stdout: iso8859-1:surrogateescape\n' + b'stderr: iso8859-1:backslashreplace\n') + def test_implementation(self): # This test applies to all implementations equally. 
diff --git a/Misc/NEWS b/Misc/NEWS index 5946bc99f38..2072204267d 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -13,6 +13,10 @@ Core and Builtins Library ------- +- Issue #19977: When the ``LC_CTYPE`` locale is the POSIX locale (``C`` locale), + :py:data:`sys.stdin` and :py:data:`sys.stdout` are now using the + ``surrogateescape`` error handler, instead of the ``strict`` error handler. + - Issue #20574: Implement incremental decoder for cp65001 code (Windows code page 65001, Microsoft UTF-8). diff --git a/Python/pythonrun.c b/Python/pythonrun.c index e9947e9ff68..bb9f425fb1f 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -1156,6 +1156,15 @@ initstdio(void) encoding = _Py_StandardStreamEncoding; errors = _Py_StandardStreamErrors; if (!encoding || !errors) { + if (!errors) { + /* When the LC_CTYPE locale is the POSIX locale ("C locale"), + stdin and stdout use the surrogateescape error handler by + default, instead of the strict error handler. */ + char *loc = setlocale(LC_CTYPE, NULL); + if (loc != NULL && strcmp(loc, "C") == 0) + errors = "surrogateescape"; + } + pythonioencoding = Py_GETENV("PYTHONIOENCODING"); if (pythonioencoding) { char *err; @@ -1168,7 +1177,7 @@ initstdio(void) if (err) { *err = '\0'; err++; - if (*err && !errors) { + if (*err && !_Py_StandardStreamErrors) { errors = err; } }