From 2e71d014ea9e22c846b459e6420e5ef2c726ac76 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 17 May 2010 09:35:44 +0000 Subject: [PATCH] Merged revisions 81250-81253 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/py3k ........ r81250 | victor.stinner | 2010-05-17 03:13:37 +0200 (lun., 17 mai 2010) | 2 lines Issue #6697: Fix a crash if code of "python -c code" contains surrogates ........ r81251 | victor.stinner | 2010-05-17 03:26:01 +0200 (lun., 17 mai 2010) | 3 lines PyObject_Dump() encodes unicode objects to utf8 with backslashreplace (instead of strict) error handler to escape surrogates ........ r81252 | victor.stinner | 2010-05-17 10:58:51 +0200 (lun., 17 mai 2010) | 6 lines handle_system_exit() flushs files to warranty the output order PyObject_Print() writes into the C object stderr, whereas PySys_WriteStderr() writes into the Python object sys.stderr. Each object has its own buffer, so call sys.stderr.flush() and fflush(stderr). ........ r81253 | victor.stinner | 2010-05-17 11:33:42 +0200 (lun., 17 mai 2010) | 6 lines Fix refleak in internal_print() introduced by myself in r81251 _PyUnicode_AsDefaultEncodedString() uses a magical PyUnicode attribute to automatically destroy PyUnicode_EncodeUTF8() result when the unicode string is destroyed. ........ --- Lib/test/test_sys.py | 40 +++++++++++++++++++++++++++++++++++++++- Misc/NEWS | 3 +++ Modules/main.c | 12 ++++++++---- Objects/object.c | 5 ++++- Python/pythonrun.c | 4 ++++ 5 files changed, 58 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 2886e77c560..b3a06763289 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -79,6 +79,8 @@ class SysModuleTest(unittest.TestCase): # Python/pythonrun.c::PyErr_PrintEx() is tricky. def test_exit(self): + import subprocess + self.assertRaises(TypeError, sys.exit, 42, 42) # call without argument @@ -133,11 +135,29 @@ class SysModuleTest(unittest.TestCase): self.fail("no exception") # test that the exit machinery handles SystemExits properly - import subprocess rc = subprocess.call([sys.executable, "-c", "raise SystemExit(47)"]) self.assertEqual(rc, 47) + def check_exit_message(code, expected): + process = subprocess.Popen([sys.executable, "-c", code], + stderr=subprocess.PIPE) + stdout, stderr = process.communicate() + self.assertEqual(process.returncode, 1) + self.assertTrue(stderr.startswith(expected), stderr) + + # test that stderr buffer if flushed before the exit message is written + # into stderr + check_exit_message( + r'import sys; sys.stderr.write("unflushed,"); sys.exit("message")', + b"unflushed,message") + + # test that the exit message is written with backslashreplace error + # handler to stderr + check_exit_message( + r'import sys; sys.exit("surrogates:\uDCFF")', + b"surrogates:\\udcff") + def test_getdefaultencoding(self): self.assertRaises(TypeError, sys.getdefaultencoding, 42) # can't check more than the type, as the user might have changed it @@ -403,6 +423,24 @@ class SysModuleTest(unittest.TestCase): self.assertRaises(TypeError, sys.intern, S("abc")) + def test_main_invalid_unicode(self): + import locale + non_decodable = b"\xff" + encoding = locale.getpreferredencoding() + try: + non_decodable.decode(encoding) + except UnicodeDecodeError: + pass + else: + self.skipTest('%r is decodable with encoding %s' + % (non_decodable, encoding)) + code = b'print("' + non_decodable + b'")' + p = subprocess.Popen([sys.executable, "-c", code], stderr=subprocess.PIPE) + stdout, stderr = p.communicate() + self.assertEqual(p.returncode, 1) + self.assert_(stderr.startswith(b"UnicodeEncodeError: " + b"'utf-8' codec can't encode character '\\udcff' in " + b"position 7: surrogates not allowed"), stderr) def test_sys_flags(self): self.assertTrue(sys.flags) diff --git a/Misc/NEWS b/Misc/NEWS index 82a13d98eb4..cd196cf9745 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,9 @@ What's New in Python 3.1.3? Core and Builtins ----------------- +- PyObject_Dump() encodes unicode objects to utf8 with backslashreplace + (instead of strict) error handler to escape surrogates + - Issue #8124: PySys_WriteStdout() and PySys_WriteStderr() don't execute indirectly Python signal handlers anymore because mywrite() ignores exceptions (KeyboardInterrupt) diff --git a/Modules/main.c b/Modules/main.c index eb44aa98865..4dcc32da7b3 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -516,18 +516,22 @@ Py_Main(int argc, wchar_t **argv) } if (command) { + char *commandStr; PyObject *commandObj = PyUnicode_FromWideChar( command, wcslen(command)); free(command); - if (commandObj != NULL) { - sts = PyRun_SimpleStringFlags( - _PyUnicode_AsString(commandObj), &cf) != 0; + if (commandObj != NULL) + commandStr = _PyUnicode_AsString(commandObj); + else + commandStr = NULL; + if (commandStr != NULL) { + sts = PyRun_SimpleStringFlags(commandStr, &cf) != 0; + Py_DECREF(commandObj); } else { PyErr_Print(); sts = 1; } - Py_DECREF(commandObj); } else if (module) { sts = RunModule(module, 1); } diff --git a/Objects/object.c b/Objects/object.c index 1b8d4e1f42c..b2a549252a8 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -303,12 +303,15 @@ internal_print(PyObject *op, FILE *fp, int flags, int nesting) } else if (PyUnicode_Check(s)) { PyObject *t; - t = _PyUnicode_AsDefaultEncodedString(s, NULL); + t = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(s), + PyUnicode_GET_SIZE(s), + "backslashreplace"); if (t == NULL) ret = 0; else { fwrite(PyBytes_AS_STRING(t), 1, PyBytes_GET_SIZE(t), fp); + Py_DECREF(t); } } else { diff --git a/Python/pythonrun.c b/Python/pythonrun.c index 031d66f5179..ec2c6caea3f 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -1353,7 +1353,11 @@ handle_system_exit(void) if (PyLong_Check(value)) exitcode = (int)PyLong_AsLong(value); else { + PyObject *sys_stderr = PySys_GetObject("stderr"); + if (sys_stderr != NULL) + PyObject_CallMethod(sys_stderr, "flush", NULL); PyObject_Print(value, stderr, Py_PRINT_RAW); + fflush(stderr); PySys_WriteStderr("\n"); exitcode = 1; }