From 372ac5e73260d6e8c8aefe31fd979a7706841868 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 17 May 2010 01:26:01 +0000 Subject: [PATCH] PyObject_Dump() encodes unicode objects to utf8 with backslashreplace (instead of strict) error handler to escape surrogates --- Lib/test/test_sys.py | 10 ++++++++++ Misc/NEWS | 3 +++ Objects/object.c | 4 +++- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index abbd7592c72..ecbc9db2ec8 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -145,6 +145,16 @@ class SysModuleTest(unittest.TestCase): "raise SystemExit(47)"]) self.assertEqual(rc, 47) + # test that the exit message is written with backslashreplace error + # handler to stderr + import subprocess + code = r'import sys; sys.exit("surrogates:\uDCFF")' + process = subprocess.Popen([sys.executable, "-c", code], + stderr=subprocess.PIPE) + stdout, stderr = process.communicate() + self.assertEqual(process.returncode, 1) + self.assertTrue(stderr.startswith(b"surrogates:\\udcff"), stderr) + def test_getdefaultencoding(self): self.assertRaises(TypeError, sys.getdefaultencoding, 42) # can't check more than the type, as the user might have changed it diff --git a/Misc/NEWS b/Misc/NEWS index f801e3e39c6..6b3842510db 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,9 @@ What's New in Python 3.2 Alpha 1? Core and Builtins ----------------- +- PyObject_Dump() encodes unicode objects to utf8 with backslashreplace + (instead of strict) error handler to escape surrogates + - Issue #8715: Create PyUnicode_EncodeFSDefault() function: Encode a Unicode object to Py_FileSystemDefaultEncoding with the "surrogateescape" error handler, and return bytes. If Py_FileSystemDefaultEncoding is not set, fall diff --git a/Objects/object.c b/Objects/object.c index 8ddc7ec78c0..1f4e3dd445f 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -303,7 +303,9 @@ internal_print(PyObject *op, FILE *fp, int flags, int nesting) } else if (PyUnicode_Check(s)) { PyObject *t; - t = _PyUnicode_AsDefaultEncodedString(s, NULL); + t = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(s), + PyUnicode_GET_SIZE(s), + "backslashreplace"); if (t == NULL) ret = 0; else {