From 84ec8d931404f4f9037242ec933fdcdcd4870114 Mon Sep 17 00:00:00 2001 From: Amaury Forgeot d'Arc Date: Mon, 29 Jun 2009 22:36:49 +0000 Subject: [PATCH] #6373: SystemError in str.encode('latin1', 'surrogateescape') if the string contains unpaired surrogates. (In debug build, crash in assert()) This can happen with normal processing, if python starts with utf-8, then calls sys.setfilesystemencoding('latin-1') --- Lib/test/test_codecs.py | 5 +++++ Misc/NEWS | 4 ++++ Objects/unicodeobject.c | 2 ++ 3 files changed, 11 insertions(+) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 4ec7b5865cc..e060471d4b0 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1549,6 +1549,11 @@ class SurrogateEscapeTest(unittest.TestCase): self.assertEqual("foo\udca5bar".encode("iso-8859-3", "surrogateescape"), b"foo\xa5bar") + def test_latin1(self): + # Issue6373 + self.assertEqual("\udce4\udceb\udcef\udcf6\udcfc".encode("latin1", "surrogateescape"), + b"\xe4\xeb\xef\xf6\xfc") + def test_main(): support.run_unittest( diff --git a/Misc/NEWS b/Misc/NEWS index 2381844249c..98539e53173 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,10 @@ What's New in Python 3.2 Alpha 1? Core and Builtins ----------------- +- Issue #6373: Fixed a SystemError when encoding with the latin-1 codec and + the 'surrogateescape' error handler, a string which contains unpaired + surrogates. + - Issue #4856: Remove checks for win NT. Library diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 0d4a3ddd806..305289bc78c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4201,10 +4201,12 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p, repsize = PyBytes_Size(repunicode); if (repsize > 1) { /* Make room for all additional bytes. 
*/ + respos = str - PyBytes_AS_STRING(res); if (_PyBytes_Resize(&res, ressize+repsize-1)) { Py_DECREF(repunicode); goto onError; } + str = PyBytes_AS_STRING(res) + respos; ressize += repsize-1; } memcpy(str, PyBytes_AsString(repunicode), repsize);