From f7351b40b54b3354c4a8b01d9072b888256fca87 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 13 Apr 2010 11:09:22 +0000 Subject: [PATCH] Merged revisions 80031 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/py3k ........ r80031 | victor.stinner | 2010-04-13 13:07:24 +0200 (mar., 13 avril 2010) | 4 lines Issue #8383: pickle and pickletools use surrogatepass error handler when encoding unicode as utf8 to support lone surrogates and stay compatible with Python 2.x and 3.0 ........ --- Lib/pickle.py | 4 ++-- Lib/pickletools.py | 2 +- Lib/test/pickletester.py | 4 +++- Misc/NEWS | 4 ++++ Modules/_pickle.c | 6 ++++-- 5 files changed, 14 insertions(+), 6 deletions(-) diff --git a/Lib/pickle.py b/Lib/pickle.py index 7af4ce969a4..7b48527d3a1 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -499,7 +499,7 @@ class _Pickler: def save_str(self, obj, pack=struct.pack): if self.bin: - encoded = obj.encode('utf-8') + encoded = obj.encode('utf-8', 'surrogatepass') n = len(encoded) self.write(BINUNICODE + pack("', '<\\\u1234>', '<\n>', - '<\\>', '<\\\U00012345>'] + '<\\>', '<\\\U00012345>', + # surrogates + '<\udc80>'] for proto in protocols: for u in endcases: p = self.dumps(u, proto) diff --git a/Misc/NEWS b/Misc/NEWS index b829138f392..d72ce12aaf9 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -28,6 +28,10 @@ Core and Builtins Library ------- +- Issue #8383: pickle and pickletools use surrogatepass error handler when + encoding unicode as utf8 to support lone surrogates and stay compatible with + Python 2.x and 3.0 + - Issue #8179: Fix macpath.realpath() on a non-existing path. - Issue #8139: ossaudiodev didn't initialize its types properly, therefore diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 29aed7adb3b..0e1c2cdc882 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -1227,7 +1227,9 @@ save_unicode(PicklerObject *self, PyObject *obj) if (self->bin) { char pdata[5]; - encoded = PyUnicode_AsUTF8String(obj); + encoded = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj), + PyUnicode_GET_SIZE(obj), + "surrogatepass"); if (encoded == NULL) goto error; @@ -3352,7 +3354,7 @@ load_binunicode(UnpicklerObject *self) if (unpickler_read(self, &s, size) < 0) return -1; - str = PyUnicode_DecodeUTF8(s, size, NULL); + str = PyUnicode_DecodeUTF8(s, size, "surrogatepass"); if (str == NULL) return -1;