From bbbac2ec34e99c24d7bc0eedbcc138c5f4551d48 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 7 Feb 2013 23:12:46 +0100 Subject: [PATCH] Issue #17137: When a Unicode string is resized, the internal wide character string (wstr) format is now cleared. --- Lib/test/test_unicode.py | 15 +++++++++++++++ Misc/NEWS | 3 +++ Objects/unicodeobject.c | 4 ++++ 3 files changed, 22 insertions(+) diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 8fccab3cb8b..f7d86865954 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2167,6 +2167,21 @@ class UnicodeTest(string_tests.CommonTest, self.assertEqual(args[0], text) self.assertEqual(len(args), 1) + def test_resize(self): + for length in range(1, 100, 7): + # generate a fresh string (refcount=1) + text = 'a' * length + 'b' + + # fill wstr internal field + abc = text.encode('unicode_internal') + self.assertEqual(abc.decode('unicode_internal'), text) + + # resize text: wstr field must be cleared and then recomputed + text += 'c' + abcdef = text.encode('unicode_internal') + self.assertNotEqual(abc, abcdef) + self.assertEqual(abcdef.decode('unicode_internal'), text) + class StringModuleTest(unittest.TestCase): def test_formatter_parser(self): diff --git a/Misc/NEWS b/Misc/NEWS index 02ec3399e7a..39a79c206fc 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,9 @@ What's New in Python 3.3.1? Core and Builtins ----------------- +- Issue #17137: When a Unicode string is resized, the internal wide character + string (wstr) format is now cleared. + - Issue #17043: The unicode-internal decoder no longer read past the end of input buffer.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index abe793dfd48..51160f852d2 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -702,6 +702,10 @@ resize_compact(PyObject *unicode, Py_ssize_t length) if (!PyUnicode_IS_ASCII(unicode)) _PyUnicode_WSTR_LENGTH(unicode) = length; } + else if (_PyUnicode_HAS_WSTR_MEMORY(unicode)) { + PyObject_DEL(_PyUnicode_WSTR(unicode)); + _PyUnicode_WSTR(unicode) = NULL; + } PyUnicode_WRITE(PyUnicode_KIND(unicode), PyUnicode_DATA(unicode), length, 0); assert(_PyUnicode_CheckConsistency(unicode, 0));