diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 2d42aac5ad4..b5e08873bc6 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2191,6 +2191,21 @@ class UnicodeTest(string_tests.CommonTest, self.assertEqual(args[0], text) self.assertEqual(len(args), 1) + def test_resize(self): + for length in range(1, 100, 7): + # generate a fresh string (refcount=1) + text = 'a' * length + 'b' + + # fill wstr internal field + abc = text.encode('unicode_internal') + self.assertEqual(abc.decode('unicode_internal'), text) + + # resize text: wstr field must be cleared and then recomputed + text += 'c' + abcdef = text.encode('unicode_internal') + self.assertNotEqual(abc, abcdef) + self.assertEqual(abcdef.decode('unicode_internal'), text) + class StringModuleTest(unittest.TestCase): def test_formatter_parser(self): diff --git a/Misc/NEWS b/Misc/NEWS index bc174853d4b..0aa2e1883d2 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ What's New in Python 3.4.0 Alpha 1? Core and Builtins ----------------- +- Issue #17137: When a Unicode string is resized, the internal wide character + string (wstr) format is now cleared. + - Issue #17043: The unicode-internal decoder no longer read past the end of input buffer. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 8596e544233..4d380490607 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -717,6 +717,10 @@ resize_compact(PyObject *unicode, Py_ssize_t length) if (!PyUnicode_IS_ASCII(unicode)) _PyUnicode_WSTR_LENGTH(unicode) = length; } + else if (_PyUnicode_HAS_WSTR_MEMORY(unicode)) { + PyObject_DEL(_PyUnicode_WSTR(unicode)); + _PyUnicode_WSTR(unicode) = NULL; + } #ifdef Py_DEBUG unicode_fill_invalid(unicode, old_length); #endif