diff --git a/Lib/test/test_pep263.py b/Lib/test/test_pep263.py index a3a9ade0b65..e4faa9ff56a 100644 --- a/Lib/test/test_pep263.py +++ b/Lib/test/test_pep263.py @@ -1,30 +1,37 @@ -#! -*- coding: koi8-r -*- - -import unittest -from test import test_support - -class PEP263Test(unittest.TestCase): - - def test_pep263(self): - self.assertEqual( - u"ðÉÔÏÎ".encode("utf-8"), - '\xd0\x9f\xd0\xb8\xd1\x82\xd0\xbe\xd0\xbd' - ) - self.assertEqual( - u"\ð".encode("utf-8"), - '\\\xd0\x9f' - ) - - def test_compilestring(self): - # see #1882 - c = compile("\n# coding: utf-8\nu = u'\xc3\xb3'\n", "dummy", "exec") - d = {} - exec c in d - self.assertEqual(d['u'], u'\xf3') - - -def test_main(): - test_support.run_unittest(PEP263Test) - -if __name__=="__main__": - test_main() +# -*- coding: koi8-r -*- + +import unittest +from test import test_support + +class PEP263Test(unittest.TestCase): + + def test_pep263(self): + self.assertEqual( + u"ðÉÔÏÎ".encode("utf-8"), + '\xd0\x9f\xd0\xb8\xd1\x82\xd0\xbe\xd0\xbd' + ) + self.assertEqual( + u"\ð".encode("utf-8"), + '\\\xd0\x9f' + ) + + def test_compilestring(self): + # see #1882 + c = compile("\n# coding: utf-8\nu = u'\xc3\xb3'\n", "dummy", "exec") + d = {} + exec c in d + self.assertEqual(d['u'], u'\xf3') + + + def test_issue3297(self): + c = compile("a, b = '\U0001010F', '\\U0001010F'", "dummy", "exec") + d = {} + exec(c, d) + self.assertEqual(d['a'], d['b']) + self.assertEqual(len(d['a']), len(d['b'])) + +def test_main(): + test_support.run_unittest(PEP263Test) + +if __name__=="__main__": + test_main() diff --git a/Python/ast.c b/Python/ast.c index a3fdd8998d1..b89e29c55cc 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -3248,10 +3248,11 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons u = NULL; } else { /* check for integer overflow */ - if (len > PY_SIZE_MAX / 4) + if (len > PY_SIZE_MAX / 6) return NULL; - /* "\XX" may become "\u005c\uHHLL" (12 bytes) */ - u = PyString_FromStringAndSize((char *)NULL, len 
* 4); + /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5 + "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */ + u = PyString_FromStringAndSize((char *)NULL, len * 6); if (u == NULL) return NULL; p = buf = PyString_AsString(u); @@ -3268,19 +3269,21 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons PyObject *w; char *r; Py_ssize_t rn, i; - w = decode_utf8(c, &s, end, "utf-16-be"); + w = decode_utf8(c, &s, end, "utf-32-be"); if (w == NULL) { Py_DECREF(u); return NULL; } r = PyString_AsString(w); rn = PyString_Size(w); - assert(rn % 2 == 0); - for (i = 0; i < rn; i += 2) { - sprintf(p, "\\u%02x%02x", + assert(rn % 4 == 0); + for (i = 0; i < rn; i += 4) { + sprintf(p, "\\U%02x%02x%02x%02x", r[i + 0] & 0xFF, - r[i + 1] & 0xFF); - p += 6; + r[i + 1] & 0xFF, + r[i + 2] & 0xFF, + r[i + 3] & 0xFF); + p += 10; } Py_DECREF(w); } else {