Merged revisions 75931 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/trunk

........
  r75931 | benjamin.peterson | 2009-10-28 20:49:07 -0500 (Wed, 28 Oct 2009) | 5 lines

  do a backport of r75928

  The added test does not fail without the patch, but we still fix the issue of
  surrogates being used in wide builds where they should not be.
........
This commit is contained in:
Benjamin Peterson 2009-10-29 02:02:47 +00:00
parent e7228d3e01
commit c717aec772
2 changed files with 49 additions and 39 deletions

View File

@ -1,30 +1,37 @@
#! -*- coding: koi8-r -*-
import unittest
from test import test_support
class PEP263Test(unittest.TestCase):
def test_pep263(self):
self.assertEqual(
u"Питон".encode("utf-8"),
'\xd0\x9f\xd0\xb8\xd1\x82\xd0\xbe\xd0\xbd'
)
self.assertEqual(
u"\П".encode("utf-8"),
'\\\xd0\x9f'
)
def test_compilestring(self):
# see #1882
c = compile("\n# coding: utf-8\nu = u'\xc3\xb3'\n", "dummy", "exec")
d = {}
exec c in d
self.assertEqual(d['u'], u'\xf3')
def test_main():
test_support.run_unittest(PEP263Test)
if __name__=="__main__":
test_main()
# -*- coding: koi8-r -*-
import unittest
from test import test_support
class PEP263Test(unittest.TestCase):
def test_pep263(self):
self.assertEqual(
u"Питон".encode("utf-8"),
'\xd0\x9f\xd0\xb8\xd1\x82\xd0\xbe\xd0\xbd'
)
self.assertEqual(
u"\П".encode("utf-8"),
'\\\xd0\x9f'
)
def test_compilestring(self):
# see #1882
c = compile("\n# coding: utf-8\nu = u'\xc3\xb3'\n", "dummy", "exec")
d = {}
exec c in d
self.assertEqual(d['u'], u'\xf3')
def test_issue3297(self):
c = compile("a, b = '\U0001010F', '\\U0001010F'", "dummy", "exec")
d = {}
exec(c, d)
self.assertEqual(d['a'], d['b'])
self.assertEqual(len(d['a']), len(d['b']))
def test_main():
test_support.run_unittest(PEP263Test)
if __name__=="__main__":
test_main()

View File

@ -3248,10 +3248,11 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
u = NULL;
} else {
/* check for integer overflow */
if (len > PY_SIZE_MAX / 4)
if (len > PY_SIZE_MAX / 6)
return NULL;
/* "\XX" may become "\u005c\uHHLL" (12 bytes) */
u = PyString_FromStringAndSize((char *)NULL, len * 4);
/* "<C3><A4>" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
"\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
u = PyString_FromStringAndSize((char *)NULL, len * 6);
if (u == NULL)
return NULL;
p = buf = PyString_AsString(u);
@ -3268,19 +3269,21 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
PyObject *w;
char *r;
Py_ssize_t rn, i;
w = decode_utf8(c, &s, end, "utf-16-be");
w = decode_utf8(c, &s, end, "utf-32-be");
if (w == NULL) {
Py_DECREF(u);
return NULL;
}
r = PyString_AsString(w);
rn = PyString_Size(w);
assert(rn % 2 == 0);
for (i = 0; i < rn; i += 2) {
sprintf(p, "\\u%02x%02x",
assert(rn % 4 == 0);
for (i = 0; i < rn; i += 4) {
sprintf(p, "\\U%02x%02x%02x%02x",
r[i + 0] & 0xFF,
r[i + 1] & 0xFF);
p += 6;
r[i + 1] & 0xFF,
r[i + 2] & 0xFF,
r[i + 3] & 0xFF);
p += 10;
}
Py_DECREF(w);
} else {