Merged revisions 75928 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
r75928 | benjamin.peterson | 2009-10-28 16:59:39 -0500 (Wed, 28 Oct 2009) | 5 lines

in wide builds, avoid storing high unicode characters from source code with surrogates

This is accomplished by decoding with utf-32 instead of utf-16 on all builds.

The patch is by Adam Olsen.
........
This commit is contained in:
parent
1531f528b3
commit
7dc5ac5ec6
|
@ -36,6 +36,14 @@ class PEP263Test(unittest.TestCase):
|
|||
exec(c, d)
|
||||
self.assertEquals(d['\xc6'], '\xc6')
|
||||
|
||||
def test_issue3297(self):
|
||||
c = compile("a, b = '\U0001010F', '\\U0001010F'", "dummy", "exec")
|
||||
d = {}
|
||||
exec(c, d)
|
||||
self.assertEqual(d['a'], d['b'])
|
||||
self.assertEqual(len(d['a']), len(d['b']))
|
||||
self.assertEqual(ascii(d['a']), ascii(d['b']))
|
||||
|
||||
def test_main():
|
||||
support.run_unittest(PEP263Test)
|
||||
|
||||
|
|
|
@ -12,6 +12,9 @@ What's New in Python 3.1.2?
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issue #3297: On wide unicode builds, do not split unicode characters into
|
||||
surrogates.
|
||||
|
||||
- Issue #1722344: threading._shutdown() is now called in Py_Finalize(), which
|
||||
fixes the problem of some exceptions being thrown at shutdown when the
|
||||
interpreter is killed. Patch by Adam Olsen.
|
||||
|
|
23
Python/ast.c
23
Python/ast.c
|
@ -3217,10 +3217,11 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
|
|||
u = NULL;
|
||||
} else {
|
||||
/* check for integer overflow */
|
||||
if (len > PY_SIZE_MAX / 4)
|
||||
if (len > PY_SIZE_MAX / 6)
|
||||
return NULL;
|
||||
/* "\XX" may become "\u005c\uHHLL" (12 bytes) */
|
||||
u = PyBytes_FromStringAndSize((char *)NULL, len * 4);
|
||||
/* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
|
||||
"\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
|
||||
u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
|
||||
if (u == NULL)
|
||||
return NULL;
|
||||
p = buf = PyBytes_AsString(u);
|
||||
|
@ -3237,20 +3238,24 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
|
|||
PyObject *w;
|
||||
char *r;
|
||||
Py_ssize_t rn, i;
|
||||
w = decode_utf8(c, &s, end, "utf-16-be");
|
||||
w = decode_utf8(c, &s, end, "utf-32-be");
|
||||
if (w == NULL) {
|
||||
Py_DECREF(u);
|
||||
return NULL;
|
||||
}
|
||||
r = PyBytes_AS_STRING(w);
|
||||
rn = Py_SIZE(w);
|
||||
assert(rn % 2 == 0);
|
||||
for (i = 0; i < rn; i += 2) {
|
||||
sprintf(p, "\\u%02x%02x",
|
||||
assert(rn % 4 == 0);
|
||||
for (i = 0; i < rn; i += 4) {
|
||||
sprintf(p, "\\U%02x%02x%02x%02x",
|
||||
r[i + 0] & 0xFF,
|
||||
r[i + 1] & 0xFF);
|
||||
p += 6;
|
||||
r[i + 1] & 0xFF,
|
||||
r[i + 2] & 0xFF,
|
||||
r[i + 3] & 0xFF);
|
||||
p += 10;
|
||||
}
|
||||
/* Should be impossible to overflow */
|
||||
assert(p - buf <= Py_SIZE(u));
|
||||
Py_DECREF(w);
|
||||
} else {
|
||||
*p++ = *s++;
|
||||
|
|
Loading…
Reference in New Issue