[3.8] bpo-42318: Fix support of non-BMP characters in Tkinter on macOS (GH-23281). (GH-23784) (GH-23787)
(cherry picked from commit a26215db11)
(cherry picked from commit 28bf6ab61f)
This commit is contained in:
parent
0178a6b67c
commit
4d840e428a
|
@ -1,4 +1,5 @@
|
||||||
import unittest
|
import unittest
|
||||||
|
import locale
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
@ -58,6 +59,10 @@ class TclTest(unittest.TestCase):
|
||||||
tcl = self.interp
|
tcl = self.interp
|
||||||
self.assertEqual(tcl.eval('set a "a\\0b"'), 'a\x00b')
|
self.assertEqual(tcl.eval('set a "a\\0b"'), 'a\x00b')
|
||||||
|
|
||||||
|
def test_eval_surrogates_in_result(self):
|
||||||
|
tcl = self.interp
|
||||||
|
# Require the exact joined non-BMP character; a substring check would also accept an empty or partial result.
self.assertEqual(tcl.eval(r'set a "<\ud83d\udcbb>"'), '<\U0001f4bb>')
|
||||||
|
|
||||||
def testEvalException(self):
|
def testEvalException(self):
|
||||||
tcl = self.interp
|
tcl = self.interp
|
||||||
self.assertRaises(TclError,tcl.eval,'set a')
|
self.assertRaises(TclError,tcl.eval,'set a')
|
||||||
|
@ -190,29 +195,48 @@ class TclTest(unittest.TestCase):
|
||||||
|
|
||||||
def testEvalFile(self):
|
def testEvalFile(self):
|
||||||
tcl = self.interp
|
tcl = self.interp
|
||||||
with open(support.TESTFN, 'w') as f:
|
filename = support.TESTFN
|
||||||
self.addCleanup(support.unlink, support.TESTFN)
|
self.addCleanup(support.unlink, filename)
|
||||||
|
with open(filename, 'w') as f:
|
||||||
f.write("""set a 1
|
f.write("""set a 1
|
||||||
set b 2
|
set b 2
|
||||||
set c [ expr $a + $b ]
|
set c [ expr $a + $b ]
|
||||||
""")
|
""")
|
||||||
tcl.evalfile(support.TESTFN)
|
tcl.evalfile(filename)
|
||||||
self.assertEqual(tcl.eval('set a'),'1')
|
self.assertEqual(tcl.eval('set a'),'1')
|
||||||
self.assertEqual(tcl.eval('set b'),'2')
|
self.assertEqual(tcl.eval('set b'),'2')
|
||||||
self.assertEqual(tcl.eval('set c'),'3')
|
self.assertEqual(tcl.eval('set c'),'3')
|
||||||
|
|
||||||
def test_evalfile_null_in_result(self):
|
def test_evalfile_null_in_result(self):
|
||||||
tcl = self.interp
|
tcl = self.interp
|
||||||
with open(support.TESTFN, 'w') as f:
|
filename = support.TESTFN
|
||||||
self.addCleanup(support.unlink, support.TESTFN)
|
self.addCleanup(support.unlink, filename)
|
||||||
|
with open(filename, 'w') as f:
|
||||||
f.write("""
|
f.write("""
|
||||||
set a "a\0b"
|
set a "a\0b"
|
||||||
set b "a\\0b"
|
set b "a\\0b"
|
||||||
""")
|
""")
|
||||||
tcl.evalfile(support.TESTFN)
|
tcl.evalfile(filename)
|
||||||
self.assertEqual(tcl.eval('set a'), 'a\x00b')
|
self.assertEqual(tcl.eval('set a'), 'a\x00b')
|
||||||
self.assertEqual(tcl.eval('set b'), 'a\x00b')
|
self.assertEqual(tcl.eval('set b'), 'a\x00b')
|
||||||
|
|
||||||
|
def test_evalfile_surrogates_in_result(self):
|
||||||
|
tcl = self.interp
|
||||||
|
encoding = tcl.call('encoding', 'system')
|
||||||
|
self.addCleanup(tcl.call, 'encoding', 'system', encoding)
|
||||||
|
tcl.call('encoding', 'system', 'utf-8')
|
||||||
|
|
||||||
|
filename = support.TESTFN
|
||||||
|
self.addCleanup(support.unlink, filename)
|
||||||
|
with open(filename, 'wb') as f:
|
||||||
|
f.write(b"""
|
||||||
|
set a "<\xed\xa0\xbd\xed\xb2\xbb>"
|
||||||
|
set b "<\\ud83d\\udcbb>"
|
||||||
|
""")
|
||||||
|
tcl.evalfile(filename)
|
||||||
|
self.assertEqual(tcl.eval('set a'), '<\U0001f4bb>')
|
||||||
|
self.assertEqual(tcl.eval('set b'), '<\U0001f4bb>')
|
||||||
|
|
||||||
def testEvalFileException(self):
|
def testEvalFileException(self):
|
||||||
tcl = self.interp
|
tcl = self.interp
|
||||||
filename = "doesnotexists"
|
filename = "doesnotexists"
|
||||||
|
@ -435,6 +459,11 @@ class TclTest(unittest.TestCase):
|
||||||
self.assertEqual(passValue('str\x00ing\u20ac'), 'str\x00ing\u20ac')
|
self.assertEqual(passValue('str\x00ing\u20ac'), 'str\x00ing\u20ac')
|
||||||
self.assertEqual(passValue('str\x00ing\U0001f4bb'),
|
self.assertEqual(passValue('str\x00ing\U0001f4bb'),
|
||||||
'str\x00ing\U0001f4bb')
|
'str\x00ing\U0001f4bb')
|
||||||
|
if sys.platform != 'win32':
|
||||||
|
self.assertEqual(passValue('<\udce2\udc82\udcac>'),
|
||||||
|
'<\u20ac>')
|
||||||
|
self.assertEqual(passValue('<\udced\udca0\udcbd\udced\udcb2\udcbb>'),
|
||||||
|
'<\U0001f4bb>')
|
||||||
self.assertEqual(passValue(b'str\x00ing'),
|
self.assertEqual(passValue(b'str\x00ing'),
|
||||||
b'str\x00ing' if self.wantobjects else 'str\x00ing')
|
b'str\x00ing' if self.wantobjects else 'str\x00ing')
|
||||||
self.assertEqual(passValue(b'str\xc0\x80ing'),
|
self.assertEqual(passValue(b'str\xc0\x80ing'),
|
||||||
|
@ -494,6 +523,9 @@ class TclTest(unittest.TestCase):
|
||||||
check('string\xbd')
|
check('string\xbd')
|
||||||
check('string\u20ac')
|
check('string\u20ac')
|
||||||
check('string\U0001f4bb')
|
check('string\U0001f4bb')
|
||||||
|
if sys.platform != 'win32':
|
||||||
|
check('<\udce2\udc82\udcac>', '<\u20ac>')
|
||||||
|
check('<\udced\udca0\udcbd\udced\udcb2\udcbb>', '<\U0001f4bb>')
|
||||||
check('')
|
check('')
|
||||||
check(b'string', 'string')
|
check(b'string', 'string')
|
||||||
check(b'string\xe2\x82\xac', 'string\xe2\x82\xac')
|
check(b'string\xe2\x82\xac', 'string\xe2\x82\xac')
|
||||||
|
@ -537,6 +569,8 @@ class TclTest(unittest.TestCase):
|
||||||
('a \u20ac', ('a', '\u20ac')),
|
('a \u20ac', ('a', '\u20ac')),
|
||||||
('a \U0001f4bb', ('a', '\U0001f4bb')),
|
('a \U0001f4bb', ('a', '\U0001f4bb')),
|
||||||
(b'a \xe2\x82\xac', ('a', '\u20ac')),
|
(b'a \xe2\x82\xac', ('a', '\u20ac')),
|
||||||
|
(b'a \xf0\x9f\x92\xbb', ('a', '\U0001f4bb')),
|
||||||
|
(b'a \xed\xa0\xbd\xed\xb2\xbb', ('a', '\U0001f4bb')),
|
||||||
(b'a\xc0\x80b c\xc0\x80d', ('a\x00b', 'c\x00d')),
|
(b'a\xc0\x80b c\xc0\x80d', ('a\x00b', 'c\x00d')),
|
||||||
('a {b c}', ('a', 'b c')),
|
('a {b c}', ('a', 'b c')),
|
||||||
(r'a b\ c', ('a', 'b c')),
|
(r'a b\ c', ('a', 'b c')),
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Fixed support of non-BMP characters in :mod:`tkinter` on macOS.
|
|
@ -397,7 +397,8 @@ unicodeFromTclStringAndSize(const char *s, Py_ssize_t size)
|
||||||
|
|
||||||
char *buf = NULL;
|
char *buf = NULL;
|
||||||
PyErr_Clear();
|
PyErr_Clear();
|
||||||
/* Tcl encodes null character as \xc0\x80 */
|
/* Tcl encodes null character as \xc0\x80.
|
||||||
|
https://en.wikipedia.org/wiki/UTF-8#Modified_UTF-8 */
|
||||||
if (memchr(s, '\xc0', size)) {
|
if (memchr(s, '\xc0', size)) {
|
||||||
char *q;
|
char *q;
|
||||||
const char *e = s + size;
|
const char *e = s + size;
|
||||||
|
@ -421,6 +422,57 @@ unicodeFromTclStringAndSize(const char *s, Py_ssize_t size)
|
||||||
if (buf != NULL) {
|
if (buf != NULL) {
|
||||||
PyMem_Free(buf);
|
PyMem_Free(buf);
|
||||||
}
|
}
|
||||||
|
if (r == NULL || PyUnicode_KIND(r) == PyUnicode_1BYTE_KIND) {
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* In CESU-8 non-BMP characters are represented as a surrogate pair,
|
||||||
|
like in UTF-16, and then each surrogate code point is encoded in UTF-8.
|
||||||
|
https://en.wikipedia.org/wiki/CESU-8 */
|
||||||
|
Py_ssize_t len = PyUnicode_GET_LENGTH(r);
|
||||||
|
Py_ssize_t i, j;
|
||||||
|
/* All encoded surrogate characters start with \xED. */
|
||||||
|
i = PyUnicode_FindChar(r, 0xdcED, 0, len, 1);
|
||||||
|
if (i == -2) {
|
||||||
|
Py_DECREF(r);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if (i == -1) {
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
Py_UCS4 *u = PyUnicode_AsUCS4Copy(r);
|
||||||
|
Py_DECREF(r);
|
||||||
|
if (u == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
Py_UCS4 ch;
|
||||||
|
for (j = i; i < len; i++, u[j++] = ch) {
|
||||||
|
Py_UCS4 ch1, ch2, ch3, high, low;
|
||||||
|
/* High surrogates U+D800 - U+DBFF are encoded as
|
||||||
|
\xED\xA0\x80 - \xED\xAF\xBF. */
|
||||||
|
ch1 = ch = u[i];
|
||||||
|
if (ch1 != 0xdcED) continue;
|
||||||
|
ch2 = u[i + 1];
|
||||||
|
if (!(0xdcA0 <= ch2 && ch2 <= 0xdcAF)) continue;
|
||||||
|
ch3 = u[i + 2];
|
||||||
|
if (!(0xdc80 <= ch3 && ch3 <= 0xdcBF)) continue;
|
||||||
|
high = 0xD000 | ((ch2 & 0x3F) << 6) | (ch3 & 0x3F);
|
||||||
|
assert(Py_UNICODE_IS_HIGH_SURROGATE(high));
|
||||||
|
/* Low surrogates U+DC00 - U+DFFF are encoded as
|
||||||
|
\xED\xB0\x80 - \xED\xBF\xBF. */
|
||||||
|
ch1 = u[i + 3];
|
||||||
|
if (ch1 != 0xdcED) continue;
|
||||||
|
ch2 = u[i + 4];
|
||||||
|
if (!(0xdcB0 <= ch2 && ch2 <= 0xdcBF)) continue;
|
||||||
|
ch3 = u[i + 5];
|
||||||
|
if (!(0xdc80 <= ch3 && ch3 <= 0xdcBF)) continue;
|
||||||
|
low = 0xD000 | ((ch2 & 0x3F) << 6) | (ch3 & 0x3F);
|
||||||
|
/* The second encoded triple must decode to a low (trail) surrogate in U+DC00 - U+DFFF. */
assert(Py_UNICODE_IS_LOW_SURROGATE(low));
|
||||||
|
ch = Py_UNICODE_JOIN_SURROGATES(high, low);
|
||||||
|
i += 5;
|
||||||
|
}
|
||||||
|
r = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, u, j);
|
||||||
|
PyMem_Free(u);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue