mirror of https://github.com/python/cpython
Two changes to improve (I hope) Unicode support.
1. In Tcl 8.2 and later, use Tcl_NewUnicodeObj() when passing a Python Unicode object rather than going through UTF-8. (This function doesn't exist in Tcl 8.1, so there the original UTF-8 code is still used; in Tcl 8.0 there is no support for Unicode.) This assumes that Tcl_UniChar is the same thing as Py_UNICODE; a run-time error is issued if this is not the case.

2. In Tcl 8.1 and later (i.e., whenever Tcl supports Unicode), when a string returned from Tcl contains bytes with the top bit set, we assume it is encoded in UTF-8, and decode it into a Unicode string object.

Notes:

- Passing Unicode strings to Tcl 8.0 does not do the right thing; this isn't worth fixing.

- When passing an 8-bit string to Tcl 8.1 or later that has bytes with the top bit set, Tcl tries to interpret it as UTF-8; it seems to fall back on Latin-1 for non-UTF-8 bytes. I'm not sure what to do about this besides telling the user to disambiguate such strings by converting them to Unicode (forcing the user to be explicit about the encoding).

- Obviously it won't be possible to get binary data out of Tk this way. Do we need that ability? How to do it?
This commit is contained in:
parent
cc229ea76f
commit
990f5c6c98
|
@ -550,6 +550,8 @@ AsObj(value)
|
|||
return result;
|
||||
}
|
||||
else if (PyUnicode_Check(value)) {
|
||||
#if TKMAJORMINOR <= 8001
|
||||
/* In Tcl 8.1 we must use UTF-8 */
|
||||
PyObject* utf8 = PyUnicode_AsUTF8String (value);
|
||||
if (!utf8)
|
||||
return 0;
|
||||
|
@ -557,6 +559,17 @@ AsObj(value)
|
|||
PyString_GET_SIZE (utf8));
|
||||
Py_DECREF(utf8);
|
||||
return result;
|
||||
#else /* TKMAJORMINOR > 8001 */
|
||||
/* In Tcl 8.2 and later, use Tcl_NewUnicodeObj() */
|
||||
if (sizeof(Py_UNICODE) != sizeof(Tcl_UniChar)) {
|
||||
/* XXX Should really test this at compile time */
|
||||
PyErr_SetString(PyExc_SystemError,
|
||||
"Py_UNICODE and Tcl_UniChar differ in size");
|
||||
return 0;
|
||||
}
|
||||
return Tcl_NewUnicodeObj(PyUnicode_AS_UNICODE(value),
|
||||
PyUnicode_GET_SIZE(value));
|
||||
#endif /* TKMAJORMINOR > 8001 */
|
||||
}
|
||||
else {
|
||||
PyObject *v = PyObject_Str(value);
|
||||
|
@ -624,10 +637,26 @@ Tkapp_Call(self, args)
|
|||
ENTER_OVERLAP
|
||||
if (i == TCL_ERROR)
|
||||
Tkinter_Error(self);
|
||||
else
|
||||
else {
|
||||
/* We could request the object result here, but doing
|
||||
so would confuse applications that expect a string. */
|
||||
res = PyString_FromString(Tcl_GetStringResult(interp));
|
||||
char *s = Tcl_GetStringResult(interp);
|
||||
char *p = s;
|
||||
/* If the result contains any bytes with the top bit set,
|
||||
it's UTF-8 and we should decode it to Unicode */
|
||||
while (*p != '\0') {
|
||||
if (*p & 0x80)
|
||||
break;
|
||||
p++;
|
||||
}
|
||||
if (*p == '\0')
|
||||
res = PyString_FromStringAndSize(s, (int)(p-s));
|
||||
else {
|
||||
/* Convert UTF-8 to Unicode string */
|
||||
p = strchr(p, '\0');
|
||||
res = PyUnicode_DecodeUTF8(s, (int)(p-s), "ignore");
|
||||
}
|
||||
}
|
||||
|
||||
LEAVE_OVERLAP_TCL
|
||||
|
||||
|
|
Loading…
Reference in New Issue