mirror of https://github.com/python/cpython
gh-119614: Fix truncation of strings with embedded null characters in Tkinter (GH-120909)
The null character is now always encoded as \xc0\x80 (Modified UTF-8) before the string is passed to Tcl_NewStringObj().
This commit is contained in:
parent
fc297b4ba4
commit
c38e2f64d0
|
@ -73,6 +73,18 @@ class TclTest(unittest.TestCase):
|
||||||
tcl.call('set','a','1')
|
tcl.call('set','a','1')
|
||||||
self.assertEqual(tcl.call('set','a'),'1')
|
self.assertEqual(tcl.call('set','a'),'1')
|
||||||
|
|
||||||
|
def test_call_passing_null(self):
|
||||||
|
tcl = self.interp
|
||||||
|
tcl.call('set', 'a', 'a\0b') # ASCII-only
|
||||||
|
self.assertEqual(tcl.getvar('a'), 'a\x00b')
|
||||||
|
self.assertEqual(tcl.call('set', 'a'), 'a\x00b')
|
||||||
|
self.assertEqual(tcl.eval('set a'), 'a\x00b')
|
||||||
|
|
||||||
|
tcl.call('set', 'a', '\u20ac\0') # non-ASCII
|
||||||
|
self.assertEqual(tcl.getvar('a'), '\u20ac\x00')
|
||||||
|
self.assertEqual(tcl.call('set', 'a'), '\u20ac\x00')
|
||||||
|
self.assertEqual(tcl.eval('set a'), '\u20ac\x00')
|
||||||
|
|
||||||
def testCallException(self):
|
def testCallException(self):
|
||||||
tcl = self.interp
|
tcl = self.interp
|
||||||
self.assertRaises(TclError,tcl.call,'set','a')
|
self.assertRaises(TclError,tcl.call,'set','a')
|
||||||
|
@ -98,6 +110,18 @@ class TclTest(unittest.TestCase):
|
||||||
tcl.setvar('a','1')
|
tcl.setvar('a','1')
|
||||||
self.assertEqual(tcl.eval('set a'),'1')
|
self.assertEqual(tcl.eval('set a'),'1')
|
||||||
|
|
||||||
|
def test_setvar_passing_null(self):
|
||||||
|
tcl = self.interp
|
||||||
|
tcl.setvar('a', 'a\0b') # ASCII-only
|
||||||
|
self.assertEqual(tcl.getvar('a'), 'a\x00b')
|
||||||
|
self.assertEqual(tcl.call('set', 'a'), 'a\x00b')
|
||||||
|
self.assertEqual(tcl.eval('set a'), 'a\x00b')
|
||||||
|
|
||||||
|
tcl.setvar('a', '\u20ac\0') # non-ASCII
|
||||||
|
self.assertEqual(tcl.getvar('a'), '\u20ac\x00')
|
||||||
|
self.assertEqual(tcl.call('set', 'a'), '\u20ac\x00')
|
||||||
|
self.assertEqual(tcl.eval('set a'), '\u20ac\x00')
|
||||||
|
|
||||||
def testSetVarArray(self):
|
def testSetVarArray(self):
|
||||||
tcl = self.interp
|
tcl = self.interp
|
||||||
tcl.setvar('a(1)','1')
|
tcl.setvar('a(1)','1')
|
||||||
|
|
|
@ -476,6 +476,15 @@ class MiscTest(AbstractTkTest, unittest.TestCase):
|
||||||
self.assertEqual(vi.micro, 0)
|
self.assertEqual(vi.micro, 0)
|
||||||
self.assertTrue(str(vi).startswith(f'{vi.major}.{vi.minor}'))
|
self.assertTrue(str(vi).startswith(f'{vi.major}.{vi.minor}'))
|
||||||
|
|
||||||
|
def test_embedded_null(self):
|
||||||
|
widget = tkinter.Entry(self.root)
|
||||||
|
widget.insert(0, 'abc\0def') # ASCII-only
|
||||||
|
widget.selection_range(0, 'end')
|
||||||
|
self.assertEqual(widget.selection_get(), 'abc\x00def')
|
||||||
|
widget.insert(0, '\u20ac\0') # non-ASCII
|
||||||
|
widget.selection_range(0, 'end')
|
||||||
|
self.assertEqual(widget.selection_get(), '\u20ac\0abc\x00def')
|
||||||
|
|
||||||
|
|
||||||
class WmTest(AbstractTkTest, unittest.TestCase):
|
class WmTest(AbstractTkTest, unittest.TestCase):
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Fix truncation of strings with embedded null characters in some internal
|
||||||
|
operations in :mod:`tkinter`.
|
|
@ -512,7 +512,7 @@ unicodeFromTclObj(TkappObject *tkapp, Tcl_Obj *value)
|
||||||
else
|
else
|
||||||
Py_UNREACHABLE();
|
Py_UNREACHABLE();
|
||||||
}
|
}
|
||||||
#endif
|
#endif /* USE_TCL_UNICODE */
|
||||||
const char *s = Tcl_GetStringFromObj(value, &len);
|
const char *s = Tcl_GetStringFromObj(value, &len);
|
||||||
return unicodeFromTclStringAndSize(s, len);
|
return unicodeFromTclStringAndSize(s, len);
|
||||||
}
|
}
|
||||||
|
@ -1018,7 +1018,9 @@ AsObj(PyObject *value)
|
||||||
PyErr_SetString(PyExc_OverflowError, "string is too long");
|
PyErr_SetString(PyExc_OverflowError, "string is too long");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
if (PyUnicode_IS_ASCII(value)) {
|
if (PyUnicode_IS_ASCII(value) &&
|
||||||
|
strlen(PyUnicode_DATA(value)) == (size_t)PyUnicode_GET_LENGTH(value))
|
||||||
|
{
|
||||||
return Tcl_NewStringObj((const char *)PyUnicode_DATA(value),
|
return Tcl_NewStringObj((const char *)PyUnicode_DATA(value),
|
||||||
(int)size);
|
(int)size);
|
||||||
}
|
}
|
||||||
|
@ -1033,9 +1035,6 @@ AsObj(PyObject *value)
|
||||||
"surrogatepass", NATIVE_BYTEORDER);
|
"surrogatepass", NATIVE_BYTEORDER);
|
||||||
else
|
else
|
||||||
Py_UNREACHABLE();
|
Py_UNREACHABLE();
|
||||||
#else
|
|
||||||
encoded = _PyUnicode_AsUTF8String(value, "surrogateescape");
|
|
||||||
#endif
|
|
||||||
if (!encoded) {
|
if (!encoded) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -1045,12 +1044,39 @@ AsObj(PyObject *value)
|
||||||
PyErr_SetString(PyExc_OverflowError, "string is too long");
|
PyErr_SetString(PyExc_OverflowError, "string is too long");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
#if USE_TCL_UNICODE
|
|
||||||
result = Tcl_NewUnicodeObj((const Tcl_UniChar *)PyBytes_AS_STRING(encoded),
|
result = Tcl_NewUnicodeObj((const Tcl_UniChar *)PyBytes_AS_STRING(encoded),
|
||||||
(int)(size / sizeof(Tcl_UniChar)));
|
(int)(size / sizeof(Tcl_UniChar)));
|
||||||
#else
|
#else
|
||||||
|
encoded = _PyUnicode_AsUTF8String(value, "surrogateescape");
|
||||||
|
if (!encoded) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
size = PyBytes_GET_SIZE(encoded);
|
||||||
|
if (strlen(PyBytes_AS_STRING(encoded)) != (size_t)size) {
|
||||||
|
/* The string contains embedded null characters.
|
||||||
|
* Tcl needs a null character to be represented as \xc0\x80 in
|
||||||
|
* the Modified UTF-8 encoding. Otherwise the string can be
|
||||||
|
* truncated in some internal operations.
|
||||||
|
*
|
||||||
|
* NOTE: stringlib_replace() could be used here, but optimizing
|
||||||
|
* this obscure case isn't worth it unless stringlib_replace()
|
||||||
|
* is already exposed in the C API for other reasons. */
|
||||||
|
Py_SETREF(encoded,
|
||||||
|
PyObject_CallMethod(encoded, "replace", "y#y#",
|
||||||
|
"\0", (Py_ssize_t)1,
|
||||||
|
"\xc0\x80", (Py_ssize_t)2));
|
||||||
|
if (!encoded) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
size = PyBytes_GET_SIZE(encoded);
|
||||||
|
}
|
||||||
|
if (size > INT_MAX) {
|
||||||
|
Py_DECREF(encoded);
|
||||||
|
PyErr_SetString(PyExc_OverflowError, "string is too long");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
result = Tcl_NewStringObj(PyBytes_AS_STRING(encoded), (int)size);
|
result = Tcl_NewStringObj(PyBytes_AS_STRING(encoded), (int)size);
|
||||||
#endif
|
#endif /* USE_TCL_UNICODE */
|
||||||
Py_DECREF(encoded);
|
Py_DECREF(encoded);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue