bpo-40596: Fix str.isidentifier() for non-canonicalized strings containing non-BMP characters on Windows. (GH-20053)
This commit is contained in:
parent
7c6e970775
commit
5650e76f63
|
@ -720,6 +720,13 @@ class UnicodeTest(string_tests.CommonTest,
|
|||
self.assertFalse("©".isidentifier())
|
||||
self.assertFalse("0".isidentifier())
|
||||
|
||||
@support.cpython_only
|
||||
def test_isidentifier_legacy(self):
|
||||
import _testcapi
|
||||
u = '𝖀𝖓𝖎𝖈𝖔𝖉𝖊'
|
||||
self.assertTrue(u.isidentifier())
|
||||
self.assertTrue(_testcapi.unicode_legacy_string(u).isidentifier())
|
||||
|
||||
def test_isprintable(self):
|
||||
self.assertTrue("".isprintable())
|
||||
self.assertTrue(" ".isprintable())
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Fixed :meth:`str.isidentifier` for non-canonicalized strings containing
|
||||
non-BMP characters on Windows.
|
|
@ -12356,20 +12356,38 @@ PyUnicode_IsIdentifier(PyObject *self)
|
|||
return len && i == len;
|
||||
}
|
||||
else {
|
||||
Py_ssize_t i, len = PyUnicode_GET_SIZE(self);
|
||||
Py_ssize_t i = 0, len = PyUnicode_GET_SIZE(self);
|
||||
if (len == 0) {
|
||||
/* an empty string is not a valid identifier */
|
||||
return 0;
|
||||
}
|
||||
|
||||
const wchar_t *wstr = _PyUnicode_WSTR(self);
|
||||
Py_UCS4 ch = wstr[0];
|
||||
Py_UCS4 ch = wstr[i++];
|
||||
#if SIZEOF_WCHAR_T == 2
|
||||
if (Py_UNICODE_IS_HIGH_SURROGATE(ch)
|
||||
&& i < len
|
||||
&& Py_UNICODE_IS_LOW_SURROGATE(wstr[i]))
|
||||
{
|
||||
ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]);
|
||||
i++;
|
||||
}
|
||||
#endif
|
||||
if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (i = 1; i < len; i++) {
|
||||
ch = wstr[i];
|
||||
while (i < len) {
|
||||
ch = wstr[i++];
|
||||
#if SIZEOF_WCHAR_T == 2
|
||||
if (Py_UNICODE_IS_HIGH_SURROGATE(ch)
|
||||
&& i < len
|
||||
&& Py_UNICODE_IS_LOW_SURROGATE(wstr[i]))
|
||||
{
|
||||
ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]);
|
||||
i++;
|
||||
}
|
||||
#endif
|
||||
if (!_PyUnicode_IsXidContinue(ch)) {
|
||||
return 0;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue