mirror of https://github.com/python/cpython
In narrow builds, make sure to test code points as identifier characters (closes #12732)
This fixes the use of Unicode identifiers outside the BMP in narrow builds.
This commit is contained in:
parent
7bf4363f77
commit
f413b80806
|
@ -8,9 +8,12 @@ class PEP3131Test(unittest.TestCase):
|
|||
ä = 1
|
||||
µ = 2 # this is a compatibility character
|
||||
蟒 = 3
|
||||
𝔘𝔫𝔦𝔠𝔬𝔡𝔢 = 4
|
||||
self.assertEqual(getattr(T, "\xe4"), 1)
|
||||
self.assertEqual(getattr(T, "\u03bc"), 2)
|
||||
self.assertEqual(getattr(T, '\u87d2'), 3)
|
||||
v = getattr(T, "\U0001d518\U0001d52b\U0001d526\U0001d520\U0001d52c\U0001d521\U0001d522")
|
||||
self.assertEqual(v, 4)
|
||||
|
||||
def test_invalid(self):
|
||||
try:
|
||||
|
|
|
@ -404,6 +404,7 @@ class UnicodeTest(string_tests.CommonTest,
|
|||
self.assertTrue("bc".isidentifier())
|
||||
self.assertTrue("b_".isidentifier())
|
||||
self.assertTrue("µ".isidentifier())
|
||||
self.assertTrue("𝔘𝔫𝔦𝔠𝔬𝔡𝔢".isidentifier())
|
||||
|
||||
self.assertFalse(" ".isidentifier())
|
||||
self.assertFalse("[".isidentifier())
|
||||
|
|
|
@ -10,6 +10,9 @@ What's New in Python 3.2.2?
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issue #12732: In narrow Unicode builds, allow Unicode identifiers which fall
|
||||
outside the BMP.
|
||||
|
||||
- Issue #11603: Fix a crash when __str__ is rebound as __repr__. Patch by
|
||||
Andreas Stührk.
|
||||
|
||||
|
|
|
@ -7972,14 +7972,30 @@ unicode_isnumeric(PyUnicodeObject *self)
|
|||
return PyBool_FromLong(1);
|
||||
}
|
||||
|
||||
/* Read one code point from the Py_UNICODE buffer `s`, starting at index `*i`,
   and advance `*i` past the code unit(s) consumed.

   s    - buffer of Py_UNICODE code units
   i    - in/out index into `s`; must satisfy *i < size on entry (asserted)
   size - number of code units in `s`

   When Py_UNICODE_WIDE is not defined, a high surrogate (0xD800-0xDBFF)
   immediately followed by a low surrogate (0xDC00-0xDFFF) is combined into a
   single code point and both units are consumed.  In every other case
   (including a lone surrogate) the single code unit is returned unchanged
   and only one unit is consumed. */
static Py_UCS4
|
||||
decode_ucs4(const Py_UNICODE *s, Py_ssize_t *i, Py_ssize_t size)
|
||||
{
|
||||
Py_UCS4 ch;
|
||||
assert(*i < size);
|
||||
ch = s[(*i)++];
|
||||
#ifndef Py_UNICODE_WIDE
|
||||
/* High surrogate, with at least one more unit available that is a low
   surrogate: the bounds check comes before s[*i] is read. */
if ((ch & 0xfffffc00) == 0xd800 &&
|
||||
*i < size
|
||||
&& (s[*i] & 0xFFFFFC00) == 0xDC00)
|
||||
/* 0x35fdc00 == (0xD800 << 10) + 0xDC00 - 0x10000: subtracting it removes
   both surrogate bases and adds the 0x10000 supplementary-plane offset
   in one step. */
ch = ((Py_UCS4)ch << 10UL) + (Py_UCS4)(s[(*i)++]) - 0x35fdc00;
|
||||
#endif
|
||||
return ch;
|
||||
}
|
||||
|
||||
int
|
||||
PyUnicode_IsIdentifier(PyObject *self)
|
||||
{
|
||||
register const Py_UNICODE *p = PyUnicode_AS_UNICODE((PyUnicodeObject*)self);
|
||||
register const Py_UNICODE *e;
|
||||
Py_ssize_t i = 0, size = PyUnicode_GET_SIZE(self);
|
||||
Py_UCS4 first;
|
||||
const Py_UNICODE *p = PyUnicode_AS_UNICODE((PyUnicodeObject*)self);
|
||||
|
||||
/* Special case for empty strings */
|
||||
if (PyUnicode_GET_SIZE(self) == 0)
|
||||
if (!size)
|
||||
return 0;
|
||||
|
||||
/* PEP 3131 says that the first character must be in
|
||||
|
@ -7990,14 +8006,13 @@ PyUnicode_IsIdentifier(PyObject *self)
|
|||
definition of XID_Start and XID_Continue, it is sufficient
|
||||
to check just for these, except that _ must be allowed
|
||||
as starting an identifier. */
|
||||
if (!_PyUnicode_IsXidStart(*p) && *p != 0x5F /* LOW LINE */)
|
||||
first = decode_ucs4(p, &i, size);
|
||||
if (!_PyUnicode_IsXidStart(first) && first != 0x5F /* LOW LINE */)
|
||||
return 0;
|
||||
|
||||
e = p + PyUnicode_GET_SIZE(self);
|
||||
for (p++; p < e; p++) {
|
||||
if (!_PyUnicode_IsXidContinue(*p))
|
||||
while (i < size)
|
||||
if (!_PyUnicode_IsXidContinue(decode_ucs4(p, &i, size)))
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue