bpo-39500: Document PyUnicode_IsIdentifier() function (GH-18397)
PyUnicode_IsIdentifier() no longer calls Py_FatalError() if the string is not ready.
This commit is contained in:
parent
1ea45ae257
commit
f3e7ea5b8c
|
@ -240,6 +240,16 @@ access internal read-only data of Unicode objects:
|
||||||
:c:func:`PyUnicode_nBYTE_DATA` family of macros.
|
:c:func:`PyUnicode_nBYTE_DATA` family of macros.
|
||||||
|
|
||||||
|
|
||||||
|
.. c:function:: int PyUnicode_IsIdentifier(PyObject *o)
|
||||||
|
|
||||||
|
Return ``1`` if the string is a valid identifier according to the language
|
||||||
|
definition, section :ref:`identifiers`. Return ``0`` otherwise.
|
||||||
|
|
||||||
|
.. versionchanged:: 3.9
|
||||||
|
The function no longer calls :c:func:`Py_FatalError` if the string
|
||||||
|
is not ready.
|
||||||
|
|
||||||
|
|
||||||
Unicode Character Properties
|
Unicode Character Properties
|
||||||
""""""""""""""""""""""""""""
|
""""""""""""""""""""""""""""
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
:c:func:`PyUnicode_IsIdentifier` does not call :c:func:`Py_FatalError`
|
||||||
|
anymore if the string is not ready.
|
|
@ -12198,22 +12198,33 @@ unicode_isnumeric_impl(PyObject *self)
|
||||||
int
|
int
|
||||||
PyUnicode_IsIdentifier(PyObject *self)
|
PyUnicode_IsIdentifier(PyObject *self)
|
||||||
{
|
{
|
||||||
int kind;
|
|
||||||
void *data;
|
|
||||||
Py_ssize_t i;
|
Py_ssize_t i;
|
||||||
Py_UCS4 first;
|
int ready = PyUnicode_IS_READY(self);
|
||||||
|
|
||||||
if (PyUnicode_READY(self) == -1) {
|
Py_ssize_t len = ready ? PyUnicode_GET_LENGTH(self) : PyUnicode_GET_SIZE(self);
|
||||||
Py_FatalError("identifier not ready");
|
if (len == 0) {
|
||||||
|
/* an empty string is not a valid identifier */
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Special case for empty strings */
|
int kind;
|
||||||
if (PyUnicode_GET_LENGTH(self) == 0)
|
void *data;
|
||||||
return 0;
|
wchar_t *wstr;
|
||||||
kind = PyUnicode_KIND(self);
|
if (ready) {
|
||||||
data = PyUnicode_DATA(self);
|
kind = PyUnicode_KIND(self);
|
||||||
|
data = PyUnicode_DATA(self);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
wstr = _PyUnicode_WSTR(self);
|
||||||
|
}
|
||||||
|
|
||||||
|
Py_UCS4 ch;
|
||||||
|
if (ready) {
|
||||||
|
ch = PyUnicode_READ(kind, data, 0);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
ch = wstr[0];
|
||||||
|
}
|
||||||
/* PEP 3131 says that the first character must be in
|
/* PEP 3131 says that the first character must be in
|
||||||
XID_Start and subsequent characters in XID_Continue,
|
XID_Start and subsequent characters in XID_Continue,
|
||||||
and for the ASCII range, the 2.x rules apply (i.e
|
and for the ASCII range, the 2.x rules apply (i.e
|
||||||
|
@ -12222,13 +12233,21 @@ PyUnicode_IsIdentifier(PyObject *self)
|
||||||
definition of XID_Start and XID_Continue, it is sufficient
|
definition of XID_Start and XID_Continue, it is sufficient
|
||||||
to check just for these, except that _ must be allowed
|
to check just for these, except that _ must be allowed
|
||||||
as starting an identifier. */
|
as starting an identifier. */
|
||||||
first = PyUnicode_READ(kind, data, 0);
|
if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) {
|
||||||
if (!_PyUnicode_IsXidStart(first) && first != 0x5F /* LOW LINE */)
|
|
||||||
return 0;
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
for (i = 1; i < PyUnicode_GET_LENGTH(self); i++)
|
for (i = 1; i < len; i++) {
|
||||||
if (!_PyUnicode_IsXidContinue(PyUnicode_READ(kind, data, i)))
|
if (ready) {
|
||||||
|
ch = PyUnicode_READ(kind, data, i);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
ch = wstr[i];
|
||||||
|
}
|
||||||
|
if (!_PyUnicode_IsXidContinue(ch)) {
|
||||||
return 0;
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1079,8 +1079,9 @@ verify_identifier(struct tok_state *tok)
|
||||||
}
|
}
|
||||||
result = PyUnicode_IsIdentifier(s);
|
result = PyUnicode_IsIdentifier(s);
|
||||||
Py_DECREF(s);
|
Py_DECREF(s);
|
||||||
if (result == 0)
|
if (result == 0) {
|
||||||
tok->done = E_IDENTIFIER;
|
tok->done = E_IDENTIFIER;
|
||||||
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue