bpo-39500: Document PyUnicode_IsIdentifier() function (GH-18397)

PyUnicode_IsIdentifier() does not call Py_FatalError() anymore if the
string is not ready.
This commit is contained in:
Victor Stinner 2020-02-11 14:29:33 +01:00 committed by GitHub
parent 1ea45ae257
commit f3e7ea5b8c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 47 additions and 15 deletions

View File

@@ -240,6 +240,16 @@ access internal read-only data of Unicode objects:
:c:func:`PyUnicode_nBYTE_DATA` family of macros. :c:func:`PyUnicode_nBYTE_DATA` family of macros.
.. c:function:: int PyUnicode_IsIdentifier(PyObject *o)
Return ``1`` if the string is a valid identifier according to the language
definition, section :ref:`identifiers`. Return ``0`` otherwise.
.. versionchanged:: 3.9
The function does not call :c:func:`Py_FatalError` anymore if the string
is not ready.
Unicode Character Properties Unicode Character Properties
"""""""""""""""""""""""""""" """"""""""""""""""""""""""""

View File

@@ -0,0 +1,2 @@
:c:func:`PyUnicode_IsIdentifier` does not call :c:func:`Py_FatalError`
anymore if the string is not ready.

View File

@@ -12198,22 +12198,33 @@ unicode_isnumeric_impl(PyObject *self)
int int
PyUnicode_IsIdentifier(PyObject *self) PyUnicode_IsIdentifier(PyObject *self)
{ {
int kind;
void *data;
Py_ssize_t i; Py_ssize_t i;
Py_UCS4 first; int ready = PyUnicode_IS_READY(self);
if (PyUnicode_READY(self) == -1) { Py_ssize_t len = ready ? PyUnicode_GET_LENGTH(self) : PyUnicode_GET_SIZE(self);
Py_FatalError("identifier not ready"); if (len == 0) {
/* an empty string is not a valid identifier */
return 0; return 0;
} }
/* Special case for empty strings */ int kind;
if (PyUnicode_GET_LENGTH(self) == 0) void *data;
return 0; wchar_t *wstr;
kind = PyUnicode_KIND(self); if (ready) {
data = PyUnicode_DATA(self); kind = PyUnicode_KIND(self);
data = PyUnicode_DATA(self);
}
else {
wstr = _PyUnicode_WSTR(self);
}
Py_UCS4 ch;
if (ready) {
ch = PyUnicode_READ(kind, data, 0);
}
else {
ch = wstr[0];
}
/* PEP 3131 says that the first character must be in /* PEP 3131 says that the first character must be in
XID_Start and subsequent characters in XID_Continue, XID_Start and subsequent characters in XID_Continue,
and for the ASCII range, the 2.x rules apply (i.e and for the ASCII range, the 2.x rules apply (i.e
@@ -12222,13 +12233,21 @@ PyUnicode_IsIdentifier(PyObject *self)
definition of XID_Start and XID_Continue, it is sufficient definition of XID_Start and XID_Continue, it is sufficient
to check just for these, except that _ must be allowed to check just for these, except that _ must be allowed
as starting an identifier. */ as starting an identifier. */
first = PyUnicode_READ(kind, data, 0); if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) {
if (!_PyUnicode_IsXidStart(first) && first != 0x5F /* LOW LINE */)
return 0; return 0;
}
for (i = 1; i < PyUnicode_GET_LENGTH(self); i++) for (i = 1; i < len; i++) {
if (!_PyUnicode_IsXidContinue(PyUnicode_READ(kind, data, i))) if (ready) {
ch = PyUnicode_READ(kind, data, i);
}
else {
ch = wstr[i];
}
if (!_PyUnicode_IsXidContinue(ch)) {
return 0; return 0;
}
}
return 1; return 1;
} }

View File

@@ -1079,8 +1079,9 @@ verify_identifier(struct tok_state *tok)
} }
result = PyUnicode_IsIdentifier(s); result = PyUnicode_IsIdentifier(s);
Py_DECREF(s); Py_DECREF(s);
if (result == 0) if (result == 0) {
tok->done = E_IDENTIFIER; tok->done = E_IDENTIFIER;
}
return result; return result;
} }