bpo-39500: Document PyUnicode_IsIdentifier() function (GH-18397)

PyUnicode_IsIdentifier() no longer calls Py_FatalError() if the string is not ready.
2020-02-11 14:29:33 +01:00 · 2020-02-11 14:29:33 +01:00 · f3e7ea5b8c
parent 1ea45ae257
commit f3e7ea5b8c
4 changed files with 47 additions and 15 deletions
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@ -240,6 +240,16 @@ access internal read-only data of Unicode objects:
      :c:func:`PyUnicode_nBYTE_DATA` family of macros.
 .. c:function:: int PyUnicode_IsIdentifier(PyObject *o)
   Return ``1`` if the string is a valid identifier according to the language
   definition, section :ref:`identifiers`. Return ``0`` otherwise.
   .. versionchanged:: 3.9
      The function no longer calls :c:func:`Py_FatalError` if the string
      is not ready.
 Unicode Character Properties
 """"""""""""""""""""""""""""
--- a/API/2020-02-07-09-35-43.bpo-39500.xRAEgX.rst
+++ b/API/2020-02-07-09-35-43.bpo-39500.xRAEgX.rst
@ -0,0 +1,2 @@
 :c:func:`PyUnicode_IsIdentifier` no longer calls :c:func:`Py_FatalError`
 if the string is not ready.
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -12198,22 +12198,33 @@ unicode_isnumeric_impl(PyObject *self)
 int
 PyUnicode_IsIdentifier(PyObject *self)
 {
    int kind;
    void *data;
    Py_ssize_t i;
-    Py_UCS4 first;
+    int ready = PyUnicode_IS_READY(self);
-    if (PyUnicode_READY(self) == -1) {
+    Py_ssize_t len = ready ? PyUnicode_GET_LENGTH(self) : PyUnicode_GET_SIZE(self);
-        Py_FatalError("identifier not ready");
+    if (len == 0) {
        /* an empty string is not a valid identifier */
        return 0;
    }
-    /* Special case for empty strings */
+    int kind;
-    if (PyUnicode_GET_LENGTH(self) == 0)
+    void *data;
-        return 0;
+    wchar_t *wstr;
-    kind = PyUnicode_KIND(self);
+    if (ready) {
-    data = PyUnicode_DATA(self);
+        kind = PyUnicode_KIND(self);
        data = PyUnicode_DATA(self);
    }
    else {
        wstr = _PyUnicode_WSTR(self);
    }
    Py_UCS4 ch;
    if (ready) {
        ch = PyUnicode_READ(kind, data, 0);
    }
    else {
        ch = wstr[0];
    }
    /* PEP 3131 says that the first character must be in
       XID_Start and subsequent characters in XID_Continue,
       and for the ASCII range, the 2.x rules apply (i.e.
@ -12222,13 +12233,21 @@ PyUnicode_IsIdentifier(PyObject *self)
       definition of XID_Start and XID_Continue, it is sufficient
       to check just for these, except that _ must be allowed
       as starting an identifier.  */
-    first = PyUnicode_READ(kind, data, 0);
+    if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) {
    if (!_PyUnicode_IsXidStart(first) && first != 0x5F /* LOW LINE */)
        return 0;
    }
-    for (i = 1; i < PyUnicode_GET_LENGTH(self); i++)
+    for (i = 1; i < len; i++) {
-        if (!_PyUnicode_IsXidContinue(PyUnicode_READ(kind, data, i)))
+        if (ready) {
            ch = PyUnicode_READ(kind, data, i);
        }
        else {
            ch = wstr[i];
        }
        if (!_PyUnicode_IsXidContinue(ch)) {
            return 0;
        }
    }
    return 1;
 }
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@ -1079,8 +1079,9 @@ verify_identifier(struct tok_state *tok)
    }
    result = PyUnicode_IsIdentifier(s);
    Py_DECREF(s);
-    if (result == 0)
+    if (result == 0) {
        tok->done = E_IDENTIFIER;
    }
    return result;
 }
		`@ -0,0 +1,2 @@`
							:c:func:`PyUnicode_IsIdentifier` does not call :c:func:`Py_FatalError`
							`anymore if the string is not ready.`