bpo-40130: _PyUnicode_AsKind() should not be exported. (GH-19265)

Make it a static function, and pass known attributes (kind, data, length) instead of the PyUnicode object.
2020-04-01 15:41:49 +03:00 · 2020-04-01 15:41:49 +03:00 · 17b4733f2f
parent 3ef4a7e5a7
commit 17b4733f2f
2 changed files with 46 additions and 55 deletions
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@ -726,12 +726,6 @@ PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
    Py_ssize_t start,
    Py_ssize_t end);

-/* --- wchar_t support for platforms which support it --------------------- */
-
-#ifdef HAVE_WCHAR_H
-PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
-#endif
-
 /* --- Manage the default encoding ---------------------------------------- */

 /* Returns a pointer to the default encoding (UTF-8) of the
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -2043,9 +2043,9 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
    void *data = PyUnicode_DATA(unicode);
    const char *end = str + len;

+    assert(index + len <= PyUnicode_GET_LENGTH(unicode));
    switch (kind) {
    case PyUnicode_1BYTE_KIND: {
-        assert(index + len <= PyUnicode_GET_LENGTH(unicode));
 #ifdef Py_DEBUG
        if (PyUnicode_IS_ASCII(unicode)) {
            Py_UCS4 maxchar = ucs1lib_find_max_char(
@ -2060,7 +2060,6 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
    case PyUnicode_2BYTE_KIND: {
        Py_UCS2 *start = (Py_UCS2 *)data + index;
        Py_UCS2 *ucs2 = start;
-        assert(index <= PyUnicode_GET_LENGTH(unicode));

        for (; str < end; ++ucs2, ++str)
            *ucs2 = (Py_UCS2)*str;
@ -2068,17 +2067,18 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
        assert((ucs2 - start) <= PyUnicode_GET_LENGTH(unicode));
        break;
    }
-    default: {
+    case PyUnicode_4BYTE_KIND: {
        Py_UCS4 *start = (Py_UCS4 *)data + index;
        Py_UCS4 *ucs4 = start;
-        assert(kind == PyUnicode_4BYTE_KIND);
-        assert(index <= PyUnicode_GET_LENGTH(unicode));

        for (; str < end; ++ucs4, ++str)
            *ucs4 = (Py_UCS4)*str;

        assert((ucs4 - start) <= PyUnicode_GET_LENGTH(unicode));
+        break;
    }
+    default:
+        Py_UNREACHABLE();
    }
 }

@ -2458,13 +2458,15 @@ unicode_adjust_maxchar(PyObject **p_unicode)
        if (max_char >= 256)
            return;
    }
-    else {
+    else if (kind == PyUnicode_4BYTE_KIND) {
        const Py_UCS4 *u = PyUnicode_4BYTE_DATA(unicode);
-        assert(kind == PyUnicode_4BYTE_KIND);
        max_char = ucs4lib_find_max_char(u, u + len);
        if (max_char >= 0x10000)
            return;
    }
+    else
+        Py_UNREACHABLE();
+
    copy = PyUnicode_New(len, max_char);
    if (copy != NULL)
        _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, len);
@ -2501,22 +2503,12 @@ _PyUnicode_Copy(PyObject *unicode)
 /* Widen Unicode objects to larger buffers. Don't write terminating null
   character. Return NULL on error. */

-void*
-_PyUnicode_AsKind(PyObject *s, unsigned int kind)
+static void*
+unicode_askind(unsigned int skind, void const *data, Py_ssize_t len, unsigned int kind)
 {
-    Py_ssize_t len;
    void *result;
-    unsigned int skind;

-    if (PyUnicode_READY(s) == -1)
-        return NULL;
-
-    len = PyUnicode_GET_LENGTH(s);
-    skind = PyUnicode_KIND(s);
-    if (skind >= kind) {
-        PyErr_SetString(PyExc_SystemError, "invalid widening attempt");
-        return NULL;
-    }
+    assert(skind < kind);
    switch (kind) {
    case PyUnicode_2BYTE_KIND:
        result = PyMem_New(Py_UCS2, len);
@ -2525,8 +2517,8 @@ _PyUnicode_AsKind(PyObject *s, unsigned int kind)
        assert(skind == PyUnicode_1BYTE_KIND);
        _PyUnicode_CONVERT_BYTES(
            Py_UCS1, Py_UCS2,
-            PyUnicode_1BYTE_DATA(s),
-            PyUnicode_1BYTE_DATA(s) + len,
+            (const Py_UCS1 *)data,
+            ((const Py_UCS1 *)data) + len,
            result);
        return result;
    case PyUnicode_4BYTE_KIND:
@ -2536,25 +2528,24 @@ _PyUnicode_AsKind(PyObject *s, unsigned int kind)
        if (skind == PyUnicode_2BYTE_KIND) {
            _PyUnicode_CONVERT_BYTES(
                Py_UCS2, Py_UCS4,
-                PyUnicode_2BYTE_DATA(s),
-                PyUnicode_2BYTE_DATA(s) + len,
+                (const Py_UCS2 *)data,
+                ((const Py_UCS2 *)data) + len,
                result);
        }
        else {
            assert(skind == PyUnicode_1BYTE_KIND);
            _PyUnicode_CONVERT_BYTES(
                Py_UCS1, Py_UCS4,
-                PyUnicode_1BYTE_DATA(s),
-                PyUnicode_1BYTE_DATA(s) + len,
+                (const Py_UCS1 *)data,
+                ((const Py_UCS1 *)data) + len,
                result);
        }
        return result;
    default:
-        break;
-    }
-    PyErr_SetString(PyExc_SystemError, "invalid kind");
+        Py_UNREACHABLE();
        return NULL;
    }
+}

 static Py_UCS4*
 as_ucs4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize,
@ -9420,7 +9411,7 @@ any_find_slice(PyObject* s1, PyObject* s2,
    }

    if (kind2 != kind1) {
-        buf2 = _PyUnicode_AsKind(s2, kind1);
+        buf2 = unicode_askind(kind2, buf2, len2, kind1);
        if (!buf2)
            return -2;
    }
@ -9642,7 +9633,7 @@ PyUnicode_Count(PyObject *str,
    buf1 = PyUnicode_DATA(str);
    buf2 = PyUnicode_DATA(substr);
    if (kind2 != kind1) {
-        buf2 = _PyUnicode_AsKind(substr, kind1);
+        buf2 = unicode_askind(kind2, buf2, len2, kind1);
        if (!buf2)
            goto onError;
    }
@ -10415,7 +10406,7 @@ split(PyObject *self,
    buf1 = PyUnicode_DATA(self);
    buf2 = PyUnicode_DATA(substring);
    if (kind2 != kind1) {
-        buf2 = _PyUnicode_AsKind(substring, kind1);
+        buf2 = unicode_askind(kind2, buf2, len2, kind1);
        if (!buf2)
            return NULL;
    }
@ -10506,7 +10497,7 @@ rsplit(PyObject *self,
    buf1 = PyUnicode_DATA(self);
    buf2 = PyUnicode_DATA(substring);
    if (kind2 != kind1) {
-        buf2 = _PyUnicode_AsKind(substring, kind1);
+        buf2 = unicode_askind(kind2, buf2, len2, kind1);
        if (!buf2)
            return NULL;
    }
@ -10665,7 +10656,7 @@ replace(PyObject *self, PyObject *str1,

            if (kind1 < rkind) {
                /* widen substring */
-                buf1 = _PyUnicode_AsKind(str1, rkind);
+                buf1 = unicode_askind(kind1, buf1, len1, rkind);
                if (!buf1) goto error;
                release1 = 1;
            }
@ -10674,19 +10665,22 @@ replace(PyObject *self, PyObject *str1,
                goto nothing;
            if (rkind > kind2) {
                /* widen replacement */
-                buf2 = _PyUnicode_AsKind(str2, rkind);
+                buf2 = unicode_askind(kind2, buf2, len2, rkind);
                if (!buf2) goto error;
                release2 = 1;
            }
            else if (rkind < kind2) {
                /* widen self and buf1 */
                rkind = kind2;
-                if (release1) PyMem_Free(buf1);
+                if (release1) {
+                    PyMem_Free(buf1);
+                    buf1 = PyUnicode_DATA(str1);
                    release1 = 0;
-                sbuf = _PyUnicode_AsKind(self, rkind);
+                }
+                sbuf = unicode_askind(skind, sbuf, slen, rkind);
                if (!sbuf) goto error;
                srelease = 1;
-                buf1 = _PyUnicode_AsKind(str1, rkind);
+                buf1 = unicode_askind(kind1, buf1, len1, rkind);
                if (!buf1) goto error;
                release1 = 1;
            }
@ -10724,7 +10718,7 @@ replace(PyObject *self, PyObject *str1,

        if (kind1 < rkind) {
            /* widen substring */
-            buf1 = _PyUnicode_AsKind(str1, rkind);
+            buf1 = unicode_askind(kind1, buf1, len1, rkind);
            if (!buf1) goto error;
            release1 = 1;
        }
@ -10733,19 +10727,22 @@ replace(PyObject *self, PyObject *str1,
            goto nothing;
        if (kind2 < rkind) {
            /* widen replacement */
-            buf2 = _PyUnicode_AsKind(str2, rkind);
+            buf2 = unicode_askind(kind2, buf2, len2, rkind);
            if (!buf2) goto error;
            release2 = 1;
        }
        else if (kind2 > rkind) {
            /* widen self and buf1 */
            rkind = kind2;
-            sbuf = _PyUnicode_AsKind(self, rkind);
+            sbuf = unicode_askind(skind, sbuf, slen, rkind);
            if (!sbuf) goto error;
            srelease = 1;
-            if (release1) PyMem_Free(buf1);
+            if (release1) {
+                PyMem_Free(buf1);
+                buf1 = PyUnicode_DATA(str1);
                release1 = 0;
-            buf1 = _PyUnicode_AsKind(str1, rkind);
+            }
+            buf1 = unicode_askind(kind1, buf1, len1, rkind);
            if (!buf1) goto error;
            release1 = 1;
        }
@ -11361,7 +11358,7 @@ PyUnicode_Contains(PyObject *str, PyObject *substr)
        return result;
    }
    if (kind2 != kind1) {
-        buf2 = _PyUnicode_AsKind(substr, kind1);
+        buf2 = unicode_askind(kind2, buf2, len2, kind1);
        if (!buf2)
            return -1;
    }
@ -11578,7 +11575,7 @@ unicode_count(PyObject *self, PyObject *args)
    buf1 = PyUnicode_DATA(self);
    buf2 = PyUnicode_DATA(substring);
    if (kind2 != kind1) {
-        buf2 = _PyUnicode_AsKind(substring, kind1);
+        buf2 = unicode_askind(kind2, buf2, len2, kind1);
        if (!buf2)
            return NULL;
    }
@ -13081,7 +13078,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
    buf1 = PyUnicode_DATA(str_obj);
    buf2 = PyUnicode_DATA(sep_obj);
    if (kind2 != kind1) {
-        buf2 = _PyUnicode_AsKind(sep_obj, kind1);
+        buf2 = unicode_askind(kind2, buf2, len2, kind1);
        if (!buf2)
            return NULL;
    }
@ -13138,7 +13135,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
    buf1 = PyUnicode_DATA(str_obj);
    buf2 = PyUnicode_DATA(sep_obj);
    if (kind2 != kind1) {
-        buf2 = _PyUnicode_AsKind(sep_obj, kind1);
+        buf2 = unicode_askind(kind2, buf2, len2, kind1);
        if (!buf2)
            return NULL;
    }