From d5d9a718662e67e2b1ac7874dda9df2d8d71d415 Mon Sep 17 00:00:00 2001
From: Inada Naoki <songofacandy@gmail.com>
Date: Mon, 11 May 2020 15:37:25 +0900
Subject: [PATCH] bpo-36346: array: Don't use deprecated APIs (GH-19653)

* Py_UNICODE -> wchar_t
* Py_UNICODE -> unicode in Argument Clinic
* PyUnicode_AsUnicode -> PyUnicode_AsWideCharString
* Don't use "u#" format.

Co-authored-by: Victor Stinner <vstinner@python.org>
---
 Doc/library/array.rst          | 13 +++---
 Doc/whatsnew/3.9.rst           |  6 +++
 Modules/arraymodule.c          | 82 +++++++++++++++++-----------------
 Modules/clinic/arraymodule.c.h | 17 ++++---
 4 files changed, 63 insertions(+), 55 deletions(-)

diff --git a/Doc/library/array.rst b/Doc/library/array.rst
index c9a9b1dabb2..78020738bf4 100644
--- a/Doc/library/array.rst
+++ b/Doc/library/array.rst
@@ -22,7 +22,7 @@ defined:
 +-----------+--------------------+-------------------+-----------------------+-------+
 | ``'B'``   | unsigned char      | int               | 1                     |       |
 +-----------+--------------------+-------------------+-----------------------+-------+
-| ``'u'``   | Py_UNICODE         | Unicode character | 2                     | \(1)  |
+| ``'u'``   | wchar_t            | Unicode character | 2                     | \(1)  |
 +-----------+--------------------+-------------------+-----------------------+-------+
 | ``'h'``   | signed short       | int               | 2                     |       |
 +-----------+--------------------+-------------------+-----------------------+-------+
@@ -48,15 +48,16 @@ defined:
 Notes:
 
 (1)
-   The ``'u'`` type code corresponds to Python's obsolete unicode character
-   (:c:type:`Py_UNICODE` which is :c:type:`wchar_t`). Depending on the
-   platform, it can be 16 bits or 32 bits.
+   It can be 16 bits or 32 bits depending on the platform.
 
-   ``'u'`` will be removed together with the rest of the :c:type:`Py_UNICODE`
-   API.
+   .. versionchanged:: 3.9
+      ``array('u')`` now uses ``wchar_t`` as C type instead of deprecated
+      ``Py_UNICODE``. This change does not affect its behavior because
+      ``Py_UNICODE`` has been an alias of ``wchar_t`` since Python 3.3.
 
    .. deprecated-removed:: 3.3 4.0
 
+
 The actual representation of values is determined by the machine architecture
 (strictly speaking, by the C implementation).  The actual size can be accessed
 through the :attr:`itemsize` attribute.
diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst
index 11e577baa8f..c57d702dce8 100644
--- a/Doc/whatsnew/3.9.rst
+++ b/Doc/whatsnew/3.9.rst
@@ -786,6 +786,12 @@ Changes in the Python API
   ``PyCF_ALLOW_TOP_LEVEL_AWAIT`` was clashing with ``CO_FUTURE_DIVISION``.
   (Contributed by Batuhan Taskaya in :issue:`39562`)
 
+* ``array('u')`` now uses ``wchar_t`` as C type instead of ``Py_UNICODE``.
+  This change does not affect its behavior because ``Py_UNICODE`` has been an
+  alias of ``wchar_t`` since Python 3.3.
+  (Contributed by Inada Naoki in :issue:`34538`.)
+
+
 CPython bytecode changes
 ------------------------
 
diff --git a/Modules/arraymodule.c b/Modules/arraymodule.c
index 4920ad7b821..732703e481a 100644
--- a/Modules/arraymodule.c
+++ b/Modules/arraymodule.c
@@ -235,24 +235,31 @@ BB_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
 static PyObject *
 u_getitem(arrayobject *ap, Py_ssize_t i)
 {
-    return PyUnicode_FromOrdinal(((Py_UNICODE *) ap->ob_item)[i]);
+    return PyUnicode_FromOrdinal(((wchar_t *) ap->ob_item)[i]);
 }
 
 static int
 u_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
 {
-    Py_UNICODE *p;
-    Py_ssize_t len;
-
-    if (!PyArg_Parse(v, "u#;array item must be unicode character", &p, &len))
+    PyObject *u;
+    if (!PyArg_Parse(v, "U;array item must be unicode character", &u)) {
         return -1;
-    if (len != 1) {
+    }
+
+    Py_ssize_t len = PyUnicode_AsWideChar(u, NULL, 0);
+    if (len != 2) {
         PyErr_SetString(PyExc_TypeError,
                         "array item must be unicode character");
         return -1;
     }
-    if (i >= 0)
-        ((Py_UNICODE *)ap->ob_item)[i] = p[0];
+
+    wchar_t w;
+    len = PyUnicode_AsWideChar(u, &w, 1);
+    assert(len == 1);
+
+    if (i >= 0) {
+        ((wchar_t *)ap->ob_item)[i] = w;
+    }
     return 0;
 }
 
@@ -530,7 +537,7 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
 
 DEFINE_COMPAREITEMS(b, signed char)
 DEFINE_COMPAREITEMS(BB, unsigned char)
-DEFINE_COMPAREITEMS(u, Py_UNICODE)
+DEFINE_COMPAREITEMS(u, wchar_t)
 DEFINE_COMPAREITEMS(h, short)
 DEFINE_COMPAREITEMS(HH, unsigned short)
 DEFINE_COMPAREITEMS(i, int)
@@ -548,7 +555,7 @@ DEFINE_COMPAREITEMS(QQ, unsigned long long)
 static const struct arraydescr descriptors[] = {
     {'b', 1, b_getitem, b_setitem, b_compareitems, "b", 1, 1},
     {'B', 1, BB_getitem, BB_setitem, BB_compareitems, "B", 1, 0},
-    {'u', sizeof(Py_UNICODE), u_getitem, u_setitem, u_compareitems, "u", 0, 0},
+    {'u', sizeof(wchar_t), u_getitem, u_setitem, u_compareitems, "u", 0, 0},
     {'h', sizeof(short), h_getitem, h_setitem, h_compareitems, "h", 1, 1},
     {'H', sizeof(short), HH_getitem, HH_setitem, HH_compareitems, "H", 1, 0},
     {'i', sizeof(int), i_getitem, i_setitem, i_compareitems, "i", 1, 1},
@@ -1660,7 +1667,7 @@ array_array_tobytes_impl(arrayobject *self)
 /*[clinic input]
 array.array.fromunicode
 
-    ustr: Py_UNICODE(zeroes=True)
+    ustr: unicode
     /
 
 Extends this array with data from the unicode string ustr.
@@ -1671,25 +1678,28 @@ some other type.
 [clinic start generated code]*/
 
 static PyObject *
-array_array_fromunicode_impl(arrayobject *self, const Py_UNICODE *ustr,
-                             Py_ssize_clean_t ustr_length)
-/*[clinic end generated code: output=cf2f662908e2befc input=150f00566ffbca6e]*/
+array_array_fromunicode_impl(arrayobject *self, PyObject *ustr)
+/*[clinic end generated code: output=24359f5e001a7f2b input=025db1fdade7a4ce]*/
 {
-    char typecode;
-
-    typecode = self->ob_descr->typecode;
-    if (typecode != 'u') {
+    if (self->ob_descr->typecode != 'u') {
         PyErr_SetString(PyExc_ValueError,
             "fromunicode() may only be called on "
             "unicode type arrays");
         return NULL;
     }
-    if (ustr_length > 0) {
+
+    Py_ssize_t ustr_length = PyUnicode_AsWideChar(ustr, NULL, 0);
+    assert(ustr_length > 0);
+    if (ustr_length > 1) {
+        ustr_length--; /* trim trailing NUL character */
         Py_ssize_t old_size = Py_SIZE(self);
-        if (array_resize(self, old_size + ustr_length) == -1)
+        if (array_resize(self, old_size + ustr_length) == -1) {
             return NULL;
-        memcpy(self->ob_item + old_size * sizeof(Py_UNICODE),
-               ustr, ustr_length * sizeof(Py_UNICODE));
+        }
+
+        // must not fail
+        PyUnicode_AsWideChar(
+            ustr, ((wchar_t *)self->ob_item) + old_size, ustr_length);
     }
 
     Py_RETURN_NONE;
@@ -1709,14 +1719,12 @@ static PyObject *
 array_array_tounicode_impl(arrayobject *self)
 /*[clinic end generated code: output=08e442378336e1ef input=127242eebe70b66d]*/
 {
-    char typecode;
-    typecode = self->ob_descr->typecode;
-    if (typecode != 'u') {
+    if (self->ob_descr->typecode != 'u') {
         PyErr_SetString(PyExc_ValueError,
              "tounicode() may only be called on unicode type arrays");
         return NULL;
     }
-    return PyUnicode_FromWideChar((Py_UNICODE *) self->ob_item, Py_SIZE(self));
+    return PyUnicode_FromWideChar((wchar_t *) self->ob_item, Py_SIZE(self));
 }
 
 /*[clinic input]
@@ -2675,30 +2683,20 @@ array_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
                 Py_DECREF(v);
             }
             else if (initial != NULL && PyUnicode_Check(initial))  {
-                Py_UNICODE *ustr;
                 Py_ssize_t n;
-
-                ustr = PyUnicode_AsUnicode(initial);
+                wchar_t *ustr = PyUnicode_AsWideCharString(initial, &n);
                 if (ustr == NULL) {
-                    PyErr_NoMemory();
                     Py_DECREF(a);
                     return NULL;
                 }
 
-                n = PyUnicode_GET_DATA_SIZE(initial);
                 if (n > 0) {
                     arrayobject *self = (arrayobject *)a;
-                    char *item = self->ob_item;
-                    item = (char *)PyMem_Realloc(item, n);
-                    if (item == NULL) {
-                        PyErr_NoMemory();
-                        Py_DECREF(a);
-                        return NULL;
-                    }
-                    self->ob_item = item;
-                    Py_SET_SIZE(self, n / sizeof(Py_UNICODE));
-                    memcpy(item, ustr, n);
-                    self->allocated = Py_SIZE(self);
+                    // self->ob_item may be NULL but it is safe.
+                    PyMem_Free(self->ob_item);
+                    self->ob_item = (char *)ustr;
+                    Py_SET_SIZE(self, n);
+                    self->allocated = n;
                 }
             }
             else if (initial != NULL && array_Check(initial) && len > 0) {
diff --git a/Modules/clinic/arraymodule.c.h b/Modules/clinic/arraymodule.c.h
index e1f4b0397b9..b9245ca91d5 100644
--- a/Modules/clinic/arraymodule.c.h
+++ b/Modules/clinic/arraymodule.c.h
@@ -380,20 +380,23 @@ PyDoc_STRVAR(array_array_fromunicode__doc__,
     {"fromunicode", (PyCFunction)array_array_fromunicode, METH_O, array_array_fromunicode__doc__},
 
 static PyObject *
-array_array_fromunicode_impl(arrayobject *self, const Py_UNICODE *ustr,
-                             Py_ssize_clean_t ustr_length);
+array_array_fromunicode_impl(arrayobject *self, PyObject *ustr);
 
 static PyObject *
 array_array_fromunicode(arrayobject *self, PyObject *arg)
 {
     PyObject *return_value = NULL;
-    const Py_UNICODE *ustr;
-    Py_ssize_clean_t ustr_length;
+    PyObject *ustr;
 
-    if (!PyArg_Parse(arg, "u#:fromunicode", &ustr, &ustr_length)) {
+    if (!PyUnicode_Check(arg)) {
+        _PyArg_BadArgument("fromunicode", "argument", "str", arg);
         goto exit;
     }
-    return_value = array_array_fromunicode_impl(self, ustr, ustr_length);
+    if (PyUnicode_READY(arg) == -1) {
+        goto exit;
+    }
+    ustr = arg;
+    return_value = array_array_fromunicode_impl(self, ustr);
 
 exit:
     return return_value;
@@ -531,4 +534,4 @@ PyDoc_STRVAR(array_arrayiterator___setstate____doc__,
 
 #define ARRAY_ARRAYITERATOR___SETSTATE___METHODDEF    \
     {"__setstate__", (PyCFunction)array_arrayiterator___setstate__, METH_O, array_arrayiterator___setstate____doc__},
-/*[clinic end generated code: output=f649fc0bc9f6b13a input=a9049054013a1b77]*/
+/*[clinic end generated code: output=9f70748dd3bc532f input=a9049054013a1b77]*/