Correct the handling of 0-termination of PyUnicode_AsWideChar()

and its usage in PyLocale_strcoll(). Clarify the documentation on this. Thanks to Andreas Degert for pointing this out.
2004-11-22 13:02:31 +00:00 · 2004-11-22 13:02:31 +00:00 · a9cadcd41b
parent 6d60c09624
commit a9cadcd41b
4 changed files with 25 additions and 7 deletions
--- a/Doc/api/concrete.tex
+++ b/Doc/api/concrete.tex
@ -995,9 +995,13 @@ following functions. Support is optimized if Python's own
                                             wchar_t *w,
                                             int size}
  Copies the Unicode object contents into the \ctype{wchar_t} buffer
-  \var{w}.  At most \var{size} \ctype{wchar_t} characters are copied.
-  Returns the number of \ctype{wchar_t} characters copied or -1 in
-  case of an error.
+  \var{w}.  At most \var{size} \ctype{wchar_t} characters are copied.
+  Returns the number of \ctype{wchar_t} characters copied (excluding a
+  possibly trailing 0-termination character) or -1 in case of an
+  error.  Note that the resulting \ctype{wchar_t} string may or may
+  not be 0-terminated.  It is the responsibility of the caller to make
+  sure that the \ctype{wchar_t} string is 0-terminated in case this is
+  required by the application.
 \end{cfuncdesc}


--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@ -512,10 +512,16 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
    int size                    /* size of buffer */
    );

-/* Copies the Unicode Object contents into the whcar_t buffer w.  At
+/* Copies the Unicode Object contents into the wchar_t buffer w.  At
   most size wchar_t characters are copied.

-   Returns the number of wchar_t characters copied or -1 in case of an
+   Note that the resulting wchar_t string may or may not be
+   0-terminated.  It is the responsibility of the caller to make sure
+   that the wchar_t string is 0-terminated in case this is required by
+   the application.
+
+   Returns the number of wchar_t characters copied (excluding a
+   possibly trailing 0-termination character) or -1 in case of an
   error. */

 PyAPI_FUNC(int) PyUnicode_AsWideChar(
--- a/Modules/_localemodule.c
+++ b/Modules/_localemodule.c
@ -305,7 +305,6 @@ PyLocale_strcoll(PyObject* self, PyObject* args)
    }
    /* Convert the unicode strings to wchar[]. */
    len1 = PyUnicode_GET_SIZE(os1) + 1;
-    len2 = PyUnicode_GET_SIZE(os2) + 1;
    ws1 = PyMem_MALLOC(len1 * sizeof(wchar_t));
    if (!ws1) {
        PyErr_NoMemory();
@ -313,6 +312,8 @@ PyLocale_strcoll(PyObject* self, PyObject* args)
    }
    if (PyUnicode_AsWideChar((PyUnicodeObject*)os1, ws1, len1) == -1)
        goto done;
+    ws1[len1 - 1] = 0;
+    len2 = PyUnicode_GET_SIZE(os2) + 1;
    ws2 = PyMem_MALLOC(len2 * sizeof(wchar_t));
    if (!ws2) {
        PyErr_NoMemory();
@ -320,6 +321,7 @@ PyLocale_strcoll(PyObject* self, PyObject* args)
    }
    if (PyUnicode_AsWideChar((PyUnicodeObject*)os2, ws2, len2) == -1)
        goto done;
+    ws2[len2 - 1] = 0;
    /* Collate the strings. */
    result = PyInt_FromLong(wcscoll(ws1, ws2));
  done:
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -384,8 +384,11 @@ int PyUnicode_AsWideChar(PyUnicodeObject *unicode,
 	PyErr_BadInternalCall();
 	return -1;
    }
+
+    /* If possible, try to copy the 0-termination as well */
    if (size > PyUnicode_GET_SIZE(unicode))
-	size = PyUnicode_GET_SIZE(unicode);
+	size = PyUnicode_GET_SIZE(unicode) + 1;
+
 #ifdef HAVE_USABLE_WCHAR_T
    memcpy(w, unicode->str, size * sizeof(wchar_t));
 #else
@ -398,6 +401,9 @@ int PyUnicode_AsWideChar(PyUnicodeObject *unicode,
    }
 #endif

+    if (size > PyUnicode_GET_SIZE(unicode))
+        return PyUnicode_GET_SIZE(unicode);
+    else
    return size;
 }