From a9cadcd41b27fd045626c4e3b98315aaa257ca75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lemburg?= Date: Mon, 22 Nov 2004 13:02:31 +0000 Subject: [PATCH] Correct the handling of 0-termination of PyUnicode_AsWideChar() and its usage in PyLocale_strcoll(). Clarify the documentation on this. Thanks to Andreas Degert for pointing this out. --- Doc/api/concrete.tex | 10 +++++++--- Include/unicodeobject.h | 10 ++++++++-- Modules/_localemodule.c | 4 +++- Objects/unicodeobject.c | 8 +++++++- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/Doc/api/concrete.tex b/Doc/api/concrete.tex index af026ff64dc..001d0ad2144 100644 --- a/Doc/api/concrete.tex +++ b/Doc/api/concrete.tex @@ -995,9 +995,13 @@ following functions. Support is optimized if Python's own wchar_t *w, int size} Copies the Unicode object contents into the \ctype{wchar_t} buffer - \var{w}. At most \var{size} \ctype{wchar_t} characters are copied. - Returns the number of \ctype{wchar_t} characters copied or -1 in - case of an error. + \var{w}. At most \var{size} \ctype{wchar_t} characters are copied + (excluding a possibly trailing 0-termination character). Returns + the number of \ctype{wchar_t} characters copied or -1 in case of an + error. Note that the resulting \ctype{wchar_t} string may or may + not be 0-terminated. It is the responsibility of the caller to make + sure that the \ctype{wchar_t} string is 0-terminated in case this is + required by the application. \end{cfuncdesc} diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 0a82caf0cd1..6738cbd6792 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -512,10 +512,16 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar( int size /* size of buffer */ ); -/* Copies the Unicode Object contents into the whcar_t buffer w. At +/* Copies the Unicode Object contents into the wchar_t buffer w. At most size wchar_t characters are copied. - Returns the number of wchar_t characters copied or -1 in case of an + Note that the resulting wchar_t string may or may not be + 0-terminated. It is the responsibility of the caller to make sure + that the wchar_t string is 0-terminated in case this is required by + the application. + + Returns the number of wchar_t characters copied (excluding a + possibly trailing 0-termination character) or -1 in case of an error. */ PyAPI_FUNC(int) PyUnicode_AsWideChar( diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index 3e3df2284e3..de470e0299d 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -305,7 +305,6 @@ PyLocale_strcoll(PyObject* self, PyObject* args) } /* Convert the unicode strings to wchar[]. */ len1 = PyUnicode_GET_SIZE(os1) + 1; - len2 = PyUnicode_GET_SIZE(os2) + 1; ws1 = PyMem_MALLOC(len1 * sizeof(wchar_t)); if (!ws1) { PyErr_NoMemory(); @@ -313,6 +312,8 @@ PyLocale_strcoll(PyObject* self, PyObject* args) } if (PyUnicode_AsWideChar((PyUnicodeObject*)os1, ws1, len1) == -1) goto done; + ws1[len1 - 1] = 0; + len2 = PyUnicode_GET_SIZE(os2) + 1; ws2 = PyMem_MALLOC(len2 * sizeof(wchar_t)); if (!ws2) { PyErr_NoMemory(); @@ -320,6 +321,7 @@ PyLocale_strcoll(PyObject* self, PyObject* args) } if (PyUnicode_AsWideChar((PyUnicodeObject*)os2, ws2, len2) == -1) goto done; + ws2[len2 - 1] = 0; /* Collate the strings. */ result = PyInt_FromLong(wcscoll(ws1, ws2)); done: diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 48821bd247c..5e5dac55a77 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -384,8 +384,11 @@ int PyUnicode_AsWideChar(PyUnicodeObject *unicode, PyErr_BadInternalCall(); return -1; } + + /* If possible, try to copy the 0-termination as well */ if (size > PyUnicode_GET_SIZE(unicode)) - size = PyUnicode_GET_SIZE(unicode); + size = PyUnicode_GET_SIZE(unicode) + 1; + #ifdef HAVE_USABLE_WCHAR_T memcpy(w, unicode->str, size * sizeof(wchar_t)); #else @@ -398,6 +401,9 @@ int PyUnicode_AsWideChar(PyUnicodeObject *unicode, } #endif + if (size > PyUnicode_GET_SIZE(unicode)) + return PyUnicode_GET_SIZE(unicode); + else return size; }