From acaa5a16d6cd9a94e7e111761264eef14a033d2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Walter=20D=C3=B6rwald?= Date: Sat, 5 May 2007 12:00:46 +0000 Subject: [PATCH] Add PyUnicode_FromString(), which create a unicode object from a const char * (i.e. 0-terminated latin-1 encoded bytes). --- Doc/api/concrete.tex | 11 ++++++++++ Include/unicodeobject.h | 8 ++++++++ Objects/unicodeobject.c | 45 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+) diff --git a/Doc/api/concrete.tex b/Doc/api/concrete.tex index cdf6856a354..e1ab3ec6f3f 100644 --- a/Doc/api/concrete.tex +++ b/Doc/api/concrete.tex @@ -995,6 +995,17 @@ use these APIs: \var{u} is \NULL{}. \end{cfuncdesc} +\begin{cfuncdesc}{PyObject*}{PyUnicode_FromString}{const char *u} + Create a Unicode Object from the char buffer \var{u}. + \var{u} must be 0-terminated, the bytes will be interpreted as + being latin-1 encoded. \var{u} may also be \NULL{} which causes the + contents to be undefined. It is the user's responsibility to fill + in the needed data. The buffer is copied into the new object. + If the buffer is not \NULL{}, the return value might be a shared object. + Therefore, modification of the resulting Unicode object is only allowed + when \var{u} is \NULL{}. 
+\end{cfuncdesc} + \begin{cfuncdesc}{Py_UNICODE*}{PyUnicode_AsUnicode}{PyObject *unicode} Return a read-only pointer to the Unicode object's internal \ctype{Py_UNICODE} buffer, \NULL{} if \var{unicode} is not a Unicode diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index c12cb96af2c..9d0cabf6d3f 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -172,6 +172,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_FromObject PyUnicodeUCS2_FromObject # define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal # define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode +# define PyUnicode_FromString PyUnicodeUCS2_FromString # define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding # define PyUnicode_GetMax PyUnicodeUCS2_GetMax @@ -250,6 +251,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_FromObject PyUnicodeUCS4_FromObject # define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal # define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode +# define PyUnicode_FromString PyUnicodeUCS4_FromString # define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding # define PyUnicode_GetMax PyUnicodeUCS4_GetMax @@ -427,6 +429,12 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode( Py_ssize_t size /* size of buffer */ ); +/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated + Latin-1 encoded bytes */ +PyAPI_FUNC(PyObject*) PyUnicode_FromString( + const char *u /* string */ + ); + /* Return a read-only pointer to the Unicode object's internal Py_UNICODE buffer. 
*/
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 45c52cc77c1..c9a922dd80c 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -393,6 +393,74 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
     return (PyObject *)unicode;
 }
 
+/* Create a Unicode object from the 0-terminated char buffer u; every byte
+   is interpreted as Latin-1, i.e. it becomes the code unit of the same
+   value.
+
+   u may be NULL: no bytes are read and the object produced by
+   _PyUnicode_New(0) is returned.  For a non-NULL u the result can be a
+   shared object (unicode_empty or an entry of unicode_latin1), so callers
+   must not modify it.
+
+   Returns NULL when _PyUnicode_New() fails. */
+PyObject *PyUnicode_FromString(const char *u)
+{
+    PyUnicodeObject *unicode;
+    Py_ssize_t size = 0;
+
+    /* If the Unicode data is known at construction time, we can apply
+       some optimizations which share commonly used objects. */
+    if (u != NULL) {
+        /* Measure the string only after the NULL check: strlen(NULL) is
+           undefined behavior. */
+        size = (Py_ssize_t)strlen(u);
+
+        /* Optimization for empty strings */
+        if (size == 0 && unicode_empty != NULL) {
+            Py_INCREF(unicode_empty);
+            return (PyObject *)unicode_empty;
+        }
+
+        /* Single character Unicode objects in the Latin-1 range are
+           shared when using this constructor.  The byte is read as
+           unsigned char because plain char may be signed, which would
+           make bytes >= 0x80 negative indexes into unicode_latin1. */
+        if (size == 1) {
+            const unsigned char ch = (unsigned char)*u;
+
+            unicode = unicode_latin1[ch];
+            if (!unicode) {
+                unicode = _PyUnicode_New(1);
+                if (!unicode)
+                    return NULL;
+                unicode->str[0] = ch;
+                unicode_latin1[ch] = unicode;
+            }
+            Py_INCREF(unicode);
+            return (PyObject *)unicode;
+        }
+    }
+
+    unicode = _PyUnicode_New(size);
+    if (!unicode)
+        return NULL;
+
+    /* Copy the data into the new object, widening each byte to one
+       Py_UNICODE code unit (the buffer must not be addressed through a
+       char pointer).  Only the size characters are written.
+       NOTE(review): assumes _PyUnicode_New() already 0-terminates the
+       buffer -- confirm. */
+    if (u != NULL) {
+        Py_UNICODE *p = unicode->str;
+        Py_ssize_t i;
+
+        for (i = 0; i < size; i++)
+            p[i] = (unsigned char)u[i];
+    }
+
+    return (PyObject *)unicode;
+}
+
 #ifdef HAVE_WCHAR_H
 
 PyObject *PyUnicode_FromWideChar(register const wchar_t *w,