Add PyUnicode_FromString(), which creates a Unicode object from a

const char * (i.e. 0-terminated latin-1 encoded bytes).
2007-05-05 12:00:46 +00:00 · 2007-05-05 12:00:46 +00:00 · acaa5a16d6
parent 1255ed62bf
commit acaa5a16d6
3 changed files with 64 additions and 0 deletions
--- a/Doc/api/concrete.tex
+++ b/Doc/api/concrete.tex
@ -995,6 +995,17 @@ use these APIs:
  \var{u} is \NULL{}.
 \end{cfuncdesc}
 \begin{cfuncdesc}{PyObject*}{PyUnicode_FromString}{const char *u}
  Create a Unicode object from the char buffer \var{u}.
  \var{u} must be 0-terminated, the bytes will be interpreted as
  being latin-1 encoded. \var{u} may also be \NULL{} which causes the
  contents to be undefined. It is the user's responsibility to fill
  in the needed data.  The buffer is copied into the new object.
  If the buffer is not \NULL{}, the return value might be a shared object.
  Therefore, modification of the resulting Unicode object is only allowed
  when \var{u} is \NULL{}.
 \end{cfuncdesc}
 \begin{cfuncdesc}{Py_UNICODE*}{PyUnicode_AsUnicode}{PyObject *unicode}
  Return a read-only pointer to the Unicode object's internal
  \ctype{Py_UNICODE} buffer, \NULL{} if \var{unicode} is not a Unicode
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@ -172,6 +172,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 # define PyUnicode_FromObject PyUnicodeUCS2_FromObject
 # define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal
 # define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
 # define PyUnicode_FromString PyUnicodeUCS2_FromString
 # define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
 # define PyUnicode_GetMax PyUnicodeUCS2_GetMax
@ -250,6 +251,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 # define PyUnicode_FromObject PyUnicodeUCS4_FromObject
 # define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal
 # define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
 # define PyUnicode_FromString PyUnicodeUCS4_FromString
 # define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
 # define PyUnicode_GetMax PyUnicodeUCS4_GetMax
@ -427,6 +429,12 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
    Py_ssize_t size             /* size of buffer */
    );
 /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
    Latin-1 encoded bytes: there is no size argument, the length is taken
    from the position of the terminating 0 byte.

    Returns the resulting Unicode object as a PyObject*, or NULL if the
    object could not be allocated.  For empty and single-character input
    the returned object may be a shared one, so it must not be modified. */
 PyAPI_FUNC(PyObject*) PyUnicode_FromString(
    const char *u        /* null-terminated Latin-1 encoded bytes */
    );
 /* Return a read-only pointer to the Unicode object's internal
   Py_UNICODE buffer. */
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -393,6 +393,51 @@ PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
    return (PyObject *)unicode;
 }
 PyObject *PyUnicode_FromString(const char *u)
 {
    PyUnicodeObject *unicode;
    Py_ssize_t size = strlen(u);
    /* If the Unicode data is known at construction time, we can apply
       some optimizations which share commonly used objects. */
    if (u != NULL) {
 	/* Optimization for empty strings */
 	if (size == 0 && unicode_empty != NULL) {
 	    Py_INCREF(unicode_empty);
 	    return (PyObject *)unicode_empty;
 	}
 	/* Single character Unicode objects in the Latin-1 range are
 	   shared when using this constructor */
 	if (size == 1 && *u < 256) {
 	    unicode = unicode_latin1[*u];
 	    if (!unicode) {
 		unicode = _PyUnicode_New(1);
 		if (!unicode)
 		    return NULL;
 		unicode->str[0] = *u;
 		unicode_latin1[*u] = unicode;
 	    }
 	    Py_INCREF(unicode);
 	    return (PyObject *)unicode;
 	}
    }
    unicode = _PyUnicode_New(size);
    if (!unicode)
        return NULL;
    /* Copy the Unicode data into the new object */
    if (u != NULL) {
        char *p = unicode->str;
        while (*p++ = *u++)
            ;
    }
    return (PyObject *)unicode;
 }
 #ifdef HAVE_WCHAR_H
 PyObject *PyUnicode_FromWideChar(register const wchar_t *w,