From 034f6cf10c53f11cdb0f1fd42d279122cdb8ebaf Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 30 Sep 2011 02:26:44 +0200 Subject: [PATCH] Add PyUnicode_Copy() function, include it in the public API --- Include/unicodeobject.h | 5 +++++ Modules/posixmodule.c | 3 +-- Objects/unicodeobject.c | 40 +++++++++++++++++++++------------------- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index a2c07f5199f..9f3f66da3ff 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -510,6 +510,11 @@ PyAPI_FUNC(int) _PyUnicode_Ready( ); #endif +/* Get a copy of a Unicode string. */ +PyAPI_FUNC(PyObject*) PyUnicode_Copy( + PyObject *unicode + ); + /* Copy character from one unicode object into another, this function performs character conversion when necessary and falls back to memcpy if possible. diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 094ceb7a104..b19f1b31ad4 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -729,8 +729,7 @@ convert_to_unicode(PyObject **param) else if (PyUnicode_Check(*param)) /* For a Unicode subtype that's not a Unicode object, return a true Unicode object with the same data. 
*/ - *param = PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(*param), - PyUnicode_GET_SIZE(*param)); + *param = PyUnicode_Copy(*param); else *param = PyUnicode_FromEncodedObject(*param, Py_FileSystemDefaultEncoding, diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index a02c2227fc1..4b6f651673e 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1209,6 +1209,20 @@ PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size) return NULL; } +PyObject* +PyUnicode_Copy(PyObject *unicode) +{ + if (!PyUnicode_Check(unicode)) { + PyErr_BadInternalCall(); + return NULL; + } + if (PyUnicode_READY(unicode)) + return NULL; + return PyUnicode_FromKindAndData(PyUnicode_KIND(unicode), + PyUnicode_DATA(unicode), + PyUnicode_GET_LENGTH(unicode)); +} + /* Widen Unicode objects to larger buffers. Return NULL if the string is too wide already. */ @@ -9061,9 +9075,7 @@ replace(PyObject *self, PyObject *str1, Py_INCREF(self); return (PyObject *) self; } - return PyUnicode_FromKindAndData(PyUnicode_KIND(self), - PyUnicode_DATA(self), - PyUnicode_GET_LENGTH(self)); + return PyUnicode_Copy(self); error: if (srelease && sbuf) PyMem_FREE(sbuf); @@ -10477,7 +10489,8 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end) return NULL; kind = PyUnicode_KIND(self); data = PyUnicode_1BYTE_DATA(self); - return PyUnicode_FromKindAndData(kind, data + PyUnicode_KIND_SIZE(kind, start), + return PyUnicode_FromKindAndData(kind, + data + PyUnicode_KIND_SIZE(kind, start), end-start); } @@ -11267,8 +11280,7 @@ PyObject *unicode_str(PyObject *self) return self; } else /* Subtype -- return genuine unicode string with the same value. 
*/ - return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(self), - PyUnicode_GET_SIZE(self)); + return PyUnicode_Copy(self); } PyDoc_STRVAR(swapcase__doc__, @@ -11453,10 +11465,7 @@ unicode_zfill(PyUnicodeObject *self, PyObject *args) return (PyObject*) self; } else - return PyUnicode_FromUnicode( - PyUnicode_AS_UNICODE(self), - PyUnicode_GET_SIZE(self) - ); + return PyUnicode_Copy(self); } fill = width - _PyUnicode_LENGTH(self); @@ -11652,16 +11661,9 @@ PyDoc_STRVAR(sizeof__doc__, "S.__sizeof__() -> size of S in memory, in bytes"); static PyObject * -unicode_getnewargs(PyUnicodeObject *v) +unicode_getnewargs(PyObject *v) { - PyObject *copy; - unsigned char *data; - int kind; - if (PyUnicode_READY(v) == -1) - return NULL; - kind = PyUnicode_KIND(v); - data = PyUnicode_1BYTE_DATA(v); - copy = PyUnicode_FromKindAndData(kind, data, PyUnicode_GET_LENGTH(v)); + PyObject *copy = PyUnicode_Copy(v); if (!copy) return NULL; return Py_BuildValue("(N)", copy);