From d2d4598ec2e33e73efcbf295e630296b24a1b70b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lemburg?= Date: Thu, 8 Jul 2004 17:57:32 +0000 Subject: [PATCH] Allow string and unicode return types from .encode()/.decode() methods on string and unicode objects. Added unicode.decode() which was missing for no apparent reason. --- Include/unicodeobject.h | 11 +++++ Objects/stringobject.c | 26 ++++++++++- Objects/unicodeobject.c | 100 ++++++++++++++++++++++++++++++++++++++-- 3 files changed, 130 insertions(+), 7 deletions(-) diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 345dacf146b..45118b34486 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -141,6 +141,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString # define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString +# define PyUnicode_AsEncodedObject PyUnicodeUCS2_AsEncodedObject # define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString # define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString @@ -215,6 +216,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE; # define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString # define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString +# define PyUnicode_AsEncodedObject PyUnicodeUCS4_AsEncodedObject # define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString # define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString @@ -626,6 +628,15 @@ PyAPI_FUNC(PyObject*) PyUnicode_Encode( const char *errors /* error handling */ ); +/* Encodes a Unicode object and returns the result as Python + object. 
*/ + +PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject( + PyObject *unicode, /* Unicode object */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ); + /* Encodes a Unicode object and returns the result as Python string object. */ diff --git a/Objects/stringobject.c b/Objects/stringobject.c index 7fade569c3e..866e7e84ba5 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -2673,9 +2673,20 @@ string_encode(PyStringObject *self, PyObject *args) { char *encoding = NULL; char *errors = NULL; + PyObject *v; + if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors)) return NULL; - return PyString_AsEncodedObject((PyObject *)self, encoding, errors); + v = PyString_AsEncodedObject((PyObject *)self, encoding, errors); + if (v != NULL && !PyString_Check(v) && !PyUnicode_Check(v)) { + PyErr_Format(PyExc_TypeError, + "encoder did not return a string/unicode object " + "(type=%.400s)", + v->ob_type->tp_name); + Py_DECREF(v); + return NULL; + } + return v; /* NULL is passed through when the codec call failed */ } @@ -2694,9 +2705,20 @@ string_decode(PyStringObject *self, PyObject *args) { char *encoding = NULL; char *errors = NULL; + PyObject *v; + if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors)) return NULL; - return PyString_AsDecodedObject((PyObject *)self, encoding, errors); + v = PyString_AsDecodedObject((PyObject *)self, encoding, errors); + if (v != NULL && !PyString_Check(v) && !PyUnicode_Check(v)) { + PyErr_Format(PyExc_TypeError, + "decoder did not return a string/unicode object " + "(type=%.400s)", + v->ob_type->tp_name); + Py_DECREF(v); + return NULL; + } + return v; /* NULL is passed through when the codec call failed */ } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index cf730e5626d..4550023a60d 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -561,6 +561,30 @@ PyObject *PyUnicode_Decode(const char *s, return NULL; } +PyObject *PyUnicode_AsDecodedObject(PyObject *unicode, + const char *encoding, + const char *errors) +{ + PyObject *v; + + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); 
+ goto onError; + } + + if (encoding == NULL) + encoding = PyUnicode_GetDefaultEncoding(); + + /* Decode via the codec registry */ + v = PyCodec_Decode(unicode, encoding, errors); + if (v == NULL) + goto onError; + return v; + + onError: + return NULL; +} + PyObject *PyUnicode_Encode(const Py_UNICODE *s, int size, const char *encoding, @@ -576,6 +600,30 @@ PyObject *PyUnicode_Encode(const Py_UNICODE *s, return v; } +PyObject *PyUnicode_AsEncodedObject(PyObject *unicode, + const char *encoding, + const char *errors) +{ + PyObject *v; + + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + goto onError; + } + + if (encoding == NULL) + encoding = PyUnicode_GetDefaultEncoding(); + + /* Encode via the codec registry */ + v = PyCodec_Encode(unicode, encoding, errors); + if (v == NULL) + goto onError; + return v; + + onError: + return NULL; +} + PyObject *PyUnicode_AsEncodedString(PyObject *unicode, const char *encoding, const char *errors) @@ -608,7 +656,6 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode, v = PyCodec_Encode(unicode, encoding, errors); if (v == NULL) goto onError; - /* XXX Should we really enforce this ? */ if (!PyString_Check(v)) { PyErr_Format(PyExc_TypeError, "encoder did not return a string object (type=%.400s)", @@ -4869,10 +4916,10 @@ unicode_count(PyUnicodeObject *self, PyObject *args) } PyDoc_STRVAR(encode__doc__, -"S.encode([encoding[,errors]]) -> string\n\ +"S.encode([encoding[,errors]]) -> string or unicode\n\ \n\ -Return an encoded string version of S. Default encoding is the current\n\ -default string encoding. errors may be given to set a different error\n\ +Encodes S using the codec registered for encoding. encoding defaults\n\ +to the default encoding. errors may be given to set a different error\n\ handling scheme. Default is 'strict' meaning that encoding errors raise\n\ a UnicodeEncodeError. 
Other possible values are 'ignore', 'replace' and\n\ 'xmlcharrefreplace' as well as any other name registered with\n\ @@ -4883,9 +4930,51 @@ unicode_encode(PyUnicodeObject *self, PyObject *args) { char *encoding = NULL; char *errors = NULL; + PyObject *v; + if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors)) return NULL; - return PyUnicode_AsEncodedString((PyObject *)self, encoding, errors); + v = PyUnicode_AsEncodedObject((PyObject *)self, encoding, errors); + if (v != NULL && !PyString_Check(v) && !PyUnicode_Check(v)) { + PyErr_Format(PyExc_TypeError, + "encoder did not return a string/unicode object " + "(type=%.400s)", + v->ob_type->tp_name); + Py_DECREF(v); + return NULL; + } + return v; /* NULL is passed through when the codec call failed */ +} + +PyDoc_STRVAR(decode__doc__, +"S.decode([encoding[,errors]]) -> string or unicode\n\ +\n\ +Decodes S using the codec registered for encoding. encoding defaults\n\ +to the default encoding. errors may be given to set a different error\n\ +handling scheme. Default is 'strict' meaning that encoding errors raise\n\ +a UnicodeDecodeError. 
Other possible values are 'ignore' and 'replace'\n\ +as well as any other name registered with codecs.register_error that is\n\ +able to handle UnicodeDecodeErrors."); + +static PyObject * +unicode_decode(PyUnicodeObject *self, PyObject *args) +{ + char *encoding = NULL; + char *errors = NULL; + PyObject *v; + + if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors)) + return NULL; + v = PyUnicode_AsDecodedObject((PyObject *)self, encoding, errors); + if (v != NULL && !PyString_Check(v) && !PyUnicode_Check(v)) { + PyErr_Format(PyExc_TypeError, + "decoder did not return a string/unicode object " + "(type=%.400s)", + v->ob_type->tp_name); + Py_DECREF(v); + return NULL; + } + return v; /* NULL is passed through when the codec call failed */ } PyDoc_STRVAR(expandtabs__doc__, @@ -6138,6 +6227,7 @@ static PyMethodDef unicode_methods[] = { {"ljust", (PyCFunction) unicode_ljust, METH_VARARGS, ljust__doc__}, {"lower", (PyCFunction) unicode_lower, METH_NOARGS, lower__doc__}, {"lstrip", (PyCFunction) unicode_lstrip, METH_VARARGS, lstrip__doc__}, + {"decode", (PyCFunction) unicode_decode, METH_VARARGS, decode__doc__}, /* {"maketrans", (PyCFunction) unicode_maketrans, METH_VARARGS, maketrans__doc__}, */ {"rfind", (PyCFunction) unicode_rfind, METH_VARARGS, rfind__doc__}, {"rindex", (PyCFunction) unicode_rindex, METH_VARARGS, rindex__doc__},