From 3f41974525271c65269c84e4fcda5e5ae207039d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marc-Andr=C3=A9=20Lemburg?=
Date: Sat, 10 Jul 2004 12:06:10 +0000
Subject: [PATCH] Add generic codecs.encode() and .decode() APIs that don't
 impose any restriction on the return type (like unicode.encode() et al. do).

---
Review notes (this section is ignored when the patch is applied):
 - The "encode"/"decode" method-table entries now pass encode__doc__ and
   decode__doc__; previously both docstrings were defined but never used.
 - encoding/errors are declared const char * because they are only read and
   encoding may be assigned the result of PyUnicode_GetDefaultEncoding().
 - Fixed the "registerd" typo in the decode docstring.

 Lib/test/test_codecs.py | 12 ++++++-
 Modules/_codecsmodule.c | 76 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 83 insertions(+), 5 deletions(-)

diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index b7abedd2209..c428c615fdd 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -336,6 +336,15 @@ class CodecTest(unittest.TestCase):
     def test_builtin(self):
         self.assertEquals(unicode("python.org", "idna"), u"python.org")

+class CodecsModuleTest(unittest.TestCase):
+
+    def test_decode(self):
+        self.assertEquals(codecs.decode('\xe4\xf6\xfc', 'latin-1'),
+                          u'\xe4\xf6\xfc')
+    def test_encode(self):
+        self.assertEquals(codecs.encode(u'\xe4\xf6\xfc', 'latin-1'),
+                          '\xe4\xf6\xfc')
+
 def test_main():
     test_support.run_unittest(
         UTF16Test,
@@ -343,7 +352,8 @@ def test_main():
         RecodingTest,
         PunycodeTest,
         NameprepTest,
-        CodecTest
+        CodecTest,
+        CodecsModuleTest
     )


diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
index 210be516f94..fdc664be564 100644
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@@ -47,7 +47,7 @@ one argument, the encoding name in all lower case letters, and return\n\
 a tuple of functions (encoder, decoder, stream_reader, stream_writer).");

 static
-PyObject *codecregister(PyObject *self, PyObject *args)
+PyObject *codec_register(PyObject *self, PyObject *args)
 {
     PyObject *search_function;

@@ -71,7 +71,7 @@ Looks up a codec tuple in the Python codec registry and returns\n\
 a tuple of functions.");

 static
-PyObject *codeclookup(PyObject *self, PyObject *args)
+PyObject *codec_lookup(PyObject *self, PyObject *args)
 {
     char *encoding;

@@ -84,6 +84,72 @@ PyObject *codeclookup(PyObject *self, PyObject *args)
     return NULL;
 }

+PyDoc_STRVAR(encode__doc__,
+"encode(obj, [encoding[,errors]]) -> object\n\
+\n\
+Encodes obj using the codec registered for encoding. encoding defaults\n\
+to the default encoding. errors may be given to set a different error\n\
+handling scheme. Default is 'strict' meaning that encoding errors raise\n\
+a ValueError. Other possible values are 'ignore', 'replace' and\n\
+'xmlcharrefreplace' as well as any other name registered with\n\
+codecs.register_error that can handle ValueErrors.");
+
+static PyObject *
+codec_encode(PyObject *self, PyObject *args)
+{
+    const char *encoding = NULL;
+    const char *errors = NULL;
+    PyObject *v;
+
+    if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
+        return NULL;
+
+    if (encoding == NULL)
+        encoding = PyUnicode_GetDefaultEncoding();
+
+    /* Encode via the codec registry */
+    v = PyCodec_Encode(v, encoding, errors);
+    if (v == NULL)
+        goto onError;
+    return v;
+
+ onError:
+    return NULL;
+}
+
+PyDoc_STRVAR(decode__doc__,
+"decode(obj, [encoding[,errors]]) -> object\n\
+\n\
+Decodes obj using the codec registered for encoding. encoding defaults\n\
+to the default encoding. errors may be given to set a different error\n\
+handling scheme. Default is 'strict' meaning that encoding errors raise\n\
+a ValueError. Other possible values are 'ignore' and 'replace'\n\
+as well as any other name registered with codecs.register_error that is\n\
+able to handle ValueErrors.");
+
+static PyObject *
+codec_decode(PyObject *self, PyObject *args)
+{
+    const char *encoding = NULL;
+    const char *errors = NULL;
+    PyObject *v;
+
+    if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
+        return NULL;
+
+    if (encoding == NULL)
+        encoding = PyUnicode_GetDefaultEncoding();
+
+    /* Decode via the codec registry */
+    v = PyCodec_Decode(v, encoding, errors);
+    if (v == NULL)
+        goto onError;
+    return v;
+
+ onError:
+    return NULL;
+}
+
 /* --- Helpers ------------------------------------------------------------ */

 static
@@ -765,10 +831,12 @@ static PyObject *lookup_error(PyObject *self, PyObject *args)
 /* --- Module API --------------------------------------------------------- */

 static PyMethodDef _codecs_functions[] = {
-    {"register", codecregister, METH_VARARGS,
+    {"register", codec_register, METH_VARARGS,
         register__doc__},
-    {"lookup", codeclookup, METH_VARARGS,
+    {"lookup", codec_lookup, METH_VARARGS,
         lookup__doc__},
+    {"encode", codec_encode, METH_VARARGS, encode__doc__},
+    {"decode", codec_decode, METH_VARARGS, decode__doc__},
     {"escape_encode", escape_encode, METH_VARARGS},
     {"escape_decode", escape_decode, METH_VARARGS},
 #ifdef Py_USING_UNICODE