/* ------------------------------------------------------------------------

   Python Codec Registry and support functions

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

   ------------------------------------------------------------------------ */

#include "Python.h"
#include <ctype.h>

/* --- Globals ------------------------------------------------------------ */

/* List of registered codec search functions, in registration order. */
static PyObject *_PyCodec_SearchPath;

/* Dictionary mapping lower-cased encoding names to codec tuples. */
static PyObject *_PyCodec_SearchCache;

/* Flag used for lazy import of the standard encodings package */
static int import_encodings_called = 0;

/* --- Codec Registry ----------------------------------------------------- */

/* Import the standard encodings package which will register the first
   codec search function.

   This is done in a lazy way so that the Unicode implementation does
   not downgrade startup time of scripts not needing it.

   Errors are silently ignored by this function. Only one try is made:
   the flag is set before the import is attempted.

*/

static
void import_encodings(void)
{
    PyObject *mod;

    import_encodings_called = 1;
    mod = PyImport_ImportModule("encodings");
    if (mod == NULL) {
        PyErr_Clear();
        return;
    }
    Py_DECREF(mod);
}

/* Register a new codec search function.

   The search_function's refcount is incremented by this function
   (through PyList_Append).

   Returns the result of PyList_Append on success, or -1 with an
   exception set if the registry is not initialized, search_function
   is NULL, or search_function is not callable.

*/

int PyCodec_Register(PyObject *search_function)
{
    /* Same guard as in _PyCodec_Lookup(): refuse to touch the search
       path before _PyCodecRegistry_Init() has created it. */
    if (_PyCodec_SearchPath == NULL) {
        PyErr_SetString(PyExc_SystemError,
                        "codec module not properly initialized");
        return -1;
    }
    if (!import_encodings_called)
        import_encodings();
    if (search_function == NULL) {
        PyErr_BadArgument();
        return -1;
    }
    if (!PyCallable_Check(search_function)) {
        PyErr_SetString(PyExc_TypeError,
                        "argument must be callable");
        return -1;
    }
    return PyList_Append(_PyCodec_SearchPath, search_function);
}

/* Return a new Python string holding a lower-cased copy of the given
   C string, or NULL if the string object could not be allocated. */

static
PyObject *lowercasestring(const char *string)
{
    int i;
    int len = strlen(string);
    char *p;
    PyObject *v;

    v = PyString_FromStringAndSize(NULL, len);
    if (v == NULL)
        return NULL;
    p = PyString_AS_STRING(v);
    /* tolower() is only defined for values representable as unsigned
       char (or EOF), so plain char must be converted first. */
    for (i = 0; i < len; i++)
        p[i] = tolower((unsigned char)string[i]);
    return v;
}

/* Lookup the given encoding and return a tuple providing the codec
   facilities.

   The encoding string is looked up converted to all lower-case
   characters. This makes encodings looked up through this mechanism
   effectively case-insensitive.

   The returned tuple is a new reference which the caller must release.

   If no codec is found, a LookupError is set and NULL returned. NULL
   is also returned (with an exception set) if encoding is NULL, the
   registry is not initialized, a search function fails or returns
   something other than None or a 4-tuple, or the result cannot be
   stored in the cache.

*/

PyObject *_PyCodec_Lookup(const char *encoding)
{
    PyObject *result, *args = NULL, *v;
    int i, len;

    if (encoding == NULL) {
        PyErr_BadArgument();
        goto onError;
    }
    if (_PyCodec_SearchCache == NULL || _PyCodec_SearchPath == NULL) {
        PyErr_SetString(PyExc_SystemError,
                        "codec module not properly initialized");
        goto onError;
    }
    if (!import_encodings_called)
        import_encodings();

    /* Convert the encoding to a lower-cased Python string */
    v = lowercasestring(encoding);
    if (v == NULL)
        goto onError;
    PyString_InternInPlace(&v);

    /* First, try to lookup the name in the registry dictionary */
    result = PyDict_GetItem(_PyCodec_SearchCache, v);
    if (result != NULL) {
        /* PyDict_GetItem returns a borrowed reference. */
        Py_INCREF(result);
        Py_DECREF(v);
        return result;
    }

    /* Next, scan the search functions in order of registration */
    args = PyTuple_New(1);
    if (args == NULL) {
        /* v has not been handed over to the tuple yet. */
        Py_DECREF(v);
        goto onError;
    }
    /* The tuple takes over our reference to v; from here on v is
       released together with args. */
    PyTuple_SET_ITEM(args, 0, v);

    len = PyList_Size(_PyCodec_SearchPath);
    if (len < 0)
        goto onError;

    for (i = 0; i < len; i++) {
        PyObject *func;

        func = PyList_GetItem(_PyCodec_SearchPath, i);
        if (func == NULL)
            goto onError;
        result = PyEval_CallObject(func, args);
        if (result == NULL)
            goto onError;
        if (result == Py_None) {
            /* This search function does not know the encoding. */
            Py_DECREF(result);
            continue;
        }
        if (!PyTuple_Check(result) ||
            PyTuple_GET_SIZE(result) != 4) {
            PyErr_SetString(PyExc_TypeError,
                            "codec search functions must return 4-tuples");
            Py_DECREF(result);
            goto onError;
        }
        break;
    }
    if (i == len) {
        /* XXX Perhaps we should cache misses too ? */
        PyErr_SetString(PyExc_LookupError,
                        "unknown encoding");
        goto onError;
    }

    /* Cache and return the result */
    if (PyDict_SetItem(_PyCodec_SearchCache, v, result) < 0) {
        /* Do not hand back a result while an exception is pending. */
        Py_DECREF(result);
        goto onError;
    }
    Py_DECREF(args);
    return result;

 onError:
    Py_XDECREF(args);
    return NULL;
}

/* Build the argument tuple (object,) or (object, errors), the latter
   only if errors is non-NULL. object must not be NULL. Returns a new
   reference, or NULL on failure. */

static
PyObject *args_tuple(PyObject *object,
                     const char *errors)
{
    PyObject *args;

    args = PyTuple_New(1 + (errors != NULL));
    if (args == NULL)
        return NULL;
    Py_INCREF(object);
    PyTuple_SET_ITEM(args, 0, object);
    if (errors) {
        PyObject *v;

        v = PyString_FromString(errors);
        if (v == NULL) {
            Py_DECREF(args);
            return NULL;
        }
        PyTuple_SET_ITEM(args, 1, v);
    }
    return args;
}

/* Build a codec by calling factory(stream) or factory(stream, errors),
   the latter if errors is non-NULL. stream must not be NULL. Returns
   the result of the call, or NULL on failure. */

static
PyObject *build_stream_codec(PyObject *factory,
                             PyObject *stream,
                             const char *errors)
{
    PyObject *args, *codec;

    args = args_tuple(stream, errors);
    if (args == NULL)
        return NULL;

    codec = PyEval_CallObject(factory, args);
    Py_DECREF(args);
    return codec;
}

/* Convenience APIs to query the Codec registry.

   All APIs return a codec object with incremented refcount, or NULL
   with an exception set if the lookup (or, for the stream variants,
   the factory call) fails.

 */

/* Return entry 0 of the codec tuple registered for encoding. */

PyObject *PyCodec_Encoder(const char *encoding)
{
    PyObject *codecs;
    PyObject *v;

    codecs = _PyCodec_Lookup(encoding);
    if (codecs == NULL)
        return NULL;
    v = PyTuple_GET_ITEM(codecs, 0);
    Py_INCREF(v);
    /* The lookup handed us a new reference to the tuple. */
    Py_DECREF(codecs);
    return v;
}

/* Return entry 1 of the codec tuple registered for encoding. */

PyObject *PyCodec_Decoder(const char *encoding)
{
    PyObject *codecs;
    PyObject *v;

    codecs = _PyCodec_Lookup(encoding);
    if (codecs == NULL)
        return NULL;
    v = PyTuple_GET_ITEM(codecs, 1);
    Py_INCREF(v);
    /* The lookup handed us a new reference to the tuple. */
    Py_DECREF(codecs);
    return v;
}

/* Call entry 2 of the codec tuple registered for encoding with
   (stream[, errors]) and return the result. */

PyObject *PyCodec_StreamReader(const char *encoding,
                               PyObject *stream,
                               const char *errors)
{
    PyObject *codecs, *codec;

    codecs = _PyCodec_Lookup(encoding);
    if (codecs == NULL)
        return NULL;
    codec = build_stream_codec(PyTuple_GET_ITEM(codecs, 2), stream, errors);
    /* The lookup handed us a new reference to the tuple. */
    Py_DECREF(codecs);
    return codec;
}

/* Call entry 3 of the codec tuple registered for encoding with
   (stream[, errors]) and return the result. */

PyObject *PyCodec_StreamWriter(const char *encoding,
                               PyObject *stream,
                               const char *errors)
{
    PyObject *codecs, *codec;

    codecs = _PyCodec_Lookup(encoding);
    if (codecs == NULL)
        return NULL;
    codec = build_stream_codec(PyTuple_GET_ITEM(codecs, 3), stream, errors);
    /* The lookup handed us a new reference to the tuple. */
    Py_DECREF(codecs);
    return codec;
}

/* Encode an object (e.g. an Unicode object) using the given encoding
   and return the resulting encoded object (usually a Python string).

   errors is passed to the encoder as second argument if non-NULL.

   Returns a new reference, or NULL with an exception set. */

PyObject *PyCodec_Encode(PyObject *object,
                         const char *encoding,
                         const char *errors)
{
    PyObject *encoder = NULL;
    PyObject *args = NULL, *result = NULL;
    PyObject *v;

    encoder = PyCodec_Encoder(encoding);
    if (encoder == NULL)
        goto onError;

    args = args_tuple(object, errors);
    if (args == NULL)
        goto onError;

    result = PyEval_CallObject(encoder, args);
    if (result == NULL)
        goto onError;

    if (!PyTuple_Check(result) ||
        PyTuple_GET_SIZE(result) != 2) {
        PyErr_SetString(PyExc_TypeError,
                        "encoder must return a tuple (object,integer)");
        goto onError;
    }
    v = PyTuple_GET_ITEM(result, 0);
    Py_INCREF(v);
    /* We don't check or use the second (integer) entry. */

    Py_DECREF(args);
    Py_DECREF(encoder);
    Py_DECREF(result);
    return v;

 onError:
    Py_XDECREF(args);
    Py_XDECREF(encoder);
    /* result is still owned here when it was not a 2-tuple. */
    Py_XDECREF(result);
    return NULL;
}

/* Decode an object (usually a Python string) using the given encoding
   and return an equivalent object (e.g. an Unicode object).

   errors is passed to the decoder as second argument if non-NULL.

   Returns a new reference, or NULL with an exception set. */

PyObject *PyCodec_Decode(PyObject *object,
                         const char *encoding,
                         const char *errors)
{
    PyObject *decoder = NULL;
    PyObject *args = NULL, *result = NULL;
    PyObject *v;

    decoder = PyCodec_Decoder(encoding);
    if (decoder == NULL)
        goto onError;

    args = args_tuple(object, errors);
    if (args == NULL)
        goto onError;

    result = PyEval_CallObject(decoder, args);
    if (result == NULL)
        goto onError;

    if (!PyTuple_Check(result) ||
        PyTuple_GET_SIZE(result) != 2) {
        PyErr_SetString(PyExc_TypeError,
                        "decoder must return a tuple (object,integer)");
        goto onError;
    }
    v = PyTuple_GET_ITEM(result, 0);
    Py_INCREF(v);
    /* We don't check or use the second (integer) entry. */

    Py_DECREF(args);
    Py_DECREF(decoder);
    Py_DECREF(result);
    return v;

 onError:
    Py_XDECREF(args);
    Py_XDECREF(decoder);
    Py_XDECREF(result);
    return NULL;
}

/* Create the search path list and the search cache dictionary if they
   do not exist yet. Failure to create either one is fatal. */

void _PyCodecRegistry_Init(void)
{
    if (_PyCodec_SearchPath == NULL)
        _PyCodec_SearchPath = PyList_New(0);
    if (_PyCodec_SearchCache == NULL)
        _PyCodec_SearchCache = PyDict_New();
    if (_PyCodec_SearchPath == NULL ||
        _PyCodec_SearchCache == NULL)
        Py_FatalError("can't initialize codec registry");
}

/* Release the search path and the search cache and reset both globals
   to NULL. */

void _PyCodecRegistry_Fini(void)
{
    Py_XDECREF(_PyCodec_SearchPath);
    _PyCodec_SearchPath = NULL;
    Py_XDECREF(_PyCodec_SearchCache);
    _PyCodec_SearchCache = NULL;
}