Python Codec Registry and support functions, written by Marc-Andre
Lemburg.
This commit is contained in:
parent
86016cb482
commit
feee4b994f
|
@ -0,0 +1,382 @@
|
|||
/* ------------------------------------------------------------------------
|
||||
|
||||
Python Codec Registry and support functions
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
|
||||
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
------------------------------------------------------------------------ */
|
||||
|
||||
#include "Python.h"
|
||||
#include <ctype.h>
|
||||
|
||||
/* --- Globals ------------------------------------------------------------ */
|
||||
|
||||
/* List of codec search functions: created by _PyCodecRegistry_Init(),
   appended to by PyCodec_Register() and scanned in registration order
   by _PyCodec_Lookup(). */
static PyObject *_PyCodec_SearchPath;
|
||||
/* Dictionary caching successful lookups: _PyCodec_Lookup() stores the
   value returned by a search function under the lower-cased encoding
   name. Created by _PyCodecRegistry_Init(). */
static PyObject *_PyCodec_SearchCache;
|
||||
|
||||
/* Flag used for lazy import of the standard encodings package */
|
||||
static int import_encodings_called = 0;
|
||||
|
||||
/* --- Codec Registry ----------------------------------------------------- */
|
||||
|
||||
/* Import the standard encodings package which will register the first
|
||||
codec search function.
|
||||
|
||||
This is done in a lazy way so that the Unicode implementation does
|
||||
not downgrade startup time of scripts not needing it.
|
||||
|
||||
Errors are silently ignored by this function. Only one try is made.
|
||||
|
||||
*/
|
||||
|
||||
static
|
||||
void import_encodings()
|
||||
{
|
||||
PyObject *mod;
|
||||
|
||||
import_encodings_called = 1;
|
||||
mod = PyImport_ImportModule("encodings");
|
||||
if (mod == NULL) {
|
||||
PyErr_Clear();
|
||||
return;
|
||||
}
|
||||
Py_DECREF(mod);
|
||||
}
|
||||
|
||||
/* Register a new codec search function.
|
||||
|
||||
The search_function's refcount is incremented by this function. */
|
||||
|
||||
int PyCodec_Register(PyObject *search_function)
|
||||
{
|
||||
if (!import_encodings_called)
|
||||
import_encodings();
|
||||
if (search_function == NULL) {
|
||||
PyErr_BadArgument();
|
||||
return -1;
|
||||
}
|
||||
if (!PyCallable_Check(search_function)) {
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
"argument must be callable");
|
||||
return -1;
|
||||
}
|
||||
return PyList_Append(_PyCodec_SearchPath, search_function);
|
||||
}
|
||||
|
||||
static
|
||||
PyObject *lowercasestring(const char *string)
|
||||
{
|
||||
register int i;
|
||||
int len = strlen(string);
|
||||
char *p;
|
||||
PyObject *v;
|
||||
|
||||
v = PyString_FromStringAndSize(NULL, len);
|
||||
if (v == NULL)
|
||||
return NULL;
|
||||
p = PyString_AS_STRING(v);
|
||||
for (i = 0; i < len; i++)
|
||||
p[i] = tolower(string[i]);
|
||||
return v;
|
||||
}
|
||||
|
||||
/* Lookup the given encoding and return a tuple providing the codec
|
||||
facilities.
|
||||
|
||||
The encoding string is looked up converted to all lower-case
|
||||
characters. This makes encodings looked up through this mechanism
|
||||
effectively case-insensitive.
|
||||
|
||||
If no codec is found, a KeyError is set and NULL returned. */
|
||||
|
||||
PyObject *_PyCodec_Lookup(const char *encoding)
|
||||
{
|
||||
PyObject *result, *args = NULL, *v;
|
||||
int i, len;
|
||||
|
||||
if (!import_encodings_called)
|
||||
import_encodings();
|
||||
|
||||
/* Convert the encoding to a lower-cased Python string */
|
||||
v = lowercasestring(encoding);
|
||||
if (v == NULL)
|
||||
goto onError;
|
||||
PyString_InternInPlace(&v);
|
||||
|
||||
/* First, try to lookup the name in the registry dictionary */
|
||||
result = PyDict_GetItem(_PyCodec_SearchCache, v);
|
||||
if (result != NULL) {
|
||||
Py_INCREF(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Next, scan the search functions in order of registration */
|
||||
len = PyList_Size(_PyCodec_SearchPath);
|
||||
if (len < 0)
|
||||
goto onError;
|
||||
|
||||
args = PyTuple_New(1);
|
||||
if (args == NULL)
|
||||
goto onError;
|
||||
PyTuple_SET_ITEM(args,0,v);
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
PyObject *func;
|
||||
|
||||
func = PyList_GetItem(_PyCodec_SearchPath, i);
|
||||
if (func == NULL)
|
||||
goto onError;
|
||||
result = PyEval_CallObject(func,args);
|
||||
if (result == NULL)
|
||||
goto onError;
|
||||
if (result == Py_None) {
|
||||
Py_DECREF(result);
|
||||
continue;
|
||||
}
|
||||
if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
"codec search functions must return 4-tuples");
|
||||
Py_DECREF(result);
|
||||
goto onError;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (i == len) {
|
||||
/* XXX Perhaps we should cache misses too ? */
|
||||
PyErr_SetString(PyExc_LookupError,
|
||||
"unkown encoding");
|
||||
goto onError;
|
||||
}
|
||||
|
||||
/* Cache and return the result */
|
||||
PyDict_SetItem(_PyCodec_SearchCache, v, result);
|
||||
Py_DECREF(args);
|
||||
return result;
|
||||
|
||||
onError:
|
||||
Py_XDECREF(args);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static
|
||||
PyObject *args_tuple(PyObject *object,
|
||||
const char *errors)
|
||||
{
|
||||
PyObject *args;
|
||||
|
||||
args = PyTuple_New(1 + (errors != NULL));
|
||||
if (args == NULL)
|
||||
return NULL;
|
||||
Py_INCREF(object);
|
||||
PyTuple_SET_ITEM(args,0,object);
|
||||
if (errors) {
|
||||
PyObject *v;
|
||||
|
||||
v = PyString_FromString(errors);
|
||||
if (v == NULL) {
|
||||
Py_DECREF(args);
|
||||
return NULL;
|
||||
}
|
||||
PyTuple_SET_ITEM(args, 1, v);
|
||||
}
|
||||
return args;
|
||||
}
|
||||
|
||||
/* Build a codec by calling factory(stream[,errors]) or just
|
||||
factory(errors) depending on whether the given parameters are
|
||||
non-NULL. */
|
||||
|
||||
static
|
||||
PyObject *build_stream_codec(PyObject *factory,
|
||||
PyObject *stream,
|
||||
const char *errors)
|
||||
{
|
||||
PyObject *args, *codec;
|
||||
|
||||
args = args_tuple(stream, errors);
|
||||
if (args == NULL)
|
||||
return NULL;
|
||||
|
||||
codec = PyEval_CallObject(factory, args);
|
||||
Py_DECREF(args);
|
||||
return codec;
|
||||
}
|
||||
|
||||
/* Convenience APIs to query the Codec registry.
|
||||
|
||||
All APIs return a codec object with incremented refcount.
|
||||
|
||||
*/
|
||||
|
||||
PyObject *PyCodec_Encoder(const char *encoding)
|
||||
{
|
||||
PyObject *codecs;
|
||||
PyObject *v;
|
||||
|
||||
codecs = _PyCodec_Lookup(encoding);
|
||||
if (codecs == NULL)
|
||||
goto onError;
|
||||
v = PyTuple_GET_ITEM(codecs,0);
|
||||
Py_INCREF(v);
|
||||
return v;
|
||||
|
||||
onError:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyObject *PyCodec_Decoder(const char *encoding)
|
||||
{
|
||||
PyObject *codecs;
|
||||
PyObject *v;
|
||||
|
||||
codecs = _PyCodec_Lookup(encoding);
|
||||
if (codecs == NULL)
|
||||
goto onError;
|
||||
v = PyTuple_GET_ITEM(codecs,1);
|
||||
Py_INCREF(v);
|
||||
return v;
|
||||
|
||||
onError:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Create a stream reader for the given encoding by calling the stream
   reader factory (item 2 of the codec tuple) with stream and, if
   non-NULL, errors.

   Returns the object produced by the factory, or NULL with an
   exception set if the lookup or the factory call fails. */

PyObject *PyCodec_StreamReader(const char *encoding,
                               PyObject *stream,
                               const char *errors)
{
    PyObject *codecs;
    PyObject *reader;

    codecs = _PyCodec_Lookup(encoding);
    if (codecs == NULL)
        return NULL;
    reader = build_stream_codec(PyTuple_GET_ITEM(codecs, 2), stream, errors);
    /* The tuple reference returned by _PyCodec_Lookup() is ours to
       release, whether or not the factory call succeeded. */
    Py_DECREF(codecs);
    return reader;
}
|
||||
|
||||
/* Create a stream writer for the given encoding by calling the stream
   writer factory (item 3 of the codec tuple) with stream and, if
   non-NULL, errors.

   Returns the object produced by the factory, or NULL with an
   exception set if the lookup or the factory call fails. */

PyObject *PyCodec_StreamWriter(const char *encoding,
                               PyObject *stream,
                               const char *errors)
{
    PyObject *codecs;
    PyObject *writer;

    codecs = _PyCodec_Lookup(encoding);
    if (codecs == NULL)
        return NULL;
    writer = build_stream_codec(PyTuple_GET_ITEM(codecs, 3), stream, errors);
    /* The tuple reference returned by _PyCodec_Lookup() is ours to
       release, whether or not the factory call succeeded. */
    Py_DECREF(codecs);
    return writer;
}
|
||||
|
||||
/* Encode an object (e.g. an Unicode object) using the given encoding
|
||||
and return the resulting encoded object (usually a Python string).
|
||||
|
||||
errors is passed to the encoder factory as argument if non-NULL. */
|
||||
|
||||
PyObject *PyCodec_Encode(PyObject *object,
|
||||
const char *encoding,
|
||||
const char *errors)
|
||||
{
|
||||
PyObject *encoder = NULL;
|
||||
PyObject *args = NULL, *result;
|
||||
PyObject *v;
|
||||
|
||||
encoder = PyCodec_Encoder(encoding);
|
||||
if (encoder == NULL)
|
||||
goto onError;
|
||||
|
||||
args = args_tuple(object, errors);
|
||||
if (args == NULL)
|
||||
goto onError;
|
||||
|
||||
result = PyEval_CallObject(encoder,args);
|
||||
if (result == NULL)
|
||||
goto onError;
|
||||
|
||||
if (!PyTuple_Check(result) ||
|
||||
PyTuple_GET_SIZE(result) != 2) {
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
"encoder must return a tuple (object,integer)");
|
||||
goto onError;
|
||||
}
|
||||
v = PyTuple_GET_ITEM(result,0);
|
||||
Py_INCREF(v);
|
||||
/* We don't check or use the second (integer) entry. */
|
||||
|
||||
Py_DECREF(args);
|
||||
Py_DECREF(encoder);
|
||||
Py_DECREF(result);
|
||||
return v;
|
||||
|
||||
onError:
|
||||
Py_XDECREF(args);
|
||||
Py_XDECREF(encoder);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Decode an object (usually a Python string) using the given encoding
|
||||
and return an equivalent object (e.g. an Unicode object).
|
||||
|
||||
errors is passed to the decoder factory as argument if non-NULL. */
|
||||
|
||||
PyObject *PyCodec_Decode(PyObject *object,
|
||||
const char *encoding,
|
||||
const char *errors)
|
||||
{
|
||||
PyObject *decoder = NULL;
|
||||
PyObject *args = NULL, *result = NULL;
|
||||
PyObject *v;
|
||||
|
||||
decoder = PyCodec_Decoder(encoding);
|
||||
if (decoder == NULL)
|
||||
goto onError;
|
||||
|
||||
args = args_tuple(object, errors);
|
||||
if (args == NULL)
|
||||
goto onError;
|
||||
|
||||
result = PyEval_CallObject(decoder,args);
|
||||
if (result == NULL)
|
||||
goto onError;
|
||||
if (!PyTuple_Check(result) ||
|
||||
PyTuple_GET_SIZE(result) != 2) {
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
"decoder must return a tuple (object,integer)");
|
||||
goto onError;
|
||||
}
|
||||
v = PyTuple_GET_ITEM(result,0);
|
||||
Py_INCREF(v);
|
||||
/* We don't check or use the second (integer) entry. */
|
||||
|
||||
Py_DECREF(args);
|
||||
Py_DECREF(decoder);
|
||||
Py_DECREF(result);
|
||||
return v;
|
||||
|
||||
onError:
|
||||
Py_XDECREF(args);
|
||||
Py_XDECREF(decoder);
|
||||
Py_XDECREF(result);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void _PyCodecRegistry_Init()
|
||||
{
|
||||
if (_PyCodec_SearchPath == NULL)
|
||||
_PyCodec_SearchPath = PyList_New(0);
|
||||
if (_PyCodec_SearchCache == NULL)
|
||||
_PyCodec_SearchCache = PyDict_New();
|
||||
if (_PyCodec_SearchPath == NULL ||
|
||||
_PyCodec_SearchCache == NULL)
|
||||
Py_FatalError("can't intialize codec registry");
|
||||
}
|
||||
|
||||
void _PyCodecRegistry_Fini()
|
||||
{
|
||||
Py_XDECREF(_PyCodec_SearchPath);
|
||||
Py_XDECREF(_PyCodec_SearchCache);
|
||||
}
|
Loading…
Reference in New Issue