cpython/Python/codecs.c

383 lines
8.5 KiB
C
Raw Normal View History

/* ------------------------------------------------------------------------
Python Codec Registry and support functions
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
------------------------------------------------------------------------ */
#include "Python.h"
#include <ctype.h>
/* --- Globals ------------------------------------------------------------ */
static PyObject *_PyCodec_SearchPath;
static PyObject *_PyCodec_SearchCache;
/* Flag used for lazy import of the standard encodings package */
static int import_encodings_called = 0;
/* --- Codec Registry ----------------------------------------------------- */
/* Lazily import the standard "encodings" package, which is expected to
   register the first codec search function.

   The import is deferred until the registry is first used so that
   scripts which never need a codec do not pay for it at startup.

   Callers test import_encodings_called before calling this function, and
   the flag is raised here before the import starts, so only one attempt
   is ever made.  A failed import is not reported: the pending exception
   is simply cleared. */
static
void import_encodings()
{
    PyObject *encodings_pkg;

    import_encodings_called = 1;
    encodings_pkg = PyImport_ImportModule("encodings");
    if (encodings_pkg != NULL) {
        /* Only the import itself matters; drop the module reference. */
        Py_DECREF(encodings_pkg);
    }
    else {
        PyErr_Clear();
    }
}
/* Append search_function to the list of codec search functions.

   The list keeps its own reference to search_function, so the caller's
   reference is left untouched.

   Returns the result of PyList_Append() on success.  Returns -1 with an
   exception set if search_function is NULL (bad argument) or is not
   callable (TypeError).  The lazy import of the encodings package is
   triggered first, before the argument is validated. */
int PyCodec_Register(PyObject *search_function)
{
    if (!import_encodings_called)
        import_encodings();

    if (search_function != NULL && PyCallable_Check(search_function))
        return PyList_Append(_PyCodec_SearchPath, search_function);

    /* Rejected: report which of the two checks failed. */
    if (search_function == NULL) {
        PyErr_BadArgument();
    }
    else {
        PyErr_SetString(PyExc_TypeError,
                        "argument must be callable");
    }
    return -1;
}
/* Return a new Python string object holding a lower-cased copy of the
   NUL-terminated C string `string`.

   Returns NULL if the string object could not be allocated.  `string`
   must not be NULL. */
static
PyObject *lowercasestring(const char *string)
{
    int i;
    int len = (int)strlen(string);
    char *p;
    PyObject *v;

    /* Allocate an uninitialized string of the right size and fill it in
       place. */
    v = PyString_FromStringAndSize(NULL, len);
    if (v == NULL)
        return NULL;
    p = PyString_AS_STRING(v);
    for (i = 0; i < len; i++) {
        /* The <ctype.h> functions are only defined for EOF and values
           representable as unsigned char; a plain char may be negative
           for bytes >= 0x80, so convert before calling tolower(). */
        p[i] = (char)tolower((unsigned char)string[i]);
    }
    return v;
}
/* Look up the given encoding and return the tuple providing its codec
   facilities.  The caller owns the returned reference.

   The encoding name is converted to all lower-case characters before
   the lookup, which makes lookups through this mechanism effectively
   case-insensitive.

   The cache dictionary is consulted first.  On a miss, the registered
   search functions are called in order of registration with the
   lower-cased name as their only argument.  A search function returns
   None to decline; any other return value must be a 4-tuple, which is
   then cached and returned.

   If no codec is found, a LookupError is set and NULL is returned.
   NULL is also returned, with an exception set, if encoding is NULL, if
   a search function fails or returns something other than None or a
   4-tuple, or if the result cannot be stored in the cache. */
PyObject *_PyCodec_Lookup(const char *encoding)
{
    PyObject *result, *args = NULL, *v;
    int i, len;

    if (encoding == NULL) {
        PyErr_BadArgument();
        goto onError;
    }
    if (!import_encodings_called)
        import_encodings();

    /* Convert the encoding to a lower-cased Python string */
    v = lowercasestring(encoding);
    if (v == NULL)
        goto onError;
    PyString_InternInPlace(&v);

    /* First, try to lookup the name in the registry dictionary.
       PyDict_GetItem() hands back a borrowed reference, so take our own
       before returning it, and release the key we created. */
    result = PyDict_GetItem(_PyCodec_SearchCache, v);
    if (result != NULL) {
        Py_INCREF(result);
        Py_DECREF(v);
        return result;
    }

    /* Next, scan the search functions in order of registration.
       Until the argument tuple takes over the key below, v has to be
       released explicitly on every error exit. */
    len = PyList_Size(_PyCodec_SearchPath);
    if (len < 0) {
        Py_DECREF(v);
        goto onError;
    }
    args = PyTuple_New(1);
    if (args == NULL) {
        Py_DECREF(v);
        goto onError;
    }
    /* The tuple steals our reference to v; from here on v stays alive
       exactly as long as args does. */
    PyTuple_SET_ITEM(args,0,v);

    for (i = 0; i < len; i++) {
        PyObject *func;

        func = PyList_GetItem(_PyCodec_SearchPath, i);
        if (func == NULL)
            goto onError;
        result = PyEval_CallObject(func,args);
        if (result == NULL)
            goto onError;
        if (result == Py_None) {
            /* This search function does not know the encoding. */
            Py_DECREF(result);
            continue;
        }
        if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
            PyErr_SetString(PyExc_TypeError,
                            "codec search functions must return 4-tuples");
            Py_DECREF(result);
            goto onError;
        }
        break;
    }
    if (i == len) {
        /* XXX Perhaps we should cache misses too ? */
        PyErr_SetString(PyExc_LookupError,
                        "unknown encoding");
        goto onError;
    }

    /* Cache and return the result.  A failed insert leaves an exception
       set, so it must be reported rather than returning a result with an
       error pending. */
    if (PyDict_SetItem(_PyCodec_SearchCache, v, result) < 0) {
        Py_DECREF(result);
        goto onError;
    }
    Py_DECREF(args);
    return result;

 onError:
    Py_XDECREF(args);
    return NULL;
}
/* Build the argument tuple used to call a codec function or factory:
   (object,) when errors is NULL, (object, errors) otherwise, with errors
   converted to a Python string.

   The tuple holds its own reference to object.  Returns NULL if either
   the tuple or the errors string cannot be created. */
static
PyObject *args_tuple(PyObject *object,
                     const char *errors)
{
    PyObject *tuple;
    PyObject *errors_str;
    int nargs = (errors != NULL) ? 2 : 1;

    tuple = PyTuple_New(nargs);
    if (tuple == NULL)
        return NULL;

    Py_INCREF(object);
    PyTuple_SET_ITEM(tuple, 0, object);
    if (errors == NULL)
        return tuple;

    errors_str = PyString_FromString(errors);
    if (errors_str == NULL) {
        /* Releasing the tuple also releases the reference to object. */
        Py_DECREF(tuple);
        return NULL;
    }
    PyTuple_SET_ITEM(tuple, 1, errors_str);
    return tuple;
}
/* Build a stream codec by calling factory(stream) or, when errors is
   non-NULL, factory(stream, errors).

   stream is always passed as the first argument.  Returns whatever the
   factory returns, or NULL if the argument tuple cannot be built or the
   call fails. */
static
PyObject *build_stream_codec(PyObject *factory,
                             PyObject *stream,
                             const char *errors)
{
    PyObject *call_args = args_tuple(stream, errors);
    PyObject *instance = NULL;

    if (call_args != NULL) {
        instance = PyEval_CallObject(factory, call_args);
        Py_DECREF(call_args);
    }
    return instance;
}
/* Convenience APIs to query the Codec registry.
All APIs return a codec object with incremented refcount.
*/
/* Return the encoder for the given encoding: item 0 of the tuple
   produced by _PyCodec_Lookup().  The caller owns the returned
   reference.  Returns NULL with an exception set if the lookup fails. */
PyObject *PyCodec_Encoder(const char *encoding)
{
    PyObject *codecs;
    PyObject *v;

    codecs = _PyCodec_Lookup(encoding);
    if (codecs == NULL)
        return NULL;
    v = PyTuple_GET_ITEM(codecs,0);
    /* Take our own reference to the item before releasing the tuple
       that _PyCodec_Lookup() handed us. */
    Py_INCREF(v);
    Py_DECREF(codecs);
    return v;
}
/* Return the decoder for the given encoding: item 1 of the tuple
   produced by _PyCodec_Lookup().  The caller owns the returned
   reference.  Returns NULL with an exception set if the lookup fails. */
PyObject *PyCodec_Decoder(const char *encoding)
{
    PyObject *codecs;
    PyObject *v;

    codecs = _PyCodec_Lookup(encoding);
    if (codecs == NULL)
        return NULL;
    v = PyTuple_GET_ITEM(codecs,1);
    /* Take our own reference to the item before releasing the tuple
       that _PyCodec_Lookup() handed us. */
    Py_INCREF(v);
    Py_DECREF(codecs);
    return v;
}
/* Build a stream reader for the given encoding by calling item 2 of the
   tuple produced by _PyCodec_Lookup() with (stream) or, when errors is
   non-NULL, (stream, errors).

   Returns the object produced by that call, or NULL with an exception
   set if the lookup or the call fails. */
PyObject *PyCodec_StreamReader(const char *encoding,
                               PyObject *stream,
                               const char *errors)
{
    PyObject *codecs;
    PyObject *reader;

    codecs = _PyCodec_Lookup(encoding);
    if (codecs == NULL)
        return NULL;
    reader = build_stream_codec(PyTuple_GET_ITEM(codecs,2),stream,errors);
    /* The lookup returned an owned reference; release it now that the
       factory has been called. */
    Py_DECREF(codecs);
    return reader;
}
/* Build a stream writer for the given encoding by calling item 3 of the
   tuple produced by _PyCodec_Lookup() with (stream) or, when errors is
   non-NULL, (stream, errors).

   Returns the object produced by that call, or NULL with an exception
   set if the lookup or the call fails. */
PyObject *PyCodec_StreamWriter(const char *encoding,
                               PyObject *stream,
                               const char *errors)
{
    PyObject *codecs;
    PyObject *writer;

    codecs = _PyCodec_Lookup(encoding);
    if (codecs == NULL)
        return NULL;
    writer = build_stream_codec(PyTuple_GET_ITEM(codecs,3),stream,errors);
    /* The lookup returned an owned reference; release it now that the
       factory has been called. */
    Py_DECREF(codecs);
    return writer;
}
/* Encode an object (e.g. a Unicode object) using the given encoding
   and return the resulting encoded object (usually a Python string).

   The encoder is called with (object) or, when errors is non-NULL,
   (object, errors).  It must return a 2-tuple; the first item is
   returned to the caller as a new reference and the second is ignored.

   Returns NULL with an exception set if the encoder cannot be found,
   the call fails, or the encoder returns anything other than a
   2-tuple (TypeError). */
PyObject *PyCodec_Encode(PyObject *object,
                         const char *encoding,
                         const char *errors)
{
    PyObject *encoder = NULL;
    PyObject *args = NULL, *result = NULL;
    PyObject *v;

    encoder = PyCodec_Encoder(encoding);
    if (encoder == NULL)
        goto onError;

    args = args_tuple(object, errors);
    if (args == NULL)
        goto onError;

    result = PyEval_CallObject(encoder,args);
    if (result == NULL)
        goto onError;

    if (!PyTuple_Check(result) ||
        PyTuple_GET_SIZE(result) != 2) {
        PyErr_SetString(PyExc_TypeError,
                        "encoder must return a tuple (object,integer)");
        goto onError;
    }
    v = PyTuple_GET_ITEM(result,0);
    Py_INCREF(v);
    /* We don't check or use the second (integer) entry. */

    Py_DECREF(args);
    Py_DECREF(encoder);
    Py_DECREF(result);
    return v;

 onError:
    Py_XDECREF(args);
    Py_XDECREF(encoder);
    /* result is non-NULL here when the encoder returned a malformed
       value; it must be released like the other temporaries. */
    Py_XDECREF(result);
    return NULL;
}
/* Decode an object (usually a Python string) using the given encoding
   and return an equivalent object (e.g. a Unicode object).

   The decoder is called with (object) or, when errors is non-NULL,
   (object, errors).  It must return a 2-tuple; the first item is
   returned to the caller as a new reference and the second is ignored.

   Returns NULL with an exception set if the decoder cannot be found,
   the call fails, or the decoder returns anything other than a
   2-tuple (TypeError). */
PyObject *PyCodec_Decode(PyObject *object,
                         const char *encoding,
                         const char *errors)
{
    PyObject *decoder;
    PyObject *call_args = NULL;
    PyObject *pair = NULL;
    PyObject *decoded = NULL;

    /* Each step runs only if the previous one succeeded; every failure
       falls through to the shared cleanup with decoded still NULL. */
    decoder = PyCodec_Decoder(encoding);
    if (decoder != NULL)
        call_args = args_tuple(object, errors);
    if (call_args != NULL)
        pair = PyEval_CallObject(decoder, call_args);
    if (pair != NULL) {
        if (PyTuple_Check(pair) && PyTuple_GET_SIZE(pair) == 2) {
            /* We don't check or use the second (integer) entry. */
            decoded = PyTuple_GET_ITEM(pair, 0);
            Py_INCREF(decoded);
        }
        else {
            PyErr_SetString(PyExc_TypeError,
                            "decoder must return a tuple (object,integer)");
        }
    }

    Py_XDECREF(call_args);
    Py_XDECREF(decoder);
    Py_XDECREF(pair);
    return decoded;
}
/* Create the codec registry: the list of search functions and the
   lookup cache dictionary.  Objects that already exist are kept.
   Aborts via Py_FatalError() if either object cannot be created. */
void _PyCodecRegistry_Init(void)
{
    if (_PyCodec_SearchPath == NULL)
        _PyCodec_SearchPath = PyList_New(0);
    if (_PyCodec_SearchCache == NULL)
        _PyCodec_SearchCache = PyDict_New();
    if (_PyCodec_SearchPath == NULL ||
        _PyCodec_SearchCache == NULL)
        Py_FatalError("can't initialize codec registry");
}
void _PyCodecRegistry_Fini()
{
Py_XDECREF(_PyCodec_SearchPath);
Py_XDECREF(_PyCodec_SearchCache);
}