Added support for passing Unicode strings to Expat handlers by default.

This version still includes #ifdef hackery to compile with 1.5.2.
This commit is contained in:
Andrew M. Kuchling 2000-06-27 00:33:30 +00:00
parent f57d7b9e30
commit beba056cd7
1 changed file with 639 additions and 461 deletions

View File

@ -65,6 +65,8 @@ typedef struct {
PyObject_HEAD
XML_Parser itself;
int returns_unicode; /* True if Unicode strings are returned;
if false, UTF-8 strings are returned */
PyObject **handlers;
} xmlparseobject;
@ -73,7 +75,7 @@ staticforward PyTypeObject Xmlparsetype;
typedef void (*xmlhandlersetter)( XML_Parser *self, void *meth );
typedef void* xmlhandler;
struct HandlerInfo{
struct HandlerInfo {
const char *name;
xmlhandlersetter setter;
xmlhandler handler;
@ -81,7 +83,9 @@ struct HandlerInfo{
staticforward struct HandlerInfo handler_info[];
static PyObject *conv_atts( XML_Char **atts){
/* Convert an array of attributes and their values into a Python dict */
static PyObject *conv_atts_using_string( XML_Char **atts){
PyObject *attrs_obj=NULL;
XML_Char **attrs_p, **attrs_k;
int attrs_len;
@ -114,11 +118,98 @@ static PyObject *conv_atts( XML_Char **atts){
return attrs_obj;
}
#if !(PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6)
/* Convert a NULL-terminated array of alternating attribute names and
   values into a Python dict whose keys and values are Unicode objects.
   Each entry is decoded as UTF-8 with "strict" error handling.
   Returns the new dict, or NULL if the dict cannot be created, a string
   cannot be decoded, or an item cannot be inserted. */
static PyObject *conv_atts_using_unicode( XML_Char **atts){
    PyObject *attrs_obj=NULL;
    XML_Char **attrs_p, **attrs_k=NULL;
    int attrs_len;

    if( (attrs_obj = PyDict_New()) == NULL )
        goto finally;
    for(attrs_len=0, attrs_p = atts;
        *attrs_p;
        attrs_p++, attrs_len++) {
        if (attrs_len%2) {
            /* Odd index: *attrs_p is the value that belongs to the
               name remembered in attrs_k on the previous iteration. */
            PyObject *attr_str, *value_str;
            const char *p = (const char *) (*attrs_k);
            attr_str=PyUnicode_DecodeUTF8(p, strlen(p), "strict");
            if (! attr_str) {
                Py_DECREF(attrs_obj);
                attrs_obj=NULL;
                goto finally;
            }
            p = (const char *) *attrs_p;
            value_str=PyUnicode_DecodeUTF8(p, strlen(p), "strict");
            if (! value_str) {
                Py_DECREF(attrs_obj);
                Py_DECREF(attr_str);
                attrs_obj=NULL;
                goto finally;
            }
            if (PyDict_SetItem(attrs_obj, attr_str, value_str) < 0) {
                /* PyDict_SetItem does not take ownership of the key or
                   the value, so both must be released on failure too. */
                Py_DECREF(attrs_obj);
                Py_DECREF(attr_str);
                Py_DECREF(value_str);
                attrs_obj=NULL;
                goto finally;
            }
            Py_DECREF(attr_str);
            Py_DECREF(value_str);
        }
        else attrs_k=attrs_p;   /* even index: remember the name */
    }
finally:
    return attrs_obj;
}
/* Decode a NUL-terminated XML_Char string into a Python Unicode object
   using strict UTF-8 decoding.
   A null pointer yields a new reference to None. */
static PyObject *conv_string_to_unicode( XML_Char *str ) {
    const char *utf8 = (const char *)str;

    /* XXX this relies on XML_Char being 8-bit, so that what Expat
       hands over is UTF-8; the caller wants Unicode. */
    if (utf8 != NULL)
        return PyUnicode_DecodeUTF8( utf8, strlen( utf8 ), "strict" );

    Py_INCREF(Py_None);
    return Py_None;
}
/* Decode the first len bytes of an XML_Char buffer into a Python Unicode
   object using strict UTF-8 decoding.
   A null pointer yields a new reference to None. */
static PyObject *conv_string_len_to_unicode( const XML_Char *str, int len ) {
    /* XXX this relies on XML_Char being 8-bit, so that what Expat
       hands over is UTF-8; the caller wants Unicode. */
    if (str != NULL)
        return PyUnicode_DecodeUTF8( (const char *)str, len, "strict" );

    Py_INCREF(Py_None);
    return Py_None;
}
#endif
/* Copy a NUL-terminated XML_Char string into an 8-bit Python string
   without re-encoding it.
   A null pointer yields a new reference to None. */
static PyObject *conv_string_to_utf8( XML_Char *str ) {
    /* XXX this relies on XML_Char being 8-bit, so that what Expat
       hands over is already the UTF-8 the caller wants. */
    if (str != NULL)
        return PyString_FromString( (const char *)str );

    Py_INCREF(Py_None);
    return Py_None;
}
/* Copy the first len bytes of an XML_Char buffer into an 8-bit Python
   string without re-encoding it.
   A null pointer yields a new reference to None. */
static PyObject *conv_string_len_to_utf8( const XML_Char *str,
                                          int len )
{
    /* XXX this relies on XML_Char being 8-bit, so that what Expat
       hands over is already the UTF-8 the caller wants. */
    if (str != NULL)
        return PyString_FromStringAndSize( (const char *)str, len );

    Py_INCREF(Py_None);
    return Py_None;
}
/* Callback routines */
void clear_handlers( xmlparseobject *self );
static void clear_handlers( xmlparseobject *self );
void flag_error( xmlparseobject *self ){
static void flag_error( xmlparseobject *self ){
clear_handlers(self);
}
@ -147,10 +238,16 @@ static RC my_##NAME##Handler PARAMS {\
return RETURN; \
}
#define NOTHING /**/
/* STRING_CONV_FUNC names the converter applied to NUL-terminated
   XML_Char strings before they are handed to a Python handler.
   Before Python 1.6 it is always conv_string_to_utf8.  On later versions
   it expands to an expression that reads self->returns_unicode, so a
   variable named `self` must be in scope wherever the macro is used. */
#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
#define STRING_CONV_FUNC conv_string_to_utf8
#else
/* Python 1.6 and later versions */
#define STRING_CONV_FUNC (self->returns_unicode \
? conv_string_to_unicode : conv_string_to_utf8)
#endif
#define VOID_HANDLER( NAME, PARAMS, PARAM_FORMAT ) \
RC_HANDLER( void, NAME, PARAMS, NOTHING, PARAM_FORMAT, NOTHING, NOTHING,\
RC_HANDLER( void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
(xmlparseobject *)userData )
#define INT_HANDLER( NAME, PARAMS, PARAM_FORMAT )\
@ -158,23 +255,42 @@ static RC my_##NAME##Handler PARAMS {\
rc = PyInt_AsLong( rv );, rc, \
(xmlparseobject *)userData )
#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
VOID_HANDLER( StartElement,
(void *userData, const XML_Char *name, const XML_Char **atts ),
("(sO&)", name, conv_atts, atts ) )
("(O&O&)", STRING_CONV_FUNC, name,
conv_atts_using_string, atts ) )
#else
/* Python 1.6 and later */
VOID_HANDLER( StartElement,
(void *userData, const XML_Char *name, const XML_Char **atts ),
("(O&O&)", STRING_CONV_FUNC, name,
(self->returns_unicode
? conv_atts_using_unicode
: conv_atts_using_string), atts ) )
#endif
VOID_HANDLER( EndElement,
(void *userData, const XML_Char *name ),
("(s)", name) )
("(O&)", STRING_CONV_FUNC, name) )
VOID_HANDLER( ProcessingInstruction,
(void *userData,
const XML_Char *target,
const XML_Char *data),
("(ss)",target, data ))
("(O&O&)",STRING_CONV_FUNC,target, STRING_CONV_FUNC,data ))
#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
VOID_HANDLER( CharacterData,
(void *userData, const XML_Char *data, int len),
("(s#)", data, len ) )
("(O)", conv_string_len_to_utf8(data,len) ) )
#else
VOID_HANDLER( CharacterData,
(void *userData, const XML_Char *data, int len),
("(O)", (self->returns_unicode
? conv_string_len_to_unicode(data,len)
: conv_string_len_to_utf8(data,len) ) ) )
#endif
VOID_HANDLER( UnparsedEntityDecl,
(void *userData,
@ -183,7 +299,10 @@ VOID_HANDLER( UnparsedEntityDecl,
const XML_Char *systemId,
const XML_Char *publicId,
const XML_Char *notationName),
("(sssss)", entityName, base, systemId, publicId, notationName))
("(O&O&O&O&O&)",
STRING_CONV_FUNC,entityName, STRING_CONV_FUNC,base,
STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId,
STRING_CONV_FUNC,notationName))
VOID_HANDLER( NotationDecl,
(void *userData,
@ -191,22 +310,24 @@ VOID_HANDLER( NotationDecl,
const XML_Char *base,
const XML_Char *systemId,
const XML_Char *publicId),
("(ssss)", notationName, base, systemId, publicId))
("(O&O&O&O&)",
STRING_CONV_FUNC,notationName, STRING_CONV_FUNC,base,
STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId))
VOID_HANDLER( StartNamespaceDecl,
(void *userData,
const XML_Char *prefix,
const XML_Char *uri),
("(ss)", prefix, uri ))
("(O&O&)", STRING_CONV_FUNC,prefix, STRING_CONV_FUNC,uri ))
VOID_HANDLER( EndNamespaceDecl,
(void *userData,
const XML_Char *prefix),
("(s)", prefix ))
("(O&)", STRING_CONV_FUNC,prefix ))
VOID_HANDLER( Comment,
(void *userData, const XML_Char *prefix),
("(s)", prefix))
("(O&)", STRING_CONV_FUNC,prefix))
VOID_HANDLER( StartCdataSection,
(void *userData),
@ -216,13 +337,27 @@ VOID_HANDLER( EndCdataSection,
(void *userData),
("()" ))
#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
VOID_HANDLER( Default,
(void *userData, const XML_Char *s, int len),
("(s#)",s,len))
("(O)", conv_string_len_to_utf8(s,len) ) )
VOID_HANDLER( DefaultHandlerExpand,
(void *userData, const XML_Char *s, int len),
("(s#)",s,len))
("(O)", conv_string_len_to_utf8(s,len) ) )
#else
VOID_HANDLER( Default,
(void *userData, const XML_Char *s, int len),
("(O)", (self->returns_unicode
? conv_string_len_to_unicode(s,len)
: conv_string_len_to_utf8(s,len) ) ) )
VOID_HANDLER( DefaultHandlerExpand,
(void *userData, const XML_Char *s, int len),
("(O)", (self->returns_unicode
? conv_string_len_to_unicode(s,len)
: conv_string_len_to_utf8(s,len) ) ) )
#endif
INT_HANDLER( NotStandalone,
(void *userData),
@ -235,7 +370,9 @@ RC_HANDLER( int, ExternalEntityRef,
const XML_Char *systemId,
const XML_Char *publicId),
int rc=0;,
("(ssss)", context, base, systemId, publicId ),
("(O&O&O&O&)",
STRING_CONV_FUNC,context, STRING_CONV_FUNC,base,
STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId ),
rc = PyInt_AsLong( rv );, rc,
XML_GetUserData( parser ) )
@ -383,7 +520,7 @@ xmlparse_ParseFile( xmlparseobject *self, PyObject *args )
int bytes_read;
void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
if (buf == NULL) {
/* FIXME: throw exception for no memory */
PyErr_SetString(PyExc_MemoryError, "out of memory");
return NULL;
}
@ -471,10 +608,20 @@ newxmlparseobject( char *encoding, char *namespace_separator){
int i;
xmlparseobject *self;
#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
self = PyObject_NEW(xmlparseobject, &Xmlparsetype);
if (self == NULL)
return NULL;
self->returns_unicode = 0;
#else
/* Code for versions 1.6 and later */
self = PyObject_New(xmlparseobject, &Xmlparsetype);
if (self == NULL)
return NULL;
self->returns_unicode = 1;
#endif
if (namespace_separator) {
self->itself = XML_ParserCreateNS(encoding,
*namespace_separator);
@ -512,7 +659,13 @@ xmlparse_dealloc( xmlparseobject *self )
for( i=0; handler_info[i].name!=NULL; i++ ){
Py_XDECREF( self->handlers[i] );
}
#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
/* Code for versions before 1.6 */
free(self);
#else
/* Code for versions 1.6 and later */
PyObject_Del(self);
#endif
}
static int handlername2int( const char *name ){
@ -541,6 +694,8 @@ xmlparse_getattr(xmlparseobject *self, char *name)
if (strcmp(name, "ErrorByteIndex") == 0)
return Py_BuildValue("l",
XML_GetErrorByteIndex(self->itself));
if (strcmp(name, "returns_unicode") == 0)
return Py_BuildValue("i", self->returns_unicode);
handlernum=handlername2int( name );
@ -590,6 +745,22 @@ xmlparse_setattr( xmlparseobject *self, char *name, PyObject *v)
return -1;
}
/* Assignment to "returns_unicode": coerce the value to an integer and
   store it as a 0/1 flag.  Returns -1 if the value cannot be converted,
   or if a true value is requested on a Python older than 1.6. */
if (strcmp(name, "returns_unicode") == 0) {
    long want_unicode;
    PyObject *intobj = PyNumber_Int(v);
    if (intobj == NULL) return -1;
    want_unicode = PyInt_AsLong(intobj);
    /* Release the temporary before any return so no path can leak it. */
    Py_DECREF(intobj);
    /* -1 with an exception pending means the conversion itself failed. */
    if (want_unicode == -1 && PyErr_Occurred()) return -1;
    if (want_unicode) {
#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
        PyErr_SetString(PyExc_ValueError,
                        "Cannot return Unicode strings in Python 1.5");
        return -1;
#else
        self->returns_unicode = 1;
#endif
    }
    else self->returns_unicode = 0;
    return 0;
}
if( sethandler( self, name, v ) ){
return 0;
}
@ -673,13 +844,14 @@ static struct PyMethodDef pyexpat_methods[] = {
{NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
};
/* Initialization function for the module (*must* be called initpyexpat) */
/* Module docstring */
static char pyexpat_module_documentation[] =
"Python wrapper for Expat parser."
;
/* Initialization function for the module */
void
initpyexpat(){
PyObject *m, *d;
@ -705,6 +877,13 @@ initpyexpat(){
errors_module=PyModule_New( "errors" );
PyDict_SetItemString(d,"errors", errors_module );
/* XXX When Expat supports some way of figuring out how it was
compiled, this should check and set native_encoding
appropriately.
*/
PyDict_SetItemString(d, "native_encoding",
PyString_FromString("UTF-8") );
errors_dict=PyModule_GetDict( errors_module );
#define MYCONST(name) \
@ -817,7 +996,7 @@ void pyxml_SetEndCdataSection( XML_Parser *parser,
(pairsetter)XML_SetCdataSectionHandler);
}
static struct HandlerInfo handler_info[]=
statichere struct HandlerInfo handler_info[]=
{{"StartElementHandler",
pyxml_SetStartElementHandler,
my_StartElementHandler},
@ -867,4 +1046,3 @@ static struct HandlerInfo handler_info[]=
{NULL, NULL, NULL } /* sentinel */
};