Added support for passing Unicode strings to Expat handlers by default.
This version still includes #ifdef hackery to compile with 1.5.2.
This commit is contained in:
parent
f57d7b9e30
commit
beba056cd7
|
@ -65,6 +65,8 @@ typedef struct {
|
|||
PyObject_HEAD
|
||||
|
||||
XML_Parser itself;
|
||||
int returns_unicode; /* True if Unicode strings are returned;
|
||||
if false, UTF-8 strings are returned */
|
||||
PyObject **handlers;
|
||||
} xmlparseobject;
|
||||
|
||||
|
@ -73,7 +75,7 @@ staticforward PyTypeObject Xmlparsetype;
|
|||
typedef void (*xmlhandlersetter)( XML_Parser *self, void *meth );
|
||||
typedef void* xmlhandler;
|
||||
|
||||
struct HandlerInfo{
|
||||
struct HandlerInfo {
|
||||
const char *name;
|
||||
xmlhandlersetter setter;
|
||||
xmlhandler handler;
|
||||
|
@ -81,7 +83,9 @@ struct HandlerInfo{
|
|||
|
||||
staticforward struct HandlerInfo handler_info[];
|
||||
|
||||
static PyObject *conv_atts( XML_Char **atts){
|
||||
/* Convert an array of attributes and their values into a Python dict */
|
||||
|
||||
static PyObject *conv_atts_using_string( XML_Char **atts){
|
||||
PyObject *attrs_obj=NULL;
|
||||
XML_Char **attrs_p, **attrs_k;
|
||||
int attrs_len;
|
||||
|
@ -114,11 +118,98 @@ static PyObject *conv_atts( XML_Char **atts){
|
|||
return attrs_obj;
|
||||
}
|
||||
|
||||
#if !(PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6)
|
||||
/* Convert a NULL-terminated array of alternating attribute names and
   values into a Python dict whose keys and values are Unicode objects.

   Each entry is decoded as UTF-8 with "strict" error handling.  A
   trailing name that has no matching value is ignored.

   Returns a new reference to the dict, or NULL if the dict could not be
   created, a string failed to decode, or an insertion failed. */
static PyObject *conv_atts_using_unicode( XML_Char **atts){
    PyObject *attrs_obj;
    XML_Char **pair;

    attrs_obj = PyDict_New();
    if (attrs_obj == NULL)
        return NULL;

    /* pair[0] is the attribute name, pair[1] its value. */
    for (pair = atts; pair[0] != NULL && pair[1] != NULL; pair += 2) {
        PyObject *attr_str, *value_str;
        const char *p;
        int rc;

        p = (const char *) pair[0];
        attr_str = PyUnicode_DecodeUTF8(p, strlen(p), "strict");
        if (attr_str == NULL)
            goto error;

        p = (const char *) pair[1];
        value_str = PyUnicode_DecodeUTF8(p, strlen(p), "strict");
        if (value_str == NULL) {
            Py_DECREF(attr_str);
            goto error;
        }

        /* PyDict_SetItem does not steal references, so ours must be
           dropped whether or not the insertion succeeded. */
        rc = PyDict_SetItem(attrs_obj, attr_str, value_str);
        Py_DECREF(attr_str);
        Py_DECREF(value_str);
        if (rc < 0)
            goto error;
    }
    return attrs_obj;

error:
    Py_DECREF(attrs_obj);
    return NULL;
}
|
||||
|
||||
/* Convert a string of XML_Chars into a Unicode string.
|
||||
Returns None if str is a null pointer. */
|
||||
|
||||
static PyObject *conv_string_to_unicode( XML_Char *str ) {
|
||||
/* XXX currently this code assumes that XML_Char is 8-bit,
|
||||
and hence in UTF-8. */
|
||||
/* UTF-8 from Expat, Unicode desired */
|
||||
if (str == NULL) {Py_INCREF(Py_None); return Py_None;}
|
||||
return PyUnicode_DecodeUTF8( (const char *)str,
|
||||
strlen( (const char *)str ),
|
||||
"strict" );
|
||||
}
|
||||
|
||||
static PyObject *conv_string_len_to_unicode( const XML_Char *str, int len ) {
|
||||
/* XXX currently this code assumes that XML_Char is 8-bit,
|
||||
and hence in UTF-8. */
|
||||
/* UTF-8 from Expat, Unicode desired */
|
||||
if (str == NULL) {Py_INCREF(Py_None); return Py_None;}
|
||||
return PyUnicode_DecodeUTF8( (const char *)str,
|
||||
len,
|
||||
"strict" );
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Convert a string of XML_Chars into an 8-bit Python string.
|
||||
Returns None if str is a null pointer. */
|
||||
|
||||
static PyObject *conv_string_to_utf8( XML_Char *str ) {
|
||||
/* XXX currently this code assumes that XML_Char is 8-bit,
|
||||
and hence in UTF-8. */
|
||||
/* UTF-8 from Expat, UTF-8 desired */
|
||||
if (str == NULL) {Py_INCREF(Py_None); return Py_None;}
|
||||
return PyString_FromString( (const char *)str );
|
||||
}
|
||||
|
||||
static PyObject *conv_string_len_to_utf8( const XML_Char *str,
|
||||
int len )
|
||||
{
|
||||
/* XXX currently this code assumes that XML_Char is 8-bit,
|
||||
and hence in UTF-8. */
|
||||
/* UTF-8 from Expat, UTF-8 desired */
|
||||
if (str == NULL) {Py_INCREF(Py_None); return Py_None;}
|
||||
return PyString_FromStringAndSize( (const char *)str, len );
|
||||
}
|
||||
|
||||
/* Callback routines */
|
||||
|
||||
void clear_handlers( xmlparseobject *self );
|
||||
static void clear_handlers( xmlparseobject *self );
|
||||
|
||||
void flag_error( xmlparseobject *self ){
|
||||
static void flag_error( xmlparseobject *self ){
|
||||
clear_handlers(self);
|
||||
}
|
||||
|
||||
|
@ -147,10 +238,16 @@ static RC my_##NAME##Handler PARAMS {\
|
|||
return RETURN; \
|
||||
}
|
||||
|
||||
#define NOTHING /**/
|
||||
#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
|
||||
#define STRING_CONV_FUNC conv_string_to_utf8
|
||||
#else
|
||||
/* Python 1.6 and later versions */
|
||||
#define STRING_CONV_FUNC (self->returns_unicode \
|
||||
? conv_string_to_unicode : conv_string_to_utf8)
|
||||
#endif
|
||||
|
||||
#define VOID_HANDLER( NAME, PARAMS, PARAM_FORMAT ) \
|
||||
RC_HANDLER( void, NAME, PARAMS, NOTHING, PARAM_FORMAT, NOTHING, NOTHING,\
|
||||
RC_HANDLER( void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
|
||||
(xmlparseobject *)userData )
|
||||
|
||||
#define INT_HANDLER( NAME, PARAMS, PARAM_FORMAT )\
|
||||
|
@ -158,23 +255,42 @@ static RC my_##NAME##Handler PARAMS {\
|
|||
rc = PyInt_AsLong( rv );, rc, \
|
||||
(xmlparseobject *)userData )
|
||||
|
||||
#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
|
||||
VOID_HANDLER( StartElement,
|
||||
(void *userData, const XML_Char *name, const XML_Char **atts ),
|
||||
("(sO&)", name, conv_atts, atts ) )
|
||||
("(O&O&)", STRING_CONV_FUNC, name,
|
||||
conv_atts_using_string, atts ) )
|
||||
#else
|
||||
/* Python 1.6 and later */
|
||||
VOID_HANDLER( StartElement,
|
||||
(void *userData, const XML_Char *name, const XML_Char **atts ),
|
||||
("(O&O&)", STRING_CONV_FUNC, name,
|
||||
(self->returns_unicode
|
||||
? conv_atts_using_unicode
|
||||
: conv_atts_using_string), atts ) )
|
||||
#endif
|
||||
|
||||
VOID_HANDLER( EndElement,
|
||||
(void *userData, const XML_Char *name ),
|
||||
("(s)", name) )
|
||||
("(O&)", STRING_CONV_FUNC, name) )
|
||||
|
||||
VOID_HANDLER( ProcessingInstruction,
|
||||
(void *userData,
|
||||
const XML_Char *target,
|
||||
const XML_Char *data),
|
||||
("(ss)",target, data ))
|
||||
("(O&O&)",STRING_CONV_FUNC,target, STRING_CONV_FUNC,data ))
|
||||
|
||||
#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
|
||||
VOID_HANDLER( CharacterData,
|
||||
(void *userData, const XML_Char *data, int len),
|
||||
("(s#)", data, len ) )
|
||||
("(O)", conv_string_len_to_utf8(data,len) ) )
|
||||
#else
|
||||
VOID_HANDLER( CharacterData,
|
||||
(void *userData, const XML_Char *data, int len),
|
||||
("(O)", (self->returns_unicode
|
||||
? conv_string_len_to_unicode(data,len)
|
||||
: conv_string_len_to_utf8(data,len) ) ) )
|
||||
#endif
|
||||
|
||||
VOID_HANDLER( UnparsedEntityDecl,
|
||||
(void *userData,
|
||||
|
@ -183,7 +299,10 @@ VOID_HANDLER( UnparsedEntityDecl,
|
|||
const XML_Char *systemId,
|
||||
const XML_Char *publicId,
|
||||
const XML_Char *notationName),
|
||||
("(sssss)", entityName, base, systemId, publicId, notationName))
|
||||
("(O&O&O&O&O&)",
|
||||
STRING_CONV_FUNC,entityName, STRING_CONV_FUNC,base,
|
||||
STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId,
|
||||
STRING_CONV_FUNC,notationName))
|
||||
|
||||
VOID_HANDLER( NotationDecl,
|
||||
(void *userData,
|
||||
|
@ -191,22 +310,24 @@ VOID_HANDLER( NotationDecl,
|
|||
const XML_Char *base,
|
||||
const XML_Char *systemId,
|
||||
const XML_Char *publicId),
|
||||
("(ssss)", notationName, base, systemId, publicId))
|
||||
("(O&O&O&O&)",
|
||||
STRING_CONV_FUNC,notationName, STRING_CONV_FUNC,base,
|
||||
STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId))
|
||||
|
||||
VOID_HANDLER( StartNamespaceDecl,
|
||||
(void *userData,
|
||||
const XML_Char *prefix,
|
||||
const XML_Char *uri),
|
||||
("(ss)", prefix, uri ))
|
||||
("(O&O&)", STRING_CONV_FUNC,prefix, STRING_CONV_FUNC,uri ))
|
||||
|
||||
VOID_HANDLER( EndNamespaceDecl,
|
||||
(void *userData,
|
||||
const XML_Char *prefix),
|
||||
("(s)", prefix ))
|
||||
("(O&)", STRING_CONV_FUNC,prefix ))
|
||||
|
||||
VOID_HANDLER( Comment,
|
||||
(void *userData, const XML_Char *prefix),
|
||||
("(s)", prefix))
|
||||
("(O&)", STRING_CONV_FUNC,prefix))
|
||||
|
||||
VOID_HANDLER( StartCdataSection,
|
||||
(void *userData),
|
||||
|
@ -216,13 +337,27 @@ VOID_HANDLER( EndCdataSection,
|
|||
(void *userData),
|
||||
("()" ))
|
||||
|
||||
#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
|
||||
VOID_HANDLER( Default,
|
||||
(void *userData, const XML_Char *s, int len),
|
||||
("(s#)",s,len))
|
||||
("(O)", conv_string_len_to_utf8(s,len) ) )
|
||||
|
||||
VOID_HANDLER( DefaultHandlerExpand,
|
||||
(void *userData, const XML_Char *s, int len),
|
||||
("(s#)",s,len))
|
||||
("(O)", conv_string_len_to_utf8(s,len) ) )
|
||||
#else
|
||||
VOID_HANDLER( Default,
|
||||
(void *userData, const XML_Char *s, int len),
|
||||
("(O)", (self->returns_unicode
|
||||
? conv_string_len_to_unicode(s,len)
|
||||
: conv_string_len_to_utf8(s,len) ) ) )
|
||||
|
||||
VOID_HANDLER( DefaultHandlerExpand,
|
||||
(void *userData, const XML_Char *s, int len),
|
||||
("(O)", (self->returns_unicode
|
||||
? conv_string_len_to_unicode(s,len)
|
||||
: conv_string_len_to_utf8(s,len) ) ) )
|
||||
#endif
|
||||
|
||||
INT_HANDLER( NotStandalone,
|
||||
(void *userData),
|
||||
|
@ -235,7 +370,9 @@ RC_HANDLER( int, ExternalEntityRef,
|
|||
const XML_Char *systemId,
|
||||
const XML_Char *publicId),
|
||||
int rc=0;,
|
||||
("(ssss)", context, base, systemId, publicId ),
|
||||
("(O&O&O&O&)",
|
||||
STRING_CONV_FUNC,context, STRING_CONV_FUNC,base,
|
||||
STRING_CONV_FUNC,systemId, STRING_CONV_FUNC,publicId ),
|
||||
rc = PyInt_AsLong( rv );, rc,
|
||||
XML_GetUserData( parser ) )
|
||||
|
||||
|
@ -383,7 +520,7 @@ xmlparse_ParseFile( xmlparseobject *self, PyObject *args )
|
|||
int bytes_read;
|
||||
void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
|
||||
if (buf == NULL) {
|
||||
/* FIXME: throw exception for no memory */
|
||||
PyErr_SetString(PyExc_MemoryError, "out of memory");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -471,10 +608,20 @@ newxmlparseobject( char *encoding, char *namespace_separator){
|
|||
int i;
|
||||
xmlparseobject *self;
|
||||
|
||||
#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
|
||||
self = PyObject_NEW(xmlparseobject, &Xmlparsetype);
|
||||
if (self == NULL)
|
||||
return NULL;
|
||||
|
||||
self->returns_unicode = 0;
|
||||
#else
|
||||
/* Code for versions 1.6 and later */
|
||||
self = PyObject_New(xmlparseobject, &Xmlparsetype);
|
||||
if (self == NULL)
|
||||
return NULL;
|
||||
|
||||
self->returns_unicode = 1;
|
||||
#endif
|
||||
if (namespace_separator) {
|
||||
self->itself = XML_ParserCreateNS(encoding,
|
||||
*namespace_separator);
|
||||
|
@ -512,7 +659,13 @@ xmlparse_dealloc( xmlparseobject *self )
|
|||
for( i=0; handler_info[i].name!=NULL; i++ ){
|
||||
Py_XDECREF( self->handlers[i] );
|
||||
}
|
||||
#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
|
||||
/* Code for versions before 1.6 */
|
||||
free(self);
|
||||
#else
|
||||
/* Code for versions 1.6 and later */
|
||||
PyObject_Del(self);
|
||||
#endif
|
||||
}
|
||||
|
||||
static int handlername2int( const char *name ){
|
||||
|
@ -541,6 +694,8 @@ xmlparse_getattr(xmlparseobject *self, char *name)
|
|||
if (strcmp(name, "ErrorByteIndex") == 0)
|
||||
return Py_BuildValue("l",
|
||||
XML_GetErrorByteIndex(self->itself));
|
||||
if (strcmp(name, "returns_unicode") == 0)
|
||||
return Py_BuildValue("i", self->returns_unicode);
|
||||
|
||||
handlernum=handlername2int( name );
|
||||
|
||||
|
@ -590,6 +745,22 @@ xmlparse_setattr( xmlparseobject *self, char *name, PyObject *v)
|
|||
return -1;
|
||||
}
|
||||
|
||||
if (strcmp(name, "returns_unicode") == 0) {
|
||||
PyObject *intobj = PyNumber_Int(v);
|
||||
if (intobj == NULL) return -1;
|
||||
if (PyInt_AsLong(intobj)) {
|
||||
#if PY_MAJOR_VERSION == 1 && PY_MINOR_VERSION < 6
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"Cannot return Unicode strings in Python 1.5");
|
||||
return -1;
|
||||
#else
|
||||
self->returns_unicode = 1;
|
||||
#endif
|
||||
}
|
||||
else self->returns_unicode = 0;
|
||||
Py_DECREF(intobj);
|
||||
return 0;
|
||||
}
|
||||
if( sethandler( self, name, v ) ){
|
||||
return 0;
|
||||
}
|
||||
|
@ -673,13 +844,14 @@ static struct PyMethodDef pyexpat_methods[] = {
|
|||
{NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
|
||||
};
|
||||
|
||||
|
||||
/* Initialization function for the module (*must* be called initpyexpat) */
|
||||
/* Module docstring */
|
||||
|
||||
/* Text of the module docstring. */
static char pyexpat_module_documentation[] = "Python wrapper for Expat parser.";
|
||||
|
||||
/* Initialization function for the module */
|
||||
|
||||
void
|
||||
initpyexpat(){
|
||||
PyObject *m, *d;
|
||||
|
@ -705,6 +877,13 @@ initpyexpat(){
|
|||
errors_module=PyModule_New( "errors" );
|
||||
PyDict_SetItemString(d,"errors", errors_module );
|
||||
|
||||
/* XXX When Expat supports some way of figuring out how it was
|
||||
compiled, this should check and set native_encoding
|
||||
appropriately.
|
||||
*/
|
||||
PyDict_SetItemString(d, "native_encoding",
|
||||
PyString_FromString("UTF-8") );
|
||||
|
||||
errors_dict=PyModule_GetDict( errors_module );
|
||||
|
||||
#define MYCONST(name) \
|
||||
|
@ -817,7 +996,7 @@ void pyxml_SetEndCdataSection( XML_Parser *parser,
|
|||
(pairsetter)XML_SetCdataSectionHandler);
|
||||
}
|
||||
|
||||
static struct HandlerInfo handler_info[]=
|
||||
statichere struct HandlerInfo handler_info[]=
|
||||
{{"StartElementHandler",
|
||||
pyxml_SetStartElementHandler,
|
||||
my_StartElementHandler},
|
||||
|
@ -867,4 +1046,3 @@ static struct HandlerInfo handler_info[]=
|
|||
{NULL, NULL, NULL } /* sentinel */
|
||||
};
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue