Merge with PyXML 1.71:

60: Added support for the SkippedEntityHandler, new in Expat 1.95.4.
61: Added support for namespace prefixes, which can be enabled by setting the
    "namespace_prefixes" attribute on the parser object.
65: Disable profiling changes for Python 2.0 and 2.1.
66: Update pyexpat to export the Expat 1.95.5 XML_GetFeatureList()
    information, and tighten up a type declaration now that Expat is using
    an incomplete type rather than a void * for the XML_Parser type.
67: Clarified a comment.
    Added support for XML_UseForeignDTD(), new in Expat 1.95.5.
68: Refactor to avoid partial duplication of the code to construct an
    ExpatError instance, and actually conform to the API for the exception
    instance as well.
69: Remove some spurious trailing whitespace.
    Add a special external-entity-ref handler that gets installed once a
    handler has raised a Python exception; this can cancel actual parsing
    earlier if there's an external entity reference in the input data
    after the Python exception has been raised.
70: Untabify APPEND.
71: Backport PyMODINIT_FUNC for 2.2 and earlier.
This commit is contained in:
Martin v. Löwis 2003-01-21 10:58:18 +00:00
parent dfc33fd8db
commit 069dde2be3
1 changed files with 136 additions and 16 deletions

View File

@ -6,6 +6,14 @@
#include "expat.h" #include "expat.h"
#ifndef PyDoc_STRVAR #ifndef PyDoc_STRVAR
/*
* fdrake says:
* Don't change the PyDoc_STR macro definition to (str), because
* '''the parentheses cause compile failures
* ("non-constant static initializer" or something like that)
* on some platforms (Irix?)'''
*/
#define PyDoc_STR(str) str #define PyDoc_STR(str) str
#define PyDoc_VAR(name) static char name[] #define PyDoc_VAR(name) static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str) #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
@ -14,6 +22,7 @@
#if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2) #if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
/* In Python 2.0 and 2.1, disabling Unicode was not possible. */ /* In Python 2.0 and 2.1, disabling Unicode was not possible. */
#define Py_USING_UNICODE #define Py_USING_UNICODE
#define NOFIX_TRACE
#endif #endif
enum HandlerTypes { enum HandlerTypes {
@ -38,6 +47,7 @@ enum HandlerTypes {
XmlDecl, XmlDecl,
ElementDecl, ElementDecl,
AttlistDecl, AttlistDecl,
SkippedEntity,
_DummyDecl _DummyDecl
}; };
@ -56,6 +66,7 @@ typedef struct {
int ordered_attributes; /* Return attributes as a list. */ int ordered_attributes; /* Return attributes as a list. */
int specified_attributes; /* Report only specified attributes. */ int specified_attributes; /* Report only specified attributes. */
int in_callback; /* Is a callback active? */ int in_callback; /* Is a callback active? */
int ns_prefixes; /* Namespace-triplets mode? */
XML_Char *buffer; /* Buffer used when accumulating characters */ XML_Char *buffer; /* Buffer used when accumulating characters */
/* NULL if not enabled */ /* NULL if not enabled */
int buffer_size; /* Size of buffer, in XML_Char units */ int buffer_size; /* Size of buffer, in XML_Char units */
@ -100,14 +111,13 @@ set_error_attr(PyObject *err, char *name, int value)
* information. Always returns NULL. * information. Always returns NULL.
*/ */
static PyObject * static PyObject *
set_error(xmlparseobject *self) set_error(xmlparseobject *self, enum XML_Error code)
{ {
PyObject *err; PyObject *err;
char buffer[256]; char buffer[256];
XML_Parser parser = self->itself; XML_Parser parser = self->itself;
int lineno = XML_GetErrorLineNumber(parser); int lineno = XML_GetErrorLineNumber(parser);
int column = XML_GetErrorColumnNumber(parser); int column = XML_GetErrorColumnNumber(parser);
enum XML_Error code = XML_GetErrorCode(parser);
/* There is no risk of overflowing this buffer, since /* There is no risk of overflowing this buffer, since
even for 64-bit integers, there is sufficient space. */ even for 64-bit integers, there is sufficient space. */
@ -207,10 +217,25 @@ conv_string_len_to_utf8(const XML_Char *str, int len)
static void clear_handlers(xmlparseobject *self, int initial); static void clear_handlers(xmlparseobject *self, int initial);
/* Replacement external-entity-ref handler for a parser on which an
   error has already been detected.  Every argument is ignored and the
   handler unconditionally reports 0, in the hope that parsing gets cut
   short; it can only make a difference when the input actually
   contains an external entity reference. */
static int
error_external_entity_ref_handler(XML_Parser parser,
                                  const XML_Char *context,
                                  const XML_Char *base,
                                  const XML_Char *systemId,
                                  const XML_Char *publicId)
{
    const int handler_status = 0;

    return handler_status;
}
static void static void
flag_error(xmlparseobject *self) flag_error(xmlparseobject *self)
{ {
clear_handlers(self, 0); clear_handlers(self, 0);
XML_SetExternalEntityRefHandler(self->itself,
error_external_entity_ref_handler);
} }
static PyCodeObject* static PyCodeObject*
@ -264,6 +289,7 @@ getcode(enum HandlerTypes slot, char* func_name, int lineno)
return NULL; return NULL;
} }
#ifndef NOFIX_TRACE
static int static int
trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val) trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
{ {
@ -290,6 +316,7 @@ trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
} }
return result; return result;
} }
#endif
static PyObject* static PyObject*
call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args) call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args)
@ -310,19 +337,23 @@ call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args)
if (f == NULL) if (f == NULL)
return NULL; return NULL;
tstate->frame = f; tstate->frame = f;
#ifndef NOFIX_TRACE
if (trace_frame(tstate, f, PyTrace_CALL, Py_None)) { if (trace_frame(tstate, f, PyTrace_CALL, Py_None)) {
Py_DECREF(f); Py_DECREF(f);
return NULL; return NULL;
} }
#endif
res = PyEval_CallObject(func, args); res = PyEval_CallObject(func, args);
if (res == NULL && tstate->curexc_traceback == NULL) if (res == NULL && tstate->curexc_traceback == NULL)
PyTraceBack_Here(f); PyTraceBack_Here(f);
#ifndef NOFIX_TRACE
else { else {
if (trace_frame(tstate, f, PyTrace_RETURN, res)) { if (trace_frame(tstate, f, PyTrace_RETURN, res)) {
Py_XDECREF(res); Py_XDECREF(res);
res = NULL; res = NULL;
} }
} }
#endif
tstate->frame = f->f_back; tstate->frame = f->f_back;
Py_DECREF(f); Py_DECREF(f);
return res; return res;
@ -331,7 +362,7 @@ call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args)
#ifndef Py_USING_UNICODE #ifndef Py_USING_UNICODE
#define STRING_CONV_FUNC conv_string_to_utf8 #define STRING_CONV_FUNC conv_string_to_utf8
#else #else
/* Python 2.0 and later versions */ /* Python 2.0 and later versions, when built with Unicode support */
#define STRING_CONV_FUNC (self->returns_unicode \ #define STRING_CONV_FUNC (self->returns_unicode \
? conv_string_to_unicode : conv_string_to_utf8) ? conv_string_to_unicode : conv_string_to_utf8)
#endif #endif
@ -690,6 +721,13 @@ VOID_HANDLER(AttlistDecl,
STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt, STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
isrequired)) isrequired))
/* SkippedEntity handler: the callback arguments are built with the
   "Ni" format from the entity name (passed through string_intern())
   and the is_parameter_entity flag.
   NOTE(review): the generated function comes from the VOID_HANDLER
   macro, which is defined outside this excerpt -- presumably it
   forwards these arguments to the Python-level handler; confirm
   against the macro definition. */
VOID_HANDLER(SkippedEntity,
(void *userData,
const XML_Char *entityName,
int is_parameter_entity),
("Ni",
string_intern(self, entityName), is_parameter_entity))
VOID_HANDLER(NotationDecl, VOID_HANDLER(NotationDecl,
(void *userData, (void *userData,
const XML_Char *notationName, const XML_Char *notationName,
@ -784,7 +822,7 @@ get_parse_result(xmlparseobject *self, int rv)
return NULL; return NULL;
} }
if (rv == 0) { if (rv == 0) {
return set_error(self); return set_error(self, XML_GetErrorCode(self->itself));
} }
if (flush_character_buffer(self) < 0) { if (flush_character_buffer(self) < 0) {
return NULL; return NULL;
@ -1023,6 +1061,7 @@ xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
new_parser->ordered_attributes = self->ordered_attributes; new_parser->ordered_attributes = self->ordered_attributes;
new_parser->specified_attributes = self->specified_attributes; new_parser->specified_attributes = self->specified_attributes;
new_parser->in_callback = 0; new_parser->in_callback = 0;
new_parser->ns_prefixes = self->ns_prefixes;
new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context, new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
encoding); encoding);
new_parser->handlers = 0; new_parser->handlers = 0;
@ -1083,22 +1122,50 @@ xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
return PyInt_FromLong(flag); return PyInt_FromLong(flag);
} }
PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
"UseForeignDTD([flag])\n\
Allows the application to provide an artificial external subset if one is\n\
not specified as part of the document instance. This readily allows the\n\
use of a 'default' document type controlled by the application, while still\n\
getting the advantage of providing document type information to the parser.\n\
'flag' defaults to True if not provided.");
/* Implementation of the parser object's UseForeignDTD([flag]) method.
 *
 * 'args' may carry one optional object; its truth value selects the
 * setting handed to XML_UseForeignDTD().  When the argument is omitted
 * the flag defaults to XML_TRUE.
 *
 * Returns a new reference to None on success.  Returns NULL with an
 * exception set when argument parsing fails, when evaluating the truth
 * value of 'flag' raises, or (through set_error()) when Expat reports
 * anything other than XML_ERROR_NONE.
 */
static PyObject *
xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
{
    PyObject *flagobj = NULL;
    XML_Bool flag = XML_TRUE;
    enum XML_Error rc;

    if (!PyArg_ParseTuple(args, "|O:UseForeignDTD", &flagobj))
        return NULL;
    if (flagobj != NULL) {
        /* PyObject_IsTrue() yields -1 with an exception set when the
           truth test itself fails; that must be propagated rather
           than being read as "true". */
        int truth = PyObject_IsTrue(flagobj);

        if (truth < 0)
            return NULL;
        flag = truth ? XML_TRUE : XML_FALSE;
    }
    rc = XML_UseForeignDTD(self->itself, flag);
    if (rc != XML_ERROR_NONE) {
        return set_error(self, rc);
    }
    Py_INCREF(Py_None);
    return Py_None;
}
static struct PyMethodDef xmlparse_methods[] = { static struct PyMethodDef xmlparse_methods[] = {
{"Parse", (PyCFunction)xmlparse_Parse, {"Parse", (PyCFunction)xmlparse_Parse,
METH_VARARGS, xmlparse_Parse__doc__}, METH_VARARGS, xmlparse_Parse__doc__},
{"ParseFile", (PyCFunction)xmlparse_ParseFile, {"ParseFile", (PyCFunction)xmlparse_ParseFile,
METH_VARARGS, xmlparse_ParseFile__doc__}, METH_VARARGS, xmlparse_ParseFile__doc__},
{"SetBase", (PyCFunction)xmlparse_SetBase, {"SetBase", (PyCFunction)xmlparse_SetBase,
METH_VARARGS, xmlparse_SetBase__doc__}, METH_VARARGS, xmlparse_SetBase__doc__},
{"GetBase", (PyCFunction)xmlparse_GetBase, {"GetBase", (PyCFunction)xmlparse_GetBase,
METH_VARARGS, xmlparse_GetBase__doc__}, METH_VARARGS, xmlparse_GetBase__doc__},
{"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate, {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__}, METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
{"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing, {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
METH_VARARGS, xmlparse_SetParamEntityParsing__doc__}, METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
{"GetInputContext", (PyCFunction)xmlparse_GetInputContext, {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
METH_VARARGS, xmlparse_GetInputContext__doc__}, METH_VARARGS, xmlparse_GetInputContext__doc__},
{NULL, NULL} /* sentinel */ {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
METH_VARARGS, xmlparse_UseForeignDTD__doc__},
{NULL, NULL} /* sentinel */
}; };
/* ---------- */ /* ---------- */
@ -1184,6 +1251,7 @@ newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
self->ordered_attributes = 0; self->ordered_attributes = 0;
self->specified_attributes = 0; self->specified_attributes = 0;
self->in_callback = 0; self->in_callback = 0;
self->ns_prefixes = 0;
self->handlers = NULL; self->handlers = NULL;
if (namespace_separator != NULL) { if (namespace_separator != NULL) {
self->itself = XML_ParserCreateNS(encoding, *namespace_separator); self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
@ -1315,6 +1383,8 @@ xmlparse_getattr(xmlparseobject *self, char *name)
if (strcmp(name, "buffer_used") == 0) if (strcmp(name, "buffer_used") == 0)
return PyInt_FromLong((long) self->buffer_used); return PyInt_FromLong((long) self->buffer_used);
} }
if (strcmp(name, "namespace_prefixes") == 0)
return get_pybool(self->ns_prefixes);
if (strcmp(name, "ordered_attributes") == 0) if (strcmp(name, "ordered_attributes") == 0)
return get_pybool(self->ordered_attributes); return get_pybool(self->ordered_attributes);
if (strcmp(name, "returns_unicode") == 0) if (strcmp(name, "returns_unicode") == 0)
@ -1333,12 +1403,12 @@ xmlparse_getattr(xmlparseobject *self, char *name)
} }
#define APPEND(list, str) \ #define APPEND(list, str) \
do { \ do { \
PyObject *o = PyString_FromString(str); \ PyObject *o = PyString_FromString(str); \
if (o != NULL) \ if (o != NULL) \
PyList_Append(list, o); \ PyList_Append(list, o); \
Py_XDECREF(o); \ Py_XDECREF(o); \
} while (0) } while (0)
if (strcmp(name, "__members__") == 0) { if (strcmp(name, "__members__") == 0) {
int i; int i;
@ -1356,6 +1426,7 @@ xmlparse_getattr(xmlparseobject *self, char *name)
APPEND(rc, "buffer_size"); APPEND(rc, "buffer_size");
APPEND(rc, "buffer_text"); APPEND(rc, "buffer_text");
APPEND(rc, "buffer_used"); APPEND(rc, "buffer_used");
APPEND(rc, "namespace_prefixes");
APPEND(rc, "ordered_attributes"); APPEND(rc, "ordered_attributes");
APPEND(rc, "returns_unicode"); APPEND(rc, "returns_unicode");
APPEND(rc, "specified_attributes"); APPEND(rc, "specified_attributes");
@ -1416,6 +1487,14 @@ xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v)
} }
return 0; return 0;
} }
if (strcmp(name, "namespace_prefixes") == 0) {
if (PyObject_IsTrue(v))
self->ns_prefixes = 1;
else
self->ns_prefixes = 0;
XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
return 0;
}
if (strcmp(name, "ordered_attributes") == 0) { if (strcmp(name, "ordered_attributes") == 0) {
if (PyObject_IsTrue(v)) if (PyObject_IsTrue(v))
self->ordered_attributes = 1; self->ordered_attributes = 1;
@ -1514,7 +1593,7 @@ static PyTypeObject Xmlparsetype = {
#else #else
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/
#endif #endif
Xmlparsetype__doc__, /* Documentation string */ Xmlparsetype__doc__, /* tp_doc - Documentation string */
#ifdef WITH_CYCLE_GC #ifdef WITH_CYCLE_GC
(traverseproc)xmlparse_traverse, /* tp_traverse */ (traverseproc)xmlparse_traverse, /* tp_traverse */
(inquiry)xmlparse_clear /* tp_clear */ (inquiry)xmlparse_clear /* tp_clear */
@ -1633,9 +1712,18 @@ get_version_string(void)
#define MODULE_INITFUNC initpyexpat #define MODULE_INITFUNC initpyexpat
#endif #endif
/* Fallback for Python headers that do not define PyMODINIT_FUNC
   themselves: the module init function is declared as returning void,
   and is additionally marked __declspec(dllexport) when MS_WINDOWS is
   defined. */
#ifndef PyMODINIT_FUNC
# ifdef MS_WINDOWS
# define PyMODINIT_FUNC __declspec(dllexport) void
# else
# define PyMODINIT_FUNC void
# endif
#endif
PyMODINIT_FUNC MODULE_INITFUNC(void); /* avoid compiler warnings */ PyMODINIT_FUNC MODULE_INITFUNC(void); /* avoid compiler warnings */
PyMODINIT_FUNC MODULE_INITFUNC(void) PyMODINIT_FUNC
MODULE_INITFUNC(void)
{ {
PyObject *m, *d; PyObject *m, *d;
PyObject *errmod_name = PyString_FromString(MODULE_NAME ".errors"); PyObject *errmod_name = PyString_FromString(MODULE_NAME ".errors");
@ -1714,6 +1802,35 @@ PyMODINIT_FUNC MODULE_INITFUNC(void)
/* Don't core dump later! */ /* Don't core dump later! */
return; return;
{
    /* Best-effort export of the list returned by XML_GetFeatureList()
       as the module attribute "features": a list of (name, value)
       tuples, one per entry up to the XML_FEATURE_END terminator.

       This is optional information, so every failure is handled the
       same way: the partially built list is discarded, the pending
       exception is cleared, and the attribute is simply not added.
       A truncated list is never published. */
    const XML_Feature *features = XML_GetFeatureList();
    PyObject *list = PyList_New(0);

    if (list != NULL) {
        int i;

        for (i = 0; features[i].feature != XML_FEATURE_END; ++i) {
            int ok;
            PyObject *item = Py_BuildValue("si", features[i].name,
                                           features[i].value);
            if (item == NULL) {
                Py_DECREF(list);
                list = NULL;
                break;
            }
            ok = PyList_Append(list, item);
            Py_DECREF(item);
            if (ok < 0) {
                Py_DECREF(list);
                list = NULL;
                break;
            }
        }
        /* PyModule_AddObject() takes over our reference only when it
           succeeds; on failure the list still belongs to us. */
        if (list != NULL && PyModule_AddObject(m, "features", list) < 0) {
            Py_DECREF(list);
            list = NULL;
        }
    }
    if (list == NULL) {
        /* just ignore it */
        PyErr_Clear();
    }
}
#define MYCONST(name) \ #define MYCONST(name) \
PyModule_AddStringConstant(errors_module, #name, \ PyModule_AddStringConstant(errors_module, #name, \
(char*)XML_ErrorString(name)) (char*)XML_ErrorString(name))
@ -1852,6 +1969,9 @@ static struct HandlerInfo handler_info[] = {
{"AttlistDeclHandler", {"AttlistDeclHandler",
(xmlhandlersetter)XML_SetAttlistDeclHandler, (xmlhandlersetter)XML_SetAttlistDeclHandler,
(xmlhandler)my_AttlistDeclHandler}, (xmlhandler)my_AttlistDeclHandler},
{"SkippedEntityHandler",
(xmlhandlersetter)XML_SetSkippedEntityHandler,
(xmlhandler)my_SkippedEntityHandler},
{NULL, NULL, NULL} /* sentinel */ {NULL, NULL, NULL} /* sentinel */
}; };