Issue #14007: make XMLParser a real subclassable type exported from _elementtree. +cleanups
This commit is contained in:
parent
7e0229e90d
commit
52467b167e
|
@ -646,8 +646,8 @@ ElementTree Objects
|
||||||
|
|
||||||
Loads an external XML section into this element tree. *source* is a file
|
Loads an external XML section into this element tree. *source* is a file
|
||||||
name or :term:`file object`. *parser* is an optional parser instance.
|
name or :term:`file object`. *parser* is an optional parser instance.
|
||||||
If not given, the standard XMLParser parser is used. Returns the section
|
If not given, the standard :class:`XMLParser` parser is used. Returns the
|
||||||
root element.
|
section root element.
|
||||||
|
|
||||||
|
|
||||||
.. method:: write(file, encoding="us-ascii", xml_declaration=None, method="xml")
|
.. method:: write(file, encoding="us-ascii", xml_declaration=None, method="xml")
|
||||||
|
@ -767,9 +767,9 @@ XMLParser Objects
|
||||||
:class:`Element` structure builder for XML source data, based on the expat
|
:class:`Element` structure builder for XML source data, based on the expat
|
||||||
parser. *html* are predefined HTML entities. This flag is not supported by
|
parser. *html* are predefined HTML entities. This flag is not supported by
|
||||||
the current implementation. *target* is the target object. If omitted, the
|
the current implementation. *target* is the target object. If omitted, the
|
||||||
builder uses an instance of the standard TreeBuilder class. *encoding* [1]_
|
builder uses an instance of the standard :class:`TreeBuilder` class.
|
||||||
is optional. If given, the value overrides the encoding specified in the
|
*encoding* [1]_ is optional. If given, the value overrides the encoding
|
||||||
XML file.
|
specified in the XML file.
|
||||||
|
|
||||||
|
|
||||||
.. method:: close()
|
.. method:: close()
|
||||||
|
|
|
@ -2028,6 +2028,34 @@ class TreeBuilderTest(unittest.TestCase):
|
||||||
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
|
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
|
||||||
|
|
||||||
|
|
||||||
|
class XMLParserTest(unittest.TestCase):
|
||||||
|
sample1 = '<file><line>22</line></file>'
|
||||||
|
|
||||||
|
def _check_sample_element(self, e):
|
||||||
|
self.assertEqual(e.tag, 'file')
|
||||||
|
self.assertEqual(e[0].tag, 'line')
|
||||||
|
self.assertEqual(e[0].text, '22')
|
||||||
|
|
||||||
|
def test_constructor_args(self):
|
||||||
|
# Positional args. The first (html) is not supported, but should be
|
||||||
|
# nevertheless correctly accepted.
|
||||||
|
parser = ET.XMLParser(None, ET.TreeBuilder(), 'utf-8')
|
||||||
|
parser.feed(self.sample1)
|
||||||
|
self._check_sample_element(parser.close())
|
||||||
|
|
||||||
|
# Now as keyword args.
|
||||||
|
parser2 = ET.XMLParser(encoding='utf-8', html=[{}], target=ET.TreeBuilder())
|
||||||
|
parser2.feed(self.sample1)
|
||||||
|
self._check_sample_element(parser2.close())
|
||||||
|
|
||||||
|
def test_subclass(self):
|
||||||
|
class MyParser(ET.XMLParser):
|
||||||
|
pass
|
||||||
|
parser = MyParser()
|
||||||
|
parser.feed(self.sample1)
|
||||||
|
self._check_sample_element(parser.close())
|
||||||
|
|
||||||
|
|
||||||
class NoAcceleratorTest(unittest.TestCase):
|
class NoAcceleratorTest(unittest.TestCase):
|
||||||
# Test that the C accelerator was not imported for pyET
|
# Test that the C accelerator was not imported for pyET
|
||||||
def test_correct_import_pyET(self):
|
def test_correct_import_pyET(self):
|
||||||
|
@ -2245,6 +2273,7 @@ def test_main(module=pyET):
|
||||||
ElementTreeTest,
|
ElementTreeTest,
|
||||||
NamespaceParseTest,
|
NamespaceParseTest,
|
||||||
TreeBuilderTest,
|
TreeBuilderTest,
|
||||||
|
XMLParserTest,
|
||||||
KeywordArgsTest]
|
KeywordArgsTest]
|
||||||
if module is pyET:
|
if module is pyET:
|
||||||
# Run the tests specific to the Python implementation
|
# Run the tests specific to the Python implementation
|
||||||
|
|
|
@ -2257,6 +2257,9 @@ static struct PyExpat_CAPI* expat_capi;
|
||||||
#define EXPAT(func) (XML_##func)
|
#define EXPAT(func) (XML_##func)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
|
||||||
|
PyObject_Malloc, PyObject_Realloc, PyObject_Free};
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
PyObject_HEAD
|
PyObject_HEAD
|
||||||
|
|
||||||
|
@ -2671,121 +2674,125 @@ expat_unknown_encoding_handler(XMLParserObject *self, const XML_Char *name,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* -------------------------------------------------------------------- */
|
/* -------------------------------------------------------------------- */
|
||||||
/* constructor and destructor */
|
|
||||||
|
|
||||||
static PyObject*
|
static PyObject *
|
||||||
xmlparser(PyObject* self_, PyObject* args, PyObject* kw)
|
xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||||
{
|
{
|
||||||
XMLParserObject* self;
|
XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
|
||||||
/* FIXME: does this need to be static? */
|
if (self) {
|
||||||
static XML_Memory_Handling_Suite memory_handler;
|
self->parser = NULL;
|
||||||
|
self->target = self->entity = self->names = NULL;
|
||||||
PyObject* target = NULL;
|
self->handle_start = self->handle_data = self->handle_end = NULL;
|
||||||
char* encoding = NULL;
|
self->handle_comment = self->handle_pi = self->handle_close = NULL;
|
||||||
static char* kwlist[] = { "target", "encoding", NULL };
|
|
||||||
if (!PyArg_ParseTupleAndKeywords(args, kw, "|Oz:XMLParser", kwlist,
|
|
||||||
&target, &encoding))
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
#if defined(USE_PYEXPAT_CAPI)
|
|
||||||
if (!expat_capi) {
|
|
||||||
PyErr_SetString(
|
|
||||||
PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
|
|
||||||
);
|
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
#endif
|
return (PyObject *)self;
|
||||||
|
}
|
||||||
|
|
||||||
self = PyObject_New(XMLParserObject, &XMLParser_Type);
|
static int
|
||||||
if (self == NULL)
|
xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
|
||||||
return NULL;
|
{
|
||||||
|
XMLParserObject *self_xp = (XMLParserObject *)self;
|
||||||
|
PyObject *target = NULL, *html = NULL;
|
||||||
|
char *encoding = NULL;
|
||||||
|
/* Keyword names matching the "|OOz" format below.  The array must end
   with a NULL sentinel: PyArg_ParseTupleAndKeywords walks it until it
   finds one. */
static char *kwlist[] = {"html", "target", "encoding", NULL};
|
||||||
|
|
||||||
self->entity = PyDict_New();
|
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|OOz:XMLParser", kwlist,
|
||||||
if (!self->entity) {
|
&html, &target, &encoding)) {
|
||||||
PyObject_Del(self);
|
return -1;
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
self->names = PyDict_New();
|
self_xp->entity = PyDict_New();
|
||||||
if (!self->names) {
|
if (!self_xp->entity)
|
||||||
PyObject_Del(self->entity);
|
return -1;
|
||||||
PyObject_Del(self);
|
|
||||||
return NULL;
|
self_xp->names = PyDict_New();
|
||||||
|
if (!self_xp->names) {
|
||||||
|
/* Py_CLEAR also resets the field to NULL, so xmlparser_gc_clear does not
   release the same reference again when the object is deallocated. */
Py_CLEAR(self_xp->entity);
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
memory_handler.malloc_fcn = PyObject_Malloc;
|
self_xp->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
|
||||||
memory_handler.realloc_fcn = PyObject_Realloc;
|
if (!self_xp->parser) {
|
||||||
memory_handler.free_fcn = PyObject_Free;
|
/* Py_CLEAR also resets the field to NULL, so xmlparser_gc_clear does not
   release the same reference again when the object is deallocated. */
Py_CLEAR(self_xp->entity);
|
||||||
|
/* Py_CLEAR also resets the field to NULL, so xmlparser_gc_clear does not
   release the same reference again when the object is deallocated. */
Py_CLEAR(self_xp->names);
|
||||||
self->parser = EXPAT(ParserCreate_MM)(encoding, &memory_handler, "}");
|
|
||||||
if (!self->parser) {
|
|
||||||
PyObject_Del(self->names);
|
|
||||||
PyObject_Del(self->entity);
|
|
||||||
PyObject_Del(self);
|
|
||||||
PyErr_NoMemory();
|
PyErr_NoMemory();
|
||||||
return NULL;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* setup target handlers */
|
if (target) {
|
||||||
if (!target) {
|
Py_INCREF(target);
|
||||||
|
} else {
|
||||||
target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
|
target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
|
||||||
if (!target) {
|
if (!target) {
|
||||||
EXPAT(ParserFree)(self->parser);
|
/* Py_CLEAR also resets the field to NULL, so xmlparser_gc_clear does not
   release the same reference again when the object is deallocated. */
Py_CLEAR(self_xp->entity);
|
||||||
PyObject_Del(self->names);
|
/* Py_CLEAR also resets the field to NULL, so xmlparser_gc_clear does not
   release the same reference again when the object is deallocated. */
Py_CLEAR(self_xp->names);
|
||||||
PyObject_Del(self->entity);
|
EXPAT(ParserFree)(self_xp->parser);
|
||||||
PyObject_Del(self);
|
return -1;
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
} else
|
}
|
||||||
Py_INCREF(target);
|
self_xp->target = target;
|
||||||
self->target = target;
|
|
||||||
|
|
||||||
self->handle_start = PyObject_GetAttrString(target, "start");
|
self_xp->handle_start = PyObject_GetAttrString(target, "start");
|
||||||
self->handle_data = PyObject_GetAttrString(target, "data");
|
self_xp->handle_data = PyObject_GetAttrString(target, "data");
|
||||||
self->handle_end = PyObject_GetAttrString(target, "end");
|
self_xp->handle_end = PyObject_GetAttrString(target, "end");
|
||||||
self->handle_comment = PyObject_GetAttrString(target, "comment");
|
self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
|
||||||
self->handle_pi = PyObject_GetAttrString(target, "pi");
|
self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
|
||||||
self->handle_close = PyObject_GetAttrString(target, "close");
|
self_xp->handle_close = PyObject_GetAttrString(target, "close");
|
||||||
|
|
||||||
PyErr_Clear();
|
PyErr_Clear();
|
||||||
|
|
||||||
/* configure parser */
|
/* configure parser */
|
||||||
EXPAT(SetUserData)(self->parser, self);
|
EXPAT(SetUserData)(self_xp->parser, self_xp);
|
||||||
EXPAT(SetElementHandler)(
|
EXPAT(SetElementHandler)(
|
||||||
self->parser,
|
self_xp->parser,
|
||||||
(XML_StartElementHandler) expat_start_handler,
|
(XML_StartElementHandler) expat_start_handler,
|
||||||
(XML_EndElementHandler) expat_end_handler
|
(XML_EndElementHandler) expat_end_handler
|
||||||
);
|
);
|
||||||
EXPAT(SetDefaultHandlerExpand)(
|
EXPAT(SetDefaultHandlerExpand)(
|
||||||
self->parser,
|
self_xp->parser,
|
||||||
(XML_DefaultHandler) expat_default_handler
|
(XML_DefaultHandler) expat_default_handler
|
||||||
);
|
);
|
||||||
EXPAT(SetCharacterDataHandler)(
|
EXPAT(SetCharacterDataHandler)(
|
||||||
self->parser,
|
self_xp->parser,
|
||||||
(XML_CharacterDataHandler) expat_data_handler
|
(XML_CharacterDataHandler) expat_data_handler
|
||||||
);
|
);
|
||||||
if (self->handle_comment)
|
if (self_xp->handle_comment)
|
||||||
EXPAT(SetCommentHandler)(
|
EXPAT(SetCommentHandler)(
|
||||||
self->parser,
|
self_xp->parser,
|
||||||
(XML_CommentHandler) expat_comment_handler
|
(XML_CommentHandler) expat_comment_handler
|
||||||
);
|
);
|
||||||
if (self->handle_pi)
|
if (self_xp->handle_pi)
|
||||||
EXPAT(SetProcessingInstructionHandler)(
|
EXPAT(SetProcessingInstructionHandler)(
|
||||||
self->parser,
|
self_xp->parser,
|
||||||
(XML_ProcessingInstructionHandler) expat_pi_handler
|
(XML_ProcessingInstructionHandler) expat_pi_handler
|
||||||
);
|
);
|
||||||
EXPAT(SetUnknownEncodingHandler)(
|
EXPAT(SetUnknownEncodingHandler)(
|
||||||
self->parser,
|
self_xp->parser,
|
||||||
(XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
|
(XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
|
||||||
);
|
);
|
||||||
|
|
||||||
ALLOC(sizeof(XMLParserObject), "create expatparser");
|
return 0;
|
||||||
|
|
||||||
return (PyObject*) self;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static int
|
||||||
xmlparser_dealloc(XMLParserObject* self)
|
xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
|
||||||
|
{
|
||||||
|
Py_VISIT(self->handle_close);
|
||||||
|
Py_VISIT(self->handle_pi);
|
||||||
|
Py_VISIT(self->handle_comment);
|
||||||
|
Py_VISIT(self->handle_end);
|
||||||
|
Py_VISIT(self->handle_data);
|
||||||
|
Py_VISIT(self->handle_start);
|
||||||
|
|
||||||
|
Py_VISIT(self->target);
|
||||||
|
Py_VISIT(self->entity);
|
||||||
|
Py_VISIT(self->names);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
xmlparser_gc_clear(XMLParserObject *self)
|
||||||
{
|
{
|
||||||
EXPAT(ParserFree)(self->parser);
|
EXPAT(ParserFree)(self->parser);
|
||||||
|
|
||||||
|
@ -2796,17 +2803,20 @@ xmlparser_dealloc(XMLParserObject* self)
|
||||||
Py_XDECREF(self->handle_data);
|
Py_XDECREF(self->handle_data);
|
||||||
Py_XDECREF(self->handle_start);
|
Py_XDECREF(self->handle_start);
|
||||||
|
|
||||||
Py_DECREF(self->target);
|
Py_XDECREF(self->target);
|
||||||
Py_DECREF(self->entity);
|
Py_XDECREF(self->entity);
|
||||||
Py_DECREF(self->names);
|
Py_XDECREF(self->names);
|
||||||
|
|
||||||
RELEASE(sizeof(XMLParserObject), "destroy expatparser");
|
return 0;
|
||||||
|
|
||||||
PyObject_Del(self);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* -------------------------------------------------------------------- */
|
static void
|
||||||
/* methods (in alphabetical order) */
|
xmlparser_dealloc(XMLParserObject* self)
|
||||||
|
{
|
||||||
|
PyObject_GC_UnTrack(self);
|
||||||
|
xmlparser_gc_clear(self);
|
||||||
|
Py_TYPE(self)->tp_free((PyObject *)self);
|
||||||
|
}
|
||||||
|
|
||||||
LOCAL(PyObject*)
|
LOCAL(PyObject*)
|
||||||
expat_parse(XMLParserObject* self, char* data, int data_len, int final)
|
expat_parse(XMLParserObject* self, char* data, int data_len, int final)
|
||||||
|
@ -3083,31 +3093,42 @@ static PyTypeObject XMLParser_Type = {
|
||||||
PyVarObject_HEAD_INIT(NULL, 0)
|
PyVarObject_HEAD_INIT(NULL, 0)
|
||||||
"XMLParser", sizeof(XMLParserObject), 0,
|
"XMLParser", sizeof(XMLParserObject), 0,
|
||||||
/* methods */
|
/* methods */
|
||||||
(destructor)xmlparser_dealloc, /* tp_dealloc */
|
(destructor)xmlparser_dealloc, /* tp_dealloc */
|
||||||
0, /* tp_print */
|
0, /* tp_print */
|
||||||
0, /* tp_getattr */
|
0, /* tp_getattr */
|
||||||
0, /* tp_setattr */
|
0, /* tp_setattr */
|
||||||
0, /* tp_reserved */
|
0, /* tp_reserved */
|
||||||
0, /* tp_repr */
|
0, /* tp_repr */
|
||||||
0, /* tp_as_number */
|
0, /* tp_as_number */
|
||||||
0, /* tp_as_sequence */
|
0, /* tp_as_sequence */
|
||||||
0, /* tp_as_mapping */
|
0, /* tp_as_mapping */
|
||||||
0, /* tp_hash */
|
0, /* tp_hash */
|
||||||
0, /* tp_call */
|
0, /* tp_call */
|
||||||
0, /* tp_str */
|
0, /* tp_str */
|
||||||
(getattrofunc)xmlparser_getattro, /* tp_getattro */
|
(getattrofunc)xmlparser_getattro, /* tp_getattro */
|
||||||
0, /* tp_setattro */
|
0, /* tp_setattro */
|
||||||
0, /* tp_as_buffer */
|
0, /* tp_as_buffer */
|
||||||
Py_TPFLAGS_DEFAULT, /* tp_flags */
|
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
|
||||||
0, /* tp_doc */
|
/* tp_flags */
|
||||||
0, /* tp_traverse */
|
0, /* tp_doc */
|
||||||
0, /* tp_clear */
|
(traverseproc)xmlparser_gc_traverse, /* tp_traverse */
|
||||||
0, /* tp_richcompare */
|
(inquiry)xmlparser_gc_clear, /* tp_clear */
|
||||||
0, /* tp_weaklistoffset */
|
0, /* tp_richcompare */
|
||||||
0, /* tp_iter */
|
0, /* tp_weaklistoffset */
|
||||||
0, /* tp_iternext */
|
0, /* tp_iter */
|
||||||
xmlparser_methods, /* tp_methods */
|
0, /* tp_iternext */
|
||||||
0, /* tp_members */
|
xmlparser_methods, /* tp_methods */
|
||||||
|
0, /* tp_members */
|
||||||
|
0, /* tp_getset */
|
||||||
|
0, /* tp_base */
|
||||||
|
0, /* tp_dict */
|
||||||
|
0, /* tp_descr_get */
|
||||||
|
0, /* tp_descr_set */
|
||||||
|
0, /* tp_dictoffset */
|
||||||
|
(initproc)xmlparser_init, /* tp_init */
|
||||||
|
PyType_GenericAlloc, /* tp_alloc */
|
||||||
|
xmlparser_new, /* tp_new */
|
||||||
|
0, /* tp_free */
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -3117,9 +3138,6 @@ static PyTypeObject XMLParser_Type = {
|
||||||
|
|
||||||
static PyMethodDef _functions[] = {
|
static PyMethodDef _functions[] = {
|
||||||
{"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
|
{"SubElement", (PyCFunction) subelement, METH_VARARGS|METH_KEYWORDS},
|
||||||
#if defined(USE_EXPAT)
|
|
||||||
{"XMLParser", (PyCFunction) xmlparser, METH_VARARGS|METH_KEYWORDS},
|
|
||||||
#endif
|
|
||||||
{NULL, NULL}
|
{NULL, NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -3214,8 +3232,15 @@ PyInit__elementtree(void)
|
||||||
expat_capi->size < sizeof(struct PyExpat_CAPI) ||
|
expat_capi->size < sizeof(struct PyExpat_CAPI) ||
|
||||||
expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
|
expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
|
||||||
expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
|
expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
|
||||||
expat_capi->MICRO_VERSION != XML_MICRO_VERSION)
|
expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
|
||||||
expat_capi = NULL;
|
expat_capi = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!expat_capi) {
|
||||||
|
PyErr_SetString(
|
||||||
|
PyExc_RuntimeError, "cannot load dispatch table from pyexpat"
|
||||||
|
);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -3231,5 +3256,10 @@ PyInit__elementtree(void)
|
||||||
Py_INCREF((PyObject *)&TreeBuilder_Type);
|
Py_INCREF((PyObject *)&TreeBuilder_Type);
|
||||||
PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
|
PyModule_AddObject(m, "TreeBuilder", (PyObject *)&TreeBuilder_Type);
|
||||||
|
|
||||||
|
#if defined(USE_EXPAT)
|
||||||
|
Py_INCREF((PyObject *)&XMLParser_Type);
|
||||||
|
PyModule_AddObject(m, "XMLParser", (PyObject *)&XMLParser_Type);
|
||||||
|
#endif
|
||||||
|
|
||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue