_elementtree.XMLParser._setevents should support any sequence, not just tuples

Also clean up some code around this
This commit is contained in:
Eli Bendersky 2013-05-19 09:01:49 -07:00
parent 5b6616dee3
commit 3a4fbd8241
3 changed files with 94 additions and 58 deletions

View File

@ -979,6 +979,21 @@ class IncrementalParserTest(unittest.TestCase):
parser.eof_received()
self.assertEqual(parser.root.tag, '{namespace}root')
def test_ns_events(self):
    # Request namespace events only, then check what is queued after
    # each stage of feeding the document.
    ns_parser = ET.IncrementalParser(events=('start-ns', 'end-ns'))

    # After the root start tag has been fed, the only queued event is the
    # default-namespace declaration (empty prefix).
    for chunk in ("<!-- comment -->\n", "<root xmlns='namespace'>\n"):
        self._feed(ns_parser, chunk)
    declared = list(ns_parser.events())
    self.assertEqual(declared, [('start-ns', ('', 'namespace'))])

    # Feed the rest of the document, with one end tag split across two
    # chunks; only the 'end-ns' event is expected afterwards.
    remainder = (
        "<element key='value'>text</element",
        ">\n",
        "<element>text</element>tail\n",
        "<empty-element/>\n",
        "</root>\n",
    )
    for chunk in remainder:
        self._feed(ns_parser, chunk)
    self.assertEqual(list(ns_parser.events()), [('end-ns', None)])
    ns_parser.eof_received()
def test_events(self):
parser = ET.IncrementalParser(events=())
self._feed(parser, "<root/>\n")
@ -1026,6 +1041,26 @@ class IncrementalParserTest(unittest.TestCase):
parser.eof_received()
self.assertEqual(parser.root.tag, 'root')
def test_events_sequence(self):
    # The *events* argument may be any iterable of event names, not only
    # a tuple or a list: try a set, then a hand-written iterator.
    class EventNameIter:
        # Bare iterator object implementing just __iter__ and __next__.
        def __init__(self):
            self._names = iter(['start', 'end', 'start-ns'])

        def __iter__(self):
            return self

        def __next__(self):
            return next(self._names)

    document = "<foo>bar</foo>"

    # Case 1: a set of event names.
    set_parser = ET.IncrementalParser(events={'end', 'start'})
    self._feed(set_parser, document)
    self.assert_event_tags(set_parser, [('start', 'foo'), ('end', 'foo')])

    # Case 2: an iterator instance.
    iter_parser = ET.IncrementalParser(events=EventNameIter())
    self._feed(iter_parser, document)
    self.assert_event_tags(iter_parser, [('start', 'foo'), ('end', 'foo')])
def test_unknown_event(self):
    # Constructing a parser with an unrecognised event name ('bogus')
    # must raise ValueError.
    self.assertRaises(ValueError, ET.IncrementalParser,
                      events=('start', 'end', 'bogus'))

View File

@ -1498,33 +1498,38 @@ class XMLParser:
except AttributeError:
pass # unknown
def _setevents(self, event_list, events):
def _setevents(self, events_queue, events_to_report):
# Internal API for IncrementalParser
# events_to_report: an iterable of event names to report during parsing
# (same as the *events* argument of IncrementalParser's constructor).
# events_queue: a list that the underlying parser will populate with
# the actual parsing events.
#
parser = self._parser
append = event_list.append
for event in events:
if event == "start":
append = events_queue.append
for event_name in events_to_report:
if event_name == "start":
parser.ordered_attributes = 1
parser.specified_attributes = 1
def handler(tag, attrib_in, event=event, append=append,
def handler(tag, attrib_in, event=event_name, append=append,
start=self._start_list):
append((event, start(tag, attrib_in)))
parser.StartElementHandler = handler
elif event == "end":
def handler(tag, event=event, append=append,
elif event_name == "end":
def handler(tag, event=event_name, append=append,
end=self._end):
append((event, end(tag)))
parser.EndElementHandler = handler
elif event == "start-ns":
def handler(prefix, uri, event=event, append=append):
elif event_name == "start-ns":
def handler(prefix, uri, event=event_name, append=append):
append((event, (prefix or "", uri or "")))
parser.StartNamespaceDeclHandler = handler
elif event == "end-ns":
def handler(prefix, event=event, append=append):
elif event_name == "end-ns":
def handler(prefix, event=event_name, append=append):
append((event, None))
parser.EndNamespaceDeclHandler = handler
else:
raise ValueError("unknown event %r" % event)
raise ValueError("unknown event %r" % event_name)
def _raiseerror(self, value):
err = ParseError(value)

View File

@ -3431,14 +3431,14 @@ static PyObject*
xmlparser_setevents(XMLParserObject *self, PyObject* args)
{
/* activate element event reporting */
Py_ssize_t i, seqlen;
TreeBuilderObject *target;
Py_ssize_t i;
TreeBuilderObject* target;
PyObject* events; /* event collector */
PyObject* event_set = Py_None;
if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events,
&event_set))
PyObject *events_queue;
PyObject *events_to_report = Py_None;
PyObject *events_seq;
if (!PyArg_ParseTuple(args, "O!|O:_setevents", &PyList_Type, &events_queue,
&events_to_report))
return NULL;
if (!TreeBuilder_CheckExact(self->target)) {
@ -3452,9 +3452,9 @@ xmlparser_setevents(XMLParserObject *self, PyObject* args)
target = (TreeBuilderObject*) self->target;
Py_INCREF(events);
Py_INCREF(events_queue);
Py_XDECREF(target->events);
target->events = events;
target->events = events_queue;
/* clear out existing events */
Py_CLEAR(target->start_event_obj);
@ -3462,69 +3462,65 @@ xmlparser_setevents(XMLParserObject *self, PyObject* args)
Py_CLEAR(target->start_ns_event_obj);
Py_CLEAR(target->end_ns_event_obj);
if (event_set == Py_None) {
if (events_to_report == Py_None) {
/* default is "end" only */
target->end_event_obj = PyUnicode_FromString("end");
Py_RETURN_NONE;
}
if (!PyTuple_Check(event_set)) /* FIXME: handle arbitrary sequences */
goto error;
if (!(events_seq = PySequence_Fast(events_to_report,
"events must be a sequence"))) {
return NULL;
}
for (i = 0; i < PyTuple_GET_SIZE(event_set); i++) {
PyObject* item = PyTuple_GET_ITEM(event_set, i);
char* event;
if (PyUnicode_Check(item)) {
event = _PyUnicode_AsString(item);
if (event == NULL)
goto error;
} else if (PyBytes_Check(item))
event = PyBytes_AS_STRING(item);
else {
goto error;
seqlen = PySequence_Size(events_seq);
for (i = 0; i < seqlen; ++i) {
PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
char *event_name = NULL;
if (PyUnicode_Check(event_name_obj)) {
event_name = _PyUnicode_AsString(event_name_obj);
} else if (PyBytes_Check(event_name_obj)) {
event_name = PyBytes_AS_STRING(event_name_obj);
}
if (strcmp(event, "start") == 0) {
Py_INCREF(item);
target->start_event_obj = item;
} else if (strcmp(event, "end") == 0) {
Py_INCREF(item);
if (event_name == NULL) {
Py_DECREF(events_seq);
PyErr_Format(PyExc_ValueError, "invalid events sequence");
return NULL;
} else if (strcmp(event_name, "start") == 0) {
Py_INCREF(event_name_obj);
target->start_event_obj = event_name_obj;
} else if (strcmp(event_name, "end") == 0) {
Py_INCREF(event_name_obj);
Py_XDECREF(target->end_event_obj);
target->end_event_obj = item;
} else if (strcmp(event, "start-ns") == 0) {
Py_INCREF(item);
target->end_event_obj = event_name_obj;
} else if (strcmp(event_name, "start-ns") == 0) {
Py_INCREF(event_name_obj);
Py_XDECREF(target->start_ns_event_obj);
target->start_ns_event_obj = item;
target->start_ns_event_obj = event_name_obj;
EXPAT(SetNamespaceDeclHandler)(
self->parser,
(XML_StartNamespaceDeclHandler) expat_start_ns_handler,
(XML_EndNamespaceDeclHandler) expat_end_ns_handler
);
} else if (strcmp(event, "end-ns") == 0) {
Py_INCREF(item);
} else if (strcmp(event_name, "end-ns") == 0) {
Py_INCREF(event_name_obj);
Py_XDECREF(target->end_ns_event_obj);
target->end_ns_event_obj = item;
target->end_ns_event_obj = event_name_obj;
EXPAT(SetNamespaceDeclHandler)(
self->parser,
(XML_StartNamespaceDeclHandler) expat_start_ns_handler,
(XML_EndNamespaceDeclHandler) expat_end_ns_handler
);
} else {
PyErr_Format(
PyExc_ValueError,
"unknown event '%s'", event
);
Py_DECREF(events_seq);
PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
return NULL;
}
}
Py_DECREF(events_seq);
Py_RETURN_NONE;
error:
PyErr_SetString(
PyExc_TypeError,
"invalid event tuple"
);
return NULL;
}
static PyMethodDef xmlparser_methods[] = {