From 43851a202cabce1e6be699e7177735c778b6697e Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Wed, 1 May 2019 21:20:38 +0200 Subject: [PATCH] bpo-36673: Implement comment/PI parsing support for the TreeBuilder in ElementTree. (#12883) * bpo-36673: Implement comment/PI parsing support for the TreeBuilder in ElementTree. * bpo-36673: Rewrite the comment/PI factory handling for the TreeBuilder in "_elementtree" to make it use the same factories as the ElementTree module, and to make it explicit when the comments/PIs are inserted into the tree and when they are not (which is the default). --- Doc/library/xml.etree.elementtree.rst | 65 +++- Lib/test/test_xml_etree.py | 90 ++++- Lib/xml/etree/ElementTree.py | 67 +++- .../2019-04-20-09-50-32.bpo-36673.XF4Egb.rst | 3 + Modules/_elementtree.c | 331 ++++++++++++++++-- Modules/clinic/_elementtree.c.h | 128 ++++++- 6 files changed, 630 insertions(+), 54 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2019-04-20-09-50-32.bpo-36673.XF4Egb.rst diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index 9e2c295867c..c9e04c2fc8f 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -523,8 +523,9 @@ Functions Parses an XML section into an element tree incrementally, and reports what's going on to the user. *source* is a filename or :term:`file object` containing XML data. *events* is a sequence of events to report back. The - supported events are the strings ``"start"``, ``"end"``, ``"start-ns"`` and - ``"end-ns"`` (the "ns" events are used to get detailed namespace + supported events are the strings ``"start"``, ``"end"``, ``"comment"``, + ``"pi"``, ``"start-ns"`` and ``"end-ns"`` + (the "ns" events are used to get detailed namespace information). If *events* is omitted, only ``"end"`` events are reported. *parser* is an optional parser instance. If not given, the standard :class:`XMLParser` parser is used. 
*parser* must be a subclass of @@ -549,6 +550,10 @@ Functions .. deprecated:: 3.4 The *parser* argument. + .. versionchanged:: 3.8 + The ``comment`` and ``pi`` events were added. + + .. function:: parse(source, parser=None) Parses an XML section into an element tree. *source* is a filename or file @@ -1021,14 +1026,24 @@ TreeBuilder Objects ^^^^^^^^^^^^^^^^^^^ -.. class:: TreeBuilder(element_factory=None) +.. class:: TreeBuilder(element_factory=None, *, comment_factory=None, \ + pi_factory=None, insert_comments=False, insert_pis=False) Generic element structure builder. This builder converts a sequence of - start, data, and end method calls to a well-formed element structure. You - can use this class to build an element structure using a custom XML parser, - or a parser for some other XML-like format. *element_factory*, when given, - must be a callable accepting two positional arguments: a tag and - a dict of attributes. It is expected to return a new element instance. + start, data, end, comment and pi method calls to a well-formed element + structure. You can use this class to build an element structure using + a custom XML parser, or a parser for some other XML-like format. + + *element_factory*, when given, must be a callable accepting two positional + arguments: a tag and a dict of attributes. It is expected to return a new + element instance. + + The *comment_factory* and *pi_factory* functions, when given, should behave + like the :func:`Comment` and :func:`ProcessingInstruction` functions to + create comments and processing instructions. When not given, the default + factories will be used. When *insert_comments* and/or *insert_pis* is true, + comments/pis will be inserted into the tree if they appear within the root + element (but not outside of it). .. method:: close() @@ -1054,6 +1069,22 @@ TreeBuilder Objects containing element attributes. Returns the opened element. + .. method:: comment(text) + + Creates a comment with the given *text*. 
If ``insert_comments`` is true, + this will also add it to the tree. + + .. versionadded:: 3.8 + + + .. method:: pi(target, text) + + Creates a processing instruction with the given *target* name and *text*. If + ``insert_pis`` is true, this will also add it to the tree. + + .. versionadded:: 3.8 + + In addition, a custom :class:`TreeBuilder` object can provide the following method: @@ -1150,9 +1181,9 @@ XMLPullParser Objects callback target, :class:`XMLPullParser` collects an internal list of parsing events and lets the user read from it. *events* is a sequence of events to report back. The supported events are the strings ``"start"``, ``"end"``, - ``"start-ns"`` and ``"end-ns"`` (the "ns" events are used to get detailed - namespace information). If *events* is omitted, only ``"end"`` events are - reported. + ``"comment"``, ``"pi"``, ``"start-ns"`` and ``"end-ns"`` (the "ns" events + are used to get detailed namespace information). If *events* is omitted, + only ``"end"`` events are reported. .. method:: feed(data) @@ -1171,7 +1202,13 @@ XMLPullParser Objects data fed to the parser. The iterator yields ``(event, elem)`` pairs, where *event* is a string representing the type of event (e.g. ``"end"``) and *elem* is the - encountered :class:`Element` object. + encountered :class:`Element` object, or other context value as follows. + + * ``start``, ``end``: the current Element. + * ``comment``, ``pi``: the current comment / processing instruction + * ``start-ns``: a tuple ``(prefix, uri)`` naming the declared namespace + mapping. + * ``end-ns``: :const:`None` (this may change in a future version) Events provided in a previous call to :meth:`read_events` will not be yielded again. Events are consumed from the internal queue only when @@ -1191,6 +1228,10 @@ XMLPullParser Objects .. versionadded:: 3.4 + .. versionchanged:: 3.8 + The ``comment`` and ``pi`` events were added.
+ + Exceptions ^^^^^^^^^^ diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index e0d2cb7b995..8a228b8ccd6 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -1194,6 +1194,12 @@ class XMLPullParserTest(unittest.TestCase): for i in range(0, len(data), chunk_size): parser.feed(data[i:i+chunk_size]) + def assert_events(self, parser, expected): + self.assertEqual( + [(event, (elem.tag, elem.text)) + for event, elem in parser.read_events()], + expected) + def assert_event_tags(self, parser, expected): events = parser.read_events() self.assertEqual([(action, elem.tag) for action, elem in events], @@ -1276,8 +1282,10 @@ class XMLPullParserTest(unittest.TestCase): self.assert_event_tags(parser, []) parser = ET.XMLPullParser(events=('start', 'end')) - self._feed(parser, "\n") - self.assert_event_tags(parser, []) + self._feed(parser, "\n") + self.assert_events(parser, []) + + parser = ET.XMLPullParser(events=('start', 'end')) self._feed(parser, "\n") self.assert_event_tags(parser, [('start', 'root')]) self._feed(parser, "text") self.assertIsNone(parser.close()) + def test_events_comment(self): + parser = ET.XMLPullParser(events=('start', 'comment', 'end')) + self._feed(parser, "\n") + self.assert_events(parser, [('comment', (ET.Comment, ' text here '))]) + self._feed(parser, "\n") + self.assert_events(parser, [('comment', (ET.Comment, ' more text here '))]) + self._feed(parser, "text") + self.assert_event_tags(parser, [('start', 'root-tag')]) + self._feed(parser, "\n") + self.assert_events(parser, [('comment', (ET.Comment, ' inner comment'))]) + self._feed(parser, "\n") + self.assert_event_tags(parser, [('end', 'root-tag')]) + self._feed(parser, "\n") + self.assert_events(parser, [('comment', (ET.Comment, ' outer comment '))]) + + parser = ET.XMLPullParser(events=('comment',)) + self._feed(parser, "\n") + self.assert_events(parser, [('comment', (ET.Comment, ' text here '))]) + + def test_events_pi(self): + parser = 
ET.XMLPullParser(events=('start', 'pi', 'end')) + self._feed(parser, "\n") + self.assert_events(parser, [('pi', (ET.PI, 'pitarget'))]) + parser = ET.XMLPullParser(events=('pi',)) + self._feed(parser, "\n") + self.assert_events(parser, [('pi', (ET.PI, 'pitarget some text '))]) + def test_events_sequence(self): # Test that events can be some sequence that's not just a tuple or list eventset = {'end', 'start'} @@ -1333,7 +1368,6 @@ class XMLPullParserTest(unittest.TestCase): self._feed(parser, "bar") self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')]) - def test_unknown_event(self): with self.assertRaises(ValueError): ET.XMLPullParser(events=('start', 'end', 'bogus')) @@ -2741,6 +2775,33 @@ class TreeBuilderTest(unittest.TestCase): parser.feed(self.sample1) self.assertIsNone(parser.close()) + def test_treebuilder_comment(self): + b = ET.TreeBuilder() + self.assertEqual(b.comment('ctext').tag, ET.Comment) + self.assertEqual(b.comment('ctext').text, 'ctext') + + b = ET.TreeBuilder(comment_factory=ET.Comment) + self.assertEqual(b.comment('ctext').tag, ET.Comment) + self.assertEqual(b.comment('ctext').text, 'ctext') + + b = ET.TreeBuilder(comment_factory=len) + self.assertEqual(b.comment('ctext'), len('ctext')) + + def test_treebuilder_pi(self): + b = ET.TreeBuilder() + self.assertEqual(b.pi('target', None).tag, ET.PI) + self.assertEqual(b.pi('target', None).text, 'target') + + b = ET.TreeBuilder(pi_factory=ET.PI) + self.assertEqual(b.pi('target').tag, ET.PI) + self.assertEqual(b.pi('target').text, "target") + self.assertEqual(b.pi('pitarget', ' text ').tag, ET.PI) + self.assertEqual(b.pi('pitarget', ' text ').text, "pitarget text ") + + b = ET.TreeBuilder(pi_factory=lambda target, text: (len(target), text)) + self.assertEqual(b.pi('target'), (len('target'), None)) + self.assertEqual(b.pi('pitarget', ' text '), (len('pitarget'), ' text ')) + def test_treebuilder_elementfactory_none(self): parser = ET.XMLParser(target=ET.TreeBuilder(element_factory=None)) 
parser.feed(self.sample1) @@ -2761,6 +2822,21 @@ class TreeBuilderTest(unittest.TestCase): e = parser.close() self._check_sample1_element(e) + def test_subclass_comment_pi(self): + class MyTreeBuilder(ET.TreeBuilder): + def foobar(self, x): + return x * 2 + + tb = MyTreeBuilder(comment_factory=ET.Comment, pi_factory=ET.PI) + self.assertEqual(tb.foobar(10), 20) + + parser = ET.XMLParser(target=tb) + parser.feed(self.sample1) + parser.feed('') + + e = parser.close() + self._check_sample1_element(e) + def test_element_factory(self): lst = [] def myfactory(tag, attrib): @@ -3418,6 +3494,12 @@ def test_main(module=None): # Copy the path cache (should be empty) path_cache = ElementPath._cache ElementPath._cache = path_cache.copy() + # Align the Comment/PI factories. + if hasattr(ET, '_set_factories'): + old_factories = ET._set_factories(ET.Comment, ET.PI) + else: + old_factories = None + try: support.run_unittest(*test_classes) finally: @@ -3426,6 +3508,8 @@ def test_main(module=None): nsmap.clear() nsmap.update(nsmap_copy) ElementPath._cache = path_cache + if old_factories is not None: + ET._set_factories(*old_factories) # don't interfere with subsequent tests ET = pyET = None diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index c9e2f368350..c6400480f5b 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1374,12 +1374,30 @@ class TreeBuilder: *element_factory* is an optional element factory which is called to create new Element instances, as necessary. + *comment_factory* is a factory to create comments to be used instead of + the standard factory. If *insert_comments* is false (the default), + comments will not be inserted into the tree. + + *pi_factory* is a factory to create processing instructions to be used + instead of the standard factory. If *insert_pis* is false (the default), + processing instructions will not be inserted into the tree. 
""" - def __init__(self, element_factory=None): + def __init__(self, element_factory=None, *, + comment_factory=None, pi_factory=None, + insert_comments=False, insert_pis=False): self._data = [] # data collector self._elem = [] # element stack self._last = None # last element + self._root = None # root element self._tail = None # true if we're after an end tag + if comment_factory is None: + comment_factory = Comment + self._comment_factory = comment_factory + self.insert_comments = insert_comments + if pi_factory is None: + pi_factory = ProcessingInstruction + self._pi_factory = pi_factory + self.insert_pis = insert_pis if element_factory is None: element_factory = Element self._factory = element_factory @@ -1387,8 +1405,8 @@ class TreeBuilder: def close(self): """Flush builder buffers and return toplevel document Element.""" assert len(self._elem) == 0, "missing end tags" - assert self._last is not None, "missing toplevel element" - return self._last + assert self._root is not None, "missing toplevel element" + return self._root def _flush(self): if self._data: @@ -1417,6 +1435,8 @@ class TreeBuilder: self._last = elem = self._factory(tag, attrs) if self._elem: self._elem[-1].append(elem) + elif self._root is None: + self._root = elem self._elem.append(elem) self._tail = 0 return elem @@ -1435,6 +1455,33 @@ class TreeBuilder: self._tail = 1 return self._last + def comment(self, text): + """Create a comment using the comment_factory. + + *text* is the text of the comment. + """ + return self._handle_single( + self._comment_factory, self.insert_comments, text) + + def pi(self, target, text=None): + """Create a processing instruction using the pi_factory. + + *target* is the target name of the processing instruction. + *text* is the data of the processing instruction, or ''. 
+ """ + return self._handle_single( + self._pi_factory, self.insert_pis, target, text) + + def _handle_single(self, factory, insert, *args): + elem = factory(*args) + if insert: + self._flush() + self._last = elem + if self._elem: + self._elem[-1].append(elem) + self._tail = 1 + return elem + # also see ElementTree and TreeBuilder class XMLParser: @@ -1519,6 +1566,15 @@ class XMLParser: def handler(prefix, event=event_name, append=append): append((event, None)) parser.EndNamespaceDeclHandler = handler + elif event_name == 'comment': + def handler(text, event=event_name, append=append, self=self): + append((event, self.target.comment(text))) + parser.CommentHandler = handler + elif event_name == 'pi': + def handler(pi_target, data, event=event_name, append=append, + self=self): + append((event, self.target.pi(pi_target, data))) + parser.ProcessingInstructionHandler = handler else: raise ValueError("unknown event %r" % event_name) @@ -1640,7 +1696,10 @@ try: # (see tests) _Element_Py = Element - # Element, SubElement, ParseError, TreeBuilder, XMLParser + # Element, SubElement, ParseError, TreeBuilder, XMLParser, _set_factories from _elementtree import * + from _elementtree import _set_factories except ImportError: pass +else: + _set_factories(Comment, ProcessingInstruction) diff --git a/Misc/NEWS.d/next/Library/2019-04-20-09-50-32.bpo-36673.XF4Egb.rst b/Misc/NEWS.d/next/Library/2019-04-20-09-50-32.bpo-36673.XF4Egb.rst new file mode 100644 index 00000000000..76bf914e22b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-04-20-09-50-32.bpo-36673.XF4Egb.rst @@ -0,0 +1,3 @@ +The TreeBuilder and XMLPullParser in xml.etree.ElementTree gained support +for parsing comments and processing instructions. +Patch by Stefan Behnel. 
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 1e58cd05b51..5481c616787 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -92,6 +92,8 @@ typedef struct { PyObject *parseerror_obj; PyObject *deepcopy_obj; PyObject *elementpath_obj; + PyObject *comment_factory; + PyObject *pi_factory; } elementtreestate; static struct PyModuleDef elementtreemodule; @@ -114,6 +116,8 @@ elementtree_clear(PyObject *m) Py_CLEAR(st->parseerror_obj); Py_CLEAR(st->deepcopy_obj); Py_CLEAR(st->elementpath_obj); + Py_CLEAR(st->comment_factory); + Py_CLEAR(st->pi_factory); return 0; } @@ -124,6 +128,8 @@ elementtree_traverse(PyObject *m, visitproc visit, void *arg) Py_VISIT(st->parseerror_obj); Py_VISIT(st->deepcopy_obj); Py_VISIT(st->elementpath_obj); + Py_VISIT(st->comment_factory); + Py_VISIT(st->pi_factory); return 0; } @@ -2385,6 +2391,8 @@ typedef struct { Py_ssize_t index; /* current stack size (0 means empty) */ PyObject *element_factory; + PyObject *comment_factory; + PyObject *pi_factory; /* element tracing */ PyObject *events_append; /* the append method of the list of events, or NULL */ @@ -2392,6 +2400,11 @@ typedef struct { PyObject *end_event_obj; PyObject *start_ns_event_obj; PyObject *end_ns_event_obj; + PyObject *comment_event_obj; + PyObject *pi_event_obj; + + char insert_comments; + char insert_pis; } TreeBuilderObject; #define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type) @@ -2413,6 +2426,8 @@ treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) t->data = NULL; t->element_factory = NULL; + t->comment_factory = NULL; + t->pi_factory = NULL; t->stack = PyList_New(20); if (!t->stack) { Py_DECREF(t->this); @@ -2425,6 +2440,8 @@ treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) t->events_append = NULL; t->start_event_obj = t->end_event_obj = NULL; t->start_ns_event_obj = t->end_ns_event_obj = NULL; + t->comment_event_obj = t->pi_event_obj = NULL; + t->insert_comments = t->insert_pis = 0; } 
return (PyObject *)t; } @@ -2433,17 +2450,53 @@ treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) _elementtree.TreeBuilder.__init__ element_factory: object = NULL + * + comment_factory: object = NULL + pi_factory: object = NULL + insert_comments: bool = False + insert_pis: bool = False [clinic start generated code]*/ static int _elementtree_TreeBuilder___init___impl(TreeBuilderObject *self, - PyObject *element_factory) -/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/ + PyObject *element_factory, + PyObject *comment_factory, + PyObject *pi_factory, + int insert_comments, int insert_pis) +/*[clinic end generated code: output=8571d4dcadfdf952 input=1f967b5c245e0a71]*/ { - if (element_factory) { + if (element_factory && element_factory != Py_None) { Py_INCREF(element_factory); Py_XSETREF(self->element_factory, element_factory); + } else { + Py_CLEAR(self->element_factory); + } + + if (!comment_factory || comment_factory == Py_None) { + elementtreestate *st = ET_STATE_GLOBAL; + comment_factory = st->comment_factory; + } + if (comment_factory) { + Py_INCREF(comment_factory); + Py_XSETREF(self->comment_factory, comment_factory); + self->insert_comments = insert_comments; + } else { + Py_CLEAR(self->comment_factory); + self->insert_comments = 0; + } + + if (!pi_factory || pi_factory == Py_None) { + elementtreestate *st = ET_STATE_GLOBAL; + pi_factory = st->pi_factory; + } + if (pi_factory) { + Py_INCREF(pi_factory); + Py_XSETREF(self->pi_factory, pi_factory); + self->insert_pis = insert_pis; + } else { + Py_CLEAR(self->pi_factory); + self->insert_pis = 0; } return 0; @@ -2452,6 +2505,8 @@ _elementtree_TreeBuilder___init___impl(TreeBuilderObject *self, static int treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg) { + Py_VISIT(self->pi_event_obj); + Py_VISIT(self->comment_event_obj); Py_VISIT(self->end_ns_event_obj); Py_VISIT(self->start_ns_event_obj); Py_VISIT(self->end_event_obj); @@ -2462,6 
+2517,8 @@ treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg) Py_VISIT(self->last); Py_VISIT(self->data); Py_VISIT(self->stack); + Py_VISIT(self->pi_factory); + Py_VISIT(self->comment_factory); Py_VISIT(self->element_factory); return 0; } @@ -2469,6 +2526,8 @@ treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg) static int treebuilder_gc_clear(TreeBuilderObject *self) { + Py_CLEAR(self->pi_event_obj); + Py_CLEAR(self->comment_event_obj); Py_CLEAR(self->end_ns_event_obj); Py_CLEAR(self->start_ns_event_obj); Py_CLEAR(self->end_event_obj); @@ -2478,6 +2537,8 @@ treebuilder_gc_clear(TreeBuilderObject *self) Py_CLEAR(self->data); Py_CLEAR(self->last); Py_CLEAR(self->this); + Py_CLEAR(self->pi_factory); + Py_CLEAR(self->comment_factory); Py_CLEAR(self->element_factory); Py_CLEAR(self->root); return 0; @@ -2494,6 +2555,57 @@ treebuilder_dealloc(TreeBuilderObject *self) /* -------------------------------------------------------------------- */ /* helpers for handling of arbitrary element-like objects */ +/*[clinic input] +_elementtree._set_factories + + comment_factory: object + pi_factory: object + / + +Change the factories used to create comments and processing instructions. + +For internal use only. 
+[clinic start generated code]*/ + +static PyObject * +_elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory, + PyObject *pi_factory) +/*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/ +{ + elementtreestate *st = ET_STATE_GLOBAL; + PyObject *old; + + if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) { + PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s", + Py_TYPE(comment_factory)->tp_name); + return NULL; + } + if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) { + PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s", + Py_TYPE(pi_factory)->tp_name); + return NULL; + } + + old = PyTuple_Pack(2, + st->comment_factory ? st->comment_factory : Py_None, + st->pi_factory ? st->pi_factory : Py_None); + + if (comment_factory == Py_None) { + Py_CLEAR(st->comment_factory); + } else { + Py_INCREF(comment_factory); + Py_XSETREF(st->comment_factory, comment_factory); + } + if (pi_factory == Py_None) { + Py_CLEAR(st->pi_factory); + } else { + Py_INCREF(pi_factory); + Py_XSETREF(st->pi_factory, pi_factory); + } + + return old; +} + static int treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data, PyObject **dest, _Py_Identifier *name) @@ -2569,7 +2681,7 @@ treebuilder_append_event(TreeBuilderObject *self, PyObject *action, PyObject *event = PyTuple_Pack(2, action, node); if (event == NULL) return -1; - res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL); + res = _PyObject_FastCall(self->events_append, &event, 1); Py_DECREF(event); if (res == NULL) return -1; @@ -2593,7 +2705,7 @@ treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag, return NULL; } - if (!self->element_factory || self->element_factory == Py_None) { + if (!self->element_factory) { node = create_new_element(tag, attrib); } else if (attrib == Py_None) { attrib = PyDict_New(); @@ -2721,6 +2833,84 @@ treebuilder_handle_end(TreeBuilderObject* self, 
PyObject* tag) return (PyObject*) self->last; } +LOCAL(PyObject*) +treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text) +{ + PyObject* comment = NULL; + PyObject* this; + + if (treebuilder_flush_data(self) < 0) { + return NULL; + } + + if (self->comment_factory) { + comment = _PyObject_FastCall(self->comment_factory, &text, 1); + if (!comment) + return NULL; + + this = self->this; + if (self->insert_comments && this != Py_None) { + if (treebuilder_add_subelement(this, comment) < 0) + goto error; + } + } else { + Py_INCREF(text); + comment = text; + } + + if (self->events_append && self->comment_event_obj) { + if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0) + goto error; + } + + return comment; + + error: + Py_DECREF(comment); + return NULL; +} + +LOCAL(PyObject*) +treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text) +{ + PyObject* pi = NULL; + PyObject* this; + PyObject* stack[2] = {target, text}; + + if (treebuilder_flush_data(self) < 0) { + return NULL; + } + + if (self->pi_factory) { + pi = _PyObject_FastCall(self->pi_factory, stack, 2); + if (!pi) { + return NULL; + } + + this = self->this; + if (self->insert_pis && this != Py_None) { + if (treebuilder_add_subelement(this, pi) < 0) + goto error; + } + } else { + pi = PyTuple_Pack(2, target, text); + if (!pi) { + return NULL; + } + } + + if (self->events_append && self->pi_event_obj) { + if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0) + goto error; + } + + return pi; + + error: + Py_DECREF(pi); + return NULL; +} + /* -------------------------------------------------------------------- */ /* methods (in alphabetical order) */ @@ -2754,6 +2944,38 @@ _elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag) return treebuilder_handle_end(self, tag); } +/*[clinic input] +_elementtree.TreeBuilder.comment + + text: object + / + +[clinic start generated code]*/ + +static PyObject * 
+_elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text) +/*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/ +{ + return treebuilder_handle_comment(self, text); +} + +/*[clinic input] +_elementtree.TreeBuilder.pi + + target: object + text: object = None + / + +[clinic start generated code]*/ + +static PyObject * +_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target, + PyObject *text) +/*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/ +{ + return treebuilder_handle_pi(self, target, text); +} + LOCAL(PyObject*) treebuilder_done(TreeBuilderObject* self) { @@ -2925,7 +3147,7 @@ expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column, if (errmsg == NULL) return; - error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL); + error = _PyObject_FastCall(st->parseerror_obj, &errmsg, 1); Py_DECREF(errmsg); if (!error) return; @@ -2988,7 +3210,7 @@ expat_default_handler(XMLParserObject* self, const XML_Char* data_in, (TreeBuilderObject*) self->target, value ); else if (self->handle_data) - res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL); + res = _PyObject_FastCall(self->handle_data, &value, 1); else res = NULL; Py_XDECREF(res); @@ -3099,7 +3321,7 @@ expat_data_handler(XMLParserObject* self, const XML_Char* data_in, /* shortcut */ res = treebuilder_handle_data((TreeBuilderObject*) self->target, data); else if (self->handle_data) - res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL); + res = _PyObject_FastCall(self->handle_data, &data, 1); else res = NULL; @@ -3126,7 +3348,7 @@ expat_end_handler(XMLParserObject* self, const XML_Char* tag_in) else if (self->handle_end) { tag = makeuniversal(self, tag_in); if (tag) { - res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL); + res = _PyObject_FastCall(self->handle_end, &tag, 1); Py_DECREF(tag); } } @@ -3176,21 +3398,31 @@ 
expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in) static void expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in) { - PyObject* comment; - PyObject* res; + PyObject* comment = NULL; + PyObject* res = NULL; if (PyErr_Occurred()) return; - if (self->handle_comment) { + if (TreeBuilder_CheckExact(self->target)) { + /* shortcut */ + TreeBuilderObject *target = (TreeBuilderObject*) self->target; + comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict"); - if (comment) { - res = PyObject_CallFunctionObjArgs(self->handle_comment, - comment, NULL); - Py_XDECREF(res); - Py_DECREF(comment); - } + if (!comment) + return; /* parser will look for errors */ + + res = treebuilder_handle_comment(target, comment); + } else if (self->handle_comment) { + comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict"); + if (!comment) + return; + + res = _PyObject_FastCall(self->handle_comment, &comment, 1); } + + Py_XDECREF(res); + Py_DECREF(comment); } static void @@ -3258,27 +3490,51 @@ static void expat_pi_handler(XMLParserObject* self, const XML_Char* target_in, const XML_Char* data_in) { - PyObject* target; + PyObject* pi_target = NULL; PyObject* data; PyObject* res; + PyObject* stack[2]; if (PyErr_Occurred()) return; - if (self->handle_pi) { - target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict"); - data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict"); - if (target && data) { - res = PyObject_CallFunctionObjArgs(self->handle_pi, - target, data, NULL); + if (TreeBuilder_CheckExact(self->target)) { + /* shortcut */ + TreeBuilderObject *target = (TreeBuilderObject*) self->target; + + if (target->events_append && target->pi_event_obj) { + pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict"); + if (!pi_target) + goto error; + data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict"); + if (!data) + goto error; + res = treebuilder_handle_pi(target, pi_target, 
data); Py_XDECREF(res); Py_DECREF(data); - Py_DECREF(target); - } else { - Py_XDECREF(data); - Py_XDECREF(target); + Py_DECREF(pi_target); } + } else if (self->handle_pi) { + pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict"); + if (!pi_target) + goto error; + data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict"); + if (!data) + goto error; + + stack[0] = pi_target; + stack[1] = data; + res = _PyObject_FastCall(self->handle_pi, stack, 2); + Py_XDECREF(res); + Py_DECREF(data); + Py_DECREF(pi_target); } + + return; + + error: + Py_XDECREF(pi_target); + return; } /* -------------------------------------------------------------------- */ @@ -3695,6 +3951,8 @@ _elementtree_XMLParser__setevents_impl(XMLParserObject *self, Py_CLEAR(target->end_event_obj); Py_CLEAR(target->start_ns_event_obj); Py_CLEAR(target->end_ns_event_obj); + Py_CLEAR(target->comment_event_obj); + Py_CLEAR(target->pi_event_obj); if (events_to_report == Py_None) { /* default is "end" only */ @@ -3740,6 +3998,18 @@ _elementtree_XMLParser__setevents_impl(XMLParserObject *self, (XML_StartNamespaceDeclHandler) expat_start_ns_handler, (XML_EndNamespaceDeclHandler) expat_end_ns_handler ); + } else if (strcmp(event_name, "comment") == 0) { + Py_XSETREF(target->comment_event_obj, event_name_obj); + EXPAT(SetCommentHandler)( + self->parser, + (XML_CommentHandler) expat_comment_handler + ); + } else if (strcmp(event_name, "pi") == 0) { + Py_XSETREF(target->pi_event_obj, event_name_obj); + EXPAT(SetProcessingInstructionHandler)( + self->parser, + (XML_ProcessingInstructionHandler) expat_pi_handler + ); } else { Py_DECREF(event_name_obj); Py_DECREF(events_seq); @@ -3882,6 +4152,8 @@ static PyMethodDef treebuilder_methods[] = { _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF _ELEMENTTREE_TREEBUILDER_START_METHODDEF _ELEMENTTREE_TREEBUILDER_END_METHODDEF + _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF + _ELEMENTTREE_TREEBUILDER_PI_METHODDEF _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF {NULL, NULL} 
}; @@ -3983,6 +4255,7 @@ static PyTypeObject XMLParser_Type = { static PyMethodDef _functions[] = { {"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS}, + _ELEMENTTREE__SET_FACTORIES_METHODDEF {NULL, NULL} }; diff --git a/Modules/clinic/_elementtree.c.h b/Modules/clinic/_elementtree.c.h index d239c802583..0f55480140b 100644 --- a/Modules/clinic/_elementtree.c.h +++ b/Modules/clinic/_elementtree.c.h @@ -635,19 +635,26 @@ exit: static int _elementtree_TreeBuilder___init___impl(TreeBuilderObject *self, - PyObject *element_factory); + PyObject *element_factory, + PyObject *comment_factory, + PyObject *pi_factory, + int insert_comments, int insert_pis); static int _elementtree_TreeBuilder___init__(PyObject *self, PyObject *args, PyObject *kwargs) { int return_value = -1; - static const char * const _keywords[] = {"element_factory", NULL}; + static const char * const _keywords[] = {"element_factory", "comment_factory", "pi_factory", "insert_comments", "insert_pis", NULL}; static _PyArg_Parser _parser = {NULL, _keywords, "TreeBuilder", 0}; - PyObject *argsbuf[1]; + PyObject *argsbuf[5]; PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? 
PyDict_GET_SIZE(kwargs) : 0) - 0; PyObject *element_factory = NULL; + PyObject *comment_factory = NULL; + PyObject *pi_factory = NULL; + int insert_comments = 0; + int insert_pis = 0; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 0, 1, 0, argsbuf); if (!fastargs) { @@ -656,9 +663,76 @@ _elementtree_TreeBuilder___init__(PyObject *self, PyObject *args, PyObject *kwar if (!noptargs) { goto skip_optional_pos; } - element_factory = fastargs[0]; + if (fastargs[0]) { + element_factory = fastargs[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } skip_optional_pos: - return_value = _elementtree_TreeBuilder___init___impl((TreeBuilderObject *)self, element_factory); + if (!noptargs) { + goto skip_optional_kwonly; + } + if (fastargs[1]) { + comment_factory = fastargs[1]; + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[2]) { + pi_factory = fastargs[2]; + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[3]) { + insert_comments = PyObject_IsTrue(fastargs[3]); + if (insert_comments < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + insert_pis = PyObject_IsTrue(fastargs[4]); + if (insert_pis < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = _elementtree_TreeBuilder___init___impl((TreeBuilderObject *)self, element_factory, comment_factory, pi_factory, insert_comments, insert_pis); + +exit: + return return_value; +} + +PyDoc_STRVAR(_elementtree__set_factories__doc__, +"_set_factories($module, comment_factory, pi_factory, /)\n" +"--\n" +"\n" +"Change the factories used to create comments and processing instructions.\n" +"\n" +"For internal use only."); + +#define _ELEMENTTREE__SET_FACTORIES_METHODDEF \ + {"_set_factories", (PyCFunction)(void(*)(void))_elementtree__set_factories, METH_FASTCALL, _elementtree__set_factories__doc__}, + +static PyObject * +_elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory, + 
PyObject *pi_factory); + +static PyObject * +_elementtree__set_factories(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *comment_factory; + PyObject *pi_factory; + + if (!_PyArg_CheckPositional("_set_factories", nargs, 2, 2)) { + goto exit; + } + comment_factory = args[0]; + pi_factory = args[1]; + return_value = _elementtree__set_factories_impl(module, comment_factory, pi_factory); exit: return return_value; @@ -680,6 +754,48 @@ PyDoc_STRVAR(_elementtree_TreeBuilder_end__doc__, #define _ELEMENTTREE_TREEBUILDER_END_METHODDEF \ {"end", (PyCFunction)_elementtree_TreeBuilder_end, METH_O, _elementtree_TreeBuilder_end__doc__}, +PyDoc_STRVAR(_elementtree_TreeBuilder_comment__doc__, +"comment($self, text, /)\n" +"--\n" +"\n"); + +#define _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF \ + {"comment", (PyCFunction)_elementtree_TreeBuilder_comment, METH_O, _elementtree_TreeBuilder_comment__doc__}, + +PyDoc_STRVAR(_elementtree_TreeBuilder_pi__doc__, +"pi($self, target, text=None, /)\n" +"--\n" +"\n"); + +#define _ELEMENTTREE_TREEBUILDER_PI_METHODDEF \ + {"pi", (PyCFunction)(void(*)(void))_elementtree_TreeBuilder_pi, METH_FASTCALL, _elementtree_TreeBuilder_pi__doc__}, + +static PyObject * +_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target, + PyObject *text); + +static PyObject * +_elementtree_TreeBuilder_pi(TreeBuilderObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *target; + PyObject *text = Py_None; + + if (!_PyArg_CheckPositional("pi", nargs, 1, 2)) { + goto exit; + } + target = args[0]; + if (nargs < 2) { + goto skip_optional; + } + text = args[1]; +skip_optional: + return_value = _elementtree_TreeBuilder_pi_impl(self, target, text); + +exit: + return return_value; +} + PyDoc_STRVAR(_elementtree_TreeBuilder_close__doc__, "close($self, /)\n" "--\n" @@ -853,4 +969,4 @@ skip_optional: exit: return return_value; } -/*[clinic end generated code: 
output=440b5d90a4b86590 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=386a68425d072b5c input=a9049054013a1b77]*/