From 66c08d90f6d7e5889a19891a0ef994354b14f172 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 21 Dec 2015 11:09:48 +0200 Subject: [PATCH] Issue #25902: Fixed various refcount issues in ElementTree iteration. --- Lib/test/test_xml_etree.py | 51 ++++++++++++++++++++ Lib/xml/etree/ElementTree.py | 10 ++-- Misc/NEWS | 2 + Modules/_elementtree.c | 90 +++++++++++++++++++++++------------- 4 files changed, 117 insertions(+), 36 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 57d8e4d2de2..358484dad78 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -1666,6 +1666,57 @@ class BugsTest(unittest.TestCase): ET.register_namespace('test10777', 'http://myuri/') ET.register_namespace('test10777', 'http://myuri/') + def test_lost_text(self): + # Issue #25902: Borrowed text can disappear + class Text: + def __bool__(self): + e.text = 'changed' + return True + + e = ET.Element('tag') + e.text = Text() + i = e.itertext() + t = next(i) + self.assertIsInstance(t, Text) + self.assertIsInstance(e.text, str) + self.assertEqual(e.text, 'changed') + + def test_lost_tail(self): + # Issue #25902: Borrowed tail can disappear + class Text: + def __bool__(self): + e[0].tail = 'changed' + return True + + e = ET.Element('root') + e.append(ET.Element('tag')) + e[0].tail = Text() + i = e.itertext() + t = next(i) + self.assertIsInstance(t, Text) + self.assertIsInstance(e[0].tail, str) + self.assertEqual(e[0].tail, 'changed') + + def test_lost_elem(self): + # Issue #25902: Borrowed element can disappear + class Tag: + def __eq__(self, other): + e[0] = ET.Element('changed') + next(i) + return True + + e = ET.Element('root') + e.append(ET.Element(Tag())) + e.append(ET.Element('tag')) + i = e.iter('tag') + try: + t = next(i) + except ValueError: + self.skipTest('generators are not reentrant') + self.assertIsInstance(t.tag, Tag) + self.assertIsInstance(e[0].tag, str) + self.assertEqual(e[0].tag, 'changed') + # -------------------------------------------------------------------- diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 62b5d3aeec6..b4e110d5dea 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -428,12 +428,14 @@ class Element: tag = self.tag if not isinstance(tag, str) and tag is not None: return - if self.text: - yield self.text + t = self.text + if t: + yield t for e in self: yield from e.itertext() - if e.tail: - yield e.tail + t = e.tail + if t: + yield t def SubElement(parent, tag, attrib={}, **extra): diff --git a/Misc/NEWS b/Misc/NEWS index 6a496b8c6df..90adba8eb8e 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -28,6 +28,8 @@ Core and Builtins Library ------- +- Issue #25902: Fixed various refcount issues in ElementTree iteration. + - Issue #25717: Restore the previous behaviour of tolerating most fstat() errors when opening files. This was a regression in 3.5a1, and stopped anonymous temporary files from working in special cases. diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index c69998cd17c..8eb655c1f4d 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -2072,6 +2072,7 @@ elementiter_next(ElementIterObject *it) ElementObject *cur_parent; Py_ssize_t child_index; int rc; + ElementObject *elem; while (1) { /* Handle the case reached in the beginning and end of iteration, where @@ -2085,38 +2086,47 @@ elementiter_next(ElementIterObject *it) PyErr_SetNone(PyExc_StopIteration); return NULL; } else { + elem = it->root_element; it->parent_stack = parent_stack_push_new(it->parent_stack, - it->root_element); + elem); if (!it->parent_stack) { PyErr_NoMemory(); return NULL; } + Py_INCREF(elem); it->root_done = 1; rc = (it->sought_tag == Py_None); if (!rc) { - rc = PyObject_RichCompareBool(it->root_element->tag, + rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ); - if (rc < 0) + if (rc < 0) { + Py_DECREF(elem); return NULL; + } } if (rc) { if (it->gettext) { - PyObject *text = element_get_text(it->root_element); - if (!text) + PyObject *text = element_get_text(elem); + if (!text) { + Py_DECREF(elem); return NULL; + } + Py_INCREF(text); + Py_DECREF(elem); rc = PyObject_IsTrue(text); + if (rc > 0) + return text; + Py_DECREF(text); if (rc < 0) return NULL; - if (rc) { - Py_INCREF(text); - return text; - } } else { - Py_INCREF(it->root_element); - return (PyObject *)it->root_element; + return (PyObject *)elem; } } + else { + Py_DECREF(elem); + } } } @@ -2126,54 +2136,68 @@ elementiter_next(ElementIterObject *it) cur_parent = it->parent_stack->parent; child_index = it->parent_stack->child_index; if (cur_parent->extra && child_index < cur_parent->extra->length) { - ElementObject *child = (ElementObject *) - cur_parent->extra->children[child_index]; + elem = (ElementObject *)cur_parent->extra->children[child_index]; it->parent_stack->child_index++; it->parent_stack = parent_stack_push_new(it->parent_stack, - child); + elem); if (!it->parent_stack) { PyErr_NoMemory(); return NULL; } + Py_INCREF(elem); if (it->gettext) { - PyObject *text = element_get_text(child); - if (!text) + PyObject *text = element_get_text(elem); + if (!text) { + Py_DECREF(elem); return NULL; + } + Py_INCREF(text); + Py_DECREF(elem); rc = PyObject_IsTrue(text); + if (rc > 0) + return text; + Py_DECREF(text); if (rc < 0) return NULL; - if (rc) { - Py_INCREF(text); - return text; - } } else { rc = (it->sought_tag == Py_None); if (!rc) { - rc = PyObject_RichCompareBool(child->tag, + rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ); - if (rc < 0) + if (rc < 0) { + Py_DECREF(elem); return NULL; + } } if (rc) { - Py_INCREF(child); - return (PyObject *)child; + return (PyObject *)elem; } + Py_DECREF(elem); } } else { PyObject *tail; - ParentLocator *next = it->parent_stack->next; + ParentLocator *next; if (it->gettext) { + Py_INCREF(cur_parent); tail = element_get_tail(cur_parent); - if (!tail) + if (!tail) { + Py_DECREF(cur_parent); return NULL; + } + Py_INCREF(tail); + Py_DECREF(cur_parent); } - else + else { tail = Py_None; - Py_XDECREF(it->parent_stack->parent); + Py_INCREF(tail); + } + next = it->parent_stack->next; + cur_parent = it->parent_stack->parent; PyObject_Free(it->parent_stack); it->parent_stack = next; + Py_XDECREF(cur_parent); /* Note that extra condition on it->parent_stack->parent here; * this is because itertext() is supposed to only return *inner* @@ -2181,12 +2205,14 @@ elementiter_next(ElementIterObject *it) */ if (it->parent_stack->parent) { rc = PyObject_IsTrue(tail); + if (rc > 0) + return tail; + Py_DECREF(tail); if (rc < 0) return NULL; - if (rc) { - Py_INCREF(tail); - return tail; - } + } + else { + Py_DECREF(tail); } } }