diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 029320153e9..39e608b8b1d 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -1668,6 +1668,57 @@ class BugsTest(unittest.TestCase): ET.register_namespace('test10777', 'http://myuri/') ET.register_namespace('test10777', 'http://myuri/') + def test_lost_text(self): + # Issue #25902: Borrowed text can disappear + class Text: + def __bool__(self): + e.text = 'changed' + return True + + e = ET.Element('tag') + e.text = Text() + i = e.itertext() + t = next(i) + self.assertIsInstance(t, Text) + self.assertIsInstance(e.text, str) + self.assertEqual(e.text, 'changed') + + def test_lost_tail(self): + # Issue #25902: Borrowed tail can disappear + class Text: + def __bool__(self): + e[0].tail = 'changed' + return True + + e = ET.Element('root') + e.append(ET.Element('tag')) + e[0].tail = Text() + i = e.itertext() + t = next(i) + self.assertIsInstance(t, Text) + self.assertIsInstance(e[0].tail, str) + self.assertEqual(e[0].tail, 'changed') + + def test_lost_elem(self): + # Issue #25902: Borrowed element can disappear + class Tag: + def __eq__(self, other): + e[0] = ET.Element('changed') + next(i) + return True + + e = ET.Element('root') + e.append(ET.Element(Tag())) + e.append(ET.Element('tag')) + i = e.iter('tag') + try: + t = next(i) + except ValueError: + self.skipTest('generators are not reentrant') + self.assertIsInstance(t.tag, Tag) + self.assertIsInstance(e[0].tag, str) + self.assertEqual(e[0].tag, 'changed') + # -------------------------------------------------------------------- diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index a58ef31812e..b92fb52124a 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -429,12 +429,14 @@ class Element: tag = self.tag if not isinstance(tag, str) and tag is not None: return - if self.text: - yield self.text + t = self.text + if t: + yield t for e in self: yield from e.itertext() - if e.tail: - yield e.tail + t = e.tail + if t: + yield t def SubElement(parent, tag, attrib={}, **extra): diff --git a/Misc/NEWS b/Misc/NEWS index 01cf6729b55..ad37eb78741 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -115,6 +115,8 @@ Core and Builtins Library ------- +- Issue #25902: Fixed various refcount issues in ElementTree iteration. + - Issue #22227: The TarFile iterator is reimplemented using generator. This implementation is simpler that using class. diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 57c7af23893..949e3a05bb1 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -2070,6 +2070,7 @@ elementiter_next(ElementIterObject *it) ElementObject *cur_parent; Py_ssize_t child_index; int rc; + ElementObject *elem; while (1) { /* Handle the case reached in the beginning and end of iteration, where @@ -2083,38 +2084,47 @@ elementiter_next(ElementIterObject *it) PyErr_SetNone(PyExc_StopIteration); return NULL; } else { + elem = it->root_element; it->parent_stack = parent_stack_push_new(it->parent_stack, - it->root_element); + elem); if (!it->parent_stack) { PyErr_NoMemory(); return NULL; } + Py_INCREF(elem); it->root_done = 1; rc = (it->sought_tag == Py_None); if (!rc) { - rc = PyObject_RichCompareBool(it->root_element->tag, + rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ); - if (rc < 0) + if (rc < 0) { + Py_DECREF(elem); return NULL; + } } if (rc) { if (it->gettext) { - PyObject *text = element_get_text(it->root_element); - if (!text) + PyObject *text = element_get_text(elem); + if (!text) { + Py_DECREF(elem); return NULL; + } + Py_INCREF(text); + Py_DECREF(elem); rc = PyObject_IsTrue(text); + if (rc > 0) + return text; + Py_DECREF(text); if (rc < 0) return NULL; - if (rc) { - Py_INCREF(text); - return text; - } } else { - Py_INCREF(it->root_element); - return (PyObject *)it->root_element; + return (PyObject *)elem; } } + else { + Py_DECREF(elem); + } } } @@ -2124,54 +2134,68 @@ elementiter_next(ElementIterObject *it) cur_parent = it->parent_stack->parent; child_index = it->parent_stack->child_index; if (cur_parent->extra && child_index < cur_parent->extra->length) { - ElementObject *child = (ElementObject *) - cur_parent->extra->children[child_index]; + elem = (ElementObject *)cur_parent->extra->children[child_index]; it->parent_stack->child_index++; it->parent_stack = parent_stack_push_new(it->parent_stack, - child); + elem); if (!it->parent_stack) { PyErr_NoMemory(); return NULL; } + Py_INCREF(elem); if (it->gettext) { - PyObject *text = element_get_text(child); - if (!text) + PyObject *text = element_get_text(elem); + if (!text) { + Py_DECREF(elem); return NULL; + } + Py_INCREF(text); + Py_DECREF(elem); rc = PyObject_IsTrue(text); + if (rc > 0) + return text; + Py_DECREF(text); if (rc < 0) return NULL; - if (rc) { - Py_INCREF(text); - return text; - } } else { rc = (it->sought_tag == Py_None); if (!rc) { - rc = PyObject_RichCompareBool(child->tag, + rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ); - if (rc < 0) + if (rc < 0) { + Py_DECREF(elem); return NULL; + } } if (rc) { - Py_INCREF(child); - return (PyObject *)child; + return (PyObject *)elem; } + Py_DECREF(elem); } } else { PyObject *tail; - ParentLocator *next = it->parent_stack->next; + ParentLocator *next; if (it->gettext) { + Py_INCREF(cur_parent); tail = element_get_tail(cur_parent); - if (!tail) + if (!tail) { + Py_DECREF(cur_parent); return NULL; + } + Py_INCREF(tail); + Py_DECREF(cur_parent); } - else + else { tail = Py_None; - Py_XDECREF(it->parent_stack->parent); + Py_INCREF(tail); + } + next = it->parent_stack->next; + cur_parent = it->parent_stack->parent; PyObject_Free(it->parent_stack); it->parent_stack = next; + Py_XDECREF(cur_parent); /* Note that extra condition on it->parent_stack->parent here; * this is because itertext() is supposed to only return *inner* @@ -2179,12 +2203,14 @@ elementiter_next(ElementIterObject *it) */ if (it->parent_stack->parent) { rc = PyObject_IsTrue(tail); + if (rc > 0) + return tail; + Py_DECREF(tail); if (rc < 0) return NULL; - if (rc) { - Py_INCREF(tail); - return tail; - } + } + else { + Py_DECREF(tail); } } }