diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index c0144d1cb8f..dbdad23a742 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -1879,6 +1879,118 @@ class BadElementTest(ElementTestCase, unittest.TestCase): with self.assertRaises(RuntimeError): repr(e) # Should not crash + def test_element_get_text(self): + # Issue #27863 + class X(str): + def __del__(self): + try: + elem.text + except NameError: + pass + + b = ET.TreeBuilder() + b.start('tag', {}) + b.data('ABCD') + b.data(X('EFGH')) + b.data('IJKL') + b.end('tag') + + elem = b.close() + self.assertEqual(elem.text, 'ABCDEFGHIJKL') + + def test_element_get_tail(self): + # Issue #27863 + class X(str): + def __del__(self): + try: + elem[0].tail + except NameError: + pass + + b = ET.TreeBuilder() + b.start('root', {}) + b.start('tag', {}) + b.end('tag') + b.data('ABCD') + b.data(X('EFGH')) + b.data('IJKL') + b.end('root') + + elem = b.close() + self.assertEqual(elem[0].tail, 'ABCDEFGHIJKL') + + def test_element_iter(self): + # Issue #27863 + state = { + 'tag': 'tag', + '_children': [None], # non-Element + 'attrib': 'attr', + 'tail': 'tail', + 'text': 'text', + } + + e = ET.Element('tag') + try: + e.__setstate__(state) + except AttributeError: + e.__dict__ = state + + it = e.iter() + self.assertIs(next(it), e) + self.assertRaises(AttributeError, next, it) + + def test_subscr(self): + # Issue #27863 + class X: + def __index__(self): + del e[:] + return 1 + + e = ET.Element('elem') + e.append(ET.Element('child')) + e[:X()] # shouldn't crash + + e.append(ET.Element('child')) + e[0:10:X()] # shouldn't crash + + def test_ass_subscr(self): + # Issue #27863 + class X: + def __index__(self): + e[:] = [] + return 1 + + e = ET.Element('elem') + for _ in range(10): + e.insert(0, ET.Element('child')) + + e[0:10:X()] = [] # shouldn't crash + + def test_treebuilder_start(self): + # Issue #27863 + def element_factory(x, y): + return [] + b = ET.TreeBuilder(element_factory=element_factory) + + b.start('tag', {}) + b.data('ABCD') + self.assertRaises(AttributeError, b.start, 'tag2', {}) + del b + gc_collect() + + def test_treebuilder_end(self): + # Issue #27863 + def element_factory(x, y): + return [] + b = ET.TreeBuilder(element_factory=element_factory) + + b.start('tag', {}) + b.data('ABCD') + self.assertRaises(AttributeError, b.end, 'tag') + del b + gc_collect() + + class MutatingElementPath(str): def __new__(cls, elem, *args): self = str.__new__(cls, *args) diff --git a/Misc/NEWS b/Misc/NEWS index 2bea5ec1e3a..5fff581cfce 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -298,6 +298,9 @@ Extension Modules Library ------- +- bpo-27863: Fixed multiple crashes in ElementTree caused by race conditions + and wrong types. + - bpo-25996: Added support of file descriptors in os.scandir() on Unix. os.fwalk() is sped up by 2 times by using os.scandir(). diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 2d623dc2620..36aa391609f 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -131,7 +131,7 @@ elementtree_free(void *m) LOCAL(PyObject*) list_join(PyObject* list) { - /* join list elements (destroying the list in the process) */ + /* join list elements */ PyObject* joiner; PyObject* result; @@ -140,8 +140,6 @@ list_join(PyObject* list) return NULL; result = PyUnicode_Join(joiner, list); Py_DECREF(joiner); - if (result) - Py_DECREF(list); return result; } @@ -508,15 +506,17 @@ element_get_text(ElementObject* self) { /* return borrowed reference to text attribute */ - PyObject* res = self->text; + PyObject *res = self->text; if (JOIN_GET(res)) { res = JOIN_OBJ(res); if (PyList_CheckExact(res)) { - res = list_join(res); - if (!res) + PyObject *tmp = list_join(res); + if (!tmp) return NULL; - self->text = res; + self->text = tmp; + Py_DECREF(res); + res = tmp; } } @@ -528,15 +528,17 @@ element_get_tail(ElementObject* self) { /* return borrowed reference to text attribute */ - PyObject* res = self->tail; + PyObject *res = self->tail; if (JOIN_GET(res)) { res = JOIN_OBJ(res); if (PyList_CheckExact(res)) { - res = list_join(res); - if (!res) + PyObject *tmp = list_join(res); + if (!tmp) return NULL; - self->tail = res; + self->tail = tmp; + Py_DECREF(res); + res = tmp; } } @@ -2148,6 +2150,12 @@ elementiter_next(ElementIterObject *it) continue; } + if (!PyObject_TypeCheck(extra->children[child_index], &Element_Type)) { + PyErr_Format(PyExc_AttributeError, + "'%.100s' object has no attribute 'iter'", + Py_TYPE(extra->children[child_index])->tp_name); + return NULL; + } elem = (ElementObject *)extra->children[child_index]; item->child_index++; Py_INCREF(elem); @@ -2397,40 +2405,51 @@ treebuilder_dealloc(TreeBuilderObject *self) /* helpers for handling of arbitrary element-like objects */ static int -treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data, +treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data, PyObject **dest, _Py_Identifier *name) { if (Element_CheckExact(element)) { - Py_DECREF(JOIN_OBJ(*dest)); - *dest = JOIN_SET(data, PyList_CheckExact(data)); + PyObject *tmp = JOIN_OBJ(*dest); + *dest = JOIN_SET(*data, PyList_CheckExact(*data)); + *data = NULL; + Py_DECREF(tmp); return 0; } else { - PyObject *joined = list_join(data); + PyObject *joined = list_join(*data); int r; if (joined == NULL) return -1; r = _PyObject_SetAttrId(element, name, joined); Py_DECREF(joined); - return r; + if (r < 0) + return -1; + Py_CLEAR(*data); + return 0; } } -/* These two functions steal a reference to data */ -static int -treebuilder_set_element_text(PyObject *element, PyObject *data) +LOCAL(int) +treebuilder_flush_data(TreeBuilderObject* self) { - _Py_IDENTIFIER(text); - return treebuilder_set_element_text_or_tail( - element, data, &((ElementObject *) element)->text, &PyId_text); -} + PyObject *element = self->last; -static int -treebuilder_set_element_tail(PyObject *element, PyObject *data) -{ - _Py_IDENTIFIER(tail); - return treebuilder_set_element_text_or_tail( - element, data, &((ElementObject *) element)->tail, &PyId_tail); + if (!self->data) { + return 0; + } + + if (self->this == element) { + _Py_IDENTIFIER(text); + return treebuilder_set_element_text_or_tail( + element, &self->data, + &((ElementObject *) element)->text, &PyId_text); + } + else { + _Py_IDENTIFIER(tail); + return treebuilder_set_element_text_or_tail( + element, &self->data, + &((ElementObject *) element)->tail, &PyId_tail); + } } static int @@ -2480,16 +2499,8 @@ treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag, PyObject* this; elementtreestate *st = ET_STATE_GLOBAL; - if (self->data) { - if (self->this == self->last) { - if (treebuilder_set_element_text(self->last, self->data)) - return NULL; - } - else { - if (treebuilder_set_element_tail(self->last, self->data)) - return NULL; - } - self->data = NULL; + if (treebuilder_flush_data(self) < 0) { + return NULL; } if (!self->element_factory || self->element_factory == Py_None) { @@ -2594,15 +2605,8 @@ treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag) { PyObject* item; - if (self->data) { - if (self->this == self->last) { - if (treebuilder_set_element_text(self->last, self->data)) - return NULL; - } else { - if (treebuilder_set_element_tail(self->last, self->data)) - return NULL; - } - self->data = NULL; + if (treebuilder_flush_data(self) < 0) { + return NULL; } if (self->index == 0) {