diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 8cd01eba86b..c73e4bed1c6 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -1709,6 +1709,126 @@ class BasicElementTest(ElementTestCase, unittest.TestCase): self.assertEqual(e2[0].tag, 'dogs') +class BadElementTest(ElementTestCase, unittest.TestCase): + def test_extend_mutable_list(self): + class X: + @property + def __class__(self): + L[:] = [ET.Element('baz')] + return ET.Element + L = [X()] + e = ET.Element('foo') + try: + e.extend(L) + except TypeError: + pass + + class Y(X, ET.Element): + pass + L = [Y('x')] + e = ET.Element('foo') + e.extend(L) + + def test_extend_mutable_list2(self): + class X: + @property + def __class__(self): + del L[:] + return ET.Element + L = [X(), ET.Element('baz')] + e = ET.Element('foo') + try: + e.extend(L) + except TypeError: + pass + + class Y(X, ET.Element): + pass + L = [Y('bar'), ET.Element('baz')] + e = ET.Element('foo') + e.extend(L) + + def test_remove_with_mutating(self): + class X(ET.Element): + def __eq__(self, o): + del e[:] + return False + e = ET.Element('foo') + e.extend([X('bar')]) + self.assertRaises(ValueError, e.remove, ET.Element('baz')) + + e = ET.Element('foo') + e.extend([ET.Element('bar')]) + self.assertRaises(ValueError, e.remove, X('baz')) + + +class MutatingElementPath(str): + def __new__(cls, elem, *args): + self = str.__new__(cls, *args) + self.elem = elem + return self + def __eq__(self, o): + del self.elem[:] + return True +MutatingElementPath.__hash__ = str.__hash__ + +class BadElementPath(str): + def __eq__(self, o): + raise 1/0 +BadElementPath.__hash__ = str.__hash__ + +class BadElementPathTest(ElementTestCase, unittest.TestCase): + def setUp(self): + super().setUp() + from xml.etree import ElementPath + self.path_cache = ElementPath._cache + ElementPath._cache = {} + + def tearDown(self): + from xml.etree import ElementPath + ElementPath._cache = self.path_cache + super().tearDown() + + def test_find_with_mutating(self): + e = ET.Element('foo') + e.extend([ET.Element('bar')]) + e.find(MutatingElementPath(e, 'x')) + + def test_find_with_error(self): + e = ET.Element('foo') + e.extend([ET.Element('bar')]) + try: + e.find(BadElementPath('x')) + except ZeroDivisionError: + pass + + def test_findtext_with_mutating(self): + e = ET.Element('foo') + e.extend([ET.Element('bar')]) + e.findtext(MutatingElementPath(e, 'x')) + + def test_findtext_with_error(self): + e = ET.Element('foo') + e.extend([ET.Element('bar')]) + try: + e.findtext(BadElementPath('x')) + except ZeroDivisionError: + pass + + def test_findall_with_mutating(self): + e = ET.Element('foo') + e.extend([ET.Element('bar')]) + e.findall(MutatingElementPath(e, 'x')) + + def test_findall_with_error(self): + e = ET.Element('foo') + e.extend([ET.Element('bar')]) + try: + e.findall(BadElementPath('x')) + except ZeroDivisionError: + pass + + class ElementTreeTypeTest(unittest.TestCase): def test_istype(self): self.assertIsInstance(ET.ParseError, type) @@ -2556,6 +2676,8 @@ def test_main(module=None): ModuleTest, ElementSlicingTest, BasicElementTest, + BadElementTest, + BadElementPathTest, ElementTreeTest, IOTest, ParseErrorTest, diff --git a/Misc/NEWS b/Misc/NEWS index 141d2e0a54b..9ece51a4900 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -49,6 +49,9 @@ Core and Builtins Library ------- +- Issue #24091: Fixed various crashes in corner cases in C implementation of + ElementTree. + - Issue #21931: msilib.FCICreate() now raises TypeError in the case of a bad argument instead of a ValueError with a bogus FCI error number. Patch by Jeffrey Armstrong. diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index ebdb95b05ec..ea770517c14 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -1094,7 +1094,7 @@ _elementtree_Element_extend(ElementObject *self, PyObject *elements) /*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/ { PyObject* seq; - Py_ssize_t i, seqlen = 0; + Py_ssize_t i; seq = PySequence_Fast(elements, ""); if (!seq) { @@ -1105,22 +1105,25 @@ _elementtree_Element_extend(ElementObject *self, PyObject *elements) return NULL; } - seqlen = PySequence_Size(seq); - for (i = 0; i < seqlen; i++) { + for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) { PyObject* element = PySequence_Fast_GET_ITEM(seq, i); - if (!PyObject_IsInstance(element, (PyObject *)&Element_Type)) { - Py_DECREF(seq); + Py_INCREF(element); + if (!PyObject_TypeCheck(element, (PyTypeObject *)&Element_Type)) { PyErr_Format( PyExc_TypeError, "expected an Element, not \"%.200s\"", Py_TYPE(element)->tp_name); + Py_DECREF(seq); + Py_DECREF(element); return NULL; } if (element_add_subelement(self, element) < 0) { Py_DECREF(seq); + Py_DECREF(element); return NULL; } + Py_DECREF(element); } Py_DECREF(seq); @@ -1156,11 +1159,16 @@ _elementtree_Element_find_impl(ElementObject *self, PyObject *path, for (i = 0; i < self->extra->length; i++) { PyObject* item = self->extra->children[i]; - if (Element_CheckExact(item) && - PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ) == 1) { - Py_INCREF(item); + int rc; + if (!Element_CheckExact(item)) + continue; + Py_INCREF(item); + rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ); + if (rc > 0) return item; - } + Py_DECREF(item); + if (rc < 0) + return NULL; } Py_RETURN_NONE; @@ -1197,14 +1205,24 @@ _elementtree_Element_findtext_impl(ElementObject *self, PyObject *path, for (i = 0; i < self->extra->length; i++) { ElementObject* item = (ElementObject*) self->extra->children[i]; - if (Element_CheckExact(item) && - (PyObject_RichCompareBool(item->tag, path, Py_EQ) == 1)) { + int rc; + if (!Element_CheckExact(item)) + continue; + Py_INCREF(item); + rc = PyObject_RichCompareBool(item->tag, path, Py_EQ); + if (rc > 0) { PyObject* text = element_get_text(item); - if (text == Py_None) + if (text == Py_None) { + Py_DECREF(item); return PyUnicode_New(0, 0); + } Py_XINCREF(text); + Py_DECREF(item); return text; } + Py_DECREF(item); + if (rc < 0) + return NULL; } Py_INCREF(default_value); @@ -1245,13 +1263,17 @@ _elementtree_Element_findall_impl(ElementObject *self, PyObject *path, for (i = 0; i < self->extra->length; i++) { PyObject* item = self->extra->children[i]; - if (Element_CheckExact(item) && - PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ) == 1) { - if (PyList_Append(out, item) < 0) { - Py_DECREF(out); - return NULL; - } + int rc; + if (!Element_CheckExact(item)) + continue; + Py_INCREF(item); + rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, tag, Py_EQ); + if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) { + Py_DECREF(item); + Py_DECREF(out); + return NULL; } + Py_DECREF(item); } return out; @@ -1507,6 +1529,8 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) /*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/ { Py_ssize_t i; + int rc; + PyObject *found; if (!self->extra) { /* element has no children, so raise exception */ @@ -1520,11 +1544,14 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) for (i = 0; i < self->extra->length; i++) { if (self->extra->children[i] == subelement) break; - if (PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ) == 1) + rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ); + if (rc > 0) break; + if (rc < 0) + return NULL; } - if (i == self->extra->length) { + if (i >= self->extra->length) { /* subelement is not in children, so raise exception */ PyErr_SetString( PyExc_ValueError, @@ -1533,13 +1560,13 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) return NULL; } - Py_DECREF(self->extra->children[i]); + found = self->extra->children[i]; self->extra->length--; - for (; i < self->extra->length; i++) self->extra->children[i] = self->extra->children[i+1]; + Py_DECREF(found); Py_RETURN_NONE; } @@ -2033,6 +2060,7 @@ elementiter_next(ElementIterObject *it) */ ElementObject *cur_parent; Py_ssize_t child_index; + int rc; while (1) { /* Handle the case reached in the beginning and end of iteration, where @@ -2054,14 +2082,22 @@ elementiter_next(ElementIterObject *it) } it->root_done = 1; - if (it->sought_tag == Py_None || - PyObject_RichCompareBool(it->root_element->tag, - it->sought_tag, Py_EQ) == 1) { + rc = (it->sought_tag == Py_None); + if (!rc) { + rc = PyObject_RichCompareBool(it->root_element->tag, + it->sought_tag, Py_EQ); + if (rc < 0) + return NULL; + } + if (rc) { if (it->gettext) { PyObject *text = element_get_text(it->root_element); if (!text) return NULL; - if (PyObject_IsTrue(text)) { + rc = PyObject_IsTrue(text); + if (rc < 0) + return NULL; + if (rc) { Py_INCREF(text); return text; } @@ -2093,18 +2129,26 @@ elementiter_next(ElementIterObject *it) PyObject *text = element_get_text(child); if (!text) return NULL; - if (PyObject_IsTrue(text)) { + rc = PyObject_IsTrue(text); + if (rc < 0) + return NULL; + if (rc) { Py_INCREF(text); return text; } - } else if (it->sought_tag == Py_None || - PyObject_RichCompareBool(child->tag, - it->sought_tag, Py_EQ) == 1) { - Py_INCREF(child); - return (PyObject *)child; + } else { + rc = (it->sought_tag == Py_None); + if (!rc) { + rc = PyObject_RichCompareBool(child->tag, + it->sought_tag, Py_EQ); + if (rc < 0) + return NULL; + } + if (rc) { + Py_INCREF(child); + return (PyObject *)child; + } } - else - continue; } else { PyObject *tail; @@ -2124,9 +2168,14 @@ elementiter_next(ElementIterObject *it) * this is because itertext() is supposed to only return *inner* * text, not text following the element it began iteration with. */ - if (it->parent_stack->parent && PyObject_IsTrue(tail)) { - Py_INCREF(tail); - return tail; + if (it->parent_stack->parent) { + rc = PyObject_IsTrue(tail); + if (rc < 0) + return NULL; + if (rc) { + Py_INCREF(tail); + return tail; + } } } } @@ -2184,20 +2233,21 @@ static PyObject * create_elementiter(ElementObject *self, PyObject *tag, int gettext) { ElementIterObject *it; - PyObject *star = NULL; it = PyObject_GC_New(ElementIterObject, &ElementIter_Type); if (!it) return NULL; - if (PyUnicode_Check(tag)) - star = PyUnicode_FromString("*"); - else if (PyBytes_Check(tag)) - star = PyBytes_FromString("*"); - - if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1) - tag = Py_None; - Py_XDECREF(star); + if (PyUnicode_Check(tag)) { + if (PyUnicode_READY(tag) < 0) + return NULL; + if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*') + tag = Py_None; + } + else if (PyBytes_Check(tag)) { + if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*') + tag = Py_None; + } Py_INCREF(tag); it->sought_tag = tag;