From 737b173355b0473d134b1715dd8b1695eb023d8b Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Tue, 29 May 2012 06:02:56 +0300 Subject: [PATCH] Issue 14814: Add namespaces keyword arg to find(*) methods in _elementtree. Add attrib keyword to Element and SubElement in _elementtree. Patch developed with Ezio Melotti. --- Doc/library/xml.etree.elementtree.rst | 29 ++++--- Lib/test/test_xml_etree.py | 66 +++++++++++++++- Lib/xml/etree/ElementTree.py | 3 + Modules/_elementtree.c | 105 ++++++++++++++++++++------ 4 files changed, 169 insertions(+), 34 deletions(-) diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index d00781c1906..156571c5d1a 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -476,27 +476,30 @@ Element Objects .. versionadded:: 3.2 - .. method:: find(match) + .. method:: find(match, namespaces=None) Finds the first subelement matching *match*. *match* may be a tag name or a :ref:`path <elementtree-xpath>`. Returns an element instance - or ``None``. + or ``None``. *namespaces* is an optional mapping from namespace prefix + to full name. - .. method:: findall(match) + .. method:: findall(match, namespaces=None) Finds all matching subelements, by tag name or :ref:`path <elementtree-xpath>`. Returns a list containing all matching - elements in document order. + elements in document order. *namespaces* is an optional mapping from + namespace prefix to full name. - .. method:: findtext(match, default=None) + .. method:: findtext(match, default=None, namespaces=None) Finds text for the first subelement matching *match*. *match* may be a tag name or a :ref:`path <elementtree-xpath>`. Returns the text content of the first matching element, or *default* if no element was found. Note that if the matching element has no text content an empty string - is returned. + is returned. *namespaces* is an optional mapping from namespace prefix + to full name. .. method:: getchildren() @@ -528,11 +531,13 @@ Element Objects ..
versionadded:: 3.2 - .. method:: iterfind(match) + .. method:: iterfind(match, namespaces=None) Finds all matching subelements, by tag name or :ref:`path <elementtree-xpath>`. Returns an iterable yielding all - matching elements in document order. + matching elements in document order. *namespaces* is an optional mapping + from namespace prefix to full name. + .. versionadded:: 3.2 @@ -597,17 +602,17 @@ ElementTree Objects care. *element* is an element instance. - .. method:: find(match) + .. method:: find(match, namespaces=None) Same as :meth:`Element.find`, starting at the root of the tree. - .. method:: findall(match) + .. method:: findall(match, namespaces=None) Same as :meth:`Element.findall`, starting at the root of the tree. - .. method:: findtext(match, default=None) + .. method:: findtext(match, default=None, namespaces=None) Same as :meth:`Element.findtext`, starting at the root of the tree. @@ -630,7 +635,7 @@ ElementTree Objects to look for (default is to return all elements) - .. method:: iterfind(match) + .. method:: iterfind(match, namespaces=None) Same as :meth:`Element.iterfind`, starting at the root of the tree.
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index df1f7714405..cdba2b634c6 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -62,6 +62,22 @@ SAMPLE_XML_NS = """ """ +SAMPLE_XML_NS_ELEMS = """ + + + + Apples + Bananas + + + + + African Coffee Table + 80 + 120 + + +""" def sanity(): """ @@ -1995,6 +2011,17 @@ class NoAcceleratorTest(unittest.TestCase): self.assertEqual(pyET.SubElement.__module__, 'xml.etree.ElementTree') +class NamespaceParseTest(unittest.TestCase): + def test_find_with_namespace(self): + nsmap = {'h': 'hello', 'f': 'foo'} + doc = ET.fromstring(SAMPLE_XML_NS_ELEMS) + + self.assertEqual(len(doc.findall('{hello}table', nsmap)), 1) + self.assertEqual(len(doc.findall('.//{hello}td', nsmap)), 2) + self.assertEqual(len(doc.findall('.//{foo}name', nsmap)), 1) + + + class ElementSlicingTest(unittest.TestCase): def _elem_tags(self, elemlist): return [e.tag for e in elemlist] @@ -2102,6 +2129,41 @@ class ParseErrorTest(unittest.TestCase): ERRORS.codes[ERRORS.XML_ERROR_SYNTAX]) +class KeywordArgsTest(unittest.TestCase): + # Test various issues with keyword arguments passed to ET.Element + # constructor and methods + def test_issue14818(self): + x = ET.XML("foo") + self.assertEqual(x.find('a', None), + x.find(path='a', namespaces=None)) + self.assertEqual(x.findtext('a', None, None), + x.findtext(path='a', default=None, namespaces=None)) + self.assertEqual(x.findall('a', None), + x.findall(path='a', namespaces=None)) + self.assertEqual(list(x.iterfind('a', None)), + list(x.iterfind(path='a', namespaces=None))) + + self.assertEqual(ET.Element('a').attrib, {}) + elements = [ + ET.Element('a', dict(href="#", id="foo")), + ET.Element('a', attrib=dict(href="#", id="foo")), + ET.Element('a', dict(href="#"), id="foo"), + ET.Element('a', href="#", id="foo"), + ET.Element('a', dict(href="#", id="foo"), href="#", id="foo"), + ] + for e in elements: + self.assertEqual(e.tag, 'a') + self.assertEqual(e.attrib, dict(href="#", 
id="foo")) + + e2 = ET.SubElement(elements[0], 'foobar', attrib={'key1': 'value1'}) + self.assertEqual(e2.attrib['key1'], 'value1') + + with self.assertRaisesRegex(TypeError, 'must be dict, not str'): + ET.Element('a', "I'm not a dict") + with self.assertRaisesRegex(TypeError, 'must be dict, not str'): + ET.Element('a', attrib="I'm not a dict") + + # -------------------------------------------------------------------- @@ -2157,7 +2219,9 @@ def test_main(module=pyET): StringIOTest, ParseErrorTest, ElementTreeTest, - TreeBuilderTest] + NamespaceParseTest, + TreeBuilderTest, + KeywordArgsTest] if module is pyET: # Run the tests specific to the Python implementation test_classes += [NoAcceleratorTest] diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 5f974f65b08..e068fc2443d 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -205,6 +205,9 @@ class Element: # constructor def __init__(self, tag, attrib={}, **extra): + if not isinstance(attrib, dict): + raise TypeError("attrib must be dict, not %s" % ( + attrib.__class__.__name__,)) attrib = attrib.copy() attrib.update(extra) self.tag = tag diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index d74b4972f55..f2f370fe22e 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -347,6 +347,41 @@ element_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return (PyObject *)e; } +/* Helper function for extracting the attrib dictionary from a keywords dict. + * This is required by some constructors/functions in this module that can + * either accept attrib as a keyword argument or all attributes splashed + * directly into *kwds. + * If there is no 'attrib' keyword, return an empty dict. 
+ */ +static PyObject* +get_attrib_from_keywords(PyObject *kwds) +{ + PyObject *attrib_str = PyUnicode_FromString("attrib"); + PyObject *attrib = PyDict_GetItem(kwds, attrib_str); + + if (attrib) { + /* If attrib was found in kwds, copy its value and remove it from + * kwds + */ + if (!PyDict_Check(attrib)) { + Py_DECREF(attrib_str); + PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s", + Py_TYPE(attrib)->tp_name); + return NULL; + } + attrib = PyDict_Copy(attrib); + PyDict_DelItem(kwds, attrib_str); + } else { + attrib = PyDict_New(); + } + + Py_DECREF(attrib_str); + + if (attrib) + PyDict_Update(attrib, kwds); + return attrib; +} + static int element_init(PyObject *self, PyObject *args, PyObject *kwds) { @@ -358,13 +393,23 @@ element_init(PyObject *self, PyObject *args, PyObject *kwds) if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib)) return -1; - if (attrib || kwds) { - attrib = (attrib) ? PyDict_Copy(attrib) : PyDict_New(); + if (attrib) { + /* attrib passed as positional arg */ + attrib = PyDict_Copy(attrib); + if (!attrib) + return -1; + if (kwds) { + if (PyDict_Update(attrib, kwds) < 0) { + return -1; + } + } + } else if (kwds) { + /* have keywords args */ + attrib = get_attrib_from_keywords(kwds); if (!attrib) return -1; - if (kwds) - PyDict_Update(attrib, kwds); } else { + /* no attrib arg, no kwds, so no attributes */ Py_INCREF(Py_None); attrib = Py_None; } @@ -536,7 +581,7 @@ element_get_tail(ElementObject* self) } static PyObject* -subelement(PyObject* self, PyObject* args, PyObject* kw) +subelement(PyObject *self, PyObject *args, PyObject *kwds) { PyObject* elem; @@ -548,13 +593,23 @@ subelement(PyObject* self, PyObject* args, PyObject* kw) &PyDict_Type, &attrib)) return NULL; - if (attrib || kw) { - attrib = (attrib) ? 
PyDict_Copy(attrib) : PyDict_New(); + if (attrib) { + /* attrib passed as positional arg */ + attrib = PyDict_Copy(attrib); + if (!attrib) + return NULL; + if (kwds) { + if (PyDict_Update(attrib, kwds) < 0) { + return NULL; + } + } + } else if (kwds) { + /* have keyword args */ + attrib = get_attrib_from_keywords(kwds); if (!attrib) return NULL; - if (kw) - PyDict_Update(attrib, kw); } else { + /* no attrib arg, no kwds, so no attribute */ Py_INCREF(Py_None); attrib = Py_None; } @@ -881,13 +936,15 @@ element_extend(ElementObject* self, PyObject* args) } static PyObject* -element_find(ElementObject* self, PyObject* args) +element_find(ElementObject *self, PyObject *args, PyObject *kwds) { int i; PyObject* tag; PyObject* namespaces = Py_None; + static char *kwlist[] = {"path", "namespaces", 0}; - if (!PyArg_ParseTuple(args, "O|O:find", &tag, &namespaces)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:find", kwlist, + &tag, &namespaces)) return NULL; if (checkpath(tag) || namespaces != Py_None) { @@ -913,15 +970,17 @@ element_find(ElementObject* self, PyObject* args) } static PyObject* -element_findtext(ElementObject* self, PyObject* args) +element_findtext(ElementObject *self, PyObject *args, PyObject *kwds) { int i; PyObject* tag; PyObject* default_value = Py_None; PyObject* namespaces = Py_None; _Py_IDENTIFIER(findtext); + static char *kwlist[] = {"path", "default", "namespaces", 0}; - if (!PyArg_ParseTuple(args, "O|OO:findtext", &tag, &default_value, &namespaces)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:findtext", kwlist, + &tag, &default_value, &namespaces)) return NULL; if (checkpath(tag) || namespaces != Py_None) @@ -951,14 +1010,16 @@ element_findtext(ElementObject* self, PyObject* args) } static PyObject* -element_findall(ElementObject* self, PyObject* args) +element_findall(ElementObject *self, PyObject *args, PyObject *kwds) { int i; PyObject* out; PyObject* tag; PyObject* namespaces = Py_None; + static char *kwlist[] = {"path", 
"namespaces", 0}; - if (!PyArg_ParseTuple(args, "O|O:findall", &tag, &namespaces)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:findall", kwlist, + &tag, &namespaces)) return NULL; if (checkpath(tag) || namespaces != Py_None) { @@ -990,13 +1051,15 @@ element_findall(ElementObject* self, PyObject* args) } static PyObject* -element_iterfind(ElementObject* self, PyObject* args) +element_iterfind(ElementObject *self, PyObject *args, PyObject *kwds) { PyObject* tag; PyObject* namespaces = Py_None; _Py_IDENTIFIER(iterfind); + static char *kwlist[] = {"path", "namespaces", 0}; - if (!PyArg_ParseTuple(args, "O|O:iterfind", &tag, &namespaces)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:iterfind", kwlist, + &tag, &namespaces)) return NULL; return _PyObject_CallMethodId( @@ -1567,9 +1630,9 @@ static PyMethodDef element_methods[] = { {"get", (PyCFunction) element_get, METH_VARARGS}, {"set", (PyCFunction) element_set, METH_VARARGS}, - {"find", (PyCFunction) element_find, METH_VARARGS}, - {"findtext", (PyCFunction) element_findtext, METH_VARARGS}, - {"findall", (PyCFunction) element_findall, METH_VARARGS}, + {"find", (PyCFunction) element_find, METH_VARARGS | METH_KEYWORDS}, + {"findtext", (PyCFunction) element_findtext, METH_VARARGS | METH_KEYWORDS}, + {"findall", (PyCFunction) element_findall, METH_VARARGS | METH_KEYWORDS}, {"append", (PyCFunction) element_append, METH_VARARGS}, {"extend", (PyCFunction) element_extend, METH_VARARGS}, @@ -1578,7 +1641,7 @@ static PyMethodDef element_methods[] = { {"iter", (PyCFunction) element_iter, METH_VARARGS}, {"itertext", (PyCFunction) element_itertext, METH_VARARGS}, - {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS}, + {"iterfind", (PyCFunction) element_iterfind, METH_VARARGS | METH_KEYWORDS}, {"getiterator", (PyCFunction) element_iter, METH_VARARGS}, {"getchildren", (PyCFunction) element_getchildren, METH_VARARGS},