From b5d3ceea48c181b3e2c6c67424317afed606bd39 Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Fri, 23 Aug 2019 16:44:25 +0200 Subject: [PATCH] bpo-14465: Add an indent() function to xml.etree.ElementTree to pretty-print XML trees (GH-15200) --- Doc/library/xml.etree.elementtree.rst | 12 ++ Lib/test/test_xml_etree.py | 117 ++++++++++++++++++ Lib/xml/etree/ElementTree.py | 53 +++++++- .../2019-08-10-18-50-04.bpo-14465.qZGC4g.rst | 2 + 4 files changed, 183 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2019-08-10-18-50-04.bpo-14465.qZGC4g.rst diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index 9f46755c268..6047e6e29b9 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -572,6 +572,18 @@ Functions .. versionadded:: 3.2 +.. function:: indent(tree, space="  ", level=0) + + Appends whitespace to the subtree to indent the tree visually. + This can be used to generate pretty-printed XML output. + *tree* can be an Element or ElementTree. *space* is the whitespace + string that will be inserted for each indentation level, two space + characters by default. For indenting partial subtrees inside of an + already indented tree, pass the initial indentation level as *level*. + + .. versionadded:: 3.9 + + .. function:: iselement(element) Checks if an object appears to be a valid element object. 
*element* is an diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index b2492cda848..db06aceb146 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -788,6 +788,123 @@ class ElementTreeTest(unittest.TestCase): elem = ET.fromstring("text") self.assertEqual(ET.tostring(elem), b'text') + def test_indent(self): + elem = ET.XML("") + ET.indent(elem) + self.assertEqual(ET.tostring(elem), b'') + + elem = ET.XML("text") + ET.indent(elem) + self.assertEqual(ET.tostring(elem), b'\n text\n') + + elem = ET.XML(" text ") + ET.indent(elem) + self.assertEqual(ET.tostring(elem), b'\n text\n') + + elem = ET.XML("texttail") + ET.indent(elem) + self.assertEqual(ET.tostring(elem), b'\n texttail') + + elem = ET.XML("

par

\n

text

\t


") + ET.indent(elem) + self.assertEqual( + ET.tostring(elem), + b'\n' + b' \n' + b'

par

\n' + b'

text

\n' + b'

\n' + b'
\n' + b'

\n' + b' \n' + b'' + ) + + elem = ET.XML("

pre
post

text

") + ET.indent(elem) + self.assertEqual( + ET.tostring(elem), + b'\n' + b' \n' + b'

pre
post

\n' + b'

text

\n' + b' \n' + b'' + ) + + def test_indent_space(self): + elem = ET.XML("

pre
post

text

") + ET.indent(elem, space='\t') + self.assertEqual( + ET.tostring(elem), + b'\n' + b'\t\n' + b'\t\t

pre
post

\n' + b'\t\t

text

\n' + b'\t\n' + b'' + ) + + elem = ET.XML("

pre
post

text

") + ET.indent(elem, space='') + self.assertEqual( + ET.tostring(elem), + b'\n' + b'\n' + b'

pre
post

\n' + b'

text

\n' + b'\n' + b'' + ) + + def test_indent_space_caching(self): + elem = ET.XML("

par

text


") + ET.indent(elem) + self.assertEqual( + {el.tail for el in elem.iter()}, + {None, "\n", "\n ", "\n "} + ) + self.assertEqual( + {el.text for el in elem.iter()}, + {None, "\n ", "\n ", "\n ", "par", "text"} + ) + self.assertEqual( + len({el.tail for el in elem.iter()}), + len({id(el.tail) for el in elem.iter()}), + ) + + def test_indent_level(self): + elem = ET.XML("

pre
post

text

") + with self.assertRaises(ValueError): + ET.indent(elem, level=-1) + self.assertEqual( + ET.tostring(elem), + b"

pre
post

text

" + ) + + ET.indent(elem, level=2) + self.assertEqual( + ET.tostring(elem), + b'\n' + b' \n' + b'

pre
post

\n' + b'

text

\n' + b' \n' + b' ' + ) + + elem = ET.XML("

pre
post

text

") + ET.indent(elem, level=1, space=' ') + self.assertEqual( + ET.tostring(elem), + b'\n' + b' \n' + b'

pre
post

\n' + b'

text

\n' + b' \n' + b' ' + ) + def test_tostring_default_namespace(self): elem = ET.XML('') self.assertEqual( diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 645e999a0be..431ecd0dddf 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -76,7 +76,7 @@ __all__ = [ "dump", "Element", "ElementTree", "fromstring", "fromstringlist", - "iselement", "iterparse", + "indent", "iselement", "iterparse", "parse", "ParseError", "PI", "ProcessingInstruction", "QName", @@ -1185,6 +1185,57 @@ def dump(elem): if not tail or tail[-1] != "\n": sys.stdout.write("\n") + +def indent(tree, space=" ", level=0): + """Indent an XML document by inserting newlines and indentation space + after elements. + + *tree* is the ElementTree or Element to modify. The (root) element + itself will not be changed, but the tail text of all elements in its + subtree will be adapted. + + *space* is the whitespace to insert for each indentation level, two + space characters by default. + + *level* is the initial indentation level. Setting this to a higher + value than 0 can be used for indenting subtrees that are more deeply + nested inside of a document. + """ + if isinstance(tree, ElementTree): + tree = tree.getroot() + if level < 0: + raise ValueError(f"Initial indentation level must be >= 0, got {level}") + if not len(tree): + return + + # Reduce the memory consumption by reusing indentation strings. + indentations = ["\n" + level * space] + + def _indent_children(elem, level): + # Start a new indentation level for the first child. 
+ child_level = level + 1 + try: + child_indentation = indentations[child_level] + except IndexError: + child_indentation = indentations[level] + space + indentations.append(child_indentation) + + if not elem.text or not elem.text.strip(): + elem.text = child_indentation + + for child in elem: + if len(child): + _indent_children(child, child_level) + if not child.tail or not child.tail.strip(): + child.tail = child_indentation + + # Dedent after the last child by overwriting the previous indentation. + if not child.tail.strip(): + child.tail = indentations[level] + + _indent_children(tree, 0) + + # -------------------------------------------------------------------- # parsing diff --git a/Misc/NEWS.d/next/Library/2019-08-10-18-50-04.bpo-14465.qZGC4g.rst b/Misc/NEWS.d/next/Library/2019-08-10-18-50-04.bpo-14465.qZGC4g.rst new file mode 100644 index 00000000000..5f8b7a0934d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-08-10-18-50-04.bpo-14465.qZGC4g.rst @@ -0,0 +1,2 @@ +Add an xml.etree.ElementTree.indent() function for pretty-printing XML trees. +Contributed by Stefan Behnel.