bpo-14465: Add an indent() function to xml.etree.ElementTree to pretty-print XML trees (GH-15200)
This commit is contained in:
parent
81446fd0d4
commit
b5d3ceea48
|
@ -572,6 +572,18 @@ Functions
|
|||
.. versionadded:: 3.2
|
||||
|
||||
|
||||
.. function:: indent(tree, space="  ", level=0)
|
||||
|
||||
Appends whitespace to the subtree to indent the tree visually.
|
||||
This can be used to generate pretty-printed XML output.
|
||||
*tree* can be an Element or ElementTree. *space* is the whitespace
|
||||
string that will be inserted for each indentation level, two space
|
||||
characters by default. For indenting partial subtrees inside of an
|
||||
already indented tree, pass the initial indentation level as *level*.
|
||||
|
||||
.. versionadded:: 3.9
|
||||
|
||||
|
||||
.. function:: iselement(element)
|
||||
|
||||
Checks if an object appears to be a valid element object. *element* is an
|
||||
|
|
|
@ -788,6 +788,123 @@ class ElementTreeTest(unittest.TestCase):
|
|||
elem = ET.fromstring("<html><body>text</body></html>")
|
||||
self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
|
||||
|
||||
def test_indent(self):
    """Check ET.indent() with the default two-space indentation unit."""
    # An element without children is left untouched.
    elem = ET.XML("<root></root>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<root />')

    # A single child is placed on its own line, one level deep.
    elem = ET.XML("<html><body>text</body></html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')

    # Pre-existing whitespace-only text/tail is replaced, not extended.
    elem = ET.XML("<html> <body>text</body>  </html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')

    # A non-whitespace tail is significant content and must be preserved.
    elem = ET.XML("<html><body>text</body>tail</html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>tail</html>')

    # Mixed whitespace ("\n", "\t") between siblings is normalised and
    # nested children are indented one extra level each.
    elem = ET.XML("<html><body><p>par</p>\n<p>text</p>\t<p><br/></p></body></html>")
    ET.indent(elem)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'    <p>par</p>\n'
        b'    <p>text</p>\n'
        b'    <p>\n'
        b'      <br />\n'
        b'    </p>\n'
        b'  </body>\n'
        b'</html>'
    )

    # Mixed content ("pre<br/>post") keeps its text and tail unchanged.
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
    ET.indent(elem)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'    <p>pre<br />post</p>\n'
        b'    <p>text</p>\n'
        b'  </body>\n'
        b'</html>'
    )
|
||||
|
||||
def test_indent_space(self):
    """Check that a custom *space* string is used as the indentation unit."""
    markup = "<html><body><p>pre<br/>post</p><p>text</p></body></html>"

    # One tab per level.
    tabbed = ET.XML(markup)
    ET.indent(tabbed, space='\t')
    expected_tabbed = (
        b'<html>\n'
        b'\t<body>\n'
        b'\t\t<p>pre<br />post</p>\n'
        b'\t\t<p>text</p>\n'
        b'\t</body>\n'
        b'</html>'
    )
    self.assertEqual(ET.tostring(tabbed), expected_tabbed)

    # An empty unit still inserts the line breaks, just no indentation.
    flat = ET.XML(markup)
    ET.indent(flat, space='')
    expected_flat = (
        b'<html>\n'
        b'<body>\n'
        b'<p>pre<br />post</p>\n'
        b'<p>text</p>\n'
        b'</body>\n'
        b'</html>'
    )
    self.assertEqual(ET.tostring(flat), expected_flat)
|
||||
|
||||
def test_indent_space_caching(self):
    """Check that ET.indent() reuses one string object per indentation level."""
    elem = ET.XML("<html><body><p>par</p><p>text</p><p><br/></p><p /></body></html>")
    ET.indent(elem)

    # Tails: root has none, <body> dedents to level 0, the last <p>
    # dedents to level 1, all other children sit at level 2.
    self.assertEqual(
        {el.tail for el in elem.iter()},
        {None, "\n", "\n  ", "\n    "}
    )
    # Texts: one indentation string per nesting depth plus the real content.
    self.assertEqual(
        {el.text for el in elem.iter()},
        {None, "\n  ", "\n    ", "\n      ", "par", "text"}
    )
    # Equal tails must be the *same* object, i.e. as many distinct ids
    # as distinct values.
    self.assertEqual(
        len({el.tail for el in elem.iter()}),
        len({id(el.tail) for el in elem.iter()}),
    )
|
||||
|
||||
def test_indent_level(self):
    """Check the *level* argument: validation and the initial indentation offset."""
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")

    # A negative level is rejected before the tree is touched.
    with self.assertRaises(ValueError):
        ET.indent(elem, level=-1)
    self.assertEqual(
        ET.tostring(elem),
        b"<html><body><p>pre<br />post</p><p>text</p></body></html>"
    )

    # level=2 with the default two-space unit: children start at
    # (2 + 1) * 2 = 6 spaces and the closing root tag at 2 * 2 = 4.
    ET.indent(elem, level=2)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'      <body>\n'
        b'        <p>pre<br />post</p>\n'
        b'        <p>text</p>\n'
        b'      </body>\n'
        b'    </html>'
    )

    # level=1 with a one-space unit: children at 2 spaces, closing root
    # tag at 1 space.
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
    ET.indent(elem, level=1, space=' ')
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'   <p>pre<br />post</p>\n'
        b'   <p>text</p>\n'
        b'  </body>\n'
        b' </html>'
    )
|
||||
|
||||
def test_tostring_default_namespace(self):
|
||||
elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
|
||||
self.assertEqual(
|
||||
|
|
|
@ -76,7 +76,7 @@ __all__ = [
|
|||
"dump",
|
||||
"Element", "ElementTree",
|
||||
"fromstring", "fromstringlist",
|
||||
"iselement", "iterparse",
|
||||
"indent", "iselement", "iterparse",
|
||||
"parse", "ParseError",
|
||||
"PI", "ProcessingInstruction",
|
||||
"QName",
|
||||
|
@ -1185,6 +1185,57 @@ def dump(elem):
|
|||
if not tail or tail[-1] != "\n":
|
||||
sys.stdout.write("\n")
|
||||
|
||||
|
||||
def indent(tree, space="  ", level=0):
    """Indent an XML document by inserting newlines and indentation space
    after elements.

    *tree* is the ElementTree or Element to modify.  The tail of the (root)
    element itself is left untouched; its text and the text/tail of all
    elements in its subtree are replaced where they are empty or consist
    of whitespace only.  Text and tails with other content are preserved.
    A root element without children is not modified at all.

    *space* is the whitespace to insert for each indentation level, two
    space characters by default.

    *level* is the initial indentation level.  Setting this to a higher
    value than 0 can be used for indenting subtrees that are more deeply
    nested inside of a document.

    Raises ValueError if *level* is negative.  Returns None; the tree is
    modified in place.
    """
    if isinstance(tree, ElementTree):
        tree = tree.getroot()
    if level < 0:
        raise ValueError(f"Initial indentation level must be >= 0, got {level}")
    if not len(tree):
        # Nothing to indent: keep "<root />" style elements unchanged.
        return

    # Reduce the memory consumption by reusing indentation strings.
    # indentations[i] is the string for relative depth i below the root.
    indentations = ["\n" + level * space]

    def _indent_children(elem, level):
        # Only ever called for elements that have at least one child.
        # Start a new indentation level for the first child.
        child_level = level + 1
        try:
            child_indentation = indentations[child_level]
        except IndexError:
            # First visit to this depth: build the string once and cache it.
            child_indentation = indentations[level] + space
            indentations.append(child_indentation)

        if not elem.text or not elem.text.strip():
            elem.text = child_indentation

        for child in elem:
            if len(child):
                _indent_children(child, child_level)
            if not child.tail or not child.tail.strip():
                child.tail = child_indentation

        # Dedent after the last child by overwriting the previous indentation.
        # A last child with significant tail content is left as it is.
        if not child.tail.strip():
            child.tail = indentations[level]

    _indent_children(tree, 0)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
# parsing
|
||||
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Add an xml.etree.ElementTree.indent() function for pretty-printing XML trees.
|
||||
Contributed by Stefan Behnel.
|
Loading…
Reference in New Issue