From 8008f2aba0c063a882c33ebd4b39a5a560deb8c0 Mon Sep 17 00:00:00 2001 From: Ezio Melotti Date: Fri, 18 Nov 2011 17:34:26 +0200 Subject: [PATCH] #4147: minidom's toprettyxml no longer adds whitespace around a text node when it is the only child of an element. Initial patch by Dan Kenigsberg. --- Lib/test/test_minidom.py | 37 ++++++++++++++++++++++++++++++++----- Lib/xml/dom/minidom.py | 14 +++++++++----- Misc/NEWS | 4 ++++ 3 files changed, 45 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py index 126bdb15d25..f3fa1b8c581 100644 --- a/Lib/test/test_minidom.py +++ b/Lib/test/test_minidom.py @@ -446,12 +446,39 @@ class MinidomTest(unittest.TestCase): dom.unlink() self.confirm(domstr == str.replace("\n", "\r\n")) + def test_toprettyxml_with_text_nodes(self): + # see issue #4147, text nodes are not indented + decl = '\n' + self.assertEqual(parseString('A').toprettyxml(), + decl + 'A\n') + self.assertEqual(parseString('AA').toprettyxml(), + decl + '\n\tA\n\tA\n\n') + self.assertEqual(parseString('AA').toprettyxml(), + decl + '\n\tA\n\tA\n\n') + self.assertEqual(parseString('AA').toprettyxml(), + decl + '\n\tA\n\tA\n\n') + self.assertEqual(parseString('AAA').toprettyxml(), + decl + '\n\tA\n\tA\n\tA\n\n') + + def test_toprettyxml_with_adjacent_text_nodes(self): + # see issue #4147, adjacent text nodes are indented normally + dom = Document() + elem = dom.createElement('elem') + elem.appendChild(dom.createTextNode('TEXT')) + elem.appendChild(dom.createTextNode('TEXT')) + dom.appendChild(elem) + decl = '\n' + self.assertEqual(dom.toprettyxml(), + decl + '\n\tTEXT\n\tTEXT\n\n') + def test_toprettyxml_preserves_content_of_text_node(self): - str = 'B' - dom = parseString(str) - dom2 = parseString(dom.toprettyxml()) - self.assertEqual(dom.childNodes[0].childNodes[0].toxml(), - dom2.childNodes[0].childNodes[0].toxml()) + # see issue #4147 + for str in ('A', 'C'): + dom = parseString(str) + dom2 = parseString(dom.toprettyxml()) + self.assertEqual( + dom.getElementsByTagName('B')[0].childNodes[0].toxml(), + dom2.getElementsByTagName('B')[0].childNodes[0].toxml()) def testProcessingInstruction(self): dom = parseString('') diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index 386494d288e..f23ad053333 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -837,11 +837,15 @@ class Element(Node): writer.write("\"") if self.childNodes: writer.write(">") - if self.childNodes[0].nodeType != Node.TEXT_NODE: + if (len(self.childNodes) == 1 and + self.childNodes[0].nodeType == Node.TEXT_NODE): + self.childNodes[0].writexml(writer, '', '', '') + else: writer.write(newl) - for node in self.childNodes: - node.writexml(writer,indent+addindent,addindent,newl) - writer.write("%s%s" % (indent,self.tagName,newl)) + for node in self.childNodes: + node.writexml(writer, indent+addindent, addindent, newl) + writer.write(indent) + writer.write("%s" % (self.tagName, newl)) else: writer.write("/>%s"%(newl)) @@ -1063,7 +1067,7 @@ class Text(CharacterData): return newText def writexml(self, writer, indent="", addindent="", newl=""): - _write_data(writer, self.data) + _write_data(writer, "%s%s%s" % (indent, self.data, newl)) # DOM Level 3 (WD 9 April 2002) diff --git a/Misc/NEWS b/Misc/NEWS index 4fb9ff6305a..8a82b099ff8 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -76,6 +76,10 @@ Core and Builtins Library ------- +- Issue #4147: minidom's toprettyxml no longer adds whitespace around a text + node when it is the only child of an element. Initial patch by Dan + Kenigsberg. + - Issues #1745761, #755670, #13357, #12629, #1200313: HTMLParser now correctly handles non-valid attributes, including adjacent and unquoted attributes.