Issue #2170: refactored xml.dom.minidom.normalize, increasing both

its clarity and its speed.
This commit is contained in:
R. David Murray 2009-04-09 21:54:50 +00:00
parent ad95826c33
commit 0374a82f75
3 changed files with 175 additions and 18 deletions

View File

@ -790,6 +790,167 @@ class MinidomTest(unittest.TestCase):
"testNormalize -- single empty node removed") "testNormalize -- single empty node removed")
doc.unlink() doc.unlink()
def testNormalizeCombineAndNextSibling(self):
doc = parseString("<doc/>")
root = doc.documentElement
root.appendChild(doc.createTextNode("first"))
root.appendChild(doc.createTextNode("second"))
root.appendChild(doc.createElement("i"))
self.confirm(len(root.childNodes) == 3
and root.childNodes.length == 3,
"testNormalizeCombineAndNextSibling -- preparation")
doc.normalize()
self.confirm(len(root.childNodes) == 2
and root.childNodes.length == 2
and root.firstChild.data == "firstsecond"
and root.firstChild is not root.lastChild
and root.firstChild.nextSibling is root.lastChild
and root.firstChild.previousSibling is None
and root.lastChild.previousSibling is root.firstChild
and root.lastChild.nextSibling is None
, "testNormalizeCombinedAndNextSibling -- result")
doc.unlink()
def testNormalizeDeleteWithPrevSibling(self):
doc = parseString("<doc/>")
root = doc.documentElement
root.appendChild(doc.createTextNode("first"))
root.appendChild(doc.createTextNode(""))
self.confirm(len(root.childNodes) == 2
and root.childNodes.length == 2,
"testNormalizeDeleteWithPrevSibling -- preparation")
doc.normalize()
self.confirm(len(root.childNodes) == 1
and root.childNodes.length == 1
and root.firstChild.data == "first"
and root.firstChild is root.lastChild
and root.firstChild.nextSibling is None
and root.firstChild.previousSibling is None
, "testNormalizeDeleteWithPrevSibling -- result")
doc.unlink()
def testNormalizeDeleteWithNextSibling(self):
doc = parseString("<doc/>")
root = doc.documentElement
root.appendChild(doc.createTextNode(""))
root.appendChild(doc.createTextNode("second"))
self.confirm(len(root.childNodes) == 2
and root.childNodes.length == 2,
"testNormalizeDeleteWithNextSibling -- preparation")
doc.normalize()
self.confirm(len(root.childNodes) == 1
and root.childNodes.length == 1
and root.firstChild.data == "second"
and root.firstChild is root.lastChild
and root.firstChild.nextSibling is None
and root.firstChild.previousSibling is None
, "testNormalizeDeleteWithNextSibling -- result")
doc.unlink()
def testNormalizeDeleteWithTwoNonTextSiblings(self):
doc = parseString("<doc/>")
root = doc.documentElement
root.appendChild(doc.createElement("i"))
root.appendChild(doc.createTextNode(""))
root.appendChild(doc.createElement("i"))
self.confirm(len(root.childNodes) == 3
and root.childNodes.length == 3,
"testNormalizeDeleteWithTwoSiblings -- preparation")
doc.normalize()
self.confirm(len(root.childNodes) == 2
and root.childNodes.length == 2
and root.firstChild is not root.lastChild
and root.firstChild.nextSibling is root.lastChild
and root.firstChild.previousSibling is None
and root.lastChild.previousSibling is root.firstChild
and root.lastChild.nextSibling is None
, "testNormalizeDeleteWithTwoSiblings -- result")
doc.unlink()
def testNormalizeDeleteAndCombine(self):
doc = parseString("<doc/>")
root = doc.documentElement
root.appendChild(doc.createTextNode(""))
root.appendChild(doc.createTextNode("second"))
root.appendChild(doc.createTextNode(""))
root.appendChild(doc.createTextNode("fourth"))
root.appendChild(doc.createTextNode(""))
self.confirm(len(root.childNodes) == 5
and root.childNodes.length == 5,
"testNormalizeDeleteAndCombine -- preparation")
doc.normalize()
self.confirm(len(root.childNodes) == 1
and root.childNodes.length == 1
and root.firstChild is root.lastChild
and root.firstChild.data == "secondfourth"
and root.firstChild.previousSibling is None
and root.firstChild.nextSibling is None
, "testNormalizeDeleteAndCombine -- result")
doc.unlink()
def testNormalizeRecursion(self):
doc = parseString("<doc>"
"<o>"
"<i/>"
"t"
#
#x
"</o>"
"<o>"
"<o>"
"t2"
#x2
"</o>"
"t3"
#x3
"</o>"
#
"</doc>")
root = doc.documentElement
root.childNodes[0].appendChild(doc.createTextNode(""))
root.childNodes[0].appendChild(doc.createTextNode("x"))
root.childNodes[1].childNodes[0].appendChild(doc.createTextNode("x2"))
root.childNodes[1].appendChild(doc.createTextNode("x3"))
root.appendChild(doc.createTextNode(""))
self.confirm(len(root.childNodes) == 3
and root.childNodes.length == 3
and len(root.childNodes[0].childNodes) == 4
and root.childNodes[0].childNodes.length == 4
and len(root.childNodes[1].childNodes) == 3
and root.childNodes[1].childNodes.length == 3
and len(root.childNodes[1].childNodes[0].childNodes) == 2
and root.childNodes[1].childNodes[0].childNodes.length == 2
, "testNormalize2 -- preparation")
doc.normalize()
self.confirm(len(root.childNodes) == 2
and root.childNodes.length == 2
and len(root.childNodes[0].childNodes) == 2
and root.childNodes[0].childNodes.length == 2
and len(root.childNodes[1].childNodes) == 2
and root.childNodes[1].childNodes.length == 2
and len(root.childNodes[1].childNodes[0].childNodes) == 1
and root.childNodes[1].childNodes[0].childNodes.length == 1
, "testNormalize2 -- childNodes lengths")
self.confirm(root.childNodes[0].childNodes[1].data == "tx"
and root.childNodes[1].childNodes[0].childNodes[0].data == "t2x2"
and root.childNodes[1].childNodes[1].data == "t3x3"
, "testNormalize2 -- joined text fields")
self.confirm(root.childNodes[0].childNodes[1].nextSibling is None
and root.childNodes[0].childNodes[1].previousSibling
is root.childNodes[0].childNodes[0]
and root.childNodes[0].childNodes[0].previousSibling is None
and root.childNodes[0].childNodes[0].nextSibling
is root.childNodes[0].childNodes[1]
and root.childNodes[1].childNodes[1].nextSibling is None
and root.childNodes[1].childNodes[1].previousSibling
is root.childNodes[1].childNodes[0]
and root.childNodes[1].childNodes[0].previousSibling is None
and root.childNodes[1].childNodes[0].nextSibling
is root.childNodes[1].childNodes[1]
, "testNormalize2 -- sibling pointers")
doc.unlink()
def testBug1433694(self): def testBug1433694(self):
doc = parseString("<o><i/>t</o>") doc = parseString("<o><i/>t</o>")
node = doc.documentElement node = doc.documentElement

View File

@ -177,34 +177,27 @@ class Node(xml.dom.Node):
L = [] L = []
for child in self.childNodes: for child in self.childNodes:
if child.nodeType == Node.TEXT_NODE: if child.nodeType == Node.TEXT_NODE:
data = child.data if not child.data:
if data and L and L[-1].nodeType == child.nodeType: # empty text node; discard
if L:
L[-1].nextSibling = child.nextSibling
if child.nextSibling:
child.nextSibling.previousSibling = child.previousSibling
child.unlink()
elif L and L[-1].nodeType == child.nodeType:
# collapse text node # collapse text node
node = L[-1] node = L[-1]
node.data = node.data + child.data node.data = node.data + child.data
node.nextSibling = child.nextSibling node.nextSibling = child.nextSibling
if child.nextSibling:
child.nextSibling.previousSibling = node
child.unlink() child.unlink()
elif data: else:
if L:
L[-1].nextSibling = child
child.previousSibling = L[-1]
else:
child.previousSibling = None
L.append(child) L.append(child)
else:
# empty text node; discard
child.unlink()
else: else:
if L:
L[-1].nextSibling = child
child.previousSibling = L[-1]
else:
child.previousSibling = None
L.append(child) L.append(child)
if child.nodeType == Node.ELEMENT_NODE: if child.nodeType == Node.ELEMENT_NODE:
child.normalize() child.normalize()
if L:
L[-1].nextSibling = None
self.childNodes[:] = L self.childNodes[:] = L
def cloneNode(self, deep): def cloneNode(self, deep):

View File

@ -12,6 +12,9 @@ What's New in Python 2.7 alpha 1
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #2170: refactored xml.dom.minidom.normalize, increasing both
its clarity and its speed.
- Issue #2396: the memoryview object was backported from Python 3.1. - Issue #2396: the memoryview object was backported from Python 3.1.
- Fix a problem in PyErr_NormalizeException that leads to "undetected errors" - Fix a problem in PyErr_NormalizeException that leads to "undetected errors"