Reduce the visibility of imported modules for cleaner "from ... import *" behavior.

Added support for the Attr.ownerElement attribute.

Everywhere: Define constant object attributes in the classes rather than on the instances during object construction. This reduces the amount of work needed for object construction and destruction; these need to be lightweight operations on a DOM.

Node._get_firstChild(), Node._get_lastChild(): Return None if there are no children (required for compliance with DOM level 1).

Node.insertBefore(): If refChild is None, append the new node instead of failing (required for compliance). Also, update the sibling relationships. Return the inserted node (required for compliance).

Node.appendChild(): Update the parent of the appended node.

Node.replaceChild(): Actually replace the old child! Update the parent and sibling relationships of both the old and new children. Return the replaced child (required for compliance).

Node.normalize(): Implemented the normalize() method. Required for compliance, but missing from the release. Useful for joining adjacent Text nodes into a single node for easier processing.

Node.cloneNode(): Actually make this work. Don't let the new node share the instance __dict__ with the original. Do proper recursion if doing a "deep" clone. Move the attribute cloning out of the base class, since only Element is supposed to have attributes.

Node.unlink(): Simplify handling of child nodes for efficiency, and remove the attribute handling since only Element nodes support attributes.

Attr.cloneNode(): Extend this to clear the ownerElement attribute in the clone.

AttributeList.items(), AttributeList.itemsNS(): Slight performance improvement (avoid lambda).

Element.cloneNode(): Extend Node.cloneNode() with support for the attributes. Clone the Attr objects after creating the underlying clone.

Element.unlink(): Clean out the attributes here instead of in the base class, since this is the only class that will have them.
Element.toxml(): Adjust to create only one AttributeList instance; minor efficiency improvement.

_nssplit(): No need to re-import string.

Document.__init__(): No longer needed once constant attributes are initialized in the class itself.

Document.createElementNS(), Document.createAttributeNS(): Use the defined constructors rather than directly accessing the classes.

_get_StringIO(): New function. Create an output StringIO using the most efficient available flavor.

parse(), parseString(): Import pulldom here instead of in the public namespace of the module.
2000-11-21 22:02:22 +00:00 · 2000-11-21 22:02:22 +00:00 · 4ccf4a1e8a
parent 707e964734
commit 4ccf4a1e8a
1 changed files with 181 additions and 76 deletions
--- a/Lib/xml/dom/minidom.py
+++ b/Lib/xml/dom/minidom.py
@ -14,10 +14,19 @@ Todo:
 * SAX 2 namespaces
 """
 import pulldom
 import string
-from StringIO import StringIO
+_string = string
 del string
 # localize the types, and allow support for Unicode values if available:
 import types
 _TupleType = types.TupleType
 try:
    _StringTypes = (types.StringType, types.UnicodeType)
 except AttributeError:
    _StringTypes = (types.StringType,)
 del types
 class Node:
    ELEMENT_NODE                = 1
@ -44,7 +53,7 @@ class Node:
            index = repr(id(self)) + repr(self.__class__)
            Node.allnodes[index] = repr(self.__dict__)
            if Node.debug is None:
-                Node.debug = StringIO()
+                Node.debug = _get_StringIO()
                #open( "debug4.out", "w" )
            Node.debug.write("create %s\n" % index)
@ -79,7 +88,7 @@ class Node:
        return 1
    def toxml(self):
-        writer = StringIO()
+        writer = _get_StringIO()
        self.writexml(writer)
        return writer.getvalue()
@ -90,16 +99,30 @@ class Node:
            return 0
    def _get_firstChild(self):
        if self.childNodes:
            return self.childNodes[0]
    def _get_lastChild(self):
        if self.childNodes:
            return self.childNodes[-1]
    def insertBefore(self, newChild, refChild):
        if refChild is None:
            self.appendChild(newChild)
        else:
            index = self.childNodes.index(refChild)
            self.childNodes.insert(index, newChild)
            newChild.nextSibling = refChild
            refChild.previousSibling = newChild
            if index:
                node = self.childNodes[index-1]
                node.nextSibling = newChild
                newChild.previousSibling = node
            else:
                newChild.previousSibling = None
            if self._makeParentNodes:
                newChild.parentNode = self
        return newChild
    def appendChild(self, node):
        if self.childNodes:
@ -110,39 +133,69 @@ class Node:
            node.previousSibling = None
        node.nextSibling = None
        self.childNodes.append(node)
        if self._makeParentNodes:
            node.parentNode = self
        return node
    def replaceChild(self, newChild, oldChild):
        if newChild is oldChild:
            return
        index = self.childNodes.index(oldChild)
-        self.childNodes[index] = oldChild
+        self.childNodes[index] = newChild
        if self._makeParentNodes:
            newChild.parentNode = self
            oldChild.parentNode = None
        newChild.nextSibling = oldChild.nextSibling
        newChild.previousSibling = oldChild.previousSibling
        oldChild.newChild = None
        oldChild.previousSibling = None
        return oldChild
    def removeChild(self, oldChild):
-        index = self.childNodes.index(oldChild)
+        self.childNodes.remove(oldChild)
-        del self.childNodes[index]
+        if self._makeParentNodes:
            oldChild.parentNode = None
        return oldChild
    def normalize(self):
        if len(self.childNodes) > 1:
            L = [self.childNodes[0]]
            for child in self.childNodes[1:]:
                if (  child.nodeType == Node.TEXT_NODE
                      and L[-1].nodeType == child.nodeType):
                    # collapse text node
                    node = L[-1]
                    node.data = node.nodeValue = node.data + child.data
                    node.nextSibling = child.nextSibling
                    child.unlink()
                else:
                    L[-1].nextSibling = child
                    child.previousSibling = L[-1]
                    L.append(child)
                    child.normalize()
            self.childNodes = L
        elif self.childNodes:
            # exactly one child -- just recurse
            self.childNodes[0].normalize()
    def cloneNode(self, deep):
        import new
-        clone = new.instance(self.__class__, self.__dict__)
+        clone = new.instance(self.__class__, self.__dict__.copy())
-        clone.attributes = self.attributes.copy()
+        if self._makeParentNodes:
-        if not deep:
+            clone.parentNode = None
        clone.childNodes = []
-        else:
+        if deep:
-            clone.childNodes = map(lambda x: x.cloneNode, self.childNodes)
+            for child in self.childNodes:
                clone.appendChild(child.cloneNode(1))
        return clone
    def unlink(self):
        self.parentNode = None
-        while self.childNodes:
+        for child in self.childNodes:
-            self.childNodes[-1].unlink()
+            child.unlink()
            del self.childNodes[-1] # probably not most efficient!
        self.childNodes = None
        self.previousSibling = None
        self.nextSibling = None
        if self.attributes:
            for attr in self._attrs.values():
                self.removeAttributeNode(attr)
            assert not len(self._attrs)
            assert not len(self._attrsNS)
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            self.debug.write("Deleting: %s\n" % index)
@ -150,10 +203,11 @@ class Node:
 def _write_data(writer, data):
    "Writes datachars to writer."
-    data = string.replace(data, "&", "&amp;")
+    replace = _string.replace
-    data = string.replace(data, "<", "&lt;")
+    data = replace(data, "&", "&amp;")
-    data = string.replace(data, "\"", "&quot;")
+    data = replace(data, "<", "&lt;")
-    data = string.replace(data, ">", "&gt;")
+    data = replace(data, "\"", "&quot;")
    data = replace(data, ">", "&gt;")
    writer.write(data)
 def _getElementsByTagNameHelper(parent, name, rc):
@ -174,14 +228,16 @@ def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
 class Attr(Node):
    nodeType = Node.ATTRIBUTE_NODE
    attributes = None
    ownerElement = None
    def __init__(self, qName, namespaceURI="", localName=None, prefix=None):
        # skip setattr for performance
-        self.__dict__["localName"] = localName or qName
+        d = self.__dict__
-        self.__dict__["nodeName"] = self.__dict__["name"] = qName
+        d["localName"] = localName or qName
-        self.__dict__["namespaceURI"] = namespaceURI
+        d["nodeName"] = d["name"] = qName
-        self.__dict__["prefix"] = prefix
+        d["namespaceURI"] = namespaceURI
-        self.attributes = None
+        d["prefix"] = prefix
        Node.__init__(self)
        # nodeValue and value are set elsewhere
@ -191,14 +247,21 @@ class Attr(Node):
        else:
            self.__dict__[name] = value
    def cloneNode(self, deep):
        clone = Node.cloneNode(self, deep)
        if clone.__dict__.has_key("ownerElement"):
            del clone.ownerElement
        return clone
 class AttributeList:
-    """the attribute list is a transient interface to the underlying
+    """The attribute list is a transient interface to the underlying
-    dictionaries.  mutations here will change the underlying element's
+    dictionaries.  Mutations here will change the underlying element's
    dictionary"""
    def __init__(self, attrs, attrsNS):
        self._attrs = attrs
        self._attrsNS = attrsNS
-        self.length = len(self._attrs.keys())
+        self.length = len(self._attrs)
    def item(self, index):
        try:
@ -207,12 +270,16 @@ class AttributeList:
            return None
    def items(self):
-        return map(lambda node: (node.tagName, node.value),
+        L = []
-                   self._attrs.values())
+        for node in self._attrs.values():
            L.append((node.tagName, node.value))
        return L
    def itemsNS(self):
-        return map(lambda node: ((node.URI, node.localName), node.value),
+        L = []
-                   self._attrs.values())
+        for node in self._attrs.values():
            L.append(((node.URI, node.localName), node.value))
        return L
    def keys(self):
        return self._attrs.keys()
@ -234,18 +301,19 @@ class AttributeList:
    #FIXME: is it appropriate to return .value?
    def __getitem__(self, attname_or_tuple):
-        if type(attname_or_tuple) is types.TupleType:
+        if type(attname_or_tuple) is _TupleType:
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]
    # same as set
    def __setitem__(self, attname, value):
-        if type(value) is types.StringType:
+        if type(value) in _StringTypes:
            node = Attr(attname)
            node.value = value
        else:
-            assert isinstance(value, Attr) or type(value) is types.StringType
+            if not isinstance(value, Attr):
                raise TypeError, "value must be a string or Attr object"
            node = value
        old = self._attrs.get(attname, None)
        if old:
@ -261,6 +329,8 @@ class AttributeList:
 class Element(Node):
    nodeType = Node.ELEMENT_NODE
    nextSibling = None
    previousSibling = None
    def __init__(self, tagName, namespaceURI="", prefix="",
                 localName=None):
@ -274,9 +344,28 @@ class Element(Node):
        self._attrs = {}   # attributes are double-indexed:
        self._attrsNS = {} #    tagName -> Attribute
                           #    URI,localName -> Attribute
-                # in the future: consider lazy generation of attribute objects
+                           # in the future: consider lazy generation
-                #                this is too tricky for now because of headaches
+                           # of attribute objects this is too tricky
-                #                with namespaces.
+                           # for now because of headaches with
                           # namespaces.
    def cloneNode(self, deep):
        clone = Node.cloneNode(self, deep)
        clone._attrs = {}
        clone._attrsNS = {}
        for attr in self._attrs.values():
            node = attr.cloneNode(1)
            clone._attrs[node.name] = node
            clone._attrsNS[(node.namespaceURI, node.localName)] = node
            node.ownerElement = clone
        return clone
    def unlink(self):
        for attr in self._attrs.values():
            attr.unlink()
        self._attrs = None
        self._attrsNS = None
        Node.unlink(self)
    def getAttribute(self, attname):
        return self._attrs[attname].value
@ -296,7 +385,6 @@ class Element(Node):
        attr = Attr(qualifiedName, namespaceURI, localname, prefix)
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)
        # FIXME: return original node if something changed.
    def getAttributeNode(self, attrname):
        return self._attrs.get(attrname)
@ -305,12 +393,23 @@ class Element(Node):
        return self._attrsNS[(namespaceURI, localName)]
    def setAttributeNode(self, attr):
        if attr.ownerElement not in (None, self):
            raise ValueError, "attribute node already owned"
        old = self._attrs.get(attr.name, None)
        if old:
            old.unlink()
        self._attrs[attr.name] = attr
        self._attrsNS[(attr.namespaceURI, attr.localName)] = attr
-        # FIXME: return old value if something changed
+
        # This creates a circular reference, but Element.unlink()
        # breaks the cycle since the references to the attribute
        # dictionaries are tossed.
        attr.ownerElement = self
        if old is not attr:
            # It might have already been part of this node, in which case
            # it doesn't represent a change, and should not be returned.
            return old
    def removeAttribute(self, name):
        attr = self._attrs[name]
@ -334,16 +433,16 @@ class Element(Node):
    def __repr__(self):
        return "<DOM Element: %s at %s>" % (self.tagName, id(self))
    # undocumented
    def writexml(self, writer):
        writer.write("<" + self.tagName)
-        a_names = self._get_attributes().keys()
+        attrs = self._get_attributes()
        a_names = attrs.keys()
        a_names.sort()
        for a_name in a_names:
            writer.write(" %s=\"" % a_name)
-            _write_data(writer, self._get_attributes()[a_name].value)
+            _write_data(writer, attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
@ -358,24 +457,24 @@ class Element(Node):
 class Comment(Node):
    nodeType = Node.COMMENT_NODE
    nodeName = "#comment"
    attributes = None
    def __init__(self, data):
        Node.__init__(self)
        self.data = self.nodeValue = data
        self.nodeName = "#comment"
        self.attributes = None
    def writexml(self, writer):
        writer.write("<!--%s-->" % self.data)
 class ProcessingInstruction(Node):
    nodeType = Node.PROCESSING_INSTRUCTION_NODE
    attributes = None
    def __init__(self, target, data):
        Node.__init__(self)
        self.target = self.nodeName = target
        self.data = self.nodeValue = data
        self.attributes = None
    def writexml(self, writer):
        writer.write("<?%s %s?>" % (self.target, self.data))
@ -383,11 +482,11 @@ class ProcessingInstruction(Node):
 class Text(Node):
    nodeType = Node.TEXT_NODE
    nodeName = "#text"
    attributes = None
    def __init__(self, data):
        Node.__init__(self)
        self.data = self.nodeValue = data
        self.attributes = None
    def __repr__(self):
        if len(self.data) > 10:
@ -400,8 +499,7 @@ class Text(Node):
        _write_data(writer, self.data)
 def _nssplit(qualifiedName):
-    import string
+    fields = _string.split(qualifiedName, ':', 1)
    fields = string.split(qualifiedName,':', 1)
    if len(fields) == 2:
        return fields
    elif len(fields) == 1:
@ -409,22 +507,18 @@ def _nssplit(qualifiedName):
 class Document(Node):
    nodeType = Node.DOCUMENT_NODE
    nodeName = "#document"
    nodeValue = None
    attributes = None
    documentElement = None
    def __init__(self):
        Node.__init__(self)
        self.attributes = None
        self.nodeName = "#document"
        self.nodeValue = None
    def appendChild(self, node):
        if node.nodeType == Node.ELEMENT_NODE:
            if self.documentElement:
                raise TypeError, "Two document elements disallowed"
            else:
                self.documentElement = node
-        Node.appendChild(self, node)
+        return Node.appendChild(self, node)
        return node
    createElement = Element
@ -438,11 +532,13 @@ class Document(Node):
    def createElementNS(self, namespaceURI, qualifiedName):
        prefix, localName = _nssplit(qualifiedName)
-        return Element(qualifiedName, namespaceURI, prefix, localName)
+        return self.createElement(qualifiedName, namespaceURI,
                                  prefix, localName)
    def createAttributeNS(self, namespaceURI, qualifiedName):
        prefix, localName = _nssplit(qualifiedName)
-        return Attr(qualifiedName, namespaceURI, localName, prefix)
+        return self.createAttribute(qualifiedName, namespaceURI,
                                    localName, prefix)
    def getElementsByTagNameNS(self, namespaceURI, localName):
        _getElementsByTagNameNSHelper(self, namespaceURI, localName)
@ -460,6 +556,13 @@ class Document(Node):
        for node in self.childNodes:
            node.writexml(writer)
 def _get_StringIO():
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    return StringIO()
 def _doparse(func, args, kwargs):
    events = apply(func, args, kwargs)
    toktype, rootNode = events.getEvent()
@ -468,8 +571,10 @@ def _doparse(func, args, kwargs):
 def parse(*args, **kwargs):
    "Parse a file into a DOM by filename or file object"
    from xml.dom import pulldom
    return _doparse(pulldom.parse, args, kwargs)
 def parseString(*args, **kwargs):
    "Parse a file into a DOM from a string"
    from xml.dom import pulldom
    return _doparse(pulldom.parseString, args, kwargs)