Reduce the visibility of imported modules for cleaner "from ... import *" behavior.

Added support for the Attr.ownerElement attribute.

Everywhere:  Define constant object attributes in the classes rather than
on the instances during object construction.  This reduces the amount of
work needed for object construction and destruction; these need to be
lightweight operations on a DOM.

Node._get_firstChild(),
Node._get_lastChild():  Return None if there are no children (required for
        compliance with DOM level 1).

Node.insertBefore():  If refChild is None, append the new node instead of
        failing (required for compliance).  Also, update the sibling
        relationships.  Return the inserted node (required for compliance).

Node.appendChild():  Update the parent of the appended node.

Node.replaceChild():  Actually replace the old child!  Update the parent
        and sibling relationships of both the old and new children.  Return
        the replaced child (required for compliance).

Node.normalize():  Implemented the normalize() method.  Required for
        compliance, but missing from the release.  Useful for joining
        adjacent Text nodes into a single node for easier processing.

Node.cloneNode():  Actually make this work.  Don't let the new node share
        the instance __dict__ with the original.  Do proper recursion if
        doing a "deep" clone.  Move the attribute cloning out of the base
        class, since only Element is supposed to have attributes.

Node.unlink():  Simplify handling of child nodes for efficiency, and
        remove the attribute handling since only Element nodes support
        attributes.

Attr.cloneNode():  Extend this to clear the ownerElement attribute in
        the clone.

AttributeList.items(),
AttributeList.itemsNS():  Slight performance improvement (avoid lambda).

Element.cloneNode():  Extend Node.cloneNode() with support for the
        attributes.  Clone the Attr objects after creating the underlying
        clone.

Element.unlink():  Clean out the attributes here instead of in the base
        class, since this is the only class that will have them.

Element.toxml():  Adjust to create only one AttributeList instance; minor
        efficiency improvement.

_nssplit():  No need to re-import string.

Document.__init__():  Removed; no longer needed now that constant
        attributes are initialized in the class itself.

Document.createElementNS(),
Document.createAttributeNS():  Use the defined constructors rather than
        accessing the classes directly.

_get_StringIO():  New function.  Create an output StringIO using the most
        efficient available flavor.

parse(),
parseString():  Import pulldom here instead of in the public namespace of
        the module.
This commit is contained in:
Fred Drake 2000-11-21 22:02:22 +00:00
parent 707e964734
commit 4ccf4a1e8a
1 changed file with 181 additions and 76 deletions

View File

@ -14,10 +14,19 @@ Todo:
* SAX 2 namespaces * SAX 2 namespaces
""" """
import pulldom
import string import string
from StringIO import StringIO _string = string
del string
# localize the types, and allow support for Unicode values if available:
import types import types
_TupleType = types.TupleType
try:
_StringTypes = (types.StringType, types.UnicodeType)
except AttributeError:
_StringTypes = (types.StringType,)
del types
class Node: class Node:
ELEMENT_NODE = 1 ELEMENT_NODE = 1
@ -44,7 +53,7 @@ class Node:
index = repr(id(self)) + repr(self.__class__) index = repr(id(self)) + repr(self.__class__)
Node.allnodes[index] = repr(self.__dict__) Node.allnodes[index] = repr(self.__dict__)
if Node.debug is None: if Node.debug is None:
Node.debug = StringIO() Node.debug = _get_StringIO()
#open( "debug4.out", "w" ) #open( "debug4.out", "w" )
Node.debug.write("create %s\n" % index) Node.debug.write("create %s\n" % index)
@ -79,7 +88,7 @@ class Node:
return 1 return 1
def toxml(self): def toxml(self):
writer = StringIO() writer = _get_StringIO()
self.writexml(writer) self.writexml(writer)
return writer.getvalue() return writer.getvalue()
@ -90,16 +99,30 @@ class Node:
return 0 return 0
def _get_firstChild(self): def _get_firstChild(self):
if self.childNodes:
return self.childNodes[0] return self.childNodes[0]
def _get_lastChild(self): def _get_lastChild(self):
if self.childNodes:
return self.childNodes[-1] return self.childNodes[-1]
def insertBefore(self, newChild, refChild): def insertBefore(self, newChild, refChild):
if refChild is None:
self.appendChild(newChild)
else:
index = self.childNodes.index(refChild) index = self.childNodes.index(refChild)
self.childNodes.insert(index, newChild) self.childNodes.insert(index, newChild)
newChild.nextSibling = refChild
refChild.previousSibling = newChild
if index:
node = self.childNodes[index-1]
node.nextSibling = newChild
newChild.previousSibling = node
else:
newChild.previousSibling = None
if self._makeParentNodes: if self._makeParentNodes:
newChild.parentNode = self newChild.parentNode = self
return newChild
def appendChild(self, node): def appendChild(self, node):
if self.childNodes: if self.childNodes:
@ -110,39 +133,69 @@ class Node:
node.previousSibling = None node.previousSibling = None
node.nextSibling = None node.nextSibling = None
self.childNodes.append(node) self.childNodes.append(node)
if self._makeParentNodes:
node.parentNode = self
return node return node
def replaceChild(self, newChild, oldChild): def replaceChild(self, newChild, oldChild):
if newChild is oldChild:
return
index = self.childNodes.index(oldChild) index = self.childNodes.index(oldChild)
self.childNodes[index] = oldChild self.childNodes[index] = newChild
if self._makeParentNodes:
newChild.parentNode = self
oldChild.parentNode = None
newChild.nextSibling = oldChild.nextSibling
newChild.previousSibling = oldChild.previousSibling
oldChild.newChild = None
oldChild.previousSibling = None
return oldChild
def removeChild(self, oldChild): def removeChild(self, oldChild):
index = self.childNodes.index(oldChild) self.childNodes.remove(oldChild)
del self.childNodes[index] if self._makeParentNodes:
oldChild.parentNode = None
return oldChild
def normalize(self):
if len(self.childNodes) > 1:
L = [self.childNodes[0]]
for child in self.childNodes[1:]:
if ( child.nodeType == Node.TEXT_NODE
and L[-1].nodeType == child.nodeType):
# collapse text node
node = L[-1]
node.data = node.nodeValue = node.data + child.data
node.nextSibling = child.nextSibling
child.unlink()
else:
L[-1].nextSibling = child
child.previousSibling = L[-1]
L.append(child)
child.normalize()
self.childNodes = L
elif self.childNodes:
# exactly one child -- just recurse
self.childNodes[0].normalize()
def cloneNode(self, deep): def cloneNode(self, deep):
import new import new
clone = new.instance(self.__class__, self.__dict__) clone = new.instance(self.__class__, self.__dict__.copy())
clone.attributes = self.attributes.copy() if self._makeParentNodes:
if not deep: clone.parentNode = None
clone.childNodes = [] clone.childNodes = []
else: if deep:
clone.childNodes = map(lambda x: x.cloneNode, self.childNodes) for child in self.childNodes:
clone.appendChild(child.cloneNode(1))
return clone return clone
def unlink(self): def unlink(self):
self.parentNode = None self.parentNode = None
while self.childNodes: for child in self.childNodes:
self.childNodes[-1].unlink() child.unlink()
del self.childNodes[-1] # probably not most efficient!
self.childNodes = None self.childNodes = None
self.previousSibling = None self.previousSibling = None
self.nextSibling = None self.nextSibling = None
if self.attributes:
for attr in self._attrs.values():
self.removeAttributeNode(attr)
assert not len(self._attrs)
assert not len(self._attrsNS)
if Node._debug: if Node._debug:
index = repr(id(self)) + repr(self.__class__) index = repr(id(self)) + repr(self.__class__)
self.debug.write("Deleting: %s\n" % index) self.debug.write("Deleting: %s\n" % index)
@ -150,10 +203,11 @@ class Node:
def _write_data(writer, data): def _write_data(writer, data):
"Writes datachars to writer." "Writes datachars to writer."
data = string.replace(data, "&", "&") replace = _string.replace
data = string.replace(data, "<", "&lt;") data = replace(data, "&", "&amp;")
data = string.replace(data, "\"", "&quot;") data = replace(data, "<", "&lt;")
data = string.replace(data, ">", "&gt;") data = replace(data, "\"", "&quot;")
data = replace(data, ">", "&gt;")
writer.write(data) writer.write(data)
def _getElementsByTagNameHelper(parent, name, rc): def _getElementsByTagNameHelper(parent, name, rc):
@ -174,14 +228,16 @@ def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
class Attr(Node): class Attr(Node):
nodeType = Node.ATTRIBUTE_NODE nodeType = Node.ATTRIBUTE_NODE
attributes = None
ownerElement = None
def __init__(self, qName, namespaceURI="", localName=None, prefix=None): def __init__(self, qName, namespaceURI="", localName=None, prefix=None):
# skip setattr for performance # skip setattr for performance
self.__dict__["localName"] = localName or qName d = self.__dict__
self.__dict__["nodeName"] = self.__dict__["name"] = qName d["localName"] = localName or qName
self.__dict__["namespaceURI"] = namespaceURI d["nodeName"] = d["name"] = qName
self.__dict__["prefix"] = prefix d["namespaceURI"] = namespaceURI
self.attributes = None d["prefix"] = prefix
Node.__init__(self) Node.__init__(self)
# nodeValue and value are set elsewhere # nodeValue and value are set elsewhere
@ -191,14 +247,21 @@ class Attr(Node):
else: else:
self.__dict__[name] = value self.__dict__[name] = value
def cloneNode(self, deep):
clone = Node.cloneNode(self, deep)
if clone.__dict__.has_key("ownerElement"):
del clone.ownerElement
return clone
class AttributeList: class AttributeList:
"""the attribute list is a transient interface to the underlying """The attribute list is a transient interface to the underlying
dictionaries. mutations here will change the underlying element's dictionaries. Mutations here will change the underlying element's
dictionary""" dictionary"""
def __init__(self, attrs, attrsNS): def __init__(self, attrs, attrsNS):
self._attrs = attrs self._attrs = attrs
self._attrsNS = attrsNS self._attrsNS = attrsNS
self.length = len(self._attrs.keys()) self.length = len(self._attrs)
def item(self, index): def item(self, index):
try: try:
@ -207,12 +270,16 @@ class AttributeList:
return None return None
def items(self): def items(self):
return map(lambda node: (node.tagName, node.value), L = []
self._attrs.values()) for node in self._attrs.values():
L.append((node.tagName, node.value))
return L
def itemsNS(self): def itemsNS(self):
return map(lambda node: ((node.URI, node.localName), node.value), L = []
self._attrs.values()) for node in self._attrs.values():
L.append(((node.URI, node.localName), node.value))
return L
def keys(self): def keys(self):
return self._attrs.keys() return self._attrs.keys()
@ -234,18 +301,19 @@ class AttributeList:
#FIXME: is it appropriate to return .value? #FIXME: is it appropriate to return .value?
def __getitem__(self, attname_or_tuple): def __getitem__(self, attname_or_tuple):
if type(attname_or_tuple) is types.TupleType: if type(attname_or_tuple) is _TupleType:
return self._attrsNS[attname_or_tuple] return self._attrsNS[attname_or_tuple]
else: else:
return self._attrs[attname_or_tuple] return self._attrs[attname_or_tuple]
# same as set # same as set
def __setitem__(self, attname, value): def __setitem__(self, attname, value):
if type(value) is types.StringType: if type(value) in _StringTypes:
node = Attr(attname) node = Attr(attname)
node.value = value node.value = value
else: else:
assert isinstance(value, Attr) or type(value) is types.StringType if not isinstance(value, Attr):
raise TypeError, "value must be a string or Attr object"
node = value node = value
old = self._attrs.get(attname, None) old = self._attrs.get(attname, None)
if old: if old:
@ -261,6 +329,8 @@ class AttributeList:
class Element(Node): class Element(Node):
nodeType = Node.ELEMENT_NODE nodeType = Node.ELEMENT_NODE
nextSibling = None
previousSibling = None
def __init__(self, tagName, namespaceURI="", prefix="", def __init__(self, tagName, namespaceURI="", prefix="",
localName=None): localName=None):
@ -274,9 +344,28 @@ class Element(Node):
self._attrs = {} # attributes are double-indexed: self._attrs = {} # attributes are double-indexed:
self._attrsNS = {} # tagName -> Attribute self._attrsNS = {} # tagName -> Attribute
# URI,localName -> Attribute # URI,localName -> Attribute
# in the future: consider lazy generation of attribute objects # in the future: consider lazy generation
# this is too tricky for now because of headaches # of attribute objects this is too tricky
# with namespaces. # for now because of headaches with
# namespaces.
def cloneNode(self, deep):
clone = Node.cloneNode(self, deep)
clone._attrs = {}
clone._attrsNS = {}
for attr in self._attrs.values():
node = attr.cloneNode(1)
clone._attrs[node.name] = node
clone._attrsNS[(node.namespaceURI, node.localName)] = node
node.ownerElement = clone
return clone
def unlink(self):
for attr in self._attrs.values():
attr.unlink()
self._attrs = None
self._attrsNS = None
Node.unlink(self)
def getAttribute(self, attname): def getAttribute(self, attname):
return self._attrs[attname].value return self._attrs[attname].value
@ -296,7 +385,6 @@ class Element(Node):
attr = Attr(qualifiedName, namespaceURI, localname, prefix) attr = Attr(qualifiedName, namespaceURI, localname, prefix)
attr.__dict__["value"] = attr.__dict__["nodeValue"] = value attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
self.setAttributeNode(attr) self.setAttributeNode(attr)
# FIXME: return original node if something changed.
def getAttributeNode(self, attrname): def getAttributeNode(self, attrname):
return self._attrs.get(attrname) return self._attrs.get(attrname)
@ -305,12 +393,23 @@ class Element(Node):
return self._attrsNS[(namespaceURI, localName)] return self._attrsNS[(namespaceURI, localName)]
def setAttributeNode(self, attr): def setAttributeNode(self, attr):
if attr.ownerElement not in (None, self):
raise ValueError, "attribute node already owned"
old = self._attrs.get(attr.name, None) old = self._attrs.get(attr.name, None)
if old: if old:
old.unlink() old.unlink()
self._attrs[attr.name] = attr self._attrs[attr.name] = attr
self._attrsNS[(attr.namespaceURI, attr.localName)] = attr self._attrsNS[(attr.namespaceURI, attr.localName)] = attr
# FIXME: return old value if something changed
# This creates a circular reference, but Element.unlink()
# breaks the cycle since the references to the attribute
# dictionaries are tossed.
attr.ownerElement = self
if old is not attr:
# It might have already been part of this node, in which case
# it doesn't represent a change, and should not be returned.
return old
def removeAttribute(self, name): def removeAttribute(self, name):
attr = self._attrs[name] attr = self._attrs[name]
@ -334,16 +433,16 @@ class Element(Node):
def __repr__(self): def __repr__(self):
return "<DOM Element: %s at %s>" % (self.tagName, id(self)) return "<DOM Element: %s at %s>" % (self.tagName, id(self))
# undocumented
def writexml(self, writer): def writexml(self, writer):
writer.write("<" + self.tagName) writer.write("<" + self.tagName)
a_names = self._get_attributes().keys() attrs = self._get_attributes()
a_names = attrs.keys()
a_names.sort() a_names.sort()
for a_name in a_names: for a_name in a_names:
writer.write(" %s=\"" % a_name) writer.write(" %s=\"" % a_name)
_write_data(writer, self._get_attributes()[a_name].value) _write_data(writer, attrs[a_name].value)
writer.write("\"") writer.write("\"")
if self.childNodes: if self.childNodes:
writer.write(">") writer.write(">")
@ -358,24 +457,24 @@ class Element(Node):
class Comment(Node): class Comment(Node):
nodeType = Node.COMMENT_NODE nodeType = Node.COMMENT_NODE
nodeName = "#comment"
attributes = None
def __init__(self, data): def __init__(self, data):
Node.__init__(self) Node.__init__(self)
self.data = self.nodeValue = data self.data = self.nodeValue = data
self.nodeName = "#comment"
self.attributes = None
def writexml(self, writer): def writexml(self, writer):
writer.write("<!--%s-->" % self.data) writer.write("<!--%s-->" % self.data)
class ProcessingInstruction(Node): class ProcessingInstruction(Node):
nodeType = Node.PROCESSING_INSTRUCTION_NODE nodeType = Node.PROCESSING_INSTRUCTION_NODE
attributes = None
def __init__(self, target, data): def __init__(self, target, data):
Node.__init__(self) Node.__init__(self)
self.target = self.nodeName = target self.target = self.nodeName = target
self.data = self.nodeValue = data self.data = self.nodeValue = data
self.attributes = None
def writexml(self, writer): def writexml(self, writer):
writer.write("<?%s %s?>" % (self.target, self.data)) writer.write("<?%s %s?>" % (self.target, self.data))
@ -383,11 +482,11 @@ class ProcessingInstruction(Node):
class Text(Node): class Text(Node):
nodeType = Node.TEXT_NODE nodeType = Node.TEXT_NODE
nodeName = "#text" nodeName = "#text"
attributes = None
def __init__(self, data): def __init__(self, data):
Node.__init__(self) Node.__init__(self)
self.data = self.nodeValue = data self.data = self.nodeValue = data
self.attributes = None
def __repr__(self): def __repr__(self):
if len(self.data) > 10: if len(self.data) > 10:
@ -400,8 +499,7 @@ class Text(Node):
_write_data(writer, self.data) _write_data(writer, self.data)
def _nssplit(qualifiedName): def _nssplit(qualifiedName):
import string fields = _string.split(qualifiedName, ':', 1)
fields = string.split(qualifiedName,':', 1)
if len(fields) == 2: if len(fields) == 2:
return fields return fields
elif len(fields) == 1: elif len(fields) == 1:
@ -409,22 +507,18 @@ def _nssplit(qualifiedName):
class Document(Node): class Document(Node):
nodeType = Node.DOCUMENT_NODE nodeType = Node.DOCUMENT_NODE
nodeName = "#document"
nodeValue = None
attributes = None
documentElement = None documentElement = None
def __init__(self):
Node.__init__(self)
self.attributes = None
self.nodeName = "#document"
self.nodeValue = None
def appendChild(self, node): def appendChild(self, node):
if node.nodeType == Node.ELEMENT_NODE: if node.nodeType == Node.ELEMENT_NODE:
if self.documentElement: if self.documentElement:
raise TypeError, "Two document elements disallowed" raise TypeError, "Two document elements disallowed"
else: else:
self.documentElement = node self.documentElement = node
Node.appendChild(self, node) return Node.appendChild(self, node)
return node
createElement = Element createElement = Element
@ -438,11 +532,13 @@ class Document(Node):
def createElementNS(self, namespaceURI, qualifiedName): def createElementNS(self, namespaceURI, qualifiedName):
prefix, localName = _nssplit(qualifiedName) prefix, localName = _nssplit(qualifiedName)
return Element(qualifiedName, namespaceURI, prefix, localName) return self.createElement(qualifiedName, namespaceURI,
prefix, localName)
def createAttributeNS(self, namespaceURI, qualifiedName): def createAttributeNS(self, namespaceURI, qualifiedName):
prefix, localName = _nssplit(qualifiedName) prefix, localName = _nssplit(qualifiedName)
return Attr(qualifiedName, namespaceURI, localName, prefix) return self.createAttribute(qualifiedName, namespaceURI,
localName, prefix)
def getElementsByTagNameNS(self, namespaceURI, localName): def getElementsByTagNameNS(self, namespaceURI, localName):
_getElementsByTagNameNSHelper(self, namespaceURI, localName) _getElementsByTagNameNSHelper(self, namespaceURI, localName)
@ -460,6 +556,13 @@ class Document(Node):
for node in self.childNodes: for node in self.childNodes:
node.writexml(writer) node.writexml(writer)
def _get_StringIO():
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
return StringIO()
def _doparse(func, args, kwargs): def _doparse(func, args, kwargs):
events = apply(func, args, kwargs) events = apply(func, args, kwargs)
toktype, rootNode = events.getEvent() toktype, rootNode = events.getEvent()
@ -468,8 +571,10 @@ def _doparse(func, args, kwargs):
def parse(*args, **kwargs): def parse(*args, **kwargs):
"Parse a file into a DOM by filename or file object" "Parse a file into a DOM by filename or file object"
from xml.dom import pulldom
return _doparse(pulldom.parse, args, kwargs) return _doparse(pulldom.parse, args, kwargs)
def parseString(*args, **kwargs): def parseString(*args, **kwargs):
"Parse a file into a DOM from a string" "Parse a file into a DOM from a string"
from xml.dom import pulldom
return _doparse(pulldom.parseString, args, kwargs) return _doparse(pulldom.parseString, args, kwargs)