476 lines
14 KiB
Python
476 lines
14 KiB
Python
"""\
minidom.py -- a lightweight DOM implementation based on SAX.

parse( "foo.xml" )

parseString( "<foo><bar/></foo>" )

Todo:
=====
 * convenience methods for getting elements and text.
 * more testing
 * bring some of the writer and linearizer code into conformance with this
        interface
 * SAX 2 namespaces
"""
|
|
|
|
import pulldom
|
|
import string
|
|
from StringIO import StringIO
|
|
import types
|
|
|
|
class Node:
    """Base class shared by all node types of this DOM implementation.

    It defines the node-type constants, owns the ``childNodes`` list and
    implements the generic tree operations.  Subclasses are expected to
    provide ``writexml(writer)`` (called by ``toxml``) and an
    ``attributes`` attribute (read by ``unlink``).

    Attribute lookup that fails the normal way is routed through
    ``__getattr__``: a missing attribute ``foo`` is answered by calling a
    ``_get_foo()`` method if one exists, and a missing ``_get_foo`` is
    answered with a callable returning the existing attribute ``foo``.
    """
    ELEMENT_NODE = 1
    ATTRIBUTE_NODE = 2
    TEXT_NODE = 3
    CDATA_SECTION_NODE = 4
    ENTITY_REFERENCE_NODE = 5
    ENTITY_NODE = 6
    PROCESSING_INSTRUCTION_NODE = 7
    COMMENT_NODE = 8
    DOCUMENT_NODE = 9
    DOCUMENT_TYPE_NODE = 10
    DOCUMENT_FRAGMENT_NODE = 11
    NOTATION_NODE = 12

    # Debugging aids, only touched while ``_debug`` is true: ``allnodes``
    # maps an id/class key to the repr of a node's state at creation time
    # and ``debug`` is a lazily created StringIO collecting a log.
    allnodes = {}
    _debug = 0
    # When true, the tree-mutating methods keep ``parentNode`` up to date.
    _makeParentNodes = 1
    debug = None

    def __init__(self):
        """Start with no children; log the creation when debugging is on."""
        self.childNodes = []
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            Node.allnodes[index] = repr(self.__dict__)
            if Node.debug is None:
                Node.debug = StringIO()
                #open( "debug4.out", "w" )
            Node.debug.write("create %s\n" % index)

    def __getattr__(self, key):
        """Resolve ``foo`` via ``_get_foo()`` and ``_get_foo`` via ``foo``.

        Raises AttributeError for double-underscore names and for names
        that cannot be resolved either way.
        """
        if key[0:2] == "__":
            raise AttributeError(key)
        state = self.__dict__
        # getattr should never call getattr!  The ``inGetAttr`` marker is
        # set around the nested lookups below; finding it here means we
        # were re-entered, i.e. the nested lookup failed.
        if "inGetAttr" in state:
            del state["inGetAttr"]
            raise AttributeError(key)

        prefix, attrname = key[:5], key[5:]
        if prefix == "_get_":
            self.inGetAttr = 1
            found = hasattr(self, attrname)
            # A failed nested lookup has already removed the marker, so
            # remove it tolerantly instead of with a plain ``del``.
            state.pop("inGetAttr", None)
            if not found:
                raise AttributeError(key)
            return (lambda self=self, attrname=attrname:
                    getattr(self, attrname))
        else:
            self.inGetAttr = 1
            try:
                func = getattr(self, "_get_" + key)
            except AttributeError:
                state.pop("inGetAttr", None)
                raise AttributeError(key)
            state.pop("inGetAttr", None)
            return func()

    def __nonzero__(self):
        """A node is always true, whatever it contains."""
        return 1

    def toxml(self):
        """Return the text that ``self.writexml()`` writes, as a string."""
        writer = StringIO()
        self.writexml(writer)
        return writer.getvalue()

    def hasChildNodes(self):
        """Return 1 if the node has at least one child, else 0."""
        if self.childNodes:
            return 1
        else:
            return 0

    def _get_firstChild(self):
        """Return the first child, or None when there are no children."""
        if self.childNodes:
            return self.childNodes[0]
        return None

    def _get_lastChild(self):
        """Return the last child, or None when there are no children."""
        if self.childNodes:
            return self.childNodes[-1]
        return None

    def insertBefore(self, newChild, refChild):
        """Insert *newChild* ahead of *refChild* and return *newChild*.

        A *refChild* of None appends *newChild* at the end.  Raises
        ValueError if *refChild* is not a child of this node.
        """
        if refChild is None:
            return self.appendChild(newChild)
        index = self.childNodes.index(refChild)
        self.childNodes.insert(index, newChild)
        # Splice newChild into the sibling chain just ahead of refChild.
        if index:
            before = self.childNodes[index - 1]
            before.nextSibling = newChild
        else:
            before = None
        newChild.previousSibling = before
        newChild.nextSibling = refChild
        refChild.previousSibling = newChild
        if self._makeParentNodes:
            newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Add *node* as the last child, link its siblings, return it."""
        if self.childNodes:
            last = self.childNodes[-1]
            node.previousSibling = last
            last.nextSibling = node
        else:
            node.previousSibling = None
        node.nextSibling = None
        self.childNodes.append(node)
        # Same parent bookkeeping as insertBefore().
        if self._makeParentNodes:
            node.parentNode = self
        return node

    def replaceChild(self, newChild, oldChild):
        """Put *newChild* in the place of *oldChild* and return *oldChild*.

        Raises ValueError if *oldChild* is not a child of this node.
        """
        index = self.childNodes.index(oldChild)
        if newChild is oldChild:
            return oldChild
        self.childNodes[index] = newChild
        # Siblings may be missing on nodes that were placed in childNodes
        # by other code, hence the tolerant getattr().
        before = getattr(oldChild, "previousSibling", None)
        after = getattr(oldChild, "nextSibling", None)
        newChild.previousSibling = before
        newChild.nextSibling = after
        if before is not None:
            before.nextSibling = newChild
        if after is not None:
            after.previousSibling = newChild
        oldChild.previousSibling = None
        oldChild.nextSibling = None
        if self._makeParentNodes:
            newChild.parentNode = self
            oldChild.parentNode = None
        return oldChild

    def removeChild(self, oldChild):
        """Detach *oldChild* from this node and return it.

        Raises ValueError if *oldChild* is not a child of this node.
        """
        index = self.childNodes.index(oldChild)
        del self.childNodes[index]
        before = getattr(oldChild, "previousSibling", None)
        after = getattr(oldChild, "nextSibling", None)
        # Close the gap in the sibling chain.
        if before is not None:
            before.nextSibling = after
        if after is not None:
            after.previousSibling = before
        oldChild.previousSibling = None
        oldChild.nextSibling = None
        if self._makeParentNodes:
            oldChild.parentNode = None
        return oldChild

    def cloneNode(self, deep):
        """Return a detached copy of this node.

        The copy has its own attribute dictionary, no parent and no
        siblings.  If the node carries ``_attrs``/``_attrsNS`` attribute
        indexes, the attribute nodes are cloned as well.  With a true
        *deep* the children are cloned recursively; otherwise the copy
        has no children.
        """
        import copy
        # copy.copy() gives a new instance with its own __dict__ holding
        # the same values; everything that must not be shared is reset
        # below.
        clone = copy.copy(self)
        clone.parentNode = None
        clone.previousSibling = None
        clone.nextSibling = None
        clone.childNodes = []
        # A copied instance-level documentElement would point into the
        # original tree and make a document's appendChild() refuse the
        # cloned root; dropping it falls back to the class default.
        clone.__dict__.pop("documentElement", None)
        if "_attrs" in self.__dict__:
            clone._attrs = {}
            clone._attrsNS = {}
            for attr in self._attrs.values():
                attr_clone = attr.cloneNode(1)
                clone._attrs[attr_clone.name] = attr_clone
                clone._attrsNS[(attr_clone.namespaceURI,
                                attr_clone.localName)] = attr_clone
        if deep:
            for child in self.childNodes:
                clone.appendChild(child.cloneNode(1))
        return clone

    def unlink(self):
        """Break all references held by this subtree.

        Children are unlinked recursively, ``childNodes`` is set to None
        and, if the node has attributes, every attribute node is removed.
        """
        self.parentNode = None
        while self.childNodes:
            self.childNodes[-1].unlink()
            del self.childNodes[-1] # probably not most efficient!
        self.childNodes = None
        self.previousSibling = None
        self.nextSibling = None
        if self.attributes:
            # Iterate over a snapshot: removeAttributeNode() deletes from
            # the dictionary being walked.
            for attr in list(self._attrs.values()):
                self.removeAttributeNode(attr)
            assert not len(self._attrs)
            assert not len(self._attrsNS)
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            self.debug.write("Deleting: %s\n" % index)
            del Node.allnodes[index]
|
|
|
|
def _write_data(writer, data):
|
|
"Writes datachars to writer."
|
|
data = string.replace(data, "&", "&")
|
|
data = string.replace(data, "<", "<")
|
|
data = string.replace(data, "\"", """)
|
|
data = string.replace(data, ">", ">")
|
|
writer.write(data)
|
|
|
|
def _getElementsByTagNameHelper(parent, name, rc):
    """Collect the element descendants of *parent* named *name* into *rc*.

    The subtree is walked depth-first in document order.  A *name* of
    "*" matches every element.  *rc* is extended in place and returned.
    """
    wildcard = name == "*"
    for child in parent.childNodes:
        is_element = child.nodeType == Node.ELEMENT_NODE
        if is_element and (wildcard or child.tagName == name):
            rc.append(child)
        # Descend into every child, matching or not.
        _getElementsByTagNameHelper(child, name, rc)
    return rc
|
|
|
|
def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
    """Collect element descendants of *parent* matching a namespaced name.

    An element matches when its ``localName`` equals *localName* and its
    ``namespaceURI`` equals *nsURI*; "*" is a wildcard for either
    argument.  The subtree is walked depth-first in document order.
    *rc* is extended in place and returned, mirroring
    ``_getElementsByTagNameHelper``.
    """
    for node in parent.childNodes:
        if node.nodeType == Node.ELEMENT_NODE:
            # Compare the local part, not tagName: tagName is the
            # qualified name and would include any prefix.
            if ((localName == "*" or node.localName == localName) and
                (nsURI == "*" or node.namespaceURI == nsURI)):
                rc.append(node)
            _getElementsByTagNameNSHelper(node, nsURI, localName, rc)
    return rc
|
|
|
|
class Attr(Node):
    """An attribute node.

    ``name``/``nodeName`` hold the qualified name; ``value`` and
    ``nodeValue`` are kept identical by ``__setattr__``.
    """
    nodeType = Node.ATTRIBUTE_NODE

    def __init__(self, qName, namespaceURI="", localName=None, prefix=None):
        """Create an attribute named *qName*.

        *localName* falls back to *qName* when it is empty or None.  The
        value is not set here.
        """
        # Fill the instance dictionary directly: this bypasses
        # __setattr__ for performance.
        state = self.__dict__
        state.update({
            "localName": localName or qName,
            "nodeName": qName,
            "name": qName,
            "namespaceURI": namespaceURI,
            "prefix": prefix,
        })
        self.attributes = None
        Node.__init__(self)
        # nodeValue and value are set elsewhere

    def __setattr__(self, name, value):
        """Store *value*, mirroring ``value`` and ``nodeValue``."""
        state = self.__dict__
        if name == "value" or name == "nodeValue":
            state["value"] = value
            state["nodeValue"] = value
            return
        state[name] = value
|
|
|
|
class AttributeList:
    """the attribute list is a transient interface to the underlying
    dictionaries.  mutations here will change the underlying element's
    dictionary

    *attrs* maps attribute names to attribute nodes and *attrsNS* maps
    ``(namespaceURI, localName)`` tuples to the same nodes.  Both
    dictionaries are referenced, not copied, so ``len()`` and ``length``
    always reflect their current contents.
    """
    def __init__(self, attrs, attrsNS):
        self._attrs = attrs
        self._attrsNS = attrsNS

    def __getattr__(self, name):
        """Compute ``length`` on demand so it never goes stale."""
        if name == "length":
            return len(self._attrs)
        raise AttributeError(name)

    def item(self, index):
        """Return the attribute node at position *index*, or None."""
        try:
            return self[list(self._attrs.keys())[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of ``(name, value)`` pairs."""
        return [(node.nodeName, node.value)
                for node in self._attrs.values()]

    def itemsNS(self):
        """Return a list of ``((namespaceURI, localName), value)`` pairs."""
        return [((node.namespaceURI, node.localName), node.value)
                for node in self._attrs.values()]

    def keys(self):
        """Return the attribute names as a list."""
        return list(self._attrs.keys())

    def keysNS(self):
        """Return the ``(namespaceURI, localName)`` keys as a list."""
        return list(self._attrsNS.keys())

    def values(self):
        """Return the attribute nodes as a list."""
        return list(self._attrs.values())

    def __len__(self):
        return len(self._attrs)

    def __cmp__(self, other):
        """Two lists over the same name dictionary compare equal."""
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return cmp(id(self), id(other))

    #FIXME: is it appropriate to return .value?
    def __getitem__(self, attname_or_tuple):
        """Look up a node by name or by ``(namespaceURI, localName)``.

        Raises KeyError if there is no such attribute.
        """
        if isinstance(attname_or_tuple, tuple):
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Set attribute *attname* from a string or an ``Attr`` node.

        A string creates a new ``Attr`` named *attname*; an ``Attr`` is
        stored as is.  An attribute previously stored under *attname* is
        unlinked and removed from both indexes.  Raises TypeError for any
        other kind of *value*.
        """
        # types.StringTypes (str and unicode) only exists on Python 2.
        string_types = getattr(types, "StringTypes", (str,))
        if isinstance(value, Attr):
            node = value
        elif isinstance(value, string_types):
            node = Attr(attname)
            node.value = value
        else:
            raise TypeError("attribute value must be a string or an Attr")
        old = self._attrs.get(attname, None)
        if old is not None and old is not node:
            old.unlink()
            # Remove the replaced node from both indexes; its namespace
            # key may differ from the new node's.
            del self._attrs[attname]
            old_key = (old.namespaceURI, old.localName)
            if self._attrsNS.get(old_key) is old:
                del self._attrsNS[old_key]
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node

    def __delitem__(self, attname_or_tuple):
        """Unlink the addressed attribute and drop it from both indexes."""
        node = self[attname_or_tuple]
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]
|
|
|
|
class Element(Node):
    """An element node with a tag name, attributes and child nodes."""
    nodeType = Node.ELEMENT_NODE

    def __init__(self, tagName, namespaceURI="", prefix="",
                 localName=None):
        """Create an element; *localName* defaults to *tagName*."""
        Node.__init__(self)
        self.tagName = self.nodeName = tagName
        self.localName = localName or tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.nodeValue = None

        self._attrs = {}    # attributes are double-indexed:
        self._attrsNS = {}  #    tagName -> Attribute
                            #    URI,localName -> Attribute
                            # in the future: consider lazy generation
                            # of attribute objects this is too tricky
                            # for now because of headaches with
                            # namespaces.

    def getAttribute(self, attname):
        """Return the value of attribute *attname*; KeyError if absent."""
        return self._attrs[attname].value

    def getAttributeNS(self, namespaceURI, localName):
        """Return the value of the namespaced attribute; KeyError if absent."""
        return self._attrsNS[(namespaceURI, localName)].value

    def setAttribute(self, attname, value):
        """Set attribute *attname* to *value*, replacing any old node."""
        attr = Attr(attname)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        """Set a namespaced attribute given its qualified name."""
        prefix, localname = _nssplit(qualifiedName)
        # for performance
        attr = Attr(qualifiedName, namespaceURI, localname, prefix)
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)
        # FIXME: return original node if something changed.

    def getAttributeNode(self, attrname):
        """Return the attribute node named *attrname*, or None."""
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        """Return the namespaced attribute node; KeyError if absent."""
        return self._attrsNS[(namespaceURI, localName)]

    def setAttributeNode(self, attr):
        """Store *attr* in both indexes, unlinking a node it replaces."""
        old = self._attrs.get(attr.name, None)
        # Re-setting the very same node must not unlink it.
        if old is not None and old is not attr:
            old.unlink()
            # The replaced node may be indexed under a different
            # (namespaceURI, localName) pair than the new one; without
            # this the namespace index would keep a stale entry.
            old_key = (old.namespaceURI, old.localName)
            if self._attrsNS.get(old_key) is old:
                del self._attrsNS[old_key]
        self._attrs[attr.name] = attr
        self._attrsNS[(attr.namespaceURI, attr.localName)] = attr
        # FIXME: return old value if something changed

    def removeAttribute(self, name):
        """Remove the attribute called *name*; KeyError if absent."""
        attr = self._attrs[name]
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Remove the namespaced attribute; KeyError if absent."""
        attr = self._attrsNS[(namespaceURI, localName)]
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        """Unlink *node* and delete it from both attribute indexes."""
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]

    def getElementsByTagName(self, name):
        """Return the descendant elements named *name* ("*" = all)."""
        return _getElementsByTagNameHelper(self, name, [])

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return the descendant elements matching the namespaced name."""
        # Keep our own reference to the result list instead of relying
        # on the helper's return value.
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def __repr__(self):
        return "<DOM Element: %s at %s>" % (self.tagName, id(self))

    # undocumented
    def writexml(self, writer):
        """Write the element, its attributes and its subtree to *writer*.

        Attributes are written sorted by name; an element without
        children is written in the empty-element form ``<tag/>``.
        """
        writer.write("<" + self.tagName)

        # Read the name index directly rather than building a new
        # AttributeList wrapper for every attribute.
        a_names = list(self._attrs.keys())
        a_names.sort()

        for a_name in a_names:
            writer.write(" %s=\"" % a_name)
            _write_data(writer, self._attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            for node in self.childNodes:
                node.writexml(writer)
            writer.write("</%s>" % self.tagName)
        else:
            writer.write("/>")

    def _get_attributes(self):
        """Return a fresh AttributeList view of the attribute indexes."""
        return AttributeList(self._attrs, self._attrsNS)
|
|
|
|
class Comment(Node):
    """A comment node; ``data`` and ``nodeValue`` hold its text."""
    nodeType = Node.COMMENT_NODE

    def __init__(self, data):
        Node.__init__(self)
        self.nodeName = "#comment"
        self.attributes = None
        self.data = data
        self.nodeValue = data

    def writexml(self, writer):
        """Write the comment to *writer* as ``<!--data-->``."""
        markup = "<!--%s-->" % self.data
        writer.write(markup)
|
|
|
|
class ProcessingInstruction(Node):
    """A processing instruction node.

    ``target`` doubles as ``nodeName`` and ``data`` as ``nodeValue``.
    """
    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        Node.__init__(self)
        self.attributes = None
        self.target = target
        self.nodeName = target
        self.data = data
        self.nodeValue = data

    def writexml(self, writer):
        """Write the instruction to *writer* as ``<?target data?>``."""
        parts = (self.target, self.data)
        writer.write("<?%s %s?>" % parts)
|
|
|
|
class Text(Node):
    """A text node; ``data`` and ``nodeValue`` hold the character data."""
    nodeType = Node.TEXT_NODE
    nodeName = "#text"

    def __init__(self, data):
        Node.__init__(self)
        self.attributes = None
        self.data = data
        self.nodeValue = data

    def __repr__(self):
        # Show at most the first ten characters, flagging any cut-off.
        ellipsis = ""
        if len(self.data) > 10:
            ellipsis = "..."
        preview = self.data[:10]
        return "<DOM Text node \"%s%s\">" % (preview, ellipsis)

    def writexml(self, writer):
        """Write the text to *writer* via ``_write_data``."""
        _write_data(writer, self.data)
|
|
|
|
def _nssplit(qualifiedName):
|
|
import string
|
|
fields = string.split(qualifiedName,':', 1)
|
|
if len(fields) == 2:
|
|
return fields
|
|
elif len(fields) == 1:
|
|
return ('', fields[0])
|
|
|
|
class Document(Node):
    """The document node: root of the tree and factory for other nodes."""
    nodeType = Node.DOCUMENT_NODE
    # The single top-level element; set by appendChild().
    documentElement = None

    def __init__(self):
        Node.__init__(self)
        self.attributes = None
        self.nodeName = "#document"
        self.nodeValue = None

    def appendChild(self, node):
        """Append *node* and return it.

        The first element appended becomes ``documentElement``.  Raises
        TypeError when a second element is appended.
        """
        if node.nodeType == Node.ELEMENT_NODE:
            if self.documentElement:
                raise TypeError("Two document elements disallowed")
            else:
                self.documentElement = node
        Node.appendChild(self, node)
        return node

    # Factory methods: calling them constructs the respective node class
    # with the arguments given.
    createElement = Element

    createTextNode = Text

    createComment = Comment

    createProcessingInstruction = ProcessingInstruction

    createAttribute = Attr

    def createElementNS(self, namespaceURI, qualifiedName):
        """Create an element from a namespace URI and a qualified name."""
        prefix, localName = _nssplit(qualifiedName)
        return Element(qualifiedName, namespaceURI, prefix, localName)

    def createAttributeNS(self, namespaceURI, qualifiedName):
        """Create an attribute from a namespace URI and a qualified name."""
        prefix, localName = _nssplit(qualifiedName)
        return Attr(qualifiedName, namespaceURI, localName, prefix)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return the elements in the document matching the namespaced name."""
        # The helper needs the result list as its fourth argument; keep
        # our own reference to it for the return value.
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def unlink(self):
        """Drop ``documentElement`` and unlink the whole tree."""
        self.documentElement = None
        Node.unlink(self)

    def getElementsByTagName(self, name):
        """Return the elements in the document named *name* ("*" = all)."""
        rc = []
        _getElementsByTagNameHelper(self, name, rc)
        return rc

    def writexml(self, writer):
        """Write every child node to *writer* in order."""
        for node in self.childNodes:
            node.writexml(writer)
|
|
|
|
def _doparse(func, args, kwargs):
|
|
events = apply(func, args, kwargs)
|
|
toktype, rootNode = events.getEvent()
|
|
events.expandNode(rootNode)
|
|
return rootNode
|
|
|
|
def parse(*args, **kwargs):
    """Parse a file into a DOM by filename or file object.

    All arguments are handed on unchanged to ``pulldom.parse`` through
    ``_doparse``, whose result is returned.
    """
    event_source = pulldom.parse
    return _doparse(event_source, args, kwargs)
|
|
|
|
def parseString(*args, **kwargs):
    """Parse XML held in a string into a DOM.

    All arguments are handed on unchanged to ``pulldom.parseString``
    through ``_doparse``, whose result is returned.
    """
    event_source = pulldom.parseString
    return _doparse(event_source, args, kwargs)
|