cpython/Lib/xml/dom/minidom.py

464 lines
14 KiB
Python

"""\
minidom.py -- a lightweight DOM implementation based on SAX.
parse( "foo.xml" )
parseString( "<foo><bar/></foo>" )
Todo:
=====
* convenience methods for getting elements and text.
* more testing
* bring some of the writer and linearizer code into conformance with this
interface
* SAX 2 namespaces
"""
import pulldom
import string
from StringIO import StringIO
import types
class Node:
ELEMENT_NODE = 1
ATTRIBUTE_NODE = 2
TEXT_NODE = 3
CDATA_SECTION_NODE = 4
ENTITY_REFERENCE_NODE = 5
ENTITY_NODE = 6
PROCESSING_INSTRUCTION_NODE = 7
COMMENT_NODE = 8
DOCUMENT_NODE = 9
DOCUMENT_TYPE_NODE = 10
DOCUMENT_FRAGMENT_NODE = 11
NOTATION_NODE = 12
allnodes = {}
_debug = 0
_makeParentNodes = 1
debug = None
def __init__(self):
self.childNodes = []
if Node._debug:
index = repr(id(self)) + repr(self.__class__)
Node.allnodes[index] = repr(self.__dict__)
if Node.debug is None:
Node.debug = StringIO()
#open( "debug4.out", "w" )
Node.debug.write("create %s\n" % index)
def __getattr__(self, key):
if key[0:2] == "__":
raise AttributeError
# getattr should never call getattr!
if self.__dict__.has_key("inGetAttr"):
del self.inGetAttr
raise AttributeError, key
prefix, attrname = key[:5], key[5:]
if prefix == "_get_":
self.inGetAttr = 1
if hasattr(self, attrname):
del self.inGetAttr
return (lambda self=self, attrname=attrname:
getattr(self, attrname))
else:
del self.inGetAttr
raise AttributeError, key
else:
self.inGetAttr = 1
try:
func = getattr(self, "_get_" + key)
except AttributeError:
raise AttributeError, key
del self.inGetAttr
return func()
def __nonzero__(self):
return 1
def toxml(self):
writer = StringIO()
self.writexml(writer)
return writer.getvalue()
def hasChildNodes(self):
if self.childNodes:
return 1
else:
return 0
def _get_firstChild(self):
return self.childNodes[0]
def _get_lastChild(self):
return self.childNodes[-1]
def insertBefore(self, newChild, refChild):
index = self.childNodes.index(refChild)
self.childNodes.insert(index, newChild)
if self._makeParentNodes:
newChild.parentNode = self
def appendChild(self, node):
self.childNodes.append(node)
return node
def replaceChild(self, newChild, oldChild):
index = self.childNodes.index(oldChild)
self.childNodes[index] = oldChild
def removeChild(self, oldChild):
index = self.childNodes.index(oldChild)
del self.childNodes[index]
def cloneNode(self, deep):
import new
clone = new.instance(self.__class__, self.__dict__)
clone.attributes = self.attributes.copy()
if not deep:
clone.childNodes = []
else:
clone.childNodes = map(lambda x: x.cloneNode, self.childNodes)
return clone
def unlink(self):
self.parentNode = None
while self.childNodes:
self.childNodes[-1].unlink()
del self.childNodes[-1] # probably not most efficient!
self.childNodes = None
if self.attributes:
for attr in self._attrs.values():
self.removeAttributeNode(attr)
assert not len(self._attrs)
assert not len(self._attrsNS)
if Node._debug:
index = repr(id(self)) + repr(self.__class__)
self.debug.write("Deleting: %s\n" % index)
del Node.allnodes[index]
def _write_data(writer, data):
"Writes datachars to writer."
data = string.replace(data, "&", "&amp;")
data = string.replace(data, "<", "&lt;")
data = string.replace(data, "\"", "&quot;")
data = string.replace(data, ">", "&gt;")
writer.write(data)
def _getElementsByTagNameHelper(parent, name, rc):
for node in parent.childNodes:
if node.nodeType == Node.ELEMENT_NODE and \
(name == "*" or node.tagName == name):
rc.append(node)
_getElementsByTagNameHelper(node, name, rc)
return rc
def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc):
for node in parent.childNodes:
if node.nodeType == Node.ELEMENT_NODE:
if ((localName == "*" or node.tagName == localName) and
(nsURI == "*" or node.namespaceURI == nsURI)):
rc.append(node)
_getElementsByTagNameNSHelper(node, name, rc)
class Attr(Node):
nodeType = Node.ATTRIBUTE_NODE
def __init__(self, qName, namespaceURI="", localName=None, prefix=None):
# skip setattr for performance
self.__dict__["localName"] = localName or qName
self.__dict__["nodeName"] = self.__dict__["name"] = qName
self.__dict__["namespaceURI"] = namespaceURI
self.__dict__["prefix"] = prefix
self.attributes = None
Node.__init__(self)
# nodeValue and value are set elsewhere
def __setattr__(self, name, value):
if name in ("value", "nodeValue"):
self.__dict__["value"] = self.__dict__["nodeValue"] = value
else:
self.__dict__[name] = value
class AttributeList:
"""the attribute list is a transient interface to the underlying
dictionaries. mutations here will change the underlying element's
dictionary"""
def __init__(self, attrs, attrsNS):
self._attrs = attrs
self._attrsNS = attrsNS
self.length = len(self._attrs.keys())
def item(self, index):
try:
return self[self.keys()[index]]
except IndexError:
return None
def items(self):
return map(lambda node: (node.tagName, node.value),
self._attrs.values())
def itemsNS(self):
return map(lambda node: ((node.URI, node.localName), node.value),
self._attrs.values())
def keys(self):
return self._attrs.keys()
def keysNS(self):
return self._attrsNS.keys()
def values(self):
return self._attrs.values()
def __len__(self):
return self.length
def __cmp__(self, other):
if self._attrs is getattr(other, "_attrs", None):
return 0
else:
return cmp(id(self), id(other))
#FIXME: is it appropriate to return .value?
def __getitem__(self, attname_or_tuple):
if type(attname_or_tuple) is types.TupleType:
return self._attrsNS[attname_or_tuple]
else:
return self._attrs[attname_or_tuple]
# same as set
def __setitem__(self, attname, value):
if type(value) is types.StringType:
node = Attr(attname)
node.value=value
else:
assert isinstance(value, Attr) or type(value) is types.StringType
node = value
old = self._attrs.get(attname, None)
if old:
old.unlink()
self._attrs[node.name] = node
self._attrsNS[(node.namespaceURI, node.localName)] = node
def __delitem__(self, attname_or_tuple):
node = self[attname_or_tuple]
node.unlink()
del self._attrs[node.name]
del self._attrsNS[(node.namespaceURI, node.localName)]
class Element( Node ):
nodeType = Node.ELEMENT_NODE
def __init__(self, tagName, namespaceURI="", prefix="",
localName=None):
Node.__init__(self)
self.tagName = self.nodeName = tagName
self.localName = localName or tagName
self.prefix = prefix
self.namespaceURI = namespaceURI
self.nodeValue = None
self._attrs={} # attributes are double-indexed:
self._attrsNS={}# tagName -> Attribute
# URI,localName -> Attribute
# in the future: consider lazy generation of attribute objects
# this is too tricky for now because of headaches
# with namespaces.
def getAttribute(self, attname):
return self._attrs[attname].value
def getAttributeNS(self, namespaceURI, localName):
return self._attrsNS[(namespaceURI, localName)].value
def setAttribute(self, attname, value):
attr = Attr(attname)
# for performance
attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
self.setAttributeNode(attr)
def setAttributeNS(self, namespaceURI, qualifiedName, value):
prefix, localname = _nssplit(qualifiedName)
# for performance
attr = Attr(qualifiedName, namespaceURI, localname, prefix)
attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
self.setAttributeNode(attr)
def getAttributeNode(self, attrname):
return self._attrs.get(attrname)
def getAttributeNodeNS(self, namespaceURI, localName):
return self._attrsNS[(namespaceURI, localName)]
def setAttributeNode(self, attr):
old = self._attrs.get(attr.name, None)
if old:
old.unlink()
self._attrs[attr.name] = attr
self._attrsNS[(attr.namespaceURI, attr.localName)] = attr
def removeAttribute(self, name):
attr = self._attrs[name]
self.removeAttributeNode(attr)
def removeAttributeNS(self, namespaceURI, localName):
attr = self._attrsNS[(namespaceURI, localName)]
self.removeAttributeNode(attr)
def removeAttributeNode(self, node):
node.unlink()
del self._attrs[node.name]
del self._attrsNS[(node.namespaceURI, node.localName)]
def getElementsByTagName(self, name):
return _getElementsByTagNameHelper(self, name, [])
def getElementsByTagNameNS(self, namespaceURI, localName):
_getElementsByTagNameNSHelper(self, namespaceURI, localName, [])
def __repr__(self):
return "<DOM Element: %s at %s>" % (self.tagName, id(self))
def writexml(self, writer):
writer.write("<" + self.tagName)
a_names = self._get_attributes().keys()
a_names.sort()
for a_name in a_names:
writer.write(" %s=\"" % a_name)
_write_data(writer, self._get_attributes()[a_name])
writer.write("\"")
if self.childNodes:
writer.write(">")
for node in self.childNodes:
node.writexml(writer)
writer.write("</%s>" % self.tagName)
else:
writer.write("/>")
def _get_attributes(self):
return AttributeList(self._attrs, self._attrsNS)
class Comment(Node):
nodeType = Node.COMMENT_NODE
def __init__(self, data):
Node.__init__(self)
self.data = self.nodeValue = data
self.nodeName = "#comment"
self.attributes = None
def writexml(self, writer):
writer.write("<!--%s-->" % self.data)
class ProcessingInstruction(Node):
nodeType = Node.PROCESSING_INSTRUCTION_NODE
def __init__(self, target, data):
Node.__init__(self)
self.target = self.nodeName = target
self.data = self.nodeValue = data
self.attributes = None
def writexml(self, writer):
writer.write("<?%s %s?>" % (self.target, self.data))
class Text(Node):
nodeType = Node.TEXT_NODE
nodeName = "#text"
def __init__(self, data):
Node.__init__(self)
self.data = self.nodeValue = data
self.attributes = None
def __repr__(self):
if len(self.data) > 10:
dotdotdot = "..."
else:
dotdotdot = ""
return "<DOM Text node \"%s%s\">" % (self.data[0:10], dotdotdot)
def writexml(self, writer):
_write_data(writer, self.data)
def _nssplit(qualifiedName):
fields = qualifiedName.split(':', 1)
if len(fields) == 2:
return fields
elif len(fields) == 1:
return ('', fields[0])
class Document(Node):
nodeType = Node.DOCUMENT_NODE
documentElement = None
def __init__(self):
Node.__init__(self)
self.attributes = None
self.nodeName = "#document"
self.nodeValue = None
def appendChild(self, node):
if node.nodeType == Node.ELEMENT_NODE:
if self.documentElement:
raise TypeError, "Two document elements disallowed"
else:
self.documentElement = node
Node.appendChild(self, node)
return node
createElement = Element
createTextNode = Text
createComment = Comment
createProcessingInstruction = ProcessingInstruction
createAttribute = Attr
def createElementNS(self, namespaceURI, qualifiedName):
prefix,localName = _nssplit(qualifiedName)
return Element(qualifiedName, namespaceURI, prefix, localName)
def createAttributeNS(self, namespaceURI, qualifiedName):
prefix,localName = _nssplit(qualifiedName)
return Attr(namespaceURI, qualifiedName, localName, prefix)
def getElementsByTagNameNS(self, namespaceURI, localName):
_getElementsByTagNameNSHelper(self, namespaceURI, localName)
def unlink(self):
self.documentElement = None
Node.unlink(self)
def getElementsByTagName(self, name):
rc = []
_getElementsByTagNameHelper(self, name, rc)
return rc
def writexml(self, writer):
for node in self.childNodes:
node.writexml(writer)
def _doparse(func, args, kwargs):
events = apply(func, args, kwargs)
toktype, rootNode = events.getEvent()
events.expandNode(rootNode)
return rootNode
def parse(*args, **kwargs):
"Parse a file into a DOM by filename or file object"
return _doparse(pulldom.parse, args, kwargs)
def parseString(*args, **kwargs):
"Parse a file into a DOM from a string"
return _doparse(pulldom.parseString, args, kwargs)