cpython/Lib/xml/dom/pulldom.py

239 lines
7.9 KiB
Python
Raw Normal View History

import minidom
2000-09-24 18:54:14 -03:00
import xml.sax,xml.sax.handler
2000-09-24 02:21:58 -03:00
# Event tokens.  Every event queued by the content handler in this module
# is a (token, node) pair whose first item is one of these strings; each
# token's value is simply its own name.

# Document boundaries.
START_DOCUMENT = "START_DOCUMENT"
END_DOCUMENT = "END_DOCUMENT"

# Element boundaries.
START_ELEMENT = "START_ELEMENT"
END_ELEMENT = "END_ELEMENT"

# Leaf content.
CHARACTERS = "CHARACTERS"
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
COMMENT = "COMMENT"
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
2000-09-24 18:54:14 -03:00
class PullDOM(xml.sax.ContentHandler):
    """SAX content handler that turns callbacks into queued DOM events.

    Every callback creates the matching DOM node through ``self.document``
    and queues a ``(TOKEN, node)`` pair.  The queue is a singly linked list
    of ``[event, next_cell]`` cells: ``firstEvent`` is a sentinel head whose
    ``[1]`` slot points at the oldest pending event, and ``lastEvent`` is
    the tail cell that new events are linked onto.

    Nodes only get their ``parentNode`` attribute set here; this class never
    appends them to the parent's child list.  ``self.document`` and
    ``self.curNode`` exist only once ``startDocument`` has been called.
    """

    def __init__(self):
        # Sentinel head cell of the event list.
        self.firstEvent = [None, None]
        self.lastEvent = self.firstEvent
        # Stack of enclosing uri -> prefix dicts; the innermost scope is
        # kept separately in _current_context.
        self._ns_contexts = [{}]
        self._current_context = self._ns_contexts[-1]

    def _append_event(self, token, node):
        """Link a new ``(token, node)`` event onto the tail of the queue."""
        cell = [(token, node), None]
        self.lastEvent[1] = cell
        self.lastEvent = cell

    def _qualified_name(self, uri, localname):
        """Return a qualified name for *localname* in namespace *uri*.

        The prefix is looked up in the current namespace context.  When
        the namespace is bound without a prefix (default namespace, stored
        as a false value) the bare local name is returned rather than
        trying to concatenate ``None`` with ``":"``.

        Raises KeyError when *uri* has no binding in the current context.
        """
        prefix = self._current_context[uri]
        if prefix:
            return prefix + ":" + localname
        return localname

    def setDocumentLocator(self, locator): pass

    def startPrefixMapping(self, prefix, uri):
        """Open a namespace scope in which *uri* is bound to *prefix*."""
        # Save the enclosing scope untouched and work on a copy, so that
        # endPrefixMapping can restore exactly what was visible before.
        self._ns_contexts.append(self._current_context)
        self._current_context = self._current_context.copy()
        self._current_context[uri] = prefix

    def endPrefixMapping(self, prefix):
        """Close the innermost namespace scope, restoring the outer one."""
        self._current_context = self._ns_contexts.pop()

    def startElementNS(self, name, tagName , attrs):
        """Create a namespace-aware element node and queue START_ELEMENT.

        name -- ``(uri, localname)`` pair.
        tagName -- the original qualified name, or None if the reader did
            not supply one.
        attrs -- mapping of ``(uri, localname)`` pairs to attribute values.
        """
        uri, localname = name
        if uri:
            # When using namespaces, the reader may or may not
            # provide us with the original name. If not, create
            # *a* valid tagName from the current context.
            if tagName is None:
                tagName = self._qualified_name(uri, localname)
            node = self.document.createElementNS(uri, tagName)
        else:
            # When the tagname is not prefixed, it just appears as
            # localname
            node = self.document.createElement(localname)

        for aname, value in attrs.items():
            a_uri, a_localname = aname
            if a_uri:
                qname = self._qualified_name(a_uri, a_localname)
                attr = self.document.createAttributeNS(a_uri, qname)
            else:
                attr = self.document.createAttribute(a_localname)
            attr.value = value
            node.setAttributeNode(attr)

        node.parentNode = self.curNode
        self.curNode = node
        self._append_event(START_ELEMENT, node)

    def endElementNS(self, name, tagName):
        """Queue END_ELEMENT for the current node and step to its parent."""
        node = self.curNode
        self._append_event(END_ELEMENT, node)
        self.curNode = node.parentNode

    def startElement(self, name, attrs):
        """Create a plain (non-namespace) element and queue START_ELEMENT."""
        node = self.document.createElement(name)
        for aname, value in attrs.items():
            attr = self.document.createAttribute(aname)
            attr.value = value
            node.setAttributeNode(attr)
        node.parentNode = self.curNode
        self.curNode = node
        self._append_event(START_ELEMENT, node)

    def endElement(self, name):
        """Queue END_ELEMENT for the current node and step to its parent."""
        node = self.curNode
        self._append_event(END_ELEMENT, node)
        self.curNode = node.parentNode

    def comment(self, s):
        """Create a comment node under the current node and queue COMMENT."""
        node = self.document.createComment(s)
        node.parentNode = self.curNode
        self._append_event(COMMENT, node)

    def processingInstruction(self, target, data):
        """Create a processing-instruction node and queue its event."""
        node = self.document.createProcessingInstruction(target, data)
        node.parentNode = self.curNode
        self._append_event(PROCESSING_INSTRUCTION, node)

    def ignorableWhitespace(self, chars):
        """Create a text node for *chars* and queue IGNORABLE_WHITESPACE."""
        # The handler receives the whitespace as one string; there is no
        # separate start/length pair to slice with.
        node = self.document.createTextNode(chars)
        node.parentNode = self.curNode
        self._append_event(IGNORABLE_WHITESPACE, node)

    def characters(self, chars):
        """Create a text node for *chars* and queue CHARACTERS."""
        node = self.document.createTextNode(chars)
        node.parentNode = self.curNode
        self._append_event(CHARACTERS, node)

    def startDocument(self):
        """Create the document node, make it current, queue START_DOCUMENT."""
        node = self.curNode = self.document = minidom.Document()
        node.parentNode = None
        self._append_event(START_DOCUMENT, node)

    def endDocument(self):
        """Record the document element (if any) and queue END_DOCUMENT."""
        # Every element must have been closed, so the current node is the
        # parentless document node again.
        assert not self.curNode.parentNode
        for node in self.curNode.childNodes:
            if node.nodeType == node.ELEMENT_NODE:
                self.document.documentElement = node
        # The event carries the document itself, not whichever child the
        # loop above happened to visit last (there may be none at all).
        self._append_event(END_DOCUMENT, self.curNode)
class ErrorHandler:
    """Error handler that prints warnings and raises everything else."""

    def warning(self, exception):
        """Print *exception* to standard output and carry on."""
        # Parenthesised so the same source compiles on Python 2 and 3;
        # with a single argument the output is identical on both.
        print(exception)

    def error(self, exception):
        """Raise *exception*."""
        raise exception

    def fatalError(self, exception):
        """Raise *exception*."""
        raise exception
class DOMEventStream:
    """Hand out queued (token, node) events, feeding the parser on demand.

    The stream is read ``bufsize`` units at a time and fed to the parser
    only when the handler's event queue has run dry.  Indexing the object
    (``stream[i]``) returns the next event regardless of the index and
    raises IndexError once no further event is available.
    """

    def __init__(self, stream, parser, bufsize):
        self.stream, self.parser, self.bufsize = stream, parser, bufsize
        self.reset()

    def reset(self):
        """Install a fresh PullDOM handler on the parser."""
        handler = PullDOM()
        self.pulldom = handler
        # This content handler relies on namespace support
        self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        self.parser.setContentHandler(handler)

    def __getitem__(self, pos):
        """Return the next event; *pos* is ignored."""
        event = self.getEvent()
        if not event:
            raise IndexError
        return event

    def expandNode(self, node):
        """Consume events up to the one carrying *node* itself.

        Every node seen on the way, other than those reported by an
        END_ELEMENT event, is appended to its parent.  Returns when an
        event for *node* is reached or when the events run out.
        """
        while 1:
            event = self.getEvent()
            if not event:
                return
            token, cur_node = event
            if cur_node is node:
                return
            if token != END_ELEMENT:
                cur_node.parentNode.appendChild(cur_node)

    def getEvent(self):
        """Return the oldest pending event, or None at end of input."""
        head = self.pulldom.firstEvent
        if not head[1]:
            # Queue is empty: new events must be linked onto the head.
            self.pulldom.lastEvent = head
        while not head[1]:
            data = self.stream.read(self.bufsize)
            if not data:
                # FIXME (kept from the original): parser.close() is not
                # called here; the open question was why closing Expat
                # does not work.
                return None
            self.parser.feed(data)
        # Unlink the first cell and hand back its event.
        event, following = head[1]
        head[1] = following
        return event
class SAX2DOM(PullDOM):
    """PullDOM variant that also attaches each new node to its parent.

    Elements, processing instructions, ignorable whitespace and character
    data are appended to their parent's children as soon as the base class
    has created them.
    """

    def startElementNS(self, name, tagName , attrs):
        PullDOM.startElementNS(self, name, tagName, attrs)
        # The base class has just made the new element the current node.
        element = self.curNode
        element.parentNode.appendChild(element)

    def startElement(self, name, attrs):
        PullDOM.startElement(self, name, attrs)
        element = self.curNode
        element.parentNode.appendChild(element)

    def processingInstruction(self, target, data):
        PullDOM.processingInstruction(self, target, data)
        # The tail cell of the event queue holds the (token, node) pair
        # the base class has just queued.
        token, leaf = self.lastEvent[0]
        leaf.parentNode.appendChild(leaf)

    def ignorableWhitespace(self, chars):
        PullDOM.ignorableWhitespace(self, chars)
        token, leaf = self.lastEvent[0]
        leaf.parentNode.appendChild(leaf)

    def characters(self, chars):
        PullDOM.characters(self, chars)
        token, leaf = self.lastEvent[0]
        leaf.parentNode.appendChild(leaf)
2000-09-24 02:21:58 -03:00
default_bufsize = (2 ** 14) - 20

def parse(stream_or_string, parser=None, bufsize=default_bufsize):
    """Return a DOMEventStream over a file name or an already open stream.

    stream_or_string -- a string (including instances of string
        subclasses) is taken as a file name and opened for reading; any
        other object is used as the stream itself.
    parser -- SAX parser to drive; when omitted (or false) one is created
        with xml.sax.make_parser().
    bufsize -- size passed to each read() on the stream.
    """
    # isinstance() rather than an exact type comparison, so that string
    # subclasses are recognised as file names too.  type("") is used
    # instead of naming the string type directly, as the original did.
    if isinstance(stream_or_string, type("")):
        # The returned event stream reads the file lazily, so it cannot
        # be closed here.
        stream = open(stream_or_string)
    else:
        stream = stream_or_string
    if not parser:
        parser = xml.sax.make_parser()
    return DOMEventStream(stream, parser, bufsize)
2000-09-24 02:21:58 -03:00
def parseString(string, parser=None):
    """Return a DOMEventStream that reads its input from *string*.

    The string is wrapped in an in-memory stream and its length is used
    as the buffer size.  When *parser* is omitted (or false) a parser is
    created with xml.sax.make_parser().
    """
    # Prefer the C implementation and fall back to the pure-Python one.
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO

    size = len(string)
    source = StringIO(string)
    parser = parser or xml.sax.make_parser()
    return DOMEventStream(source, parser, size)