cpython/Lib/xml/sax/expatreader.py

204 lines
6.6 KiB
Python
Raw Normal View History

"""
SAX driver for the Pyexpat C module. This driver works with
pyexpat.__version__ == '1.5'.
$Id$
"""
# Todo on driver:
# - make it support external entities (wait for pyexpat.c)
# - enable configuration between reset() and feed() calls
# - support lexical events?
# - proper inputsource handling
# - properties and features
# Todo on pyexpat.c:
# - support XML_ExternalEntityParserCreate
# - exceptions in callouts from pyexpat to python code lose position info
version = "0.20"
from string import split
from xml.sax import xmlreader
import pyexpat
import xml.sax
# --- ExpatParser
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the Pyexpat C module.

    The driver acts as its own Locator: parse() hands `self` to the
    content handler through setDocumentLocator(), and the Locator
    methods read their answers from the current pyexpat parser.
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        """Create a driver with no underlying expat parser yet.

        namespaceHandling -- when true, reset() creates a namespace-aware
            expat parser and element names are reported as
            (uri, localname) tuples.
        bufsize -- forwarded to IncrementalParser.__init__().
        """
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = None
        self._parser = None
        self._namespaces = namespaceHandling
        self._parsing = 0

    # XMLReader methods

    def parse(self, stream_or_string):
        """Parse an XML document from a file name or an open stream.

        A string argument is treated as a file name and opened here; any
        other object is handed on as the stream.  A file opened by this
        method is closed again before returning, whether or not parsing
        succeeded.  Raises xml.sax.SAXParseException when expat reports
        an error.
        """
        opened_here = isinstance(stream_or_string, type(""))
        if opened_here:
            stream = open(stream_or_string)
        else:
            stream = stream_or_string

        try:
            self.reset()
            self._cont_handler.setDocumentLocator(self)
            try:
                xmlreader.IncrementalParser.parse(self, stream)
            except pyexpat.error:
                # Translate expat's numeric error code into a SAX
                # exception that carries this driver as its locator.
                error_code = self._parser.ErrorCode
                raise xml.sax.SAXParseException(
                    pyexpat.ErrorString(error_code), None, self)
            # NOTE(review): close() also reports endDocument() when a
            # feed() was in progress; if IncrementalParser.parse() ends
            # by calling close() the handler sees it twice -- confirm
            # against xmlreader before removing either call.
            self._cont_handler.endDocument()
        finally:
            if opened_here:
                stream.close()

    def prepareParser(self, filename=None):
        """Remember the source and use it as the base of the parser."""
        self._source = filename
        if self._source is not None:
            self._parser.SetBase(self._source)

    def getFeature(self, name):
        """Look up the state of a SAX2 feature.

        No features are supported, so this always raises
        xml.sax.SAXNotRecognizedException.
        """
        raise xml.sax.SAXNotRecognizedException(
            "Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set the state of a SAX2 feature.

        No features are supported, so this always raises
        xml.sax.SAXNotRecognizedException.
        """
        raise xml.sax.SAXNotRecognizedException(
            "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Look up the value of a SAX2 property.

        No properties are supported, so this always raises
        xml.sax.SAXNotRecognizedException.
        """
        raise xml.sax.SAXNotRecognizedException(
            "Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set the value of a SAX2 property.

        No properties are supported, so this always raises
        xml.sax.SAXNotRecognizedException.
        """
        raise xml.sax.SAXNotRecognizedException(
            "Property '%s' not recognized" % name)

    # IncrementalParser methods

    def feed(self, data):
        """Hand one chunk of the document to expat.

        The first chunk of a document creates a fresh parser through
        reset() and reports startDocument() before any data is parsed.
        """
        if not self._parsing:
            self._parsing = 1
            # NOTE(review): this replaces any parser that parse() and
            # prepareParser() already configured, including its base.
            self.reset()
            self._cont_handler.startDocument()
        # FIXME: error checking and endDocument()
        self._parser.Parse(data, 0)

    def close(self):
        """Tell expat the document is complete and finish the event stream.

        expat is finalised first so that anything it still has to report
        (or any error it raises) arrives before endDocument(), which is
        only sent when a feed() had started a document.
        """
        was_parsing = self._parsing
        self._parsing = 0
        self._parser.Parse("", 1)
        if was_parsing:
            self._cont_handler.endDocument()

    def reset(self):
        """Create a new pyexpat parser and connect its callbacks.

        processingInstruction and characters are bound straight to the
        content handler that is installed at the time of this call.
        """
        if self._namespaces:
            # " " is the separator expat puts between the namespace URI
            # and the local name; _split_ns_name() relies on it.
            self._parser = pyexpat.ParserCreate(None, " ")
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = pyexpat.ParserCreate()
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._parser.ProcessingInstructionHandler = \
            self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl
        # Not hooked up yet: CommentHandler, StartCdataSectionHandler,
        # EndCdataSectionHandler, DefaultHandler, DefaultHandlerExpand,
        # NotStandaloneHandler.
        self._parser.ExternalEntityRefHandler = self.external_entity_ref

    # Locator methods

    def getColumnNumber(self):
        """Return the parser's ErrorColumnNumber."""
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return the parser's ErrorLineNumber."""
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public identifier of the source, or None.

        prepareParser() may store a plain file name (or nothing) as the
        source; such values carry no public identifier.
        """
        source = self._source
        if source is None or not hasattr(source, "getPublicId"):
            return None
        return source.getPublicId()

    def getSystemId(self):
        """Return the base set on the underlying parser."""
        return self._parser.GetBase()

    # event handlers

    def _split_ns_name(self, name):
        """Turn an expat namespace name into a (uri, localname) tuple.

        expat joins URI and local name with the " " separator chosen in
        reset(); a name without a separator has no namespace and gets
        None as its URI.
        """
        parts = name.split()
        if len(parts) == 1:
            return (None, name)
        return tuple(parts)

    def start_element(self, name, attrs):
        """Report the start of an element (no namespace processing)."""
        self._cont_handler.startElement(
            name, name, xmlreader.AttributesImpl(attrs, attrs))

    def end_element(self, name):
        """Report the end of an element (no namespace processing)."""
        self._cont_handler.endElement(name, name)

    def start_element_ns(self, name, attrs):
        """Report the start of an element as a (uri, localname) tuple."""
        self._cont_handler.startElement(
            self._split_ns_name(name), None,
            xmlreader.AttributesImpl(attrs, None))

    def end_element_ns(self, name):
        """Report the end of an element as a (uri, localname) tuple.

        The name has the same shape as the one start_element_ns()
        reports, so handlers can match start and end events.
        """
        self._cont_handler.endElement(self._split_ns_name(name), None)

    # this is not used
    def processing_instruction(self, target, data):
        """Forward a processing instruction to the content handler."""
        self._cont_handler.processingInstruction(target, data)

    # this is not used
    def character_data(self, data):
        """Forward character data to the content handler."""
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        """Report the start of a prefix mapping."""
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        """Report the end of a prefix mapping."""
        self._cont_handler.endPrefixMapping(prefix)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        """Forward an unparsed entity declaration to the DTD handler.

        expat supplies (base, sysid, pubid); the handler is called with
        pubid before sysid and `base` is dropped.
        """
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        """Forward a notation declaration to the DTD handler.

        The handler is called with pubid before sysid; `base` is dropped.
        """
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """Handle an external entity reference (not implemented).

        Always raises AssertionError.  It is raised explicitly instead
        of through an assert statement so that running under -O cannot
        let execution continue into unfinished code.
        """
        # FIXME: resolve the entity with
        #        self._ent_handler.resolveEntity(pubid, sysid), prepare it
        #        as an input source, create a new parser, stack
        #        self._source and self._parser, and return 1.
        # FIXME: reuse code from self.parse(...)
        raise AssertionError("external entities are not implemented")
# ---
def create_parser(*args, **kwargs):
    """Return a new ExpatParser built from the given arguments.

    Positional and keyword arguments are passed to the ExpatParser
    constructor unchanged.
    """
    return ExpatParser(*args, **kwargs)
# ---
if __name__ == "__main__":
    # Manual smoke test: run a sample document through the driver with
    # the generator and error handler that xml.sax provides.
    import xml.sax

    parser = create_parser()
    parser.setContentHandler(xml.sax.XMLGenerator())
    parser.setErrorHandler(xml.sax.ErrorHandler())
    parser.parse("../../../hamlet.xml")