2000-09-18 14:40:22 -03:00
|
|
|
"""\
|
|
|
|
A library of useful helper classes to the SAX classes, for the
|
2000-06-29 16:34:54 -03:00
|
|
|
convenience of application and driver writers.
|
|
|
|
"""
|
|
|
|
|
2000-09-24 18:31:06 -03:00
|
|
|
import os, urlparse, urllib, types
|
2000-06-29 16:34:54 -03:00
|
|
|
import handler
|
2000-09-24 07:53:31 -03:00
|
|
|
import xmlreader
|
2000-09-18 14:40:22 -03:00
|
|
|
|
2000-12-15 21:45:11 -04:00
|
|
|
# Build the list of string types that prepare_input_source() accepts as a
# system identifier.  If the lookup of types.UnicodeType raises
# AttributeError, fall back to the plain string type only.
try:
    _StringTypes = [types.StringType, types.UnicodeType]
except AttributeError:
    _StringTypes = [types.StringType]
|
|
|
|
|
2004-05-05 23:22:43 -03:00
|
|
|
# See whether the xmlcharrefreplace error handler is supported.
# _error_handling is the "errors" argument that XMLGenerator._write() passes
# to text.encode().  The import below is only a probe: the imported name is
# deleted again right away so it does not stay in the module namespace.
try:
    from codecs import xmlcharrefreplace_errors
    _error_handling = "xmlcharrefreplace"
    del xmlcharrefreplace_errors
except ImportError:
    # Handler not available: encoding errors are raised instead.
    _error_handling = "strict"
|
|
|
|
|
2002-10-26 11:50:45 -03:00
|
|
|
def __dict_replace(s, d):
    """Return *s* with each key of *d* replaced by its mapped value.

    The replacements are applied one after another in the dictionary's
    iteration order, so a later replacement operates on the output of
    the earlier ones.
    """
    result = s
    for old, new in d.items():
        result = result.replace(old, new)
    return result
|
2000-09-24 18:31:06 -03:00
|
|
|
|
2000-09-18 14:40:22 -03:00
|
|
|
def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Returns the escaped string.  The entities dictionary is only read,
    never modified.
    """
    # The ampersand must be handled first: the replacements below introduce
    # new '&' characters that must not be escaped a second time.
    data = data.replace("&", "&amp;")
    data = data.replace(">", "&gt;")
    data = data.replace("<", "&lt;")
    if entities:
        data = __dict_replace(data, entities)
    return data
|
2002-10-26 11:50:45 -03:00
|
|
|
|
|
|
|
def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    You can unescape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Returns the unescaped string.  The entities dictionary is only read,
    never modified.
    """
    data = data.replace("&lt;", "<")
    data = data.replace("&gt;", ">")
    if entities:
        data = __dict_replace(data, entities)
    # The ampersand must be handled last: doing it earlier would turn
    # "&amp;lt;" into "&lt;" and then wrongly into "<".
    return data.replace("&amp;", "&")
|
2000-06-29 16:34:54 -03:00
|
|
|
|
2001-07-19 13:10:15 -03:00
|
|
|
def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The " character will be escaped as well, if
    necessary.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Returns the escaped value including its surrounding quote characters.
    """
    # Work on a copy so that neither the caller's dictionary nor the shared
    # default argument is modified by the update below.
    entities = entities.copy()
    # Newline, carriage return and tab are written as character references.
    entities.update({'\n': '&#10;', '\r': '&#13;', '\t': '&#9;'})
    data = escape(data, entities)
    if '"' in data:
        if "'" in data:
            # Both quote characters occur: use double quotes as the
            # delimiter and escape the double quotes inside the value.
            data = '"%s"' % data.replace('"', "&quot;")
        else:
            # Only double quotes occur: single-quote delimiters avoid
            # having to escape anything.
            data = "'%s'" % data
    else:
        data = '"%s"' % data
    return data
|
|
|
|
|
2000-09-18 14:40:22 -03:00
|
|
|
|
2000-06-29 16:34:54 -03:00
|
|
|
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the events it receives out as XML text.

    All output goes through _write(), which encodes non-str text with the
    configured encoding before handing it to the output object.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        """Create a generator writing to *out* (sys.stdout when None).

        *encoding* is announced in the XML declaration and used by
        _write() to encode text that is not already a str.
        """
        if out is None:
            import sys
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = out
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that have not
        # yet been written out as xmlns attributes.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _write(self, text):
        """Write *text* to the output, encoding it first unless it is a str."""
        if isinstance(text, str):
            self._out.write(text)
        else:
            self._out.write(text.encode(self._encoding, _error_handling))

    def _qname(self, name):
        """Builds a qualified name from a (ns_url, localname) pair"""
        if name[0]:
            # The name is in a non-empty namespace
            prefix = self._current_context[name[0]]
            if prefix:
                # If it is not the default namespace, prepend the prefix
                return prefix + ":" + name[1]
        # Return the unqualified name
        return name[1]

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration, naming the configured encoding."""
        self._write('<?xml version="1.0" encoding="%s"?>\n' %
                    self._encoding)

    def startPrefixMapping(self, prefix, uri):
        """Record a new prefix mapping and queue its xmlns declaration."""
        # Save a snapshot of the mappings as they were before this one, so
        # endPrefixMapping() can restore it.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Restore the mapping snapshot saved by startPrefixMapping()."""
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        """Write the start tag for *name* with its attributes."""
        self._write('<' + name)
        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (attr_name, quoteattr(value)))
        self._write('>')

    def endElement(self, name):
        """Write the end tag for *name*."""
        self._write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        """Write a namespace-aware start tag.

        *name* is a (uri, localname) pair; the tag name is built from it
        with _qname() and the *qname* argument is not used.  Pending
        namespace declarations are written as xmlns attributes before the
        element's own attributes.
        """
        self._write('<' + self._qname(name))

        # Go through _write() like all other output so that the declarations
        # get the same encoding treatment as the rest of the document.
        for prefix, uri in self._undeclared_ns_maps:
            if prefix:
                self._write(' xmlns:%s="%s"' % (prefix, uri))
            else:
                self._write(' xmlns="%s"' % uri)
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write(' %s=%s' % (self._qname(attr_name), quoteattr(value)))
        self._write('>')

    def endElementNS(self, name, qname):
        """Write the end tag for the (uri, localname) pair *name*."""
        self._write('</%s>' % self._qname(name))

    def characters(self, content):
        """Write character data with &, < and > escaped."""
        self._write(escape(content))

    def ignorableWhitespace(self, content):
        """Write ignorable whitespace unchanged."""
        self._write(content)

    def processingInstruction(self, target, data):
        """Write a processing instruction."""
        self._write('<?%s %s?>' % (target, data))
|
2000-06-29 16:34:54 -03:00
|
|
|
|
2000-09-18 14:40:22 -03:00
|
|
|
|
2000-09-24 07:53:31 -03:00
|
|
|
class XMLFilterBase(xmlreader.XMLReader):
    """Pass-through filter between an XMLReader and the application's
    event handlers.

    Configuration requests are forwarded to the parent reader and events
    are forwarded to the registered handlers without modification.
    Subclasses override individual methods to alter the event stream or
    the configuration requests on their way through.
    """

    def __init__(self, parent = None):
        """Create a filter that sits on top of the reader *parent*."""
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        """Forward the error to the registered error handler."""
        target = self._err_handler
        target.error(exception)

    def fatalError(self, exception):
        """Forward the fatal error to the registered error handler."""
        target = self._err_handler
        target.fatalError(exception)

    def warning(self, exception):
        """Forward the warning to the registered error handler."""
        target = self._err_handler
        target.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        """Forward the locator to the registered content handler."""
        target = self._cont_handler
        target.setDocumentLocator(locator)

    def startDocument(self):
        """Forward the event to the registered content handler."""
        target = self._cont_handler
        target.startDocument()

    def endDocument(self):
        """Forward the event to the registered content handler."""
        target = self._cont_handler
        target.endDocument()

    def startPrefixMapping(self, prefix, uri):
        """Forward the event to the registered content handler."""
        target = self._cont_handler
        target.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        """Forward the event to the registered content handler."""
        target = self._cont_handler
        target.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        """Forward the event to the registered content handler."""
        target = self._cont_handler
        target.startElement(name, attrs)

    def endElement(self, name):
        """Forward the event to the registered content handler."""
        target = self._cont_handler
        target.endElement(name)

    def startElementNS(self, name, qname, attrs):
        """Forward the event to the registered content handler."""
        target = self._cont_handler
        target.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        """Forward the event to the registered content handler."""
        target = self._cont_handler
        target.endElementNS(name, qname)

    def characters(self, content):
        """Forward the event to the registered content handler."""
        target = self._cont_handler
        target.characters(content)

    def ignorableWhitespace(self, chars):
        """Forward the event to the registered content handler."""
        target = self._cont_handler
        target.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        """Forward the event to the registered content handler."""
        sink = self._cont_handler
        sink.processingInstruction(target, data)

    def skippedEntity(self, name):
        """Forward the event to the registered content handler."""
        target = self._cont_handler
        target.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        """Forward the declaration to the registered DTD handler."""
        target = self._dtd_handler
        target.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        """Forward the declaration to the registered DTD handler."""
        target = self._dtd_handler
        target.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        """Return whatever the registered entity resolver returns."""
        resolver = self._ent_handler
        return resolver.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        """Install this filter as all four handlers of the parent reader,
        then let the parent parse *source*."""
        upstream = self._parent
        upstream.setContentHandler(self)
        upstream.setErrorHandler(self)
        upstream.setEntityResolver(self)
        upstream.setDTDHandler(self)
        upstream.parse(source)

    def setLocale(self, locale):
        """Forward the request to the parent reader."""
        upstream = self._parent
        upstream.setLocale(locale)

    def getFeature(self, name):
        """Return the parent reader's value for feature *name*."""
        upstream = self._parent
        return upstream.getFeature(name)

    def setFeature(self, name, state):
        """Forward the request to the parent reader."""
        upstream = self._parent
        upstream.setFeature(name, state)

    def getProperty(self, name):
        """Return the parent reader's value for property *name*."""
        upstream = self._parent
        return upstream.getProperty(name)

    def setProperty(self, name, value):
        """Forward the request to the parent reader."""
        upstream = self._parent
        upstream.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        """Return the parent reader."""
        return self._parent

    def setParent(self, parent):
        """Replace the parent reader with *parent*."""
        self._parent = parent
|
|
|
|
|
2000-09-24 15:54:49 -03:00
|
|
|
# --- Utility functions
|
|
|
|
|
|
|
|
def prepare_input_source(source, base = ""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    *source* may also be a string, which is used as the system identifier
    of a new InputSource, or any object with a "read" attribute, which
    becomes the byte stream of a new InputSource (its "name" attribute,
    when present, is used as the system identifier).

    If the resulting InputSource has no byte stream, one is opened from
    its system identifier: as a local file when that path exists relative
    to the directory of *base*, otherwise as a URL joined onto *base*.
    """
    # isinstance() rather than an exact type comparison, so that subclasses
    # of the string types are recognised as system identifiers too.
    if isinstance(source, tuple(_StringTypes)):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        if hasattr(f, "name"):
            source.setSystemId(f.name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        sysidfilename = os.path.join(basehead, sysid)
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            # The open file is handed to the InputSource and returned to the
            # caller, so it is deliberately not closed here.
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))
            f = urllib.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source
|