Added back the InputSource class (patch 101630).
This commit is contained in:
parent
b7536d5860
commit
523b0a6ec8
|
@ -21,16 +21,17 @@ expatreader -- Driver that allows use of the Expat parser with the
|
|||
|
||||
"""
|
||||
|
||||
from xmlreader import InputSource
|
||||
from handler import ContentHandler, ErrorHandler
|
||||
from _exceptions import SAXException, SAXNotRecognizedException, \
|
||||
SAXParseException, SAXNotSupportedException
|
||||
|
||||
|
||||
def parse(source, handler, errorHandler=ErrorHandler()):
    """Parse an XML document, reporting events to the given handlers.

    source is passed unchanged to ExpatParser.parse.  handler is
    installed as the content handler and errorHandler as the error
    handler of a newly created ExpatParser.

    NOTE(review): the default errorHandler is created once, when this
    function is defined, so all calls relying on the default share it.
    """
    parser = ExpatParser()
    parser.setContentHandler(handler)
    parser.setErrorHandler(errorHandler)
    parser.parse(source)
|
||||
|
||||
def parseString(string, handler, errorHandler=ErrorHandler()):
|
||||
try:
|
||||
|
@ -43,7 +44,10 @@ def parseString(string, handler, errorHandler=ErrorHandler()):
|
|||
parser = ExpatParser()
|
||||
parser.setContentHandler(handler)
|
||||
parser.setErrorHandler(errorHandler)
|
||||
parser.parse(StringIO(string))
|
||||
|
||||
inpsrc = InputSource()
|
||||
inpsrc.setByteStream(StringIO(string))
|
||||
parser.parse(inpsrc)
|
||||
|
||||
# this is the parser list used by the make_parser function if no
|
||||
# alternatives are given as parameters to the function
|
||||
|
|
|
@ -18,7 +18,7 @@ version = "0.20"
|
|||
|
||||
from xml.sax._exceptions import *
|
||||
from xml.parsers import expat
|
||||
from xml.sax import xmlreader
|
||||
from xml.sax import xmlreader, saxutils
|
||||
|
||||
AttributesImpl = xmlreader.AttributesImpl
|
||||
AttributesNSImpl = xmlreader.AttributesNSImpl
|
||||
|
@ -37,28 +37,24 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
|
|||
|
||||
# XMLReader methods
|
||||
|
||||
def parse(self, source):
    """Parse an XML document from a system identifier or an InputSource.

    The source is first resolved with saxutils.prepare_input_source and
    remembered in self._source.  The parser is then reset, this object
    is handed to the content handler as the document locator, and the
    actual reading is delegated to IncrementalParser.parse.

    Raises SAXParseException (carrying expat's error string and this
    object as locator) when expat reports an error.  endDocument is
    signalled only when no such error was raised.
    """
    source = saxutils.prepare_input_source(source)

    self._source = source
    self.reset()
    self._cont_handler.setDocumentLocator(self)
    try:
        xmlreader.IncrementalParser.parse(self, source)
    except expat.error:
        # Translate the low-level expat failure into the SAX exception.
        error_code = self._parser.ErrorCode
        raise SAXParseException(expat.ErrorString(error_code), None, self)

    self._cont_handler.endDocument()
|
||||
|
||||
def prepareParser(self, source):
    """Use the source's system identifier as the expat parser's base.

    source must provide getSystemId().  When that returns None the
    parser is left untouched.
    """
    system_id = source.getSystemId()
    if system_id is not None:
        self._parser.SetBase(system_id)
|
||||
|
||||
def getFeature(self, name):
|
||||
if name == feature_namespaces:
|
||||
|
|
|
@ -3,6 +3,7 @@ A library of useful helper classes to the SAX classes, for the
|
|||
convenience of application and driver writers.
|
||||
"""
|
||||
|
||||
import os, urlparse, urllib
|
||||
import handler
|
||||
import xmlreader
|
||||
|
||||
|
@ -181,3 +182,24 @@ class XMLFilterBase(xmlreader.XMLReader):
|
|||
|
||||
def setProperty(self, name, value):
    "Forward the property assignment to self._parent unchanged."
    self._parent.setProperty(name, value)
|
||||
|
||||
# --- Utility functions
|
||||
|
||||
def prepare_input_source(source, base=""):
    """Return an InputSource that is ready for reading.

    source is either a system identifier string, which is wrapped in a
    new xmlreader.InputSource, or an object offering the InputSource
    accessors used below.  base is an optional base URL or path against
    which the system identifier is resolved.

    When the source has no byte stream, its system identifier is
    resolved against base, stored back on the source, and opened with
    urllib.urlopen; the resulting stream becomes the byte stream.  A
    source that already has a byte stream is returned untouched.

    NOTE(review): a character stream set on the source is not consulted
    here, and a caller-supplied InputSource is modified in place.
    """
    if isinstance(source, str):
        source = xmlreader.InputSource(source)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        if urlparse.urlparse(sysid)[0] == '':
            # No URL scheme: treat sysid as a file name relative to the
            # directory part of base.
            basehead = os.path.split(os.path.normpath(base))[0]
            source.setSystemId(os.path.join(basehead, sysid))
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))

        source.setByteStream(urllib.urlopen(source.getSystemId()))

    return source
|
||||
|
|
|
@ -6,6 +6,7 @@ import handler
|
|||
# ===== XMLREADER =====
|
||||
|
||||
class XMLReader:
|
||||
|
||||
def __init__(self):
|
||||
self._cont_handler = handler.ContentHandler()
|
||||
#self._dtd_handler = handler.DTDHandler()
|
||||
|
@ -73,6 +74,7 @@ class XMLReader:
|
|||
"Sets the value of a SAX2 property."
|
||||
raise SAXNotRecognizedException("Property '%s' not recognized" % name)
|
||||
|
||||
import saxutils
|
||||
|
||||
class IncrementalParser(XMLReader):
|
||||
"""This interface adds three extra methods to the XMLReader
|
||||
|
@ -98,23 +100,17 @@ class IncrementalParser(XMLReader):
|
|||
self._bufsize = bufsize
|
||||
XMLReader.__init__(self)
|
||||
|
||||
def _parseOpenFile(self, source):
|
||||
buffer = source.read(self._bufsize)
|
||||
def parse(self, source):
|
||||
source = saxutils.prepare_input_source(source)
|
||||
|
||||
self.prepareParser(source)
|
||||
file = source.getByteStream()
|
||||
buffer = file.read(self._bufsize)
|
||||
while buffer != "":
|
||||
self.feed(buffer)
|
||||
buffer = source.read(self._bufsize)
|
||||
self.close()
|
||||
self.reset()
|
||||
buffer = file.read(self._bufsize)
|
||||
|
||||
def parse(self, source):
|
||||
if hasattr(source, "read"):
|
||||
self._parseOpenFile(source)
|
||||
else:
|
||||
#FIXME: how to recognize if it is a URL instead of filename?
|
||||
self.prepareParser(source)
|
||||
file = open(source)
|
||||
self._parseOpenFile(file)
|
||||
file.close()
|
||||
self.reset()
|
||||
|
||||
def feed(self, data):
|
||||
"""This method gives the raw XML data in the data parameter to
|
||||
|
@ -174,6 +170,95 @@ class Locator:
|
|||
"Return the system identifier for the current event."
|
||||
return None
|
||||
|
||||
# ===== INPUTSOURCE =====
|
||||
|
||||
class InputSource:
    """Encapsulation of the information needed by the XMLReader to
    read entities.

    This class may include information about the public identifier,
    system identifier, byte stream (possibly with character encoding
    information) and/or the character stream of an entity.

    Applications will create objects of this class for use in the
    XMLReader.parse method and for returning from
    EntityResolver.resolveEntity.

    An InputSource belongs to the application; the XMLReader is not
    allowed to modify InputSource objects passed to it from the
    application, although it may make copies and modify those."""

    def __init__(self, system_id=None):
        # Only the system identifier can be given at construction time;
        # every other field starts out unset (None).
        self.__system_id = system_id
        self.__public_id = None
        self.__encoding = None
        self.__bytefile = None
        self.__charfile = None

    def setPublicId(self, public_id):
        "Sets the public identifier of this InputSource."
        self.__public_id = public_id

    def getPublicId(self):
        "Returns the public identifier of this InputSource."
        return self.__public_id

    def setSystemId(self, system_id):
        "Sets the system identifier of this InputSource."
        self.__system_id = system_id

    def getSystemId(self):
        "Returns the system identifier of this InputSource."
        return self.__system_id

    def setEncoding(self, encoding):
        """Sets the character encoding of this InputSource.

        The encoding must be a string acceptable for an XML encoding
        declaration (see section 4.3.3 of the XML recommendation).

        The encoding attribute of the InputSource is ignored if the
        InputSource also contains a character stream."""
        self.__encoding = encoding

    def getEncoding(self):
        "Returns the character encoding of this InputSource."
        return self.__encoding

    def setByteStream(self, bytefile):
        """Sets the byte stream (a Python file-like object which does
        not perform byte-to-character conversion) for this input
        source.

        The SAX parser will ignore this if there is also a character
        stream specified, but it will use a byte stream in preference
        to opening a URI connection itself.

        If the application knows the character encoding of the byte
        stream, it should set it with the setEncoding method."""
        self.__bytefile = bytefile

    def getByteStream(self):
        """Returns the byte stream for this input source.

        The getEncoding method will return the character encoding for
        this byte stream, or None if unknown."""
        return self.__bytefile

    def setCharacterStream(self, charfile):
        """Sets the character stream for this input source. (The stream
        must be a Python 1.6 Unicode-wrapped file-like object that
        performs conversion to Unicode strings.)

        If there is a character stream specified, the SAX parser will
        ignore any byte stream and will not attempt to open a URI
        connection to the system identifier."""
        self.__charfile = charfile

    def getCharacterStream(self):
        "Returns the character stream for this input source."
        return self.__charfile
|
||||
|
||||
# ===== ATTRIBUTESIMPL =====
|
||||
|
||||
class AttributesImpl:
|
||||
|
|
Loading…
Reference in New Issue