Added EntityResolver and DTDHandler (patch 101631) with test cases.

This commit is contained in:
Lars Gustäbel 2000-09-24 20:19:45 +00:00
parent bc1c1c98eb
commit e292a24589
5 changed files with 90 additions and 37 deletions

View File

@ -6,6 +6,8 @@ Passed test_escape_basic
Passed test_escape_extra
Passed test_expat_attrs_empty
Passed test_expat_attrs_wattr
Passed test_expat_dtdhandler
Passed test_expat_entityresolver
Passed test_expat_inpsource_filename
Passed test_expat_inpsource_stream
Passed test_expat_inpsource_sysid
@ -20,4 +22,4 @@ Passed test_xmlgen_content_escape
Passed test_xmlgen_ignorable
Passed test_xmlgen_ns
Passed test_xmlgen_pi
21 tests, 0 failures
23 tests, 0 failures

View File

@ -156,25 +156,45 @@ class TestDTDHandler:
def unparsedEntityDecl(self, name, publicId, systemId, ndata):
self._entities.append((name, publicId, systemId, ndata))
# def test_expat_dtdhandler():
# parser = create_parser()
# handler = TestDTDHandler()
# parser.setDTDHandler(handler)
def test_expat_dtdhandler():
parser = create_parser()
handler = TestDTDHandler()
parser.setDTDHandler(handler)
# parser.feed('<!DOCTYPE doc [\n')
# parser.feed(' <!ENTITY img SYSTEM "expat.gif" NDATA GIF>\n')
# parser.feed(' <!NOTATION GIF PUBLIC "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN">\n')
# parser.feed(']>\n')
# parser.feed('<doc></doc>')
# parser.close()
parser.feed('<!DOCTYPE doc [\n')
parser.feed(' <!ENTITY img SYSTEM "expat.gif" NDATA GIF>\n')
parser.feed(' <!NOTATION GIF PUBLIC "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN">\n')
parser.feed(']>\n')
parser.feed('<doc></doc>')
parser.close()
# return handler._notations == [("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)] and \
# handler._entities == [("img", None, "expat.gif", "GIF")]
return handler._notations == [("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)] and \
handler._entities == [("img", None, "expat.gif", "GIF")]
# ===== EntityResolver support
# can't test this until InputSource is in place
class TestEntityResolver:
def resolveEntity(self, publicId, systemId):
inpsrc = InputSource()
inpsrc.setByteStream(StringIO("<entity/>"))
return inpsrc
def test_expat_entityresolver():
return 1 # disabling this until pyexpat.c has been fixed
parser = create_parser()
parser.setEntityResolver(TestEntityResolver())
result = StringIO()
parser.setContentHandler(XMLGenerator(result))
parser.feed('<!DOCTYPE doc [\n')
parser.feed(' <!ENTITY test SYSTEM "whatever">\n')
parser.feed(']>\n')
parser.feed('<doc>&test;</doc>')
parser.close()
return result.getvalue() == start + "<doc><entity></entity></doc>"
# ===== Attributes support
class AttrGatherer(ContentHandler):
@ -440,5 +460,3 @@ for (name, value) in items:
print "%d tests, %d failures" % (tests, fails)
if fails != 0:
raise TestFailed, "%d of %d tests failed" % (fails, tests)
make_test_output()

View File

@ -3,17 +3,6 @@ SAX driver for the Pyexpat C module. This driver works with
pyexpat.__version__ == '1.5'.
"""
# Todo on driver:
# - make it support external entities (wait for pyexpat.c)
# - enable configuration between reset() and feed() calls
# - support lexical events?
# - proper inputsource handling
# - properties and features
# Todo on pyexpat.c:
# - support XML_ExternalEntityParserCreate
# - exceptions in callouts from pyexpat to python code lose position info
version = "0.20"
from xml.sax._exceptions import *
@ -30,10 +19,11 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
def __init__(self, namespaceHandling=0, bufsize=2**16-20):
xmlreader.IncrementalParser.__init__(self, bufsize)
self._source = None
self._source = xmlreader.InputSource()
self._parser = None
self._namespaces = namespaceHandling
self._parsing = 0
self._entity_stack = []
# XMLReader methods
@ -186,11 +176,23 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
self._dtd_handler.notationDecl(name, pubid, sysid)
def external_entity_ref(self, context, base, sysid, pubid):
raise NotImplementedError()
source = self._ent_handler.resolveEntity(pubid, sysid)
source = saxutils.prepare_input_source(source)
# FIXME: create new parser, stack self._source and self._parser
# FIXME: reuse code from self.parse(...)
source = saxutils.prepare_input_source(source,
self._source.getSystemId() or
"")
self._entity_stack.append((self._parser, self._source))
self._parser = self._parser.ExternalEntityParserCreate(context)
self._source = source
try:
xmlreader.IncrementalParser.parse(self, source)
self.close()
except:
return 0 # FIXME: save error info here?
(self._parser, self._source) = self._entity_stack[-1]
del self._entity_stack[-1]
return 1
# ---

View File

@ -17,7 +17,7 @@ version = '2.0beta'
#
#============================================================================
# ===== ErrorHandler =====
# ===== ERRORHANDLER =====
class ErrorHandler:
"""Basic interface for SAX error handlers. If you create an object
@ -40,6 +40,7 @@ class ErrorHandler:
"Handle a warning."
print exception
# ===== CONTENTHANDLER =====
class ContentHandler:
@ -199,7 +200,39 @@ class ContentHandler:
http://xml.org/sax/features/external-general-entities and the
http://xml.org/sax/features/external-parameter-entities
properties."""
# ===== DTDHandler =====
class DTDHandler:
"""Handle DTD events.
This interface specifies only those DTD events required for basic
parsing (unparsed entities and attributes)."""
def notationDecl(self, name, publicId, systemId):
"Handle a notation declaration event."
def unparsedEntityDecl(self, name, publicId, systemId, ndata):
"Handle an unparsed entity declaration event."
# ===== ENTITYRESOLVER =====
class EntityResolver:
"""Basic interface for resolving entities. If you create an object
implementing this interface, then register the object with your
Parser, the parser will call the method in your object to
resolve all external entities. Note that DefaultHandler implements
this interface with the default behaviour."""
def resolveEntity(self, publicId, systemId):
"""Resolve the system identifier of an entity and return either
the system identifier to read from as a string, or an InputSource
to read from."""
return systemId
#============================================================================
#
# CORE FEATURES

View File

@ -9,8 +9,8 @@ class XMLReader:
def __init__(self):
self._cont_handler = handler.ContentHandler()
#self._dtd_handler = handler.DTDHandler()
#self._ent_handler = handler.EntityResolver()
self._dtd_handler = handler.DTDHandler()
self._ent_handler = handler.EntityResolver()
self._err_handler = handler.ErrorHandler()
def parse(self, source):
@ -109,8 +109,6 @@ class IncrementalParser(XMLReader):
while buffer != "":
self.feed(buffer)
buffer = file.read(self._bufsize)
self.reset()
def feed(self, data):
"""This method gives the raw XML data in the data parameter to