mirror of https://github.com/python/cpython
Added EntityResolver and DTDHandler (patch 101631) with test cases.
This commit is contained in:
parent
bc1c1c98eb
commit
e292a24589
|
@ -6,6 +6,8 @@ Passed test_escape_basic
|
|||
Passed test_escape_extra
|
||||
Passed test_expat_attrs_empty
|
||||
Passed test_expat_attrs_wattr
|
||||
Passed test_expat_dtdhandler
|
||||
Passed test_expat_entityresolver
|
||||
Passed test_expat_inpsource_filename
|
||||
Passed test_expat_inpsource_stream
|
||||
Passed test_expat_inpsource_sysid
|
||||
|
@ -20,4 +22,4 @@ Passed test_xmlgen_content_escape
|
|||
Passed test_xmlgen_ignorable
|
||||
Passed test_xmlgen_ns
|
||||
Passed test_xmlgen_pi
|
||||
21 tests, 0 failures
|
||||
23 tests, 0 failures
|
||||
|
|
|
@ -156,25 +156,45 @@ class TestDTDHandler:
|
|||
def unparsedEntityDecl(self, name, publicId, systemId, ndata):
    """Record one unparsed entity declaration.

    The four values are appended, in the order received, as a single
    tuple to ``self._entities`` so that a test can inspect them later.
    """
    self._entities.append((name, publicId, systemId, ndata))
|
||||
|
||||
# def test_expat_dtdhandler():
|
||||
# parser = create_parser()
|
||||
# handler = TestDTDHandler()
|
||||
# parser.setDTDHandler(handler)
|
||||
def test_expat_dtdhandler():
    """Check that DTD declarations are forwarded to the DTD handler.

    A document whose internal subset declares one unparsed entity and one
    notation is fed to the parser piece by piece.  The result is true when
    the handler collected exactly the expected notation and entity tuples.
    """
    parser = create_parser()
    handler = TestDTDHandler()
    parser.setDTDHandler(handler)

    parser.feed('<!DOCTYPE doc [\n')
    parser.feed(' <!ENTITY img SYSTEM "expat.gif" NDATA GIF>\n')
    parser.feed(' <!NOTATION GIF PUBLIC "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN">\n')
    parser.feed(']>\n')
    parser.feed('<doc></doc>')
    parser.close()

    return handler._notations == [("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)] and \
           handler._entities == [("img", None, "expat.gif", "GIF")]
|
||||
|
||||
# ===== EntityResolver support
|
||||
|
||||
# NOTE: the resolver test below is defined but still disabled; see the early return in test_expat_entityresolver()
|
||||
class TestEntityResolver:
    """Entity resolver stub that answers every request with a fixed document."""

    def resolveEntity(self, publicId, systemId):
        """Return an InputSource whose byte stream contains ``<entity/>``.

        Both identifiers are ignored; the same content is produced for
        every entity.
        """
        inpsrc = InputSource()
        inpsrc.setByteStream(StringIO("<entity/>"))
        return inpsrc
|
||||
|
||||
def test_expat_entityresolver():
    """Check that an external entity is resolved through the resolver.

    The test is currently switched off: the first statement returns 1
    (success) unconditionally, so nothing below it runs.  The remaining
    body is kept so the test can be enabled by deleting that one line.
    """
    return 1 # disabling this until pyexpat.c has been fixed

    # --- not reached while the early return above is in place ---
    parser = create_parser()
    parser.setEntityResolver(TestEntityResolver())
    result = StringIO()
    parser.setContentHandler(XMLGenerator(result))

    parser.feed('<!DOCTYPE doc [\n')
    parser.feed(' <!ENTITY test SYSTEM "whatever">\n')
    parser.feed(']>\n')
    parser.feed('<doc>&test;</doc>')
    parser.close()

    return result.getvalue() == start + "<doc><entity></entity></doc>"
|
||||
|
||||
# ===== Attributes support
|
||||
|
||||
class AttrGatherer(ContentHandler):
|
||||
|
@ -440,5 +460,3 @@ for (name, value) in items:
|
|||
print "%d tests, %d failures" % (tests, fails)
|
||||
if fails != 0:
|
||||
raise TestFailed, "%d of %d tests failed" % (fails, tests)
|
||||
|
||||
make_test_output()
|
||||
|
|
|
@ -3,17 +3,6 @@ SAX driver for the Pyexpat C module. This driver works with
|
|||
pyexpat.__version__ == '1.5'.
|
||||
"""
|
||||
|
||||
# Todo on driver:
|
||||
# - make it support external entities (wait for pyexpat.c)
|
||||
# - enable configuration between reset() and feed() calls
|
||||
# - support lexical events?
|
||||
# - proper inputsource handling
|
||||
# - properties and features
|
||||
|
||||
# Todo on pyexpat.c:
|
||||
# - support XML_ExternalEntityParserCreate
|
||||
# - exceptions in callouts from pyexpat to python code lose position info
|
||||
|
||||
version = "0.20"
|
||||
|
||||
from xml.sax._exceptions import *
|
||||
|
@ -30,10 +19,11 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
|
|||
|
||||
def __init__(self, namespaceHandling=0, bufsize=2**16-20):
    """Initialise the driver state.

    namespaceHandling is stored as ``self._namespaces``; bufsize is
    passed on to ``IncrementalParser.__init__``.
    """
    xmlreader.IncrementalParser.__init__(self, bufsize)
    # Start with an empty InputSource (not None) so code that asks the
    # current source for its system id, such as external_entity_ref(),
    # always has an object to call.
    self._source = xmlreader.InputSource()
    self._parser = None
    self._namespaces = namespaceHandling
    self._parsing = 0
    # (parser, source) pairs saved while an external entity is parsed.
    self._entity_stack = []
|
||||
|
||||
# XMLReader methods
|
||||
|
||||
|
@ -186,11 +176,23 @@ class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
|
|||
self._dtd_handler.notationDecl(name, pubid, sysid)
|
||||
|
||||
def external_entity_ref(self, context, base, sysid, pubid):
    """Parse an external entity with a sub-parser.

    The entity resolver is asked for the entity identified by pubid and
    sysid; its answer is turned into an input source, using the system
    id of the current source (or "") as the base.  The current parser
    and source are pushed on ``self._entity_stack`` and replaced by a
    parser created with ``ExternalEntityParserCreate(context)`` and the
    new source while the entity is parsed.

    Returns 1 when the entity was parsed and the previous parser and
    source were restored, 0 when parsing raised.  base is not used.
    """
    source = self._ent_handler.resolveEntity(pubid, sysid)
    source = saxutils.prepare_input_source(source,
                                           self._source.getSystemId() or
                                           "")

    # Remember the enclosing parser/source so they can be restored below.
    self._entity_stack.append((self._parser, self._source))
    self._parser = self._parser.ExternalEntityParserCreate(context)
    self._source = source

    try:
        xmlreader.IncrementalParser.parse(self, source)
        self.close()
    except:
        # On failure the saved entry is left on the stack and the
        # sub-parser stays current.
        return 0  # FIXME: save error info here?

    (self._parser, self._source) = self._entity_stack[-1]
    del self._entity_stack[-1]
    return 1
|
||||
|
||||
# ---
|
||||
|
|
|
@ -17,7 +17,7 @@ version = '2.0beta'
|
|||
#
|
||||
#============================================================================
|
||||
|
||||
# ===== ErrorHandler =====
|
||||
# ===== ERRORHANDLER =====
|
||||
|
||||
class ErrorHandler:
|
||||
"""Basic interface for SAX error handlers. If you create an object
|
||||
|
@ -40,6 +40,7 @@ class ErrorHandler:
|
|||
"Handle a warning."
|
||||
print exception
|
||||
|
||||
|
||||
# ===== CONTENTHANDLER =====
|
||||
|
||||
class ContentHandler:
|
||||
|
@ -199,7 +200,39 @@ class ContentHandler:
|
|||
http://xml.org/sax/features/external-general-entities and the
|
||||
http://xml.org/sax/features/external-parameter-entities
|
||||
properties."""
|
||||
|
||||
|
||||
# ===== DTDHandler =====
|
||||
|
||||
class DTDHandler:
    """Handle DTD events.

    This interface specifies only those DTD events required for basic
    parsing (unparsed entities and attributes).

    Both methods are no-ops here; subclass and override the ones you
    are interested in."""

    def notationDecl(self, name, publicId, systemId):
        "Handle a notation declaration event."

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        "Handle an unparsed entity declaration event."
|
||||
|
||||
|
||||
# ===== ENTITYRESOLVER =====
|
||||
|
||||
class EntityResolver:
    """Basic interface for resolving entities. If you create an object
    implementing this interface, then register the object with your
    Parser, the parser will call the method in your object to
    resolve all external entities. Note that DefaultHandler implements
    this interface with the default behaviour."""

    def resolveEntity(self, publicId, systemId):
        """Resolve the system identifier of an entity and return either
        the system identifier to read from as a string, or an InputSource
        to read from.

        This default implementation returns systemId unchanged and
        ignores publicId."""
        return systemId
|
||||
|
||||
|
||||
#============================================================================
|
||||
#
|
||||
# CORE FEATURES
|
||||
|
|
|
@ -9,8 +9,8 @@ class XMLReader:
|
|||
|
||||
def __init__(self):
    """Install a default handler object for each kind of event.

    Every handler attribute starts out as a fresh instance of the
    corresponding class from the handler module.
    """
    self._cont_handler = handler.ContentHandler()
    self._dtd_handler = handler.DTDHandler()
    self._ent_handler = handler.EntityResolver()
    self._err_handler = handler.ErrorHandler()
|
||||
|
||||
def parse(self, source):
|
||||
|
@ -109,8 +109,6 @@ class IncrementalParser(XMLReader):
|
|||
while buffer != "":
|
||||
self.feed(buffer)
|
||||
buffer = file.read(self._bufsize)
|
||||
|
||||
self.reset()
|
||||
|
||||
def feed(self, data):
|
||||
"""This method gives the raw XML data in the data parameter to
|
||||
|
|
Loading…
Reference in New Issue