mirror of https://github.com/python/cpython
parent
8eda5f7cd9
commit
863a0c3f53
|
@ -4,9 +4,7 @@ import pickle
|
|||
from test.support import verbose, run_unittest, findfile
|
||||
import unittest
|
||||
|
||||
import xml.dom
|
||||
import xml.dom.minidom
|
||||
import xml.parsers.expat
|
||||
|
||||
from xml.dom.minidom import parse, Node, Document, parseString
|
||||
from xml.dom.minidom import getDOMImplementation
|
||||
|
@ -14,7 +12,6 @@ from xml.dom.minidom import getDOMImplementation
|
|||
|
||||
tstfile = findfile("test.xml", subdir="xmltestdata")
|
||||
|
||||
|
||||
# The tests of DocumentType importing use these helpers to construct
|
||||
# the documents to work with, since not all DOM builders actually
|
||||
# create the DocumentType nodes.
|
||||
|
@ -1009,41 +1006,6 @@ class MinidomTest(unittest.TestCase):
|
|||
"test NodeList.item()")
|
||||
doc.unlink()
|
||||
|
||||
def testSAX2DOM(self):
    """Build a tiny document through SAX2DOM and verify its node links.

    SAX events for ``<doc>text<subelm>text</subelm>text</doc>`` are fed
    straight into the handler; the sibling and parent pointers of the
    resulting nodes are then checked.
    """
    from xml.dom import pulldom

    builder = pulldom.SAX2DOM()
    builder.startDocument()
    builder.startElement("doc", {})
    builder.characters("text")
    builder.startElement("subelm", {})
    builder.characters("text")
    builder.endElement("subelm")
    builder.characters("text")
    builder.endElement("doc")
    builder.endDocument()

    doc = builder.document
    root = doc.documentElement
    # The root holds: leading text, the <subelm> element, trailing text.
    leading, child, trailing = root.childNodes
    inner = child.childNodes[0]

    siblings_ok = (
        leading.previousSibling is None
        and leading.nextSibling is child
        and child.previousSibling is leading
        and child.nextSibling is trailing
        and trailing.previousSibling is child
        and trailing.nextSibling is None
        and inner.previousSibling is None
        and inner.nextSibling is None
    )
    self.confirm(siblings_ok, "testSAX2DOM - siblings")

    parents_ok = (
        root.parentNode is doc
        and leading.parentNode is root
        and child.parentNode is root
        and trailing.parentNode is root
        and inner.parentNode is child
    )
    self.confirm(parents_ok, "testSAX2DOM - parents")
    doc.unlink()
|
||||
|
||||
def testEncodings(self):
|
||||
doc = parseString('<foo>€</foo>')
|
||||
self.assertEqual(doc.toxml(),
|
||||
|
@ -1490,6 +1452,7 @@ class MinidomTest(unittest.TestCase):
|
|||
doc.appendChild(doc.createComment("foo--bar"))
|
||||
self.assertRaises(ValueError, doc.toxml)
|
||||
|
||||
|
||||
def testEmptyXMLNSValue(self):
|
||||
doc = parseString("<element xmlns=''>\n"
|
||||
"<foo/>\n</element>")
|
||||
|
|
|
@ -0,0 +1,345 @@
|
|||
import io
|
||||
import unittest
|
||||
import sys
|
||||
import xml.sax
|
||||
|
||||
from xml.sax.xmlreader import AttributesImpl
|
||||
from xml.dom import pulldom
|
||||
|
||||
from test.support import run_unittest, findfile
|
||||
|
||||
|
||||
tstfile = findfile("test.xml", subdir="xmltestdata")
|
||||
|
||||
# A handy XML snippet, containing attributes, a namespace prefix, and a
|
||||
# self-closing tag:
|
||||
SMALL_SAMPLE = """<?xml version="1.0"?>
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books">
|
||||
<!-- A comment -->
|
||||
<title>Introduction to XSL</title>
|
||||
<hr/>
|
||||
<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p>
|
||||
</html>"""
|
||||
|
||||
|
||||
class PullDOMTestCase(unittest.TestCase):
    """Tests for pulldom.parse(), pulldom.parseString() and the event
    stream they return."""

    def test_parse(self):
        """Minimal test of DOMEventStream.parse()"""

        # This just tests that parsing from a stream works. Actual parser
        # semantics are tested using parseString with a more focused XML
        # fragment.

        # Test with a filename.  pulldom.parse() opens the file itself and
        # does not close it, so close the stream explicitly once the test
        # is over to avoid leaking the file handle.
        handler = pulldom.parse(tstfile)
        self.addCleanup(handler.stream.close)
        list(handler)

        # Test with a file object:
        with open(tstfile, "rb") as fin:
            list(pulldom.parse(fin))

    def test_parse_semantics(self):
        """Test DOMEventStream parsing semantics."""

        items = pulldom.parseString(SMALL_SAMPLE)
        evt, node = next(items)
        # Just check the node is a Document:
        self.assertTrue(hasattr(node, "createElement"))
        self.assertEqual(pulldom.START_DOCUMENT, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("html", node.tagName)
        self.assertEqual(2, len(node.attributes))
        self.assertEqual(node.attributes.getNamedItem("xmlns:xdc").value,
                         "http://www.xml.com/books")
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)  # Line break
        evt, node = next(items)
        # XXX - A pulldom.COMMENT event should be reported here, but the
        # comment is swallowed (see test_comment).  What arrives instead is
        # the line break following the comment:
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("title", node.tagName)
        title_node = node
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        self.assertEqual("Introduction to XSL", node.data)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("title", node.tagName)
        # The end event must carry the very same node as the start event.
        self.assertIs(title_node, node)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("hr", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("hr", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("p", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("xdc:author", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("xdc:author", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        # XXX No END_DOCUMENT item is ever obtained, so it is not checked
        # here (see test_end_document).

    def test_expandItem(self):
        """Ensure expandNode works as expected."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Loop through the nodes until we get to a "title" start tag:
        for evt, item in items:
            if evt == pulldom.START_ELEMENT and item.tagName == "title":
                items.expandNode(item)
                self.assertEqual(1, len(item.childNodes))
                break
        else:
            self.fail("No \"title\" element detected in SMALL_SAMPLE!")
        # Loop until we get to the next start-element:
        for evt, node in items:
            if evt == pulldom.START_ELEMENT:
                break
        self.assertEqual("hr", node.tagName,
            "expandNode did not leave DOMEventStream in the correct state.")
        # Attempt to expand a standalone element:
        items.expandNode(node)
        self.assertEqual(next(items)[0], pulldom.CHARACTERS)
        evt, node = next(items)
        self.assertEqual(node.tagName, "p")
        items.expandNode(node)
        next(items)  # Skip character data
        evt, node = next(items)
        self.assertEqual(node.tagName, "html")
        with self.assertRaises(StopIteration):
            next(items)
        # clear() must drop the references to the parser and the stream.
        items.clear()
        self.assertIsNone(items.parser)
        self.assertIsNone(items.stream)

    @unittest.expectedFailure
    def test_comment(self):
        """PullDOM does not receive "comment" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        for evt, _ in items:
            if evt == pulldom.COMMENT:
                break
        else:
            self.fail("No comment was encountered")

    @unittest.expectedFailure
    def test_end_document(self):
        """PullDOM does not receive "end-document" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Read all of the nodes up to and including </html>:
        for evt, node in items:
            if evt == pulldom.END_ELEMENT and node.tagName == "html":
                break
        # Only the fetch itself can run out of events; keep the assertion
        # outside the try block.
        try:
            evt, node = next(items)
        except StopIteration:
            self.fail(
                "Ran out of events, but should have received END_DOCUMENT")
        # Assert that the next node is END_DOCUMENT:
        self.assertEqual(pulldom.END_DOCUMENT, evt)
|
||||
|
||||
|
||||
class ThoroughTestCase(unittest.TestCase):
    """Exercise the hard-to-reach parts of pulldom with scripted SAX
    events supplied by a mock parser."""

    def test_thorough_parse(self):
        """Run the scripted event sequence through pulldom.parse()."""
        stream = pulldom.parse(None, parser=SAXExerciser())
        self._test_thorough(stream)

    @unittest.expectedFailure
    def test_sax2dom_fail(self):
        """SAX2DOM can't handle a PI before the root element."""
        stream = SAX2DOMTestHelper(None, SAXExerciser(), 12)
        self._test_thorough(stream)

    def test_thorough_sax2dom(self):
        """Run the scripted sequence, minus the prologue, through SAX2DOM."""
        stream = SAX2DOMTestHelper(None, SAX2DOMExerciser(), 12)
        self._test_thorough(stream, False)

    def _next_node(self, pd, expected_event):
        """Pull one item from *pd*, check its event type, return its node."""
        event, node = next(pd)
        self.assertEqual(expected_event, event)
        return node

    def _check_comment_then_pi(self, pd):
        """Expect a comment followed by a processing instruction on *pd*."""
        comment = self._next_node(pd, pulldom.COMMENT)
        self.assertEqual("a comment", comment.data)
        instruction = self._next_node(pd, pulldom.PROCESSING_INSTRUCTION)
        self.assertEqual("target", instruction.target)
        self.assertEqual("data", instruction.data)

    def _test_thorough(self, pd, before_root=True):
        """Check the event sequence produced by the mock parser.

        *pd* is the event stream under test.  When *before_root* is true,
        a comment and a processing instruction are expected ahead of the
        root element as well as inside it.
        """
        document = self._next_node(pd, pulldom.START_DOCUMENT)
        # Just check the node is a Document:
        self.assertTrue(hasattr(document, "createElement"))

        if before_root:
            self._check_comment_then_pi(pd)

        root = self._next_node(pd, pulldom.START_ELEMENT)
        self.assertEqual("html", root.tagName)

        self._check_comment_then_pi(pd)

        paragraph = self._next_node(pd, pulldom.START_ELEMENT)
        self.assertEqual("p", paragraph.tagName)

        text = self._next_node(pd, pulldom.CHARACTERS)
        self.assertEqual("text", text.data)

        # The elements are closed innermost first.
        for closed_tag in ("p", "html"):
            closed = self._next_node(pd, pulldom.END_ELEMENT)
            self.assertEqual(closed_tag, closed.tagName)

        self._next_node(pd, pulldom.END_DOCUMENT)
|
||||
|
||||
|
||||
class SAXExerciser:
    """A fake SAX parser.

    Instead of parsing anything, parse() replays a fixed series of calls on
    the registered content handler, including some of the harder-to-reach
    SAX methods, so that tests can check the events emitted in response.
    """

    def setContentHandler(self, handler):
        """Remember *handler* as the target of the scripted calls."""
        self._handler = handler

    def _emit_comment_and_pi(self, handler):
        """Send one comment and one processing instruction to *handler*."""
        handler.comment("a comment")
        handler.processingInstruction("target", "data")

    def parse(self, _):
        """Replay the scripted document on the handler; the argument is
        ignored."""
        handler = self._handler
        handler.startDocument()

        # Items preceding the first start_element must be properly stored
        # and emitted, so send a comment and a PI ahead of the root:
        self._emit_comment_and_pi(handler)

        handler.startElement("html", AttributesImpl({}))

        self._emit_comment_and_pi(handler)

        handler.startElement("p", AttributesImpl({"class": "paraclass"}))
        handler.characters("text")
        handler.endElement("p")
        handler.endElement("html")
        handler.endDocument()

    def stub(self, *args, **kwargs):
        """Stub method. Does nothing."""
        return None

    # Parser configuration calls are accepted and ignored.
    setProperty = stub
    setFeature = stub
|
||||
|
||||
|
||||
class SAX2DOMExerciser(SAXExerciser):
    """The same as SAXExerciser, but without the processing instruction and
    comment before the root element, because S2D can't handle it"""

    def parse(self, _):
        """Replay the scripted document on the handler; the argument is
        ignored."""
        handler = self._handler
        # Handler method names with their arguments, in call order.
        script = (
            ("startDocument", ()),
            ("startElement", ("html", AttributesImpl({}))),
            ("comment", ("a comment",)),
            ("processingInstruction", ("target", "data")),
            ("startElement", ("p", AttributesImpl({"class": "paraclass"}))),
            ("characters", ("text",)),
            ("endElement", ("p",)),
            ("endElement", ("html",)),
            ("endDocument", ()),
        )
        for method_name, arguments in script:
            getattr(handler, method_name)(*arguments)
|
||||
|
||||
|
||||
class SAX2DOMTestHelper(pulldom.DOMEventStream):
    """Allows us to drive SAX2DOM from a DOMEventStream."""

    def reset(self):
        """Install a fresh SAX2DOM instance as the parser's content
        handler."""
        handler = pulldom.SAX2DOM()
        self.pulldom = handler
        parser = self.parser
        # This content handler relies on namespace support
        parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        parser.setContentHandler(handler)
|
||||
|
||||
|
||||
class SAX2DOMTestCase(unittest.TestCase):
    """Tests for the pulldom.SAX2DOM content handler."""

    def confirm(self, test, testname="Test"):
        """Assert that *test* is true, reporting *testname* on failure."""
        self.assertTrue(test, testname)

    def test_basic(self):
        """Ensure SAX2DOM can parse from a stream."""
        sample_size = len(SMALL_SAMPLE)
        with io.StringIO(SMALL_SAMPLE) as source:
            events = SAX2DOMTestHelper(source, xml.sax.make_parser(),
                                       sample_size)
            for event, node in events:
                found_root = (event == pulldom.START_ELEMENT
                              and node.tagName == "html")
                if found_root:
                    break
            # Because the buffer is the same length as the XML, all the
            # nodes should have been parsed and added:
            self.assertGreater(len(node.childNodes), 0)

    def testSAX2DOM(self):
        """Ensure SAX2DOM expands nodes as expected."""
        builder = pulldom.SAX2DOM()
        builder.startDocument()
        builder.startElement("doc", {})
        builder.characters("text")
        builder.startElement("subelm", {})
        builder.characters("text")
        builder.endElement("subelm")
        builder.characters("text")
        builder.endElement("doc")
        builder.endDocument()

        doc = builder.document
        root = doc.documentElement
        # The root holds: leading text, the <subelm> element, trailing text.
        leading, child, trailing = root.childNodes
        inner = child.childNodes[0]

        # Sibling links among the root's children:
        self.assertIsNone(leading.previousSibling)
        self.assertIs(leading.nextSibling, child)
        self.assertIs(child.previousSibling, leading)
        self.assertIs(child.nextSibling, trailing)
        self.assertIs(trailing.previousSibling, child)
        self.assertIsNone(trailing.nextSibling)
        # The nested text node is an only child:
        self.assertIsNone(inner.previousSibling)
        self.assertIsNone(inner.nextSibling)

        # Parent links:
        self.assertIs(root.parentNode, doc)
        self.assertIs(leading.parentNode, root)
        self.assertIs(child.parentNode, root)
        self.assertIs(trailing.parentNode, root)
        self.assertIs(inner.parentNode, child)
        doc.unlink()
|
||||
|
||||
|
||||
def test_main():
    """Run every test case class defined in this module."""
    test_cases = (PullDOMTestCase, ThoroughTestCase, SAX2DOMTestCase)
    run_unittest(*test_cases)


# Allow the module to be executed directly as a script.
if __name__ == "__main__":
    test_main()
|
Loading…
Reference in New Issue