# IMPORTANT: the same tests are run from "test_xml_etree_c" in order
# to ensure consistency between the C implementation and the Python
# implementation.
#
# For this purpose, the module-level "ET" symbol is temporarily
# monkey-patched when running the "test_xml_etree_c" test suite.
import copy
import functools
import html
import io
import operator
import pickle
import sys
import types
import unittest
import warnings
import weakref
from itertools import product
from test import support
from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr
# pyET is the pure-Python implementation.
#
# ET is pyET in test_xml_etree and is the C accelerated version in
# test_xml_etree_c.
pyET = None
ET = None
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
try:
SIMPLE_XMLFILE.encode("utf-8")
except UnicodeEncodeError:
raise unittest.SkipTest("filename is not encodable to utf8")
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata")
SAMPLE_XML = """\
textsubtext
"""
SAMPLE_SECTION = """\
subtext
"""
SAMPLE_XML_NS = """
textsubtext
"""
SAMPLE_XML_NS_ELEMS = """
ApplesBananasAfrican Coffee Table80120
"""
ENTITY_XML = """\
%user-entities;
]>
&entity;
"""
EXTERNAL_ENTITY_XML = """\
]>
&entity;
"""
def checkwarnings(*filters, quiet=False):
def decorator(test):
def newtest(*args, **kwargs):
with support.check_warnings(*filters, quiet=quiet):
test(*args, **kwargs)
functools.update_wrapper(newtest, test)
return newtest
return decorator
class ModuleTest(unittest.TestCase):
def test_sanity(self):
# Import sanity.
from xml.etree import ElementTree
from xml.etree import ElementInclude
from xml.etree import ElementPath
def test_all(self):
names = ("xml.etree.ElementTree", "_elementtree")
support.check__all__(self, ET, names, blacklist=("HTML_EMPTY",))
def serialize(elem, to_string=True, encoding='unicode', **options):
if encoding != 'unicode':
file = io.BytesIO()
else:
file = io.StringIO()
tree = ET.ElementTree(elem)
tree.write(file, encoding=encoding, **options)
if to_string:
return file.getvalue()
else:
file.seek(0)
return file
def summarize_list(seq):
return [elem.tag for elem in seq]
class ElementTestCase:
@classmethod
def setUpClass(cls):
cls.modules = {pyET, ET}
def pickleRoundTrip(self, obj, name, dumper, loader, proto):
save_m = sys.modules[name]
try:
sys.modules[name] = dumper
temp = pickle.dumps(obj, proto)
sys.modules[name] = loader
result = pickle.loads(temp)
except pickle.PicklingError as pe:
# pyET must be second, because pyET may be (equal to) ET.
human = dict([(ET, "cET"), (pyET, "pyET")])
raise support.TestFailed("Failed to round-trip %r from %r to %r"
% (obj,
human.get(dumper, dumper),
human.get(loader, loader))) from pe
finally:
sys.modules[name] = save_m
return result
def assertEqualElements(self, alice, bob):
self.assertIsInstance(alice, (ET.Element, pyET.Element))
self.assertIsInstance(bob, (ET.Element, pyET.Element))
self.assertEqual(len(list(alice)), len(list(bob)))
for x, y in zip(alice, bob):
self.assertEqualElements(x, y)
properties = operator.attrgetter('tag', 'tail', 'text', 'attrib')
self.assertEqual(properties(alice), properties(bob))
# --------------------------------------------------------------------
# element tree tests
class ElementTreeTest(unittest.TestCase):
def serialize_check(self, elem, expected):
self.assertEqual(serialize(elem), expected)
def test_interface(self):
# Test element tree interface.
def check_string(string):
len(string)
for char in string:
self.assertEqual(len(char), 1,
msg="expected one-character string, got %r" % char)
new_string = string + ""
new_string = string + " "
string[:0]
def check_mapping(mapping):
len(mapping)
keys = mapping.keys()
items = mapping.items()
for key in keys:
item = mapping[key]
mapping["key"] = "value"
self.assertEqual(mapping["key"], "value",
msg="expected value string, got %r" % mapping["key"])
def check_element(element):
self.assertTrue(ET.iselement(element), msg="not an element")
direlem = dir(element)
for attr in 'tag', 'attrib', 'text', 'tail':
self.assertTrue(hasattr(element, attr),
msg='no %s member' % attr)
self.assertIn(attr, direlem,
msg='no %s visible by dir' % attr)
check_string(element.tag)
check_mapping(element.attrib)
if element.text is not None:
check_string(element.text)
if element.tail is not None:
check_string(element.tail)
for elem in element:
check_element(elem)
element = ET.Element("tag")
check_element(element)
tree = ET.ElementTree(element)
check_element(tree.getroot())
element = ET.Element("t\xe4g", key="value")
tree = ET.ElementTree(element)
self.assertRegex(repr(element), r"^$")
element = ET.Element("tag", key="value")
# Make sure all standard element methods exist.
def check_method(method):
self.assertTrue(hasattr(method, '__call__'),
msg="%s not callable" % method)
check_method(element.append)
check_method(element.extend)
check_method(element.insert)
check_method(element.remove)
check_method(element.getchildren)
check_method(element.find)
check_method(element.iterfind)
check_method(element.findall)
check_method(element.findtext)
check_method(element.clear)
check_method(element.get)
check_method(element.set)
check_method(element.keys)
check_method(element.items)
check_method(element.iter)
check_method(element.itertext)
check_method(element.getiterator)
# These methods return an iterable. See bug 6472.
def check_iter(it):
check_method(it.__next__)
check_iter(element.iterfind("tag"))
check_iter(element.iterfind("*"))
check_iter(tree.iterfind("tag"))
check_iter(tree.iterfind("*"))
# These aliases are provided:
self.assertEqual(ET.XML, ET.fromstring)
self.assertEqual(ET.PI, ET.ProcessingInstruction)
def test_set_attribute(self):
element = ET.Element('tag')
self.assertEqual(element.tag, 'tag')
element.tag = 'Tag'
self.assertEqual(element.tag, 'Tag')
element.tag = 'TAG'
self.assertEqual(element.tag, 'TAG')
self.assertIsNone(element.text)
element.text = 'Text'
self.assertEqual(element.text, 'Text')
element.text = 'TEXT'
self.assertEqual(element.text, 'TEXT')
self.assertIsNone(element.tail)
element.tail = 'Tail'
self.assertEqual(element.tail, 'Tail')
element.tail = 'TAIL'
self.assertEqual(element.tail, 'TAIL')
self.assertEqual(element.attrib, {})
element.attrib = {'a': 'b', 'c': 'd'}
self.assertEqual(element.attrib, {'a': 'b', 'c': 'd'})
element.attrib = {'A': 'B', 'C': 'D'}
self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'})
def test_simpleops(self):
# Basic method sanity checks.
elem = ET.XML("")
self.serialize_check(elem, '')
e = ET.Element("tag2")
elem.append(e)
self.serialize_check(elem, '')
elem.remove(e)
self.serialize_check(elem, '')
elem.insert(0, e)
self.serialize_check(elem, '')
elem.remove(e)
elem.extend([e])
self.serialize_check(elem, '')
elem.remove(e)
element = ET.Element("tag", key="value")
self.serialize_check(element, '') # 1
subelement = ET.Element("subtag")
element.append(subelement)
self.serialize_check(element, '') # 2
element.insert(0, subelement)
self.serialize_check(element,
'') # 3
element.remove(subelement)
self.serialize_check(element, '') # 4
element.remove(subelement)
self.serialize_check(element, '') # 5
with self.assertRaises(ValueError) as cm:
element.remove(subelement)
self.assertEqual(str(cm.exception), 'list.remove(x): x not in list')
self.serialize_check(element, '') # 6
element[0:0] = [subelement, subelement, subelement]
self.serialize_check(element[1], '')
self.assertEqual(element[1:9], [element[1], element[2]])
self.assertEqual(element[:9:2], [element[0], element[2]])
del element[1:2]
self.serialize_check(element,
'')
def test_cdata(self):
# Test CDATA handling (etc).
self.serialize_check(ET.XML("hello"),
'hello')
self.serialize_check(ET.XML("hello"),
'hello')
self.serialize_check(ET.XML(""),
'hello')
def test_file_init(self):
stringfile = io.BytesIO(SAMPLE_XML.encode("utf-8"))
tree = ET.ElementTree(file=stringfile)
self.assertEqual(tree.find("tag").tag, 'tag')
self.assertEqual(tree.find("section/tag").tag, 'tag')
tree = ET.ElementTree(file=SIMPLE_XMLFILE)
self.assertEqual(tree.find("element").tag, 'element')
self.assertEqual(tree.find("element/../empty-element").tag,
'empty-element')
def test_path_cache(self):
# Check that the path cache behaves sanely.
from xml.etree import ElementPath
elem = ET.XML(SAMPLE_XML)
for i in range(10): ET.ElementTree(elem).find('./'+str(i))
cache_len_10 = len(ElementPath._cache)
for i in range(10): ET.ElementTree(elem).find('./'+str(i))
self.assertEqual(len(ElementPath._cache), cache_len_10)
for i in range(20): ET.ElementTree(elem).find('./'+str(i))
self.assertGreater(len(ElementPath._cache), cache_len_10)
for i in range(600): ET.ElementTree(elem).find('./'+str(i))
self.assertLess(len(ElementPath._cache), 500)
def test_copy(self):
# Test copy handling (etc).
import copy
e1 = ET.XML("hello")
e2 = copy.copy(e1)
e3 = copy.deepcopy(e1)
e1.find("foo").tag = "bar"
self.serialize_check(e1, 'hello')
self.serialize_check(e2, 'hello')
self.serialize_check(e3, 'hello')
def test_attrib(self):
# Test attribute handling.
elem = ET.Element("tag")
elem.get("key") # 1.1
self.assertEqual(elem.get("key", "default"), 'default') # 1.2
elem.set("key", "value")
self.assertEqual(elem.get("key"), 'value') # 1.3
elem = ET.Element("tag", key="value")
self.assertEqual(elem.get("key"), 'value') # 2.1
self.assertEqual(elem.attrib, {'key': 'value'}) # 2.2
attrib = {"key": "value"}
elem = ET.Element("tag", attrib)
attrib.clear() # check for aliasing issues
self.assertEqual(elem.get("key"), 'value') # 3.1
self.assertEqual(elem.attrib, {'key': 'value'}) # 3.2
attrib = {"key": "value"}
elem = ET.Element("tag", **attrib)
attrib.clear() # check for aliasing issues
self.assertEqual(elem.get("key"), 'value') # 4.1
self.assertEqual(elem.attrib, {'key': 'value'}) # 4.2
elem = ET.Element("tag", {"key": "other"}, key="value")
self.assertEqual(elem.get("key"), 'value') # 5.1
self.assertEqual(elem.attrib, {'key': 'value'}) # 5.2
elem = ET.Element('test')
elem.text = "aa"
elem.set('testa', 'testval')
elem.set('testb', 'test2')
self.assertEqual(ET.tostring(elem),
b'aa')
self.assertEqual(sorted(elem.keys()), ['testa', 'testb'])
self.assertEqual(sorted(elem.items()),
[('testa', 'testval'), ('testb', 'test2')])
self.assertEqual(elem.attrib['testb'], 'test2')
elem.attrib['testb'] = 'test1'
elem.attrib['testc'] = 'test2'
self.assertEqual(ET.tostring(elem),
b'aa')
elem = ET.Element('test')
elem.set('a', '\r')
elem.set('b', '\r\n')
elem.set('c', '\t\n\r ')
elem.set('d', '\n\n')
self.assertEqual(ET.tostring(elem),
b'')
def test_makeelement(self):
# Test makeelement handling.
elem = ET.Element("tag")
attrib = {"key": "value"}
subelem = elem.makeelement("subtag", attrib)
self.assertIsNot(subelem.attrib, attrib, msg="attrib aliasing")
elem.append(subelem)
self.serialize_check(elem, '')
elem.clear()
self.serialize_check(elem, '')
elem.append(subelem)
self.serialize_check(elem, '')
elem.extend([subelem, subelem])
self.serialize_check(elem,
'')
elem[:] = [subelem]
self.serialize_check(elem, '')
elem[:] = tuple([subelem])
self.serialize_check(elem, '')
def test_parsefile(self):
# Test parsing from file.
tree = ET.parse(SIMPLE_XMLFILE)
stream = io.StringIO()
tree.write(stream, encoding='unicode')
self.assertEqual(stream.getvalue(),
'\n'
' text\n'
' texttail\n'
' \n'
'')
tree = ET.parse(SIMPLE_NS_XMLFILE)
stream = io.StringIO()
tree.write(stream, encoding='unicode')
self.assertEqual(stream.getvalue(),
'\n'
' text\n'
' texttail\n'
' \n'
'')
with open(SIMPLE_XMLFILE) as f:
data = f.read()
parser = ET.XMLParser()
self.assertRegex(parser.version, r'^Expat ')
parser.feed(data)
self.serialize_check(parser.close(),
'\n'
' text\n'
' texttail\n'
' \n'
'')
target = ET.TreeBuilder()
parser = ET.XMLParser(target=target)
parser.feed(data)
self.serialize_check(parser.close(),
'\n'
' text\n'
' texttail\n'
' \n'
'')
def test_parseliteral(self):
element = ET.XML("text")
self.assertEqual(ET.tostring(element, encoding='unicode'),
'text')
element = ET.fromstring("text")
self.assertEqual(ET.tostring(element, encoding='unicode'),
'text')
sequence = ["", "text"]
element = ET.fromstringlist(sequence)
self.assertEqual(ET.tostring(element),
b'text')
self.assertEqual(b"".join(ET.tostringlist(element)),
b'text')
self.assertEqual(ET.tostring(element, "ascii"),
b"\n"
b"text")
_, ids = ET.XMLID("text")
self.assertEqual(len(ids), 0)
_, ids = ET.XMLID("text")
self.assertEqual(len(ids), 1)
self.assertEqual(ids["body"].tag, 'body')
def test_iterparse(self):
# Test iterparse interface.
iterparse = ET.iterparse
context = iterparse(SIMPLE_XMLFILE)
action, elem = next(context)
self.assertEqual((action, elem.tag), ('end', 'element'))
self.assertEqual([(action, elem.tag) for action, elem in context], [
('end', 'element'),
('end', 'empty-element'),
('end', 'root'),
])
self.assertEqual(context.root.tag, 'root')
context = iterparse(SIMPLE_NS_XMLFILE)
self.assertEqual([(action, elem.tag) for action, elem in context], [
('end', '{namespace}element'),
('end', '{namespace}element'),
('end', '{namespace}empty-element'),
('end', '{namespace}root'),
])
events = ()
context = iterparse(SIMPLE_XMLFILE, events)
self.assertEqual([(action, elem.tag) for action, elem in context], [])
events = ()
context = iterparse(SIMPLE_XMLFILE, events=events)
self.assertEqual([(action, elem.tag) for action, elem in context], [])
events = ("start", "end")
context = iterparse(SIMPLE_XMLFILE, events)
self.assertEqual([(action, elem.tag) for action, elem in context], [
('start', 'root'),
('start', 'element'),
('end', 'element'),
('start', 'element'),
('end', 'element'),
('start', 'empty-element'),
('end', 'empty-element'),
('end', 'root'),
])
events = ("start", "end", "start-ns", "end-ns")
context = iterparse(SIMPLE_NS_XMLFILE, events)
self.assertEqual([(action, elem.tag) if action in ("start", "end")
else (action, elem)
for action, elem in context], [
('start-ns', ('', 'namespace')),
('start', '{namespace}root'),
('start', '{namespace}element'),
('end', '{namespace}element'),
('start', '{namespace}element'),
('end', '{namespace}element'),
('start', '{namespace}empty-element'),
('end', '{namespace}empty-element'),
('end', '{namespace}root'),
('end-ns', None),
])
events = ('start-ns', 'end-ns')
context = iterparse(io.StringIO(r""), events)
res = [action for action, elem in context]
self.assertEqual(res, ['start-ns', 'end-ns'])
events = ("start", "end", "bogus")
with open(SIMPLE_XMLFILE, "rb") as f:
with self.assertRaises(ValueError) as cm:
iterparse(f, events)
self.assertFalse(f.closed)
self.assertEqual(str(cm.exception), "unknown event 'bogus'")
with support.check_no_resource_warning(self):
with self.assertRaises(ValueError) as cm:
iterparse(SIMPLE_XMLFILE, events)
self.assertEqual(str(cm.exception), "unknown event 'bogus'")
del cm
source = io.BytesIO(
b"\n"
b"text\n")
events = ("start-ns",)
context = iterparse(source, events)
self.assertEqual([(action, elem) for action, elem in context], [
('start-ns', ('', 'http://\xe9ffbot.org/ns')),
('start-ns', ('cl\xe9', 'http://effbot.org/ns')),
])
source = io.StringIO("junk")
it = iterparse(source)
action, elem = next(it)
self.assertEqual((action, elem.tag), ('end', 'document'))
with self.assertRaises(ET.ParseError) as cm:
next(it)
self.assertEqual(str(cm.exception),
'junk after document element: line 1, column 12')
self.addCleanup(support.unlink, TESTFN)
with open(TESTFN, "wb") as f:
f.write(b"junk")
it = iterparse(TESTFN)
action, elem = next(it)
self.assertEqual((action, elem.tag), ('end', 'document'))
with support.check_no_resource_warning(self):
with self.assertRaises(ET.ParseError) as cm:
next(it)
self.assertEqual(str(cm.exception),
'junk after document element: line 1, column 12')
del cm, it
def test_writefile(self):
elem = ET.Element("tag")
elem.text = "text"
self.serialize_check(elem, 'text')
ET.SubElement(elem, "subtag").text = "subtext"
self.serialize_check(elem, 'textsubtext')
# Test tag suppression
elem.tag = None
self.serialize_check(elem, 'textsubtext')
elem.insert(0, ET.Comment("comment"))
self.serialize_check(elem,
'textsubtext') # assumes 1.3
elem[0] = ET.PI("key", "value")
self.serialize_check(elem, 'textsubtext')
def test_custom_builder(self):
# Test parser w. custom builder.
with open(SIMPLE_XMLFILE) as f:
data = f.read()
class Builder(list):
def start(self, tag, attrib):
self.append(("start", tag))
def end(self, tag):
self.append(("end", tag))
def data(self, text):
pass
builder = Builder()
parser = ET.XMLParser(target=builder)
parser.feed(data)
self.assertEqual(builder, [
('start', 'root'),
('start', 'element'),
('end', 'element'),
('start', 'element'),
('end', 'element'),
('start', 'empty-element'),
('end', 'empty-element'),
('end', 'root'),
])
with open(SIMPLE_NS_XMLFILE) as f:
data = f.read()
class Builder(list):
def start(self, tag, attrib):
self.append(("start", tag))
def end(self, tag):
self.append(("end", tag))
def data(self, text):
pass
def pi(self, target, data):
self.append(("pi", target, data))
def comment(self, data):
self.append(("comment", data))
builder = Builder()
parser = ET.XMLParser(target=builder)
parser.feed(data)
self.assertEqual(builder, [
('pi', 'pi', 'data'),
('comment', ' comment '),
('start', '{namespace}root'),
('start', '{namespace}element'),
('end', '{namespace}element'),
('start', '{namespace}element'),
('end', '{namespace}element'),
('start', '{namespace}empty-element'),
('end', '{namespace}empty-element'),
('end', '{namespace}root'),
])
# Element.getchildren() and ElementTree.getiterator() are deprecated.
@checkwarnings(("This method will be removed in future versions. "
"Use .+ instead.",
(DeprecationWarning, PendingDeprecationWarning)))
def test_getchildren(self):
# Test Element.getchildren()
with open(SIMPLE_XMLFILE, "rb") as f:
tree = ET.parse(f)
self.assertEqual([summarize_list(elem.getchildren())
for elem in tree.getroot().iter()], [
['element', 'element', 'empty-element'],
[],
[],
[],
])
self.assertEqual([summarize_list(elem.getchildren())
for elem in tree.getiterator()], [
['element', 'element', 'empty-element'],
[],
[],
[],
])
elem = ET.XML(SAMPLE_XML)
self.assertEqual(len(elem.getchildren()), 3)
self.assertEqual(len(elem[2].getchildren()), 1)
self.assertEqual(elem[:], elem.getchildren())
child1 = elem[0]
child2 = elem[2]
del elem[1:2]
self.assertEqual(len(elem.getchildren()), 2)
self.assertEqual(child1, elem[0])
self.assertEqual(child2, elem[1])
elem[0:2] = [child2, child1]
self.assertEqual(child2, elem[0])
self.assertEqual(child1, elem[1])
self.assertNotEqual(child1, elem[0])
elem.clear()
self.assertEqual(elem.getchildren(), [])
def test_writestring(self):
elem = ET.XML("text")
self.assertEqual(ET.tostring(elem), b'text')
elem = ET.fromstring("text")
self.assertEqual(ET.tostring(elem), b'text')
def test_encoding(self):
def check(encoding, body=''):
xml = ("%s" %
(encoding, body))
self.assertEqual(ET.XML(xml.encode(encoding)).text, body)
self.assertEqual(ET.XML(xml).text, body)
check("ascii", 'a')
check("us-ascii", 'a')
check("iso-8859-1", '\xbd')
check("iso-8859-15", '\u20ac')
check("cp437", '\u221a')
check("mac-roman", '\u02da')
def xml(encoding):
return "" % encoding
def bxml(encoding):
return xml(encoding).encode(encoding)
supported_encodings = [
'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le',
'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16',
'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852',
'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862',
'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125',
'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
'cp1256', 'cp1257', 'cp1258',
'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2',
'mac-roman', 'mac-turkish',
'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004',
'iso2022-jp-3', 'iso2022-jp-ext',
'koi8-r', 'koi8-t', 'koi8-u', 'kz1048',
'hz', 'ptcp154',
]
for encoding in supported_encodings:
self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'')
unsupported_ascii_compatible_encodings = [
'big5', 'big5hkscs',
'cp932', 'cp949', 'cp950',
'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr',
'gb2312', 'gbk', 'gb18030',
'iso2022-kr', 'johab',
'shift-jis', 'shift-jis-2004', 'shift-jisx0213',
'utf-7',
]
for encoding in unsupported_ascii_compatible_encodings:
self.assertRaises(ValueError, ET.XML, bxml(encoding))
unsupported_ascii_incompatible_encodings = [
'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140',
'utf_32', 'utf_32_be', 'utf_32_le',
]
for encoding in unsupported_ascii_incompatible_encodings:
self.assertRaises(ET.ParseError, ET.XML, bxml(encoding))
self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))
def test_methods(self):
# Test serialization methods.
e = ET.XML("")
e.tail = "\n"
self.assertEqual(serialize(e),
'\n')
self.assertEqual(serialize(e, method=None),
'\n')
self.assertEqual(serialize(e, method="xml"),
'\n')
self.assertEqual(serialize(e, method="html"),
'\n')
self.assertEqual(serialize(e, method="text"), '1 < 2\n')
def test_issue18347(self):
e = ET.XML('text')
self.assertEqual(serialize(e),
'text')
self.assertEqual(serialize(e, method="html"),
'text')
def test_entity(self):
# Test entity handling.
# 1) good entities
e = ET.XML("test")
self.assertEqual(serialize(e, encoding="us-ascii"),
b'test')
self.serialize_check(e, 'test')
# 2) bad entities
with self.assertRaises(ET.ParseError) as cm:
ET.XML("&entity;")
self.assertEqual(str(cm.exception),
'undefined entity: line 1, column 10')
with self.assertRaises(ET.ParseError) as cm:
ET.XML(ENTITY_XML)
self.assertEqual(str(cm.exception),
'undefined entity &entity;: line 5, column 10')
# 3) custom entity
parser = ET.XMLParser()
parser.entity["entity"] = "text"
parser.feed(ENTITY_XML)
root = parser.close()
self.serialize_check(root, 'text')
# 4) external (SYSTEM) entity
with self.assertRaises(ET.ParseError) as cm:
ET.XML(EXTERNAL_ENTITY_XML)
self.assertEqual(str(cm.exception),
'undefined entity &entity;: line 4, column 10')
def test_namespace(self):
# Test namespace issues.
# 1) xml namespace
elem = ET.XML("")
self.serialize_check(elem, '') # 1.1
# 2) other "well-known" namespaces
elem = ET.XML("")
self.serialize_check(elem,
'') # 2.1
elem = ET.XML("")
self.serialize_check(elem,
'') # 2.2
elem = ET.XML("")
self.serialize_check(elem,
'') # 2.3
# 3) unknown namespaces
elem = ET.XML(SAMPLE_XML_NS)
self.serialize_check(elem,
'\n'
' text\n'
' \n'
' \n'
' subtext\n'
' \n'
'')
def test_qname(self):
# Test QName handling.
# 1) decorated tags
elem = ET.Element("{uri}tag")
self.serialize_check(elem, '') # 1.1
elem = ET.Element(ET.QName("{uri}tag"))
self.serialize_check(elem, '') # 1.2
elem = ET.Element(ET.QName("uri", "tag"))
self.serialize_check(elem, '') # 1.3
elem = ET.Element(ET.QName("uri", "tag"))
subelem = ET.SubElement(elem, ET.QName("uri", "tag1"))
subelem = ET.SubElement(elem, ET.QName("uri", "tag2"))
self.serialize_check(elem,
'') # 1.4
# 2) decorated attributes
elem.clear()
elem.attrib["{uri}key"] = "value"
self.serialize_check(elem,
'') # 2.1
elem.clear()
elem.attrib[ET.QName("{uri}key")] = "value"
self.serialize_check(elem,
'') # 2.2
# 3) decorated values are not converted by default, but the
# QName wrapper can be used for values
elem.clear()
elem.attrib["{uri}key"] = "{uri}value"
self.serialize_check(elem,
'') # 3.1
elem.clear()
elem.attrib["{uri}key"] = ET.QName("{uri}value")
self.serialize_check(elem,
'') # 3.2
elem.clear()
subelem = ET.Element("tag")
subelem.attrib["{uri1}key"] = ET.QName("{uri2}value")
elem.append(subelem)
elem.append(subelem)
self.serialize_check(elem,
''
''
''
'') # 3.3
# 4) Direct QName tests
self.assertEqual(str(ET.QName('ns', 'tag')), '{ns}tag')
self.assertEqual(str(ET.QName('{ns}tag')), '{ns}tag')
q1 = ET.QName('ns', 'tag')
q2 = ET.QName('ns', 'tag')
self.assertEqual(q1, q2)
q2 = ET.QName('ns', 'other-tag')
self.assertNotEqual(q1, q2)
self.assertNotEqual(q1, 'ns:tag')
self.assertEqual(q1, '{ns}tag')
def test_doctype_public(self):
# Test PUBLIC doctype.
elem = ET.XML(''
'text')
def test_xpath_tokenizer(self):
# Test the XPath tokenizer.
from xml.etree import ElementPath
def check(p, expected):
self.assertEqual([op or tag
for op, tag in ElementPath.xpath_tokenizer(p)],
expected)
# tests from the xml specification
check("*", ['*'])
check("text()", ['text', '()'])
check("@name", ['@', 'name'])
check("@*", ['@', '*'])
check("para[1]", ['para', '[', '1', ']'])
check("para[last()]", ['para', '[', 'last', '()', ']'])
check("*/para", ['*', '/', 'para'])
check("/doc/chapter[5]/section[2]",
['/', 'doc', '/', 'chapter', '[', '5', ']',
'/', 'section', '[', '2', ']'])
check("chapter//para", ['chapter', '//', 'para'])
check("//para", ['//', 'para'])
check("//olist/item", ['//', 'olist', '/', 'item'])
check(".", ['.'])
check(".//para", ['.', '//', 'para'])
check("..", ['..'])
check("../@lang", ['..', '/', '@', 'lang'])
check("chapter[title]", ['chapter', '[', 'title', ']'])
check("employee[@secretary and @assistant]", ['employee',
'[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']'])
# additional tests
check("{http://spam}egg", ['{http://spam}egg'])
check("./spam.egg", ['.', '/', 'spam.egg'])
check(".//{http://spam}egg", ['.', '//', '{http://spam}egg'])
def test_processinginstruction(self):
# Test ProcessingInstruction directly
self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')),
b'')
self.assertEqual(ET.tostring(ET.PI('test', 'instruction')),
b'')
# Issue #2746
self.assertEqual(ET.tostring(ET.PI('test', '')),
b'?>')
self.assertEqual(ET.tostring(ET.PI('test', '\xe3'), 'latin-1'),
b"\n"
b"\xe3?>")
def test_html_empty_elems_serialization(self):
# issue 15970
# from http://www.w3.org/TR/html401/index/elements.html
for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR',
'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']:
for elem in [element, element.lower()]:
expected = '<%s>' % elem
serialized = serialize(ET.XML('<%s />' % elem), method='html')
self.assertEqual(serialized, expected)
serialized = serialize(ET.XML('<%s>%s>' % (elem,elem)),
method='html')
self.assertEqual(serialized, expected)
class XMLPullParserTest(unittest.TestCase):
def _feed(self, parser, data, chunk_size=None):
if chunk_size is None:
parser.feed(data)
else:
for i in range(0, len(data), chunk_size):
parser.feed(data[i:i+chunk_size])
def assert_event_tags(self, parser, expected):
events = parser.read_events()
self.assertEqual([(action, elem.tag) for action, elem in events],
expected)
def test_simple_xml(self):
for chunk_size in (None, 1, 5):
with self.subTest(chunk_size=chunk_size):
parser = ET.XMLPullParser()
self.assert_event_tags(parser, [])
self._feed(parser, "\n", chunk_size)
self.assert_event_tags(parser, [])
self._feed(parser,
"\n text\n", chunk_size)
self.assert_event_tags(parser, [('end', 'element')])
self._feed(parser, "texttail\n", chunk_size)
self._feed(parser, "\n", chunk_size)
self.assert_event_tags(parser, [
('end', 'element'),
('end', 'empty-element'),
])
self._feed(parser, "\n", chunk_size)
self.assert_event_tags(parser, [('end', 'root')])
self.assertIsNone(parser.close())
def test_feed_while_iterating(self):
parser = ET.XMLPullParser()
it = parser.read_events()
self._feed(parser, "\n text\n")
action, elem = next(it)
self.assertEqual((action, elem.tag), ('end', 'element'))
self._feed(parser, "\n")
action, elem = next(it)
self.assertEqual((action, elem.tag), ('end', 'root'))
with self.assertRaises(StopIteration):
next(it)
def test_simple_xml_with_ns(self):
parser = ET.XMLPullParser()
self.assert_event_tags(parser, [])
self._feed(parser, "\n")
self.assert_event_tags(parser, [])
self._feed(parser, "\n")
self.assert_event_tags(parser, [])
self._feed(parser, "text\n")
self.assert_event_tags(parser, [('end', '{namespace}element')])
self._feed(parser, "texttail\n")
self._feed(parser, "\n")
self.assert_event_tags(parser, [
('end', '{namespace}element'),
('end', '{namespace}empty-element'),
])
self._feed(parser, "\n")
self.assert_event_tags(parser, [('end', '{namespace}root')])
self.assertIsNone(parser.close())
def test_ns_events(self):
parser = ET.XMLPullParser(events=('start-ns', 'end-ns'))
self._feed(parser, "\n")
self._feed(parser, "\n")
self.assertEqual(
list(parser.read_events()),
[('start-ns', ('', 'namespace'))])
self._feed(parser, "text\n")
self._feed(parser, "texttail\n")
self._feed(parser, "\n")
self._feed(parser, "\n")
self.assertEqual(list(parser.read_events()), [('end-ns', None)])
self.assertIsNone(parser.close())
def test_events(self):
parser = ET.XMLPullParser(events=())
self._feed(parser, "\n")
self.assert_event_tags(parser, [])
parser = ET.XMLPullParser(events=('start', 'end'))
self._feed(parser, "\n")
self.assert_event_tags(parser, [])
self._feed(parser, "\n")
self.assert_event_tags(parser, [('start', 'root')])
self._feed(parser, "text\n")
self.assert_event_tags(parser, [('end', 'element')])
self._feed(parser,
"texttail\n")
self.assert_event_tags(parser, [
('start', '{foo}element'),
('start', '{foo}empty-element'),
('end', '{foo}empty-element'),
('end', '{foo}element'),
])
self._feed(parser, "")
self.assertIsNone(parser.close())
self.assert_event_tags(parser, [('end', 'root')])
parser = ET.XMLPullParser(events=('start',))
self._feed(parser, "\n")
self.assert_event_tags(parser, [])
self._feed(parser, "\n")
self.assert_event_tags(parser, [('start', 'root')])
self._feed(parser, "text\n")
self.assert_event_tags(parser, [])
self._feed(parser,
"texttail\n")
self.assert_event_tags(parser, [
('start', '{foo}element'),
('start', '{foo}empty-element'),
])
self._feed(parser, "")
self.assertIsNone(parser.close())
def test_events_sequence(self):
# Test that events can be some sequence that's not just a tuple or list
eventset = {'end', 'start'}
parser = ET.XMLPullParser(events=eventset)
self._feed(parser, "bar")
self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
class DummyIter:
def __init__(self):
self.events = iter(['start', 'end', 'start-ns'])
def __iter__(self):
return self
def __next__(self):
return next(self.events)
parser = ET.XMLPullParser(events=DummyIter())
self._feed(parser, "bar")
self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
def test_unknown_event(self):
with self.assertRaises(ValueError):
ET.XMLPullParser(events=('start', 'end', 'bogus'))
#
# xinclude tests (samples from appendix C of the xinclude specification)
XINCLUDE = {}
XINCLUDE["C1.xml"] = """\
120 Mz is adequate for an average home user.
"""
XINCLUDE["disclaimer.xml"] = """\
The opinions represented herein represent those of the individual
and should not be interpreted as official policy endorsed by this
organization.
"""
class XIncludeTest(unittest.TestCase):
def xinclude_loader(self, href, parse="xml", encoding=None):
try:
data = XINCLUDE[href]
except KeyError:
raise OSError("resource not found")
if parse == "xml":
data = ET.XML(data)
return data
def none_loader(self, href, parser, encoding=None):
return None
def _my_loader(self, href, parse):
# Used to avoid a test-dependency problem where the default loader
# of ElementInclude uses the pyET parser for cET tests.
if parse == 'xml':
with open(href, 'rb') as f:
return ET.parse(f).getroot()
else:
return None
def test_xinclude_default(self):
from xml.etree import ElementInclude
doc = self.xinclude_loader('default.xml')
ElementInclude.include(doc, self._my_loader)
self.assertEqual(serialize(doc),
'\n'
'
The opinions represented herein represent those of the individual\n'
' and should not be interpreted as official policy endorsed by this\n'
' organization.