bpo-20928: support base-URL and recursive includes in etree.ElementInclude (#5723)
* bpo-20928: bring elementtree's XInclude support on par with the implementation in lxml by adding support for recursive includes and a base-URL. * bpo-20928: Support xincluding the same file multiple times, just not recursively. * bpo-20928: Add 'max_depth' parameter to xinclude that limits the maximum recursion depth to 6 by default. * Add news entry for updated ElementInclude support
This commit is contained in:
parent
ded8888fbc
commit
c6a7bdb356
|
@ -1668,6 +1668,17 @@ XINCLUDE["default.xml"] = """\
|
||||||
</document>
|
</document>
|
||||||
""".format(html.escape(SIMPLE_XMLFILE, True))
|
""".format(html.escape(SIMPLE_XMLFILE, True))
|
||||||
|
|
||||||
|
XINCLUDE["include_c1_repeated.xml"] = """\
|
||||||
|
<?xml version='1.0'?>
|
||||||
|
<document xmlns:xi="http://www.w3.org/2001/XInclude">
|
||||||
|
<p>The following is the source code of Recursive1.xml:</p>
|
||||||
|
<xi:include href="C1.xml"/>
|
||||||
|
<xi:include href="C1.xml"/>
|
||||||
|
<xi:include href="C1.xml"/>
|
||||||
|
<xi:include href="C1.xml"/>
|
||||||
|
</document>
|
||||||
|
"""
|
||||||
|
|
||||||
#
|
#
|
||||||
# badly formatted xi:include tags
|
# badly formatted xi:include tags
|
||||||
|
|
||||||
|
@ -1688,6 +1699,31 @@ XINCLUDE_BAD["B2.xml"] = """\
|
||||||
</div>
|
</div>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
XINCLUDE["Recursive1.xml"] = """\
|
||||||
|
<?xml version='1.0'?>
|
||||||
|
<document xmlns:xi="http://www.w3.org/2001/XInclude">
|
||||||
|
<p>The following is the source code of Recursive2.xml:</p>
|
||||||
|
<xi:include href="Recursive2.xml"/>
|
||||||
|
</document>
|
||||||
|
"""
|
||||||
|
|
||||||
|
XINCLUDE["Recursive2.xml"] = """\
|
||||||
|
<?xml version='1.0'?>
|
||||||
|
<document xmlns:xi="http://www.w3.org/2001/XInclude">
|
||||||
|
<p>The following is the source code of Recursive3.xml:</p>
|
||||||
|
<xi:include href="Recursive3.xml"/>
|
||||||
|
</document>
|
||||||
|
"""
|
||||||
|
|
||||||
|
XINCLUDE["Recursive3.xml"] = """\
|
||||||
|
<?xml version='1.0'?>
|
||||||
|
<document xmlns:xi="http://www.w3.org/2001/XInclude">
|
||||||
|
<p>The following is the source code of Recursive1.xml:</p>
|
||||||
|
<xi:include href="Recursive1.xml"/>
|
||||||
|
</document>
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
class XIncludeTest(unittest.TestCase):
|
class XIncludeTest(unittest.TestCase):
|
||||||
|
|
||||||
def xinclude_loader(self, href, parse="xml", encoding=None):
|
def xinclude_loader(self, href, parse="xml", encoding=None):
|
||||||
|
@ -1789,6 +1825,13 @@ class XIncludeTest(unittest.TestCase):
|
||||||
' </ns0:include>\n'
|
' </ns0:include>\n'
|
||||||
'</div>') # C5
|
'</div>') # C5
|
||||||
|
|
||||||
|
def test_xinclude_repeated(self):
|
||||||
|
from xml.etree import ElementInclude
|
||||||
|
|
||||||
|
document = self.xinclude_loader("include_c1_repeated.xml")
|
||||||
|
ElementInclude.include(document, self.xinclude_loader)
|
||||||
|
self.assertEqual(1+4*2, len(document.findall(".//p")))
|
||||||
|
|
||||||
def test_xinclude_failures(self):
|
def test_xinclude_failures(self):
|
||||||
from xml.etree import ElementInclude
|
from xml.etree import ElementInclude
|
||||||
|
|
||||||
|
@ -1821,6 +1864,45 @@ class XIncludeTest(unittest.TestCase):
|
||||||
"xi:fallback tag must be child of xi:include "
|
"xi:fallback tag must be child of xi:include "
|
||||||
"('{http://www.w3.org/2001/XInclude}fallback')")
|
"('{http://www.w3.org/2001/XInclude}fallback')")
|
||||||
|
|
||||||
|
# Test infinitely recursive includes.
|
||||||
|
document = self.xinclude_loader("Recursive1.xml")
|
||||||
|
with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
|
||||||
|
ElementInclude.include(document, self.xinclude_loader)
|
||||||
|
self.assertEqual(str(cm.exception),
|
||||||
|
"recursive include of Recursive2.xml")
|
||||||
|
|
||||||
|
# Test 'max_depth' limitation.
|
||||||
|
document = self.xinclude_loader("Recursive1.xml")
|
||||||
|
with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
|
||||||
|
ElementInclude.include(document, self.xinclude_loader, max_depth=None)
|
||||||
|
self.assertEqual(str(cm.exception),
|
||||||
|
"recursive include of Recursive2.xml")
|
||||||
|
|
||||||
|
document = self.xinclude_loader("Recursive1.xml")
|
||||||
|
with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm:
|
||||||
|
ElementInclude.include(document, self.xinclude_loader, max_depth=0)
|
||||||
|
self.assertEqual(str(cm.exception),
|
||||||
|
"maximum xinclude depth reached when including file Recursive2.xml")
|
||||||
|
|
||||||
|
document = self.xinclude_loader("Recursive1.xml")
|
||||||
|
with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm:
|
||||||
|
ElementInclude.include(document, self.xinclude_loader, max_depth=1)
|
||||||
|
self.assertEqual(str(cm.exception),
|
||||||
|
"maximum xinclude depth reached when including file Recursive3.xml")
|
||||||
|
|
||||||
|
document = self.xinclude_loader("Recursive1.xml")
|
||||||
|
with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm:
|
||||||
|
ElementInclude.include(document, self.xinclude_loader, max_depth=2)
|
||||||
|
self.assertEqual(str(cm.exception),
|
||||||
|
"maximum xinclude depth reached when including file Recursive1.xml")
|
||||||
|
|
||||||
|
document = self.xinclude_loader("Recursive1.xml")
|
||||||
|
with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
|
||||||
|
ElementInclude.include(document, self.xinclude_loader, max_depth=3)
|
||||||
|
self.assertEqual(str(cm.exception),
|
||||||
|
"recursive include of Recursive2.xml")
|
||||||
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
# reported bugs
|
# reported bugs
|
||||||
|
|
||||||
|
|
|
@ -50,18 +50,28 @@
|
||||||
|
|
||||||
import copy
|
import copy
|
||||||
from . import ElementTree
|
from . import ElementTree
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
XINCLUDE = "{http://www.w3.org/2001/XInclude}"
|
XINCLUDE = "{http://www.w3.org/2001/XInclude}"
|
||||||
|
|
||||||
XINCLUDE_INCLUDE = XINCLUDE + "include"
|
XINCLUDE_INCLUDE = XINCLUDE + "include"
|
||||||
XINCLUDE_FALLBACK = XINCLUDE + "fallback"
|
XINCLUDE_FALLBACK = XINCLUDE + "fallback"
|
||||||
|
|
||||||
|
# For security reasons, the inclusion depth is limited to this read-only value by default.
|
||||||
|
DEFAULT_MAX_INCLUSION_DEPTH = 6
|
||||||
|
|
||||||
|
|
||||||
##
|
##
|
||||||
# Fatal include error.
|
# Fatal include error.
|
||||||
|
|
||||||
class FatalIncludeError(SyntaxError):
|
class FatalIncludeError(SyntaxError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class LimitedRecursiveIncludeError(FatalIncludeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
##
|
##
|
||||||
# Default loader. This loader reads an included resource from disk.
|
# Default loader. This loader reads an included resource from disk.
|
||||||
#
|
#
|
||||||
|
@ -92,13 +102,33 @@ def default_loader(href, parse, encoding=None):
|
||||||
# @param loader Optional resource loader. If omitted, it defaults
|
# @param loader Optional resource loader. If omitted, it defaults
|
||||||
# to {@link default_loader}. If given, it should be a callable
|
# to {@link default_loader}. If given, it should be a callable
|
||||||
# that implements the same interface as <b>default_loader</b>.
|
# that implements the same interface as <b>default_loader</b>.
|
||||||
|
# @param base_url The base URL of the original file, to resolve
|
||||||
|
# relative include file references.
|
||||||
|
# @param max_depth The maximum number of recursive inclusions.
|
||||||
|
# Limited to reduce the risk of malicious content explosion.
|
||||||
|
# Pass None to disable the limitation (negative values raise ValueError).
|
||||||
|
# @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
|
||||||
# @throws FatalIncludeError If the function fails to include a given
|
# @throws FatalIncludeError If the function fails to include a given
|
||||||
# resource, or if the tree contains malformed XInclude elements.
|
# resource, or if the tree contains malformed XInclude elements.
|
||||||
# @throws OSError If the function fails to load a given resource.
|
# @throws IOError If the function fails to load a given resource.
|
||||||
|
# @returns the node or its replacement if it was an XInclude node
|
||||||
|
|
||||||
def include(elem, loader=None):
|
def include(elem, loader=None, base_url=None,
|
||||||
|
max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
|
||||||
|
if max_depth is None:
|
||||||
|
max_depth = -1
|
||||||
|
elif max_depth < 0:
|
||||||
|
raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth)
|
||||||
|
|
||||||
|
if hasattr(elem, 'getroot'):
|
||||||
|
elem = elem.getroot()
|
||||||
if loader is None:
|
if loader is None:
|
||||||
loader = default_loader
|
loader = default_loader
|
||||||
|
|
||||||
|
_include(elem, loader, base_url, max_depth, set())
|
||||||
|
|
||||||
|
|
||||||
|
def _include(elem, loader, base_url, max_depth, _parent_hrefs):
|
||||||
# look for xinclude elements
|
# look for xinclude elements
|
||||||
i = 0
|
i = 0
|
||||||
while i < len(elem):
|
while i < len(elem):
|
||||||
|
@ -106,14 +136,24 @@ def include(elem, loader=None):
|
||||||
if e.tag == XINCLUDE_INCLUDE:
|
if e.tag == XINCLUDE_INCLUDE:
|
||||||
# process xinclude directive
|
# process xinclude directive
|
||||||
href = e.get("href")
|
href = e.get("href")
|
||||||
|
if base_url:
|
||||||
|
href = urljoin(base_url, href)
|
||||||
parse = e.get("parse", "xml")
|
parse = e.get("parse", "xml")
|
||||||
if parse == "xml":
|
if parse == "xml":
|
||||||
|
if href in _parent_hrefs:
|
||||||
|
raise FatalIncludeError("recursive include of %s" % href)
|
||||||
|
if max_depth == 0:
|
||||||
|
raise LimitedRecursiveIncludeError(
|
||||||
|
"maximum xinclude depth reached when including file %s" % href)
|
||||||
|
_parent_hrefs.add(href)
|
||||||
node = loader(href, parse)
|
node = loader(href, parse)
|
||||||
if node is None:
|
if node is None:
|
||||||
raise FatalIncludeError(
|
raise FatalIncludeError(
|
||||||
"cannot load %r as %r" % (href, parse)
|
"cannot load %r as %r" % (href, parse)
|
||||||
)
|
)
|
||||||
node = copy.copy(node)
|
node = copy.copy(node) # FIXME: this makes little sense with recursive includes
|
||||||
|
_include(node, loader, href, max_depth - 1, _parent_hrefs)
|
||||||
|
_parent_hrefs.remove(href)
|
||||||
if e.tail:
|
if e.tail:
|
||||||
node.tail = (node.tail or "") + e.tail
|
node.tail = (node.tail or "") + e.tail
|
||||||
elem[i] = node
|
elem[i] = node
|
||||||
|
@ -123,11 +163,13 @@ def include(elem, loader=None):
|
||||||
raise FatalIncludeError(
|
raise FatalIncludeError(
|
||||||
"cannot load %r as %r" % (href, parse)
|
"cannot load %r as %r" % (href, parse)
|
||||||
)
|
)
|
||||||
|
if e.tail:
|
||||||
|
text += e.tail
|
||||||
if i:
|
if i:
|
||||||
node = elem[i-1]
|
node = elem[i-1]
|
||||||
node.tail = (node.tail or "") + text + (e.tail or "")
|
node.tail = (node.tail or "") + text
|
||||||
else:
|
else:
|
||||||
elem.text = (elem.text or "") + text + (e.tail or "")
|
elem.text = (elem.text or "") + text
|
||||||
del elem[i]
|
del elem[i]
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
|
@ -139,5 +181,5 @@ def include(elem, loader=None):
|
||||||
"xi:fallback tag must be child of xi:include (%r)" % e.tag
|
"xi:fallback tag must be child of xi:include (%r)" % e.tag
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
include(e, loader)
|
_include(e, loader, base_url, max_depth, _parent_hrefs)
|
||||||
i = i + 1
|
i += 1
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
ElementTree supports recursive XInclude processing. Patch by Stefan Behnel.
|
Loading…
Reference in New Issue