bpo-13611: C14N 2.0 implementation for ElementTree (GH-12966)
* Implement C14N 2.0 as a new canonicalize() function in ElementTree. Missing features: - prefix renaming in XPath expressions (tag and attribute text is supported) - preservation of original prefixes given redundant namespace declarations
This commit is contained in:
parent
ee88af3f4f
commit
e1d5dd645d
|
@ -465,6 +465,53 @@ Reference
|
|||
Functions
|
||||
^^^^^^^^^
|
||||
|
||||
.. function:: canonicalize(xml_data=None, *, out=None, from_file=None, **options)
|
||||
|
||||
`C14N 2.0 <https://www.w3.org/TR/xml-c14n2/>`_ transformation function.
|
||||
|
||||
Canonicalization is a way to normalise XML output in a way that allows
|
||||
byte-by-byte comparisons and digital signatures. It reduces the freedom
|
||||
that XML serializers have and instead generates a more constrained XML
|
||||
representation. The main restrictions regard the placement of namespace
|
||||
declarations, the ordering of attributes, and ignorable whitespace.
|
||||
|
||||
This function takes an XML data string (*xml_data*) or a file path or
|
||||
file-like object (*from_file*) as input, converts it to the canonical
|
||||
form, and writes it out using the *out* file(-like) object, if provided,
|
||||
or returns it as a text string if not. The output file receives text,
|
||||
not bytes. It should therefore be opened in text mode with ``utf-8``
|
||||
encoding.
|
||||
|
||||
Typical uses::
|
||||
|
||||
xml_data = "<root>...</root>"
|
||||
print(canonicalize(xml_data))
|
||||
|
||||
with open("c14n_output.xml", mode='w', encoding='utf-8') as out_file:
|
||||
canonicalize(xml_data, out=out_file)
|
||||
|
||||
with open("c14n_output.xml", mode='w', encoding='utf-8') as out_file:
|
||||
canonicalize(from_file="inputfile.xml", out=out_file)
|
||||
|
||||
The configuration *options* are as follows:
|
||||
|
||||
- *with_comments*: set to true to include comments (default: false)
|
||||
- *strip_text*: set to true to strip whitespace before and after text content
|
||||
(default: false)
|
||||
- *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}"
|
||||
(default: false)
|
||||
- *qname_aware_tags*: a set of qname aware tag names in which prefixes
|
||||
should be replaced in text content (default: empty)
|
||||
- *qname_aware_attrs*: a set of qname aware attribute names in which prefixes
|
||||
should be replaced in text content (default: empty)
|
||||
- *exclude_attrs*: a set of attribute names that should not be serialised
|
||||
- *exclude_tags*: a set of tag names that should not be serialised
|
||||
|
||||
In the option list above, "a set" refers to any collection or iterable of
|
||||
strings, no ordering is expected.
|
||||
|
||||
.. versionadded:: 3.8
|
||||
|
||||
|
||||
.. function:: Comment(text=None)
|
||||
|
||||
|
@ -1114,6 +1161,19 @@ TreeBuilder Objects
|
|||
.. versionadded:: 3.8
|
||||
|
||||
|
||||
.. class:: C14NWriterTarget(write, *, \
|
||||
with_comments=False, strip_text=False, rewrite_prefixes=False, \
|
||||
qname_aware_tags=None, qname_aware_attrs=None, \
|
||||
exclude_attrs=None, exclude_tags=None)
|
||||
|
||||
A `C14N 2.0 <https://www.w3.org/TR/xml-c14n2/>`_ writer. Arguments are the
|
||||
same as for the :func:`canonicalize` function. This class does not build a
|
||||
tree but translates the callback events directly into a serialised form
|
||||
using the *write* function.
|
||||
|
||||
.. versionadded:: 3.8
|
||||
|
||||
|
||||
.. _elementtree-xmlparser-objects:
|
||||
|
||||
XMLParser Objects
|
||||
|
|
|
@ -525,6 +525,10 @@ xml
|
|||
external entities by default.
|
||||
(Contributed by Christian Heimes in :issue:`17239`.)
|
||||
|
||||
* The :mod:`xml.etree.ElementTree` module provides a new function
|
||||
:func:`~xml.etree.ElementTree.canonicalize()` that implements C14N 2.0.
|
||||
(Contributed by Stefan Behnel in :issue:`13611`.)
|
||||
|
||||
|
||||
Optimizations
|
||||
=============
|
||||
|
|
|
@ -12,6 +12,7 @@ import io
|
|||
import itertools
|
||||
import locale
|
||||
import operator
|
||||
import os
|
||||
import pickle
|
||||
import sys
|
||||
import textwrap
|
||||
|
@ -20,6 +21,7 @@ import unittest
|
|||
import warnings
|
||||
import weakref
|
||||
|
||||
from functools import partial
|
||||
from itertools import product, islice
|
||||
from test import support
|
||||
from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr
|
||||
|
@ -3527,6 +3529,231 @@ class NoAcceleratorTest(unittest.TestCase):
|
|||
self.assertIsInstance(pyET.Element.__init__, types.FunctionType)
|
||||
self.assertIsInstance(pyET.XMLParser.__init__, types.FunctionType)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
def c14n_roundtrip(xml, **options):
    """Return the C14N 2.0 text that ``pyET.canonicalize`` produces for *xml*.

    All keyword *options* are forwarded unchanged to ``pyET.canonicalize``.
    """
    canonical_text = pyET.canonicalize(xml, **options)
    return canonical_text
|
||||
|
||||
|
||||
class C14NTest(unittest.TestCase):
    """Tests for the C14N 2.0 serialiser exposed through ``canonicalize()``."""
    maxDiff = None

    #
    # simple roundtrip tests (from c14n.py)

    def test_simple_roundtrip(self):
        # Basics
        self.assertEqual(c14n_roundtrip("<doc/>"), '<doc></doc>')
        self.assertEqual(c14n_roundtrip("<doc xmlns='uri'/>"), # FIXME
            '<doc xmlns="uri"></doc>')
        self.assertEqual(c14n_roundtrip("<prefix:doc xmlns:prefix='uri'/>"),
            '<prefix:doc xmlns:prefix="uri"></prefix:doc>')
        self.assertEqual(c14n_roundtrip("<doc xmlns:prefix='uri'><prefix:bar/></doc>"),
            '<doc><prefix:bar xmlns:prefix="uri"></prefix:bar></doc>')
        self.assertEqual(c14n_roundtrip("<elem xmlns:wsu='http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd' xmlns:SOAP-ENV='http://schemas.xmlsoap.org/soap/envelope/' />"),
            '<elem></elem>')

        # C14N spec
        #
        # The inputs below deliberately use character references, and the
        # expected output uses the escapes that C14N mandates: '&', '<' and
        # '>' in text; '&', '<', '"', TAB, LF and CR in attribute values.
        # NOTE(review): the whitespace runs inside these literals are
        # significant - verify the exact counts against the upstream suite.
        self.assertEqual(c14n_roundtrip("<doc>Hello, world!<!-- Comment 1 --></doc>"),
            '<doc>Hello, world!</doc>')
        self.assertEqual(c14n_roundtrip("<value>&#x32;</value>"),
            '<value>2</value>')
        self.assertEqual(c14n_roundtrip('<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>'),
            '<compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute>')
        self.assertEqual(c14n_roundtrip('''<compute expr='value>"0" &amp;&amp; value&lt;"10" ?"valid":"error"'>valid</compute>'''),
            '<compute expr="value>&quot;0&quot; &amp;&amp; value&lt;&quot;10&quot; ?&quot;valid&quot;:&quot;error&quot;">valid</compute>')
        self.assertEqual(c14n_roundtrip("<norm attr=' &apos;   &#x20;&#13;&#xa;&#9;   &apos; '/>"),
            '<norm attr=" \'    &#xD;&#xA;&#x9;   \' "></norm>')
        self.assertEqual(c14n_roundtrip("<normNames attr='   A   &#x20;&#13;&#xa;&#9;   B   '/>"),
            '<normNames attr="   A    &#xD;&#xA;&#x9;   B   "></normNames>')
        self.assertEqual(c14n_roundtrip("<normId id=' &apos;   &#x20;&#13;&#xa;&#9;   &apos; '/>"),
            '<normId id=" \'    &#xD;&#xA;&#x9;   \' "></normId>')

        # fragments from PJ's tests
        #self.assertEqual(c14n_roundtrip("<doc xmlns:x='http://example.com/x' xmlns='http://example.com/default'><b y:a1='1' xmlns='http://example.com/default' a3='3' xmlns:y='http://example.com/y' y:a2='2'/></doc>"),
        #'<doc xmlns:x="http://example.com/x"><b xmlns:y="http://example.com/y" a3="3" y:a1="1" y:a2="2"></b></doc>')

    def test_c14n_exclusion(self):
        # The document is indented by four spaces per level; the tests that
        # do not pass strip_text=True rely on that whitespace surviving
        # around the excluded elements.
        xml = textwrap.dedent("""\
            <root xmlns:x="http://example.com/x">
                <a x:attr="attrx">
                    <b>abtext</b>
                </a>
                <b>btext</b>
                <c>
                    <x:d>dtext</x:d>
                </c>
            </root>
        """)
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True),
            '<root>'
            '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>'
            '<b>btext</b>'
            '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr']),
            '<root>'
            '<a><b>abtext</b></a>'
            '<b>btext</b>'
            '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d']),
            '<root>'
            '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>'
            '<b>btext</b>'
            '<c></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr'],
                           exclude_tags=['{http://example.com/x}d']),
            '<root>'
            '<a><b>abtext</b></a>'
            '<b>btext</b>'
            '<c></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_tags=['a', 'b']),
            '<root>'
            '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
            '</root>')
        # Without strip_text, the tail whitespace of each excluded element
        # is still written out.
        self.assertEqual(
            c14n_roundtrip(xml, exclude_tags=['a', 'b']),
            '<root>\n'
            '    \n'
            '    \n'
            '    <c>\n'
            '        <x:d xmlns:x="http://example.com/x">dtext</x:d>\n'
            '    </c>\n'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d', 'b']),
            '<root>'
            '<a xmlns:x="http://example.com/x" x:attr="attrx"></a>'
            '<c></c>'
            '</root>')
        self.assertEqual(
            c14n_roundtrip(xml, exclude_tags=['{http://example.com/x}d', 'b']),
            '<root>\n'
            '    <a xmlns:x="http://example.com/x" x:attr="attrx">\n'
            '        \n'
            '    </a>\n'
            '    \n'
            '    <c>\n'
            '        \n'
            '    </c>\n'
            '</root>')

    #
    # basic method=c14n tests from the c14n 2.0 specification.  uses
    # test files under xmltestdata/c14n-20.

    # note that this uses generated C14N versions of the standard ET.write
    # output, not roundtripped C14N (see above).

    def test_xml_c14n2(self):
        datadir = findfile("c14n-20", subdir="xmltestdata")
        full_path = partial(os.path.join, datadir)

        # Base names (without ".xml") of every fixture in the directory.
        files = [filename[:-4] for filename in sorted(os.listdir(datadir))
                 if filename.endswith('.xml')]
        input_files = [
            filename for filename in files
            if filename.startswith('in')
        ]
        # Per config file: option name -> (stripped text, option element).
        configs = {
            filename: {
                # <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
                option.tag.split('}')[-1]: ((option.text or '').strip(), option)
                for option in ET.parse(full_path(filename) + ".xml").getroot()
            }
            for filename in files
            if filename.startswith('c14n')
        }

        # Expected-output files are named "out_<input>_<config>".
        tests = {
            input_file: [
                (filename, configs[filename.rsplit('_', 1)[-1]])
                for filename in files
                if filename.startswith(f'out_{input_file}_')
                and filename.rsplit('_', 1)[-1] in configs
            ]
            for input_file in input_files
        }

        # Make sure we found all test cases.
        self.assertEqual(30, len([
            output_file for output_files in tests.values()
            for output_file in output_files]))

        def get_option(config, option_name, default=None):
            return config.get(option_name, (default, ()))[0]

        for input_file, output_files in tests.items():
            for output_file, config in output_files:
                keep_comments = get_option(
                    config, 'IgnoreComments') == 'true'  # no, it's right :)
                strip_text = get_option(
                    config, 'TrimTextNodes') == 'true'
                rewrite_prefixes = get_option(
                    config, 'PrefixRewrite') == 'sequential'
                if 'QNameAware' in config:
                    qattrs = [
                        f"{{{el.get('NS')}}}{el.get('Name')}"
                        for el in config['QNameAware'][1].findall(
                            '{http://www.w3.org/2010/xml-c14n2}QualifiedAttr')
                    ]
                    qtags = [
                        f"{{{el.get('NS')}}}{el.get('Name')}"
                        for el in config['QNameAware'][1].findall(
                            '{http://www.w3.org/2010/xml-c14n2}Element')
                    ]
                else:
                    qtags = qattrs = None

                # Build subtest description from config.
                config_descr = ','.join(
                    f"{name}={value or ','.join(c.tag.split('}')[-1] for c in children)}"
                    for name, (value, children) in sorted(config.items())
                )

                with self.subTest(f"{output_file}({config_descr})"):
                    if input_file == 'inNsRedecl' and not rewrite_prefixes:
                        self.skipTest(
                            f"Redeclared namespace handling is not supported in {output_file}")
                    if input_file == 'inNsSuperfluous' and not rewrite_prefixes:
                        self.skipTest(
                            f"Redeclared namespace handling is not supported in {output_file}")
                    if 'QNameAware' in config and config['QNameAware'][1].find(
                            '{http://www.w3.org/2010/xml-c14n2}XPathElement') is not None:
                        self.skipTest(
                            f"QName rewriting in XPath text is not supported in {output_file}")

                    # 'source' is a path by default; for inC14N5 it becomes an
                    # in-memory file with the external entity already inlined.
                    source = full_path(input_file + ".xml")
                    if input_file == 'inC14N5':
                        # Hack: avoid setting up external entity resolution in the parser.
                        with open(full_path('world.txt'), 'rb') as entity_file, \
                                open(source, 'rb') as xml_file:
                            source = io.BytesIO(
                                xml_file.read().replace(b'&ent2;', entity_file.read()))

                    text = ET.canonicalize(
                        from_file=source,
                        with_comments=keep_comments,
                        strip_text=strip_text,
                        rewrite_prefixes=rewrite_prefixes,
                        qname_aware_tags=qtags, qname_aware_attrs=qattrs)

                    with open(full_path(output_file + ".xml"), 'r', encoding='utf8') as expected_file:
                        expected = expected_file.read()
                    if input_file == 'inC14N3':
                        # FIXME: cET resolves default attributes but ET does not!
                        expected = expected.replace(' attr="default"', '')
                        text = text.replace(' attr="default"', '')
                    self.assertEqual(expected, text)
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
|
||||
|
@ -3559,6 +3786,8 @@ def test_main(module=None):
|
|||
XMLParserTest,
|
||||
XMLPullParserTest,
|
||||
BugsTest,
|
||||
KeywordArgsTest,
|
||||
C14NTest,
|
||||
]
|
||||
|
||||
# These tests will only run for the pure-Python version that doesn't import
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
|
||||
<c14n2:IgnoreComments>true</c14n2:IgnoreComments>
|
||||
</dsig:CanonicalizationMethod>
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" Algorithm="http://www.w3.org/2010/xml-c14n2">
|
||||
</dsig:CanonicalizationMethod>
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
|
||||
<c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
|
||||
</dsig:CanonicalizationMethod>
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
|
||||
<c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
|
||||
<c14n2:QNameAware>
|
||||
<c14n2:QualifiedAttr Name="type" NS="http://www.w3.org/2001/XMLSchema-instance"/>
|
||||
</c14n2:QNameAware>
|
||||
</dsig:CanonicalizationMethod>
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
|
||||
<c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
|
||||
<c14n2:QNameAware>
|
||||
<c14n2:Element Name="bar" NS="http://a"/>
|
||||
<c14n2:XPathElement Name="IncludedXPath" NS="http://www.w3.org/2010/xmldsig2#"/>
|
||||
</c14n2:QNameAware>
|
||||
</dsig:CanonicalizationMethod>
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
|
||||
<c14n2:QNameAware>
|
||||
<c14n2:QualifiedAttr Name="type" NS="http://www.w3.org/2001/XMLSchema-instance"/>
|
||||
</c14n2:QNameAware>
|
||||
</dsig:CanonicalizationMethod>
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
|
||||
<c14n2:QNameAware>
|
||||
<c14n2:Element Name="bar" NS="http://a"/>
|
||||
</c14n2:QNameAware>
|
||||
</dsig:CanonicalizationMethod>
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
|
||||
<c14n2:QNameAware>
|
||||
<c14n2:Element Name="bar" NS="http://a"/>
|
||||
<c14n2:XPathElement Name="IncludedXPath" NS="http://www.w3.org/2010/xmldsig2#"/>
|
||||
</c14n2:QNameAware>
|
||||
</dsig:CanonicalizationMethod>
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
|
||||
<c14n2:TrimTextNodes>true</c14n2:TrimTextNodes>
|
||||
</dsig:CanonicalizationMethod>
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<!ELEMENT doc (#PCDATA)>
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
<?xml version="1.0"?>
|
||||
<xsl:stylesheet version="1.0"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
>
|
||||
</xsl:stylesheet>
|
|
@ -0,0 +1,14 @@
|
|||
<?xml version="1.0"?>
|
||||
|
||||
<?xml-stylesheet href="doc.xsl"
|
||||
type="text/xsl" ?>
|
||||
|
||||
<!DOCTYPE doc SYSTEM "doc.dtd">
|
||||
|
||||
<doc>Hello, world!<!-- Comment 1 --></doc>
|
||||
|
||||
<?pi-without-data ?>
|
||||
|
||||
<!-- Comment 2 -->
|
||||
|
||||
<!-- Comment 3 -->
|
|
@ -0,0 +1,11 @@
|
|||
<doc>
|
||||
<clean> </clean>
|
||||
<dirty> A B </dirty>
|
||||
<mixed>
|
||||
A
|
||||
<clean> </clean>
|
||||
B
|
||||
<dirty> A B </dirty>
|
||||
C
|
||||
</mixed>
|
||||
</doc>
|
|
@ -0,0 +1,18 @@
|
|||
<!DOCTYPE doc [<!ATTLIST e9 attr CDATA "default">]>
|
||||
<doc>
|
||||
<e1 />
|
||||
<e2 ></e2>
|
||||
<e3 name = "elem3" id="elem3" />
|
||||
<e4 name="elem4" id="elem4" ></e4>
|
||||
<e5 a:attr="out" b:attr="sorted" attr2="all" attr="I'm"
|
||||
xmlns:b="http://www.ietf.org"
|
||||
xmlns:a="http://www.w3.org"
|
||||
xmlns="http://example.org"/>
|
||||
<e6 xmlns="" xmlns:a="http://www.w3.org">
|
||||
<e7 xmlns="http://www.ietf.org">
|
||||
<e8 xmlns="" xmlns:a="http://www.w3.org">
|
||||
<e9 xmlns="" xmlns:a="http://www.ietf.org"/>
|
||||
</e8>
|
||||
</e7>
|
||||
</e6>
|
||||
</doc>
|
|
@ -0,0 +1,13 @@
|
|||
<!DOCTYPE doc [
|
||||
<!ATTLIST normId id ID #IMPLIED>
|
||||
<!ATTLIST normNames attr NMTOKENS #IMPLIED>
|
||||
]>
|
||||
<doc>
|
||||
<text>First line
 Second line</text>
|
||||
<value>2</value>
|
||||
<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
|
||||
<compute expr='value>"0" && value<"10" ?"valid":"error"'>valid</compute>
|
||||
<norm attr=' '   
	 ' '/>
|
||||
<normNames attr=' A   
	 B '/>
|
||||
<normId id=' '  
	 ' '/>
|
||||
</doc>
|
|
@ -0,0 +1,12 @@
|
|||
<!DOCTYPE doc [
|
||||
<!ATTLIST doc attrExtEnt CDATA #IMPLIED>
|
||||
<!ENTITY ent1 "Hello">
|
||||
<!ENTITY ent2 SYSTEM "world.txt">
|
||||
<!ENTITY entExt SYSTEM "earth.gif" NDATA gif>
|
||||
<!NOTATION gif SYSTEM "viewgif.exe">
|
||||
]>
|
||||
<doc attrExtEnt="entExt">
|
||||
&ent1;, &ent2;!
|
||||
</doc>
|
||||
|
||||
<!-- Let world.txt contain "world" (excluding the quotes) -->
|
|
@ -0,0 +1,2 @@
|
|||
<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||
<doc>©</doc>
|
|
@ -0,0 +1,4 @@
|
|||
<a:foo xmlns:a="http://a" xmlns:b="http://b" xmlns:child="http://c" xmlns:soap-env="http://schemas.xmlsoap.org/wsdl/soap/" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
|
||||
<a:bar>xsd:string</a:bar>
|
||||
<dsig2:IncludedXPath xmlns:dsig2="http://www.w3.org/2010/xmldsig2#">/soap-env:body/child::b:foo[@att1 != "c:val" and @att2 != 'xsd:string']</dsig2:IncludedXPath>
|
||||
</a:foo>
|
|
@ -0,0 +1,3 @@
|
|||
<foo xmlns:a="http://a" xmlns:b="http://b">
|
||||
<b:bar b:att1="val" att2="val"/>
|
||||
</foo>
|
|
@ -0,0 +1,6 @@
|
|||
<a:foo xmlns:a="http://a" xmlns:b="http://b" xmlns:c="http://c">
|
||||
<b:bar/>
|
||||
<b:bar/>
|
||||
<b:bar/>
|
||||
<a:bar b:att1="val"/>
|
||||
</a:foo>
|
|
@ -0,0 +1,3 @@
|
|||
<foo xmlns:a="http://z3" xmlns:b="http://z2" a:att1="val1" b:att2="val2">
|
||||
<bar xmlns="http://z0" xmlns:a="http://z2" a:att1="val1" b:att2="val2" xmlns:b="http://z3" />
|
||||
</foo>
|
|
@ -0,0 +1,4 @@
|
|||
<a:foo xmlns:a="http://z3" xmlns:b="http://z2" b:att1="val1" c:att3="val3" b:att2="val2" xmlns:c="http://z1" xmlns:d="http://z0">
|
||||
<c:bar/>
|
||||
<c:bar d:att3="val3"/>
|
||||
</a:foo>
|
|
@ -0,0 +1,4 @@
|
|||
<foo xmlns:a="http://z0" xmlns:b="http://z0" a:att1="val1" b:att2="val2" xmlns="http://z0">
|
||||
<c:bar xmlns:a="http://z0" xmlns:c="http://z0" c:att3="val3"/>
|
||||
<d:bar xmlns:d="http://z0"/>
|
||||
</foo>
|
|
@ -0,0 +1,3 @@
|
|||
<foo xmlns="http://z0" xml:id="23">
|
||||
<bar xsi:type="xsd:string" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">data</bar>
|
||||
</foo>
|
|
@ -0,0 +1,6 @@
|
|||
<?xml-stylesheet href="doc.xsl"
|
||||
type="text/xsl" ?>
|
||||
<doc>Hello, world!<!-- Comment 1 --></doc>
|
||||
<?pi-without-data?>
|
||||
<!-- Comment 2 -->
|
||||
<!-- Comment 3 -->
|
|
@ -0,0 +1,4 @@
|
|||
<?xml-stylesheet href="doc.xsl"
|
||||
type="text/xsl" ?>
|
||||
<doc>Hello, world!</doc>
|
||||
<?pi-without-data?>
|
|
@ -0,0 +1,11 @@
|
|||
<doc>
|
||||
<clean> </clean>
|
||||
<dirty> A B </dirty>
|
||||
<mixed>
|
||||
A
|
||||
<clean> </clean>
|
||||
B
|
||||
<dirty> A B </dirty>
|
||||
C
|
||||
</mixed>
|
||||
</doc>
|
|
@ -0,0 +1 @@
|
|||
<doc><clean></clean><dirty>A B</dirty><mixed>A<clean></clean>B<dirty>A B</dirty>C</mixed></doc>
|
|
@ -0,0 +1,14 @@
|
|||
<doc>
|
||||
<e1></e1>
|
||||
<e2></e2>
|
||||
<e3 id="elem3" name="elem3"></e3>
|
||||
<e4 id="elem4" name="elem4"></e4>
|
||||
<e5 xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I'm" attr2="all" b:attr="sorted" a:attr="out"></e5>
|
||||
<e6>
|
||||
<e7 xmlns="http://www.ietf.org">
|
||||
<e8 xmlns="">
|
||||
<e9 attr="default"></e9>
|
||||
</e8>
|
||||
</e7>
|
||||
</e6>
|
||||
</doc>
|
|
@ -0,0 +1,14 @@
|
|||
<n0:doc xmlns:n0="">
|
||||
<n0:e1></n0:e1>
|
||||
<n0:e2></n0:e2>
|
||||
<n0:e3 id="elem3" name="elem3"></n0:e3>
|
||||
<n0:e4 id="elem4" name="elem4"></n0:e4>
|
||||
<n1:e5 xmlns:n1="http://example.org" xmlns:n2="http://www.ietf.org" xmlns:n3="http://www.w3.org" attr="I'm" attr2="all" n2:attr="sorted" n3:attr="out"></n1:e5>
|
||||
<n0:e6>
|
||||
<n2:e7 xmlns:n2="http://www.ietf.org">
|
||||
<n0:e8>
|
||||
<n0:e9 attr="default"></n0:e9>
|
||||
</n0:e8>
|
||||
</n2:e7>
|
||||
</n0:e6>
|
||||
</n0:doc>
|
|
@ -0,0 +1 @@
|
|||
<doc><e1></e1><e2></e2><e3 id="elem3" name="elem3"></e3><e4 id="elem4" name="elem4"></e4><e5 xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I'm" attr2="all" b:attr="sorted" a:attr="out"></e5><e6><e7 xmlns="http://www.ietf.org"><e8 xmlns=""><e9 attr="default"></e9></e8></e7></e6></doc>
|
|
@ -0,0 +1,10 @@
|
|||
<doc>
|
||||
<text>First line
|
||||
Second line</text>
|
||||
<value>2</value>
|
||||
<compute>value>"0" && value<"10" ?"valid":"error"</compute>
|
||||
<compute expr="value>"0" && value<"10" ?"valid":"error"">valid</compute>
|
||||
<norm attr=" ' 
	 ' "></norm>
|
||||
<normNames attr="A 
	 B"></normNames>
|
||||
<normId id="' 
	 '"></normId>
|
||||
</doc>
|
|
@ -0,0 +1,2 @@
|
|||
<doc><text>First line
|
||||
Second line</text><value>2</value><compute>value>"0" && value<"10" ?"valid":"error"</compute><compute expr="value>"0" && value<"10" ?"valid":"error"">valid</compute><norm attr=" ' 
	 ' "></norm><normNames attr="A 
	 B"></normNames><normId id="' 
	 '"></normId></doc>
|
|
@ -0,0 +1,3 @@
|
|||
<doc attrExtEnt="entExt">
|
||||
Hello, world!
|
||||
</doc>
|
|
@ -0,0 +1 @@
|
|||
<doc attrExtEnt="entExt">Hello, world!</doc>
|
|
@ -0,0 +1 @@
|
|||
<doc>©</doc>
|
|
@ -0,0 +1,4 @@
|
|||
<a:foo xmlns:a="http://a">
|
||||
<a:bar>xsd:string</a:bar>
|
||||
<dsig2:IncludedXPath xmlns:dsig2="http://www.w3.org/2010/xmldsig2#">/soap-env:body/child::b:foo[@att1 != "c:val" and @att2 != 'xsd:string']</dsig2:IncludedXPath>
|
||||
</a:foo>
|
|
@ -0,0 +1,4 @@
|
|||
<n0:foo xmlns:n0="http://a">
|
||||
<n0:bar xmlns:n1="http://www.w3.org/2001/XMLSchema">n1:string</n0:bar>
|
||||
<n4:IncludedXPath xmlns:n2="http://b" xmlns:n3="http://schemas.xmlsoap.org/wsdl/soap/" xmlns:n4="http://www.w3.org/2010/xmldsig2#">/n3:body/child::n2:foo[@att1 != "c:val" and @att2 != 'xsd:string']</n4:IncludedXPath>
|
||||
</n0:foo>
|
|
@ -0,0 +1,4 @@
|
|||
<a:foo xmlns:a="http://a">
|
||||
<a:bar xmlns:xsd="http://www.w3.org/2001/XMLSchema">xsd:string</a:bar>
|
||||
<dsig2:IncludedXPath xmlns:dsig2="http://www.w3.org/2010/xmldsig2#">/soap-env:body/child::b:foo[@att1 != "c:val" and @att2 != 'xsd:string']</dsig2:IncludedXPath>
|
||||
</a:foo>
|
|
@ -0,0 +1,4 @@
|
|||
<a:foo xmlns:a="http://a">
|
||||
<a:bar xmlns:xsd="http://www.w3.org/2001/XMLSchema">xsd:string</a:bar>
|
||||
<dsig2:IncludedXPath xmlns:b="http://b" xmlns:dsig2="http://www.w3.org/2010/xmldsig2#" xmlns:soap-env="http://schemas.xmlsoap.org/wsdl/soap/">/soap-env:body/child::b:foo[@att1 != "c:val" and @att2 != 'xsd:string']</dsig2:IncludedXPath>
|
||||
</a:foo>
|
|
@ -0,0 +1,3 @@
|
|||
<foo>
|
||||
<b:bar xmlns:b="http://b" att2="val" b:att1="val"></b:bar>
|
||||
</foo>
|
|
@ -0,0 +1,3 @@
|
|||
<n0:foo xmlns:n0="">
|
||||
<n1:bar xmlns:n1="http://b" att2="val" n1:att1="val"></n1:bar>
|
||||
</n0:foo>
|
|
@ -0,0 +1,6 @@
|
|||
<a:foo xmlns:a="http://a">
|
||||
<b:bar xmlns:b="http://b"></b:bar>
|
||||
<b:bar xmlns:b="http://b"></b:bar>
|
||||
<b:bar xmlns:b="http://b"></b:bar>
|
||||
<a:bar xmlns:b="http://b" b:att1="val"></a:bar>
|
||||
</a:foo>
|
|
@ -0,0 +1,6 @@
|
|||
<n0:foo xmlns:n0="http://a">
|
||||
<n1:bar xmlns:n1="http://b"></n1:bar>
|
||||
<n1:bar xmlns:n1="http://b"></n1:bar>
|
||||
<n1:bar xmlns:n1="http://b"></n1:bar>
|
||||
<n0:bar xmlns:n1="http://b" n1:att1="val"></n0:bar>
|
||||
</n0:foo>
|
|
@ -0,0 +1,3 @@
|
|||
<foo xmlns:a="http://z3" xmlns:b="http://z2" b:att2="val2" a:att1="val1">
|
||||
<bar xmlns="http://z0" xmlns:a="http://z2" xmlns:b="http://z3" a:att1="val1" b:att2="val2"></bar>
|
||||
</foo>
|
|
@ -0,0 +1,3 @@
|
|||
<n0:foo xmlns:n0="" xmlns:n1="http://z2" xmlns:n2="http://z3" n1:att2="val2" n2:att1="val1">
|
||||
<n3:bar xmlns:n3="http://z0" n1:att1="val1" n2:att2="val2"></n3:bar>
|
||||
</n0:foo>
|
|
@ -0,0 +1,4 @@
|
|||
<a:foo xmlns:a="http://z3" xmlns:b="http://z2" xmlns:c="http://z1" c:att3="val3" b:att1="val1" b:att2="val2">
|
||||
<c:bar></c:bar>
|
||||
<c:bar xmlns:d="http://z0" d:att3="val3"></c:bar>
|
||||
</a:foo>
|
|
@ -0,0 +1,4 @@
|
|||
<n2:foo xmlns:n0="http://z1" xmlns:n1="http://z2" xmlns:n2="http://z3" n0:att3="val3" n1:att1="val1" n1:att2="val2">
|
||||
<n0:bar></n0:bar>
|
||||
<n0:bar xmlns:n3="http://z0" n3:att3="val3"></n0:bar>
|
||||
</n2:foo>
|
|
@ -0,0 +1,4 @@
|
|||
<foo xmlns="http://z0" xmlns:a="http://z0" xmlns:b="http://z0" a:att1="val1" b:att2="val2">
|
||||
<c:bar xmlns:c="http://z0" c:att3="val3"></c:bar>
|
||||
<d:bar xmlns:d="http://z0"></d:bar>
|
||||
</foo>
|
|
@ -0,0 +1,4 @@
|
|||
<n0:foo xmlns:n0="http://z0" n0:att1="val1" n0:att2="val2">
|
||||
<n0:bar n0:att3="val3"></n0:bar>
|
||||
<n0:bar></n0:bar>
|
||||
</n0:foo>
|
|
@ -0,0 +1,3 @@
|
|||
<foo xmlns="http://z0" xml:id="23">
|
||||
<bar xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="xsd:string">data</bar>
|
||||
</foo>
|
|
@ -0,0 +1,3 @@
|
|||
<n0:foo xmlns:n0="http://z0" xml:id="23">
|
||||
<n0:bar xmlns:n1="http://www.w3.org/2001/XMLSchema-instance" n1:type="xsd:string">data</n0:bar>
|
||||
</n0:foo>
|
|
@ -0,0 +1,3 @@
|
|||
<n0:foo xmlns:n0="http://z0" xml:id="23">
|
||||
<n0:bar xmlns:n1="http://www.w3.org/2001/XMLSchema" xmlns:n2="http://www.w3.org/2001/XMLSchema-instance" n2:type="n1:string">data</n0:bar>
|
||||
</n0:foo>
|
|
@ -0,0 +1,3 @@
|
|||
<foo xmlns="http://z0" xml:id="23">
|
||||
<bar xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="xsd:string">data</bar>
|
||||
</foo>
|
|
@ -0,0 +1 @@
|
|||
world
|
|
@ -87,6 +87,7 @@ __all__ = [
|
|||
"XML", "XMLID",
|
||||
"XMLParser", "XMLPullParser",
|
||||
"register_namespace",
|
||||
"canonicalize", "C14NWriterTarget",
|
||||
]
|
||||
|
||||
VERSION = "1.3.0"
|
||||
|
@ -1711,6 +1712,336 @@ class XMLParser:
|
|||
del self.target, self._target
|
||||
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
# C14N 2.0
|
||||
|
||||
def canonicalize(xml_data=None, *, out=None, from_file=None, **options):
    """Serialise XML input in its C14N 2.0 canonical form.

    Input comes from *xml_data* (an XML string) or, when that is None, from
    *from_file* (a file path or file-like object).  At least one of the two
    must be given, otherwise ValueError is raised.

    When *out* is given, its ``.write()`` method receives the canonical XML
    as text (not bytes) and None is returned; a real file should be opened
    in text mode with encoding "utf-8".  When *out* is omitted, the
    canonical XML is returned as a text string.

    The configuration options are the same as for the ``C14NWriterTarget``.
    """
    if xml_data is None and from_file is None:
        raise ValueError("Either 'xml_data' or 'from_file' must be provided as input")

    # Collect into an in-memory buffer only if the caller gave us no sink.
    buffer = io.StringIO() if out is None else None
    sink = out if buffer is None else buffer

    writer_target = C14NWriterTarget(sink.write, **options)
    parser = XMLParser(target=writer_target)

    if xml_data is None:
        # The guard above guarantees that from_file is set here.
        parse(from_file, parser=parser)
    else:
        parser.feed(xml_data)
        parser.close()

    if buffer is None:
        return None
    return buffer.getvalue()
|
||||
|
||||
|
||||
# Bound ``match`` of a pattern accepting exactly "<word chars>:<word chars>",
# i.e. text shaped like a prefixed name with a single colon.
_looks_like_prefix_name = re.compile(r'^\w+:\w+$', re.UNICODE).match
|
||||
|
||||
|
||||
class C14NWriterTarget:
|
||||
"""
|
||||
Canonicalization writer target for the XMLParser.
|
||||
|
||||
Serialises parse events to XML C14N 2.0.
|
||||
|
||||
The *write* function is used for writing out the resulting data stream
|
||||
as text (not bytes). To write to a file, open it in text mode with encoding
|
||||
"utf-8" and pass its ``.write`` method.
|
||||
|
||||
Configuration options:
|
||||
|
||||
- *with_comments*: set to true to include comments
|
||||
- *strip_text*: set to true to strip whitespace before and after text content
|
||||
- *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}"
|
||||
- *qname_aware_tags*: a set of qname aware tag names in which prefixes
|
||||
should be replaced in text content
|
||||
- *qname_aware_attrs*: a set of qname aware attribute names in which prefixes
|
||||
should be replaced in text content
|
||||
- *exclude_attrs*: a set of attribute names that should not be serialised
|
||||
- *exclude_tags*: a set of tag names that should not be serialised
|
||||
"""
|
||||
def __init__(self, write, *,
|
||||
with_comments=False, strip_text=False, rewrite_prefixes=False,
|
||||
qname_aware_tags=None, qname_aware_attrs=None,
|
||||
exclude_attrs=None, exclude_tags=None):
|
||||
self._write = write
|
||||
self._data = []
|
||||
self._with_comments = with_comments
|
||||
self._strip_text = strip_text
|
||||
self._exclude_attrs = set(exclude_attrs) if exclude_attrs else None
|
||||
self._exclude_tags = set(exclude_tags) if exclude_tags else None
|
||||
|
||||
self._rewrite_prefixes = rewrite_prefixes
|
||||
if qname_aware_tags:
|
||||
self._qname_aware_tags = set(qname_aware_tags)
|
||||
else:
|
||||
self._qname_aware_tags = None
|
||||
if qname_aware_attrs:
|
||||
self._find_qname_aware_attrs = set(qname_aware_attrs).intersection
|
||||
else:
|
||||
self._find_qname_aware_attrs = None
|
||||
|
||||
# Stack with globally and newly declared namespaces as (uri, prefix) pairs.
|
||||
self._declared_ns_stack = [[
|
||||
("http://www.w3.org/XML/1998/namespace", "xml"),
|
||||
]]
|
||||
# Stack with user declared namespace prefixes as (uri, prefix) pairs.
|
||||
self._ns_stack = []
|
||||
if not rewrite_prefixes:
|
||||
self._ns_stack.append(list(_namespace_map.items()))
|
||||
self._ns_stack.append([])
|
||||
self._prefix_map = {}
|
||||
self._preserve_space = [False]
|
||||
self._pending_start = None
|
||||
self._root_seen = False
|
||||
self._root_done = False
|
||||
self._ignored_depth = 0
|
||||
|
||||
def _iter_namespaces(self, ns_stack, _reversed=reversed):
|
||||
for namespaces in _reversed(ns_stack):
|
||||
if namespaces: # almost no element declares new namespaces
|
||||
yield from namespaces
|
||||
|
||||
def _resolve_prefix_name(self, prefixed_name):
|
||||
prefix, name = prefixed_name.split(':', 1)
|
||||
for uri, p in self._iter_namespaces(self._ns_stack):
|
||||
if p == prefix:
|
||||
return f'{{{uri}}}{name}'
|
||||
raise ValueError(f'Prefix {prefix} of QName "{prefixed_name}" is not declared in scope')
|
||||
|
||||
def _qname(self, qname, uri=None):
|
||||
if uri is None:
|
||||
uri, tag = qname[1:].rsplit('}', 1) if qname[:1] == '{' else ('', qname)
|
||||
else:
|
||||
tag = qname
|
||||
|
||||
prefixes_seen = set()
|
||||
for u, prefix in self._iter_namespaces(self._declared_ns_stack):
|
||||
if u == uri and prefix not in prefixes_seen:
|
||||
return f'{prefix}:{tag}' if prefix else tag, tag, uri
|
||||
prefixes_seen.add(prefix)
|
||||
|
||||
# Not declared yet => add new declaration.
|
||||
if self._rewrite_prefixes:
|
||||
if uri in self._prefix_map:
|
||||
prefix = self._prefix_map[uri]
|
||||
else:
|
||||
prefix = self._prefix_map[uri] = f'n{len(self._prefix_map)}'
|
||||
self._declared_ns_stack[-1].append((uri, prefix))
|
||||
return f'{prefix}:{tag}', tag, uri
|
||||
|
||||
if not uri and '' not in prefixes_seen:
|
||||
# No default namespace declared => no prefix needed.
|
||||
return tag, tag, uri
|
||||
|
||||
for u, prefix in self._iter_namespaces(self._ns_stack):
|
||||
if u == uri:
|
||||
self._declared_ns_stack[-1].append((uri, prefix))
|
||||
return f'{prefix}:{tag}' if prefix else tag, tag, uri
|
||||
|
||||
raise ValueError(f'Namespace "{uri}" is not declared in scope')
|
||||
|
||||
def data(self, data):
    """Buffer a chunk of text unless an excluded element is open."""
    if not self._ignored_depth:
        self._data.append(data)
|
||||
|
||||
def _flush(self, _join_text=''.join):
|
||||
data = _join_text(self._data)
|
||||
del self._data[:]
|
||||
if self._strip_text and not self._preserve_space[-1]:
|
||||
data = data.strip()
|
||||
if self._pending_start is not None:
|
||||
args, self._pending_start = self._pending_start, None
|
||||
qname_text = data if data and _looks_like_prefix_name(data) else None
|
||||
self._start(*args, qname_text)
|
||||
if qname_text is not None:
|
||||
return
|
||||
if data and self._root_seen:
|
||||
self._write(_escape_cdata_c14n(data))
|
||||
|
||||
def start_ns(self, prefix, uri):
    """Record a namespace declaration from the input.

    The (uri, prefix) pair is stored on the current level of
    self._ns_stack.  Nothing is recorded inside an excluded element.
    """
    if self._ignored_depth:
        return
    # we may have to resolve qnames in text content
    # A deferred start tag must be written first as well: _start() opens
    # the _ns_stack level that this declaration belongs to.
    if self._data or self._pending_start is not None:
        self._flush()
    self._ns_stack[-1].append((uri, prefix))
|
||||
|
||||
def start(self, tag, attrs):
    """Handle an element start event.

    Excluded tags, and everything nested inside them, only increase the
    ignore depth.  For a qname aware tag the start tag is deferred until
    its text is known; any other start tag is written right away.
    """
    if self._exclude_tags is not None and (
            self._ignored_depth or tag in self._exclude_tags):
        self._ignored_depth += 1
        return
    # Write buffered text and a deferred start tag of the parent before
    # anything of this element.  Without the pending check, a child that
    # directly follows a qname aware start tag would lose that tag.
    if self._data or self._pending_start is not None:
        self._flush()

    new_namespaces = []
    self._declared_ns_stack.append(new_namespaces)

    if self._qname_aware_tags is not None and tag in self._qname_aware_tags:
        # Need to parse text first to see if it requires a prefix declaration.
        self._pending_start = (tag, attrs, new_namespaces)
        return
    self._start(tag, attrs, new_namespaces)
|
||||
|
||||
def _start(self, tag, attrs, new_namespaces, qname_text=None):
|
||||
if self._exclude_attrs is not None and attrs:
|
||||
attrs = {k: v for k, v in attrs.items() if k not in self._exclude_attrs}
|
||||
|
||||
qnames = {tag, *attrs}
|
||||
resolved_names = {}
|
||||
|
||||
# Resolve prefixes in attribute and tag text.
|
||||
if qname_text is not None:
|
||||
qname = resolved_names[qname_text] = self._resolve_prefix_name(qname_text)
|
||||
qnames.add(qname)
|
||||
if self._find_qname_aware_attrs is not None and attrs:
|
||||
qattrs = self._find_qname_aware_attrs(attrs)
|
||||
if qattrs:
|
||||
for attr_name in qattrs:
|
||||
value = attrs[attr_name]
|
||||
if _looks_like_prefix_name(value):
|
||||
qname = resolved_names[value] = self._resolve_prefix_name(value)
|
||||
qnames.add(qname)
|
||||
else:
|
||||
qattrs = None
|
||||
else:
|
||||
qattrs = None
|
||||
|
||||
# Assign prefixes in lexicographical order of used URIs.
|
||||
parse_qname = self._qname
|
||||
parsed_qnames = {n: parse_qname(n) for n in sorted(
|
||||
qnames, key=lambda n: n.split('}', 1))}
|
||||
|
||||
# Write namespace declarations in prefix order ...
|
||||
if new_namespaces:
|
||||
attr_list = [
|
||||
('xmlns:' + prefix if prefix else 'xmlns', uri)
|
||||
for uri, prefix in new_namespaces
|
||||
]
|
||||
attr_list.sort()
|
||||
else:
|
||||
# almost always empty
|
||||
attr_list = []
|
||||
|
||||
# ... followed by attributes in URI+name order
|
||||
if attrs:
|
||||
for k, v in sorted(attrs.items()):
|
||||
if qattrs is not None and k in qattrs and v in resolved_names:
|
||||
v = parsed_qnames[resolved_names[v]][0]
|
||||
attr_qname, attr_name, uri = parsed_qnames[k]
|
||||
# No prefix for attributes in default ('') namespace.
|
||||
attr_list.append((attr_qname if uri else attr_name, v))
|
||||
|
||||
# Honour xml:space attributes.
|
||||
space_behaviour = attrs.get('{http://www.w3.org/XML/1998/namespace}space')
|
||||
self._preserve_space.append(
|
||||
space_behaviour == 'preserve' if space_behaviour
|
||||
else self._preserve_space[-1])
|
||||
|
||||
# Write the tag.
|
||||
write = self._write
|
||||
write('<' + parsed_qnames[tag][0])
|
||||
if attr_list:
|
||||
write(''.join([f' {k}="{_escape_attrib_c14n(v)}"' for k, v in attr_list]))
|
||||
write('>')
|
||||
|
||||
# Write the resolved qname text content.
|
||||
if qname_text is not None:
|
||||
write(_escape_cdata_c14n(parsed_qnames[resolved_names[qname_text]][0]))
|
||||
|
||||
self._root_seen = True
|
||||
self._ns_stack.append([])
|
||||
|
||||
def end(self, tag):
    """Handle an element end event by writing the end tag.

    Inside an excluded element this only decreases the ignore depth.
    Otherwise, pending output is flushed, the end tag is written and the
    per-element levels of the state stacks are dropped again.
    """
    if self._ignored_depth:
        self._ignored_depth -= 1
        return
    # An empty qname aware element still has its start tag deferred.
    # Write it now; otherwise the end tag would appear without a start
    # tag and the pops below would remove levels that were never pushed.
    if self._data or self._pending_start is not None:
        self._flush()
    self._write(f'</{self._qname(tag)[0]}>')
    self._preserve_space.pop()
    # Only the document-level entry is left once the root has ended.
    self._root_done = len(self._preserve_space) == 1
    self._declared_ns_stack.pop()
    self._ns_stack.pop()
|
||||
|
||||
def comment(self, text):
    """Write a comment, if comments are enabled.

    Nothing is written when comments are disabled or an excluded element
    is open.  A comment after the root element is preceded by a newline,
    a comment before the root element is followed by one.
    """
    if not self._with_comments:
        return
    if self._ignored_depth:
        return
    if self._root_done:
        self._write('\n')
    elif self._pending_start is not None or (self._root_seen and self._data):
        # A deferred start tag has to be written before the comment,
        # otherwise the comment would end up in front of its own parent.
        self._flush()
    self._write(f'<!--{_escape_cdata_c14n(text)}-->')
    if not self._root_seen:
        self._write('\n')
|
||||
|
||||
def pi(self, target, data):
    """Write a processing instruction.

    Nothing is written while an excluded element is open.  A processing
    instruction after the root element is preceded by a newline, one
    before the root element is followed by one.
    """
    if self._ignored_depth:
        return
    if self._root_done:
        self._write('\n')
    elif self._pending_start is not None or (self._root_seen and self._data):
        # A deferred start tag has to be written before the instruction,
        # otherwise it would end up in front of its own parent.
        self._flush()
    self._write(
        f'<?{target} {_escape_cdata_c14n(data)}?>' if data else f'<?{target}?>')
    if not self._root_seen:
        self._write('\n')
|
||||
|
||||
|
||||
def _escape_cdata_c14n(text):
|
||||
# escape character data
|
||||
try:
|
||||
# it's worth avoiding do-nothing calls for strings that are
|
||||
# shorter than 500 character, or so. assume that's, by far,
|
||||
# the most common case in most applications.
|
||||
if '&' in text:
|
||||
text = text.replace('&', '&')
|
||||
if '<' in text:
|
||||
text = text.replace('<', '<')
|
||||
if '>' in text:
|
||||
text = text.replace('>', '>')
|
||||
if '\r' in text:
|
||||
text = text.replace('\r', '
')
|
||||
return text
|
||||
except (TypeError, AttributeError):
|
||||
_raise_serialization_error(text)
|
||||
|
||||
|
||||
def _escape_attrib_c14n(text):
|
||||
# escape attribute value
|
||||
try:
|
||||
if '&' in text:
|
||||
text = text.replace('&', '&')
|
||||
if '<' in text:
|
||||
text = text.replace('<', '<')
|
||||
if '"' in text:
|
||||
text = text.replace('"', '"')
|
||||
if '\t' in text:
|
||||
text = text.replace('\t', '	')
|
||||
if '\n' in text:
|
||||
text = text.replace('\n', '
')
|
||||
if '\r' in text:
|
||||
text = text.replace('\r', '
')
|
||||
return text
|
||||
except (TypeError, AttributeError):
|
||||
_raise_serialization_error(text)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
# Import the C accelerators
|
||||
try:
|
||||
# Element is going to be shadowed by the C implementation. We need to keep
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
The xml.etree.ElementTree package gained support for C14N 2.0 serialisation.
|
||||
Patch by Stefan Behnel.
|
Loading…
Reference in New Issue