From e1d5dd645d5f59867cb0ad63179110f310cbca89 Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Wed, 1 May 2019 22:34:13 +0200 Subject: [PATCH] bpo-13611: C14N 2.0 implementation for ElementTree (GH-12966) * Implement C14N 2.0 as a new canonicalize() function in ElementTree. Missing features: - prefix renaming in XPath expressions (tag and attribute text is supported) - preservation of original prefixes given redundant namespace declarations --- Doc/library/xml.etree.elementtree.rst | 60 ++++ Doc/whatsnew/3.8.rst | 4 + Lib/test/test_xml_etree.py | 229 ++++++++++++ Lib/test/xmltestdata/c14n-20/c14nComment.xml | 4 + Lib/test/xmltestdata/c14n-20/c14nDefault.xml | 3 + Lib/test/xmltestdata/c14n-20/c14nPrefix.xml | 4 + .../xmltestdata/c14n-20/c14nPrefixQname.xml | 7 + .../c14n-20/c14nPrefixQnameXpathElem.xml | 8 + Lib/test/xmltestdata/c14n-20/c14nQname.xml | 6 + .../xmltestdata/c14n-20/c14nQnameElem.xml | 6 + .../c14n-20/c14nQnameXpathElem.xml | 7 + Lib/test/xmltestdata/c14n-20/c14nTrim.xml | 4 + Lib/test/xmltestdata/c14n-20/doc.dtd | 6 + Lib/test/xmltestdata/c14n-20/doc.xsl | 5 + Lib/test/xmltestdata/c14n-20/inC14N1.xml | 14 + Lib/test/xmltestdata/c14n-20/inC14N2.xml | 11 + Lib/test/xmltestdata/c14n-20/inC14N3.xml | 18 + Lib/test/xmltestdata/c14n-20/inC14N4.xml | 13 + Lib/test/xmltestdata/c14n-20/inC14N5.xml | 12 + Lib/test/xmltestdata/c14n-20/inC14N6.xml | 2 + Lib/test/xmltestdata/c14n-20/inNsContent.xml | 4 + Lib/test/xmltestdata/c14n-20/inNsDefault.xml | 3 + Lib/test/xmltestdata/c14n-20/inNsPushdown.xml | 6 + Lib/test/xmltestdata/c14n-20/inNsRedecl.xml | 3 + Lib/test/xmltestdata/c14n-20/inNsSort.xml | 4 + .../xmltestdata/c14n-20/inNsSuperfluous.xml | 4 + Lib/test/xmltestdata/c14n-20/inNsXml.xml | 3 + .../c14n-20/out_inC14N1_c14nComment.xml | 6 + .../c14n-20/out_inC14N1_c14nDefault.xml | 4 + .../c14n-20/out_inC14N2_c14nDefault.xml | 11 + .../c14n-20/out_inC14N2_c14nTrim.xml | 1 + .../c14n-20/out_inC14N3_c14nDefault.xml | 14 + .../c14n-20/out_inC14N3_c14nPrefix.xml | 
14 + .../c14n-20/out_inC14N3_c14nTrim.xml | 1 + .../c14n-20/out_inC14N4_c14nDefault.xml | 10 + .../c14n-20/out_inC14N4_c14nTrim.xml | 2 + .../c14n-20/out_inC14N5_c14nDefault.xml | 3 + .../c14n-20/out_inC14N5_c14nTrim.xml | 1 + .../c14n-20/out_inC14N6_c14nDefault.xml | 1 + .../c14n-20/out_inNsContent_c14nDefault.xml | 4 + ...t_inNsContent_c14nPrefixQnameXpathElem.xml | 4 + .../c14n-20/out_inNsContent_c14nQnameElem.xml | 4 + .../out_inNsContent_c14nQnameXpathElem.xml | 4 + .../c14n-20/out_inNsDefault_c14nDefault.xml | 3 + .../c14n-20/out_inNsDefault_c14nPrefix.xml | 3 + .../c14n-20/out_inNsPushdown_c14nDefault.xml | 6 + .../c14n-20/out_inNsPushdown_c14nPrefix.xml | 6 + .../c14n-20/out_inNsRedecl_c14nDefault.xml | 3 + .../c14n-20/out_inNsRedecl_c14nPrefix.xml | 3 + .../c14n-20/out_inNsSort_c14nDefault.xml | 4 + .../c14n-20/out_inNsSort_c14nPrefix.xml | 4 + .../out_inNsSuperfluous_c14nDefault.xml | 4 + .../out_inNsSuperfluous_c14nPrefix.xml | 4 + .../c14n-20/out_inNsXml_c14nDefault.xml | 3 + .../c14n-20/out_inNsXml_c14nPrefix.xml | 3 + .../c14n-20/out_inNsXml_c14nPrefixQname.xml | 3 + .../c14n-20/out_inNsXml_c14nQname.xml | 3 + Lib/test/xmltestdata/c14n-20/world.txt | 1 + Lib/xml/etree/ElementTree.py | 331 ++++++++++++++++++ .../2019-04-26-10-10-34.bpo-13611.XEF4bg.rst | 2 + 60 files changed, 920 insertions(+) create mode 100644 Lib/test/xmltestdata/c14n-20/c14nComment.xml create mode 100644 Lib/test/xmltestdata/c14n-20/c14nDefault.xml create mode 100644 Lib/test/xmltestdata/c14n-20/c14nPrefix.xml create mode 100644 Lib/test/xmltestdata/c14n-20/c14nPrefixQname.xml create mode 100644 Lib/test/xmltestdata/c14n-20/c14nPrefixQnameXpathElem.xml create mode 100644 Lib/test/xmltestdata/c14n-20/c14nQname.xml create mode 100644 Lib/test/xmltestdata/c14n-20/c14nQnameElem.xml create mode 100644 Lib/test/xmltestdata/c14n-20/c14nQnameXpathElem.xml create mode 100644 Lib/test/xmltestdata/c14n-20/c14nTrim.xml create mode 100644 Lib/test/xmltestdata/c14n-20/doc.dtd create mode 100644 
Lib/test/xmltestdata/c14n-20/doc.xsl create mode 100644 Lib/test/xmltestdata/c14n-20/inC14N1.xml create mode 100644 Lib/test/xmltestdata/c14n-20/inC14N2.xml create mode 100644 Lib/test/xmltestdata/c14n-20/inC14N3.xml create mode 100644 Lib/test/xmltestdata/c14n-20/inC14N4.xml create mode 100644 Lib/test/xmltestdata/c14n-20/inC14N5.xml create mode 100644 Lib/test/xmltestdata/c14n-20/inC14N6.xml create mode 100644 Lib/test/xmltestdata/c14n-20/inNsContent.xml create mode 100644 Lib/test/xmltestdata/c14n-20/inNsDefault.xml create mode 100644 Lib/test/xmltestdata/c14n-20/inNsPushdown.xml create mode 100644 Lib/test/xmltestdata/c14n-20/inNsRedecl.xml create mode 100644 Lib/test/xmltestdata/c14n-20/inNsSort.xml create mode 100644 Lib/test/xmltestdata/c14n-20/inNsSuperfluous.xml create mode 100644 Lib/test/xmltestdata/c14n-20/inNsXml.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inC14N1_c14nComment.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inC14N1_c14nDefault.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inC14N2_c14nDefault.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inC14N2_c14nTrim.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inC14N3_c14nDefault.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inC14N3_c14nPrefix.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inC14N3_c14nTrim.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inC14N4_c14nDefault.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inC14N4_c14nTrim.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inC14N5_c14nDefault.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inC14N5_c14nTrim.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inC14N6_c14nDefault.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nDefault.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nPrefixQnameXpathElem.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nQnameElem.xml 
create mode 100644 Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nQnameXpathElem.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inNsDefault_c14nDefault.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inNsDefault_c14nPrefix.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inNsPushdown_c14nDefault.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inNsPushdown_c14nPrefix.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inNsRedecl_c14nDefault.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inNsRedecl_c14nPrefix.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inNsSort_c14nDefault.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inNsSort_c14nPrefix.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inNsSuperfluous_c14nDefault.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inNsSuperfluous_c14nPrefix.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nDefault.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nPrefix.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nPrefixQname.xml create mode 100644 Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nQname.xml create mode 100644 Lib/test/xmltestdata/c14n-20/world.txt create mode 100644 Misc/NEWS.d/next/Library/2019-04-26-10-10-34.bpo-13611.XEF4bg.rst diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index 66090af00fa..ef74d0c852c 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -465,6 +465,53 @@ Reference Functions ^^^^^^^^^ +.. function:: canonicalize(xml_data=None, *, out=None, from_file=None, **options) + + `C14N 2.0 <https://www.w3.org/TR/xml-c14n2/>`_ transformation function. + + Canonicalization is a way to normalise XML output in a way that allows + byte-by-byte comparisons and digital signatures. It reduces the freedom + that XML serializers have and instead generates a more constrained XML + representation. 
The main restrictions regard the placement of namespace + declarations, the ordering of attributes, and ignorable whitespace. + + This function takes an XML data string (*xml_data*) or a file path or + file-like object (*from_file*) as input, converts it to the canonical + form, and writes it out using the *out* file(-like) object, if provided, + or returns it as a text string if not. The output file receives text, + not bytes. It should therefore be opened in text mode with ``utf-8`` + encoding. + + Typical uses:: + + xml_data = "..." + print(canonicalize(xml_data)) + + with open("c14n_output.xml", mode='w', encoding='utf-8') as out_file: + canonicalize(xml_data, out=out_file) + + with open("c14n_output.xml", mode='w', encoding='utf-8') as out_file: + canonicalize(from_file="inputfile.xml", out=out_file) + + The configuration *options* are as follows: + + - *with_comments*: set to true to include comments (default: false) + - *strip_text*: set to true to strip whitespace before and after text content + (default: false) + - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}" + (default: false) + - *qname_aware_tags*: a set of qname aware tag names in which prefixes + should be replaced in text content (default: empty) + - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes + should be replaced in text content (default: empty) + - *exclude_attrs*: a set of attribute names that should not be serialised + - *exclude_tags*: a set of tag names that should not be serialised + + In the option list above, "a set" refers to any collection or iterable of + strings, no ordering is expected. + + .. versionadded:: 3.8 + .. function:: Comment(text=None) @@ -1114,6 +1161,19 @@ TreeBuilder Objects .. versionadded:: 3.8 +.. 
class:: C14NWriterTarget(write, *, \ + with_comments=False, strip_text=False, rewrite_prefixes=False, \ + qname_aware_tags=None, qname_aware_attrs=None, \ + exclude_attrs=None, exclude_tags=None) + + A `C14N 2.0 <https://www.w3.org/TR/xml-c14n2/>`_ writer. Arguments are the + same as for the :func:`canonicalize` function. This class does not build a + tree but translates the callback events directly into a serialised form + using the *write* function. + + .. versionadded:: 3.8 + + .. _elementtree-xmlparser-objects: XMLParser Objects diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index bbc55ddd634..37570bcad52 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -525,6 +525,10 @@ xml external entities by default. (Contributed by Christian Heimes in :issue:`17239`.) +* The :mod:`xml.etree.ElementTree` module provides a new function + :func:`~xml.etree.ElementTree.canonicalize()` that implements C14N 2.0. + (Contributed by Stefan Behnel in :issue:`13611`.) + Optimizations ============= diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 0abc42a173d..a59a11f025d 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -12,6 +12,7 @@ import io import itertools import locale import operator +import os import pickle import sys import textwrap @@ -20,6 +21,7 @@ import unittest import warnings import weakref +from functools import partial from itertools import product, islice from test import support from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr @@ -3527,6 +3529,231 @@ class NoAcceleratorTest(unittest.TestCase): self.assertIsInstance(pyET.Element.__init__, types.FunctionType) self.assertIsInstance(pyET.XMLParser.__init__, types.FunctionType) + +# -------------------------------------------------------------------- + +def c14n_roundtrip(xml, **options): + return pyET.canonicalize(xml, **options) + + +class C14NTest(unittest.TestCase): + maxDiff = None + + # + # simple roundtrip tests (from c14n.py) + + def 
test_simple_roundtrip(self): + # Basics + self.assertEqual(c14n_roundtrip(""), '') + self.assertEqual(c14n_roundtrip(""), # FIXME + '') + self.assertEqual(c14n_roundtrip(""), + '') + self.assertEqual(c14n_roundtrip(""), + '') + self.assertEqual(c14n_roundtrip(""), + '') + + # C14N spec + self.assertEqual(c14n_roundtrip("Hello, world!"), + 'Hello, world!') + self.assertEqual(c14n_roundtrip("2"), + '2') + self.assertEqual(c14n_roundtrip('"0" && value<"10" ?"valid":"error"]]>'), + 'value>"0" && value<"10" ?"valid":"error"') + self.assertEqual(c14n_roundtrip('''valid'''), + 'valid') + self.assertEqual(c14n_roundtrip(""), + '') + self.assertEqual(c14n_roundtrip(""), + '') + self.assertEqual(c14n_roundtrip(""), + '') + + # fragments from PJ's tests + #self.assertEqual(c14n_roundtrip(""), + #'') + + def test_c14n_exclusion(self): + xml = textwrap.dedent("""\ + + + abtext + + btext + + dtext + + + """) + self.assertEqual( + c14n_roundtrip(xml, strip_text=True), + '' + 'abtext' + 'btext' + 'dtext' + '') + self.assertEqual( + c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr']), + '' + 'abtext' + 'btext' + 'dtext' + '') + self.assertEqual( + c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d']), + '' + 'abtext' + 'btext' + '' + '') + self.assertEqual( + c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr'], + exclude_tags=['{http://example.com/x}d']), + '' + 'abtext' + 'btext' + '' + '') + self.assertEqual( + c14n_roundtrip(xml, strip_text=True, exclude_tags=['a', 'b']), + '' + 'dtext' + '') + self.assertEqual( + c14n_roundtrip(xml, exclude_tags=['a', 'b']), + '\n' + ' \n' + ' \n' + ' \n' + ' dtext\n' + ' \n' + '') + self.assertEqual( + c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d', 'b']), + '' + '' + '' + '') + self.assertEqual( + c14n_roundtrip(xml, exclude_tags=['{http://example.com/x}d', 'b']), + '\n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + ' \n' + 
'') + + # + # basic method=c14n tests from the c14n 2.0 specification. uses + # test files under xmltestdata/c14n-20. + + # note that this uses generated C14N versions of the standard ET.write + # output, not roundtripped C14N (see above). + + def test_xml_c14n2(self): + datadir = findfile("c14n-20", subdir="xmltestdata") + full_path = partial(os.path.join, datadir) + + files = [filename[:-4] for filename in sorted(os.listdir(datadir)) + if filename.endswith('.xml')] + input_files = [ + filename for filename in files + if filename.startswith('in') + ] + configs = { + filename: { + # sequential + option.tag.split('}')[-1]: ((option.text or '').strip(), option) + for option in ET.parse(full_path(filename) + ".xml").getroot() + } + for filename in files + if filename.startswith('c14n') + } + + tests = { + input_file: [ + (filename, configs[filename.rsplit('_', 1)[-1]]) + for filename in files + if filename.startswith(f'out_{input_file}_') + and filename.rsplit('_', 1)[-1] in configs + ] + for input_file in input_files + } + + # Make sure we found all test cases. 
+ self.assertEqual(30, len([ + output_file for output_files in tests.values() + for output_file in output_files])) + + def get_option(config, option_name, default=None): + return config.get(option_name, (default, ()))[0] + + for input_file, output_files in tests.items(): + for output_file, config in output_files: + keep_comments = get_option( + config, 'IgnoreComments') == 'true' # no, it's right :) + strip_text = get_option( + config, 'TrimTextNodes') == 'true' + rewrite_prefixes = get_option( + config, 'PrefixRewrite') == 'sequential' + if 'QNameAware' in config: + qattrs = [ + f"{{{el.get('NS')}}}{el.get('Name')}" + for el in config['QNameAware'][1].findall( + '{http://www.w3.org/2010/xml-c14n2}QualifiedAttr') + ] + qtags = [ + f"{{{el.get('NS')}}}{el.get('Name')}" + for el in config['QNameAware'][1].findall( + '{http://www.w3.org/2010/xml-c14n2}Element') + ] + else: + qtags = qattrs = None + + # Build subtest description from config. + config_descr = ','.join( + f"{name}={value or ','.join(c.tag.split('}')[-1] for c in children)}" + for name, (value, children) in sorted(config.items()) + ) + + with self.subTest(f"{output_file}({config_descr})"): + if input_file == 'inNsRedecl' and not rewrite_prefixes: + self.skipTest( + f"Redeclared namespace handling is not supported in {output_file}") + if input_file == 'inNsSuperfluous' and not rewrite_prefixes: + self.skipTest( + f"Redeclared namespace handling is not supported in {output_file}") + if 'QNameAware' in config and config['QNameAware'][1].find( + '{http://www.w3.org/2010/xml-c14n2}XPathElement') is not None: + self.skipTest( + f"QName rewriting in XPath text is not supported in {output_file}") + + f = full_path(input_file + ".xml") + if input_file == 'inC14N5': + # Hack: avoid setting up external entity resolution in the parser. 
+ with open(full_path('world.txt'), 'rb') as entity_file: + with open(f, 'rb') as f: + f = io.BytesIO(f.read().replace(b'&ent2;', entity_file.read())) + + text = ET.canonicalize( + from_file=f, + with_comments=keep_comments, + strip_text=strip_text, + rewrite_prefixes=rewrite_prefixes, + qname_aware_tags=qtags, qname_aware_attrs=qattrs) + + with open(full_path(output_file + ".xml"), 'r', encoding='utf8') as f: + expected = f.read() + if input_file == 'inC14N3': + # FIXME: cET resolves default attributes but ET does not! + expected = expected.replace(' attr="default"', '') + text = text.replace(' attr="default"', '') + self.assertEqual(expected, text) + # -------------------------------------------------------------------- @@ -3559,6 +3786,8 @@ def test_main(module=None): XMLParserTest, XMLPullParserTest, BugsTest, + KeywordArgsTest, + C14NTest, ] # These tests will only run for the pure-Python version that doesn't import diff --git a/Lib/test/xmltestdata/c14n-20/c14nComment.xml b/Lib/test/xmltestdata/c14n-20/c14nComment.xml new file mode 100644 index 00000000000..e95aa302d04 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/c14nComment.xml @@ -0,0 +1,4 @@ + + true + + diff --git a/Lib/test/xmltestdata/c14n-20/c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/c14nDefault.xml new file mode 100644 index 00000000000..c1364142cc5 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/c14nDefault.xml @@ -0,0 +1,3 @@ + + + diff --git a/Lib/test/xmltestdata/c14n-20/c14nPrefix.xml b/Lib/test/xmltestdata/c14n-20/c14nPrefix.xml new file mode 100644 index 00000000000..fb233b42b13 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/c14nPrefix.xml @@ -0,0 +1,4 @@ + + sequential + + diff --git a/Lib/test/xmltestdata/c14n-20/c14nPrefixQname.xml b/Lib/test/xmltestdata/c14n-20/c14nPrefixQname.xml new file mode 100644 index 00000000000..23188eedbc2 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/c14nPrefixQname.xml @@ -0,0 +1,7 @@ + + sequential + + + + + diff --git 
a/Lib/test/xmltestdata/c14n-20/c14nPrefixQnameXpathElem.xml b/Lib/test/xmltestdata/c14n-20/c14nPrefixQnameXpathElem.xml new file mode 100644 index 00000000000..626fc48f410 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/c14nPrefixQnameXpathElem.xml @@ -0,0 +1,8 @@ + + sequential + + + + + + diff --git a/Lib/test/xmltestdata/c14n-20/c14nQname.xml b/Lib/test/xmltestdata/c14n-20/c14nQname.xml new file mode 100644 index 00000000000..919e5903f5c --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/c14nQname.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/Lib/test/xmltestdata/c14n-20/c14nQnameElem.xml b/Lib/test/xmltestdata/c14n-20/c14nQnameElem.xml new file mode 100644 index 00000000000..0321f806195 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/c14nQnameElem.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/Lib/test/xmltestdata/c14n-20/c14nQnameXpathElem.xml b/Lib/test/xmltestdata/c14n-20/c14nQnameXpathElem.xml new file mode 100644 index 00000000000..c4890bc8b01 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/c14nQnameXpathElem.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/Lib/test/xmltestdata/c14n-20/c14nTrim.xml b/Lib/test/xmltestdata/c14n-20/c14nTrim.xml new file mode 100644 index 00000000000..ccb9cf65db7 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/c14nTrim.xml @@ -0,0 +1,4 @@ + + true + + diff --git a/Lib/test/xmltestdata/c14n-20/doc.dtd b/Lib/test/xmltestdata/c14n-20/doc.dtd new file mode 100644 index 00000000000..5c5d544a0df --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/doc.dtd @@ -0,0 +1,6 @@ + + + + + + diff --git a/Lib/test/xmltestdata/c14n-20/doc.xsl b/Lib/test/xmltestdata/c14n-20/doc.xsl new file mode 100644 index 00000000000..a3f2348cc2f --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/doc.xsl @@ -0,0 +1,5 @@ + + + diff --git a/Lib/test/xmltestdata/c14n-20/inC14N1.xml b/Lib/test/xmltestdata/c14n-20/inC14N1.xml new file mode 100644 index 00000000000..ed450c7341d --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/inC14N1.xml @@ -0,0 +1,14 @@ + + + + + + 
+Hello, world! + + + + + + diff --git a/Lib/test/xmltestdata/c14n-20/inC14N2.xml b/Lib/test/xmltestdata/c14n-20/inC14N2.xml new file mode 100644 index 00000000000..74eeea147c3 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/inC14N2.xml @@ -0,0 +1,11 @@ + + + A B + + A + + B + A B + C + + diff --git a/Lib/test/xmltestdata/c14n-20/inC14N3.xml b/Lib/test/xmltestdata/c14n-20/inC14N3.xml new file mode 100644 index 00000000000..fea78213f1a --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/inC14N3.xml @@ -0,0 +1,18 @@ +]> + + + + + + + + + + + + + + diff --git a/Lib/test/xmltestdata/c14n-20/inC14N4.xml b/Lib/test/xmltestdata/c14n-20/inC14N4.xml new file mode 100644 index 00000000000..909a847435b --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/inC14N4.xml @@ -0,0 +1,13 @@ + + +]> + + First line Second line + 2 + "0" && value<"10" ?"valid":"error"]]> + valid + + + + diff --git a/Lib/test/xmltestdata/c14n-20/inC14N5.xml b/Lib/test/xmltestdata/c14n-20/inC14N5.xml new file mode 100644 index 00000000000..501161bad51 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/inC14N5.xml @@ -0,0 +1,12 @@ + + + + + +]> + + &ent1;, &ent2;! 
+ + + diff --git a/Lib/test/xmltestdata/c14n-20/inC14N6.xml b/Lib/test/xmltestdata/c14n-20/inC14N6.xml new file mode 100644 index 00000000000..31e20718672 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/inC14N6.xml @@ -0,0 +1,2 @@ + +© diff --git a/Lib/test/xmltestdata/c14n-20/inNsContent.xml b/Lib/test/xmltestdata/c14n-20/inNsContent.xml new file mode 100644 index 00000000000..b9924660ba6 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/inNsContent.xml @@ -0,0 +1,4 @@ + + xsd:string + /soap-env:body/child::b:foo[@att1 != "c:val" and @att2 != 'xsd:string'] + diff --git a/Lib/test/xmltestdata/c14n-20/inNsDefault.xml b/Lib/test/xmltestdata/c14n-20/inNsDefault.xml new file mode 100644 index 00000000000..3e0d323bad2 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/inNsDefault.xml @@ -0,0 +1,3 @@ + + + diff --git a/Lib/test/xmltestdata/c14n-20/inNsPushdown.xml b/Lib/test/xmltestdata/c14n-20/inNsPushdown.xml new file mode 100644 index 00000000000..daa67d83f15 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/inNsPushdown.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/Lib/test/xmltestdata/c14n-20/inNsRedecl.xml b/Lib/test/xmltestdata/c14n-20/inNsRedecl.xml new file mode 100644 index 00000000000..10bd97beda3 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/inNsRedecl.xml @@ -0,0 +1,3 @@ + + + diff --git a/Lib/test/xmltestdata/c14n-20/inNsSort.xml b/Lib/test/xmltestdata/c14n-20/inNsSort.xml new file mode 100644 index 00000000000..8e9fc01c647 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/inNsSort.xml @@ -0,0 +1,4 @@ + + + + diff --git a/Lib/test/xmltestdata/c14n-20/inNsSuperfluous.xml b/Lib/test/xmltestdata/c14n-20/inNsSuperfluous.xml new file mode 100644 index 00000000000..f77720f7b0b --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/inNsSuperfluous.xml @@ -0,0 +1,4 @@ + + + + diff --git a/Lib/test/xmltestdata/c14n-20/inNsXml.xml b/Lib/test/xmltestdata/c14n-20/inNsXml.xml new file mode 100644 index 00000000000..7520cf3fb9e --- /dev/null +++ 
b/Lib/test/xmltestdata/c14n-20/inNsXml.xml @@ -0,0 +1,3 @@ + + data + diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N1_c14nComment.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N1_c14nComment.xml new file mode 100644 index 00000000000..d98d16840c6 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inC14N1_c14nComment.xml @@ -0,0 +1,6 @@ + +Hello, world! + + + \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N1_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N1_c14nDefault.xml new file mode 100644 index 00000000000..af9a9770578 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inC14N1_c14nDefault.xml @@ -0,0 +1,4 @@ + +Hello, world! + \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N2_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N2_c14nDefault.xml new file mode 100644 index 00000000000..2afa15ccb36 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inC14N2_c14nDefault.xml @@ -0,0 +1,11 @@ + + + A B + + A + + B + A B + C + + \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N2_c14nTrim.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N2_c14nTrim.xml new file mode 100644 index 00000000000..7a1dc32946b --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inC14N2_c14nTrim.xml @@ -0,0 +1 @@ +A BABA BC \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N3_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N3_c14nDefault.xml new file mode 100644 index 00000000000..662e108aa8a --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inC14N3_c14nDefault.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N3_c14nPrefix.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N3_c14nPrefix.xml new file mode 100644 index 00000000000..041e1ec8ebe --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inC14N3_c14nPrefix.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + \ No 
newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N3_c14nTrim.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N3_c14nTrim.xml new file mode 100644 index 00000000000..4f35ad9662d --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inC14N3_c14nTrim.xml @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N4_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N4_c14nDefault.xml new file mode 100644 index 00000000000..243d0e61f2e --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inC14N4_c14nDefault.xml @@ -0,0 +1,10 @@ + + First line +Second line + 2 + value>"0" && value<"10" ?"valid":"error" + valid + + + + \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N4_c14nTrim.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N4_c14nTrim.xml new file mode 100644 index 00000000000..24d83ba8ab0 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inC14N4_c14nTrim.xml @@ -0,0 +1,2 @@ +First line +Second line2value>"0" && value<"10" ?"valid":"error"valid \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N5_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N5_c14nDefault.xml new file mode 100644 index 00000000000..c232e740aee --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inC14N5_c14nDefault.xml @@ -0,0 +1,3 @@ + + Hello, world! + \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N5_c14nTrim.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N5_c14nTrim.xml new file mode 100644 index 00000000000..3fa84b1e986 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inC14N5_c14nTrim.xml @@ -0,0 +1 @@ +Hello, world! 
\ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N6_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N6_c14nDefault.xml new file mode 100644 index 00000000000..0be38f98cb1 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inC14N6_c14nDefault.xml @@ -0,0 +1 @@ +© \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nDefault.xml new file mode 100644 index 00000000000..62d7e004a44 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nDefault.xml @@ -0,0 +1,4 @@ + + xsd:string + /soap-env:body/child::b:foo[@att1 != "c:val" and @att2 != 'xsd:string'] + \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nPrefixQnameXpathElem.xml b/Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nPrefixQnameXpathElem.xml new file mode 100644 index 00000000000..20e1c2e9d6d --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nPrefixQnameXpathElem.xml @@ -0,0 +1,4 @@ + + n1:string + /n3:body/child::n2:foo[@att1 != "c:val" and @att2 != 'xsd:string'] + \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nQnameElem.xml b/Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nQnameElem.xml new file mode 100644 index 00000000000..db8680daa03 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nQnameElem.xml @@ -0,0 +1,4 @@ + + xsd:string + /soap-env:body/child::b:foo[@att1 != "c:val" and @att2 != 'xsd:string'] + \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nQnameXpathElem.xml b/Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nQnameXpathElem.xml new file mode 100644 index 00000000000..df3b21579fa --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nQnameXpathElem.xml @@ -0,0 +1,4 @@ + + xsd:string + /soap-env:body/child::b:foo[@att1 != "c:val" and @att2 != 'xsd:string'] + \ No newline at 
end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsDefault_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inNsDefault_c14nDefault.xml new file mode 100644 index 00000000000..674b076dd6d --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inNsDefault_c14nDefault.xml @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsDefault_c14nPrefix.xml b/Lib/test/xmltestdata/c14n-20/out_inNsDefault_c14nPrefix.xml new file mode 100644 index 00000000000..83edaae91e7 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inNsDefault_c14nPrefix.xml @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsPushdown_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inNsPushdown_c14nDefault.xml new file mode 100644 index 00000000000..fa4f21b5d0a --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inNsPushdown_c14nDefault.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsPushdown_c14nPrefix.xml b/Lib/test/xmltestdata/c14n-20/out_inNsPushdown_c14nPrefix.xml new file mode 100644 index 00000000000..6d579200c9d --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inNsPushdown_c14nPrefix.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsRedecl_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inNsRedecl_c14nDefault.xml new file mode 100644 index 00000000000..ba37f925103 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inNsRedecl_c14nDefault.xml @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsRedecl_c14nPrefix.xml b/Lib/test/xmltestdata/c14n-20/out_inNsRedecl_c14nPrefix.xml new file mode 100644 index 00000000000..af3bb2d6f06 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inNsRedecl_c14nPrefix.xml @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git 
a/Lib/test/xmltestdata/c14n-20/out_inNsSort_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inNsSort_c14nDefault.xml new file mode 100644 index 00000000000..8a92c5c61c2 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inNsSort_c14nDefault.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsSort_c14nPrefix.xml b/Lib/test/xmltestdata/c14n-20/out_inNsSort_c14nPrefix.xml new file mode 100644 index 00000000000..8d44c84fe5d --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inNsSort_c14nPrefix.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsSuperfluous_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inNsSuperfluous_c14nDefault.xml new file mode 100644 index 00000000000..6bb862d763d --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inNsSuperfluous_c14nDefault.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsSuperfluous_c14nPrefix.xml b/Lib/test/xmltestdata/c14n-20/out_inNsSuperfluous_c14nPrefix.xml new file mode 100644 index 00000000000..700a16d42a7 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inNsSuperfluous_c14nPrefix.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nDefault.xml new file mode 100644 index 00000000000..1689f3bf423 --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nDefault.xml @@ -0,0 +1,3 @@ + + data + \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nPrefix.xml b/Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nPrefix.xml new file mode 100644 index 00000000000..38508a47f6b --- /dev/null +++ b/Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nPrefix.xml @@ -0,0 +1,3 @@ + + data + \ No newline at end of file diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nPrefixQname.xml 
# --------------------------------------------------------------------
# C14N 2.0
#
# NOTE: this section relies on names provided by the enclosing module:
# ``io``, ``re``, ``XMLParser``, ``parse``, ``_namespace_map`` and
# ``_raise_serialization_error``.  ``canonicalize`` and ``C14NWriterTarget``
# are part of the public API and are listed in the module's ``__all__``.

def canonicalize(xml_data=None, *, out=None, from_file=None, **options):
    """Convert XML to its C14N 2.0 serialised form.

    If *out* is provided, it must be a file or file-like object that receives
    the serialised canonical XML output (text, not bytes) through its ``.write()``
    method.  To write to a file, open it in text mode with encoding "utf-8".
    If *out* is not provided, this function returns the output as text string.

    Either *xml_data* (an XML string) or *from_file* (a file path or
    file-like object) must be provided as input.  If both are given,
    *xml_data* takes precedence and *from_file* is ignored.

    The configuration options are the same as for the ``C14NWriterTarget``.

    Raises ValueError if neither *xml_data* nor *from_file* is provided.
    """
    if xml_data is None and from_file is None:
        raise ValueError("Either 'xml_data' or 'from_file' must be provided as input")

    # Collect the output in memory only if the caller did not pass a sink.
    sio = None
    if out is None:
        sio = out = io.StringIO()

    parser = XMLParser(target=C14NWriterTarget(out.write, **options))

    if xml_data is not None:
        parser.feed(xml_data)
        parser.close()
    elif from_file is not None:
        parse(from_file, parser=parser)

    return sio.getvalue() if sio is not None else None


# Matches text that consists of exactly one "prefix:name" pair.
_looks_like_prefix_name = re.compile(r'^\w+:\w+$', re.UNICODE).match


class C14NWriterTarget:
    """
    Canonicalization writer target for the XMLParser.

    Serialises parse events to XML C14N 2.0.

    The *write* function is used for writing out the resulting data stream
    as text (not bytes).  To write to a file, open it in text mode with encoding
    "utf-8" and pass its ``.write`` method.

    Configuration options:

    - *with_comments*: set to true to include comments
    - *strip_text*: set to true to strip whitespace before and after text content
    - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}"
    - *qname_aware_tags*: a set of qname aware tag names in which prefixes
                          should be replaced in text content
    - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes
                           should be replaced in text content
    - *exclude_attrs*: a set of attribute names that should not be serialised
    - *exclude_tags*: a set of tag names that should not be serialised
    """
    def __init__(self, write, *,
                 with_comments=False, strip_text=False, rewrite_prefixes=False,
                 qname_aware_tags=None, qname_aware_attrs=None,
                 exclude_attrs=None, exclude_tags=None):
        self._write = write
        # Text chunks received since the last flush.
        self._data = []
        self._with_comments = with_comments
        self._strip_text = strip_text
        self._exclude_attrs = set(exclude_attrs) if exclude_attrs else None
        self._exclude_tags = set(exclude_tags) if exclude_tags else None

        self._rewrite_prefixes = rewrite_prefixes
        if qname_aware_tags:
            self._qname_aware_tags = set(qname_aware_tags)
        else:
            self._qname_aware_tags = None
        if qname_aware_attrs:
            # Bound method: returns the qname aware names found in an
            # attribute mapping.
            self._find_qname_aware_attrs = set(qname_aware_attrs).intersection
        else:
            self._find_qname_aware_attrs = None

        # Stack with globally and newly declared namespaces as (uri, prefix) pairs.
        self._declared_ns_stack = [[
            ("http://www.w3.org/XML/1998/namespace", "xml"),
        ]]
        # Stack with user declared namespace prefixes as (uri, prefix) pairs.
        self._ns_stack = []
        if not rewrite_prefixes:
            self._ns_stack.append(list(_namespace_map.items()))
        self._ns_stack.append([])
        # Maps URIs to generated prefixes (only used with rewrite_prefixes).
        self._prefix_map = {}
        # One entry per open element, plus a document level default.
        self._preserve_space = [False]
        # Arguments of a start() call that waits for its text content.
        self._pending_start = None
        self._root_seen = False
        self._root_done = False
        # Nesting depth inside an excluded element (0 == not excluded).
        self._ignored_depth = 0

    def _iter_namespaces(self, ns_stack, _reversed=reversed):
        """Yield the (uri, prefix) pairs of *ns_stack*, innermost level first."""
        for namespaces in _reversed(ns_stack):
            if namespaces:  # almost no element declares new namespaces
                yield from namespaces

    def _resolve_prefix_name(self, prefixed_name):
        """Return "{uri}name" for a "prefix:name" string.

        Raises ValueError if the prefix is not declared in scope.
        """
        prefix, name = prefixed_name.split(':', 1)
        for uri, p in self._iter_namespaces(self._ns_stack):
            if p == prefix:
                return f'{{{uri}}}{name}'
        raise ValueError(f'Prefix {prefix} of QName "{prefixed_name}" is not declared in scope')

    def _qname(self, qname, uri=None):
        """Return a (prefixed name, local name, uri) tuple for *qname*.

        *qname* is in "{uri}name" notation unless *uri* is passed separately.
        If the namespace is not declared in the output yet, a declaration is
        added to the innermost level of the declared namespace stack.

        Raises ValueError if the namespace URI is not declared in scope.
        """
        if uri is None:
            uri, tag = qname[1:].rsplit('}', 1) if qname[:1] == '{' else ('', qname)
        else:
            tag = qname

        # An inner declaration may shadow the prefix of an outer one, so
        # only accept a prefix that was not seen on a deeper level.
        prefixes_seen = set()
        for u, prefix in self._iter_namespaces(self._declared_ns_stack):
            if u == uri and prefix not in prefixes_seen:
                return f'{prefix}:{tag}' if prefix else tag, tag, uri
            prefixes_seen.add(prefix)

        # Not declared yet => add new declaration.
        if self._rewrite_prefixes:
            if uri in self._prefix_map:
                prefix = self._prefix_map[uri]
            else:
                prefix = self._prefix_map[uri] = f'n{len(self._prefix_map)}'
            self._declared_ns_stack[-1].append((uri, prefix))
            return f'{prefix}:{tag}', tag, uri

        if not uri and '' not in prefixes_seen:
            # No default namespace declared => no prefix needed.
            return tag, tag, uri

        for u, prefix in self._iter_namespaces(self._ns_stack):
            if u == uri:
                self._declared_ns_stack[-1].append((uri, prefix))
                return f'{prefix}:{tag}' if prefix else tag, tag, uri

        raise ValueError(f'Namespace "{uri}" is not declared in scope')

    def data(self, data):
        """Buffer text content unless it belongs to an excluded element."""
        if not self._ignored_depth:
            self._data.append(data)

    def _flush(self, _join_text=''.join):
        """Write out buffered text and any start tag that was waiting for it."""
        data = _join_text(self._data)
        del self._data[:]
        if self._strip_text and not self._preserve_space[-1]:
            data = data.strip()
        if self._pending_start is not None:
            args, self._pending_start = self._pending_start, None
            qname_text = data if data and _looks_like_prefix_name(data) else None
            self._start(*args, qname_text)
            if qname_text is not None:
                # _start() has already written the resolved qname text.
                return
        if data and self._root_seen:
            self._write(_escape_cdata_c14n(data))

    def start_ns(self, prefix, uri):
        """Record a namespace declaration made by the input document."""
        if self._ignored_depth:
            return
        # we may have to resolve qnames in text content
        if self._data:
            self._flush()
        self._ns_stack[-1].append((uri, prefix))

    def start(self, tag, attrs):
        """Handle the start of an element."""
        if self._exclude_tags is not None and (
                self._ignored_depth or tag in self._exclude_tags):
            self._ignored_depth += 1
            return
        if self._data:
            self._flush()

        new_namespaces = []
        self._declared_ns_stack.append(new_namespaces)

        if self._qname_aware_tags is not None and tag in self._qname_aware_tags:
            # Need to parse text first to see if it requires a prefix declaration.
            self._pending_start = (tag, attrs, new_namespaces)
            return
        self._start(tag, attrs, new_namespaces)

    def _start(self, tag, attrs, new_namespaces, qname_text=None):
        """Write the start tag of an element.

        *new_namespaces* is the innermost list of the declared namespace
        stack; resolving names may add declarations to it.  *qname_text* is
        the "prefix:name" text content of a qname aware tag, if any.
        """
        if self._exclude_attrs is not None and attrs:
            attrs = {k: v for k, v in attrs.items() if k not in self._exclude_attrs}

        qnames = {tag, *attrs}
        resolved_names = {}

        # Resolve prefixes in attribute and tag text.
        if qname_text is not None:
            qname = resolved_names[qname_text] = self._resolve_prefix_name(qname_text)
            qnames.add(qname)
        if self._find_qname_aware_attrs is not None and attrs:
            qattrs = self._find_qname_aware_attrs(attrs)
            if qattrs:
                for attr_name in qattrs:
                    value = attrs[attr_name]
                    if _looks_like_prefix_name(value):
                        qname = resolved_names[value] = self._resolve_prefix_name(value)
                        qnames.add(qname)
            else:
                qattrs = None
        else:
            qattrs = None

        # Assign prefixes in lexicographical order of used URIs.
        parse_qname = self._qname
        parsed_qnames = {n: parse_qname(n) for n in sorted(
            qnames, key=lambda n: n.split('}', 1))}

        # Write namespace declarations in prefix order ...
        if new_namespaces:
            attr_list = [
                ('xmlns:' + prefix if prefix else 'xmlns', uri)
                for uri, prefix in new_namespaces
            ]
            attr_list.sort()
        else:
            # almost always empty
            attr_list = []

        # ... followed by attributes in URI+name order
        if attrs:
            for k, v in sorted(attrs.items()):
                if qattrs is not None and k in qattrs and v in resolved_names:
                    v = parsed_qnames[resolved_names[v]][0]
                attr_qname, attr_name, uri = parsed_qnames[k]
                # No prefix for attributes in default ('') namespace.
                attr_list.append((attr_qname if uri else attr_name, v))

        # Honour xml:space attributes.
        space_behaviour = attrs.get('{http://www.w3.org/XML/1998/namespace}space')
        self._preserve_space.append(
            space_behaviour == 'preserve' if space_behaviour
            else self._preserve_space[-1])

        # Write the tag.
        write = self._write
        write('<' + parsed_qnames[tag][0])
        if attr_list:
            write(''.join([f' {k}="{_escape_attrib_c14n(v)}"' for k, v in attr_list]))
        write('>')

        # Write the resolved qname text content.
        if qname_text is not None:
            write(_escape_cdata_c14n(parsed_qnames[resolved_names[qname_text]][0]))

        self._root_seen = True
        self._ns_stack.append([])

    def end(self, tag):
        """Handle the end of an element and write its closing tag."""
        if self._ignored_depth:
            self._ignored_depth -= 1
            return
        if self._data:
            self._flush()
        self._write(f'</{self._qname(tag)[0]}>')
        self._preserve_space.pop()
        # Only the document level default is left once the root is closed.
        self._root_done = len(self._preserve_space) == 1
        self._declared_ns_stack.pop()
        self._ns_stack.pop()

    def comment(self, text):
        """Write a comment if *with_comments* is enabled.

        Comments outside of the root element are separated from it
        by a newline.
        """
        if not self._with_comments:
            return
        if self._ignored_depth:
            return
        if self._root_done:
            self._write('\n')
        elif self._root_seen and self._data:
            self._flush()
        self._write(f'<!--{_escape_cdata_c14n(text)}-->')
        if not self._root_seen:
            self._write('\n')

    def pi(self, target, data):
        """Write a processing instruction.

        Processing instructions outside of the root element are separated
        from it by a newline.
        """
        if self._ignored_depth:
            return
        if self._root_done:
            self._write('\n')
        elif self._root_seen and self._data:
            self._flush()
        self._write(
            f'<?{target} {_escape_cdata_c14n(data)}?>' if data else f'<?{target}?>')
        if not self._root_seen:
            self._write('\n')


def _escape_cdata_c14n(text):
    """Escape character data: "&", "<", ">" and carriage returns."""
    try:
        # it's worth avoiding do-nothing calls for strings that are
        # shorter than 500 character, or so.  assume that's, by far,
        # the most common case in most applications.
        if '&' in text:
            # Must come first to avoid escaping the "&" of other references.
            text = text.replace('&', '&amp;')
        if '<' in text:
            text = text.replace('<', '&lt;')
        if '>' in text:
            text = text.replace('>', '&gt;')
        if '\r' in text:
            text = text.replace('\r', '&#xD;')
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)


def _escape_attrib_c14n(text):
    """Escape an attribute value: "&", "<", '"', tabs and line breaks."""
    try:
        if '&' in text:
            # Must come first to avoid escaping the "&" of other references.
            text = text.replace('&', '&amp;')
        if '<' in text:
            text = text.replace('<', '&lt;')
        if '"' in text:
            text = text.replace('"', '&quot;')
        if '\t' in text:
            text = text.replace('\t', '&#x9;')
        if '\n' in text:
            text = text.replace('\n', '&#xA;')
        if '\r' in text:
            text = text.replace('\r', '&#xD;')
        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)