From 54319287c9b3df17c0f6ce15b14619a9814ae292 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 9 Feb 2010 16:53:09 +0000 Subject: [PATCH] Merged revisions 78123 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/py3k MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ........ r78123 | antoine.pitrou | 2010-02-09 17:51:16 +0100 (mar., 09 févr. 2010) | 5 lines Issue #6233: ElementTree failed converting unicode characters to XML entities when they couldn't be represented in the requested output encoding. Patch by Jerry Chen. ........ --- Lib/test/test_xml_etree.py | 11 +++++++++++ Lib/xml/etree/ElementTree.py | 15 +++++++++------ Misc/ACKS | 1 + Misc/NEWS | 4 ++++ 4 files changed, 25 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 895902f02f3..a7ad48b2e8e 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -210,6 +210,17 @@ def check_encoding(ET, encoding): """ ET.XML("" % encoding) +def check_issue6233(): + """ + >>> from xml.etree import ElementTree as ET + + >>> e = ET.XML("t\xe3g") + >>> ET.tostring(e, 'ascii') + b"\\ntãg" + >>> e = ET.XML("t\xe3g".encode('iso-8859-1')) # create byte string with the right encoding + >>> ET.tostring(e, 'ascii') + b"\\ntãg" + """ # # xinclude tests (samples from appendix C of the xinclude specification) diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index cfac4f7090f..c47573e3136 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -662,9 +662,9 @@ class ElementTree: # write XML to file tag = node.tag if tag is Comment: - file.write(_encode("" % _escape_cdata(node.text), encoding)) + file.write(b"") elif tag is ProcessingInstruction: - file.write(_encode("" % _escape_cdata(node.text), encoding)) + file.write(b"") else: items = list(node.items()) xmlns_items = [] # new namespaces in this scope @@ -696,7 +696,7 @@ class ElementTree: if 
node.text or len(node): file.write(_encode(">", encoding)) if node.text: - file.write(_encode(_escape_cdata(node.text), encoding)) + file.write(_encode_cdata(node.text, encoding)) for n in node: self._write(file, n, encoding, namespaces) file.write(_encode("", encoding)) @@ -705,7 +705,7 @@ class ElementTree: for k, v in xmlns_items: del namespaces[v] if node.tail: - file.write(_encode(_escape_cdata(node.tail), encoding)) + file.write(_encode_cdata(node.tail, encoding)) # -------------------------------------------------------------------- # helpers @@ -788,13 +788,16 @@ def _encode_entity(text, pattern=_escape): # the following functions assume an ascii-compatible encoding # (or "utf-16") -def _escape_cdata(text): +def _encode_cdata(text, encoding): # escape character data try: text = text.replace("&", "&") text = text.replace("<", "<") text = text.replace(">", ">") - return text + if encoding: + return text.encode(encoding, "xmlcharrefreplace") + else: + return text except (TypeError, AttributeError): _raise_serialization_error(text) diff --git a/Misc/ACKS b/Misc/ACKS index c5d0e306b14..2ea5ab9f0df 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -126,6 +126,7 @@ Greg Chapman Brad Chapman David Chaum Nicolas Chauvat +Jerry Chen Michael Chermside Albert Chin-A-Young Adal Chiriliuc diff --git a/Misc/NEWS b/Misc/NEWS index 7c862b0c9e6..c5b971d3ade 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -79,6 +79,10 @@ Core and Builtins Library ------- +- Issue #6233: ElementTree failed converting unicode characters to XML + entities when they couldn't be represented in the requested output + encoding. Patch by Jerry Chen. + - Issue #4772: Raise a ValueError when an unknown Bluetooth protocol is specified, rather than fall through to AF_PACKET (in the `socket` module). Also, raise ValueError rather than TypeError when an unknown TIPC address