Issue #6233: ElementTree failed converting unicode characters to XML
entities when they couldn't be represented in the requested output encoding. Patch by Jerry Chen.
This commit is contained in:
parent
28a817e3ba
commit
c77dd32be4
|
@ -210,6 +210,17 @@ def check_encoding(ET, encoding):
|
|||
"""
|
||||
ET.XML("<?xml version='1.0' encoding='%s'?><xml />" % encoding)
|
||||
|
||||
def check_issue6233():
|
||||
"""
|
||||
>>> from xml.etree import ElementTree as ET
|
||||
|
||||
>>> e = ET.XML("<?xml version='1.0' encoding='utf-8'?><body>t\xe3g</body>")
|
||||
>>> ET.tostring(e, 'ascii')
|
||||
b"<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"
|
||||
>>> e = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><body>t\xe3g</body>".encode('iso-8859-1')) # create byte string with the right encoding
|
||||
>>> ET.tostring(e, 'ascii')
|
||||
b"<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"
|
||||
"""
|
||||
|
||||
#
|
||||
# xinclude tests (samples from appendix C of the xinclude specification)
|
||||
|
|
|
@ -662,9 +662,9 @@ class ElementTree:
|
|||
# write XML to file
|
||||
tag = node.tag
|
||||
if tag is Comment:
|
||||
file.write(_encode("<!-- %s -->" % _escape_cdata(node.text), encoding))
|
||||
file.write(b"<!-- " + _encode_cdata(node.text, encoding) + b" -->")
|
||||
elif tag is ProcessingInstruction:
|
||||
file.write(_encode("<?%s?>" % _escape_cdata(node.text), encoding))
|
||||
file.write(b"<?" + _encode_cdata(node.text, encoding) + b"?>")
|
||||
else:
|
||||
items = list(node.items())
|
||||
xmlns_items = [] # new namespaces in this scope
|
||||
|
@ -696,7 +696,7 @@ class ElementTree:
|
|||
if node.text or len(node):
|
||||
file.write(_encode(">", encoding))
|
||||
if node.text:
|
||||
file.write(_encode(_escape_cdata(node.text), encoding))
|
||||
file.write(_encode_cdata(node.text, encoding))
|
||||
for n in node:
|
||||
self._write(file, n, encoding, namespaces)
|
||||
file.write(_encode("</" + tag + ">", encoding))
|
||||
|
@ -705,7 +705,7 @@ class ElementTree:
|
|||
for k, v in xmlns_items:
|
||||
del namespaces[v]
|
||||
if node.tail:
|
||||
file.write(_encode(_escape_cdata(node.tail), encoding))
|
||||
file.write(_encode_cdata(node.tail, encoding))
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
# helpers
|
||||
|
@ -788,13 +788,16 @@ def _encode_entity(text, pattern=_escape):
|
|||
# the following functions assume an ascii-compatible encoding
|
||||
# (or "utf-16")
|
||||
|
||||
def _escape_cdata(text):
|
||||
def _encode_cdata(text, encoding):
|
||||
# escape character data
|
||||
try:
|
||||
text = text.replace("&", "&amp;")
|
||||
text = text.replace("<", "&lt;")
|
||||
text = text.replace(">", "&gt;")
|
||||
return text
|
||||
if encoding:
|
||||
return text.encode(encoding, "xmlcharrefreplace")
|
||||
else:
|
||||
return text
|
||||
except (TypeError, AttributeError):
|
||||
_raise_serialization_error(text)
|
||||
|
||||
|
|
|
@ -131,6 +131,7 @@ Greg Chapman
|
|||
Brad Chapman
|
||||
David Chaum
|
||||
Nicolas Chauvat
|
||||
Jerry Chen
|
||||
Michael Chermside
|
||||
Albert Chin-A-Young
|
||||
Adal Chiriliuc
|
||||
|
|
|
@ -242,6 +242,10 @@ C-API
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #6233: ElementTree failed converting unicode characters to XML
|
||||
entities when they couldn't be represented in the requested output
|
||||
encoding. Patch by Jerry Chen.
|
||||
|
||||
- Issue #6003: add an argument to ``zipfile.Zipfile.writestr`` to
|
||||
specify the compression type.
|
||||
|
||||
|
|
Loading…
Reference in New Issue