mirror of https://github.com/python/cpython
Merged revisions 78123 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k ........ r78123 | antoine.pitrou | 2010-02-09 17:51:16 +0100 (mar., 09 févr. 2010) | 5 lines Issue #6233: ElementTree failed converting unicode characters to XML entities when they couldn't be represented in the requested output encoding. Patch by Jerry Chen. ........
This commit is contained in:
parent
0f36573f10
commit
54319287c9
|
@ -210,6 +210,17 @@ def check_encoding(ET, encoding):
|
||||||
"""
|
"""
|
||||||
ET.XML("<?xml version='1.0' encoding='%s'?><xml />" % encoding)
|
ET.XML("<?xml version='1.0' encoding='%s'?><xml />" % encoding)
|
||||||
|
|
||||||
|
def check_issue6233():
|
||||||
|
"""
|
||||||
|
>>> from xml.etree import ElementTree as ET
|
||||||
|
|
||||||
|
>>> e = ET.XML("<?xml version='1.0' encoding='utf-8'?><body>t\xe3g</body>")
|
||||||
|
>>> ET.tostring(e, 'ascii')
|
||||||
|
b"<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"
|
||||||
|
>>> e = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><body>t\xe3g</body>".encode('iso-8859-1')) # create byte string with the right encoding
|
||||||
|
>>> ET.tostring(e, 'ascii')
|
||||||
|
b"<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"
|
||||||
|
"""
|
||||||
|
|
||||||
#
|
#
|
||||||
# xinclude tests (samples from appendix C of the xinclude specification)
|
# xinclude tests (samples from appendix C of the xinclude specification)
|
||||||
|
|
|
@ -662,9 +662,9 @@ class ElementTree:
|
||||||
# write XML to file
|
# write XML to file
|
||||||
tag = node.tag
|
tag = node.tag
|
||||||
if tag is Comment:
|
if tag is Comment:
|
||||||
file.write(_encode("<!-- %s -->" % _escape_cdata(node.text), encoding))
|
file.write(b"<!-- " + _encode_cdata(node.text, encoding) + b" -->")
|
||||||
elif tag is ProcessingInstruction:
|
elif tag is ProcessingInstruction:
|
||||||
file.write(_encode("<?%s?>" % _escape_cdata(node.text), encoding))
|
file.write(b"<?" + _encode_cdata(node.text, encoding) + b"?>")
|
||||||
else:
|
else:
|
||||||
items = list(node.items())
|
items = list(node.items())
|
||||||
xmlns_items = [] # new namespaces in this scope
|
xmlns_items = [] # new namespaces in this scope
|
||||||
|
@ -696,7 +696,7 @@ class ElementTree:
|
||||||
if node.text or len(node):
|
if node.text or len(node):
|
||||||
file.write(_encode(">", encoding))
|
file.write(_encode(">", encoding))
|
||||||
if node.text:
|
if node.text:
|
||||||
file.write(_encode(_escape_cdata(node.text), encoding))
|
file.write(_encode_cdata(node.text, encoding))
|
||||||
for n in node:
|
for n in node:
|
||||||
self._write(file, n, encoding, namespaces)
|
self._write(file, n, encoding, namespaces)
|
||||||
file.write(_encode("</" + tag + ">", encoding))
|
file.write(_encode("</" + tag + ">", encoding))
|
||||||
|
@ -705,7 +705,7 @@ class ElementTree:
|
||||||
for k, v in xmlns_items:
|
for k, v in xmlns_items:
|
||||||
del namespaces[v]
|
del namespaces[v]
|
||||||
if node.tail:
|
if node.tail:
|
||||||
file.write(_encode(_escape_cdata(node.tail), encoding))
|
file.write(_encode_cdata(node.tail, encoding))
|
||||||
|
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
# helpers
|
# helpers
|
||||||
|
@ -788,13 +788,16 @@ def _encode_entity(text, pattern=_escape):
|
||||||
# the following functions assume an ascii-compatible encoding
|
# the following functions assume an ascii-compatible encoding
|
||||||
# (or "utf-16")
|
# (or "utf-16")
|
||||||
|
|
||||||
def _escape_cdata(text):
|
def _encode_cdata(text, encoding):
|
||||||
# escape character data
|
# escape character data
|
||||||
try:
|
try:
|
||||||
text = text.replace("&", "&amp;")
|
text = text.replace("&", "&amp;")
|
||||||
text = text.replace("<", "&lt;")
|
text = text.replace("<", "&lt;")
|
||||||
text = text.replace(">", "&gt;")
|
text = text.replace(">", "&gt;")
|
||||||
return text
|
if encoding:
|
||||||
|
return text.encode(encoding, "xmlcharrefreplace")
|
||||||
|
else:
|
||||||
|
return text
|
||||||
except (TypeError, AttributeError):
|
except (TypeError, AttributeError):
|
||||||
_raise_serialization_error(text)
|
_raise_serialization_error(text)
|
||||||
|
|
||||||
|
|
|
@ -126,6 +126,7 @@ Greg Chapman
|
||||||
Brad Chapman
|
Brad Chapman
|
||||||
David Chaum
|
David Chaum
|
||||||
Nicolas Chauvat
|
Nicolas Chauvat
|
||||||
|
Jerry Chen
|
||||||
Michael Chermside
|
Michael Chermside
|
||||||
Albert Chin-A-Young
|
Albert Chin-A-Young
|
||||||
Adal Chiriliuc
|
Adal Chiriliuc
|
||||||
|
|
|
@ -79,6 +79,10 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #6233: ElementTree failed converting unicode characters to XML
|
||||||
|
entities when they couldn't be represented in the requested output
|
||||||
|
encoding. Patch by Jerry Chen.
|
||||||
|
|
||||||
- Issue #4772: Raise a ValueError when an unknown Bluetooth protocol is
|
- Issue #4772: Raise a ValueError when an unknown Bluetooth protocol is
|
||||||
specified, rather than fall through to AF_PACKET (in the `socket` module).
|
specified, rather than fall through to AF_PACKET (in the `socket` module).
|
||||||
Also, raise ValueError rather than TypeError when an unknown TIPC address
|
Also, raise ValueError rather than TypeError when an unknown TIPC address
|
||||||
|
|
Loading…
Reference in New Issue