Issue #8047: Fix the xml.etree serializer to return bytes by default.
Use ``encoding="unicode"`` to generate a Unicode string.
This commit is contained in:
parent
1a0a737b13
commit
c17f17294f
|
@ -148,20 +148,22 @@ Functions
|
|||
arguments. Returns an element instance.
|
||||
|
||||
|
||||
.. function:: tostring(element, encoding=None, method="xml")
|
||||
.. function:: tostring(element, encoding="us-ascii", method="xml")
|
||||
|
||||
Generates a string representation of an XML element, including all
|
||||
subelements. *element* is an :class:`Element` instance. *encoding* [1]_ is
|
||||
the output encoding (default is None). *method* is either ``"xml"``,
|
||||
the output encoding (default is US-ASCII). Use ``encoding="unicode"`` to
|
||||
generate a Unicode string. *method* is either ``"xml"``,
|
||||
``"html"`` or ``"text"`` (default is ``"xml"``). Returns an (optionally)
|
||||
encoded string containing the XML data.
|
||||
|
||||
|
||||
.. function:: tostringlist(element, encoding=None, method="xml")
|
||||
.. function:: tostringlist(element, encoding="us-ascii", method="xml")
|
||||
|
||||
Generates a string representation of an XML element, including all
|
||||
subelements. *element* is an :class:`Element` instance. *encoding* [1]_ is
|
||||
the output encoding (default is None). *method* is either ``"xml"``,
|
||||
the output encoding (default is US-ASCII). Use ``encoding="unicode"`` to
|
||||
generate a Unicode string. *method* is either ``"xml"``,
|
||||
``"html"`` or ``"text"`` (default is ``"xml"``). Returns a list of
|
||||
(optionally) encoded strings containing the XML data. It does not guarantee
|
||||
any specific sequence, except that ``b"".join(tostringlist(element)) ==
|
||||
|
@ -430,6 +432,7 @@ ElementTree Objects
|
|||
|
||||
|
||||
.. method:: getroot()
|
||||
|
||||
Returns the root element for this tree.
|
||||
|
||||
|
||||
|
@ -457,15 +460,16 @@ ElementTree Objects
|
|||
root element.
|
||||
|
||||
|
||||
.. method:: write(file, encoding=None, xml_declaration=None, method="xml")
|
||||
.. method:: write(file, encoding="us-ascii", xml_declaration=None, method="xml")
|
||||
|
||||
Writes the element tree to a file, as XML. *file* is a file name, or a
|
||||
file object opened for writing. *encoding* [1]_ is the output encoding
|
||||
(default is None). *xml_declaration* controls if an XML declaration
|
||||
(default is US-ASCII). Use ``encoding="unicode"`` to write a Unicode string.
|
||||
*xml_declaration* controls if an XML declaration
|
||||
should be added to the file. Use False for never, True for always, None
|
||||
for only if not US-ASCII or UTF-8 (default is None). *method* is either
|
||||
``"xml"``, ``"html"`` or ``"text"`` (default is ``"xml"``). Returns an
|
||||
(optionally) encoded string.
|
||||
for only if not US-ASCII or UTF-8 or Unicode (default is None). *method* is
|
||||
either ``"xml"``, ``"html"`` or ``"text"`` (default is ``"xml"``).
|
||||
Returns an (optionally) encoded string.
|
||||
|
||||
This is the XML file that is going to be manipulated::
|
||||
|
||||
|
|
|
@ -71,14 +71,14 @@ def check_method(method):
|
|||
if not hasattr(method, '__call__'):
|
||||
print(method, "not callable")
|
||||
|
||||
def serialize(elem, to_string=True, **options):
|
||||
def serialize(elem, to_string=True, encoding='unicode', **options):
|
||||
import io
|
||||
if options.get("encoding"):
|
||||
if encoding != 'unicode':
|
||||
file = io.BytesIO()
|
||||
else:
|
||||
file = io.StringIO()
|
||||
tree = ET.ElementTree(elem)
|
||||
tree.write(file, **options)
|
||||
tree.write(file, encoding=encoding, **options)
|
||||
if to_string:
|
||||
return file.getvalue()
|
||||
else:
|
||||
|
@ -537,7 +537,7 @@ def attrib():
|
|||
>>> elem.set('testa', 'testval')
|
||||
>>> elem.set('testb', 'test2')
|
||||
>>> ET.tostring(elem)
|
||||
'<test testa="testval" testb="test2">aa</test>'
|
||||
b'<test testa="testval" testb="test2">aa</test>'
|
||||
>>> sorted(elem.keys())
|
||||
['testa', 'testb']
|
||||
>>> sorted(elem.items())
|
||||
|
@ -547,7 +547,7 @@ def attrib():
|
|||
>>> elem.attrib['testb'] = 'test1'
|
||||
>>> elem.attrib['testc'] = 'test2'
|
||||
>>> ET.tostring(elem)
|
||||
'<test testa="testval" testb="test1" testc="test2">aa</test>'
|
||||
b'<test testa="testval" testb="test1" testc="test2">aa</test>'
|
||||
"""
|
||||
|
||||
def makeelement():
|
||||
|
@ -587,7 +587,7 @@ def parsefile():
|
|||
|
||||
>>> tree = ET.parse(SIMPLE_XMLFILE)
|
||||
>>> normalize_crlf(tree)
|
||||
>>> tree.write(sys.stdout)
|
||||
>>> tree.write(sys.stdout, encoding='unicode')
|
||||
<root>
|
||||
<element key="value">text</element>
|
||||
<element>text</element>tail
|
||||
|
@ -595,7 +595,7 @@ def parsefile():
|
|||
</root>
|
||||
>>> tree = ET.parse(SIMPLE_NS_XMLFILE)
|
||||
>>> normalize_crlf(tree)
|
||||
>>> tree.write(sys.stdout)
|
||||
>>> tree.write(sys.stdout, encoding='unicode')
|
||||
<ns0:root xmlns:ns0="namespace">
|
||||
<ns0:element key="value">text</ns0:element>
|
||||
<ns0:element>text</ns0:element>tail
|
||||
|
@ -636,17 +636,17 @@ def parsefile():
|
|||
def parseliteral():
|
||||
"""
|
||||
>>> element = ET.XML("<html><body>text</body></html>")
|
||||
>>> ET.ElementTree(element).write(sys.stdout)
|
||||
>>> ET.ElementTree(element).write(sys.stdout, encoding='unicode')
|
||||
<html><body>text</body></html>
|
||||
>>> element = ET.fromstring("<html><body>text</body></html>")
|
||||
>>> ET.ElementTree(element).write(sys.stdout)
|
||||
>>> ET.ElementTree(element).write(sys.stdout, encoding='unicode')
|
||||
<html><body>text</body></html>
|
||||
>>> sequence = ["<html><body>", "text</bo", "dy></html>"]
|
||||
>>> element = ET.fromstringlist(sequence)
|
||||
>>> print(ET.tostring(element))
|
||||
<html><body>text</body></html>
|
||||
>>> print("".join(ET.tostringlist(element)))
|
||||
<html><body>text</body></html>
|
||||
b'<html><body>text</body></html>'
|
||||
>>> print(b"".join(ET.tostringlist(element)))
|
||||
b'<html><body>text</body></html>'
|
||||
>>> ET.tostring(element, "ascii")
|
||||
b"<?xml version='1.0' encoding='ascii'?>\\n<html><body>text</body></html>"
|
||||
>>> _, ids = ET.XMLID("<html><body>text</body></html>")
|
||||
|
@ -875,10 +875,10 @@ def writestring():
|
|||
"""
|
||||
>>> elem = ET.XML("<html><body>text</body></html>")
|
||||
>>> ET.tostring(elem)
|
||||
'<html><body>text</body></html>'
|
||||
b'<html><body>text</body></html>'
|
||||
>>> elem = ET.fromstring("<html><body>text</body></html>")
|
||||
>>> ET.tostring(elem)
|
||||
'<html><body>text</body></html>'
|
||||
b'<html><body>text</body></html>'
|
||||
"""
|
||||
|
||||
def check_encoding(encoding):
|
||||
|
@ -1233,14 +1233,14 @@ def processinginstruction():
|
|||
Test ProcessingInstruction directly
|
||||
|
||||
>>> ET.tostring(ET.ProcessingInstruction('test', 'instruction'))
|
||||
'<?test instruction?>'
|
||||
b'<?test instruction?>'
|
||||
>>> ET.tostring(ET.PI('test', 'instruction'))
|
||||
'<?test instruction?>'
|
||||
b'<?test instruction?>'
|
||||
|
||||
Issue #2746
|
||||
|
||||
>>> ET.tostring(ET.PI('test', '<testing&>'))
|
||||
'<?test <testing&>?>'
|
||||
b'<?test <testing&>?>'
|
||||
>>> ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin1')
|
||||
b"<?xml version='1.0' encoding='latin1'?>\\n<?test <testing&>\\xe3?>"
|
||||
"""
|
||||
|
@ -1643,11 +1643,11 @@ def bug_200708_newline():
|
|||
|
||||
>>> e = ET.Element('SomeTag', text="def _f():\n return 3\n")
|
||||
>>> ET.tostring(e)
|
||||
'<SomeTag text="def _f(): return 3 " />'
|
||||
b'<SomeTag text="def _f(): return 3 " />'
|
||||
>>> ET.XML(ET.tostring(e)).get("text")
|
||||
'def _f():\n return 3\n'
|
||||
>>> ET.tostring(ET.XML(ET.tostring(e)))
|
||||
'<SomeTag text="def _f(): return 3 " />'
|
||||
b'<SomeTag text="def _f(): return 3 " />'
|
||||
|
||||
"""
|
||||
|
||||
|
@ -1698,15 +1698,15 @@ def bug_200709_register_namespace():
|
|||
"""
|
||||
|
||||
>>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
|
||||
'<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />'
|
||||
b'<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />'
|
||||
>>> ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
|
||||
>>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
|
||||
'<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />'
|
||||
b'<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />'
|
||||
|
||||
And the Dublin Core namespace is in the default list:
|
||||
|
||||
>>> ET.tostring(ET.Element("{http://purl.org/dc/elements/1.1/}title"))
|
||||
'<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />'
|
||||
b'<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />'
|
||||
|
||||
"""
|
||||
|
||||
|
@ -1792,7 +1792,7 @@ def check_issue3151():
|
|||
'{${stuff}}localname'
|
||||
>>> t = ET.ElementTree(e)
|
||||
>>> ET.tostring(e)
|
||||
'<ns0:localname xmlns:ns0="${stuff}" />'
|
||||
b'<ns0:localname xmlns:ns0="${stuff}" />'
|
||||
|
||||
"""
|
||||
|
||||
|
|
|
@ -792,12 +792,13 @@ class ElementTree:
|
|||
# @def write(file, **options)
|
||||
# @param file A file name, or a file object opened for writing.
|
||||
# @param **options Options, given as keyword arguments.
|
||||
# @keyparam encoding Optional output encoding (default is None).
|
||||
# @keyparam encoding Optional output encoding (default is US-ASCII).
|
||||
# Use "unicode" to write a Unicode string.
|
||||
# @keyparam method Optional output method ("xml", "html", "text" or
|
||||
# "c14n"; default is "xml").
|
||||
# @keyparam xml_declaration Controls if an XML declaration should
|
||||
# be added to the file. Use False for never, True for always,
|
||||
# None for only if not US-ASCII or UTF-8. None is default.
|
||||
# None for only if not US-ASCII or UTF-8 or Unicode. None is default.
|
||||
|
||||
def write(self, file_or_filename,
|
||||
# keyword arguments
|
||||
|
@ -811,14 +812,23 @@ class ElementTree:
|
|||
elif method not in _serialize:
|
||||
# FIXME: raise an ImportError for c14n if ElementC14N is missing?
|
||||
raise ValueError("unknown method %r" % method)
|
||||
if not encoding:
|
||||
if method == "c14n":
|
||||
encoding = "utf-8"
|
||||
else:
|
||||
encoding = "us-ascii"
|
||||
elif encoding == str: # lxml.etree compatibility.
|
||||
encoding = "unicode"
|
||||
else:
|
||||
encoding = encoding.lower()
|
||||
if hasattr(file_or_filename, "write"):
|
||||
file = file_or_filename
|
||||
else:
|
||||
if encoding:
|
||||
if encoding != "unicode":
|
||||
file = open(file_or_filename, "wb")
|
||||
else:
|
||||
file = open(file_or_filename, "w")
|
||||
if encoding:
|
||||
if encoding != "unicode":
|
||||
def write(text):
|
||||
try:
|
||||
return file.write(text.encode(encoding,
|
||||
|
@ -827,20 +837,15 @@ class ElementTree:
|
|||
_raise_serialization_error(text)
|
||||
else:
|
||||
write = file.write
|
||||
if not encoding:
|
||||
if method == "c14n":
|
||||
encoding = "utf-8"
|
||||
else:
|
||||
encoding = None
|
||||
elif xml_declaration or (xml_declaration is None and
|
||||
encoding not in ("utf-8", "us-ascii")):
|
||||
if method == "xml":
|
||||
encoding_ = encoding
|
||||
if not encoding:
|
||||
# Retrieve the default encoding for the xml declaration
|
||||
import locale
|
||||
encoding_ = locale.getpreferredencoding()
|
||||
write("<?xml version='1.0' encoding='%s'?>\n" % encoding_)
|
||||
if method == "xml" and (xml_declaration or
|
||||
(xml_declaration is None and
|
||||
encoding not in ("utf-8", "us-ascii", "unicode"))):
|
||||
declared_encoding = encoding
|
||||
if encoding == "unicode":
|
||||
# Retrieve the default encoding for the xml declaration
|
||||
import locale
|
||||
declared_encoding = locale.getpreferredencoding()
|
||||
write("<?xml version='1.0' encoding='%s'?>\n" % declared_encoding)
|
||||
if method == "text":
|
||||
_serialize_text(write, self._root)
|
||||
else:
|
||||
|
@ -1127,11 +1132,12 @@ def _escape_attrib_html(text):
|
|||
|
||||
##
|
||||
# Generates a string representation of an XML element, including all
|
||||
# subelements. If encoding is None, the return type is a string;
|
||||
# subelements. If encoding is "unicode", the return type is a string;
|
||||
# otherwise it is a bytes array.
|
||||
#
|
||||
# @param element An Element instance.
|
||||
# @keyparam encoding Optional output encoding (default is None).
|
||||
# @keyparam encoding Optional output encoding (default is US-ASCII).
|
||||
# Use "unicode" to return a Unicode string.
|
||||
# @keyparam method Optional output method ("xml", "html", "text" or
|
||||
# "c14n"; default is "xml").
|
||||
# @return An (optionally) encoded string containing the XML data.
|
||||
|
@ -1144,17 +1150,20 @@ def tostring(element, encoding=None, method=None):
|
|||
file = dummy()
|
||||
file.write = data.append
|
||||
ElementTree(element).write(file, encoding, method=method)
|
||||
if encoding:
|
||||
return b"".join(data)
|
||||
else:
|
||||
if encoding in (str, "unicode"):
|
||||
return "".join(data)
|
||||
else:
|
||||
return b"".join(data)
|
||||
|
||||
##
|
||||
# Generates a string representation of an XML element, including all
|
||||
# subelements. The string is returned as a sequence of string fragments.
|
||||
# subelements. If encoding is "unicode", the string is returned as a
|
||||
# sequence of string fragments; otherwise it is a sequence of
|
||||
# bytestrings.
|
||||
#
|
||||
# @param element An Element instance.
|
||||
# @keyparam encoding Optional output encoding (default is US-ASCII).
|
||||
# Use "unicode" to return a Unicode string.
|
||||
# @keyparam method Optional output method ("xml", "html", "text" or
|
||||
# "c14n"; default is "xml").
|
||||
# @return A sequence object containing the XML data.
|
||||
|
@ -1184,7 +1193,7 @@ def dump(elem):
|
|||
# debugging
|
||||
if not isinstance(elem, ElementTree):
|
||||
elem = ElementTree(elem)
|
||||
elem.write(sys.stdout)
|
||||
elem.write(sys.stdout, encoding="unicode")
|
||||
tail = elem.getroot().tail
|
||||
if not tail or tail[-1] != "\n":
|
||||
sys.stdout.write("\n")
|
||||
|
|
|
@ -55,6 +55,9 @@ Extensions
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #8047: Fix the xml.etree serializer to return bytes by default. Use
|
||||
``encoding="unicode"`` to generate a Unicode string.
|
||||
|
||||
- Fix Issue8280 - urllib2's Request method will remove fragments in the url.
|
||||
This is how it is supposed to work, wget and curl do the same. Previous
|
||||
behavior was wrong.
|
||||
|
|
Loading…
Reference in New Issue