Issue #25047: Merge Element Tree encoding from 3.4 into 3.5

This commit is contained in:
Martin Panter 2015-09-23 01:43:08 +00:00
commit 982a08f8bb
3 changed files with 22 additions and 12 deletions

View File

@ -2396,14 +2396,21 @@ class IOTest(unittest.TestCase):
elem = ET.Element("tag")
elem.text = "abc"
self.assertEqual(serialize(elem), '<tag>abc</tag>')
self.assertEqual(serialize(elem, encoding="utf-8"),
b'<tag>abc</tag>')
self.assertEqual(serialize(elem, encoding="us-ascii"),
b'<tag>abc</tag>')
for enc in ("utf-8", "us-ascii"):
with self.subTest(enc):
self.assertEqual(serialize(elem, encoding=enc),
b'<tag>abc</tag>')
self.assertEqual(serialize(elem, encoding=enc.upper()),
b'<tag>abc</tag>')
for enc in ("iso-8859-1", "utf-16", "utf-32"):
self.assertEqual(serialize(elem, encoding=enc),
("<?xml version='1.0' encoding='%s'?>\n"
"<tag>abc</tag>" % enc).encode(enc))
with self.subTest(enc):
self.assertEqual(serialize(elem, encoding=enc),
("<?xml version='1.0' encoding='%s'?>\n"
"<tag>abc</tag>" % enc).encode(enc))
upper = enc.upper()
self.assertEqual(serialize(elem, encoding=upper),
("<?xml version='1.0' encoding='%s'?>\n"
"<tag>abc</tag>" % upper).encode(enc))
elem = ET.Element("tag")
elem.text = "<&\"\'>"

View File

@ -752,14 +752,13 @@ class ElementTree:
encoding = "utf-8"
else:
encoding = "us-ascii"
else:
encoding = encoding.lower()
with _get_writer(file_or_filename, encoding) as write:
enc_lower = encoding.lower()
with _get_writer(file_or_filename, enc_lower) as write:
if method == "xml" and (xml_declaration or
(xml_declaration is None and
encoding not in ("utf-8", "us-ascii", "unicode"))):
enc_lower not in ("utf-8", "us-ascii", "unicode"))):
declared_encoding = encoding
if encoding == "unicode":
if enc_lower == "unicode":
# Retrieve the default encoding for the xml declaration
import locale
declared_encoding = locale.getpreferredencoding()

View File

@ -18,6 +18,10 @@ Core and Builtins
Library
-------
- Issue #25047: The XML encoding declaration written by ElementTree now
respects the letter case of the encoding name given by the user. This restores
the ability to write encoding names in uppercase, such as "UTF-8", which
worked in Python 2.
- Issue #19143: platform module now reads Windows version from kernel32.dll to
avoid compatibility shims.