Issue #15296: Fix minidom.toxml/toprettyxml for non-unicode encodings. Patch by Serhiy Storchaka, with some minor style adjustments by me.

This commit is contained in:
Eli Bendersky 2012-07-13 09:52:39 +03:00
parent b674dcf53e
commit 8a80502d2c
3 changed files with 18 additions and 16 deletions

View File

@@ -147,12 +147,7 @@ module documentation. This section lists the differences between the API and
the DOM node.
With an explicit *encoding* [1]_ argument, the result is a byte
string in the specified encoding. It is recommended that you
always specify an encoding; you may use any encoding you like, but
an argument of "utf-8" is the most common choice, avoiding
:exc:`UnicodeError` exceptions in case of unrepresentable text
data.
string in the specified encoding.
With no *encoding* argument, the result is a Unicode string, and the
XML declaration in the resulting string does not specify an
encoding. Encoding this string in an encoding other than UTF-8 is

View File

@@ -1067,6 +1067,11 @@ class MinidomTest(unittest.TestCase):
b'<?xml version="1.0" encoding="utf-8"?><foo>\xe2\x82\xac</foo>')
self.assertEqual(doc.toxml('iso-8859-15'),
b'<?xml version="1.0" encoding="iso-8859-15"?><foo>\xa4</foo>')
self.assertEqual(doc.toxml('us-ascii'),
b'<?xml version="1.0" encoding="us-ascii"?><foo>&#8364;</foo>')
self.assertEqual(doc.toxml('utf-16'),
'<?xml version="1.0" encoding="utf-16"?>'
'<foo>\u20ac</foo>'.encode('utf-16'))
# Verify that character decoding errors throw exceptions instead
# of crashing

View File

@@ -14,7 +14,6 @@ Todo:
* SAX 2 namespaces
"""
import codecs
import io
import xml.dom
@@ -47,19 +46,22 @@ class Node(xml.dom.Node):
return self.toprettyxml("", "", encoding)
def toprettyxml(self, indent="\t", newl="\n", encoding=None):
# indent = the indentation string to prepend, per level
# newl = the newline string to append
use_encoding = "utf-8" if encoding is None else encoding
writer = codecs.getwriter(use_encoding)(io.BytesIO())
if encoding is None:
writer = io.StringIO()
else:
writer = io.TextIOWrapper(io.BytesIO(),
encoding=encoding,
errors="xmlcharrefreplace",
newline='\n')
if self.nodeType == Node.DOCUMENT_NODE:
# Can pass encoding only to document, to put it into XML header
self.writexml(writer, "", indent, newl, encoding)
else:
self.writexml(writer, "", indent, newl)
if encoding is None:
return writer.stream.getvalue().decode(use_encoding)
return writer.getvalue()
else:
return writer.stream.getvalue()
return writer.detach().getvalue()
def hasChildNodes(self):
return bool(self.childNodes)
@@ -1788,12 +1790,12 @@ class Document(Node, DocumentLS):
raise xml.dom.NotSupportedErr("cannot import document type nodes")
return _clone_node(node, deep, self)
def writexml(self, writer, indent="", addindent="", newl="",
encoding = None):
def writexml(self, writer, indent="", addindent="", newl="", encoding=None):
if encoding is None:
writer.write('<?xml version="1.0" ?>'+newl)
else:
writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl))
writer.write('<?xml version="1.0" encoding="%s"?>%s' % (
encoding, newl))
for node in self.childNodes:
node.writexml(writer, indent, addindent, newl)