Issue #15296: Fix minidom.toxml/toprettyxml for non-unicode encodings. Patch by Serhiy Storchaka, with some minor style adjustments by me.

2012-07-13 09:52:39 +03:00 · 2012-07-13 09:52:39 +03:00 · 8a80502d2c
parent b674dcf53e
commit 8a80502d2c
3 changed files with 18 additions and 16 deletions
--- a/Doc/library/xml.dom.minidom.rst
+++ b/Doc/library/xml.dom.minidom.rst
@@ -147,12 +147,7 @@ module documentation.  This section lists the differences between the API and
   the DOM node.

   With an explicit *encoding* [1]_ argument, the result is a byte
-   string in the specified encoding.  It is recommended that you
-   always specify an encoding; you may use any encoding you like, but
-   an argument of "utf-8" is the most common choice, avoiding
-   :exc:`UnicodeError` exceptions in case of unrepresentable text
-   data.
-
+   string in the specified encoding.
   With no *encoding* argument, the result is a Unicode string, and the
   XML declaration in the resulting string does not specify an
   encoding. Encoding this string in an encoding other than UTF-8 is
--- a/Lib/test/test_minidom.py
+++ b/Lib/test/test_minidom.py
@@ -1067,6 +1067,11 @@ class MinidomTest(unittest.TestCase):
            b'<?xml version="1.0" encoding="utf-8"?><foo>\xe2\x82\xac</foo>')
        self.assertEqual(doc.toxml('iso-8859-15'),
            b'<?xml version="1.0" encoding="iso-8859-15"?><foo>\xa4</foo>')
+        self.assertEqual(doc.toxml('us-ascii'),
+            b'<?xml version="1.0" encoding="us-ascii"?><foo>&#8364;</foo>')
+        self.assertEqual(doc.toxml('utf-16'),
+            '<?xml version="1.0" encoding="utf-16"?>'
+            '<foo>\u20ac</foo>'.encode('utf-16'))

        # Verify that character decoding errors throw exceptions instead
        # of crashing
--- a/Lib/xml/dom/minidom.py
+++ b/Lib/xml/dom/minidom.py
@@ -14,7 +14,6 @@ Todo:
 * SAX 2 namespaces
 """

-import codecs
 import io
 import xml.dom

@@ -47,19 +46,22 @@ class Node(xml.dom.Node):
        return self.toprettyxml("", "", encoding)

    def toprettyxml(self, indent="\t", newl="\n", encoding=None):
-        # indent = the indentation string to prepend, per level
-        # newl = the newline string to append
-        use_encoding = "utf-8" if encoding is None else encoding
-        writer = codecs.getwriter(use_encoding)(io.BytesIO())
+        if encoding is None:
+            writer = io.StringIO()
+        else:
+            writer = io.TextIOWrapper(io.BytesIO(),
+                                      encoding=encoding,
+                                      errors="xmlcharrefreplace",
+                                      newline='\n')
        if self.nodeType == Node.DOCUMENT_NODE:
            # Can pass encoding only to document, to put it into XML header
            self.writexml(writer, "", indent, newl, encoding)
        else:
            self.writexml(writer, "", indent, newl)
        if encoding is None:
-            return writer.stream.getvalue().decode(use_encoding)
+            return writer.getvalue()
        else:
-            return writer.stream.getvalue()
+            return writer.detach().getvalue()

    def hasChildNodes(self):
        return bool(self.childNodes)
@@ -1788,12 +1790,12 @@ class Document(Node, DocumentLS):
            raise xml.dom.NotSupportedErr("cannot import document type nodes")
        return _clone_node(node, deep, self)

-    def writexml(self, writer, indent="", addindent="", newl="",
-                 encoding = None):
+    def writexml(self, writer, indent="", addindent="", newl="", encoding=None):
        if encoding is None:
            writer.write('<?xml version="1.0" ?>'+newl)
        else:
-            writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl))
+            writer.write('<?xml version="1.0" encoding="%s"?>%s' % (
+                encoding, newl))
        for node in self.childNodes:
            node.writexml(writer, indent, addindent, newl)