Issue #15296: Fix minidom.toxml/toprettyxml for non-unicode encodings. Patch by Serhiy Storchaka, with some minor style adjustments by me.
This commit is contained in:
parent
b674dcf53e
commit
8a80502d2c
|
@ -147,12 +147,7 @@ module documentation. This section lists the differences between the API and
|
|||
the DOM node.
|
||||
|
||||
With an explicit *encoding* [1]_ argument, the result is a byte
|
||||
string in the specified encoding. It is recommended that you
|
||||
always specify an encoding; you may use any encoding you like, but
|
||||
an argument of "utf-8" is the most common choice, avoiding
|
||||
:exc:`UnicodeError` exceptions in case of unrepresentable text
|
||||
data.
|
||||
|
||||
string in the specified encoding.
|
||||
With no *encoding* argument, the result is a Unicode string, and the
|
||||
XML declaration in the resulting string does not specify an
|
||||
encoding. Encoding this string in an encoding other than UTF-8 is
|
||||
|
|
|
@ -1067,6 +1067,11 @@ class MinidomTest(unittest.TestCase):
|
|||
b'<?xml version="1.0" encoding="utf-8"?><foo>\xe2\x82\xac</foo>')
|
||||
self.assertEqual(doc.toxml('iso-8859-15'),
|
||||
b'<?xml version="1.0" encoding="iso-8859-15"?><foo>\xa4</foo>')
|
||||
self.assertEqual(doc.toxml('us-ascii'),
|
||||
b'<?xml version="1.0" encoding="us-ascii"?><foo>&#8364;</foo>')
|
||||
self.assertEqual(doc.toxml('utf-16'),
|
||||
'<?xml version="1.0" encoding="utf-16"?>'
|
||||
'<foo>\u20ac</foo>'.encode('utf-16'))
|
||||
|
||||
# Verify that character decoding errors throw exceptions instead
|
||||
# of crashing
|
||||
|
|
|
@ -14,7 +14,6 @@ Todo:
|
|||
* SAX 2 namespaces
|
||||
"""
|
||||
|
||||
import codecs
|
||||
import io
|
||||
import xml.dom
|
||||
|
||||
|
@ -47,19 +46,22 @@ class Node(xml.dom.Node):
|
|||
return self.toprettyxml("", "", encoding)
|
||||
|
||||
def toprettyxml(self, indent="\t", newl="\n", encoding=None):
|
||||
# indent = the indentation string to prepend, per level
|
||||
# newl = the newline string to append
|
||||
use_encoding = "utf-8" if encoding is None else encoding
|
||||
writer = codecs.getwriter(use_encoding)(io.BytesIO())
|
||||
if encoding is None:
|
||||
writer = io.StringIO()
|
||||
else:
|
||||
writer = io.TextIOWrapper(io.BytesIO(),
|
||||
encoding=encoding,
|
||||
errors="xmlcharrefreplace",
|
||||
newline='\n')
|
||||
if self.nodeType == Node.DOCUMENT_NODE:
|
||||
# Can pass encoding only to document, to put it into XML header
|
||||
self.writexml(writer, "", indent, newl, encoding)
|
||||
else:
|
||||
self.writexml(writer, "", indent, newl)
|
||||
if encoding is None:
|
||||
return writer.stream.getvalue().decode(use_encoding)
|
||||
return writer.getvalue()
|
||||
else:
|
||||
return writer.stream.getvalue()
|
||||
return writer.detach().getvalue()
|
||||
|
||||
def hasChildNodes(self):
|
||||
return bool(self.childNodes)
|
||||
|
@ -1788,12 +1790,12 @@ class Document(Node, DocumentLS):
|
|||
raise xml.dom.NotSupportedErr("cannot import document type nodes")
|
||||
return _clone_node(node, deep, self)
|
||||
|
||||
def writexml(self, writer, indent="", addindent="", newl="",
|
||||
encoding = None):
|
||||
def writexml(self, writer, indent="", addindent="", newl="", encoding=None):
|
||||
if encoding is None:
|
||||
writer.write('<?xml version="1.0" ?>'+newl)
|
||||
else:
|
||||
writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl))
|
||||
writer.write('<?xml version="1.0" encoding="%s"?>%s' % (
|
||||
encoding, newl))
|
||||
for node in self.childNodes:
|
||||
node.writexml(writer, indent, addindent, newl)
|
||||
|
||||
|
|
Loading…
Reference in New Issue