Implement the encoding argument for toxml and toprettyxml.

Document toprettyxml.
This commit is contained in:
Martin v. Löwis 2002-06-30 15:05:00 +00:00
parent 2ebfd09e58
commit 7d650ca83b
5 changed files with 68 additions and 9 deletions

View File

@ -121,10 +121,45 @@ children of that node.
Write XML to the writer object. The writer should have a
\method{write()} method which matches that of the file object
interface.
\versionchanged[To support pretty output, new keyword parameters
\var{indent}, \var{addindent}, and \var{newl} have been added]{2.1}
\versionchanged[For the \class{Document} node, an additional keyword
argument \var{encoding} can be used to specify the encoding field of the
XML header]{2.3}
\end{methoddesc}
\begin{methoddesc}{toxml}{}
\begin{methoddesc}{toxml}{\optional{encoding}}
Return the XML that the DOM represents as a string.
\versionchanged[the \var{encoding} argument was introduced]{2.3}
With no argument, the XML header does not specify an encoding, and the
result is a Unicode string if the default encoding cannot represent all
characters in the document. Encoding this string in an encoding other
than UTF-8 is likely incorrect, since UTF-8 is the default encoding of
XML.
With an explicit \var{encoding} argument, the result is a byte string
in the specified encoding. It is recommended that this argument always
be specified. To avoid \exception{UnicodeError} exceptions in case of
unrepresentable text data, the \var{encoding} argument should be
specified as "utf-8".
\end{methoddesc}
\begin{methoddesc}{toprettyxml}{\optional{indent\optional{, newl\optional{, encoding}}}}
Return a pretty-printed version of the document. \var{indent} specifies
the indentation string and defaults to a tabulator; \var{newl} specifies
the string emitted at the end of each line and defaults to \code{\e n}.
\versionadded{2.1}
\versionchanged[the \var{encoding} argument was introduced; see \method{toxml()}]{2.3}
\end{methoddesc}
The following standard DOM methods have special considerations with

View File

@ -98,6 +98,9 @@ Passed assertion: len(Node.allnodes) == 0
Passed Test
Test Succeeded testElementReprAndStr
Passed assertion: len(Node.allnodes) == 0
Passed testEncodings - encoding EURO SIGN
Test Succeeded testEncodings
Passed assertion: len(Node.allnodes) == 0
Test Succeeded testFirstChild
Passed assertion: len(Node.allnodes) == 0
Test Succeeded testGetAttrLength

View File

@ -606,6 +606,14 @@ def testSAX2DOM():
doc.unlink()
def testEncodings():
    # Serialize a document holding U+20AC (EURO SIGN) three ways -- with no
    # encoding, as utf-8 and as iso-8859-15 -- and compare each result with
    # the exact text expected for it.
    doc = parseString('<foo>&#x20ac;</foo>')
    as_unicode = u'<?xml version="1.0" ?>\n<foo>\u20ac</foo>'
    as_utf8 = '<?xml version="1.0" encoding="utf-8"?>\n<foo>\xe2\x82\xac</foo>'
    as_latin9 = '<?xml version="1.0" encoding="iso-8859-15"?>\n<foo>\xa4</foo>'
    # The comparisons stay chained with `and`, so a later toxml() call is
    # only made once the comparison before it has succeeded.
    all_match = (doc.toxml() == as_unicode
                 and doc.toxml('utf-8') == as_utf8
                 and doc.toxml('iso-8859-15') == as_latin9)
    confirm(all_match, "testEncodings - encoding EURO SIGN")
    doc.unlink()
# --- MAIN PROGRAM
names = globals().keys()

View File

@ -65,16 +65,22 @@ class Node(xml.dom.Node):
def __nonzero__(self):
# Truth-value hook: unconditionally reports the node as true.
# NOTE(review): presumably present so that a node is never treated as
# false in a boolean test -- confirm against the rest of the class.
return 1
def toxml(self, encoding=None):
    """Return this node serialized as XML with no added whitespace.

    The work is delegated to toprettyxml() with an empty indentation
    string and an empty newline string.

    encoding -- passed through unchanged to toprettyxml(); None (the
                default) requests no particular encoding.
    """
    return self.toprettyxml("", "", encoding)
def toprettyxml(self, indent="\t", newl="\n", encoding=None):
    """Return this node serialized as pretty-printed XML.

    indent   -- the indentation string to prepend, per level
    newl     -- the newline string to append
    encoding -- when not None, all output is written through the stream
                writer of this codec; for a document node the value is
                also handed to writexml()

    Raises LookupError (from codecs.lookup) for an unknown encoding.
    """
    writer = _get_StringIO()
    if encoding is not None:
        import codecs
        # Can't use codecs.getwriter to preserve 2.0 compatibility
        writer = codecs.lookup(encoding)[3](writer)
    # The tree is serialized exactly once, and only after the writer has
    # been wrapped, so everything that is written goes through the codec.
    if self.nodeType == Node.DOCUMENT_NODE:
        # Can pass encoding only to document, to put it into XML header
        self.writexml(writer, "", indent, newl, encoding)
    else:
        self.writexml(writer, "", indent, newl)
    return writer.getvalue()
def hasChildNodes(self):
@ -934,8 +940,12 @@ class Document(Node):
return _getElementsByTagNameNSHelper(self, namespaceURI, localName,
NodeList())
def writexml(self, writer, indent="", addindent="", newl="",
             encoding=None):
    """Write the XML declaration and then every child node to writer.

    writer    -- object with a write() method that receives the output
    indent    -- passed on unchanged to each child's writexml()
    addindent -- passed on unchanged to each child's writexml()
    newl      -- passed on unchanged to each child's writexml()
    encoding  -- when not None, it is inserted into the XML declaration
                 as the encoding field; the text itself is not
                 transcoded here
    """
    # Exactly one declaration is written: without or with the encoding
    # field, depending on whether an encoding was supplied.
    if encoding is None:
        writer.write('<?xml version="1.0" ?>\n')
    else:
        writer.write('<?xml version="1.0" encoding="%s"?>\n' % encoding)
    for node in self.childNodes:
        node.writexml(writer, indent, addindent, newl)

View File

@ -170,6 +170,9 @@ Extension modules
Library
- The toxml and toprettyxml methods of xml.dom.minidom nodes now take
an optional encoding argument.
- Some fixes in the copy module: when an object is copied through its
__reduce__ method, there was no check for a __setstate__ method on
the result [SF patch 565085]; deepcopy should treat instances of