Issue #1470548: XMLGenerator now works with UTF-16 and UTF-32 encodings.

This commit is contained in:
Serhiy Storchaka 2013-02-10 14:26:08 +02:00
parent 0dd3d309ab
commit f898038ca0
3 changed files with 124 additions and 47 deletions

View File

@ -14,6 +14,7 @@ from xml.sax.expatreader import create_parser
from xml.sax.handler import feature_namespaces
from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
from cStringIO import StringIO
import io
import os.path
import shutil
import test.test_support as support
@ -170,9 +171,9 @@ class SaxutilsTest(unittest.TestCase):
start = '<?xml version="1.0" encoding="iso-8859-1"?>\n'
class XmlgenTest(unittest.TestCase):
class XmlgenTest:
def test_xmlgen_basic(self):
result = StringIO()
result = self.ioclass()
gen = XMLGenerator(result)
gen.startDocument()
gen.startElement("doc", {})
@ -182,7 +183,7 @@ class XmlgenTest(unittest.TestCase):
self.assertEqual(result.getvalue(), start + "<doc></doc>")
def test_xmlgen_content(self):
result = StringIO()
result = self.ioclass()
gen = XMLGenerator(result)
gen.startDocument()
@ -194,7 +195,7 @@ class XmlgenTest(unittest.TestCase):
self.assertEqual(result.getvalue(), start + "<doc>huhei</doc>")
def test_xmlgen_pi(self):
result = StringIO()
result = self.ioclass()
gen = XMLGenerator(result)
gen.startDocument()
@ -206,7 +207,7 @@ class XmlgenTest(unittest.TestCase):
self.assertEqual(result.getvalue(), start + "<?test data?><doc></doc>")
def test_xmlgen_content_escape(self):
result = StringIO()
result = self.ioclass()
gen = XMLGenerator(result)
gen.startDocument()
@ -219,7 +220,7 @@ class XmlgenTest(unittest.TestCase):
start + "<doc>&lt;huhei&amp;</doc>")
def test_xmlgen_attr_escape(self):
result = StringIO()
result = self.ioclass()
gen = XMLGenerator(result)
gen.startDocument()
@ -238,8 +239,41 @@ class XmlgenTest(unittest.TestCase):
"<e a=\"'&quot;\"></e>"
"<e a=\"&#10;&#13;&#9;\"></e></doc>"))
def test_xmlgen_encoding(self):
encodings = ('iso-8859-15', 'utf-8',
'utf-16be', 'utf-16le',
'utf-32be', 'utf-32le')
for encoding in encodings:
result = self.ioclass()
gen = XMLGenerator(result, encoding=encoding)
gen.startDocument()
gen.startElement("doc", {"a": u'\u20ac'})
gen.characters(u"\u20ac")
gen.endElement("doc")
gen.endDocument()
self.assertEqual(result.getvalue(), (
u'<?xml version="1.0" encoding="%s"?>\n'
u'<doc a="\u20ac">\u20ac</doc>' % encoding
).encode(encoding, 'xmlcharrefreplace'))
def test_xmlgen_unencodable(self):
result = self.ioclass()
gen = XMLGenerator(result, encoding='ascii')
gen.startDocument()
gen.startElement("doc", {"a": u'\u20ac'})
gen.characters(u"\u20ac")
gen.endElement("doc")
gen.endDocument()
self.assertEqual(result.getvalue(),
'<?xml version="1.0" encoding="ascii"?>\n'
'<doc a="&#8364;">&#8364;</doc>')
def test_xmlgen_ignorable(self):
result = StringIO()
result = self.ioclass()
gen = XMLGenerator(result)
gen.startDocument()
@ -251,7 +285,7 @@ class XmlgenTest(unittest.TestCase):
self.assertEqual(result.getvalue(), start + "<doc> </doc>")
def test_xmlgen_ns(self):
result = StringIO()
result = self.ioclass()
gen = XMLGenerator(result)
gen.startDocument()
@ -269,7 +303,7 @@ class XmlgenTest(unittest.TestCase):
ns_uri))
def test_1463026_1(self):
result = StringIO()
result = self.ioclass()
gen = XMLGenerator(result)
gen.startDocument()
@ -280,7 +314,7 @@ class XmlgenTest(unittest.TestCase):
self.assertEqual(result.getvalue(), start+'<a b="c"></a>')
def test_1463026_2(self):
result = StringIO()
result = self.ioclass()
gen = XMLGenerator(result)
gen.startDocument()
@ -293,7 +327,7 @@ class XmlgenTest(unittest.TestCase):
self.assertEqual(result.getvalue(), start+'<a xmlns="qux"></a>')
def test_1463026_3(self):
result = StringIO()
result = self.ioclass()
gen = XMLGenerator(result)
gen.startDocument()
@ -321,7 +355,7 @@ class XmlgenTest(unittest.TestCase):
parser = make_parser()
parser.setFeature(feature_namespaces, True)
result = StringIO()
result = self.ioclass()
gen = XMLGenerator(result)
parser.setContentHandler(gen)
parser.parse(test_xml)
@ -340,7 +374,7 @@ class XmlgenTest(unittest.TestCase):
#
# This test demonstrates the bug by direct manipulation of the
# XMLGenerator.
result = StringIO()
result = self.ioclass()
gen = XMLGenerator(result)
gen.startDocument()
@ -360,6 +394,29 @@ class XmlgenTest(unittest.TestCase):
'<a:g2 xml:lang="en">Hello</a:g2>'
'</a:g1>'))
def test_no_close_file(self):
result = self.ioclass()
def func(out):
gen = XMLGenerator(out)
gen.startDocument()
gen.startElement("doc", {})
func(result)
self.assertFalse(result.closed)
class StringXmlgenTest(XmlgenTest, unittest.TestCase):
ioclass = StringIO
class BytesIOXmlgenTest(XmlgenTest, unittest.TestCase):
ioclass = io.BytesIO
class WriterXmlgenTest(XmlgenTest, unittest.TestCase):
class ioclass(list):
write = list.append
closed = False
def getvalue(self):
return b''.join(self)
class XMLFilterBaseTest(unittest.TestCase):
def test_filter_basic(self):
@ -804,7 +861,9 @@ class XmlReaderTest(XmlTestBase):
def test_main():
run_unittest(MakeParserTest,
SaxutilsTest,
XmlgenTest,
StringXmlgenTest,
BytesIOXmlgenTest,
WriterXmlgenTest,
ExpatReaderTest,
ErrorReportingTest,
XmlReaderTest)

View File

@ -4,6 +4,7 @@ convenience of application and driver writers.
"""
import os, urlparse, urllib, types
import io
import sys
import handler
import xmlreader
@ -13,15 +14,6 @@ try:
except AttributeError:
_StringTypes = [types.StringType]
# See whether the xmlcharrefreplace error handler is
# supported
try:
from codecs import xmlcharrefreplace_errors
_error_handling = "xmlcharrefreplace"
del xmlcharrefreplace_errors
except ImportError:
_error_handling = "strict"
def __dict_replace(s, d):
"""Replace substrings of a string using a dictionary."""
for key, value in d.items():
@ -82,25 +74,46 @@ def quoteattr(data, entities={}):
return data
def _gettextwriter(out, encoding):
if out is None:
import sys
out = sys.stdout
if isinstance(out, io.RawIOBase):
buffer = io.BufferedIOBase(out)
# Keep the original file open when the TextIOWrapper is
# destroyed
buffer.close = lambda: None
else:
# This is to handle passed objects that aren't in the
# IOBase hierarchy, but just have a write method
buffer = io.BufferedIOBase()
buffer.writable = lambda: True
buffer.write = out.write
try:
# TextIOWrapper uses this methods to determine
# if BOM (for UTF-16, etc) should be added
buffer.seekable = out.seekable
buffer.tell = out.tell
except AttributeError:
pass
# wrap a binary writer with TextIOWrapper
return io.TextIOWrapper(buffer, encoding=encoding,
errors='xmlcharrefreplace',
newline='\n')
class XMLGenerator(handler.ContentHandler):
def __init__(self, out=None, encoding="iso-8859-1"):
if out is None:
import sys
out = sys.stdout
handler.ContentHandler.__init__(self)
self._out = out
out = _gettextwriter(out, encoding)
self._write = out.write
self._flush = out.flush
self._ns_contexts = [{}] # contains uri -> prefix dicts
self._current_context = self._ns_contexts[-1]
self._undeclared_ns_maps = []
self._encoding = encoding
def _write(self, text):
if isinstance(text, str):
self._out.write(text)
else:
self._out.write(text.encode(self._encoding, _error_handling))
def _qname(self, name):
"""Builds a qualified name from a (ns_url, localname) pair"""
if name[0]:
@ -121,9 +134,12 @@ class XMLGenerator(handler.ContentHandler):
# ContentHandler methods
def startDocument(self):
self._write('<?xml version="1.0" encoding="%s"?>\n' %
self._write(u'<?xml version="1.0" encoding="%s"?>\n' %
self._encoding)
def endDocument(self):
self._flush()
def startPrefixMapping(self, prefix, uri):
self._ns_contexts.append(self._current_context.copy())
self._current_context[uri] = prefix
@ -134,39 +150,39 @@ class XMLGenerator(handler.ContentHandler):
del self._ns_contexts[-1]
def startElement(self, name, attrs):
self._write('<' + name)
self._write(u'<' + name)
for (name, value) in attrs.items():
self._write(' %s=%s' % (name, quoteattr(value)))
self._write('>')
self._write(u' %s=%s' % (name, quoteattr(value)))
self._write(u'>')
def endElement(self, name):
self._write('</%s>' % name)
self._write(u'</%s>' % name)
def startElementNS(self, name, qname, attrs):
self._write('<' + self._qname(name))
self._write(u'<' + self._qname(name))
for prefix, uri in self._undeclared_ns_maps:
if prefix:
self._out.write(' xmlns:%s="%s"' % (prefix, uri))
self._write(u' xmlns:%s="%s"' % (prefix, uri))
else:
self._out.write(' xmlns="%s"' % uri)
self._write(u' xmlns="%s"' % uri)
self._undeclared_ns_maps = []
for (name, value) in attrs.items():
self._write(' %s=%s' % (self._qname(name), quoteattr(value)))
self._write('>')
self._write(u' %s=%s' % (self._qname(name), quoteattr(value)))
self._write(u'>')
def endElementNS(self, name, qname):
self._write('</%s>' % self._qname(name))
self._write(u'</%s>' % self._qname(name))
def characters(self, content):
self._write(escape(content))
self._write(escape(unicode(content)))
def ignorableWhitespace(self, content):
self._write(content)
self._write(unicode(content))
def processingInstruction(self, target, data):
self._write('<?%s %s?>' % (target, data))
self._write(u'<?%s %s?>' % (target, data))
class XMLFilterBase(xmlreader.XMLReader):

View File

@ -202,6 +202,8 @@ Core and Builtins
Library
-------
- Issue #1470548: XMLGenerator now works with UTF-16 and UTF-32 encodings.
- Issue #6975: os.path.realpath() now correctly resolves multiple nested
symlinks on POSIX platforms.