Closes Issue #14246: _elementtree parser will now handle io.StringIO

This commit is contained in:
Eli Bendersky 2012-03-16 05:53:30 +02:00
parent e53d977e80
commit f996e775ea
2 changed files with 36 additions and 1 deletions

View File

@ -16,6 +16,7 @@
import sys import sys
import html import html
import io
import unittest import unittest
from test import support from test import support
@ -2026,6 +2027,18 @@ class ElementSlicingTest(unittest.TestCase):
del e[::2] del e[::2]
self.assertEqual(self._subelem_tags(e), ['a1']) self.assertEqual(self._subelem_tags(e), ['a1'])
class StringIOTest(unittest.TestCase):
    """Check that an ElementTree can be parsed from an in-memory text stream."""

    def test_read_from_stringio(self):
        """Parse a minimal document held in an io.StringIO and check its root tag."""
        # Fill the text stream through write() and rewind it, so the parser
        # starts reading from the beginning of the document.
        source = io.StringIO()
        source.write('''<?xml version="1.0"?><site></site>''')
        source.seek(0)

        document = ET.ElementTree()
        document.parse(source)

        root = document.getroot()
        self.assertEqual(root.tag, 'site')
# -------------------------------------------------------------------- # --------------------------------------------------------------------
@ -2077,6 +2090,7 @@ def test_main(module=pyET):
test_classes = [ test_classes = [
ElementSlicingTest, ElementSlicingTest,
StringIOTest,
ElementTreeTest, ElementTreeTest,
TreeBuilderTest] TreeBuilderTest]
if module is pyET: if module is pyET:

View File

@ -2682,6 +2682,7 @@ xmlparser_parse(XMLParserObject* self, PyObject* args)
PyObject* reader; PyObject* reader;
PyObject* buffer; PyObject* buffer;
PyObject* temp;
PyObject* res; PyObject* res;
PyObject* fileobj; PyObject* fileobj;
@ -2703,7 +2704,27 @@ xmlparser_parse(XMLParserObject* self, PyObject* args)
return NULL; return NULL;
} }
if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) { if (PyUnicode_CheckExact(buffer)) {
/* A unicode object is encoded into bytes using UTF-8 */
if (PyUnicode_GET_SIZE(buffer) == 0) {
Py_DECREF(buffer);
break;
}
temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
if (!temp) {
/* Propagate exception from PyUnicode_AsEncodedString */
Py_DECREF(buffer);
Py_DECREF(reader);
return NULL;
}
/* Here we no longer need the original buffer since it contains
* unicode. Make it point to the encoded bytes object.
*/
Py_DECREF(buffer);
buffer = temp;
}
else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
Py_DECREF(buffer); Py_DECREF(buffer);
break; break;
} }