From f996e775eaf22e6a6465e640a6de46ea74011bc0 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Fri, 16 Mar 2012 05:53:30 +0200 Subject: [PATCH] Closes Issue #14246: _elementtree parser will now handle io.StringIO --- Lib/test/test_xml_etree.py | 14 ++++++++++++++ Modules/_elementtree.c | 23 ++++++++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index fedf550a549..97fc6909e20 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -16,6 +16,7 @@ import sys import html +import io import unittest from test import support @@ -2026,6 +2027,18 @@ class ElementSlicingTest(unittest.TestCase): del e[::2] self.assertEqual(self._subelem_tags(e), ['a1']) + +class StringIOTest(unittest.TestCase): + def test_read_from_stringio(self): + tree = ET.ElementTree() + stream = io.StringIO() + stream.write('''<?xml version="1.0"?><site></site>''') + stream.seek(0) + tree.parse(stream) + + self.assertEqual(tree.getroot().tag, 'site') + + # -------------------------------------------------------------------- @@ -2077,6 +2090,7 @@ def test_main(module=pyET): test_classes = [ ElementSlicingTest, + StringIOTest, ElementTreeTest, TreeBuilderTest] if module is pyET: diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index ba37cd7b7a7..99935b92dc0 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -2682,6 +2682,7 @@ xmlparser_parse(XMLParserObject* self, PyObject* args) PyObject* reader; PyObject* buffer; + PyObject* temp; PyObject* res; PyObject* fileobj; @@ -2703,7 +2704,27 @@ xmlparser_parse(XMLParserObject* self, PyObject* args) return NULL; } - if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) { + if (PyUnicode_CheckExact(buffer)) { + /* A unicode object is encoded into bytes using UTF-8 */ + if (PyUnicode_GET_SIZE(buffer) == 0) { + Py_DECREF(buffer); + break; + } + temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass"); + if (!temp) { + /* Propagate exception 
from PyUnicode_AsEncodedString */ + Py_DECREF(buffer); + Py_DECREF(reader); + return NULL; + } + + /* Here we no longer need the original buffer since it contains + * unicode. Make it point to the encoded bytes object. + */ + Py_DECREF(buffer); + buffer = temp; + } + else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) { Py_DECREF(buffer); break; }