From 0965ee213e079a84686a6cfd1ada125f0ff8d575 Mon Sep 17 00:00:00 2001
From: Florent Xicluna
Date: Tue, 1 Nov 2011 23:34:41 +0100
Subject: [PATCH] Issue #2892: preserve iterparse events in case of SyntaxError

---
 Lib/test/test_xml_etree.py   |  1 +
 Lib/xml/etree/ElementTree.py | 42 +++++++++++++++++++++---------------
 Modules/_elementtree.c       | 42 +++++++++++++++++++++---------------
 3 files changed, 51 insertions(+), 34 deletions(-)

diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 26e8eab796f..1b66a8988f4 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -739,6 +739,7 @@ def iterparse():
     ... except ET.ParseError, v:
     ...     print v
     junk after document element: line 1, column 12
+    end document
     """
 
 def writefile():
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py
index 19862e2d34b..2b0cf0c7833 100644
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -1210,6 +1210,7 @@ class _IterParseIterator(object):
         self._close_file = close_source
         self._events = []
         self._index = 0
+        self._error = None
         self.root = self._root = None
         self._parser = parser
         # wire up the parser for event reporting
@@ -1255,24 +1256,31 @@ class _IterParseIterator(object):
         while 1:
             try:
                 item = self._events[self._index]
-            except IndexError:
-                if self._parser is None:
-                    self.root = self._root
-                    if self._close_file:
-                        self._file.close()
-                    raise StopIteration
-                # load event buffer
-                del self._events[:]
-                self._index = 0
-                data = self._file.read(16384)
-                if data:
-                    self._parser.feed(data)
-                else:
-                    self._root = self._parser.close()
-                    self._parser = None
-            else:
-                self._index = self._index + 1
+                self._index += 1
                 return item
+            except IndexError:
+                pass
+            if self._error:
+                e = self._error
+                self._error = None
+                raise e
+            if self._parser is None:
+                self.root = self._root
+                if self._close_file:
+                    self._file.close()
+                raise StopIteration
+            # load event buffer
+            del self._events[:]
+            self._index = 0
+            data = self._file.read(16384)
+            if data:
+                try:
+                    self._parser.feed(data)
+                except SyntaxError as exc:
+                    self._error = exc
+            else:
+                self._root = self._parser.close()
+                self._parser = None
 
     def __iter__(self):
         return self
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c
index c368e1456bd..379aa01d0a5 100644
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -2970,6 +2970,7 @@ init_elementtree(void)
         "  self._file = file\n"
         "  self._events = []\n"
         "  self._index = 0\n"
+        "  self._error = None\n"
         "  self.root = self._root = None\n"
         "  b = cElementTree.TreeBuilder()\n"
         "  self._parser = cElementTree.XMLParser(b)\n"
@@ -2978,24 +2979,31 @@ init_elementtree(void)
         "  while 1:\n"
         "    try:\n"
         "      item = self._events[self._index]\n"
-        "    except IndexError:\n"
-        "      if self._parser is None:\n"
-        "        self.root = self._root\n"
-        "        if self._close_file:\n"
-        "          self._file.close()\n"
-        "        raise StopIteration\n"
-        "      # load event buffer\n"
-        "      del self._events[:]\n"
-        "      self._index = 0\n"
-        "      data = self._file.read(16384)\n"
-        "      if data:\n"
-        "        self._parser.feed(data)\n"
-        "      else:\n"
-        "        self._root = self._parser.close()\n"
-        "        self._parser = None\n"
-        "    else:\n"
-        "      self._index = self._index + 1\n"
+        "      self._index += 1\n"
         "      return item\n"
+        "    except IndexError:\n"
+        "      pass\n"
+        "    if self._error:\n"
+        "      e = self._error\n"
+        "      self._error = None\n"
+        "      raise e\n"
+        "    if self._parser is None:\n"
+        "      self.root = self._root\n"
+        "      if self._close_file:\n"
+        "        self._file.close()\n"
+        "      raise StopIteration\n"
+        "    # load event buffer\n"
+        "    del self._events[:]\n"
+        "    self._index = 0\n"
+        "    data = self._file.read(16384)\n"
+        "    if data:\n"
+        "      try:\n"
+        "        self._parser.feed(data)\n"
+        "      except SyntaxError as exc:\n"
+        "        self._error = exc\n"
+        "    else:\n"
+        "      self._root = self._parser.close()\n"
+        "      self._parser = None\n"
         " def __iter__(self):\n"
         "  return self\n"
         "cElementTree.iterparse = iterparse\n"