From 91d5193b3ad208f359107512ff12a416c9dbec3b Mon Sep 17 00:00:00 2001 From: Florent Xicluna Date: Tue, 1 Nov 2011 23:31:09 +0100 Subject: [PATCH] Closes #2892: preserve iterparse events in case of SyntaxError. --- Lib/test/test_xml_etree.py | 1 + Lib/xml/etree/ElementTree.py | 42 +++++++++++++++++++++--------------- Misc/NEWS | 2 ++ Modules/_elementtree.c | 42 +++++++++++++++++++++--------------- 4 files changed, 53 insertions(+), 34 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 22fafa9a2e6..b7a996c29ca 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -754,6 +754,7 @@ def iterparse(): ... print(action, elem.tag) ... except ET.ParseError as v: ... print(v) + end document junk after document element: line 1, column 12 """ diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index ba338797841..f94c48c09c7 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1250,6 +1250,7 @@ class _IterParseIterator: self._close_file = close_source self._events = [] self._index = 0 + self._error = None self.root = self._root = None self._parser = parser # wire up the parser for event reporting @@ -1291,24 +1292,31 @@ class _IterParseIterator: while 1: try: item = self._events[self._index] - except IndexError: - if self._parser is None: - self.root = self._root - if self._close_file: - self._file.close() - raise StopIteration - # load event buffer - del self._events[:] - self._index = 0 - data = self._file.read(16384) - if data: - self._parser.feed(data) - else: - self._root = self._parser.close() - self._parser = None - else: - self._index = self._index + 1 + self._index += 1 return item + except IndexError: + pass + if self._error: + e = self._error + self._error = None + raise e + if self._parser is None: + self.root = self._root + if self._close_file: + self._file.close() + raise StopIteration + # load event buffer + del self._events[:] + self._index = 0 + data = 
self._file.read(16384) + if data: + try: + self._parser.feed(data) + except SyntaxError as exc: + self._error = exc + else: + self._root = self._parser.close() + self._parser = None def __iter__(self): return self diff --git a/Misc/NEWS b/Misc/NEWS index 657c80290d1..9e7a96cc2f2 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -66,6 +66,8 @@ Core and Builtins Library ------- +- Issue #2892: preserve iterparse events in case of SyntaxError. + - Issue #670664: Fix HTMLParser to correctly handle the content of ``<script>...</script>`` and ``<style>...</style>``. diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 3061d8eaf3f..788772113c4 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -3000,6 +3000,7 @@ PyInit__elementtree(void) " self._file = file\n" " self._events = []\n" " self._index = 0\n" + " self._error = None\n" " self.root = self._root = None\n" " b = cElementTree.TreeBuilder()\n" " self._parser = cElementTree.XMLParser(b)\n" @@ -3008,24 +3009,31 @@ PyInit__elementtree(void) " while 1:\n" " try:\n" " item = self._events[self._index]\n" - " except IndexError:\n" - " if self._parser is None:\n" - " self.root = self._root\n" - " if self._close_file:\n" - " self._file.close()\n" - " raise StopIteration\n" - " # load event buffer\n" - " del self._events[:]\n" - " self._index = 0\n" - " data = self._file.read(16384)\n" - " if data:\n" - " self._parser.feed(data)\n" - " else:\n" - " self._root = self._parser.close()\n" - " self._parser = None\n" - " else:\n" - " self._index = self._index + 1\n" + " self._index += 1\n" " return item\n" + " except IndexError:\n" + " pass\n" + " if self._error:\n" + " e = self._error\n" + " self._error = None\n" + " raise e\n" + " if self._parser is None:\n" + " self.root = self._root\n" + " if self._close_file:\n" + " self._file.close()\n" + " raise StopIteration\n" + " # load event buffer\n" + " del self._events[:]\n" + " self._index = 0\n" + " data = self._file.read(16384)\n" + " if data:\n" + " try:\n" + " self._parser.feed(data)\n" + " 
except SyntaxError as exc:\n" + " self._error = exc\n" + " else:\n" + " self._root = self._parser.close()\n" + " self._parser = None\n" " def __iter__(self):\n" " return self\n" "cElementTree.iterparse = iterparse\n"