diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py index 2cbc2ecbc73..7cee47a7c5d 100644 --- a/Lib/HTMLParser.py +++ b/Lib/HTMLParser.py @@ -175,6 +175,9 @@ class HTMLParser(markupbase.ParserBase): i = self.updatepos(i, k) continue else: + if ";" in rawdata[i:]: # bail by consuming the "&#" found at offset i + self.handle_data(rawdata[i:i+2]) + i = self.updatepos(i, i+2) break elif startswith('&', i): match = entityref.match(rawdata, i) diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 810af6c8cbc..c45cf00ecea 100755 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -313,6 +313,13 @@ DOCTYPE html [ ("starttag", "html", [("foo", u"\u20AC&aa&unsupported;")]) ]) + def test_malformatted_charref(self): + self._run_check("<p>&#bad;</p>", [ + ("starttag", "p", []), + ("data", "&#bad;"), + ("endtag", "p"), + ]) + def test_main(): test_support.run_unittest(HTMLParserTestCase) diff --git a/Misc/ACKS b/Misc/ACKS index efaa20ffe34..94a22a860ec 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -191,7 +191,7 @@ Luke Dunstan Andy Dustman Gary Duzan Eugene Dvurechenski -Josip Dzolonga +Josip Dzolonga Maxim Dzumanenko Walter Dörwald Hans Eckardt @@ -812,3 +812,4 @@ Uwe Zessin Tarek ZiadŽ Peter Åstrand Jesse Noller +Fredrik Håård diff --git a/Misc/NEWS b/Misc/NEWS index badd19af6e9..eba95a01f28 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -55,6 +55,9 @@ C-API Library ------- +- Issue #6662: Fix parsing of malformatted charref (&#bad;), patch written by + Fredrik Håård + - Issue #1628205: Socket file objects returned by socket.socket.makefile() now properly handles EINTR within the read, readline, write & flush methods. The socket.sendall() method now properly handles interrupted system calls.