From 30c223cff53610ac07d575f19a5bd8dbc3715b72 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 24 May 2010 21:48:07 +0000 Subject: [PATCH] Merged revisions 81504 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/py3k ................ r81504 | victor.stinner | 2010-05-24 23:46:25 +0200 (lun., 24 mai 2010) | 13 lines Recorded merge of revisions 81500-81501 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r81500 | victor.stinner | 2010-05-24 23:33:24 +0200 (lun., 24 mai 2010) | 2 lines Issue #6662: Fix parsing of malformatted charref (&#bad;) ........ r81501 | victor.stinner | 2010-05-24 23:37:28 +0200 (lun., 24 mai 2010) | 2 lines Add the author of the last fix (Issue #6662) ........ ................ --- Lib/html/parser.py | 3 +++ Lib/test/test_htmlparser.py | 7 +++++++ Misc/ACKS | 1 + Misc/NEWS | 3 +++ 4 files changed, 14 insertions(+) diff --git a/Lib/html/parser.py b/Lib/html/parser.py index 83a58258c7a..c2c7f6bf5da 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -175,6 +175,9 @@ class HTMLParser(_markupbase.ParserBase): i = self.updatepos(i, k) continue else: + if ";" in rawdata[i:]: #bail by consuming &# + self.handle_data(rawdata[0:2]) + i = self.updatepos(i, 2) break elif startswith('&', i): match = entityref.match(rawdata, i) diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index dd74aac09b2..e982218dba1 100755 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -136,6 +136,13 @@ text ("data", "\n"), ]) + def test_malformatted_charref(self): + self._run_check("

<p>&#bad;</p>

", [ + ("starttag", "p", []), + ("data", "&#bad;"), + ("endtag", "p"), + ]) + def test_unclosed_entityref(self): self._run_check("&entityref foo", [ ("entityref", "entityref"), diff --git a/Misc/ACKS b/Misc/ACKS index 838f6f073f1..459e21640df 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -846,3 +846,4 @@ Siebren van der Zee Uwe Zessin Tarek Ziadé Peter Åstrand +Fredrik Håård diff --git a/Misc/NEWS b/Misc/NEWS index 27d16aa42eb..2b5b7919a78 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -54,6 +54,9 @@ C-API Library ------- +- Issue #6662: Fix parsing of malformatted charref (&#bad;), patch written by + Fredrik Håård + - Issue #6268: Fix seek() method of codecs.open(), don't read or write the BOM twice after seek(0). Fix also reset() method of codecs, UTF-16, UTF-32 and StreamWriter classes.