From 3c5c94333959ccca84a3fbc17403cfeee4e72a44 Mon Sep 17 00:00:00 2001 From: Waylan Limberg Date: Sun, 11 Oct 2020 21:03:33 -0400 Subject: [PATCH] bpo:41989 Fix htmlparser "unclosed script tag causes data loss" When calling .close() the htmlparser should flush all remaining content, even when that content is in an unclosed script or style tag. --- Lib/html/parser.py | 2 +- Lib/test/test_htmlparser.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Lib/html/parser.py b/Lib/html/parser.py index 60830779816..87a04899738 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -241,7 +241,7 @@ class HTMLParser(_markupbase.ParserBase): else: assert 0, "interesting.search() lied" # end while - if end and i < n and not self.cdata_elem: + if end and i < n: if self.convert_charrefs and not self.cdata_elem: self.handle_data(unescape(rawdata[i:n])) else: diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index a2bfb39d16a..f6ed20901b7 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -315,6 +315,16 @@ text ("endtag", element_lower)], collector=Collector(convert_charrefs=False)) + def test_EOF_in_cdata(self): + content = """ &not-an-entity-ref; +

+ ''""" + s = f'