bpo-41989: Fix HTMLParser "unclosed script tag causes data loss"

When .close() is called, HTMLParser should flush all remaining content,
even when that content is inside an unclosed script or style element.
This commit is contained in:
Waylan Limberg 2020-10-11 21:03:33 -04:00
parent 47ecfd8030
commit 3c5c943339
2 changed files with 11 additions and 1 deletions

View File

@@ -241,7 +241,7 @@ class HTMLParser(_markupbase.ParserBase):
else:
assert 0, "interesting.search() lied"
# end while
if end and i < n and not self.cdata_elem:
if end and i < n:
if self.convert_charrefs and not self.cdata_elem:
self.handle_data(unescape(rawdata[i:n]))
else:

View File

@@ -315,6 +315,16 @@ text
("endtag", element_lower)],
collector=Collector(convert_charrefs=False))
def test_EOF_in_cdata(self):
# Regression test for bpo-41989: input that ends inside an unclosed
# <script> element must still be reported as data rather than dropped.
#
# The payload deliberately contains text resembling a comment, an entity
# reference, ordinary tags, a stray </style>, and a split
# '</script' + '>'; the expected events require it back verbatim.
content = """<!-- not a comment --> &not-an-entity-ref;
<a href="" /> </p><p> <span></span></style>
'</script' + '>'"""
# No closing </script> is appended, so the input ends inside the element.
s = f'<script>{content}'
# Expected events: the script start tag, then a data event whose text
# equals the payload unchanged.
# NOTE(review): _run_check is defined outside this view; presumably it
# feeds `s` to the parser, closes it, and compares events -- confirm.
self._run_check(s, [
("starttag", 'script', []),
("data", content)
])
def test_comments(self):
html = ("<!-- I'm a valid comment -->"
'<!--me too!-->'