bpo:41989 Fix htmlparser "unclosed script tag causes data loss"
When .close() is called, HTMLParser should flush all remaining buffered content, even when that content is inside an unclosed <script> or <style> element.
This commit is contained in:
parent
47ecfd8030
commit
3c5c943339
|
@ -241,7 +241,7 @@ class HTMLParser(_markupbase.ParserBase):
|
||||||
else:
|
else:
|
||||||
assert 0, "interesting.search() lied"
|
assert 0, "interesting.search() lied"
|
||||||
# end while
|
# end while
|
||||||
if end and i < n and not self.cdata_elem:
|
if end and i < n:
|
||||||
if self.convert_charrefs and not self.cdata_elem:
|
if self.convert_charrefs and not self.cdata_elem:
|
||||||
self.handle_data(unescape(rawdata[i:n]))
|
self.handle_data(unescape(rawdata[i:n]))
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -315,6 +315,16 @@ text
|
||||||
("endtag", element_lower)],
|
("endtag", element_lower)],
|
||||||
collector=Collector(convert_charrefs=False))
|
collector=Collector(convert_charrefs=False))
|
||||||
|
|
||||||
|
def test_EOF_in_cdata(self):
|
||||||
|
content = """<!-- not a comment --> &not-an-entity-ref;
|
||||||
|
<a href="" /> </p><p> <span></span></style>
|
||||||
|
'</script' + '>'"""
|
||||||
|
s = f'<script>{content}'
|
||||||
|
self._run_check(s, [
|
||||||
|
("starttag", 'script', []),
|
||||||
|
("data", content)
|
||||||
|
])
|
||||||
|
|
||||||
def test_comments(self):
|
def test_comments(self):
|
||||||
html = ("<!-- I'm a valid comment -->"
|
html = ("<!-- I'm a valid comment -->"
|
||||||
'<!--me too!-->'
|
'<!--me too!-->'
|
||||||
|
|
Loading…
Reference in New Issue