mirror of https://github.com/python/cpython
#13987: HTMLParser is now able to handle malformed start tags.
This commit is contained in:
parent
d2307cb48a
commit
65d36dab4d
|
@@ -315,8 +315,8 @@ class HTMLParser(markupbase.ParserBase):
|
|||
- self.__starttag_text.rfind("\n")
|
||||
else:
|
||||
offset = offset + len(self.__starttag_text)
|
||||
- self.error("junk characters in start tag: %r"
|
||||
- % (rawdata[k:endpos][:20],))
|
||||
+ self.handle_data(rawdata[i:endpos])
|
||||
+ return endpos
|
||||
if end.endswith('/>'):
|
||||
# XHTML-style empty tag: <span attr="value" />
|
||||
self.handle_startendtag(tag, attrs)
|
||||
|
@@ -353,8 +353,10 @@ class HTMLParser(markupbase.ParserBase):
|
|||
# end of input in or before attribute value, or we have the
|
||||
# '/' from a '/>' ending
|
||||
return -1
|
||||
- self.updatepos(i, j)
|
||||
- self.error("malformed start tag")
|
||||
+ if j > i:
|
||||
+ return j
|
||||
+ else:
|
||||
+ return i + 1
|
||||
raise AssertionError("we should not get here!")
|
||||
|
||||
# Internal -- parse endtag, return end or -1 if incomplete
|
||||
|
|
|
@@ -206,7 +206,8 @@ text
|
|||
self._run_check("</$>", [('comment', '$')])
|
||||
self._run_check("</", [('data', '</')])
|
||||
self._run_check("</a", [('data', '</a')])
|
||||
- self._parse_error("<a<a>")
|
||||
+ # XXX this might be wrong
|
||||
+ self._run_check("<a<a>", [('data', '<a'), ('starttag', 'a', [])])
|
||||
self._run_check("</a<a>", [('endtag', 'a<a')])
|
||||
self._run_check("<!", [('data', '<!')])
|
||||
self._run_check("<a", [('data', '<a')])
|
||||
|
|
Loading…
Reference in New Issue