diff --git a/Lib/_markupbase.py b/Lib/_markupbase.py index 98b9037afff..2af5f1c23b6 100644 --- a/Lib/_markupbase.py +++ b/Lib/_markupbase.py @@ -107,6 +107,10 @@ class ParserBase: if decltype == "doctype": self.handle_decl(data) else: + # According to the HTML5 specs sections "8.2.4.44 Bogus + # comment state" and "8.2.4.45 Markup declaration open + # state", a comment token should be emitted. + # Calling unknown_decl provides more flexibility though. self.unknown_decl(data) return j + 1 if c in "\"'": diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 87b5060611d..8c2e25e61a6 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -323,6 +323,16 @@ DOCTYPE html [ ("endtag", element_lower)], collector=Collector()) + def test_condcoms(self): + html = ('' + '' + '') + expected = [('comment', "[if IE & !(lte IE 8)]>aren'tcondcomspretty?' + html = ('broken condcom' + '' + '' + 'foo' + '') + # According to the HTML5 specs sections "8.2.4.44 Bogus comment state" + # and "8.2.4.45 Markup declaration open state", comment tokens should + # be emitted instead of 'unknown decl', but calling unknown_decl + # provides more flexibility. + # See also Lib/_markupbase.py:parse_declaration + expected = [ + ('unknown decl', 'if !(IE)'), + ('data', 'broken condcom'), + ('unknown decl', 'endif'), + ('unknown decl', 'if ! IE'), + ('startendtag', 'link', [('href', 'favicon.tiff')]), + ('unknown decl', 'endif'), + ('unknown decl', 'if !IE 6'), + ('startendtag', 'img', [('src', 'firefox.png')]), + ('unknown decl', 'endif'), + ('unknown decl', 'if !ie 6'), + ('starttag', 'b', []), + ('data', 'foo'), + ('endtag', 'b'), + ('unknown decl', 'endif'), + ('unknown decl', 'if (!IE)|(lt IE 9)'), + ('startendtag', 'img', [('src', 'mammoth.bmp')]), + ('unknown decl', 'endif') + ] + self._run_check(html, expected) + class AttributesStrictTestCase(TestCaseBase):