Fix an index, add more tests, avoid raising errors for unknown declarations, and clean up comments.

This commit is contained in:
Ezio Melotti 2012-02-13 20:36:55 +02:00
parent ef18737b7f
commit 369cbd744e
2 changed files with 27 additions and 2 deletions

View File

@ -229,12 +229,13 @@ class HTMLParser(markupbase.ParserBase):
if rawdata[i:i+2] != '<!':
self.error('unexpected call to parse_html_declaration()')
if rawdata[i:i+4] == '<!--':
# this case is actually already handled in goahead()
return self.parse_comment(i)
elif rawdata[i:i+3] == '<![':
return self.parse_marked_section(i)
elif rawdata[i:i+9].lower() == '<!doctype':
# find the closing >
gtpos = rawdata.find('>', 9)
gtpos = rawdata.find('>', i+9)
if gtpos == -1:
return -1
self.handle_decl(rawdata[i+2:gtpos])
@ -427,7 +428,7 @@ class HTMLParser(markupbase.ParserBase):
pass
def unknown_decl(self, data):
self.error("unknown declaration: %r" % (data,))
pass
# Internal -- helper to remove special character quoting
entitydefs = None

View File

@ -215,6 +215,30 @@ text
self._parse_error("<a foo='>'")
self._parse_error("<a foo='>")
def test_valid_doctypes(self):
# from http://www.w3.org/QA/2002/04/valid-dtd-list.html
dtds = ['HTML', # HTML5 doctype
('HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
'"http://www.w3.org/TR/html4/strict.dtd"'),
('HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" '
'"http://www.w3.org/TR/html4/loose.dtd"'),
('html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" '
'"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"'),
('html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" '
'"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"'),
('math PUBLIC "-//W3C//DTD MathML 2.0//EN" '
'"http://www.w3.org/Math/DTD/mathml2/mathml2.dtd"'),
('html PUBLIC "-//W3C//DTD '
'XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" '
'"http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd"'),
('svg PUBLIC "-//W3C//DTD SVG 1.1//EN" '
'"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"'),
'html PUBLIC "-//IETF//DTD HTML 2.0//EN"',
'html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"']
for dtd in dtds:
self._run_check("<!DOCTYPE %s>" % dtd,
[('decl', 'DOCTYPE ' + dtd)])
def test_declaration_junk_chars(self):
self._run_check("<!DOCTYPE foo $ >", [('decl', 'DOCTYPE foo $ ')])