When bad HTML is encountered, ignore the page rather than failing with a traceback.
Mark Hammond 2003-02-27 06:59:10 +00:00
parent 05595e9d73
commit ce56c377a0
1 changed file with 9 additions and 1 deletion

@@ -400,7 +400,15 @@ class Checker:
         if local_fragment and self.nonames:
             self.markdone(url_pair)
             return
-        page = self.getpage(url_pair)
+        try:
+            page = self.getpage(url_pair)
+        except sgmllib.SGMLParseError, msg:
+            msg = self.sanitize(msg)
+            self.note(0, "Error parsing %s: %s",
+                      self.format_url(url_pair), msg)
+            # Dont actually mark the URL as bad - it exists, just
+            # we can't parse it!
+            page = None
         if page:
             # Store the page which corresponds to this URL.
             self.name_table[url] = page
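
For context, a minimal standalone sketch (not part of this commit, using the Python 2 sgmllib module that webchecker relied on at the time) of the failure mode the new try/except guards against: markup that sgmllib cannot parse, such as a malformed declaration, raises sgmllib.SGMLParseError, which previously propagated out of getpage() and aborted the whole crawl with a traceback. The input string below is just one illustrative trigger.

    # Hypothetical sketch, Python 2: show SGMLParseError being raised on bad
    # markup and handled the way this commit handles it - log and move on.
    import sgmllib

    parser = sgmllib.SGMLParser()
    try:
        parser.feed("<!*> not really HTML")   # malformed declaration
        parser.close()
    except sgmllib.SGMLParseError, msg:
        # Mirror the commit's behaviour: note the error, treat the page as
        # unparseable rather than as a broken link, and carry on.
        print "Error parsing page, skipping it:", msg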