From ce56c377a0f548cdac3ab9c66117df654f934484 Mon Sep 17 00:00:00 2001
From: Mark Hammond
Date: Thu, 27 Feb 2003 06:59:10 +0000
Subject: [PATCH] When bad HTML is encountered, ignore the page rather than
 failing with a traceback.

---
 Tools/webchecker/webchecker.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/Tools/webchecker/webchecker.py b/Tools/webchecker/webchecker.py
index e8d0ed746fe..e89529e5cf3 100755
--- a/Tools/webchecker/webchecker.py
+++ b/Tools/webchecker/webchecker.py
@@ -400,7 +400,15 @@ class Checker:
         if local_fragment and self.nonames:
             self.markdone(url_pair)
             return
-        page = self.getpage(url_pair)
+        try:
+            page = self.getpage(url_pair)
+        except sgmllib.SGMLParseError, msg:
+            msg = self.sanitize(msg)
+            self.note(0, "Error parsing %s: %s",
+                      self.format_url(url_pair), msg)
+            # Dont actually mark the URL as bad - it exists, just
+            # we can't parse it!
+            page = None
         if page:
             # Store the page which corresponds to this URL.
             self.name_table[url] = page
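
For context, below is a minimal standalone sketch (Python 2, the same era as this patch) of the pattern the change introduces: an sgmllib parse error is caught, logged, and the page is treated as unparseable (None) rather than letting the traceback propagate or marking the URL as bad. The MyParser class and get_page() helper here are illustrative stand-ins, not part of webchecker.py.

    # Illustrative sketch only; names below are hypothetical, not from webchecker.py.
    import sgmllib

    class MyParser(sgmllib.SGMLParser):
        """Trivial parser that just collects hyperlink targets."""

        def __init__(self):
            sgmllib.SGMLParser.__init__(self)
            self.links = []

        def start_a(self, attrs):
            # attrs is a list of (name, value) pairs for the <a> tag.
            for name, value in attrs:
                if name == "href":
                    self.links.append(value)

    def get_page(html):
        """Return a parsed page, or None if the HTML cannot be parsed."""
        parser = MyParser()
        try:
            parser.feed(html)
            parser.close()
        except sgmllib.SGMLParseError, msg:
            # The URL itself exists; only its markup is bad, so report the
            # problem and skip link extraction instead of flagging the URL.
            print "Error parsing page: %s" % msg
            return None
        return parser

    if __name__ == "__main__":
        page = get_page('<a href="http://example.com/">ok</a>')
        if page is not None:
            print page.links

The design choice mirrors the patch: a parse failure is a property of the page's markup, not of the link pointing to it, so the caller sees page = None and the URL is not recorded as broken.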