mirror of https://github.com/python/cpython
When bad HTML is encountered, ignore the page rather than failing with
a traceback.
commit ce56c377a0
parent 05595e9d73
@@ -400,7 +400,15 @@ class Checker:
         if local_fragment and self.nonames:
             self.markdone(url_pair)
             return
-        page = self.getpage(url_pair)
+        try:
+            page = self.getpage(url_pair)
+        except sgmllib.SGMLParseError, msg:
+            msg = self.sanitize(msg)
+            self.note(0, "Error parsing %s: %s",
+                      self.format_url(url_pair), msg)
+            # Dont actually mark the URL as bad - it exists, just
+            # we can't parse it!
+            page = None
         if page:
             # Store the page which corresponds to this URL.
             self.name_table[url] = page
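The change follows a log-and-continue pattern: wrap the parse in try/except, report the failure, and leave page set to None so the URL is still treated as existing rather than being marked bad. A minimal sketch of the same idea in modern Python 3 follows; fetch_and_parse, LinkCollector, and check are hypothetical stand-ins for Checker.getpage() and the surrounding webchecker code, and html.parser is used because sgmllib no longer exists in Python 3.

import logging
from html.parser import HTMLParser

logging.basicConfig(level=logging.INFO, format="%(message)s")


class LinkCollector(HTMLParser):
    """Tiny stand-in for webchecker's page object: collects href targets."""

    def __init__(self):
        super().__init__()
        self.links = []

    def handle_starttag(self, tag, attrs):
        if tag == "a":
            for name, value in attrs:
                if name == "href" and value:
                    self.links.append(value)


def fetch_and_parse(url, html_text):
    """Hypothetical helper standing in for Checker.getpage()."""
    parser = LinkCollector()
    parser.feed(html_text)  # html.parser is lenient; old sgmllib raised SGMLParseError here
    parser.close()
    return parser


def check(url, html_text):
    try:
        page = fetch_and_parse(url, html_text)
    except Exception as msg:  # the commit catches sgmllib.SGMLParseError specifically
        logging.info("Error parsing %s: %s", url, msg)
        # Don't mark the URL as bad - it exists, we just can't parse it.
        page = None
    if page:
        print(url, "->", page.links)


check("http://example.com/", '<p><a href="/docs">docs</a><b>unclosed')

The key point, as in the commit, is that a parse failure is downgraded to a note and the page is simply skipped instead of propagating a traceback.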