mirror of https://github.com/python/cpython
When bad HTML is encountered, ignore the page rather than failing with
a traceback.
This commit is contained in:
parent
05595e9d73
commit
ce56c377a0
|
@@ -400,7 +400,15 @@ class Checker:
|
||||||
if local_fragment and self.nonames:
|
if local_fragment and self.nonames:
|
||||||
self.markdone(url_pair)
|
self.markdone(url_pair)
|
||||||
return
|
return
|
||||||
page = self.getpage(url_pair)
|
try:
|
||||||
|
page = self.getpage(url_pair)
|
||||||
|
except sgmllib.SGMLParseError, msg:
|
||||||
|
msg = self.sanitize(msg)
|
||||||
|
self.note(0, "Error parsing %s: %s",
|
||||||
|
self.format_url(url_pair), msg)
|
||||||
|
# Don't actually mark the URL as bad - it exists, just
|
||||||
|
# we can't parse it!
|
||||||
|
page = None
|
||||||
if page:
|
if page:
|
||||||
# Store the page which corresponds to this URL.
|
# Store the page which corresponds to this URL.
|
||||||
self.name_table[url] = page
|
self.name_table[url] = page
|
||||||
|
|
Loading…
Reference in New Issue