From 0b0b5f0279683b6b48762cf11df05a0f1ffc1bbc Mon Sep 17 00:00:00 2001
From: Guido van Rossum
Date: Fri, 31 Jan 1997 18:57:23 +0000
Subject: [PATCH] Spin off checking of external page in a subroutine. Increase MAXPAGE to 150K. Add back printing of __doc__ for usage message.

---
 Tools/webchecker/webchecker.py | 37 ++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/Tools/webchecker/webchecker.py b/Tools/webchecker/webchecker.py
index c4548616848..9e676caf015 100755
--- a/Tools/webchecker/webchecker.py
+++ b/Tools/webchecker/webchecker.py
@@ -121,7 +121,7 @@ import robotparser
 
 # Tunable parameters
 DEFROOT = "file:/usr/local/etc/httpd/htdocs/" # Default root URL
-MAXPAGE = 50000 # Ignore files bigger than this
+MAXPAGE = 150000 # Ignore files bigger than this
 ROUNDSIZE = 50 # Number of links processed per round
 DUMPFILE = "@webchecker.pickle" # Pickled checkpoint
 AGENTNAME = "webchecker" # Agent name for robots.txt parser
@@ -145,6 +145,7 @@ def main():
     except getopt.error, msg:
         sys.stdout = sys.stderr
         print msg
+        print __doc__%globals()
         sys.exit(2)
     for o, a in opts:
         if o == '-R':
@@ -314,22 +315,24 @@ class Checker:
         for url in urls:
             if verbose > 0:
                 show("HREF ", url, " from", self.ext[url])
-            if not checkext:
-                continue
-            if url[:7] == 'mailto:':
-                if verbose > 2: print "Not checking", url
-                continue
-            if verbose > 2: print "Checking", url, "..."
-            try:
-                f = self.urlopener.open(url)
-                safeclose(f)
-                if verbose > 3: print "OK"
-                if self.bad.has_key(url):
-                    self.setgood(url)
-            except IOError, msg:
-                msg = sanitize(msg)
-                if verbose > 0: print "Error", msg
-                self.setbad(url, msg)
+            if checkext:
+                self.checkextpage(url)
+
+    def checkextpage(self, url):
+        if url[:7] == 'mailto:' or url[:5] == 'news:':
+            if verbose > 2: print "Not checking", url
+            return
+        if verbose > 2: print "Checking", url, "..."
+        try:
+            f = self.urlopener.open(url)
+            safeclose(f)
+            if verbose > 3: print "OK"
+            if self.bad.has_key(url):
+                self.setgood(url)
+        except IOError, msg:
+            msg = sanitize(msg)
+            if verbose > 0: print "Error", msg
+            self.setbad(url, msg)
 
     def report_errors(self):
         if not self.bad: