Catch I/O errors when parsing robots.txt file.

Add version number, printed at startup in non-quiet mode.
This commit is contained in:
Guido van Rossum 1997-01-30 03:30:20 +00:00
parent df47bafa1c
commit 325a64f207
1 changed file with 14 additions and 6 deletions

View File

@ -93,6 +93,8 @@ rooturl -- URL to start checking
""" """
__version__ = "0.1"
import sys import sys
import os import os
@ -135,7 +137,6 @@ def main():
except getopt.error, msg: except getopt.error, msg:
sys.stdout = sys.stderr sys.stdout = sys.stderr
print msg print msg
print __doc__ % globals()
sys.exit(2) sys.exit(2)
for o, a in opts: for o, a in opts:
if o == '-R': if o == '-R':
@ -151,6 +152,9 @@ def main():
if o == '-v': if o == '-v':
verbose = verbose + 1 verbose = verbose + 1
if verbose:
print AGENTNAME, "version", __version__
if restart: if restart:
if verbose > 0: if verbose > 0:
print "Loading checkpoint from %s ..." % dumpfile print "Loading checkpoint from %s ..." % dumpfile
@ -234,13 +238,17 @@ class Checker:
self.addrobot(root) self.addrobot(root)
def addrobot(self, root): def addrobot(self, root):
self.robots[root] = rp = robotparser.RobotFileParser()
if verbose > 3:
print "Parsing robots.txt file"
rp.debug = 1
url = urlparse.urljoin(root, "/robots.txt") url = urlparse.urljoin(root, "/robots.txt")
self.robots[root] = rp = robotparser.RobotFileParser()
if verbose > 2:
print "Parsing", url
rp.debug = 1
rp.set_url(url) rp.set_url(url)
rp.read() try:
rp.read()
except IOError, msg:
if verbose > 1:
print "I/O error parsing", url, ":", msg
def run(self): def run(self):
while self.todo: while self.todo: