Catch I/O errors when parsing robots.txt file.
Add version number, printed at startup in non-quiet mode.
This commit is contained in:
parent
df47bafa1c
commit
325a64f207
|
@ -93,6 +93,8 @@ rooturl -- URL to start checking
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
__version__ = "0.1"
|
||||||
|
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
@ -135,7 +137,6 @@ def main():
|
||||||
except getopt.error, msg:
|
except getopt.error, msg:
|
||||||
sys.stdout = sys.stderr
|
sys.stdout = sys.stderr
|
||||||
print msg
|
print msg
|
||||||
print __doc__ % globals()
|
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
for o, a in opts:
|
for o, a in opts:
|
||||||
if o == '-R':
|
if o == '-R':
|
||||||
|
@ -151,6 +152,9 @@ def main():
|
||||||
if o == '-v':
|
if o == '-v':
|
||||||
verbose = verbose + 1
|
verbose = verbose + 1
|
||||||
|
|
||||||
|
if verbose:
|
||||||
|
print AGENTNAME, "version", __version__
|
||||||
|
|
||||||
if restart:
|
if restart:
|
||||||
if verbose > 0:
|
if verbose > 0:
|
||||||
print "Loading checkpoint from %s ..." % dumpfile
|
print "Loading checkpoint from %s ..." % dumpfile
|
||||||
|
@ -234,13 +238,17 @@ class Checker:
|
||||||
self.addrobot(root)
|
self.addrobot(root)
|
||||||
|
|
||||||
def addrobot(self, root):
|
def addrobot(self, root):
|
||||||
self.robots[root] = rp = robotparser.RobotFileParser()
|
|
||||||
if verbose > 3:
|
|
||||||
print "Parsing robots.txt file"
|
|
||||||
rp.debug = 1
|
|
||||||
url = urlparse.urljoin(root, "/robots.txt")
|
url = urlparse.urljoin(root, "/robots.txt")
|
||||||
|
self.robots[root] = rp = robotparser.RobotFileParser()
|
||||||
|
if verbose > 2:
|
||||||
|
print "Parsing", url
|
||||||
|
rp.debug = 1
|
||||||
rp.set_url(url)
|
rp.set_url(url)
|
||||||
|
try:
|
||||||
rp.read()
|
rp.read()
|
||||||
|
except IOError, msg:
|
||||||
|
if verbose > 1:
|
||||||
|
print "I/O error parsing", url, ":", msg
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
while self.todo:
|
while self.todo:
|
||||||
|
|
Loading…
Reference in New Issue