Merged revisions 83209 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r83209 | senthil.kumaran | 2010-07-28 21:57:56 +0530 (Wed, 28 Jul 2010) | 3 lines

  Fix Issue6325 - robotparser to honor URLs with query strings.
........
This commit is contained in:
Senthil Kumaran 2010-07-28 16:35:35 +00:00
parent 946963fdc5
commit a4f79f97db
2 changed files with 17 additions and 1 deletions

View File

@@ -131,7 +131,12 @@ class RobotFileParser:
             return True
         # search for given user agent matches
         # the first match counts
-        url = urllib.quote(urlparse.urlparse(urllib.unquote(url))[2]) or "/"
+        parsed_url = urlparse.urlparse(urllib.unquote(url))
+        url = urlparse.urlunparse(('', '', parsed_url.path,
+            parsed_url.params, parsed_url.query, parsed_url.fragment))
+        url = urllib.quote(url)
+        if not url:
+            url = "/"
         for entry in self.entries:
             if entry.applies_to(useragent):
                 return entry.allowance(url)

View File

@@ -202,6 +202,17 @@ bad = ['/folder1/anotherfile.html']
 RobotTest(13, doc, good, bad, agent="googlebot")
+# 14. For issue #6325 (query string support)
+doc = """
+User-agent: *
+Disallow: /some/path?name=value
+"""
+good = ['/some/path']
+bad = ['/some/path?name=value']
+RobotTest(14, doc, good, bad)
 class NetworkTestCase(unittest.TestCase):