mirror of https://github.com/python/cpython
Merged revisions 83209 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k
........
r83209 | senthil.kumaran | 2010-07-28 21:57:56 +0530 (Wed, 28 Jul 2010) | 3 lines
Fix Issue6325 - robotparser to honor URLs with query strings.
........
This commit is contained in:
parent
946963fdc5
commit
a4f79f97db
|
@@ -131,7 +131,12 @@ class RobotFileParser:
|
||||||
return True
|
return True
|
||||||
# search for given user agent matches
|
# search for given user agent matches
|
||||||
# the first match counts
|
# the first match counts
|
||||||
url = urllib.quote(urlparse.urlparse(urllib.unquote(url))[2]) or "/"
|
parsed_url = urlparse.urlparse(urllib.unquote(url))
|
||||||
|
url = urlparse.urlunparse(('', '', parsed_url.path,
|
||||||
|
parsed_url.params, parsed_url.query, parsed_url.fragment))
|
||||||
|
url = urllib.quote(url)
|
||||||
|
if not url:
|
||||||
|
url = "/"
|
||||||
for entry in self.entries:
|
for entry in self.entries:
|
||||||
if entry.applies_to(useragent):
|
if entry.applies_to(useragent):
|
||||||
return entry.allowance(url)
|
return entry.allowance(url)
|
||||||
|
|
|
@@ -202,6 +202,17 @@ bad = ['/folder1/anotherfile.html']
|
||||||
RobotTest(13, doc, good, bad, agent="googlebot")
|
RobotTest(13, doc, good, bad, agent="googlebot")
|
||||||
|
|
||||||
|
|
||||||
|
# 14. For issue #6325 (query string support)
|
||||||
|
doc = """
|
||||||
|
User-agent: *
|
||||||
|
Disallow: /some/path?name=value
|
||||||
|
"""
|
||||||
|
|
||||||
|
good = ['/some/path']
|
||||||
|
bad = ['/some/path?name=value']
|
||||||
|
|
||||||
|
RobotTest(14, doc, good, bad)
|
||||||
|
|
||||||
|
|
||||||
class NetworkTestCase(unittest.TestCase):
|
class NetworkTestCase(unittest.TestCase):
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue