Fix issue #6325 - robotparser to honor URLs with query strings.
This commit is contained in:
parent
96a60ae90c
commit
3f8ab965f7
|
@ -205,6 +205,17 @@ bad = ['/folder1/anotherfile.html']
|
|||
RobotTest(13, doc, good, bad, agent="googlebot")
|
||||
|
||||
|
||||
# 14. For issue #6325 (query string support)
|
||||
doc = """
|
||||
User-agent: *
|
||||
Disallow: /some/path?name=value
|
||||
"""
|
||||
|
||||
good = ['/some/path']
|
||||
bad = ['/some/path?name=value']
|
||||
|
||||
RobotTest(14, doc, good, bad)
|
||||
|
||||
|
||||
class NetworkTestCase(unittest.TestCase):
|
||||
|
||||
|
|
|
@ -129,8 +129,10 @@ class RobotFileParser:
|
|||
return True
|
||||
# search for given user agent matches
|
||||
# the first match counts
|
||||
url = urllib.parse.quote(
|
||||
urllib.parse.urlparse(urllib.parse.unquote(url))[2])
|
||||
parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url))
|
||||
url = urllib.parse.urlunparse(('','',parsed_url.path,
|
||||
parsed_url.params,parsed_url.query, parsed_url.fragment))
|
||||
url = urllib.parse.quote(url)
|
||||
if not url:
|
||||
url = "/"
|
||||
for entry in self.entries:
|
||||
|
|
Loading…
Reference in New Issue