#17403: urllib.robotparser normalizes URLs before adding them to a RuleLine.

This helps in handling certain types of invalid URLs in a conservative manner.
This commit is contained in:
Senthil Kumaran 2013-05-29 05:54:31 -07:00
parent eb4c9c77b8
commit c70a6ae49b
3 changed files with 17 additions and 0 deletions

View File

@ -234,6 +234,18 @@ bad = ['/some/path']
RobotTest(15, doc, good, bad)
# 16. Empty query (issue #17403). Normalizing the url first.
doc = """
User-agent: *
Allow: /some/path?
Disallow: /another/path?
"""
good = ['/some/path?']
bad = ['/another/path?']
RobotTest(16, doc, good, bad)
class NetworkTestCase(unittest.TestCase):

View File

@ -157,6 +157,7 @@ class RuleLine:
if path == '' and not allowance:
# an empty value means allow all
allowance = True
path = urllib.parse.urlunparse(urllib.parse.urlparse(path))
self.path = urllib.parse.quote(path)
self.allowance = allowance

View File

@ -24,6 +24,10 @@ Core and Builtins
Library
-------
- Issue #17403: urllib.robotparser normalizes URLs before adding them to a
RuleLine. This helps in handling certain types of invalid URLs in a
conservative manner.
- Issue #18025: Fixed a segfault in io.BufferedIOBase.readinto() when raw
stream's read() returns more bytes than requested.