#17403: urllib.robotparser normalizes the urls before adding to ruleline.
This helps in handling certain types of invalid urls in a conservative manner.
This commit is contained in:
parent
eb4c9c77b8
commit
c70a6ae49b
|
@ -234,6 +234,18 @@ bad = ['/some/path']
|
|||
|
||||
RobotTest(15, doc, good, bad)
|
||||
|
||||
# 16. Empty query (issue #17403). Normalizing the url first.
|
||||
doc = """
|
||||
User-agent: *
|
||||
Allow: /some/path?
|
||||
Disallow: /another/path?
|
||||
"""
|
||||
|
||||
good = ['/some/path?']
|
||||
bad = ['/another/path?']
|
||||
|
||||
RobotTest(16, doc, good, bad)
|
||||
|
||||
|
||||
class NetworkTestCase(unittest.TestCase):
|
||||
|
||||
|
|
|
@ -157,6 +157,7 @@ class RuleLine:
|
|||
if path == '' and not allowance:
|
||||
# an empty value means allow all
|
||||
allowance = True
|
||||
path = urllib.parse.urlunparse(urllib.parse.urlparse(path))
|
||||
self.path = urllib.parse.quote(path)
|
||||
self.allowance = allowance
|
||||
|
||||
|
|
|
@ -24,6 +24,10 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #17403: urllib.robotparser normalizes the urls before adding to
|
||||
ruleline. This helps in handling certain types of invalid urls in a conservative
|
||||
manner.
|
||||
|
||||
- Issue #18025: Fixed a segfault in io.BufferedIOBase.readinto() when raw
|
||||
stream's read() returns more bytes than requested.
|
||||
|
||||
|
|
Loading…
Reference in New Issue