bpo-43882 Remove the newline, and tab early. From query and fragments. (GH-25921)

This commit is contained in:
Senthil Kumaran 2021-05-05 15:50:05 -07:00 committed by GitHub
parent 47895e31b6
commit 985ac01637
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 21 additions and 11 deletions

View File

@ -614,32 +614,40 @@ class UrlParseTestCase(unittest.TestCase):
def test_urlsplit_remove_unsafe_bytes(self):
# Remove ASCII tabs and newlines from input
url = "http://www.python.org/java\nscript:\talert('msg\r\n')/#frag"
url = "http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, "http")
self.assertEqual(p.netloc, "www.python.org")
self.assertEqual(p.path, "/javascript:alert('msg')/")
self.assertEqual(p.query, "")
self.assertEqual(p.fragment, "frag")
self.assertEqual(p.query, "query=something")
self.assertEqual(p.fragment, "fragment")
self.assertEqual(p.username, None)
self.assertEqual(p.password, None)
self.assertEqual(p.hostname, "www.python.org")
self.assertEqual(p.port, None)
self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/#frag")
self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
# Remove ASCII tabs and newlines from input as bytes.
url = b"http://www.python.org/java\nscript:\talert('msg\r\n')/#frag"
url = b"http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, b"http")
self.assertEqual(p.netloc, b"www.python.org")
self.assertEqual(p.path, b"/javascript:alert('msg')/")
self.assertEqual(p.query, b"")
self.assertEqual(p.fragment, b"frag")
self.assertEqual(p.query, b"query=something")
self.assertEqual(p.fragment, b"fragment")
self.assertEqual(p.username, None)
self.assertEqual(p.password, None)
self.assertEqual(p.hostname, b"www.python.org")
self.assertEqual(p.port, None)
self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/#frag")
self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/?query=something#fragment")
# with scheme as cache-key
url = "http://www.python.org/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
scheme = "ht\ntp"
for _ in range(2):
p = urllib.parse.urlsplit(url, scheme=scheme)
self.assertEqual(p.scheme, "http")
self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
def test_attributes_bad_port(self):
"""Check handling of invalid ports."""

View File

@ -456,6 +456,11 @@ def urlsplit(url, scheme='', allow_fragments=True):
"""
url, scheme, _coerce_result = _coerce_args(url, scheme)
for b in _UNSAFE_URL_BYTES_TO_REMOVE:
url = url.replace(b, "")
scheme = scheme.replace(b, "")
allow_fragments = bool(allow_fragments)
key = url, scheme, allow_fragments, type(url), type(scheme)
cached = _parse_cache.get(key, None)
@ -472,9 +477,6 @@ def urlsplit(url, scheme='', allow_fragments=True):
else:
scheme, url = url[:i].lower(), url[i+1:]
for b in _UNSAFE_URL_BYTES_TO_REMOVE:
url = url.replace(b, "")
if url[:2] == '//':
netloc, url = _splitnetloc(url, 2)
if (('[' in netloc and ']' not in netloc) or