bpo-27657: Fix urlparse() with numeric paths (#661)
* bpo-27657: Fix urlparse() with numeric paths

  Revert the parsing decision from bpo-754016 in favor of the documented consensus in bpo-16932 on how to treat strings without a // to designate the netloc.

* bpo-22891: Remove urlsplit() optimization for 'http' prefixed inputs.
This commit is contained in:
parent
fbe3c76c7c
commit
5a88d50ff0
|
@ -709,15 +709,17 @@ class UrlParseTestCase(unittest.TestCase):
|
||||||
|
|
||||||
def test_portseparator(self):
|
def test_portseparator(self):
|
||||||
# Issue 754016 makes changes for port separator ':' from scheme separator
|
# Issue 754016 makes changes for port separator ':' from scheme separator
|
||||||
self.assertEqual(urllib.parse.urlparse("path:80"),
|
self.assertEqual(urllib.parse.urlparse("http:80"), ('http','','80','','',''))
|
||||||
('','','path:80','','',''))
|
self.assertEqual(urllib.parse.urlparse("https:80"), ('https','','80','','',''))
|
||||||
|
self.assertEqual(urllib.parse.urlparse("path:80"), ('path','','80','','',''))
|
||||||
self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
|
self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
|
||||||
self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
|
self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
|
||||||
self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
|
self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
|
||||||
('http','www.python.org:80','','','',''))
|
('http','www.python.org:80','','','',''))
|
||||||
# As usual, need to check bytes input as well
|
# As usual, need to check bytes input as well
|
||||||
self.assertEqual(urllib.parse.urlparse(b"path:80"),
|
self.assertEqual(urllib.parse.urlparse(b"http:80"), (b'http',b'',b'80',b'',b'',b''))
|
||||||
(b'',b'',b'path:80',b'',b'',b''))
|
self.assertEqual(urllib.parse.urlparse(b"https:80"), (b'https',b'',b'80',b'',b'',b''))
|
||||||
|
self.assertEqual(urllib.parse.urlparse(b"path:80"), (b'path',b'',b'80',b'',b'',b''))
|
||||||
self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
|
self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
|
||||||
self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
|
self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
|
||||||
self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
|
self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
|
||||||
|
|
|
@ -431,31 +431,11 @@ def urlsplit(url, scheme='', allow_fragments=True):
|
||||||
netloc = query = fragment = ''
|
netloc = query = fragment = ''
|
||||||
i = url.find(':')
|
i = url.find(':')
|
||||||
if i > 0:
|
if i > 0:
|
||||||
if url[:i] == 'http': # optimize the common case
|
|
||||||
url = url[i+1:]
|
|
||||||
if url[:2] == '//':
|
|
||||||
netloc, url = _splitnetloc(url, 2)
|
|
||||||
if (('[' in netloc and ']' not in netloc) or
|
|
||||||
(']' in netloc and '[' not in netloc)):
|
|
||||||
raise ValueError("Invalid IPv6 URL")
|
|
||||||
if allow_fragments and '#' in url:
|
|
||||||
url, fragment = url.split('#', 1)
|
|
||||||
if '?' in url:
|
|
||||||
url, query = url.split('?', 1)
|
|
||||||
_checknetloc(netloc)
|
|
||||||
v = SplitResult('http', netloc, url, query, fragment)
|
|
||||||
_parse_cache[key] = v
|
|
||||||
return _coerce_result(v)
|
|
||||||
for c in url[:i]:
|
for c in url[:i]:
|
||||||
if c not in scheme_chars:
|
if c not in scheme_chars:
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
# make sure "url" is not actually a port number (in which case
|
scheme, url = url[:i].lower(), url[i+1:]
|
||||||
# "scheme" is really part of the path)
|
|
||||||
rest = url[i+1:]
|
|
||||||
if not rest or any(c not in '0123456789' for c in rest):
|
|
||||||
# not a port number
|
|
||||||
scheme, url = url[:i].lower(), rest
|
|
||||||
|
|
||||||
if url[:2] == '//':
|
if url[:2] == '//':
|
||||||
netloc, url = _splitnetloc(url, 2)
|
netloc, url = _splitnetloc(url, 2)
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Fix urllib.parse.urlparse() with numeric paths. A string like "path:80" is
|
||||||
|
no longer parsed as a path but as a scheme ("path") and a path ("80").
|
Loading…
Reference in New Issue