diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 4ae6ed33858..762500789f7 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -709,15 +709,17 @@ class UrlParseTestCase(unittest.TestCase): def test_portseparator(self): # Issue 754016 makes changes for port separator ':' from scheme separator - self.assertEqual(urllib.parse.urlparse("path:80"), - ('','','path:80','','','')) + self.assertEqual(urllib.parse.urlparse("http:80"), ('http','','80','','','')) + self.assertEqual(urllib.parse.urlparse("https:80"), ('https','','80','','','')) + self.assertEqual(urllib.parse.urlparse("path:80"), ('path','','80','','','')) self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','','')) self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','','')) self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"), ('http','www.python.org:80','','','','')) # As usual, need to check bytes input as well - self.assertEqual(urllib.parse.urlparse(b"path:80"), - (b'',b'',b'path:80',b'',b'',b'')) + self.assertEqual(urllib.parse.urlparse(b"http:80"), (b'http',b'',b'80',b'',b'',b'')) + self.assertEqual(urllib.parse.urlparse(b"https:80"), (b'https',b'',b'80',b'',b'',b'')) + self.assertEqual(urllib.parse.urlparse(b"path:80"), (b'path',b'',b'80',b'',b'',b'')) self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b'')) self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b'')) self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"), diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 3a38dc14c90..31fd7e16ee7 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -431,31 +431,11 @@ def urlsplit(url, scheme='', allow_fragments=True): netloc = query = fragment = '' i = url.find(':') if i > 0: - if url[:i] == 'http': # optimize the common case - url = url[i+1:] - if url[:2] == '//': - netloc, url = _splitnetloc(url, 2) - if (('[' in netloc and ']' not in netloc) or - (']' in netloc and '[' not in netloc)): - raise ValueError("Invalid IPv6 URL") - if allow_fragments and '#' in url: - url, fragment = url.split('#', 1) - if '?' in url: - url, query = url.split('?', 1) - _checknetloc(netloc) - v = SplitResult('http', netloc, url, query, fragment) - _parse_cache[key] = v - return _coerce_result(v) for c in url[:i]: if c not in scheme_chars: break else: - # make sure "url" is not actually a port number (in which case - # "scheme" is really part of the path) - rest = url[i+1:] - if not rest or any(c not in '0123456789' for c in rest): - # not a port number - scheme, url = url[:i].lower(), rest + scheme, url = url[:i].lower(), url[i+1:] if url[:2] == '//': netloc, url = _splitnetloc(url, 2) diff --git a/Misc/NEWS.d/next/Library/2017-12-26-14-32-23.bpo-27657.6BhyVK.rst b/Misc/NEWS.d/next/Library/2017-12-26-14-32-23.bpo-27657.6BhyVK.rst new file mode 100644 index 00000000000..77746c0ce63 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-12-26-14-32-23.bpo-27657.6BhyVK.rst @@ -0,0 +1,2 @@ +Fix urllib.parse.urlparse() with numeric paths. A string like "path:80" is +no longer parsed as a path but as a scheme ("path") and a path ("80").