Revert "[3.8] bpo-27657: Fix urlparse() with numeric paths (GH-16839)" (GH-18525)

This reverts commit 0f3187c1ce.

The change broke the backwards compatibility of parsing behavior in a
patch release of Python (3.8.1). A decision was taken to revert this
patch in 3.8.2.

In https://bugs.python.org/issue27657 it was decided that the previous
behavior, such as

>>> urlparse('localhost:8080')
ParseResult(scheme='', netloc='', path='localhost:8080', params='', query='', fragment='')

>>> urlparse('undefined:8080')
ParseResult(scheme='', netloc='', path='undefined:8080', params='', query='', fragment='')

needs to be preserved in patch releases, as a number of users rely upon it.

Explicitly mention the releases involved with the revert in NEWS.
Adopt the wording suggested by @ned-deily.
This commit is contained in:
Senthil Kumaran 2020-02-16 13:47:21 -08:00 committed by GitHub
parent 0d860dd43c
commit ea316fd215
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 30 additions and 7 deletions

View File

@ -709,17 +709,15 @@ class UrlParseTestCase(unittest.TestCase):
def test_portseparator(self): def test_portseparator(self):
# Issue 754016 makes changes for port separator ':' from scheme separator # Issue 754016 makes changes for port separator ':' from scheme separator
self.assertEqual(urllib.parse.urlparse("http:80"), ('http','','80','','','')) self.assertEqual(urllib.parse.urlparse("path:80"),
self.assertEqual(urllib.parse.urlparse("https:80"), ('https','','80','','','')) ('','','path:80','','',''))
self.assertEqual(urllib.parse.urlparse("path:80"), ('path','','80','','',''))
self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','','')) self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','','')) self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"), self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
('http','www.python.org:80','','','','')) ('http','www.python.org:80','','','',''))
# As usual, need to check bytes input as well # As usual, need to check bytes input as well
self.assertEqual(urllib.parse.urlparse(b"http:80"), (b'http',b'',b'80',b'',b'',b'')) self.assertEqual(urllib.parse.urlparse(b"path:80"),
self.assertEqual(urllib.parse.urlparse(b"https:80"), (b'https',b'',b'80',b'',b'',b'')) (b'',b'',b'path:80',b'',b'',b''))
self.assertEqual(urllib.parse.urlparse(b"path:80"), (b'path',b'',b'80',b'',b'',b''))
self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b'')) self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b'')) self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"), self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),

View File

@ -431,11 +431,31 @@ def urlsplit(url, scheme='', allow_fragments=True):
netloc = query = fragment = '' netloc = query = fragment = ''
i = url.find(':') i = url.find(':')
if i > 0: if i > 0:
if url[:i] == 'http': # optimize the common case
url = url[i+1:]
if url[:2] == '//':
netloc, url = _splitnetloc(url, 2)
if (('[' in netloc and ']' not in netloc) or
(']' in netloc and '[' not in netloc)):
raise ValueError("Invalid IPv6 URL")
if allow_fragments and '#' in url:
url, fragment = url.split('#', 1)
if '?' in url:
url, query = url.split('?', 1)
_checknetloc(netloc)
v = SplitResult('http', netloc, url, query, fragment)
_parse_cache[key] = v
return _coerce_result(v)
for c in url[:i]: for c in url[:i]:
if c not in scheme_chars: if c not in scheme_chars:
break break
else: else:
scheme, url = url[:i].lower(), url[i+1:] # make sure "url" is not actually a port number (in which case
# "scheme" is really part of the path)
rest = url[i+1:]
if not rest or any(c not in '0123456789' for c in rest):
# not a port number
scheme, url = url[:i].lower(), rest
if url[:2] == '//': if url[:2] == '//':
netloc, url = _splitnetloc(url, 2) netloc, url = _splitnetloc(url, 2)

View File

@ -0,0 +1,5 @@
The original fix for bpo-27657, "Fix urlparse() with numeric paths" (GH-16839)
included in 3.8.1, inadvertently introduced a behavior change that broke
several third-party packages relying on the original undefined parsing
behavior. The change is reverted in 3.8.2, restoring the behavior of 3.8.0 and
earlier releases.