diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index ada0ca87887..578438159d8 100755 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -636,11 +636,20 @@ class UrlParseTestCase(unittest.TestCase): ('s3', 'foo.com', '/stuff', '', '', '')) self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"), ('x-newscheme', 'foo.com', '/stuff', '', '', '')) + self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"), + ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment')) + self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"), + ('x-newscheme', 'foo.com', '/stuff', '', 'query', '')) + # And for bytes... self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"), (b's3', b'foo.com', b'/stuff', b'', b'', b'')) self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"), (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b'')) + self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"), + (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment')) + self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"), + (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'')) def test_mixed_types_rejected(self): # Several functions that process either strings or ASCII encoded bytes diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 01067ae6ac0..47b7962662a 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -44,16 +44,9 @@ uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', 'imap', 'wais', 'file', 'mms', 'https', 'shttp', 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', 'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh'] -non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', - 'telnet', 'wais', 'imap', 'snews', 'sip', 'sips'] uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', 'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips', 'mms', '', 'sftp'] -uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms', - 'gopher', 'rtsp', 'rtspu', 'sip', 'sips', ''] -uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', - 'nntp', 'wais', 'https', 'shttp', 'snews', - 'file', 'prospero', ''] # Characters valid in scheme names scheme_chars = ('abcdefghijklmnopqrstuvwxyz' @@ -357,9 +350,9 @@ def urlsplit(url, scheme='', allow_fragments=True): if (('[' in netloc and ']' not in netloc) or (']' in netloc and '[' not in netloc)): raise ValueError("Invalid IPv6 URL") - if allow_fragments and scheme in uses_fragment and '#' in url: + if allow_fragments and '#' in url: url, fragment = url.split('#', 1) - if scheme in uses_query and '?' in url: + if '?' in url: url, query = url.split('?', 1) v = SplitResult(scheme, netloc, url, query, fragment) _parse_cache[key] = v diff --git a/Misc/NEWS b/Misc/NEWS index 1e7153d0807..f4203124839 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -63,6 +63,9 @@ Core and Builtins Library ------- +- Issue #9374: Generic parsing of query and fragment portions of url for any + scheme. Supported both by RFC3986 and RFC2396. + - Issue #14798: Fix the functions in pyclbr to raise an ImportError when the first part of a dotted name is not a package. Patch by Xavier de Gaye.