From ea24dda01f0bc151b55e922a1d5ec2b6193c8006 Mon Sep 17 00:00:00 2001 From: Senthil Kumaran Date: Sat, 19 May 2012 08:10:40 +0800 Subject: [PATCH] Issue9374 - Generic parsing of query and fragment portion of urls for any scheme --- Lib/test/test_urlparse.py | 4 ++++ Lib/urlparse.py | 11 ++--------- Misc/NEWS | 3 +++ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 39a897a1a91..827282cedc0 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -493,6 +493,10 @@ class UrlParseTestCase(unittest.TestCase): ('s3','foo.com','/stuff','','','')) self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff"), ('x-newscheme','foo.com','/stuff','','','')) + self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff?query#fragment"), + ('x-newscheme','foo.com','/stuff','','query','fragment')) + self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff?query"), + ('x-newscheme','foo.com','/stuff','','query','')) def test_withoutscheme(self): # Test urlparse without scheme diff --git a/Lib/urlparse.py b/Lib/urlparse.py index b42e0f4c621..32eebe69bd4 100644 --- a/Lib/urlparse.py +++ b/Lib/urlparse.py @@ -40,16 +40,9 @@ uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', 'imap', 'wais', 'file', 'mms', 'https', 'shttp', 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', 'svn', 'svn+ssh', 'sftp','nfs','git', 'git+ssh'] -non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', - 'telnet', 'wais', 'imap', 'snews', 'sip', 'sips'] uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', 'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips', 'mms', '', 'sftp'] -uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms', - 'gopher', 'rtsp', 'rtspu', 'sip', 'sips', ''] -uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', - 'nntp', 'wais', 'https', 'shttp', 'snews', - 'file', 'prospero', ''] # Characters valid in scheme names scheme_chars = ('abcdefghijklmnopqrstuvwxyz' @@ -204,9 +197,9 @@ def urlsplit(url, scheme='', allow_fragments=True): if (('[' in netloc and ']' not in netloc) or (']' in netloc and '[' not in netloc)): raise ValueError("Invalid IPv6 URL") - if allow_fragments and scheme in uses_fragment and '#' in url: + if allow_fragments and '#' in url: url, fragment = url.split('#', 1) - if scheme in uses_query and '?' in url: + if '?' in url: url, query = url.split('?', 1) v = SplitResult(scheme, netloc, url, query, fragment) _parse_cache[key] = v diff --git a/Misc/NEWS b/Misc/NEWS index 088d1e4e9e0..646b35155ae 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -60,6 +60,9 @@ Core and Builtins Library ------- +- Issue #9374: Generic parsing of query and fragment portions of url for any + scheme. Supported both by RFC3986 and RFC2396. + - Issue #14798: Fix the functions in pyclbr to raise an ImportError when the first part of a dotted name is not a package. Patch by Xavier de Gaye.