From bd3e771a979de04e273f735e69fb8842006137be Mon Sep 17 00:00:00 2001
From: "Michael W. Hudson"
Date: Mon, 18 Mar 2002 13:06:00 +0000
Subject: [PATCH] amk's fix attached to [ 516299 ] urlparse can get fragments wrong

---
 Lib/test/output/test_urlparse |  5 +++++
 Lib/test/test_urlparse.py     | 18 ++++++++++++++++++
 Lib/urlparse.py               |  4 +++-
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/Lib/test/output/test_urlparse b/Lib/test/output/test_urlparse
index ca71729e7ff..c478783295b 100644
--- a/Lib/test/output/test_urlparse
+++ b/Lib/test/output/test_urlparse
@@ -1,4 +1,9 @@
 test_urlparse
+http://www.python.org = ('http', 'www.python.org', '', '', '', '')
+http://www.python.org#abc = ('http', 'www.python.org', '', '', '', 'abc')
+http://www.python.org/#abc = ('http', 'www.python.org', '/', '', '', 'abc')
+http://a/b/c/d;p?q#f = ('http', 'a', '/b/c/d', 'p', 'q', 'f')
+
 urlparse.urljoin() tests
 
 g:h           = 'g:h'
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 20336bc7081..48c526bf394 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -4,6 +4,24 @@ errors = 0
 
 RFC1808_BASE = "http://a/b/c/d;p?q#f"
 
+for url, expected in [('http://www.python.org',
+                       ('http', 'www.python.org', '', '', '', '')),
+                      ('http://www.python.org#abc',
+                       ('http', 'www.python.org', '', '', '', 'abc')),
+                      ('http://www.python.org/#abc',
+                       ('http', 'www.python.org', '/', '', '', 'abc')),
+                      (RFC1808_BASE,
+                       ('http', 'a', '/b/c/d', 'p', 'q', 'f')),
+                      ]:
+    result = urlparse.urlparse(url)
+    print "%-13s = %r" % (url, result)
+    if result != expected:
+        errors += 1
+        print "urlparse(%r)" % url
+        print ("expected %r,\n"
+               "     got %r") % (expected, result)
+print
+
 def checkJoin(relurl, expected):
     global errors
     result = urlparse.urljoin(RFC1808_BASE, relurl)
diff --git a/Lib/urlparse.py b/Lib/urlparse.py
index cd6ad26dae7..ee99645d59b 100644
--- a/Lib/urlparse.py
+++ b/Lib/urlparse.py
@@ -87,7 +87,9 @@ def urlsplit(url, scheme='', allow_fragments=1):
             if url[:2] == '//':
                 i = url.find('/', 2)
                 if i < 0:
-                    i = len(url)
+                    i = url.find('#')
+                    if i < 0:
+                        i = len(url)
                 netloc = url[2:i]
                 url = url[i:]
             if allow_fragments and '#' in url:
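
For reference, a minimal sketch of the behaviour the new test cases pin down
(assuming the patched Lib/urlparse.py is the version being imported). With the
extra '#' check added to urlsplit(), a fragment on a URL that has no path is
split out instead of being left inside the network-location field:

    # Python 2 syntax, matching Lib/test/test_urlparse.py above.
    import urlparse

    # No path after the host: the fragment is now recognised.
    print urlparse.urlparse('http://www.python.org#abc')
    # expected: ('http', 'www.python.org', '', '', '', 'abc')

    # With an explicit path the parse takes the old code path and is
    # unaffected by this change.
    print urlparse.urlparse('http://www.python.org/#abc')
    # expected: ('http', 'www.python.org', '/', '', '', 'abc')

The expected tuples come straight from the new entries added to
Lib/test/output/test_urlparse.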