# cpython/Lib/urlparse.py

"""Parse (absolute and relative) URLs.

See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding,
UC Irvine, June 1995.
"""

# Standard/builtin Python modules
import string
from string import join, split, rfind

# A classification of schemes ('' means apply by default)
# Each list names the schemes for which one URL feature applies.  The
# empty string is the scheme seen when a URL carries no scheme and no
# default scheme was supplied, so listing '' enables the feature for
# scheme-less (relative) URLs.

# Schemes for which urljoin() resolves a reference against the base;
# for any other scheme the reference is returned as parsed.
uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'wais', 'file',
		 'https', 'shttp',
		 'prospero', 'rtsp', 'rtspu', '']
# Schemes for which a leading '//' introduces a network location
# (consulted by urlparse(), urlunparse() and urljoin()).
uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', 'wais',
	       'file',
	       'https', 'shttp', 'snews',
	       'prospero', 'rtsp', 'rtspu', '']
# Not consulted by any function in this module.
non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', 'telnet', 'wais',
		    'snews',
		    ]
# Schemes for which urlparse() splits off ';params'.
uses_params = ['ftp', 'hdl', 'prospero', 'http',
	       'https', 'shttp', 'rtsp', 'rtspu',
	       '']
# Schemes for which urlparse() splits off '?query'.
uses_query = ['http', 'wais',
	      'https', 'shttp',
	      'gopher', 'rtsp', 'rtspu',
	      '']
# Schemes for which urlparse() splits off '#fragment' (only when the
# caller allows fragments).
uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais',
		 'https', 'shttp', 'snews',
		 'file', 'prospero', '']

# Characters valid in scheme names
# urlparse() accepts the text before the first ':' as a scheme only if
# every character of it appears in this string.
scheme_chars = string.letters + string.digits + '+-.'

# urlparse() memoizes its results in _parse_cache, keyed on
# (url, scheme, allow_fragments).  Once the cache holds MAX_CACHE_SIZE
# entries it is emptied wholesale before the next result is stored.
MAX_CACHE_SIZE = 20
_parse_cache = {}

def clear_cache():
	"""Clear the parse cache.

	The module-level cache name is rebound to a fresh, empty
	dictionary; the previous dictionary object is left untouched.
	"""
	global _parse_cache
	_parse_cache = {}


def urlparse(url, scheme = '', allow_fragments = 1):
	"""Parse a URL into 6 components:
	<scheme>://<netloc>/<path>;<params>?<query>#<fragment>
	Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
	Note that we don't break the components up in smaller bits
	(e.g. netloc is a single string) and we don't expand % escapes.

	`scheme' is the default scheme; it is used only when the URL
	does not itself begin with a valid "<scheme>:" prefix.  A scheme
	found in the URL is returned in lower case.  When
	`allow_fragments' is false, '#' is not treated as a delimiter
	and the fragment component is always ''.

	Components are split off from the right: the fragment starts at
	the left-most '#' (RFC 1808, section 2.4.1), the query at the
	left-most '?' of what remains, the params at the left-most ';'
	of what remains after that.  Which components are recognized
	depends on the scheme (see the uses_* lists).

	Results are memoized in the module's parse cache, keyed on all
	three arguments."""
	key = url, scheme, allow_fragments
	cached = _parse_cache.get(key)
	if cached is not None:
		return cached
	if len(_parse_cache) >= MAX_CACHE_SIZE:	# avoid runaway growth
		clear_cache()
	find = string.find
	netloc = params = query = fragment = ''
	i = find(url, ':')
	if i > 0:
		if url[:i] == 'http': # optimize the common case
			# 'http' appears in every uses_* list consulted
			# below, so the list lookups can be skipped and
			# each component split off unconditionally.
			scheme = 'http'
			url = url[i+1:]
			if url[:2] == '//':
				# netloc runs up to the next '/', or to
				# the end if there is none
				i = find(url, '/', 2)
				if i < 0:
					i = len(url)
				netloc = url[2:i]
				url = url[i:]
			if allow_fragments:
				i = find(url, '#')
				if i >= 0:
					fragment = url[i+1:]
					url = url[:i]
			i = find(url, '?')
			if i >= 0:
				query = url[i+1:]
				url = url[:i]
			i = find(url, ';')
			if i >= 0:
				params = url[i+1:]
				url = url[:i]
			parsed = scheme, netloc, url, params, query, fragment
			_parse_cache[key] = parsed
			return parsed
		# The text before ':' is a scheme only if all of its
		# characters are legal; otherwise the default scheme is
		# kept and the ':' stays part of the URL.
		for c in url[:i]:
			if c not in scheme_chars:
				break
		else:
			scheme, url = string.lower(url[:i]), url[i+1:]
	if scheme in uses_netloc:
		if url[:2] == '//':
			i = find(url, '/', 2)
			if i < 0:
				i = len(url)
			netloc, url = url[2:i], url[i:]
	if allow_fragments and scheme in uses_fragment:
		i = find(url, '#')
		if i >= 0:
			url, fragment = url[:i], url[i+1:]
	if scheme in uses_query:
		i = find(url, '?')
		if i >= 0:
			url, query = url[:i], url[i+1:]
	if scheme in uses_params:
		i = find(url, ';')
		if i >= 0:
			url, params = url[:i], url[i+1:]
	parsed = scheme, netloc, url, params, query, fragment
	_parse_cache[key] = parsed
	return parsed

def urlunparse(components):
	"""Put a parsed URL back together again.  This may result in a
	slightly different, but equivalent URL, if the URL that was parsed
	originally had redundant delimiters, e.g. a ? with an empty query
	(the draft states that these are equivalent).

	`components' is a 6-item sequence
	(scheme, netloc, path, params, query, fragment), as returned by
	urlparse().  Empty components are omitted together with their
	delimiter.  Return the reassembled URL string."""
	scheme, netloc, url, params, query, fragment = components
	# Emit '//' when there is a netloc, and also when a netloc-using
	# scheme has a path starting with '//': without the (empty) netloc
	# marker such a path would be re-parsed as a netloc.
	if netloc or (scheme in uses_netloc and url[:2] == '//'):
		# A non-empty relative path needs a '/' to separate it from
		# the netloc.  An empty path is left empty so that
		# 'http://g' round-trips instead of becoming 'http://g/'.
		if url and url[:1] != '/': url = '/' + url
		url = '//' + (netloc or '') + url
	if scheme:
		url = scheme + ':' + url
	if params:
		url = url + ';' + params
	if query:
		url = url + '?' + query
	if fragment:
		url = url + '#' + fragment
	return url

def urljoin(base, url, allow_fragments = 1):
	"""Join a base URL and a possibly relative URL to form an absolute
	interpretation of the latter.

	`base' is the base URL and `url' the (possibly relative)
	reference; `allow_fragments' is passed on to urlparse() for
	both.  If `base' is empty, `url' is returned unchanged; if
	`url' is empty, `base' is returned unchanged.  A reference whose
	scheme differs from the base's, or whose scheme does not support
	relative forms, is returned as parsed rather than joined.

	Resolution follows RFC 1808, section 4."""
	if not base:
		return url
	if not url:
		# Step 2a: an empty reference inherits the entire base URL.
		return base
	bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
		urlparse(base, '', allow_fragments)
	# The base scheme is the default, so a scheme-less reference
	# compares equal to the base below.
	scheme, netloc, path, params, query, fragment = \
		urlparse(url, bscheme, allow_fragments)
	if scheme != bscheme or scheme not in uses_relative:
		return urlunparse((scheme, netloc, path,
				   params, query, fragment))
	if scheme in uses_netloc:
		if netloc:
			# the reference names its own host: nothing else
			# is inherited
			return urlunparse((scheme, netloc, path,
					   params, query, fragment))
		netloc = bnetloc
	if path[:1] == '/':
		# absolute path: only scheme and netloc are inherited
		return urlunparse((scheme, netloc, path,
				   params, query, fragment))
	if not path:
		# Step 5: an empty path inherits the base path; the base
		# params are inherited only if the reference has none,
		# and the base query only if it has neither params nor
		# query.
		if not params:
			params = bparams
			if not query:
				query = bquery
		return urlunparse((scheme, netloc, bpath,
				   params, query, fragment))
	# Step 6: drop the last segment of the base path and append the
	# reference path, then normalize '.' and '..'.
	segments = split(bpath, '/')[:-1] + split(path, '/')
	# XXX The stuff below is bogus in various ways...
	if segments[-1] == '.':
		# a trailing '.' names the directory itself: keep the
		# trailing slash
		segments[-1] = ''
	while '.' in segments:
		segments.remove('.')
	# Repeatedly remove the left-most "<segment>/.." pair, excluding
	# the final segment (handled below).  A preceding segment that is
	# empty or itself '..' is not removable, so surplus '..' segments
	# are preserved.
	while 1:
		i = 1
		n = len(segments) - 1
		while i < n:
			if segments[i] == '..' and segments[i-1] not in ('', '..'):
				del segments[i-1:i+1]
				break
			i = i+1
		else:
			break
	if len(segments) == 2 and segments[1] == '..' and segments[0] == '':
		# '/..' at the root collapses to '/'
		segments[-1] = ''
	elif len(segments) >= 2 and segments[-1] == '..':
		# a trailing "<segment>/.." collapses to a trailing slash
		segments[-2:] = ['']
	return urlunparse((scheme, netloc, join(segments, '/'),
			   params, query, fragment))

def urldefrag(url):
	"""Split a URL into its fragment-free form and its fragment.

	Return a 2-tuple (defragmented URL, fragment).  The fragment is
	the empty string when the URL has none.
	"""
	parts = urlparse(url)
	fragment = parts[5]
	# Reassemble the first five components with the fragment slot
	# blanked out.
	stripped = urlunparse(parts[:5] + ('',))
	return stripped, fragment


# Default input for test().  Each non-blank line is split on whitespace.
# The first line gives the base URL; every later line has the form
#     <reference> = <URL:expected>
# and test() reports a mismatch when joining <reference> to the base does
# not yield the expected absolute URL.
test_input = """
      http://a/b/c/d

      g:h        = <URL:g:h>
      http:g     = <URL:http://a/b/c/g>
      http:      = <URL:http://a/b/c/d>
      g          = <URL:http://a/b/c/g>
      ./g        = <URL:http://a/b/c/g>
      g/         = <URL:http://a/b/c/g/>
      /g         = <URL:http://a/g>
      //g        = <URL:http://g>
      ?y         = <URL:http://a/b/c/d?y>
      g?y        = <URL:http://a/b/c/g?y>
      g?y/./x    = <URL:http://a/b/c/g?y/./x>
      .          = <URL:http://a/b/c/>
      ./         = <URL:http://a/b/c/>
      ..         = <URL:http://a/b/>
      ../        = <URL:http://a/b/>
      ../g       = <URL:http://a/b/g>
      ../..      = <URL:http://a/>
      ../../g    = <URL:http://a/g>
      ../../../g = <URL:http://a/../g>
      ./../g     = <URL:http://a/b/g>
      ./g/.      = <URL:http://a/b/c/g/>
      /./g       = <URL:http://a/./g>
      g/./h      = <URL:http://a/b/c/g/h>
      g/../h     = <URL:http://a/b/c/h>
      http:g     = <URL:http://a/b/c/g>
      http:      = <URL:http://a/b/c/d>
      http:?y         = <URL:http://a/b/c/d?y>
      http:g?y        = <URL:http://a/b/c/g?y>
      http:g?y/./x    = <URL:http://a/b/c/g?y/./x>
"""
# XXX The result for //g is actually http://g/; is this a problem?

def test():
	import sys
	base = ''
	if sys.argv[1:]:
		fn = sys.argv[1]
		if fn == '-':
			fp = sys.stdin
		else:
			fp = open(fn)
	else:
		import StringIO
		fp = StringIO.StringIO(test_input)
	while 1:
		line = fp.readline()
		if not line: break
		words = string.split(line)
		if not words:
			continue
		url = words[0]
		parts = urlparse(url)
		print '%-10s : %s' % (url, parts)
		abs = urljoin(base, url)
		if not base:
			base = abs
		wrapped = '<URL:%s>' % abs
		print '%-10s = %s' % (url, wrapped)
		if len(words) == 3 and words[1] == '=':
			if wrapped != words[2]:
				print 'EXPECTED', words[2], '!!!!!!!!!!'

if __name__ == '__main__':
	test()
The third and final doc-string sweep by Ka-Ping Yee. The attached patches update the standard library so that all modules have docstrings beginning with one-line summaries. A new docstring was added to formatter. The docstring for os.py was updated to mention nt, os2, ce in addition to posix, dos, mac. 2000-02-04 11:28:42 -04:00			`"""Parse (absolute and relative) URLs.`

			`See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding,`
			`UC Irvine, June 1995.`
			`"""`
New tty/pty modules by Steen; new urlparser. 1994-09-12 07:36:35 -03:00
			`# Standard/builtin Python modules`
			`import string`
Some cleanup -- don't use splitfields/joinfields, standardize indentation (tabs only), rationalize some code in urljoin... 2000-04-10 14:02:46 -03:00			`from string import join, split, rfind`
New tty/pty modules by Steen; new urlparser. 1994-09-12 07:36:35 -03:00
			`# A classification of schemes ('' means apply by default)`
			`uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'wais', 'file',`
Added characteristics of shttp, https, and snews. 1997-01-02 14:18:27 -04:00			`'https', 'shttp',`
Anthony Baxter <anthony@interlink.com.au>: The following adds support for RTSP (RFC2326) URLs to the standard urlparse.py module. (Augmented by FLD to include rtspu:, specified in the same RFC & OK'd by Anthony.) 2000-04-14 11:01:34 -03:00			`'prospero', 'rtsp', 'rtspu', '']`
New tty/pty modules by Steen; new urlparser. 1994-09-12 07:36:35 -03:00			`uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', 'wais',`
Steve Clift pointed out that 'file' allows a netloc. 1999-02-22 11:38:46 -04:00			`'file',`
Added characteristics of shttp, https, and snews. 1997-01-02 14:18:27 -04:00			`'https', 'shttp', 'snews',`
Anthony Baxter <anthony@interlink.com.au>: The following adds support for RTSP (RFC2326) URLs to the standard urlparse.py module. (Augmented by FLD to include rtspu:, specified in the same RFC & OK'd by Anthony.) 2000-04-14 11:01:34 -03:00			`'prospero', 'rtsp', 'rtspu', '']`
Added characteristics of shttp, https, and snews. 1997-01-02 14:18:27 -04:00			`non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', 'telnet', 'wais',`
			`'snews',`
			`]`
			`uses_params = ['ftp', 'hdl', 'prospero', 'http',`
Anthony Baxter <anthony@interlink.com.au>: The following adds support for RTSP (RFC2326) URLs to the standard urlparse.py module. (Augmented by FLD to include rtspu:, specified in the same RFC & OK'd by Anthony.) 2000-04-14 11:01:34 -03:00			`'https', 'shttp', 'rtsp', 'rtspu',`
Added characteristics of shttp, https, and snews. 1997-01-02 14:18:27 -04:00			`'']`
			`uses_query = ['http', 'wais',`
			`'https', 'shttp',`
Anthony Baxter <anthony@interlink.com.au>: The following adds support for RTSP (RFC2326) URLs to the standard urlparse.py module. (Augmented by FLD to include rtspu:, specified in the same RFC & OK'd by Anthony.) 2000-04-14 11:01:34 -03:00			`'gopher', 'rtsp', 'rtspu',`
Added characteristics of shttp, https, and snews. 1997-01-02 14:18:27 -04:00			`'']`
added hdl protocol properties 1996-05-28 20:10:02 -03:00			`uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais',`
Added characteristics of shttp, https, and snews. 1997-01-02 14:18:27 -04:00			`'https', 'shttp', 'snews',`
New tty/pty modules by Steen; new urlparser. 1994-09-12 07:36:35 -03:00			`'file', 'prospero', '']`

			`# Characters valid in scheme names`
			`scheme_chars = string.letters + string.digits + '+-.'`

After some discussion with Jeremy and Fred, decided to limit the default urlparse cache size to 20 instead of 2000. The main use of the cache seems to be to gain some speed in Grail, which is calling urljoin with the same base for each anchor. 2000 is a bit too big for Jeremy, who doesn't need the cache at all. 20 should keep at least 95% of the Grail speedup while wasting an insignificant amount of memory in Jeremy's application. 1997-07-14 16:08:15 -03:00			`MAX_CACHE_SIZE = 20`
optimizations due to Fred Drake; added urldefrag() function 1996-05-28 20:54:24 -03:00			`_parse_cache = {}`

			`def clear_cache():`
Some cleanup -- don't use splitfields/joinfields, standardize indentation (tabs only), rationalize some code in urljoin... 2000-04-10 14:02:46 -03:00			`"""Clear the parse cache."""`
			`global _parse_cache`
			`_parse_cache = {}`
optimizations due to Fred Drake; added urldefrag() function 1996-05-28 20:54:24 -03:00

fix typo in keyword argument 'allow_frament' should be 'allow_fragment' 1998-08-25 16:45:24 -03:00			`def urlparse(url, scheme = '', allow_fragments = 1):`
The third and final doc-string sweep by Ka-Ping Yee. The attached patches update the standard library so that all modules have docstrings beginning with one-line summaries. A new docstring was added to formatter. The docstring for os.py was updated to mention nt, os2, ce in addition to posix, dos, mac. 2000-02-04 11:28:42 -04:00			`"""Parse a URL into 6 components:`
			`<scheme>://<netloc>/<path>;<params>?<query>#<fragment>`
			`Return a 6-tuple: (scheme, netloc, path, params, query, fragment).`
			`Note that we don't break the components up in smaller bits`
			`(e.g. netloc is a single string) and we don't expand % escapes."""`
fix typo in keyword argument 'allow_frament' should be 'allow_fragment' 1998-08-25 16:45:24 -03:00			`key = url, scheme, allow_fragments`
Easy optimizations of urlparse for the common case of parsing an http URL. 1. use dict.get instead of try/except KeyError 2. if the url scheme is 'http' then avoid the series of 'if var in [someseq]:'. instead, inline all of the code. 3. find = string.find 1998-09-02 18:53:16 -03:00			`cached = _parse_cache.get(key, None)`
			`if cached:`
			`return cached`
Crude but effective hack to clear the parser cache every so often. (Fred Drake.) 1996-12-27 11:26:15 -04:00			`if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth`
Some cleanup -- don't use splitfields/joinfields, standardize indentation (tabs only), rationalize some code in urljoin... 2000-04-10 14:02:46 -03:00			`clear_cache()`
Easy optimizations of urlparse for the common case of parsing an http URL. 1. use dict.get instead of try/except KeyError 2. if the url scheme is 'http' then avoid the series of 'if var in [someseq]:'. instead, inline all of the code. 3. find = string.find 1998-09-02 18:53:16 -03:00			`find = string.find`
optimizations due to Fred Drake; added urldefrag() function 1996-05-28 20:54:24 -03:00			`netloc = path = params = query = fragment = ''`
Easy optimizations of urlparse for the common case of parsing an http URL. 1. use dict.get instead of try/except KeyError 2. if the url scheme is 'http' then avoid the series of 'if var in [someseq]:'. instead, inline all of the code. 3. find = string.find 1998-09-02 18:53:16 -03:00			`i = find(url, ':')`
New tty/pty modules by Steen; new urlparser. 1994-09-12 07:36:35 -03:00			`if i > 0:`
Fixed bug in the common-case code for HTTP URLs; it would lose the query, fragment, and/or parameter information. 3 cases added to the test suite to check for this bug. 1999-01-06 18:13:09 -04:00			`if url[:i] == 'http': # optimize the common case`
Easy optimizations of urlparse for the common case of parsing an http URL. 1. use dict.get instead of try/except KeyError 2. if the url scheme is 'http' then avoid the series of 'if var in [someseq]:'. instead, inline all of the code. 3. find = string.find 1998-09-02 18:53:16 -03:00			`scheme = string.lower(url[:i])`
			`url = url[i+1:]`
			`if url[:2] == '//':`
			`i = find(url, '/', 2)`
			`if i < 0:`
			`i = len(url)`
			`netloc = url[2:i]`
			`url = url[i:]`
			`if allow_fragments:`
			`i = string.rfind(url, '#')`
			`if i >= 0:`
			`fragment = url[i+1:]`
Fixed bug in the common-case code for HTTP URLs; it would lose the query, fragment, and/or parameter information. 3 cases added to the test suite to check for this bug. 1999-01-06 18:13:09 -04:00			`url = url[:i]`
Easy optimizations of urlparse for the common case of parsing an http URL. 1. use dict.get instead of try/except KeyError 2. if the url scheme is 'http' then avoid the series of 'if var in [someseq]:'. instead, inline all of the code. 3. find = string.find 1998-09-02 18:53:16 -03:00			`i = find(url, '?')`
			`if i >= 0:`
			`query = url[i+1:]`
Fixed bug in the common-case code for HTTP URLs; it would lose the query, fragment, and/or parameter information. 3 cases added to the test suite to check for this bug. 1999-01-06 18:13:09 -04:00			`url = url[:i]`
Easy optimizations of urlparse for the common case of parsing an http URL. 1. use dict.get instead of try/except KeyError 2. if the url scheme is 'http' then avoid the series of 'if var in [someseq]:'. instead, inline all of the code. 3. find = string.find 1998-09-02 18:53:16 -03:00			`i = find(url, ';')`
			`if i >= 0:`
			`params = url[i+1:]`
Fixed bug in the common-case code for HTTP URLs; it would lose the query, fragment, and/or parameter information. 3 cases added to the test suite to check for this bug. 1999-01-06 18:13:09 -04:00			`url = url[:i]`
Easy optimizations of urlparse for the common case of parsing an http URL. 1. use dict.get instead of try/except KeyError 2. if the url scheme is 'http' then avoid the series of 'if var in [someseq]:'. instead, inline all of the code. 3. find = string.find 1998-09-02 18:53:16 -03:00			`tuple = scheme, netloc, url, params, query, fragment`
			`_parse_cache[key] = tuple`
			`return tuple`
New tty/pty modules by Steen; new urlparser. 1994-09-12 07:36:35 -03:00			`for c in url[:i]:`
			`if c not in scheme_chars:`
			`break`
			`else:`
			`scheme, url = string.lower(url[:i]), url[i+1:]`
			`if scheme in uses_netloc:`
			`if url[:2] == '//':`
Easy optimizations of urlparse for the common case of parsing an http URL. 1. use dict.get instead of try/except KeyError 2. if the url scheme is 'http' then avoid the series of 'if var in [someseq]:'. instead, inline all of the code. 3. find = string.find 1998-09-02 18:53:16 -03:00			`i = find(url, '/', 2)`
New tty/pty modules by Steen; new urlparser. 1994-09-12 07:36:35 -03:00			`if i < 0:`
			`i = len(url)`
			`netloc, url = url[2:i], url[i:]`
fix typo in keyword argument 'allow_frament' should be 'allow_fragment' 1998-08-25 16:45:24 -03:00			`if allow_fragments and scheme in uses_fragment:`
New tty/pty modules by Steen; new urlparser. 1994-09-12 07:36:35 -03:00			`i = string.rfind(url, '#')`
			`if i >= 0:`
			`url, fragment = url[:i], url[i+1:]`
			`if scheme in uses_query:`
Easy optimizations of urlparse for the common case of parsing an http URL. 1. use dict.get instead of try/except KeyError 2. if the url scheme is 'http' then avoid the series of 'if var in [someseq]:'. instead, inline all of the code. 3. find = string.find 1998-09-02 18:53:16 -03:00			`i = find(url, '?')`
New tty/pty modules by Steen; new urlparser. 1994-09-12 07:36:35 -03:00			`if i >= 0:`
			`url, query = url[:i], url[i+1:]`
			`if scheme in uses_params:`
Easy optimizations of urlparse for the common case of parsing an http URL. 1. use dict.get instead of try/except KeyError 2. if the url scheme is 'http' then avoid the series of 'if var in [someseq]:'. instead, inline all of the code. 3. find = string.find 1998-09-02 18:53:16 -03:00			`i = find(url, ';')`
New tty/pty modules by Steen; new urlparser. 1994-09-12 07:36:35 -03:00			`if i >= 0:`
			`url, params = url[:i], url[i+1:]`
optimizations due to Fred Drake; added urldefrag() function 1996-05-28 20:54:24 -03:00			`tuple = scheme, netloc, url, params, query, fragment`
			`_parse_cache[key] = tuple`
			`return tuple`
New tty/pty modules by Steen; new urlparser. 1994-09-12 07:36:35 -03:00
			`def urlunparse((scheme, netloc, url, params, query, fragment)):`
The third and final doc-string sweep by Ka-Ping Yee. The attached patches update the standard library so that all modules have docstrings beginning with one-line summaries. A new docstring was added to formatter. The docstring for os.py was updated to mention nt, os2, ce in addition to posix, dos, mac. 2000-02-04 11:28:42 -04:00			`"""Put a parsed URL back together again. This may result in a`
			`slightly different, but equivalent URL, if the URL that was parsed`
			`originally had redundant delimiters, e.g. a ? with an empty query`
			`(the draft states that these are equivalent)."""`
Sjoerd Mullender writes: If a filename on Windows starts with \\, it is converted to a URL which starts with ////. If this URL is passed to urlparse.urlparse you get a path that starts with // (and an empty netloc). If you pass the result back to urlparse.urlunparse, you get a URL that starts with //, which is parsed differently by urlparse.urlparse. The fix is to add the (empty) netloc with accompanying slashes if the path in urlunparse starts with //. Do this for all schemes that use a netloc. 1999-03-18 11:10:44 -04:00			`if netloc or (scheme in uses_netloc and url[:2] == '//'):`
subtle changes to relative URL joins 1995-08-04 01:29:32 -03:00			`if url[:1] != '/': url = '/' + url`
Sjoerd Mullender writes: If a filename on Windows starts with \\, it is converted to a URL which starts with ////. If this URL is passed to urlparse.urlparse you get a path that starts with // (and an empty netloc). If you pass the result back to urlparse.urlunparse, you get a URL that starts with //, which is parsed differently by urlparse.urlparse. The fix is to add the (empty) netloc with accompanying slashes if the path in urlunparse starts with //. Do this for all schemes that use a netloc. 1999-03-18 11:10:44 -04:00			`url = '//' + (netloc or '') + url`
New tty/pty modules by Steen; new urlparser. 1994-09-12 07:36:35 -03:00			`if scheme:`
			`url = scheme + ':' + url`
			`if params:`
			`url = url + ';' + params`
			`if query:`
			`url = url + '?' + query`
			`if fragment:`
			`url = url + '#' + fragment`
			`return url`

fix typo in keyword argument 'allow_frament' should be 'allow_fragment' 1998-08-25 16:45:24 -03:00			`def urljoin(base, url, allow_fragments = 1):`
The third and final doc-string sweep by Ka-Ping Yee. The attached patches update the standard library so that all modules have docstrings beginning with one-line summaries. A new docstring was added to formatter. The docstring for os.py was updated to mention nt, os2, ce in addition to posix, dos, mac. 2000-02-04 11:28:42 -04:00			`"""Join a base URL and a possibly relative URL to form an absolute`
			`interpretation of the latter."""`
New tty/pty modules by Steen; new urlparser. 1994-09-12 07:36:35 -03:00			`if not base:`
			`return url`
			`bscheme, bnetloc, bpath, bparams, bquery, bfragment = \`
fix typo in keyword argument 'allow_frament' should be 'allow_fragment' 1998-08-25 16:45:24 -03:00			`urlparse(base, '', allow_fragments)`
New tty/pty modules by Steen; new urlparser. 1994-09-12 07:36:35 -03:00			`scheme, netloc, path, params, query, fragment = \`
fix typo in keyword argument 'allow_frament' should be 'allow_fragment' 1998-08-25 16:45:24 -03:00			`urlparse(url, bscheme, allow_fragments)`
New tty/pty modules by Steen; new urlparser. 1994-09-12 07:36:35 -03:00			`if scheme != bscheme or scheme not in uses_relative:`
			`return urlunparse((scheme, netloc, path,`
			`params, query, fragment))`
			`if scheme in uses_netloc:`
			`if netloc:`
			`return urlunparse((scheme, netloc, path,`
			`params, query, fragment))`
			`netloc = bnetloc`
			`if path[:1] == '/':`
			`return urlunparse((scheme, netloc, path,`
			`params, query, fragment))`
			`if not path:`
optimizations due to Fred Drake; added urldefrag() function 1996-05-28 20:54:24 -03:00			`return urlunparse((scheme, netloc, bpath,`
			`params, query or bquery, fragment))`
Some cleanup -- don't use splitfields/joinfields, standardize indentation (tabs only), rationalize some code in urljoin... 2000-04-10 14:02:46 -03:00			`segments = split(bpath, '/')[:-1] + split(path, '/')`
			`# XXX The stuff below is bogus in various ways...`
New tty/pty modules by Steen; new urlparser. 1994-09-12 07:36:35 -03:00			`if segments[-1] == '.':`
			`segments[-1] = ''`
			`while '.' in segments:`
			`segments.remove('.')`
			`while 1:`
			`i = 1`
			`n = len(segments) - 1`
			`while i < n:`
			`if segments[i] == '..' and segments[i-1]:`
			`del segments[i-1:i+1]`
			`break`
			`i = i+1`
			`else:`
			`break`
Patch by Marc Lemburg to fix urljoin("/a", "..") and urljoin("/a", "..#1"). 1997-12-03 18:38:56 -04:00			`if len(segments) == 2 and segments[1] == '..' and segments[0] == '':`
			`segments[-1] = ''`
			`elif len(segments) >= 2 and segments[-1] == '..':`
New tty/pty modules by Steen; new urlparser. 1994-09-12 07:36:35 -03:00			`segments[-2:] = ['']`
Some cleanup -- don't use splitfields/joinfields, standardize indentation (tabs only), rationalize some code in urljoin... 2000-04-10 14:02:46 -03:00			`return urlunparse((scheme, netloc, join(segments, '/'),`
New tty/pty modules by Steen; new urlparser. 1994-09-12 07:36:35 -03:00			`params, query, fragment))`

optimizations due to Fred Drake; added urldefrag() function 1996-05-28 20:54:24 -03:00			`def urldefrag(url):`
Some cleanup -- don't use splitfields/joinfields, standardize indentation (tabs only), rationalize some code in urljoin... 2000-04-10 14:02:46 -03:00			`"""Removes any existing fragment from URL.`
optimizations due to Fred Drake; added urldefrag() function 1996-05-28 20:54:24 -03:00
Some cleanup -- don't use splitfields/joinfields, standardize indentation (tabs only), rationalize some code in urljoin... 2000-04-10 14:02:46 -03:00			`Returns a tuple of the defragmented URL and the fragment. If`
			`the URL contained no fragments, the second element is the`
			`empty string.`
			`"""`
			`s, n, p, a, q, frag = urlparse(url)`
			`defrag = urlunparse((s, n, p, a, q, ''))`
			`return defrag, frag`
optimizations due to Fred Drake; added urldefrag() function 1996-05-28 20:54:24 -03:00

New tty/pty modules by Steen; new urlparser. 1994-09-12 07:36:35 -03:00			`test_input = """`
			`http://a/b/c/d`

			`g:h = <URL:g:h>`
			`http:g = <URL:http://a/b/c/g>`
			`http: = <URL:http://a/b/c/d>`
			`g = <URL:http://a/b/c/g>`
			`./g = <URL:http://a/b/c/g>`
			`g/ = <URL:http://a/b/c/g/>`
			`/g = <URL:http://a/g>`
			`//g = <URL:http://g>`
			`?y = <URL:http://a/b/c/d?y>`
			`g?y = <URL:http://a/b/c/g?y>`
			`g?y/./x = <URL:http://a/b/c/g?y/./x>`
			`. = <URL:http://a/b/c/>`
			`./ = <URL:http://a/b/c/>`
			`.. = <URL:http://a/b/>`
			`../ = <URL:http://a/b/>`
			`../g = <URL:http://a/b/g>`
			`../.. = <URL:http://a/>`
			`../../g = <URL:http://a/g>`
			`../../../g = <URL:http://a/../g>`
			`./../g = <URL:http://a/b/g>`
			`./g/. = <URL:http://a/b/c/g/>`
			`/./g = <URL:http://a/./g>`
			`g/./h = <URL:http://a/b/c/g/h>`
			`g/../h = <URL:http://a/b/c/h>`
			`http:g = <URL:http://a/b/c/g>`
			`http: = <URL:http://a/b/c/d>`
Fixed bug in the common-case code for HTTP URLs; it would lose the query, fragment, and/or parameter information. 3 cases added to the test suite to check for this bug. 1999-01-06 18:13:09 -04:00			`http:?y = <URL:http://a/b/c/d?y>`
			`http:g?y = <URL:http://a/b/c/g?y>`
			`http:g?y/./x = <URL:http://a/b/c/g?y/./x>`
New tty/pty modules by Steen; new urlparser. 1994-09-12 07:36:35 -03:00			`"""`
Add XXX comment about a test that doesn't seem right -- no time to explore this now. 1998-12-21 14:24:09 -04:00			`# XXX The result for //g is actually http://g/; is this a problem?`
New tty/pty modules by Steen; new urlparser. 1994-09-12 07:36:35 -03:00
			`def test():`
			`import sys`
			`base = ''`
			`if sys.argv[1:]:`
			`fn = sys.argv[1]`
			`if fn == '-':`
			`fp = sys.stdin`
			`else:`
			`fp = open(fn)`
			`else:`
			`import StringIO`
			`fp = StringIO.StringIO(test_input)`
			`while 1:`
			`line = fp.readline()`
			`if not line: break`
			`words = string.split(line)`
			`if not words:`
			`continue`
			`url = words[0]`
			`parts = urlparse(url)`
			`print '%-10s : %s' % (url, parts)`
			`abs = urljoin(base, url)`
			`if not base:`
			`base = abs`
			`wrapped = '<URL:%s>' % abs`
			`print '%-10s = %s' % (url, wrapped)`
			`if len(words) == 3 and words[1] == '=':`
			`if wrapped != words[2]:`
			`print 'EXPECTED', words[2], '!!!!!!!!!!'`

			`if __name__ == '__main__':`
			`test()`