Fixed basejoin. There were two main problems:
- basejoin('some/url', '#name') would strip the last component of some/url and return some/#name.
- basejoin('file.html', 'relative/path') would return something like file:/relative/path, making a relative path into an absolute one.

These bugs are fixed by some drastic changes. No scheme is added when none is present (i.e. it works as a replacement for posix.joinpath). If a scheme is present in the second argument, it is returned unprocessed. No hostname is added in this case. If no scheme is present, the scheme of the first argument, if present, is used. The algorithm is commented profusely. Also fixed a typo in a comment.
This commit is contained in:
parent
17914f4e17
commit
e0371b8415
|
@ -482,25 +482,49 @@ class addinfo(addbase):
|
||||||
|
|
||||||
def basejoin(base, url):
|
def basejoin(base, url):
|
||||||
type, path = splittype(url)
|
type, path = splittype(url)
|
||||||
|
if type:
|
||||||
|
# if url is complete (i.e., it contains a type), return it
|
||||||
|
return url
|
||||||
host, path = splithost(path)
|
host, path = splithost(path)
|
||||||
if type and host: return url
|
type, basepath = splittype(base) # inherit type from base
|
||||||
basetype, basepath = splittype(base)
|
if host:
|
||||||
basehost, basepath = splithost(basepath)
|
# if url contains host, just inherit type
|
||||||
basepath, basetag = splittag(basepath)
|
if type: return type + '://' + host + path
|
||||||
basepath, basequery = splitquery(basepath)
|
else:
|
||||||
if not type: type = basetype or 'file'
|
# no type inherited, so url must have started with //
|
||||||
|
# just return it
|
||||||
|
return url
|
||||||
|
host, basepath = splithost(basepath) # inherit host
|
||||||
|
basepath, basetag = splittag(basepath) # remove extraneuous cruft
|
||||||
|
basepath, basequery = splitquery(basepath) # idem
|
||||||
if path[:1] != '/':
|
if path[:1] != '/':
|
||||||
|
# non-absolute path name
|
||||||
|
if path[:1] in ('#', '?'):
|
||||||
|
# path is just a tag or query, attach to basepath
|
||||||
|
i = len(basepath)
|
||||||
|
else:
|
||||||
|
# else replace last component
|
||||||
i = string.rfind(basepath, '/')
|
i = string.rfind(basepath, '/')
|
||||||
if i < 0: basepath = '/'
|
if i < 0:
|
||||||
else: basepath = basepath[:i+1]
|
# basepath not absolute
|
||||||
|
if host:
|
||||||
|
# host present, make absolute
|
||||||
|
basepath = '/'
|
||||||
|
else:
|
||||||
|
# else keep non-absolute
|
||||||
|
basepath = ''
|
||||||
|
else:
|
||||||
|
# remove last file component
|
||||||
|
basepath = basepath[:i+1]
|
||||||
path = basepath + path
|
path = basepath + path
|
||||||
if not host: host = basehost
|
if type and host: return type + '://' + host + path
|
||||||
if host: return type + '://' + host + path
|
elif type: return type + ':' + path
|
||||||
else: return type + ':' + path
|
elif host: return '//' + host + path # don't know what this means
|
||||||
|
else: return path
|
||||||
|
|
||||||
|
|
||||||
# Utilities to parse URLs (most of these return None for missing parts):
|
# Utilities to parse URLs (most of these return None for missing parts):
|
||||||
# unwrap('<URL:type//host/path>') --> 'type//host/path'
|
# unwrap('<URL:type://host/path>') --> 'type://host/path'
|
||||||
# splittype('type:opaquestring') --> 'type', 'opaquestring'
|
# splittype('type:opaquestring') --> 'type', 'opaquestring'
|
||||||
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
|
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
|
||||||
# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
|
# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
|
||||||
|
|
Loading…
Reference in New Issue