Fixed basejoin. There were two main problems:

- basejoin('some/url', '#name') would strip the last component of
  some/url and return some/#name.
- basejoin('file.html', 'relative/path') would return something like
  file:/relative/path, making a relative path into an absolute one.
These bugs are fixed by some drastic changes.  No scheme is added when
none is present (i.e. it works as a replacement for posix.joinpath).
If a scheme is present in the second argument, it is returned
unprocessed.  No hostname is added in this case.  If no scheme is
present, the scheme of the first argument, if present, is used.
The algorithm is commented profusely.
Also fixed a typo in a comment.
This commit is contained in:
Sjoerd Mullender 1995-11-10 10:36:07 +00:00
parent 17914f4e17
commit e0371b8415
1 changed files with 37 additions and 13 deletions

View File

@ -482,25 +482,49 @@ class addinfo(addbase):
def basejoin(base, url): def basejoin(base, url):
type, path = splittype(url) type, path = splittype(url)
if type:
# if url is complete (i.e., it contains a type), return it
return url
host, path = splithost(path) host, path = splithost(path)
if type and host: return url type, basepath = splittype(base) # inherit type from base
basetype, basepath = splittype(base) if host:
basehost, basepath = splithost(basepath) # if url contains host, just inherit type
basepath, basetag = splittag(basepath) if type: return type + '://' + host + path
basepath, basequery = splitquery(basepath) else:
if not type: type = basetype or 'file' # no type inherited, so url must have started with //
# just return it
return url
host, basepath = splithost(basepath) # inherit host
basepath, basetag = splittag(basepath) # remove extraneuous cruft
basepath, basequery = splitquery(basepath) # idem
if path[:1] != '/': if path[:1] != '/':
# non-absolute path name
if path[:1] in ('#', '?'):
# path is just a tag or query, attach to basepath
i = len(basepath)
else:
# else replace last component
i = string.rfind(basepath, '/') i = string.rfind(basepath, '/')
if i < 0: basepath = '/' if i < 0:
else: basepath = basepath[:i+1] # basepath not absolute
if host:
# host present, make absolute
basepath = '/'
else:
# else keep non-absolute
basepath = ''
else:
# remove last file component
basepath = basepath[:i+1]
path = basepath + path path = basepath + path
if not host: host = basehost if type and host: return type + '://' + host + path
if host: return type + '://' + host + path elif type: return type + ':' + path
else: return type + ':' + path elif host: return '//' + host + path # don't know what this means
else: return path
# Utilities to parse URLs (most of these return None for missing parts): # Utilities to parse URLs (most of these return None for missing parts):
# unwrap('<URL:type//host/path>') --> 'type//host/path' # unwrap('<URL:type://host/path>') --> 'type://host/path'
# splittype('type:opaquestring') --> 'type', 'opaquestring' # splittype('type:opaquestring') --> 'type', 'opaquestring'
# splithost('//host[:port]/path') --> 'host[:port]', '/path' # splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]' # splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'