# Patch summary (two files, three hunks):
#
# * Lib/test/test_urlparse.py: adds UrlParseTestCase.test_urlsplit_invalid_scheme.
#   The test first asserts that "git+ssh://git@github.com/user/project.git"
#   splits with scheme "git+ssh". It then prepends, one at a time, each
#   character of "1234567890+-" and of "Å_/" to that URL and asserts that
#   urlsplit() reports an empty scheme ("") for every such URL.
#
# * Lib/urllib/parse.py (module constants): introduces scheme_alpha_chars, a
#   frozenset of the ASCII letters a-z and A-Z, and rebuilds scheme_chars as
#   scheme_alpha_chars.union(...) of the digits and "+-.". As a result
#   scheme_chars changes from a str to a frozenset.
#
# * Lib/urllib/parse.py (urlsplit): the scheme scan is now entered only when
#   a ':' is found past index 0 AND the first character of the URL is in
#   scheme_alpha_chars; the loop then validates url[1:i] (instead of url[:i])
#   against scheme_chars.
#
# NOTE(review): scheme_chars is a module-level name; any code outside this
# patch that treats it as a str (concatenation, slicing, str methods) would be
# affected by the str -> frozenset change -- confirm before merging.
# NOTE(review): this copy of the patch has its line breaks collapsed onto a
# single line; restore the original line structure and indentation before
# attempting to apply it.
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 762500789f7..717d8f76574 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -1027,6 +1027,17 @@ class UrlParseTestCase(unittest.TestCase): with self.assertRaises(ValueError): urllib.parse.urlsplit(url) + def test_urlsplit_invalid_scheme(self): + base_url = "git+ssh://git@github.com/user/project.git" + illegal_prefixes = "1234567890+-" + illegal_chars = "Å_/" + + self.assertEqual(urllib.parse.urlsplit(base_url).scheme, "git+ssh") + + for prefix in illegal_prefixes + illegal_chars: + split_url = urllib.parse.urlsplit(prefix+base_url) + self.assertEqual(split_url.scheme, "") + class Utility_Tests(unittest.TestCase): """Testcase to test the various utility functions in the urllib.""" # In Python 2 this test class was in test_urllib. diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index ea897c30322..2f8eb23e623 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -73,10 +73,11 @@ uses_fragment = ['', 'ftp', 'hdl', 'http', 'gopher', 'news', 'file', 'prospero'] # Characters valid in scheme names -scheme_chars = ('abcdefghijklmnopqrstuvwxyz' - 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' - '0123456789' - '+-.') +scheme_alpha_chars = frozenset('abcdefghijklmnopqrstuvwxyz' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ') + +scheme_chars = scheme_alpha_chars.union('0123456789' + '+-.') # XXX: Consider replacing with functools.lru_cache MAX_CACHE_SIZE = 20 @@ -462,8 +463,8 @@ def urlsplit(url, scheme='', allow_fragments=True): clear_cache() netloc = query = fragment = '' i = url.find(':') - if i > 0: - for c in url[:i]: + if i > 0 and url[0] in scheme_alpha_chars: + for c in url[1:i]: if c not in scheme_chars: break else: