From 6a265f0d0c0a4b3b8fecf4275d49187a384167f4 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 5 Jan 2020 14:14:31 +0200 Subject: [PATCH] bpo-39057: Fix urllib.request.proxy_bypass_environment(). (GH-17619) Ignore leading dots and no longer ignore a trailing newline. --- Lib/test/test_urllib.py | 22 +++++++++++++++++ Lib/urllib/parse.py | 4 ++-- Lib/urllib/request.py | 24 ++++++++++--------- .../2019-12-15-21-47-54.bpo-39057.FOxn-w.rst | 2 ++ 4 files changed, 39 insertions(+), 13 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2019-12-15-21-47-54.bpo-39057.FOxn-w.rst diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 95c4ecc4dcf..2e82fc7b7b8 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -270,14 +270,36 @@ class ProxyTests(unittest.TestCase): self.assertTrue(bypass('localhost')) self.assertTrue(bypass('LocalHost')) # MixedCase self.assertTrue(bypass('LOCALHOST')) # UPPERCASE + self.assertTrue(bypass('.localhost')) self.assertTrue(bypass('newdomain.com:1234')) + self.assertTrue(bypass('.newdomain.com:1234')) self.assertTrue(bypass('foo.d.o.t')) # issue 29142 + self.assertTrue(bypass('d.o.t')) self.assertTrue(bypass('anotherdomain.com:8888')) + self.assertTrue(bypass('.anotherdomain.com:8888')) self.assertTrue(bypass('www.newdomain.com:1234')) self.assertFalse(bypass('prelocalhost')) self.assertFalse(bypass('newdomain.com')) # no port self.assertFalse(bypass('newdomain.com:1235')) # wrong port + def test_proxy_bypass_environment_always_match(self): + bypass = urllib.request.proxy_bypass_environment + self.env.set('NO_PROXY', '*') + self.assertTrue(bypass('newdomain.com')) + self.assertTrue(bypass('newdomain.com:1234')) + self.env.set('NO_PROXY', '*, anotherdomain.com') + self.assertTrue(bypass('anotherdomain.com')) + self.assertFalse(bypass('newdomain.com')) + self.assertFalse(bypass('newdomain.com:1234')) + + def test_proxy_bypass_environment_newline(self): + bypass = urllib.request.proxy_bypass_environment + self.env.set('NO_PROXY', + 'localhost, anotherdomain.com, newdomain.com:1234') + self.assertFalse(bypass('localhost\n')) + self.assertFalse(bypass('anotherdomain.com:8888\n')) + self.assertFalse(bypass('newdomain.com:1234\n')) + class ProxyTests_withOrderedEnv(unittest.TestCase): diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 31fd7e16ee7..34d5f95dd79 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -1056,9 +1056,9 @@ def _splitport(host): """splitport('host:port') --> 'host', 'port'.""" global _portprog if _portprog is None: - _portprog = re.compile('(.*):([0-9]*)$', re.DOTALL) + _portprog = re.compile('(.*):([0-9]*)', re.DOTALL) - match = _portprog.match(host) + match = _portprog.fullmatch(host) if match: host, port = match.groups() if port: diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 39553d809a3..a6d350a97a4 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -2492,24 +2492,26 @@ def proxy_bypass_environment(host, proxies=None): try: no_proxy = proxies['no'] except KeyError: - return 0 + return False # '*' is special case for always bypass if no_proxy == '*': - return 1 + return True + host = host.lower() # strip port off host hostonly, port = _splitport(host) # check if the host ends with any of the DNS suffixes - no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')] - for name in no_proxy_list: + for name in no_proxy.split(','): + name = name.strip() if name: name = name.lstrip('.') # ignore leading dots - name = re.escape(name) - pattern = r'(.+\.)?%s$' % name - if (re.match(pattern, hostonly, re.I) - or re.match(pattern, host, re.I)): - return 1 + name = name.lower() + if hostonly == name or host == name: + return True + name = '.' + name + if hostonly.endswith(name) or host.endswith(name): + return True # otherwise, don't bypass - return 0 + return False # This code tests an OSX specific data structure but is testable on all @@ -2635,7 +2637,7 @@ elif os.name == 'nt': for p in proxyServer.split(';'): protocol, address = p.split('=', 1) # See if address has a type:// prefix - if not re.match('^([^/:]+)://', address): + if not re.match('(?:[^/:]+)://', address): address = '%s://%s' % (protocol, address) proxies[protocol] = address else: diff --git a/Misc/NEWS.d/next/Library/2019-12-15-21-47-54.bpo-39057.FOxn-w.rst b/Misc/NEWS.d/next/Library/2019-12-15-21-47-54.bpo-39057.FOxn-w.rst new file mode 100644 index 00000000000..24a17444b97 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-12-15-21-47-54.bpo-39057.FOxn-w.rst @@ -0,0 +1,2 @@ +:func:`urllib.request.proxy_bypass_environment` now ignores leading dots and +no longer ignores a trailing newline.