Patch #1462790: fix urllib2 ProxyHandler for host:port proxies
This commit is contained in:
parent
4eb521e595
commit
720096a6bf
|
@ -13,8 +13,7 @@ from urllib2 import Request, OpenerDirector
|
||||||
# parse_keqv_list, parse_http_list (I'm leaving this for Anthony Baxter
|
# parse_keqv_list, parse_http_list (I'm leaving this for Anthony Baxter
|
||||||
# and Greg Stein, since they're doing Digest Authentication)
|
# and Greg Stein, since they're doing Digest Authentication)
|
||||||
# Authentication stuff (ditto)
|
# Authentication stuff (ditto)
|
||||||
# ProxyHandler, CustomProxy, CustomProxyHandler (I don't use a proxy)
|
# CustomProxy, CustomProxyHandler
|
||||||
# GopherHandler (haven't used gopher for a decade or so...)
|
|
||||||
|
|
||||||
class TrivialTests(unittest.TestCase):
|
class TrivialTests(unittest.TestCase):
|
||||||
def test_trivial(self):
|
def test_trivial(self):
|
||||||
|
@ -90,6 +89,7 @@ class FakeMethod:
|
||||||
return self.handle(self.meth_name, self.action, *args)
|
return self.handle(self.meth_name, self.action, *args)
|
||||||
|
|
||||||
class MockHandler:
|
class MockHandler:
|
||||||
|
handler_order = 500
|
||||||
def __init__(self, methods):
|
def __init__(self, methods):
|
||||||
self._define_methods(methods)
|
self._define_methods(methods)
|
||||||
def _define_methods(self, methods):
|
def _define_methods(self, methods):
|
||||||
|
@ -154,7 +154,7 @@ def add_ordered_mock_handlers(opener, meth_spec):
|
||||||
for meths in meth_spec:
|
for meths in meth_spec:
|
||||||
class MockHandlerSubclass(MockHandler): pass
|
class MockHandlerSubclass(MockHandler): pass
|
||||||
h = MockHandlerSubclass(meths)
|
h = MockHandlerSubclass(meths)
|
||||||
h.handler_order = count
|
h.handler_order += count
|
||||||
h.add_parent(opener)
|
h.add_parent(opener)
|
||||||
count = count + 1
|
count = count + 1
|
||||||
handlers.append(h)
|
handlers.append(h)
|
||||||
|
@ -642,6 +642,23 @@ class HandlerTests(unittest.TestCase):
|
||||||
o.open("http://www.example.com/")
|
o.open("http://www.example.com/")
|
||||||
self.assert_(not hh.req.has_header("Cookie"))
|
self.assert_(not hh.req.has_header("Cookie"))
|
||||||
|
|
||||||
|
def test_proxy(self):
    # A ProxyHandler configured with a bare "host:port" proxy (no scheme)
    # must re-target the request at that proxy and then leave the actual
    # fetch to the ordinary http_open handler.
    opener = OpenerDirector()
    proxy_handler = urllib2.ProxyHandler({"http": "proxy.example.com:3128"})
    opener.add_handler(proxy_handler)
    handlers = add_ordered_mock_handlers(
        opener, [[("http_open", "return response")]])

    req = Request("http://acme.example.com/")
    # Before opening, the request still points at the origin server.
    self.assertEqual(req.get_host(), "acme.example.com")
    opener.open(req)
    # After opening, the request host has been replaced by the proxy.
    self.assertEqual(req.get_host(), "proxy.example.com:3128")

    # Exactly one mock handler call is expected: http_open on the mock.
    self.assertEqual([(handlers[0], "http_open")],
                     [tup[0:2] for tup in opener.calls])
|
||||||
|
|
||||||
|
|
||||||
class MiscTests(unittest.TestCase):
|
class MiscTests(unittest.TestCase):
|
||||||
|
|
||||||
|
@ -827,6 +844,7 @@ class NetworkTests(unittest.TestCase):
|
||||||
|
|
||||||
|
|
||||||
def test_main(verbose=None):
|
def test_main(verbose=None):
|
||||||
|
test_support.run_doctest(urllib2, verbose)
|
||||||
tests = (TrivialTests,
|
tests = (TrivialTests,
|
||||||
OpenerDirectorTests,
|
OpenerDirectorTests,
|
||||||
HandlerTests,
|
HandlerTests,
|
||||||
|
|
108
Lib/urllib2.py
108
Lib/urllib2.py
|
@ -119,7 +119,8 @@ from urllib import (unwrap, unquote, splittype, splithost, quote,
|
||||||
# support for FileHandler, proxies via environment variables
|
# support for FileHandler, proxies via environment variables
|
||||||
from urllib import localhost, url2pathname, getproxies
|
from urllib import localhost, url2pathname, getproxies
|
||||||
|
|
||||||
__version__ = "2.5"
|
# used in User-Agent header sent
|
||||||
|
__version__ = sys.version[:3]
|
||||||
|
|
||||||
_opener = None
|
_opener = None
|
||||||
def urlopen(url, data=None):
|
def urlopen(url, data=None):
|
||||||
|
@ -563,6 +564,80 @@ class HTTPRedirectHandler(BaseHandler):
|
||||||
"lead to an infinite loop.\n" \
|
"lead to an infinite loop.\n" \
|
||||||
"The last 30x error message was:\n"
|
"The last 30x error message was:\n"
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_proxy(proxy):
|
||||||
|
"""Return (scheme, user, password, host/port) given a URL or an authority.
|
||||||
|
|
||||||
|
If a URL is supplied, it must have an authority (host:port) component.
|
||||||
|
According to RFC 3986, having an authority component means the URL must
|
||||||
|
have two slashes after the scheme:
|
||||||
|
|
||||||
|
>>> _parse_proxy('file:/ftp.example.com/')
|
||||||
|
Traceback (most recent call last):
|
||||||
|
ValueError: proxy URL with no authority: 'file:/ftp.example.com/'
|
||||||
|
|
||||||
|
The first three items of the returned tuple may be None.
|
||||||
|
|
||||||
|
Examples of authority parsing:
|
||||||
|
|
||||||
|
>>> _parse_proxy('proxy.example.com')
|
||||||
|
(None, None, None, 'proxy.example.com')
|
||||||
|
>>> _parse_proxy('proxy.example.com:3128')
|
||||||
|
(None, None, None, 'proxy.example.com:3128')
|
||||||
|
|
||||||
|
The authority component may optionally include userinfo (assumed to be
|
||||||
|
username:password):
|
||||||
|
|
||||||
|
>>> _parse_proxy('joe:password@proxy.example.com')
|
||||||
|
(None, 'joe', 'password', 'proxy.example.com')
|
||||||
|
>>> _parse_proxy('joe:password@proxy.example.com:3128')
|
||||||
|
(None, 'joe', 'password', 'proxy.example.com:3128')
|
||||||
|
|
||||||
|
Same examples, but with URLs instead:
|
||||||
|
|
||||||
|
>>> _parse_proxy('http://proxy.example.com/')
|
||||||
|
('http', None, None, 'proxy.example.com')
|
||||||
|
>>> _parse_proxy('http://proxy.example.com:3128/')
|
||||||
|
('http', None, None, 'proxy.example.com:3128')
|
||||||
|
>>> _parse_proxy('http://joe:password@proxy.example.com/')
|
||||||
|
('http', 'joe', 'password', 'proxy.example.com')
|
||||||
|
>>> _parse_proxy('http://joe:password@proxy.example.com:3128')
|
||||||
|
('http', 'joe', 'password', 'proxy.example.com:3128')
|
||||||
|
|
||||||
|
Everything after the authority is ignored:
|
||||||
|
|
||||||
|
>>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
|
||||||
|
('ftp', 'joe', 'password', 'proxy.example.com')
|
||||||
|
|
||||||
|
Test for no trailing '/' case:
|
||||||
|
|
||||||
|
>>> _parse_proxy('http://joe:password@proxy.example.com')
|
||||||
|
('http', 'joe', 'password', 'proxy.example.com')
|
||||||
|
|
||||||
|
"""
|
||||||
|
from urlparse import _splitnetloc
|
||||||
|
scheme, r_scheme = splittype(proxy)
|
||||||
|
if not r_scheme.startswith("/"):
|
||||||
|
# authority
|
||||||
|
scheme = None
|
||||||
|
authority = proxy
|
||||||
|
else:
|
||||||
|
# URL
|
||||||
|
if not r_scheme.startswith("//"):
|
||||||
|
raise ValueError("proxy URL with no authority: %r" % proxy)
|
||||||
|
# We have an authority, so for RFC 3986-compliant URLs (by ss 3.
|
||||||
|
# and 3.3.), path is empty or starts with '/'
|
||||||
|
end = r_scheme.find("/", 2)
|
||||||
|
if end == -1:
|
||||||
|
end = None
|
||||||
|
authority = r_scheme[2:end]
|
||||||
|
userinfo, hostport = splituser(authority)
|
||||||
|
if userinfo is not None:
|
||||||
|
user, password = splitpasswd(userinfo)
|
||||||
|
else:
|
||||||
|
user = password = None
|
||||||
|
return scheme, user, password, hostport
|
||||||
|
|
||||||
class ProxyHandler(BaseHandler):
|
class ProxyHandler(BaseHandler):
|
||||||
# Proxies must be in front
|
# Proxies must be in front
|
||||||
handler_order = 100
|
handler_order = 100
|
||||||
|
@ -579,30 +654,25 @@ class ProxyHandler(BaseHandler):
|
||||||
|
|
||||||
def proxy_open(self, req, proxy, type):
|
def proxy_open(self, req, proxy, type):
|
||||||
orig_type = req.get_type()
|
orig_type = req.get_type()
|
||||||
type, r_type = splittype(proxy)
|
proxy_type, user, password, hostport = _parse_proxy(proxy)
|
||||||
if not type or r_type.isdigit():
|
if proxy_type is None:
|
||||||
# proxy is specified without protocol
|
proxy_type = orig_type
|
||||||
type = orig_type
|
|
||||||
host = proxy
|
|
||||||
else:
|
|
||||||
host, r_host = splithost(r_type)
|
|
||||||
user_pass, host = splituser(host)
|
|
||||||
user, password = splitpasswd(user_pass)
|
|
||||||
if user and password:
|
if user and password:
|
||||||
user, password = user_pass.split(':', 1)
|
user_pass = '%s:%s' % (unquote(user), unquote(password))
|
||||||
user_pass = base64.encodestring('%s:%s' % (unquote(user),
|
creds = base64.encodestring(user_pass).strip()
|
||||||
unquote(password))).strip()
|
req.add_header('Proxy-authorization', 'Basic ' + creds)
|
||||||
req.add_header('Proxy-authorization', 'Basic ' + user_pass)
|
hostport = unquote(hostport)
|
||||||
host = unquote(host)
|
req.set_proxy(hostport, proxy_type)
|
||||||
req.set_proxy(host, type)
|
if orig_type == proxy_type:
|
||||||
if orig_type == type:
|
|
||||||
# let other handlers take care of it
|
# let other handlers take care of it
|
||||||
# XXX this only makes sense if the proxy is before the
|
|
||||||
# other handlers
|
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
# need to start over, because the other handlers don't
|
# need to start over, because the other handlers don't
|
||||||
# grok the proxy's URL type
|
# grok the proxy's URL type
|
||||||
|
# e.g. if we have a constructor arg proxies like so:
|
||||||
|
# {'http': 'ftp://proxy.example.com'}, we may end up turning
|
||||||
|
# a request for http://acme.example.com/a into one for
|
||||||
|
# ftp://proxy.example.com/a
|
||||||
return self.parent.open(req)
|
return self.parent.open(req)
|
||||||
|
|
||||||
# feature suggested by Duncan Booth
|
# feature suggested by Duncan Booth
|
||||||
|
|
Loading…
Reference in New Issue