From 4c7f995e805f4fddcf54b90f35ea30c7e26a4a95 Mon Sep 17 00:00:00 2001 From: R David Murray Date: Thu, 16 Apr 2015 16:36:18 -0400 Subject: [PATCH] #7159: generalize urllib prior auth support. This fix is a superset of the functionality introduced by the issue #19494 enhancement, and supersedes that fix. Instead of a new handler, we have a new password manager that tracks whether we should send the auth for a given uri. This allows us to say "always send", satisfying #19494, or track that we've succeeded in auth and send the creds right away on every *subsequent* request. The support for using the password manager is added to AbstractBasicAuth, which means the proxy handler also now can handle prior auth if passed the new password manager. Patch by Akshit Khurana, docs mostly by me. --- Doc/library/urllib.request.rst | 72 +++++++++++++++++++---- Doc/whatsnew/3.5.rst | 12 ++-- Lib/test/test_urllib2.py | 104 ++++++++++++++++++++++++++++----- Lib/urllib/request.py | 78 +++++++++++++++++++------ Misc/ACKS | 1 + Misc/NEWS | 4 ++ 6 files changed, 220 insertions(+), 51 deletions(-) diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst index 82fc1b28125..1ae3e434617 100644 --- a/Doc/library/urllib.request.rst +++ b/Doc/library/urllib.request.rst @@ -283,13 +283,36 @@ The following classes are provided: fits. +.. class:: HTTPPasswordMgrWithPriorAuth() + + A variant of :class:`HTTPPasswordMgrWithDefaultRealm` that also has a + database of ``uri -> is_authenticated`` mappings. Can be used by a + BasicAuth handler to determine when to send authentication credentials + immediately instead of waiting for a ``401`` response first. + + .. versionadded:: 3.5 + + .. class:: AbstractBasicAuthHandler(password_mgr=None) This is a mixin class that helps with HTTP authentication, both to the remote host and to a proxy. *password_mgr*, if given, should be something that is compatible with :class:`HTTPPasswordMgr`; refer to section :ref:`http-password-mgr` for information on the interface that must be - supported. + supported. If *passwd_mgr* also provides ``is_authenticated`` and + ``update_authenticated`` methods (see + :ref:`http-password-mgr-with-prior-auth`), then the handler will use the + ``is_authenticated`` result for a given URI to determine whether or not to + send authentication credentials with the request. If ``is_authenticated`` + returns ``True`` for the URI, credentials are sent. If ``is_authenticated + is ``False``, credentials are not sent, and then if a ``401`` response is + received the request is re-sent with the authentication credentials. If + authentication succeeds, ``update_authenticated`` is called to set + ``is_authenticated`` ``True`` for the URI, so that subsequent requests to + the URI or any of its super-URIs will automatically include the + authentication credentials. + + .. versionadded:: 3.5: added ``is_authenticated`` support. .. class:: HTTPBasicAuthHandler(password_mgr=None) @@ -301,17 +324,6 @@ The following classes are provided: presented with a wrong Authentication scheme. -.. class:: HTTPBasicPriorAuthHandler(password_mgr=None) - - A variant of :class:`HTTPBasicAuthHandler` which automatically sends - authorization credentials with the first request, rather than waiting to - first receive a HTTP 401 "Unauthorised" error response. This allows - authentication to sites that don't provide a 401 response when receiving - a request without an Authorization header. Aside from this difference, - this behaves exactly as :class:`HTTPBasicAuthHandler`. - - .. versionadded:: 3.5 - .. class:: ProxyBasicAuthHandler(password_mgr=None) Handle authentication with the proxy. *password_mgr*, if given, should be @@ -852,6 +864,42 @@ These methods are available on :class:`HTTPPasswordMgr` and searched if the given *realm* has no matching user/password. +.. _http-password-mgr-with-prior-auth: + +HTTPPasswordMgrWithPriorAuth Objects +------------------------------------ + +This password manager extends :class:`HTTPPasswordMgrWithDefaultRealm` to support +tracking URIs for which authentication credentials should always be sent. + + +.. method:: HTTPPasswordMgrWithPriorAuth.add_password(realm, uri, user, \ + passwd, is_authenticated=False) + + *realm*, *uri*, *user*, *passwd* are as for + :meth:`HTTPPasswordMgr.add_password`. *is_authenticated* sets the initial + value of the ``is_authenticated`` flag for the given URI or list of URIs. + If *is_authenticated* is specified as ``True``, *realm* is ignored. + + +.. method:: HTTPPasswordMgr.find_user_password(realm, authuri) + + Same as for :class:`HTTPPasswordMgrWithDefaultRealm` objects + + +.. method:: HTTPPasswordMgrWithPriorAuth.update_authenticated(self, uri, \ + is_authenticated=False) + + Update the ``is_authenticated`` flag for the given *uri* or list + of URIs. + + +.. method:: HTTPPasswordMgrWithPriorAuth.is_authenticated(self, authuri) + + Returns the current state of the ``is_authenticated`` flag for + the given URI. + + .. _abstract-basic-auth-handler: AbstractBasicAuthHandler Objects diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst index 44fc8cf95ca..65119edde9f 100644 --- a/Doc/whatsnew/3.5.rst +++ b/Doc/whatsnew/3.5.rst @@ -520,11 +520,13 @@ time urllib ------ -* A new :class:`urllib.request.HTTPBasicPriorAuthHandler` allows HTTP Basic - Authentication credentials to be sent unconditionally with the first HTTP - request, rather than waiting for a HTTP 401 Unauthorized response from the - server. - (Contributed by Matej Cepl in :issue:`19494`.) +* A new :class:`~urllib.request.HTTPPasswordMgrWithPriorAuth` allows HTTP Basic + Authentication credentials to be managed so as to eliminate unnecessary + ``401`` response handling, or to unconditionally send credentials + on the first request in order to communicate with servers that return a + ``404`` response instead of a ``401`` if the ``Authorization`` header is not + sent. (Contributed by Matej Cepl in :issue:`19494` and Akshit Khurana in + :issue:`7159`.) wsgiref ------- diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 36d7e872187..3819d4b1405 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -11,7 +11,9 @@ import sys import urllib.request # The proxy bypass method imported below has logic specific to the OSX # proxy config data structure but is testable on all platforms. -from urllib.request import Request, OpenerDirector, _parse_proxy, _proxy_bypass_macosx_sysconf +from urllib.request import (Request, OpenerDirector, HTTPBasicAuthHandler, + HTTPPasswordMgrWithPriorAuth, _parse_proxy, + _proxy_bypass_macosx_sysconf) from urllib.parse import urlparse import urllib.error import http.client @@ -447,6 +449,25 @@ class MockHTTPSHandler(urllib.request.AbstractHTTPHandler): def https_open(self, req): return self.do_open(self.httpconn, req) + +class MockHTTPHandlerCheckAuth(urllib.request.BaseHandler): + # useful for testing auth + # sends supplied code response + # checks if auth header is specified in request + def __init__(self, code): + self.code = code + self.has_auth_header = False + + def reset(self): + self.has_auth_header = False + + def http_open(self, req): + if req.has_header('Authorization'): + self.has_auth_header = True + name = http.client.responses[self.code] + return MockResponse(self.code, name, MockFile(), "", req.get_full_url()) + + class MockPasswordManager: def add_password(self, realm, uri, user, password): self.realm = realm @@ -1395,6 +1416,72 @@ class HandlerTests(unittest.TestCase): self.assertEqual(len(http_handler.requests), 1) self.assertFalse(http_handler.requests[0].has_header(auth_header)) + def test_basic_prior_auth_auto_send(self): + # Assume already authenticated if is_authenticated=True + # for APIs like Github that don't return 401 + + user, password = "wile", "coyote" + request_url = "http://acme.example.com/protected" + + http_handler = MockHTTPHandlerCheckAuth(200) + + pwd_manager = HTTPPasswordMgrWithPriorAuth() + auth_prior_handler = HTTPBasicAuthHandler(pwd_manager) + auth_prior_handler.add_password( + None, request_url, user, password, is_authenticated=True) + + is_auth = pwd_manager.is_authenticated(request_url) + self.assertTrue(is_auth) + + opener = OpenerDirector() + opener.add_handler(auth_prior_handler) + opener.add_handler(http_handler) + + opener.open(request_url) + + # expect request to be sent with auth header + self.assertTrue(http_handler.has_auth_header) + + def test_basic_prior_auth_send_after_first_success(self): + # Auto send auth header after authentication is successful once + + user, password = 'wile', 'coyote' + request_url = 'http://acme.example.com/protected' + realm = 'ACME' + + pwd_manager = HTTPPasswordMgrWithPriorAuth() + auth_prior_handler = HTTPBasicAuthHandler(pwd_manager) + auth_prior_handler.add_password(realm, request_url, user, password) + + is_auth = pwd_manager.is_authenticated(request_url) + self.assertFalse(is_auth) + + opener = OpenerDirector() + opener.add_handler(auth_prior_handler) + + http_handler = MockHTTPHandler( + 401, 'WWW-Authenticate: Basic realm="%s"\r\n\r\n' % None) + opener.add_handler(http_handler) + + opener.open(request_url) + + is_auth = pwd_manager.is_authenticated(request_url) + self.assertTrue(is_auth) + + http_handler = MockHTTPHandlerCheckAuth(200) + self.assertFalse(http_handler.has_auth_header) + + opener = OpenerDirector() + opener.add_handler(auth_prior_handler) + opener.add_handler(http_handler) + + # After getting 200 from MockHTTPHandler + # Next request sends header in the first request + opener.open(request_url) + + # expect request to be sent with auth header + self.assertTrue(http_handler.has_auth_header) + def test_http_closed(self): """Test the connection is cleaned up when the response is closed""" for (transfer, data) in ( @@ -1422,21 +1509,6 @@ class HandlerTests(unittest.TestCase): handler.do_open(conn, req) self.assertTrue(conn.fakesock.closed, "Connection not closed") - def test_auth_prior_handler(self): - pwd_manager = MockPasswordManager() - pwd_manager.add_password(None, 'https://example.com', - 'somebody', 'verysecret') - auth_prior_handler = urllib.request.HTTPBasicPriorAuthHandler( - pwd_manager) - http_hand = MockHTTPSHandler() - - opener = OpenerDirector() - opener.add_handler(http_hand) - opener.add_handler(auth_prior_handler) - - req = Request("https://example.com") - opener.open(req) - self.assertNotIn('Authorization', http_hand.httpconn.req_headers) class MiscTests(unittest.TestCase): diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 2e436ecfda9..eada0a9132a 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -120,9 +120,10 @@ __all__ = [ 'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler', 'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler', 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm', - 'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', - 'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', - 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler', + 'HTTPPasswordMgrWithPriorAuth', 'AbstractBasicAuthHandler', + 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', 'AbstractDigestAuthHandler', + 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', 'HTTPHandler', + 'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler', 'UnknownHandler', 'HTTPErrorProcessor', # Functions 'urlopen', 'install_opener', 'build_opener', @@ -835,6 +836,37 @@ class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr): return HTTPPasswordMgr.find_user_password(self, None, authuri) +class HTTPPasswordMgrWithPriorAuth(HTTPPasswordMgrWithDefaultRealm): + + def __init__(self, *args, **kwargs): + self.authenticated = {} + super().__init__(*args, **kwargs) + + def add_password(self, realm, uri, user, passwd, is_authenticated=False): + self.update_authenticated(uri, is_authenticated) + # Add a default for prior auth requests + if realm is not None: + super().add_password(None, uri, user, passwd) + super().add_password(realm, uri, user, passwd) + + def update_authenticated(self, uri, is_authenticated=False): + # uri could be a single URI or a sequence + if isinstance(uri, str): + uri = [uri] + + for default_port in True, False: + for u in uri: + reduced_uri = self.reduce_uri(u, default_port) + self.authenticated[reduced_uri] = is_authenticated + + def is_authenticated(self, authuri): + for default_port in True, False: + reduced_authuri = self.reduce_uri(authuri, default_port) + for uri in self.authenticated: + if self.is_suburi(uri, reduced_authuri): + return self.authenticated[uri] + + class AbstractBasicAuthHandler: # XXX this allows for multiple auth-schemes, but will stupidly pick @@ -889,6 +921,31 @@ class AbstractBasicAuthHandler: else: return None + def http_request(self, req): + if (not hasattr(self.passwd, 'is_authenticated') or + not self.passwd.is_authenticated(req.full_url)): + return req + + if not req.has_header('Authorization'): + user, passwd = self.passwd.find_user_password(None, req.full_url) + credentials = '{0}:{1}'.format(user, passwd).encode() + auth_str = base64.standard_b64encode(credentials).decode() + req.add_unredirected_header('Authorization', + 'Basic {}'.format(auth_str.strip())) + return req + + def http_response(self, req, response): + if hasattr(self.passwd, 'is_authenticated'): + if 200 <= response.code < 300: + self.passwd.update_authenticated(req.full_url, True) + else: + self.passwd.update_authenticated(req.full_url, False) + return response + + https_request = http_request + https_response = http_response + + class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): @@ -916,21 +973,6 @@ class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): return response -class HTTPBasicPriorAuthHandler(HTTPBasicAuthHandler): - handler_order = 400 - - def http_request(self, req): - if not req.has_header('Authorization'): - user, passwd = self.passwd.find_user_password(None, req.host) - credentials = '{0}:{1}'.format(user, passwd).encode() - auth_str = base64.standard_b64encode(credentials).decode() - req.add_unredirected_header('Authorization', - 'Basic {}'.format(auth_str.strip())) - return req - - https_request = http_request - - # Return n random bytes. _randombytes = os.urandom diff --git a/Misc/ACKS b/Misc/ACKS index 2ebaa9becac..164180852c2 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -722,6 +722,7 @@ Magnus Kessler Lawrence Kesteloot Vivek Khera Dhiru Kholia +Akshit Khurana Mads Kiilerich Jason Killen Jan Kim diff --git a/Misc/NEWS b/Misc/NEWS index b8d0c2ef42d..c4a35b9810d 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -37,6 +37,10 @@ Core and Builtins Library ------- +- Issue #7159: urllib.request now supports sending auth credentials + automatically after the first 401. This enhancement is a superset of the + enhancement from issue #19494 and supersedes that change. + - Issue #23703: Fix a regression in urljoin() introduced in 901e4e52b20a. Patch by Demian Brecht.