From db232dc86a9a9992b7f0e9bdbc27426cc04ccf52 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Sun, 20 Aug 2006 13:15:43 +0000 Subject: [PATCH] Patch #1542948: fix urllib2 header casing issue. With new test. (backport from rev. 51416) --- Lib/test/test_urllib2.py | 77 ++++++++++++++++++++++++++++++++++++---- Lib/urllib2.py | 28 ++++++++------- Misc/NEWS | 4 +-- 3 files changed, 87 insertions(+), 22 deletions(-) diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 67218b81c19..5ca760ef4bf 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -46,6 +46,69 @@ class TrivialTests(unittest.TestCase): self.assertEquals(urllib2.parse_http_list(string), list) +def test_request_headers_dict(): + """ + The Request.headers dictionary is not a documented interface. It should + stay that way, because the complete set of headers are only accessible + through the .get_header(), .has_header(), .header_items() interface. + However, .headers pre-dates those methods, and so real code will be using + the dictionary. + + The introduction in 2.4 of those methods was a mistake for the same reason: + code that previously saw all (urllib2 user)-provided headers in .headers + now sees only a subset (and the function interface is ugly and incomplete). + A better change would have been to replace .headers dict with a dict + subclass (or UserDict.DictMixin instance?) that preserved the .headers + interface and also provided access to the "unredirected" headers. It's + probably too late to fix that, though. + + + Check .capitalize() case normalization: + + >>> url = "http://example.com" + >>> Request(url, headers={"Spam-eggs": "blah"}).headers["Spam-eggs"] + 'blah' + >>> Request(url, headers={"spam-EggS": "blah"}).headers["Spam-eggs"] + 'blah' + + Currently, Request(url, "Spam-eggs").headers["Spam-Eggs"] raises KeyError, + but that could be changed in future. + + """ + +def test_request_headers_methods(): + """ + Note the case normalization of header names here, to .capitalize()-case. + This should be preserved for backwards-compatibility. (In the HTTP case, + normalization to .title()-case is done by urllib2 before sending headers to + httplib). + + >>> url = "http://example.com" + >>> r = Request(url, headers={"Spam-eggs": "blah"}) + >>> r.has_header("Spam-eggs") + True + >>> r.header_items() + [('Spam-eggs', 'blah')] + >>> r.add_header("Foo-Bar", "baz") + >>> items = r.header_items() + >>> items.sort() + >>> items + [('Foo-bar', 'baz'), ('Spam-eggs', 'blah')] + + Note that e.g. r.has_header("spam-EggS") is currently False, and + r.get_header("spam-EggS") returns None, but that could be changed in + future. + + >>> r.has_header("Not-there") + False + >>> print r.get_header("Not-there") + None + >>> r.get_header("Not-there", "default") + 'default' + + """ + + def test_password_manager(self): """ >>> mgr = urllib2.HTTPPasswordMgr() @@ -676,11 +739,11 @@ class HandlerTests(unittest.TestCase): r = MockResponse(200, "OK", {}, "") newreq = h.do_request_(req) if data is None: # GET - self.assert_("Content-Length" not in req.unredirected_hdrs) - self.assert_("Content-Type" not in req.unredirected_hdrs) + self.assert_("Content-length" not in req.unredirected_hdrs) + self.assert_("Content-type" not in req.unredirected_hdrs) else: # POST - self.assertEqual(req.unredirected_hdrs["Content-Length"], "0") - self.assertEqual(req.unredirected_hdrs["Content-Type"], + self.assertEqual(req.unredirected_hdrs["Content-length"], "0") + self.assertEqual(req.unredirected_hdrs["Content-type"], "application/x-www-form-urlencoded") # XXX the details of Host could be better tested self.assertEqual(req.unredirected_hdrs["Host"], "example.com") @@ -692,8 +755,8 @@ class HandlerTests(unittest.TestCase): req.add_unredirected_header("Host", "baz") req.add_unredirected_header("Spam", "foo") newreq = h.do_request_(req) - self.assertEqual(req.unredirected_hdrs["Content-Length"], "foo") - self.assertEqual(req.unredirected_hdrs["Content-Type"], "bar") + self.assertEqual(req.unredirected_hdrs["Content-length"], "foo") + self.assertEqual(req.unredirected_hdrs["Content-type"], "bar") self.assertEqual(req.unredirected_hdrs["Host"], "baz") self.assertEqual(req.unredirected_hdrs["Spam"], "foo") @@ -847,7 +910,7 @@ class HandlerTests(unittest.TestCase): 407, 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm) opener.add_handler(auth_handler) opener.add_handler(http_handler) - self._test_basic_auth(opener, auth_handler, "Proxy-Authorization", + self._test_basic_auth(opener, auth_handler, "Proxy-authorization", realm, http_handler, password_manager, "http://acme.example.com:3128/protected", "proxy.example.com:3128", diff --git a/Lib/urllib2.py b/Lib/urllib2.py index 6ee9e2c8ee7..3459f0d230b 100644 --- a/Lib/urllib2.py +++ b/Lib/urllib2.py @@ -263,11 +263,11 @@ class Request: def add_header(self, key, val): # useful for something like authentication - self.headers[key.title()] = val + self.headers[key.capitalize()] = val def add_unredirected_header(self, key, val): # will not be added to a redirected request - self.unredirected_hdrs[key.title()] = val + self.unredirected_hdrs[key.capitalize()] = val def has_header(self, header_name): return (header_name in self.headers or @@ -286,7 +286,7 @@ class Request: class OpenerDirector: def __init__(self): client_version = "Python-urllib/%s" % __version__ - self.addheaders = [('User-Agent', client_version)] + self.addheaders = [('User-agent', client_version)] # manage the individual handlers self.handlers = [] self.handle_open = {} @@ -675,7 +675,7 @@ class ProxyHandler(BaseHandler): if user and password: user_pass = '%s:%s' % (unquote(user), unquote(password)) creds = base64.encodestring(user_pass).strip() - req.add_header('Proxy-Authorization', 'Basic ' + creds) + req.add_header('Proxy-authorization', 'Basic ' + creds) hostport = unquote(hostport) req.set_proxy(hostport, proxy_type) if orig_type == proxy_type: @@ -819,7 +819,7 @@ class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): - auth_header = 'Proxy-Authorization' + auth_header = 'Proxy-authorization' def http_error_407(self, req, fp, code, msg, headers): # http_error_auth_reqed requires that there is no userinfo component in @@ -1022,20 +1022,20 @@ class AbstractHTTPHandler(BaseHandler): if request.has_data(): # POST data = request.get_data() - if not request.has_header('Content-Type'): + if not request.has_header('Content-type'): request.add_unredirected_header( - 'Content-Type', + 'Content-type', 'application/x-www-form-urlencoded') - if not request.has_header('Content-Length'): + if not request.has_header('Content-length'): request.add_unredirected_header( - 'Content-Length', '%d' % len(data)) + 'Content-length', '%d' % len(data)) scheme, sel = splittype(request.get_selector()) sel_host, sel_path = splithost(sel) if not request.has_header('Host'): request.add_unredirected_header('Host', sel_host or host) for name, value in self.parent.addheaders: - name = name.title() + name = name.capitalize() if not request.has_header(name): request.add_unredirected_header(name, value) @@ -1067,6 +1067,8 @@ class AbstractHTTPHandler(BaseHandler): # So make sure the connection gets closed after the (only) # request. headers["Connection"] = "close" + headers = dict( + (name.title(), val) for name, val in headers.items()) try: h.request(req.get_method(), req.get_selector(), req.data, headers) r = h.getresponse() @@ -1217,7 +1219,7 @@ class FileHandler(BaseHandler): modified = email.Utils.formatdate(stats.st_mtime, usegmt=True) mtype = mimetypes.guess_type(file)[0] headers = mimetools.Message(StringIO( - 'Content-Type: %s\nContent-Length: %d\nLast-Modified: %s\n' % + 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' % (mtype or 'text/plain', size, modified))) if host: host, port = splitport(host) @@ -1272,9 +1274,9 @@ class FTPHandler(BaseHandler): headers = "" mtype = mimetypes.guess_type(req.get_full_url())[0] if mtype: - headers += "Content-Type: %s\n" % mtype + headers += "Content-type: %s\n" % mtype if retrlen is not None and retrlen >= 0: - headers += "Content-Length: %d\n" % retrlen + headers += "Content-length: %d\n" % retrlen sf = StringIO(headers) headers = mimetools.Message(sf) return addinfourl(fp, headers, req.get_full_url()) diff --git a/Misc/NEWS b/Misc/NEWS index 872563d5cd9..537c4b1508d 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -290,8 +290,8 @@ Library - Bug #978833: Really close underlying socket in _socketobject.close. -- Bug #1459963: urllib and urllib2 now normalize HTTP header names correctly - with title(). +- Bug #1459963: urllib and urllib2 now normalize HTTP header names with + title(). - Patch #1525766: In pkgutil.walk_packages, correctly pass the onerror callback to recursive calls and call it with the failing package name.