mirror of https://github.com/python/cpython
Issue #17214: Percent-encode non-ASCII bytes in redirect targets
Some servers send Location header fields with non-ASCII bytes, but "http.client" requires the request target to be ASCII-encodable, otherwise a UnicodeEncodeError is raised. Based on patch by Christian Heimes. Python 2 does not suffer from this problem because it allows non-ASCII bytes in the HTTP request target.
parent ce6e06874b
commit e6f060903c
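To illustrate the failure this change addresses (a standalone sketch, not part of the patch): http.client encodes the request line as ASCII, so a request target that still carries raw non-ASCII bytes from a Location header fails before anything is sent.

import http.client

conn = http.client.HTTPConnection('example.com')
try:
    # Hypothetical redirect target: the latin-1-decoded text of a Location
    # header that contained the UTF-8 bytes b'/p\xC3\xA5-dansk/'.
    conn.request('GET', '/p\xc3\xa5-dansk/')
except UnicodeEncodeError as exc:
    # Raised while building the request line, before any bytes hit the wire.
    print(exc)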
@@ -1224,6 +1224,41 @@ class HandlerTests(unittest.TestCase):
         fp = urllib.request.urlopen("http://python.org/path")
         self.assertEqual(fp.geturl(), "http://python.org/path?query")
 
+    def test_redirect_encoding(self):
+        # Some characters in the redirect target may need special handling,
+        # but most ASCII characters should be treated as already encoded
+        class Handler(urllib.request.HTTPHandler):
+            def http_open(self, req):
+                result = self.do_open(self.connection, req)
+                self.last_buf = self.connection.buf
+                # Set up a normal response for the next request
+                self.connection = test_urllib.fakehttp(
+                    b'HTTP/1.1 200 OK\r\n'
+                    b'Content-Length: 3\r\n'
+                    b'\r\n'
+                    b'123'
+                )
+                return result
+        handler = Handler()
+        opener = urllib.request.build_opener(handler)
+        tests = (
+            (b'/p\xC3\xA5-dansk/', b'/p%C3%A5-dansk/'),
+            (b'/spaced%20path/', b'/spaced%20path/'),
+            (b'/spaced path/', b'/spaced%20path/'),
+            (b'/?p\xC3\xA5-dansk', b'/?p%C3%A5-dansk'),
+        )
+        for [location, result] in tests:
+            with self.subTest(repr(location)):
+                handler.connection = test_urllib.fakehttp(
+                    b'HTTP/1.1 302 Redirect\r\n'
+                    b'Location: ' + location + b'\r\n'
+                    b'\r\n'
+                )
+                response = opener.open('http://example.com/')
+                expected = b'GET ' + result + b' '
+                request = handler.last_buf
+                self.assertTrue(request.startswith(expected), repr(request))
+
     def test_proxy(self):
         o = OpenerDirector()
         ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128"))
@@ -91,6 +91,7 @@ import os
 import posixpath
 import re
 import socket
+import string
 import sys
 import time
 import collections
@@ -616,8 +617,12 @@ class HTTPRedirectHandler(BaseHandler):
         # from the user (of urllib.request, in this case). In practice,
         # essentially all clients do redirect in this case, so we do
         # the same.
-        # be conciliant with URIs containing a space
+
+        # Be conciliant with URIs containing a space. This is mainly
+        # redundant with the more complete encoding done in http_error_302(),
+        # but it is kept for compatibility with other callers.
         newurl = newurl.replace(' ', '%20')
+
         CONTENT_HEADERS = ("content-length", "content-type")
         newheaders = dict((k, v) for k, v in req.headers.items()
                           if k.lower() not in CONTENT_HEADERS)
@@ -657,6 +662,11 @@ class HTTPRedirectHandler(BaseHandler):
             urlparts[2] = "/"
         newurl = urlunparse(urlparts)
 
+        # http.client.parse_headers() decodes as ISO-8859-1. Recover the
+        # original bytes and percent-encode non-ASCII bytes, and any special
+        # characters such as the space.
+        newurl = quote(
+            newurl, encoding="iso-8859-1", safe=string.punctuation)
         newurl = urljoin(req.full_url, newurl)
 
         # XXX Probably want to forget about the state of the current
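For reference, the quote() call added above behaves like this on the same inputs the new test exercises (an illustrative snippet, not part of the patch):

import string
from urllib.parse import quote

# Location values arrive as ISO-8859-1-decoded text; re-encoding with the
# same codec recovers the original bytes, and percent-encoding everything
# except ASCII punctuation leaves already-escaped "%XX" sequences alone.
print(quote('/p\xc3\xa5-dansk/', encoding='iso-8859-1',
            safe=string.punctuation))   # /p%C3%A5-dansk/
print(quote('/spaced path/', encoding='iso-8859-1',
            safe=string.punctuation))   # /spaced%20path/
print(quote('/spaced%20path/', encoding='iso-8859-1',
            safe=string.punctuation))   # /spaced%20path/ (unchanged)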
@@ -121,6 +121,12 @@ Library
 - Issue #14132: Fix urllib.request redirect handling when the target only has
   a query string. Original fix by Ján Janech.
 
+- Issue #17214: The "urllib.request" module now percent-encodes non-ASCII
+  bytes found in redirect target URLs. Some servers send Location header
+  fields with non-ASCII bytes, but "http.client" requires the request target
+  to be ASCII-encodable, otherwise a UnicodeEncodeError is raised. Based on
+  patch by Christian Heimes.
+
 - Issue #26892: Honor debuglevel flag in urllib.request.HTTPHandler. Patch
   contributed by Chi Hsuan Yen.