gh-66897: Upgrade HTTP CONNECT to protocol HTTP/1.1 (#8305)

* bpo-22708: Upgrade HTTP CONNECT to protocol HTTP/1.1 (GH-NNNN)

Use protocol HTTP/1.1 when sending HTTP CONNECT tunnelling requests;
generate Host: headers if one is not already provided (required by
HTTP/1.1), convert IDN domains to punycode in HTTP CONNECT requests.

* Refactor tests to pass under -bb (fix ByteWarnings); missed some lines >80.

* Use consistent 'tunnelling' spelling in Lib/http/client.py

* Lib/test/test_httplib: Remove remnant of obsoleted test.

* Use dict.copy() not copy.copy()

* fix version changed

* Update Lib/http/client.py

Co-authored-by: bgehman <bgehman@users.noreply.github.com>

* Switch to for/else: syntax, as suggested

* Don't use for: else:

* Sure, fine, w/e

* Oops

* 1nm to the left

---------

Co-authored-by: Éric <merwok@netwok.org>
Co-authored-by: bgehman <bgehman@users.noreply.github.com>
Co-authored-by: Oleg Iarygin <oleg@arhadthedev.net>
This commit is contained in:
Michael Handler 2023-04-04 21:55:24 -07:00 committed by GitHub
parent a62ff97075
commit 1a8f862e32
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 167 additions and 21 deletions

View File

@ -353,6 +353,13 @@ HTTPConnection Objects
The *headers* argument should be a mapping of extra HTTP headers to send with
the CONNECT request.
As HTTP/1.1 is used for HTTP CONNECT tunnelling request, `as per the RFC
<https://tools.ietf.org/html/rfc7231#section-4.3.6>`_, a HTTP ``Host:``
header must be provided, matching the authority-form of the request target
provided as the destination for the CONNECT request. If a HTTP ``Host:``
header is not provided via the headers argument, one is generated and
transmitted automatically.
For example, to tunnel through a HTTPS proxy server running locally on port
8080, we would pass the address of the proxy to the :class:`HTTPSConnection`
constructor, and the address of the host that we eventually want to reach to
@ -365,6 +372,11 @@ HTTPConnection Objects
.. versionadded:: 3.2
.. versionchanged:: 3.12
HTTP CONNECT tunnelling requests use protocol HTTP/1.1, upgraded from
protocol HTTP/1.0. ``Host:`` HTTP headers are mandatory for HTTP/1.1, so
one will be automatically generated and transmitted if not provided in
the headers argument.
.. method:: HTTPConnection.connect()

View File

@ -870,9 +870,9 @@ class HTTPConnection:
def set_tunnel(self, host, port=None, headers=None):
"""Set up host and port for HTTP CONNECT tunnelling.
In a connection that uses HTTP CONNECT tunneling, the host passed to the
constructor is used as a proxy server that relays all communication to
the endpoint passed to `set_tunnel`. This done by sending an HTTP
In a connection that uses HTTP CONNECT tunnelling, the host passed to
the constructor is used as a proxy server that relays all communication
to the endpoint passed to `set_tunnel`. This done by sending an HTTP
CONNECT request to the proxy server when the connection is established.
This method must be called before the HTTP connection has been
@ -880,6 +880,13 @@ class HTTPConnection:
The headers argument should be a mapping of extra HTTP headers to send
with the CONNECT request.
As HTTP/1.1 is used for HTTP CONNECT tunnelling request, as per the RFC
(https://tools.ietf.org/html/rfc7231#section-4.3.6), a HTTP Host:
header must be provided, matching the authority-form of the request
target provided as the destination for the CONNECT request. If a
HTTP Host: header is not provided via the headers argument, one
is generated and transmitted automatically.
"""
if self.sock:
@ -887,10 +894,15 @@ class HTTPConnection:
self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
if headers:
self._tunnel_headers = headers
self._tunnel_headers = headers.copy()
else:
self._tunnel_headers.clear()
if not any(header.lower() == "host" for header in self._tunnel_headers):
encoded_host = self._tunnel_host.encode("idna").decode("ascii")
self._tunnel_headers["Host"] = "%s:%d" % (
encoded_host, self._tunnel_port)
def _get_hostport(self, host, port):
if port is None:
i = host.rfind(':')
@ -915,8 +927,9 @@ class HTTPConnection:
self.debuglevel = level
def _tunnel(self):
connect = b"CONNECT %s:%d HTTP/1.0\r\n" % (
self._tunnel_host.encode("ascii"), self._tunnel_port)
connect = b"CONNECT %s:%d %s\r\n" % (
self._tunnel_host.encode("idna"), self._tunnel_port,
self._http_vsn_str.encode("ascii"))
headers = [connect]
for header, value in self._tunnel_headers.items():
headers.append(f"{header}: {value}\r\n".encode("latin-1"))

View File

@ -2187,11 +2187,12 @@ class HTTPResponseTest(TestCase):
class TunnelTests(TestCase):
def setUp(self):
response_text = (
'HTTP/1.0 200 OK\r\n\r\n' # Reply to CONNECT
'HTTP/1.1 200 OK\r\n\r\n' # Reply to CONNECT
'HTTP/1.1 200 OK\r\n' # Reply to HEAD
'Content-Length: 42\r\n\r\n'
)
self.host = 'proxy.com'
self.port = client.HTTP_PORT
self.conn = client.HTTPConnection(self.host)
self.conn._create_connection = self._create_connection(response_text)
@ -2203,15 +2204,45 @@ class TunnelTests(TestCase):
return FakeSocket(response_text, host=address[0], port=address[1])
return create_connection
def test_set_tunnel_host_port_headers(self):
def test_set_tunnel_host_port_headers_add_host_missing(self):
tunnel_host = 'destination.com'
tunnel_port = 8888
tunnel_headers = {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11)'}
tunnel_headers_after = tunnel_headers.copy()
tunnel_headers_after['Host'] = '%s:%d' % (tunnel_host, tunnel_port)
self.conn.set_tunnel(tunnel_host, port=tunnel_port,
headers=tunnel_headers)
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, client.HTTP_PORT)
self.assertEqual(self.conn.sock.port, self.port)
self.assertEqual(self.conn._tunnel_host, tunnel_host)
self.assertEqual(self.conn._tunnel_port, tunnel_port)
self.assertEqual(self.conn._tunnel_headers, tunnel_headers_after)
def test_set_tunnel_host_port_headers_set_host_identical(self):
tunnel_host = 'destination.com'
tunnel_port = 8888
tunnel_headers = {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11)',
'Host': '%s:%d' % (tunnel_host, tunnel_port)}
self.conn.set_tunnel(tunnel_host, port=tunnel_port,
headers=tunnel_headers)
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertEqual(self.conn._tunnel_host, tunnel_host)
self.assertEqual(self.conn._tunnel_port, tunnel_port)
self.assertEqual(self.conn._tunnel_headers, tunnel_headers)
def test_set_tunnel_host_port_headers_set_host_different(self):
tunnel_host = 'destination.com'
tunnel_port = 8888
tunnel_headers = {'User-Agent': 'Mozilla/5.0 (compatible, MSIE 11)',
'Host': '%s:%d' % ('example.com', 4200)}
self.conn.set_tunnel(tunnel_host, port=tunnel_port,
headers=tunnel_headers)
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertEqual(self.conn._tunnel_host, tunnel_host)
self.assertEqual(self.conn._tunnel_port, tunnel_port)
self.assertEqual(self.conn._tunnel_headers, tunnel_headers)
@ -2223,17 +2254,96 @@ class TunnelTests(TestCase):
'destination.com')
def test_connect_with_tunnel(self):
self.conn.set_tunnel('destination.com')
d = {
b'host': b'destination.com',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'))
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)
def test_connect_with_tunnel_with_default_port(self):
d = {
b'host': b'destination.com',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'), port=d[b'port'])
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)
def test_connect_with_tunnel_with_nonstandard_port(self):
d = {
b'host': b'destination.com',
b'port': 8888,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'), port=d[b'port'])
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s:%(port)d\r\n' % d,
self.conn.sock.data)
# This request is not RFC-valid, but it's been possible with the library
# for years, so don't break it unexpectedly... This also tests
# case-insensitivity when injecting Host: headers if they're missing.
def test_connect_with_tunnel_with_different_host_header(self):
d = {
b'host': b'destination.com',
b'tunnel_host_header': b'example.com:9876',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(
d[b'host'].decode('ascii'),
headers={'HOST': d[b'tunnel_host_header'].decode('ascii')})
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'HOST: %(tunnel_host_header)s\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)
def test_connect_with_tunnel_different_host(self):
d = {
b'host': b'destination.com',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'))
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'HEAD / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)
def test_connect_with_tunnel_idna(self):
dest = '\u03b4\u03c0\u03b8.gr'
dest_port = b'%s:%d' % (dest.encode('idna'), client.HTTP_PORT)
expected = b'CONNECT %s HTTP/1.1\r\nHost: %s\r\n\r\n' % (
dest_port, dest_port)
self.conn.set_tunnel(dest)
self.conn.request('HEAD', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, client.HTTP_PORT)
self.assertIn(b'CONNECT destination.com', self.conn.sock.data)
# issue22095
self.assertNotIn(b'Host: destination.com:None', self.conn.sock.data)
self.assertIn(b'Host: destination.com', self.conn.sock.data)
# This test should be removed when CONNECT gets the HTTP/1.1 blessing
self.assertNotIn(b'Host: proxy.com', self.conn.sock.data)
self.assertIn(expected, self.conn.sock.data)
def test_tunnel_connect_single_send_connection_setup(self):
"""Regresstion test for https://bugs.python.org/issue43332."""
@ -2253,12 +2363,19 @@ class TunnelTests(TestCase):
msg=f'unexpected proxy data sent {proxy_setup_data_sent!r}')
def test_connect_put_request(self):
self.conn.set_tunnel('destination.com')
d = {
b'host': b'destination.com',
b'port': client.HTTP_PORT,
}
self.conn.set_tunnel(d[b'host'].decode('ascii'))
self.conn.request('PUT', '/', '')
self.assertEqual(self.conn.sock.host, self.host)
self.assertEqual(self.conn.sock.port, client.HTTP_PORT)
self.assertIn(b'CONNECT destination.com', self.conn.sock.data)
self.assertIn(b'Host: destination.com', self.conn.sock.data)
self.assertEqual(self.conn.sock.port, self.port)
self.assertIn(b'CONNECT %(host)s:%(port)d HTTP/1.1\r\n'
b'Host: %(host)s:%(port)d\r\n\r\n' % d,
self.conn.sock.data)
self.assertIn(b'PUT / HTTP/1.1\r\nHost: %(host)s\r\n' % d,
self.conn.sock.data)
def test_tunnel_debuglog(self):
expected_header = 'X-Dummy: 1'

View File

@ -693,6 +693,7 @@ Anders Hammarquist
Mark Hammond
Harald Hanche-Olsen
Manus Hand
Michael Handler
Andreas Hangauer
Milton L. Hankins
Carl Bordum Hansen

View File

@ -0,0 +1,3 @@
http.client CONNECT method tunnel improvements: Use HTTP 1.1 protocol; send
a matching Host: header with CONNECT, if one is not provided; convert IDN
domain names to Punycode. Patch by Michael Handler.