From 9165addc22d05e776a54319a8531ebd0b2fe01ef Mon Sep 17 00:00:00 2001 From: Ashwin Ramaswami Date: Sat, 14 Mar 2020 14:56:06 -0400 Subject: [PATCH] bpo-38576: Disallow control characters in hostnames in http.client (GH-18995) Add host validation for control characters for more CVE-2019-18348 protection. --- Lib/http/client.py | 10 ++++++ Lib/test/test_httplib.py | 13 ++++++- Lib/test/test_urllib.py | 36 +++++++++++++++++-- .../2020-03-14-14-57-44.bpo-38576.OowwQn.rst | 1 + 4 files changed, 57 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2020-03-14-14-57-44.bpo-38576.OowwQn.rst diff --git a/Lib/http/client.py b/Lib/http/client.py index 33a434733f8..019380a7203 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -828,6 +828,8 @@ class HTTPConnection: (self.host, self.port) = self._get_hostport(host, port) + self._validate_host(self.host) + # This is stored as an instance variable to allow unit # tests to replace it with a suitable mockup self._create_connection = socket.create_connection @@ -1183,6 +1185,14 @@ class HTTPConnection: raise InvalidURL(f"URL can't contain control characters. {url!r} " f"(found at least {match.group()!r})") + def _validate_host(self, host): + """Validate a host so it doesn't contain control characters.""" + # Prevent CVE-2019-18348. + match = _contains_disallowed_url_pchar_re.search(host) + if match: + raise InvalidURL(f"URL can't contain control characters. {host!r} " + f"(found at least {match.group()!r})") + def putheader(self, header, *values): """Send a request header line to the server. 
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index 95eca7e00a0..7f4decc8fda 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -1155,7 +1155,7 @@ class BasicTest(TestCase): thread.join() self.assertEqual(result, b"proxied data\n") - def test_putrequest_override_validation(self): + def test_putrequest_override_domain_validation(self): """ It should be possible to override the default validation behavior in putrequest (bpo-38216). @@ -1168,6 +1168,17 @@ class BasicTest(TestCase): conn.sock = FakeSocket('') conn.putrequest('GET', '/\x00') + def test_putrequest_override_host_validation(self): + class UnsafeHTTPConnection(client.HTTPConnection): + def _validate_host(self, url): + pass + + conn = UnsafeHTTPConnection('example.com\r\n') + conn.sock = FakeSocket('') + # set skip_host so a ValueError is not raised upon adding the + # invalid URL as the value of the "Host:" header + conn.putrequest('GET', '/', skip_host=1) + def test_putrequest_override_encoding(self): """ It should be possible to override the default encoding diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 2e82fc7b7b8..ebeb9a00145 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -370,7 +370,7 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin): self.unfakehttp() @unittest.skipUnless(ssl, "ssl module required") - def test_url_with_control_char_rejected(self): + def test_url_path_with_control_char_rejected(self): for char_no in list(range(0, 0x21)) + [0x7f]: char = chr(char_no) schemeless_url = f"//localhost:7777/test{char}/" @@ -397,7 +397,7 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin): self.unfakehttp() @unittest.skipUnless(ssl, "ssl module required") - def test_url_with_newline_header_injection_rejected(self): + def test_url_path_with_newline_header_injection_rejected(self): self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: 
header\r\nTEST: 123" schemeless_url = "//" + host + ":8080/test/?test=a" @@ -422,6 +422,38 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin): finally: self.unfakehttp() + @unittest.skipUnless(ssl, "ssl module required") + def test_url_host_with_control_char_rejected(self): + for char_no in list(range(0, 0x21)) + [0x7f]: + char = chr(char_no) + schemeless_url = f"//localhost{char}/test/" + self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") + try: + escaped_char_repr = repr(char).replace('\\', r'\\') + InvalidURL = http.client.InvalidURL + with self.assertRaisesRegex( + InvalidURL, f"contain control.*{escaped_char_repr}"): + urlopen(f"http:{schemeless_url}") + with self.assertRaisesRegex(InvalidURL, f"contain control.*{escaped_char_repr}"): + urlopen(f"https:{schemeless_url}") + finally: + self.unfakehttp() + + @unittest.skipUnless(ssl, "ssl module required") + def test_url_host_with_newline_header_injection_rejected(self): + self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") + host = "localhost\r\nX-injected: header\r\n" + schemeless_url = "//" + host + ":8080/test/?test=a" + try: + InvalidURL = http.client.InvalidURL + with self.assertRaisesRegex( + InvalidURL, r"contain control.*\\r"): + urlopen(f"http:{schemeless_url}") + with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"): + urlopen(f"https:{schemeless_url}") + finally: + self.unfakehttp() + def test_read_0_9(self): # "0.9" response accepted (but not "simple responses" without # a status line) diff --git a/Misc/NEWS.d/next/Security/2020-03-14-14-57-44.bpo-38576.OowwQn.rst b/Misc/NEWS.d/next/Security/2020-03-14-14-57-44.bpo-38576.OowwQn.rst new file mode 100644 index 00000000000..34b8af28988 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2020-03-14-14-57-44.bpo-38576.OowwQn.rst @@ -0,0 +1 @@ +Disallow control characters in hostnames in http.client, addressing CVE-2019-18348. Such potentially malicious header injection URLs now cause an InvalidURL to be raised. 
\ No newline at end of file