From 155ceaa454ad9a623cade5ed326e6e1e70ce109d Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Sun, 25 Jan 2015 23:30:30 -0500 Subject: [PATCH] handle headers with no key (closes #19996) Patch by Cory Benfield. --- Lib/email/feedparser.py | 11 ++++++++++- Lib/test/test_email/test_email.py | 6 ++++++ Lib/test/test_httplib.py | 10 ++++++++++ Misc/NEWS | 3 +++ 4 files changed, 29 insertions(+), 1 deletion(-) diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py index 0c3b572684f..c95b27f12f3 100644 --- a/Lib/email/feedparser.py +++ b/Lib/email/feedparser.py @@ -33,7 +33,7 @@ NLCRE_eol = re.compile('(\r\n|\r|\n)\Z') NLCRE_crack = re.compile('(\r\n|\r|\n)') # RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character # except controls, SP, and ":". -headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])') +headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])') EMPTYSTRING = '' NL = '\n' @@ -511,6 +511,15 @@ class FeedParser: # There will always be a colon, because if there wasn't the part of # the parser that calls us would have started parsing the body. i = line.find(':') + + # If the colon is on the start of the line the header is clearly + # malformed, but we might be able to salvage the rest of the + # message. Track the error but keep going. 
+ if i == 0: + defect = errors.InvalidHeaderDefect("Missing header name.") + self._cur.defects.append(defect) + continue + assert i>0, "_parse_headers fed line with no : and no leading WS" lastheader = line[:i] lastvalue = [line] diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index c3ecd0ab221..227110ffb11 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -3389,6 +3389,12 @@ class TestFeedParsers(TestEmailBase): feedparser.feed(chunk) return feedparser.close() + def test_empty_header_name_handled(self): + # Issue 19996 + msg = self.parse("First: val\n: bad\nSecond: val") + self.assertEqual(msg['First'], 'val') + self.assertEqual(msg['Second'], 'val') + def test_newlines(self): m = self.parse(['a:\nb:\rc:\r\nd:\n']) self.assertEqual(m.keys(), ['a', 'b', 'c', 'd']) diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index fa962b36692..3fc34665da4 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -167,6 +167,16 @@ class HeaderTests(TestCase): conn.request('GET', '/foo') self.assertTrue(sock.data.startswith(expected)) + def test_malformed_headers_coped_with(self): + # Issue 19996 + body = "HTTP/1.1 200 OK\r\nFirst: val\r\n: nval\r\nSecond: val\r\n\r\n" + sock = FakeSocket(body) + resp = client.HTTPResponse(sock) + resp.begin() + + self.assertEqual(resp.getheader('First'), 'val') + self.assertEqual(resp.getheader('Second'), 'val') + class BasicTest(TestCase): def test_status_lines(self): diff --git a/Misc/NEWS b/Misc/NEWS index 4c4db0f9b0b..06df4db08b5 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -47,6 +47,9 @@ Core and Builtins Library ------- +- Issue #19996: :class:`email.feedparser.FeedParser` now handles (malformed) + headers with no key rather than assuming the body has started. + - Issue #23248: Update ssl error codes from latest OpenSSL git master. - Issue #23098: 64-bit dev_t is now supported in the os module.