From dcd24ae5015bb94ef83015a4f584e5b8f173e999 Mon Sep 17 00:00:00 2001 From: Barry Warsaw Date: Wed, 14 Mar 2007 04:59:50 +0000 Subject: [PATCH] SF bug #1582282; decode_header() incorrectly splits not-conformant RFC 2047-like headers where there is no whitespace between encoded words. This fix changes the matching regexp to include a trailing lookahead assertion that the closing ?= must be followed by whitespace, newline, or end-of-string. This also changes the regexp to add the MULTILINE flag. --- Lib/email/header.py | 3 ++- Lib/email/test/test_email.py | 12 ++++++++++++ Lib/email/test/test_email_renamed.py | 12 ++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/Lib/email/header.py b/Lib/email/header.py index 183c337560d..e139ccf64fa 100644 --- a/Lib/email/header.py +++ b/Lib/email/header.py @@ -39,7 +39,8 @@ ecre = re.compile(r''' \? # literal ? (?P.*?) # non-greedy up to the next ?= is the encoded string \?= # literal ?= - ''', re.VERBOSE | re.IGNORECASE) + (?=[ \t]|$) # whitespace or the end of the string + ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE) # Field name regexp, including trailing colon, but not separating whitespace, # according to RFC 2822. Character range is from tilde to exclamation mark. diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py index 14b8a1b8dc2..a03299991ef 100644 --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -1527,6 +1527,18 @@ class TestRFC2047(unittest.TestCase): hu = make_header(dh).__unicode__() eq(hu, u'The quick brown fox jumped over the lazy dog') + def test_rfc2047_without_whitespace(self): + s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord' + dh = decode_header(s) + self.assertEqual(dh, [(s, None)]) + + def test_rfc2047_with_whitespace(self): + s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord' + dh = decode_header(s) + self.assertEqual(dh, [('Sm', None), ('\xf6', 'iso-8859-1'), + ('rg', None), ('\xe5', 'iso-8859-1'), + ('sbord', None)]) + # Test the MIMEMessage class diff --git a/Lib/email/test/test_email_renamed.py b/Lib/email/test/test_email_renamed.py index 19950408718..44238c78f8c 100644 --- a/Lib/email/test/test_email_renamed.py +++ b/Lib/email/test/test_email_renamed.py @@ -1525,6 +1525,18 @@ class TestRFC2047(unittest.TestCase): hu = make_header(dh).__unicode__() eq(hu, u'The quick brown fox jumped over the lazy dog') + def test_rfc2047_missing_whitespace(self): + s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord' + dh = decode_header(s) + self.assertEqual(dh, [(s, None)]) + + def test_rfc2047_with_whitespace(self): + s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord' + dh = decode_header(s) + self.assertEqual(dh, [('Sm', None), ('\xf6', 'iso-8859-1'), + ('rg', None), ('\xe5', 'iso-8859-1'), + ('sbord', None)]) + # Test the MIMEMessage class