SF bug #1582282; decode_header() incorrectly splits non-conformant RFC

2047-like headers where there is no whitespace between encoded words.  This
fix changes the matching regexp to include a trailing lookahead assertion that
the closing ?= must be followed by whitespace, newline, or end-of-string.
This also adds the MULTILINE flag to the regexp, so that the `$` in the new lookahead matches before each newline as well as at end-of-string.
This commit is contained in:
Barry Warsaw 2007-03-14 04:59:50 +00:00
parent 47c52a8b60
commit dcd24ae501
3 changed files with 26 additions and 1 deletions

View File

@ -39,7 +39,8 @@ ecre = re.compile(r'''
\? # literal ? \? # literal ?
(?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string (?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string
\?= # literal ?= \?= # literal ?=
''', re.VERBOSE | re.IGNORECASE) (?=[ \t]|$) # whitespace or the end of the string
''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)
# Field name regexp, including trailing colon, but not separating whitespace, # Field name regexp, including trailing colon, but not separating whitespace,
# according to RFC 2822. Character range is from tilde to exclamation mark. # according to RFC 2822. Character range is from tilde to exclamation mark.

View File

@ -1527,6 +1527,18 @@ class TestRFC2047(unittest.TestCase):
hu = make_header(dh).__unicode__() hu = make_header(dh).__unicode__()
eq(hu, u'The quick brown fox jumped over the lazy dog') eq(hu, u'The quick brown fox jumped over the lazy dog')
def test_rfc2047_without_whitespace(self):
    # Encoded-word lookalikes glued directly to the surrounding text
    # (no whitespace after the closing ?=) must not be decoded: the
    # header is expected back as one chunk with no charset.
    header = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
    expected = [(header, None)]
    self.assertEqual(decode_header(header), expected)
def test_rfc2047_with_whitespace(self):
    # When every encoded word is delimited by whitespace, the header is
    # split into alternating plain chunks (charset None) and decoded
    # chunks tagged with the lower-cased charset.
    header = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
    expected = [
        ('Sm', None),
        ('\xf6', 'iso-8859-1'),
        ('rg', None),
        ('\xe5', 'iso-8859-1'),
        ('sbord', None),
    ]
    self.assertEqual(decode_header(header), expected)
# Test the MIMEMessage class # Test the MIMEMessage class

View File

@ -1525,6 +1525,18 @@ class TestRFC2047(unittest.TestCase):
hu = make_header(dh).__unicode__() hu = make_header(dh).__unicode__()
eq(hu, u'The quick brown fox jumped over the lazy dog') eq(hu, u'The quick brown fox jumped over the lazy dog')
def test_rfc2047_missing_whitespace(self):
    # No whitespace separates the =?...?= sequences from the adjacent
    # text, so decode_header() is expected to leave the whole header
    # untouched and report it as a single chunk without a charset.
    raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
    decoded = decode_header(raw)
    self.assertEqual(decoded, [(raw, None)])
def test_rfc2047_with_whitespace(self):
    # Whitespace-delimited encoded words are decoded individually; the
    # plain text between them comes back with charset None.
    raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
    latin1 = 'iso-8859-1'
    decoded = decode_header(raw)
    self.assertEqual(decoded, [('Sm', None), ('\xf6', latin1),
                               ('rg', None), ('\xe5', latin1),
                               ('sbord', None)])
# Test the MIMEMessage class # Test the MIMEMessage class