Forward port some fixes that were in email 2.5 but for some reason didn't make it into email 4.0.
Specifically, in Message.get_content_charset(), handle RFC 2231 headers that name an encoding not known to Python, or whose data contains a character that cannot be decoded with the declared charset. Also forward port the appropriate unit tests.
This commit is contained in:
parent
9815f8b252
commit
d92ae78bdb
|
@ -747,7 +747,18 @@ class Message:
|
|||
if isinstance(charset, tuple):
|
||||
# RFC 2231 encoded, so decode it, and it better end up as ascii.
|
||||
pcharset = charset[0] or 'us-ascii'
|
||||
charset = unicode(charset[2], pcharset).encode('us-ascii')
|
||||
try:
|
||||
# LookupError will be raised if the charset isn't known to
|
||||
# Python. UnicodeError will be raised if the encoded text
|
||||
# contains a character not in the charset.
|
||||
charset = unicode(charset[2], pcharset).encode('us-ascii')
|
||||
except (LookupError, UnicodeError):
|
||||
charset = charset[2]
|
||||
# charset characters must be in us-ascii range
|
||||
try:
|
||||
charset = unicode(charset, 'us-ascii').encode('us-ascii')
|
||||
except UnicodeError:
|
||||
return failobj
|
||||
# RFC 2046, section 4.1.2 says charsets are not case sensitive
|
||||
return charset.lower()
|
||||
|
||||
|
|
|
@ -3086,6 +3086,50 @@ Content-Type: text/plain;
|
|||
self.assertEqual(msg.get_content_charset(),
|
||||
'this is even more ***fun*** is it not.pdf')
|
||||
|
||||
def test_rfc2231_bad_encoding_in_filename(self):
|
||||
m = '''\
|
||||
Content-Disposition: inline;
|
||||
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
|
||||
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
|
||||
\tfilename*2="is it not.pdf"
|
||||
|
||||
'''
|
||||
msg = email.message_from_string(m)
|
||||
self.assertEqual(msg.get_filename(),
|
||||
'This is even more ***fun*** is it not.pdf')
|
||||
|
||||
def test_rfc2231_bad_encoding_in_charset(self):
|
||||
m = """\
|
||||
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
|
||||
|
||||
"""
|
||||
msg = email.message_from_string(m)
|
||||
# This should return None because non-ascii characters in the charset
|
||||
# are not allowed.
|
||||
self.assertEqual(msg.get_content_charset(), None)
|
||||
|
||||
def test_rfc2231_bad_character_in_charset(self):
|
||||
m = """\
|
||||
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
|
||||
|
||||
"""
|
||||
msg = email.message_from_string(m)
|
||||
# This should return None because non-ascii characters in the charset
|
||||
# are not allowed.
|
||||
self.assertEqual(msg.get_content_charset(), None)
|
||||
|
||||
def test_rfc2231_bad_character_in_filename(self):
|
||||
m = '''\
|
||||
Content-Disposition: inline;
|
||||
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
|
||||
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
|
||||
\tfilename*2*="is it not.pdf%E2"
|
||||
|
||||
'''
|
||||
msg = email.message_from_string(m)
|
||||
self.assertEqual(msg.get_filename(),
|
||||
u'This is even more ***fun*** is it not.pdf\ufffd')
|
||||
|
||||
def test_rfc2231_unknown_encoding(self):
|
||||
m = """\
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
|
|
@ -3092,6 +3092,50 @@ Content-Type: text/plain;
|
|||
self.assertEqual(msg.get_content_charset(),
|
||||
'this is even more ***fun*** is it not.pdf')
|
||||
|
||||
def test_rfc2231_bad_encoding_in_filename(self):
|
||||
m = '''\
|
||||
Content-Disposition: inline;
|
||||
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
|
||||
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
|
||||
\tfilename*2="is it not.pdf"
|
||||
|
||||
'''
|
||||
msg = email.message_from_string(m)
|
||||
self.assertEqual(msg.get_filename(),
|
||||
'This is even more ***fun*** is it not.pdf')
|
||||
|
||||
def test_rfc2231_bad_encoding_in_charset(self):
|
||||
m = """\
|
||||
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
|
||||
|
||||
"""
|
||||
msg = email.message_from_string(m)
|
||||
# This should return None because non-ascii characters in the charset
|
||||
# are not allowed.
|
||||
self.assertEqual(msg.get_content_charset(), None)
|
||||
|
||||
def test_rfc2231_bad_character_in_charset(self):
|
||||
m = """\
|
||||
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
|
||||
|
||||
"""
|
||||
msg = email.message_from_string(m)
|
||||
# This should return None because non-ascii characters in the charset
|
||||
# are not allowed.
|
||||
self.assertEqual(msg.get_content_charset(), None)
|
||||
|
||||
def test_rfc2231_bad_character_in_filename(self):
|
||||
m = '''\
|
||||
Content-Disposition: inline;
|
||||
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
|
||||
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
|
||||
\tfilename*2*="is it not.pdf%E2"
|
||||
|
||||
'''
|
||||
msg = email.message_from_string(m)
|
||||
self.assertEqual(msg.get_filename(),
|
||||
u'This is even more ***fun*** is it not.pdf\ufffd')
|
||||
|
||||
def test_rfc2231_unknown_encoding(self):
|
||||
m = """\
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
|
Loading…
Reference in New Issue