decode_rfc2231(): Be more robust against buggy RFC 2231 encodings.

Specifically, instead of raising a ValueError when there is a single tick in
the parameter, simply return the entire string unquoted, with None for both
the charset and the language.  Also, if there are more than 2 ticks in the
parameter, interpret the first two parts as the standard RFC 2231 charset and
language, then the rest of the parts (rejoined with ticks) as the encoded
string.

Test cases added.

Original fewer-than-3-parts fix by Tokio Kikuchi.

Resolves SF bug #1218081.  I will backport the fix and tests to Python 2.4
(email 3.0) and Python 2.3 (email 2.5).

Also, bump the version number to email 4.0.1, removing the 'alpha' moniker.
This commit is contained in:
Barry Warsaw 2006-07-17 23:07:51 +00:00
parent a2f60a47b5
commit 18d2f39af7
3 changed files with 43 additions and 4 deletions

View File

@ -4,7 +4,7 @@
"""A package for parsing, handling, and generating email messages.""" """A package for parsing, handling, and generating email messages."""
__version__ = '4.0a2' __version__ = '4.0.1'
__all__ = [ __all__ = [
# Old names # Old names

View File

@ -3060,6 +3060,40 @@ Content-Disposition: inline; filename*0=X-UNKNOWN''myfile.txt
msg = email.message_from_string(m) msg = email.message_from_string(m)
self.assertEqual(msg.get_filename(), 'myfile.txt') self.assertEqual(msg.get_filename(), 'myfile.txt')
def test_rfc2231_single_tick_in_filename(self):
eq = self.assertEqual
m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
"""
msg = email.message_from_string(m)
charset, language, s = msg.get_param('name')
eq(charset, None)
eq(language, None)
eq(s, "Frank's Document")
def test_rfc2231_tick_attack(self):
eq = self.assertEqual
m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
"""
msg = email.message_from_string(m)
charset, language, s = msg.get_param('name')
eq(charset, 'us-ascii')
eq(language, 'en-us')
eq(s, "Frank's Document")
def test_rfc2231_no_extended_values(self):
eq = self.assertEqual
m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"
"""
msg = email.message_from_string(m)
eq(msg.get_param('name'), "Frank's Document")
def _testclasses(): def _testclasses():

View File

@ -45,6 +45,7 @@ COMMASPACE = ', '
EMPTYSTRING = '' EMPTYSTRING = ''
UEMPTYSTRING = u'' UEMPTYSTRING = u''
CRLF = '\r\n' CRLF = '\r\n'
TICK = "'"
specialsre = re.compile(r'[][\\()<>@,:;".]') specialsre = re.compile(r'[][\\()<>@,:;".]')
escapesre = re.compile(r'[][\\()"]') escapesre = re.compile(r'[][\\()"]')
@ -231,9 +232,13 @@ def unquote(str):
def decode_rfc2231(s): def decode_rfc2231(s):
"""Decode string according to RFC 2231""" """Decode string according to RFC 2231"""
import urllib import urllib
parts = s.split("'", 2) parts = s.split(TICK, 2)
if len(parts) == 1: if len(parts) <= 2:
return None, None, urllib.unquote(s) return None, None, urllib.unquote(s)
if len(parts) > 3:
charset, language = parts[:2]
s = TICK.join(parts[2:])
else:
charset, language, s = parts charset, language, s = parts
return charset, language, urllib.unquote(s) return charset, language, urllib.unquote(s)