decode_rfc2231(): Be more robust against buggy RFC 2231 encodings.

Specifically, instead of raising a ValueError when there is a single tick in the parameter, simply return the entire string unquoted, with None for both the charset and the language. Also, if there are more than 2 ticks in the parameter, interpret the first two parts as the standard RFC 2231 charset and language, then treat everything after the second tick (including any further ticks) as the encoded string. Test cases added. Original fewer-than-3-parts fix by Tokio Kikuchi. Resolves SF bug #1218081. I will backport the fix and tests to Python 2.4 (email 3.0) and Python 2.3 (email 2.5). Also, bump the version number to email 4.0.1, removing the 'alpha' moniker.
2006-07-17 23:07:51 +00:00 · 2006-07-17 23:07:51 +00:00 · 18d2f39af7
parent a2f60a47b5
commit 18d2f39af7
3 changed files with 43 additions and 4 deletions
--- a/Lib/email/__init__.py
+++ b/Lib/email/__init__.py
@@ -4,7 +4,7 @@
 """A package for parsing, handling, and generating email messages."""
-__version__ = '4.0a2'
+__version__ = '4.0.1'
 __all__ = [
    # Old names
--- a/Lib/email/test/test_email_renamed.py
+++ b/Lib/email/test/test_email_renamed.py
@@ -3060,6 +3060,40 @@ Content-Disposition: inline; filename*0=X-UNKNOWN''myfile.txt
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')
    def test_rfc2231_single_tick_in_filename(self):
        eq = self.assertEqual
        m = """\
 Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
 """
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, None)
        eq(language, None)
        eq(s, "Frank's Document")
    def test_rfc2231_tick_attack(self):
        eq = self.assertEqual
        m = """\
 Content-Type: application/x-foo;
 \tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
 """
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")
    def test_rfc2231_no_extended_values(self):
        eq = self.assertEqual
        m = """\
 Content-Type: application/x-foo; name=\"Frank's Document\"
 """
        msg = email.message_from_string(m)
        eq(msg.get_param('name'), "Frank's Document")
 def _testclasses():
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -45,6 +45,7 @@ COMMASPACE = ', '
 EMPTYSTRING = ''
 UEMPTYSTRING = u''
 CRLF = '\r\n'
 TICK = "'"
 specialsre = re.compile(r'[][\\()<>@,:;".]')
 escapesre = re.compile(r'[][\\()"]')
@@ -231,9 +232,13 @@ def unquote(str):
 def decode_rfc2231(s):
    """Decode string according to RFC 2231"""
    import urllib
-    parts = s.split("'", 2)
+    parts = s.split(TICK, 2)
-    if len(parts) == 1:
+    if len(parts) <= 2:
        return None, None, urllib.unquote(s)
    if len(parts) > 3:
        charset, language = parts[:2]
        s = TICK.join(parts[2:])
    else:
        charset, language, s = parts
    return charset, language, urllib.unquote(s)