#1379416: encode charset name to ascii to avoid unicode promotion of output

2010-12-27 19:17:17 +00:00 · 2010-12-27 19:17:17 +00:00 · 5360d003b6
parent 49ee82c4eb
commit 5360d003b6
3 changed files with 11 additions and 1 deletion
--- a/Lib/email/charset.py
+++ b/Lib/email/charset.py
@@ -209,7 +209,7 @@ class Charset:
                input_charset = unicode(input_charset, 'ascii')
        except UnicodeError:
            raise errors.CharsetError(input_charset)
-        input_charset = input_charset.lower()
+        input_charset = input_charset.lower().encode('ascii')
        # Set the input charset after filtering through the aliases and/or codecs
        if not (input_charset in ALIASES or input_charset in CHARSETS):
            try:
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -3140,6 +3140,13 @@ A very long line that must get split to something other than at the
            'attachment; filename*="iso-8859-1\'\'Fu%DFballer.ppt"',
            msg['Content-Disposition'])

+    def test_encode_unaliased_charset(self):
+        # Issue 1379416: when the charset has no output conversion,
+        # output was accidentally getting coerced to unicode.
+        res = Header('abc','iso-8859-2').encode()
+        self.assertEqual(res, '=?iso-8859-2?q?abc?=')
+        self.assertIsInstance(res, str)
+

 # Test RFC 2231 header parameters (en/de)coding
 class TestRFC2231(TestEmailBase):
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -22,6 +22,9 @@ Core and Builtins
 Library
 -------

+- Issue #1379416: eliminated a source of accidental unicode promotion in
+  email.header.Header.encode.
+
 - Issue #5258/#10642: if site.py encounters a .pth file that generates an error,
  it now prints the filename, line number, and traceback to stderr and skips
  the rest of that individual file, instead of stopping processing entirely.