#1379416: encode charset name to ascii to avoid unicode promotion of output

This commit is contained in:
R. David Murray 2010-12-27 19:17:17 +00:00
parent 49ee82c4eb
commit 5360d003b6
3 changed files with 11 additions and 1 deletions

View File

@@ -209,7 +209,7 @@ class Charset:
input_charset = unicode(input_charset, 'ascii')
except UnicodeError:
raise errors.CharsetError(input_charset)
input_charset = input_charset.lower()
input_charset = input_charset.lower().encode('ascii')
# Set the input charset after filtering through the aliases and/or codecs
if not (input_charset in ALIASES or input_charset in CHARSETS):
try:

View File

@@ -3140,6 +3140,13 @@ A very long line that must get split to something other than at the
'attachment; filename*="iso-8859-1\'\'Fu%DFballer.ppt"',
msg['Content-Disposition'])
def test_encode_unaliased_charset(self):
# Issue 1379416: when the charset has no output conversion,
# output was accidentally getting coerced to unicode.
res = Header('abc','iso-8859-2').encode()
self.assertEqual(res, '=?iso-8859-2?q?abc?=')
self.assertIsInstance(res, str)
# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):

View File

@@ -22,6 +22,9 @@ Core and Builtins
Library
-------
- Issue #1379416: eliminated a source of accidental unicode promotion in
email.header.Header.encode.
- Issue #5258/#10642: if site.py encounters a .pth file that generates an error,
it now prints the filename, line number, and traceback to stderr and skips
the rest of that individual file, instead of stopping processing entirely.