From e036af0ce5a564b958381e49c61a91f7a3591d1a Mon Sep 17 00:00:00 2001 From: "R. David Murray" Date: Wed, 2 Jun 2010 22:11:01 +0000 Subject: [PATCH] Merged revisions 81658 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r81658 | r.david.murray | 2010-06-02 18:03:15 -0400 (Wed, 02 Jun 2010) | 9 lines #1368247: make set_charset/MIMEText automatically encode unicode _payload. Fixes (mysterious, to the end user) UnicodeErrors when using utf-8 as the charset and unicode as the _text argument. Also makes the way in which unicode gets encoded to quoted printable for other charsets more sane (it only worked by accident previously). The _payload now is encoded to the charset.output_charset if it is unicode. ........ --- Doc/library/email.message.rst | 7 ++++--- Doc/library/email.mime.rst | 8 +++++--- Lib/email/message.py | 2 ++ Lib/email/test/test_email.py | 25 +++++++++++++++++++++++++ Misc/NEWS | 3 +++ 5 files changed, 39 insertions(+), 6 deletions(-) diff --git a/Doc/library/email.message.rst b/Doc/library/email.message.rst index 85a5d83eba0..4cc38803907 100644 --- a/Doc/library/email.message.rst +++ b/Doc/library/email.message.rst @@ -136,9 +136,10 @@ Here are the methods of the :class:`Message` class: :mailheader:`Content-Type` header. Anything else will generate a :exc:`TypeError`. - The message will be assumed to be of type :mimetype:`text/\*` encoded with - *charset.input_charset*. It will be converted to *charset.output_charset* - and encoded properly, if needed, when generating the plain text + The message will be assumed to be of type :mimetype:`text/\*`, with the + payload either in unicode or encoded with *charset.input_charset*. + It will be encoded or converted to *charset.output_charset* + and transfer encoded properly, if needed, when generating the plain text representation of the message. MIME headers (:mailheader:`MIME-Version`, :mailheader:`Content-Type`, :mailheader:`Content-Transfer-Encoding`) will be added as needed. 
diff --git a/Doc/library/email.mime.rst b/Doc/library/email.mime.rst index 10f3e37f80e..a092feb5eda 100644 --- a/Doc/library/email.mime.rst +++ b/Doc/library/email.mime.rst @@ -191,9 +191,11 @@ Here are the classes: minor type and defaults to :mimetype:`plain`. *_charset* is the character set of the text and is passed as a parameter to the :class:`~email.mime.nonmultipart.MIMENonMultipart` constructor; it defaults - to ``us-ascii``. No guessing or encoding is performed on the text data. + to ``us-ascii``. If *_text* is unicode, it is encoded using the + *output_charset* of *_charset*, otherwise it is used as-is. .. versionchanged:: 2.4 - The previously deprecated *_encoding* argument has been removed. Encoding - happens implicitly based on the *_charset* argument. + The previously deprecated *_encoding* argument has been removed. Content + Transfer Encoding now happens implicitly based on the *_charset* + argument. diff --git a/Lib/email/message.py b/Lib/email/message.py index 26dacf41fe3..626739becd1 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -256,6 +256,8 @@ class Message: charset=charset.get_output_charset()) else: self.set_param('charset', charset.get_output_charset()) + if isinstance(self._payload, unicode): + self._payload = self._payload.encode(charset.output_charset) if str(charset) != charset.get_output_charset(): self._payload = charset.body_encode(self._payload) if not self.has_key('Content-Transfer-Encoding'): diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py index e94c07f34f1..350d0b0e92a 100644 --- a/Lib/email/test/test_email.py +++ b/Lib/email/test/test_email.py @@ -1025,6 +1025,31 @@ class TestMIMEText(unittest.TestCase): eq(msg.get_charset().input_charset, 'us-ascii') eq(msg['content-type'], 'text/plain; charset="us-ascii"') + def test_7bit_unicode_input(self): + eq = self.assertEqual + msg = MIMEText(u'hello there', _charset='us-ascii') + eq(msg.get_charset().input_charset, 'us-ascii') + 
eq(msg['content-type'], 'text/plain; charset="us-ascii"') + + def test_7bit_unicode_input_no_charset(self): + eq = self.assertEqual + msg = MIMEText(u'hello there') + eq(msg.get_charset(), 'us-ascii') + eq(msg['content-type'], 'text/plain; charset="us-ascii"') + self.assertTrue('hello there' in msg.as_string()) + + def test_8bit_unicode_input(self): + teststr = u'\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430' + eq = self.assertEqual + msg = MIMEText(teststr, _charset='utf-8') + eq(msg.get_charset().output_charset, 'utf-8') + eq(msg['content-type'], 'text/plain; charset="utf-8"') + eq(msg.get_payload(decode=True), teststr.encode('utf-8')) + + def test_8bit_unicode_input_no_charset(self): + teststr = u'\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430' + self.assertRaises(UnicodeEncodeError, MIMEText, teststr) + # Test complicated multipart/* messages diff --git a/Misc/NEWS b/Misc/NEWS index 5f3b31dd6cc..70e3ceeee42 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -58,6 +58,9 @@ C-API Library ------- +- Issue #1368247: set_charset (and therefore MIMEText) now automatically + encodes a unicode _payload to the output_charset. + - Issue #7150: Raise OverflowError if the result of adding or subtracting timedelta from date or datetime falls outside of the MINYEAR:MAXYEAR range.