#14380: Have MIMEText default to utf-8 when passed non-ASCII unicode

Previously it would just accept the unicode, which would wind up as unicode in
the transfer-encoded message object, which is just wrong.

Patch by Jeff Knupp.
This commit is contained in:
R David Murray 2012-03-22 22:17:51 -04:00
parent 192195a4fc
commit 8680bcc5db
5 changed files with 31 additions and 4 deletions

View File

@ -175,7 +175,7 @@ Here are the classes:
.. currentmodule:: email.mime.text
.. class:: MIMEText(_text, _subtype='plain', _charset='us-ascii')
.. class:: MIMEText(_text, _subtype='plain', _charset=None)
Module: :mod:`email.mime.text`
@ -185,5 +185,5 @@ Here are the classes:
minor type and defaults to :mimetype:`plain`. *_charset* is the character
set of the text and is passed as a parameter to the
:class:`~email.mime.nonmultipart.MIMENonMultipart` constructor; it defaults
to ``us-ascii``. No guessing or encoding is performed on the text data.
to ``us-ascii`` if the string contains only ``ascii`` codepoints, and
``utf-8`` otherwise.

View File

@ -27,4 +27,14 @@ class MIMEText(MIMENonMultipart):
"""
MIMENonMultipart.__init__(self, 'text', _subtype,
**{'charset': _charset})
# If _charset was defaulted, check to see if there are non-ascii
# characters present. Default to utf-8 if there are.
# XXX: This can be removed once #7304 is fixed.
if _charset =='us-ascii':
try:
_text.encode(_charset)
except UnicodeEncodeError:
_charset = 'utf-8'
self.set_payload(_text, _charset)

View File

@ -617,6 +617,19 @@ class TestMessageAPI(TestEmailBase):
abc
"""))
def test_unicode_body_defaults_to_utf8_encoding(self):
# Issue 14291
# A body containing a non-ASCII character ('É') is passed to MIMEText
# with no explicit charset; the serialized message is expected to
# declare charset="utf-8" and carry the payload base64-encoded.
m = MIMEText('É testabc\n')
self.assertEqual(str(m),textwrap.dedent("""\
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64
w4kgdGVzdGFiYwo=
"""))
# Test the email.encoders module
class TestEncoders(unittest.TestCase):
@ -642,7 +655,7 @@ class TestEncoders(unittest.TestCase):
eq(msg['content-transfer-encoding'], '7bit')
# Similar, but with 8bit data
msg = MIMEText('hello \xf8 world')
eq(msg['content-transfer-encoding'], '8bit')
eq(msg['content-transfer-encoding'], 'base64')
# And now with a different charset
msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
eq(msg['content-transfer-encoding'], 'quoted-printable')

View File

@ -548,6 +548,7 @@ Thomas Kluyver
Kim Knapp
Lenny Kneler
Pat Knight
Jeff Knupp
Greg Kochanski
Damon Kohler
Marko Kohtala

View File

@ -34,6 +34,9 @@ Core and Builtins
Library
-------
- Issue #14380: MIMEText now defaults to utf-8 when passed non-ASCII unicode
with no charset specified.
- Issue #10340: asyncore - properly handle EINVAL in dispatcher constructor on
OSX; avoid calling handle_connect in case of a disconnected socket which
was not meant to connect.