#14380: Have MIMEText default to utf-8 when passed non-ASCII unicode
Previously it would just accept the unicode, which would wind up as unicode in the transfer-encoded message object, which is just wrong. Patch by Jeff Knupp.
This commit is contained in:
parent
192195a4fc
commit
8680bcc5db
|
@ -175,7 +175,7 @@ Here are the classes:
|
|||
|
||||
.. currentmodule:: email.mime.text
|
||||
|
||||
.. class:: MIMEText(_text, _subtype='plain', _charset='us-ascii')
|
||||
.. class:: MIMEText(_text, _subtype='plain', _charset=None)
|
||||
|
||||
Module: :mod:`email.mime.text`
|
||||
|
||||
|
@ -185,5 +185,5 @@ Here are the classes:
|
|||
minor type and defaults to :mimetype:`plain`. *_charset* is the character
|
||||
set of the text and is passed as a parameter to the
|
||||
:class:`~email.mime.nonmultipart.MIMENonMultipart` constructor; it defaults
|
||||
to ``us-ascii``. No guessing or encoding is performed on the text data.
|
||||
|
||||
to ``us-ascii`` if the string contains only ``ascii`` codepoints, and
|
||||
``utf-8`` otherwise.
|
||||
|
|
|
@ -27,4 +27,14 @@ class MIMEText(MIMENonMultipart):
|
|||
"""
|
||||
MIMENonMultipart.__init__(self, 'text', _subtype,
|
||||
**{'charset': _charset})
|
||||
|
||||
# If _charset was defaulted, check to see if there are non-ascii
|
||||
# characters present. Default to utf-8 if there are.
|
||||
# XXX: This can be removed once #7304 is fixed.
|
||||
if _charset =='us-ascii':
|
||||
try:
|
||||
_text.encode(_charset)
|
||||
except UnicodeEncodeError:
|
||||
_charset = 'utf-8'
|
||||
|
||||
self.set_payload(_text, _charset)
|
||||
|
|
|
@ -617,6 +617,19 @@ class TestMessageAPI(TestEmailBase):
|
|||
abc
|
||||
"""))
|
||||
|
||||
def test_unicode_body_defaults_to_utf8_encoding(self):
|
||||
# Issue 14291
|
||||
m = MIMEText('É testabc\n')
|
||||
self.assertEqual(str(m),textwrap.dedent("""\
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset="utf-8"
|
||||
Content-Transfer-Encoding: base64
|
||||
|
||||
w4kgdGVzdGFiYwo=
|
||||
"""))
|
||||
|
||||
|
||||
|
||||
# Test the email.encoders module
|
||||
class TestEncoders(unittest.TestCase):
|
||||
|
||||
|
@ -642,7 +655,7 @@ class TestEncoders(unittest.TestCase):
|
|||
eq(msg['content-transfer-encoding'], '7bit')
|
||||
# Similar, but with 8bit data
|
||||
msg = MIMEText('hello \xf8 world')
|
||||
eq(msg['content-transfer-encoding'], '8bit')
|
||||
eq(msg['content-transfer-encoding'], 'base64')
|
||||
# And now with a different charset
|
||||
msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
|
||||
eq(msg['content-transfer-encoding'], 'quoted-printable')
|
||||
|
|
|
@ -548,6 +548,7 @@ Thomas Kluyver
|
|||
Kim Knapp
|
||||
Lenny Kneler
|
||||
Pat Knight
|
||||
Jeff Knupp
|
||||
Greg Kochanski
|
||||
Damon Kohler
|
||||
Marko Kohtala
|
||||
|
|
|
@ -34,6 +34,9 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #14380: MIMEText now defaults to utf-8 when passed non-ASCII unicode
|
||||
with no charset specified.
|
||||
|
||||
- Issue #10340: asyncore - properly handle EINVAL in dispatcher constructor on
|
||||
OSX; avoid calling handle_connect in case of a disconnected socket which
|
||||
was not meant to connect.
|
||||
|
|
Loading…
Reference in New Issue