mirror of https://github.com/python/cpython
#1368247: make set_charset/MIMEText automatically encode unicode _payload.
Fixes (mysterious, to the end user) UnicodeErrors when using utf-8 as the charset and unicode as the _text argument. Also makes the way in which unicode gets encoded to quoted printable for other charsets more sane (it only worked by accident previously). The _payload now is encoded to the charset.output_charset if it is unicode.
This commit is contained in:
parent
c395545a4a
commit
52dcd45906
|
@ -136,9 +136,10 @@ Here are the methods of the :class:`Message` class:
|
||||||
:mailheader:`Content-Type` header. Anything else will generate a
|
:mailheader:`Content-Type` header. Anything else will generate a
|
||||||
:exc:`TypeError`.
|
:exc:`TypeError`.
|
||||||
|
|
||||||
The message will be assumed to be of type :mimetype:`text/\*` encoded with
|
The message will be assumed to be of type :mimetype:`text/\*`, with the
|
||||||
*charset.input_charset*. It will be converted to *charset.output_charset*
|
payload either in unicode or encoded with *charset.input_charset*.
|
||||||
and encoded properly, if needed, when generating the plain text
|
It will be encoded or converted to *charset.output_charset*
|
||||||
|
and transfer encoded properly, if needed, when generating the plain text
|
||||||
representation of the message. MIME headers (:mailheader:`MIME-Version`,
|
representation of the message. MIME headers (:mailheader:`MIME-Version`,
|
||||||
:mailheader:`Content-Type`, :mailheader:`Content-Transfer-Encoding`) will
|
:mailheader:`Content-Type`, :mailheader:`Content-Transfer-Encoding`) will
|
||||||
be added as needed.
|
be added as needed.
|
||||||
|
|
|
@ -191,9 +191,11 @@ Here are the classes:
|
||||||
minor type and defaults to :mimetype:`plain`. *_charset* is the character
|
minor type and defaults to :mimetype:`plain`. *_charset* is the character
|
||||||
set of the text and is passed as a parameter to the
|
set of the text and is passed as a parameter to the
|
||||||
:class:`~email.mime.nonmultipart.MIMENonMultipart` constructor; it defaults
|
:class:`~email.mime.nonmultipart.MIMENonMultipart` constructor; it defaults
|
||||||
to ``us-ascii``. No guessing or encoding is performed on the text data.
|
to ``us-ascii``. If *_text* is unicode, it is encoded using the
|
||||||
|
*output_charset* of *_charset*, otherwise it is used as-is.
|
||||||
|
|
||||||
.. versionchanged:: 2.4
|
.. versionchanged:: 2.4
|
||||||
The previously deprecated *_encoding* argument has been removed. Encoding
|
The previously deprecated *_encoding* argument has been removed. Content
|
||||||
happens implicitly based on the *_charset* argument.
|
Transfer Encoding now happens implicitly based on the *_charset*
|
||||||
|
argument.
|
||||||
|
|
||||||
|
|
|
@ -256,6 +256,8 @@ class Message:
|
||||||
charset=charset.get_output_charset())
|
charset=charset.get_output_charset())
|
||||||
else:
|
else:
|
||||||
self.set_param('charset', charset.get_output_charset())
|
self.set_param('charset', charset.get_output_charset())
|
||||||
|
if isinstance(self._payload, unicode):
|
||||||
|
self._payload = self._payload.encode(charset.output_charset)
|
||||||
if str(charset) != charset.get_output_charset():
|
if str(charset) != charset.get_output_charset():
|
||||||
self._payload = charset.body_encode(self._payload)
|
self._payload = charset.body_encode(self._payload)
|
||||||
if 'Content-Transfer-Encoding' not in self:
|
if 'Content-Transfer-Encoding' not in self:
|
||||||
|
|
|
@ -1045,6 +1045,31 @@ class TestMIMEText(unittest.TestCase):
|
||||||
eq(msg.get_charset().input_charset, 'us-ascii')
|
eq(msg.get_charset().input_charset, 'us-ascii')
|
||||||
eq(msg['content-type'], 'text/plain; charset="us-ascii"')
|
eq(msg['content-type'], 'text/plain; charset="us-ascii"')
|
||||||
|
|
||||||
|
def test_7bit_unicode_input(self):
|
||||||
|
eq = self.assertEqual
|
||||||
|
msg = MIMEText(u'hello there', _charset='us-ascii')
|
||||||
|
eq(msg.get_charset().input_charset, 'us-ascii')
|
||||||
|
eq(msg['content-type'], 'text/plain; charset="us-ascii"')
|
||||||
|
|
||||||
|
def test_7bit_unicode_input_no_charset(self):
|
||||||
|
eq = self.assertEqual
|
||||||
|
msg = MIMEText(u'hello there')
|
||||||
|
eq(msg.get_charset(), 'us-ascii')
|
||||||
|
eq(msg['content-type'], 'text/plain; charset="us-ascii"')
|
||||||
|
self.assertTrue('hello there' in msg.as_string())
|
||||||
|
|
||||||
|
def test_8bit_unicode_input(self):
|
||||||
|
teststr = u'\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
|
||||||
|
eq = self.assertEqual
|
||||||
|
msg = MIMEText(teststr, _charset='utf-8')
|
||||||
|
eq(msg.get_charset().output_charset, 'utf-8')
|
||||||
|
eq(msg['content-type'], 'text/plain; charset="utf-8"')
|
||||||
|
eq(msg.get_payload(decode=True), teststr.encode('utf-8'))
|
||||||
|
|
||||||
|
def test_8bit_unicode_input_no_charset(self):
|
||||||
|
teststr = u'\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
|
||||||
|
self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Test complicated multipart/* messages
|
# Test complicated multipart/* messages
|
||||||
|
|
|
@ -46,6 +46,9 @@ C-API
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #1368247: set_charset (and therefore MIMEText) now automatically
|
||||||
|
encodes a unicode _payload to the output_charset.
|
||||||
|
|
||||||
- Issue #7150: Raise OverflowError if the result of adding or subtracting
|
- Issue #7150: Raise OverflowError if the result of adding or subtracting
|
||||||
timedelta from date or datetime falls outside of the MINYEAR:MAXYEAR range.
|
timedelta from date or datetime falls outside of the MINYEAR:MAXYEAR range.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue