mirror of https://github.com/python/cpython
#19063: partially fix set_payload handling of non-ASCII string input.
This is a backward-compatible partial fix; the complete fix requires raising an error instead of accepting the invalid input, so the real fix is only suitable for 3.4.
This commit is contained in:
parent
31a655411a
commit
d5c4c7411a
|
@ -386,7 +386,8 @@ class Charset:
|
|||
string using the ascii codec produces the correct string version
|
||||
of the content.
|
||||
"""
|
||||
# 7bit/8bit encodings return the string unchanged (modulo conversions)
|
||||
if not string:
|
||||
return string
|
||||
if self.body_encoding is BASE64:
|
||||
if isinstance(string, str):
|
||||
string = string.encode(self.output_charset)
|
||||
|
@ -398,13 +399,9 @@ class Charset:
|
|||
# character set, then, we must turn it into pseudo bytes via the
|
||||
# latin1 charset, which will encode any byte as a single code point
|
||||
# between 0 and 255, which is what body_encode is expecting.
|
||||
#
|
||||
# Note that this clause doesn't handle the case of a _payload that
|
||||
# is already bytes. It never did, and the semantics of _payload
|
||||
# being bytes has never been nailed down, so fixing that is a
|
||||
# longer term TODO.
|
||||
if isinstance(string, str):
|
||||
string = string.encode(self.output_charset).decode('latin1')
|
||||
string = string.encode(self.output_charset)
|
||||
string = string.decode('latin1')
|
||||
return email.quoprimime.body_encode(string)
|
||||
else:
|
||||
if isinstance(string, str):
|
||||
|
|
|
@ -275,9 +275,19 @@ class Message:
|
|||
Optional charset sets the message's default character set. See
|
||||
set_charset() for details.
|
||||
"""
|
||||
if isinstance(payload, bytes):
|
||||
payload = payload.decode('ascii', 'surrogateescape')
|
||||
self._payload = payload
|
||||
if hasattr(payload, 'encode'):
|
||||
if charset is None:
|
||||
# We should check for ASCII-only here, but we can't do that
|
||||
# for backward compatibility reasons. Fixed in 3.4.
|
||||
self._payload = payload
|
||||
return
|
||||
if not isinstance(charset, Charset):
|
||||
charset = Charset(charset)
|
||||
payload = payload.encode(charset.output_charset)
|
||||
if hasattr(payload, 'decode'):
|
||||
self._payload = payload.decode('ascii', 'surrogateescape')
|
||||
else:
|
||||
self._payload = payload
|
||||
if charset is not None:
|
||||
self.set_charset(charset)
|
||||
|
||||
|
@ -316,7 +326,15 @@ class Message:
|
|||
try:
|
||||
cte(self)
|
||||
except TypeError:
|
||||
self._payload = charset.body_encode(self._payload)
|
||||
# This if is for backward compatibility and will be removed
|
||||
# in 3.4 when the ascii check is added to set_payload.
|
||||
payload = self._payload
|
||||
if payload:
|
||||
try:
|
||||
payload = payload.encode('ascii', 'surrogateescape')
|
||||
except UnicodeError:
|
||||
payload = payload.encode(charset.output_charset)
|
||||
self._payload = charset.body_encode(payload)
|
||||
self.add_header('Content-Transfer-Encoding', cte)
|
||||
|
||||
def get_charset(self):
|
||||
|
|
|
@ -92,6 +92,38 @@ class TestMessageAPI(TestEmailBase):
|
|||
msg.set_payload('This is a string payload', charset)
|
||||
self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')
|
||||
|
||||
def test_set_payload_with_8bit_data_and_charset(self):
|
||||
data = b'\xd0\x90\xd0\x91\xd0\x92'
|
||||
charset = Charset('utf-8')
|
||||
msg = Message()
|
||||
msg.set_payload(data, charset)
|
||||
self.assertEqual(msg['content-transfer-encoding'], 'base64')
|
||||
self.assertEqual(msg.get_payload(decode=True), data)
|
||||
self.assertEqual(msg.get_payload(), '0JDQkdCS\n')
|
||||
|
||||
def test_set_payload_with_non_ascii_and_charset_body_encoding_none(self):
|
||||
data = b'\xd0\x90\xd0\x91\xd0\x92'
|
||||
charset = Charset('utf-8')
|
||||
charset.body_encoding = None # Disable base64 encoding
|
||||
msg = Message()
|
||||
msg.set_payload(data.decode('utf-8'), charset)
|
||||
self.assertEqual(msg['content-transfer-encoding'], '8bit')
|
||||
self.assertEqual(msg.get_payload(decode=True), data)
|
||||
|
||||
def test_set_payload_with_8bit_data_and_charset_body_encoding_none(self):
|
||||
data = b'\xd0\x90\xd0\x91\xd0\x92'
|
||||
charset = Charset('utf-8')
|
||||
charset.body_encoding = None # Disable base64 encoding
|
||||
msg = Message()
|
||||
msg.set_payload(data, charset)
|
||||
self.assertEqual(msg['content-transfer-encoding'], '8bit')
|
||||
self.assertEqual(msg.get_payload(decode=True), data)
|
||||
|
||||
def test_set_payload_to_list(self):
|
||||
msg = Message()
|
||||
msg.set_payload([])
|
||||
self.assertEqual(msg.get_payload(), [])
|
||||
|
||||
def test_get_charsets(self):
|
||||
eq = self.assertEqual
|
||||
|
||||
|
|
|
@ -23,6 +23,11 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #19063: if a Charset's body_encoding was set to None, the email
|
||||
package would generate a message claiming the Content-Transfer-Encoding
|
||||
was 7bit, and produce garbage output for the content. This now works.
|
||||
A couple of other set_payload mishandlings of non-ASCII are also fixed.
|
||||
|
||||
- Issue #17200: telnetlib's read_until and expect timeout was broken by the
|
||||
fix to Issue #14635 in Python 3.3.0 to be interpreted as milliseconds
|
||||
instead of seconds when the platform supports select.poll (i.e., everywhere).
|
||||
|
|
Loading…
Reference in New Issue