cpython/Lib/email/encoders.py

76 lines
2.0 KiB
Python
Raw Normal View History

# Copyright (C) 2001-2006 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org
"""Encodings and related functions."""
__all__ = [
'encode_7or8bit',
'encode_base64',
'encode_noop',
'encode_quopri',
]
from base64 import b64encode as _bencode
from quopri import encodestring as _encodestring
def _qencode(s):
enc = _encodestring(s, quotetabs=True)
# Must encode spaces, which quopri.encodestring() doesn't do
return enc.replace(' ', '=20')
def encode_base64(msg):
"""Encode the message's payload in Base64.
Also, add an appropriate Content-Transfer-Encoding header.
"""
orig = msg.get_payload()
encdata = _bencode(orig)
msg.set_payload(encdata)
msg['Content-Transfer-Encoding'] = 'base64'
def encode_quopri(msg):
"""Encode the message's payload in quoted-printable.
Also, add an appropriate Content-Transfer-Encoding header.
"""
orig = msg.get_payload()
encdata = _qencode(orig)
msg.set_payload(encdata)
msg['Content-Transfer-Encoding'] = 'quoted-printable'
def encode_7or8bit(msg):
"""Set the Content-Transfer-Encoding header to 7bit or 8bit."""
orig = msg.get_payload()
if orig is None:
# There's no payload. For backwards compatibility we use 7bit
msg['Content-Transfer-Encoding'] = '7bit'
return
# We play a trick to make this go fast. If encoding to ASCII succeeds, we
# know the data must be 7bit, otherwise treat it as 8bit.
try:
orig.encode('ascii')
except UnicodeError:
# iso-2022-* is non-ASCII but still 7-bit
charset = msg.get_charset()
output_cset = charset and charset.output_charset
Merged revisions 79996,80855 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/py3k ................ r79996 | r.david.murray | 2010-04-12 10:48:58 -0400 (Mon, 12 Apr 2010) | 15 lines Merged revisions 79994 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r79994 | r.david.murray | 2010-04-12 10:26:06 -0400 (Mon, 12 Apr 2010) | 9 lines Issue #7472: ISO-2022 charsets now consistently use 7bit CTE. Fixed a typo in the email.encoders module so that messages output using an ISO-2022 character set will use a content-transfer-encoding of 7bit consistently. Previously if the input data had any eight bit characters the output data would get marked as 8bit even though it was actually 7bit. ........ ................ r80855 | r.david.murray | 2010-05-05 21:41:14 -0400 (Wed, 05 May 2010) | 24 lines Merged revisions 80800 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk It turns out that email5 (py3k), because it is using unicode for the payload, doesn't do the encoding to the output character set until later in the process. Specifically, charset.body_encode no longer does the input-to-output charset conversion. So the if test in the exception clause in encoders.encode_7or8bit really is needed in email5. So, this merge only merges the test, not the removal of the 'if'. ........ r80800 | r.david.murray | 2010-05-05 13:31:03 -0400 (Wed, 05 May 2010) | 9 lines Issue #7472: remove unused code from email.encoders.encode_7or8bit. Yukihiro Nakadaira noticed a typo in encode_7or8bit that was trying to special case iso-2022 codecs. It turns out that the code in question is never used, because whereas it was designed to trigger if the payload encoding was eight bit but its output encoding was 7 bit, in practice the payload is always converted to the 7bit encoding before encode_7or8bit is called. Patch by Shawat Anand. ........ ................
2010-05-05 22:53:03 -03:00
if output_cset and output_cset.lower().startswith('iso-2022-'):
msg['Content-Transfer-Encoding'] = '7bit'
else:
msg['Content-Transfer-Encoding'] = '8bit'
else:
msg['Content-Transfer-Encoding'] = '7bit'
def encode_noop(msg):
"""Do nothing."""