cpython/Lib/email/base64mime.py

# Copyright (C) 2002-2006 Python Software Foundation
# Author: Ben Gertzfield
# Contact: email-sig@python.org

"""Base64 content transfer encoding per RFCs 2045-2047.

This module handles the content transfer encoding method defined in RFC 2045
to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
characters encoding known as Base64.

It is used in the MIME standards for email to attach images, audio, and text
using some 8-bit character sets to messages.

This module provides an interface to encode and decode both headers and bodies
with Base64 encoding.

RFC 2045 defines a method for including character set information in an
`encoded-word' in a header.  This method is commonly used for 8-bit real names
in To:, From:, Cc:, etc. fields, as well as Subject: lines.

This module does not do the line wrapping or end-of-line character conversion
necessary for proper internationalized headers; it only does dumb encoding and
decoding.  To deal with the various line wrapping issues, use the email.header
module.
"""

__all__ = [
    'base64_len',
    'body_decode',
    'body_encode',
    'decode',
    'decodestring',
    'encode',
    'encodestring',
    'header_encode',
    ]


from binascii import b2a_base64, a2b_base64
from email.utils import fix_eols

CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# See also Charset.py
MISC_LEN = 7


# Helpers
def base64_len(s):
    """Return the length of s when it is encoded with base64."""
    groups_of_3, leftover = divmod(len(s), 3)
    # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
    # Thanks, Tim!
    n = groups_of_3 * 4
    if leftover:
        n += 4
    return n


def header_encode(header, charset='iso-8859-1', keep_eols=False,
                  maxlinelen=76, eol=NL):
    """Encode a single header line with Base64 encoding in a given charset.

    Defined in RFC 2045, this Base64 encoding is identical to normal Base64
    encoding, except that each line must be intelligently wrapped (respecting
    the Base64 encoding), and subsequent lines must start with a space.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.

    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
    to the canonical email line separator \\r\\n unless the keep_eols
    parameter is True (the default is False).

    Each line of the header will be terminated in the value of eol, which
    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
    this function directly in email.

    The resulting string will be in the form:

    "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n
      =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?="

    with each line wrapped at, at most, maxlinelen characters (defaults to 76
    characters).
    """
    # Return empty headers unchanged
    if not header:
        return header

    if not keep_eols:
        header = fix_eols(header)

    # Base64 encode each line, in encoded chunks no greater than maxlinelen in
    # length, after the RFC chrome is added in.
    base64ed = []
    max_encoded = maxlinelen - len(charset) - MISC_LEN
    max_unencoded = max_encoded * 3 // 4

    for i in range(0, len(header), max_unencoded):
        base64ed.append(b2a_base64(header[i:i+max_unencoded]))

    # Now add the RFC chrome to each encoded chunk
    lines = []
    for line in base64ed:
        # Ignore the last character of each line if it is a newline
        if line.endswith(NL):
            line = line[:-1]
        # Add the chrome
        lines.append('=?%s?b?%s?=' % (charset, line))
    # Glue the lines together and return it.  BAW: should we be able to
    # specify the leading whitespace in the joiner?
    joiner = eol + ' '
    return joiner.join(lines)


def encode(s, binary=True, maxlinelen=76, eol=NL):
    """Encode a string with base64.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).

    If binary is False, end-of-line characters will be converted to the
    canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
    verbatim (this is the default).

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.
    """
    if not s:
        return s

    if not binary:
        s = fix_eols(s)

    encvec = []
    max_unencoded = maxlinelen * 3 // 4
    for i in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        enc = b2a_base64(s[i:i + max_unencoded])
        if enc.endswith(NL) and eol != NL:
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)


# For convenience and backwards compatibility w/ standard base64 module
body_encode = encode
encodestring = encode


def decode(s, convert_eols=None):
    """Decode a raw base64 string.

    If convert_eols is set to a string value, all canonical email linefeeds,
    e.g. "\\r\\n", in the decoded text will be converted to the value of
    convert_eols.  os.linesep is a good choice for convert_eols if you are
    decoding a text attachment.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.header class for that functionality.
    """
    if not s:
        return s

    dec = a2b_base64(s)
    if convert_eols:
        return dec.replace(CRLF, convert_eols)
    return dec


# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode
-												Merge email package 4.0 from the sandbox, including documentation, test cases,
and NEWS updates.

											
										
										
											2006-03-18 11:41:53 -04:00
+								# Copyright (C) 2002-2006 Python Software Foundation
-												Big email 3.0 API changes, with updated unit tests and documentation.
Briefly (from the NEWS file):

- Updates for the email package:
  + All deprecated APIs that in email 2.x issued warnings have been removed:
    _encoder argument to the MIMEText constructor, Message.add_payload(),
    Utils.dump_address_pair(), Utils.decode(), Utils.encode()
  + New deprecations: Generator.__call__(), Message.get_type(),
    Message.get_main_type(), Message.get_subtype(), the 'strict' argument to
    the Parser constructor.  These will be removed in email 3.1.
  + Support for Python earlier than 2.3 has been removed (see PEP 291).
  + All defect classes have been renamed to end in 'Defect'.
  + Some FeedParser fixes; also a MultipartInvariantViolationDefect will be
    added to messages that claim to be multipart but really aren't.
  + Updates to documentation.

											
										
										
											2004-10-03 00:16:19 -03:00
+								# Author: Ben Gertzfield
 								# Contact: email-sig@python.org
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
 								"""Base64 content transfer encoding per RFCs 2045-2047.
 								This module handles the content transfer encoding method defined in RFC 2045
 								to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
 								characters encoding known as Base64.
 								It is used in the MIME standards for email to attach images, audio, and text
 								using some 8-bit character sets to messages.
 								This module provides an interface to encode and decode both headers and bodies
 								with Base64 encoding.
 								RFC 2045 defines a method for including character set information in an
 								`encoded-word' in a header.  This method is commonly used for 8-bit real names
 								in To:, From:, Cc:, etc. fields, as well as Subject: lines.
 								This module does not do the line wrapping or end-of-line character conversion
 								necessary for proper internationalized headers; it only does dumb encoding and
-												#2622 Import errors in email.message, from a py2app standalone application.

Patch by Mads Kiilerich, Reviewed by Barry Warsaw.

											
										
										
											2009-07-11 11:33:51 -03:00
+								decoding.  To deal with the various line wrapping issues, use the email.header
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								module.
 								"""
-												Merge email package 4.0 from the sandbox, including documentation, test cases,
and NEWS updates.

											
										
										
											2006-03-18 11:41:53 -04:00
+								__all__ = [
 								    'base64_len',
 								    'body_decode',
 								    'body_encode',
 								    'decode',
 								    'decodestring',
 								    'encode',
 								    'encodestring',
 								    'header_encode',
 								    ]
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								from binascii import b2a_base64, a2b_base64
-												Merge email package 4.0 from the sandbox, including documentation, test cases,
and NEWS updates.

											
										
										
											2006-03-18 11:41:53 -04:00
+								from email.utils import fix_eols
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
 								CRLF = '\r\n'
 								NL = '\n'
 								EMPTYSTRING = ''
 								# See also Charset.py
 								MISC_LEN = 7
 								# Helpers
 								def base64_len(s):
 								    """Return the length of s when it is encoded with base64."""
-												Whitespace normalization.

											
										
										
											2002-05-23 12:15:30 -03:00
+								    groups_of_3, leftover = divmod(len(s), 3)
 								    # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								    # Thanks, Tim!
-												Whitespace normalization.

											
										
										
											2002-05-23 12:15:30 -03:00
+								    n = groups_of_3 * 4
 								    if leftover:
 								        n += 4
 								    return n
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
-												Use True/False everywhere, and other code cleanups.

											
										
										
											2002-09-28 18:02:51 -03:00
+								def header_encode(header, charset='iso-8859-1', keep_eols=False,
 								                  maxlinelen=76, eol=NL):
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								    """Encode a single header line with Base64 encoding in a given charset.
-												Whitespace normalization.

											
										
										
											2002-05-23 12:15:30 -03:00
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								    Defined in RFC 2045, this Base64 encoding is identical to normal Base64
 								    encoding, except that each line must be intelligently wrapped (respecting
 								    the Base64 encoding), and subsequent lines must start with a space.
 								    charset names the character set to use to encode the header.  It defaults
 								    to iso-8859-1.
 								    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
 								    to the canonical email line separator \\r\\n unless the keep_eols
-												Use True/False everywhere, and other code cleanups.

											
										
										
											2002-09-28 18:02:51 -03:00
+								    parameter is True (the default is False).
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
 								    Each line of the header will be terminated in the value of eol, which
 								    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
 								    this function directly in email.
 								    The resulting string will be in the form:
 								    "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n
 								      =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?="
-												Whitespace normalization.

											
										
										
											2002-05-23 12:15:30 -03:00
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								    with each line wrapped at, at most, maxlinelen characters (defaults to 76
 								    characters).
 								    """
 								    # Return empty headers unchanged
 								    if not header:
 								        return header
 								    if not keep_eols:
 								        header = fix_eols(header)
-												Whitespace normalization.

											
										
										
											2002-05-23 12:15:30 -03:00
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								    # Base64 encode each line, in encoded chunks no greater than maxlinelen in
 								    # length, after the RFC chrome is added in.
 								    base64ed = []
 								    max_encoded = maxlinelen - len(charset) - MISC_LEN
-												Update to Python 2.3, getting rid of backward compatiblity crud.

											
										
										
											2004-05-09 00:55:11 -03:00
+								    max_unencoded = max_encoded * 3 // 4
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
 								    for i in range(0, len(header), max_unencoded):
 								        base64ed.append(b2a_base64(header[i:i+max_unencoded]))
 								    # Now add the RFC chrome to each encoded chunk
 								    lines = []
 								    for line in base64ed:
 								        # Ignore the last character of each line if it is a newline
-												Use True/False everywhere, and other code cleanups.

											
										
										
											2002-09-28 18:02:51 -03:00
+								        if line.endswith(NL):
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								            line = line[:-1]
 								        # Add the chrome
 								        lines.append('=?%s?b?%s?=' % (charset, line))
 								    # Glue the lines together and return it.  BAW: should we be able to
 								    # specify the leading whitespace in the joiner?
 								    joiner = eol + ' '
 								    return joiner.join(lines)
-												Use True/False everywhere, and other code cleanups.

											
										
										
											2002-09-28 18:02:51 -03:00
+								def encode(s, binary=True, maxlinelen=76, eol=NL):
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								    """Encode a string with base64.
 								    Each line will be wrapped at, at most, maxlinelen characters (defaults to
 characters).
-												Use True/False everywhere, and other code cleanups.

											
										
										
											2002-09-28 18:02:51 -03:00
+								    If binary is False, end-of-line characters will be converted to the
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								    canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
 								    verbatim (this is the default).
 								    Each line of encoded text will end with eol, which defaults to "\\n".  Set
-												#15980: properly escape newlines in docstrings.  Patch by Serhiy Storchaka.

											
										
										
											2012-09-21 10:27:45 -03:00
+								    this to "\\r\\n" if you will be using the result of this function directly
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								    in an email.
 								    """
 								    if not s:
 								        return s
-												Whitespace normalization.

											
										
										
											2002-05-23 12:15:30 -03:00
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								    if not binary:
 								        s = fix_eols(s)
-												Whitespace normalization.

											
										
										
											2002-05-23 12:15:30 -03:00
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								    encvec = []
-												Update to Python 2.3, getting rid of backward compatiblity crud.

											
										
										
											2004-05-09 00:55:11 -03:00
+								    max_unencoded = maxlinelen * 3 // 4
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								    for i in range(0, len(s), max_unencoded):
 								        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
 								        # adding a newline to the encoded string?
 								        enc = b2a_base64(s[i:i + max_unencoded])
-												Remove Barry's love of deprecated syntax to silence warnings in the email
package, when run under -3, about using <>.

											
										
										
											2008-08-03 20:27:32 -03:00
+								        if enc.endswith(NL) and eol != NL:
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								            enc = enc[:-1] + eol
 								        encvec.append(enc)
 								    return EMPTYSTRING.join(encvec)
 								# For convenience and backwards compatibility w/ standard base64 module
 								body_encode = encode
 								encodestring = encode
 								def decode(s, convert_eols=None):
 								    """Decode a raw base64 string.
 								    If convert_eols is set to a string value, all canonical email linefeeds,
 								    e.g. "\\r\\n", in the decoded text will be converted to the value of
 								    convert_eols.  os.linesep is a good choice for convert_eols if you are
 								    decoding a text attachment.
 								    This function does not parse a full MIME header value encoded with
-												Correct misspellings of ISO-8859

											
										
										
											2016-08-10 02:25:16 -03:00
+								    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
-												#2622 Import errors in email.message, from a py2app standalone application.

Patch by Mads Kiilerich, Reviewed by Barry Warsaw.

											
										
										
											2009-07-11 11:33:51 -03:00
+								    level email.header class for that functionality.
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								    """
 								    if not s:
 								        return s
-												Whitespace normalization.

											
										
										
											2002-05-23 12:15:30 -03:00
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								    dec = a2b_base64(s)
 								    if convert_eols:
 								        return dec.replace(CRLF, convert_eols)
 								    return dec
 								# For convenience and backwards compatibility w/ standard base64 module
 								body_decode = decode
 								decodestring = decode