cpython/Lib/email/Utils.py

# Copyright (C) 2001,2002 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)

"""Miscellaneous utilities.
"""

import time
import socket
import re
import random
import os
import warnings
from cStringIO import StringIO
from types import ListType

from email._parseaddr import quote
from email._parseaddr import AddressList as _AddressList
from email._parseaddr import mktime_tz

# We need wormarounds for bugs in these methods in older Pythons (see below)
from email._parseaddr import parsedate as _parsedate
from email._parseaddr import parsedate_tz as _parsedate_tz

# Compatibility shim: referencing True/False raises NameError on interpreters
# that do not provide them as builtins, in which case integer stand-ins are
# bound at module level.
try:
    True, False
except NameError:
    True = 1
    False = 0

# Bind _qdecode to quopri.decodestring() when it can be imported; otherwise
# define a local replacement built on the stream-based quopri.decode().
try:
    from quopri import decodestring as _qdecode
except ImportError:
    # Python 2.1 doesn't have quopri.decodestring()
    def _qdecode(s):
        """Decode the quoted-printable string s and return the result.

        A false (e.g. empty) s is returned unchanged.
        """
        import quopri as _quopri

        if not s:
            return s
        # quopri.decode() works on file-like objects, so wrap the input and
        # collect the output in in-memory buffers.
        infp = StringIO(s)
        outfp = StringIO()
        _quopri.decode(infp, outfp)
        value = outfp.getvalue()
        # Drop one trailing newline from the decoded text when the encoded
        # input itself did not end with a newline.
        if not s.endswith('\n') and value.endswith('\n'):
            return value[:-1]
        return value

import base64

# Intrapackage imports
from email.Encoders import _bencode, _qencode

# String constants used as separators/joiners by the functions below.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
CRLF = '\r\n'

# formataddr() wraps the real name in double quotes when it contains any
# character matched by specialsre: ] [ \ ( ) < > @ , : ; " .
specialsre = re.compile(r'[][\\()<>@,:;".]')
# formataddr() prefixes every character matched by escapesre with a
# backslash: ] [ \ ( ) "
escapesre = re.compile(r'[][\\()"]')


# Helpers

def _identity(s):
    """Return s unchanged."""
    return s


def _bdecode(s):
    """Decode the base64-encoded string s and return the raw payload.

    A false (e.g. empty) s is returned unchanged.  Otherwise the result is
    exactly what base64.decodestring() produces.

    Earlier revisions removed a trailing newline from the decoded payload
    whenever the encoded text did not itself end with a newline.  That
    silently discarded a real byte of data (for example 'aGVsbG8K' decoded
    to 'hello' instead of 'hello\\n'), so the payload is now returned intact.
    """
    if not s:
        return s
    # Decoding never adds a "courtesy newline" (only encoding does), so any
    # newline at the end of the result belongs to the original data.
    return base64.decodestring(s)


def fix_eols(s):
    """Return s with every line ending normalised to CRLF (\\r\\n).

    Both a lone newline and a lone carriage return are turned into the
    two-character CRLF sequence; existing CRLF pairs are left untouched.
    """
    # First pass: newlines that are not already preceded by a carriage return
    lf_fixed = re.sub(r'(?<!\r)\n', CRLF, s)
    # Second pass: carriage returns that are not followed by a newline
    return re.sub(r'\r(?!\n)', CRLF, lf_fixed)


def formataddr(pair):
    """Build an address header value from a (realname, email_address) pair.

    This is the inverse of parseaddr(): the result is a string suitable for
    an RFC 2822 From, To or Cc header.

    When the real name is false (e.g. empty), the address is returned
    unmodified.  Otherwise the result is 'realname <address>', where the
    real name has the characters matched by escapesre backslash-escaped and
    is wrapped in double quotes if it contains any character matched by
    specialsre.
    """
    realname, addr = pair
    if not realname:
        return addr
    # Decide on quoting from the name as given, before any escaping is done.
    if specialsre.search(realname):
        wrap = '"'
    else:
        wrap = ''
    escaped = escapesre.sub(r'\\\g<0>', realname)
    return '%s%s%s <%s>' % (wrap, escaped, wrap, addr)

# For backwards compatibility
def dump_address_pair(pair):
    """Deprecated spelling of formataddr().

    Issues a DeprecationWarning attributed to the caller (stacklevel 2) and
    returns whatever formataddr(pair) returns.
    """
    message = 'Use email.Utils.formataddr() instead'
    warnings.warn(message, DeprecationWarning, 2)
    return formataddr(pair)


def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue.

    The field values are joined with ', ' into a single string, which is
    then parsed as one address list.
    """
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist


# Pattern for a single RFC 2047 encoded-word, =?charset?encoding?atom?=,
# exposing the named groups 'charset', 'encoding' and 'atom'.
# NOTE(review): nothing in the visible part of this module uses ecre;
# presumably it is kept for other modules -- confirm before removing.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)


def decode(s):
    """Return s decoded according to RFC 2047, as a unicode string.

    If decode_header() does not hand back a list, s is returned unchanged.
    Otherwise each (text, charset) chunk is converted to unicode using its
    charset (conversion errors are strict), chunks without a charset are
    used as they are, and all chunks are joined together.

    NOTE: This function is deprecated.  Use Header.decode_header() instead.
    """
    warnings.warn('Use Header.decode_header() instead.', DeprecationWarning, 2)
    # Imported here rather than at module level to avoid circular import
    # problems within the package.
    from email.Header import decode_header
    chunks = decode_header(s)
    if not isinstance(chunks, ListType):
        # Nothing was decoded; give the caller back the original value.
        return s

    pieces = []
    for text, charset in chunks:
        if charset is not None:
            # Strict conversion: a bad byte sequence raises.
            text = unicode(text, charset)
        pieces.append(text)
    return UEMPTYSTRING.join(pieces)


def encode(s, charset='iso-8859-1', encoding='q'):
    """Encode s as an RFC 2047 encoded-word, =?charset?encoding?text?=.

    encoding is matched case-insensitively: 'q' selects _qencode() and 'b'
    selects _bencode(); anything else raises ValueError.  Both the charset
    and the encoding letter appear lower-cased in the result.

    NOTE: This function is deprecated.  Use Header.Header.encode() instead.
    """
    warnings.warn('Use Header.Header.encode() instead.', DeprecationWarning, 2)
    encoding = encoding.lower()
    if encoding == 'b':
        estr = _bencode(s)
    elif encoding == 'q':
        estr = _qencode(s)
    else:
        raise ValueError('Illegal encoding code: ' + encoding)
    return '=?%s?%s?%s?=' % (charset.lower(), encoding, estr)


def formatdate(timeval=None, localtime=False):
    """Return a date string in the format specified by RFC 2822, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    timeval, when given, is a time value as accepted by time.gmtime() and
    time.localtime(); when it is None the current time is used.

    localtime is a flag.  When true, timeval is interpreted in the local
    timezone and the zone field carries the local UTC offset (using the
    DST offset when daylight savings time is in effect).  When false, the
    date is expressed in UTC and the zone field is always '-0000'.
    """
    # strftime() is deliberately not used: it honors the locale, whereas
    # RFC 2822 requires the English day and month abbreviations.
    if timeval is None:
        timeval = time.time()
    if not localtime:
        tm = time.gmtime(timeval)
        zone = '-0000'
    else:
        tm = time.localtime(timeval)
        # Use the DST offset only if the local zone defines DST at all and
        # the DST flag (last field of the time tuple) is set.
        if time.daylight and tm[-1]:
            seconds_west = time.altzone
        else:
            seconds_west = time.timezone
        hh, leftover = divmod(abs(seconds_west), 3600)
        mm = divmod(leftover, 60)[0]
        # The offset counts seconds *west* of UTC while the header field
        # states the offset *east* of UTC, hence the inverted sign.
        if seconds_west > 0:
            sign = '-'
        else:
            sign = '+'
        zone = '%s%02d%02d' % (sign, hh, mm)
    day_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        day_names[tm[6]],
        tm[2],
        month_names[tm[1] - 1],
        tm[0], tm[3], tm[4], tm[5],
        zone)


def make_msgid(idstring=None):
    """Return a string suitable for an RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    The id is built from the current UTC timestamp, the process id, a
    random integer below 100000 and the fully qualified host name.

    Optional idstring, if given, is a string appended (after a dot) to the
    part before the '@' to strengthen the uniqueness of the message id.
    """
    stamp = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
    process_id = os.getpid()
    nonce = random.randrange(100000)
    if idstring is None:
        suffix = ''
    else:
        suffix = '.' + idstring
    host = socket.getfqdn()
    return '<%s.%s.%s%s@%s>' % (stamp, process_id, nonce, suffix, host)


# These functions are in the standalone mimelib version only because they've
# subsequently been fixed in the latest Python versions.  We use this to worm
# around broken older Pythons.
def parsedate(data):
    """Parse a date string via email._parseaddr.parsedate().

    Returns None without attempting to parse when data is false (None or
    an empty string); otherwise returns whatever the underlying parser
    returns.
    """
    if data:
        return _parsedate(data)
    return None


def parsedate_tz(data):
    """Parse a date string via email._parseaddr.parsedate_tz().

    Returns None without attempting to parse when data is false (None or
    an empty string); otherwise returns whatever the underlying parser
    returns.
    """
    if data:
        return _parsedate_tz(data)
    return None


def parseaddr(addr):
    """Parse addr and return its first address as a 2-tuple.

    addr is parsed as an address list; the first entry of that list is
    returned.  When the list is empty, the pair ('', '') is returned.
    """
    parsed = _AddressList(addr).addresslist
    if parsed:
        return parsed[0]
    return '', ''


# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Remove one layer of surrounding quotes from a string.

    A string of at least two characters that is enclosed in double quotes
    has them stripped, and the backslash escapes for backslash and double
    quote inside it are undone.  A string enclosed in angle brackets has
    just the brackets stripped.  Anything else is returned unchanged.
    """
    if len(str) <= 1:
        return str
    if str.startswith('"') and str.endswith('"'):
        inner = str[1:-1]
        # Collapse escaped backslashes first, then escaped double quotes.
        inner = inner.replace('\\\\', '\\')
        return inner.replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return str[1:-1]
    return str


# RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Decode string according to RFC 2231.

    Returns a 3-tuple (charset, language, text).  A value of the form
    charset'language'encoded-text is split at its first two apostrophes
    and the text part is percent-decoded.  A value with fewer than two
    apostrophes carries no charset/language prefix: it is percent-decoded
    as a whole and returned with None for both charset and language.
    """
    # Python 2 exposes unquote() directly on urllib; fall back to
    # urllib.parse on interpreters where it is not found there.
    try:
        from urllib import unquote as _unquote
    except ImportError:
        from urllib.parse import unquote as _unquote
    parts = s.split("'", 2)
    # Exactly one apostrophe yields two parts, which used to blow up in the
    # three-way unpacking below; treat it like the no-apostrophe case.
    if len(parts) < 3:
        return None, None, _unquote(s)
    charset, language, encoded = parts
    return charset, language, _unquote(encoded)


def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    s is always percent-quoted, with no character treated as safe.  If
    neither charset nor language is given, the quoted string alone is
    returned.  Otherwise the result has the form charset'language'quoted,
    where whichever of charset and language was not given is written as
    the empty string, so both apostrophes are always present.
    """
    # Python 2 exposes quote() directly on urllib; fall back to
    # urllib.parse on interpreters where it is not found there.
    try:
        from urllib import quote as _quote
    except ImportError:
        from urllib.parse import quote as _quote
    s = _quote(s, safe='')
    if charset is None and language is None:
        return s
    # A missing field must be left blank; formatting None directly would
    # emit the literal text "None" as the charset.
    if charset is None:
        charset = ''
    if language is None:
        language = ''
    return "%s'%s'%s" % (charset, language, s)


# Matches a parameter name that carries an RFC 2231 marker: NAME*, NAME*N or
# NAME*N* (N being a run of decimal digits).  Group 'name' is the bare
# parameter name; group 'num' is N, or None when no number is present.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')

def decode_params(params):
    """Decode a parameter list according to RFC 2231.

    params is a non-empty sequence of (name, value) 2-tuples.  The first
    tuple is copied to the result untouched.  Every later value is
    unquoted; if its name does not match rfc2231_continuation it is
    re-quoted and appended as (name, '"value"').  Values whose names do
    match are grouped by bare name, ordered by section number, joined,
    run through decode_rfc2231() and appended last as
    (name, (charset, language, '"value"')).

    Returns the new list of parameters.
    """
    decoded = []
    # Bare parameter name -> list of (section number, value) pairs
    sections = {}
    # The leading entry is passed through exactly as received.
    first_name, first_value = params[0]
    decoded.append((first_name, first_value))
    for name, value in params[1:]:
        value = unquote(value)
        match = rfc2231_continuation.match(name)
        if match is None:
            decoded.append((name, '"%s"' % quote(value)))
            continue
        name, num = match.group('name', 'num')
        if num is not None:
            num = int(num)
        sections.setdefault(name, []).append((num, value))
    for name, pairs in sections.items():
        # Put the pieces into section-number order before gluing them.
        pairs.sort()
        pieces = [piece for num, piece in pairs]
        charset, language, text = decode_rfc2231(EMPTYSTRING.join(pieces))
        decoded.append(
            (name, (charset, language, '"%s"' % quote(text))))
    return decoded
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								# Copyright (C) 2001,2002 Python Software Foundation
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								# Author: barry@zope.com (Barry Warsaw)
 								"""Miscellaneous utilities.
 								"""
-												Forgot to import time.

											
										
										
											2001-11-09 13:45:48 -04:00
+								import time
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								import socket
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								import re
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								import random
 								import os
 								import warnings
 								from cStringIO import StringIO
 								from types import ListType
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
-												Jason Mastaler's patch to break the dependence on rfc822.py for the
address parsing routines.  Closes SF patch #613434.

											
										
										
											2002-11-05 15:54:52 -04:00
+								from email._parseaddr import quote
 								from email._parseaddr import AddressList as _AddressList
 								from email._parseaddr import mktime_tz
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
 								# We need wormarounds for bugs in these methods in older Pythons (see below)
-												Jason Mastaler's patch to break the dependence on rfc822.py for the
address parsing routines.  Closes SF patch #613434.

											
										
										
											2002-11-05 15:54:52 -04:00
+								from email._parseaddr import parsedate as _parsedate
 								from email._parseaddr import parsedate_tz as _parsedate_tz
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
-												Use True/False everywhere, and other code cleanups.

											
										
										
											2002-09-28 17:49:57 -03:00
+								try:
 								    True, False
 								except NameError:
 								    True = 1
 								    False = 0
-												Complete a merge of the mimelib project and the Python cvs codebases
for the email package.  The former is now just a shell project that
has some extra files for packaging for independent use (e.g. setup.py
and README).

Added a compatibility layer so that the same API can be used in Python
2.1 and 2.2/2.3 with the major differences shuffled off into helper
modules (_compat21.py and _compat22.py).

Also bumped the package version number to 2.0.3 for some fixes to be
checked in momentarily.

											
										
										
											2002-05-19 20:44:19 -03:00
+								try:
 								    from quopri import decodestring as _qdecode
 								except ImportError:
 								    # Python 2.1 doesn't have quopri.decodestring()
 								    def _qdecode(s):
 								        import quopri as _quopri
 								        if not s:
 								            return s
 								        infp = StringIO(s)
 								        outfp = StringIO()
 								        _quopri.decode(infp, outfp)
 								        value = outfp.getvalue()
-												Use True/False everywhere, and other code cleanups.

											
										
										
											2002-09-28 17:49:57 -03:00
+								        if not s.endswith('\n') and value.endswith('\n'):
-												Complete a merge of the mimelib project and the Python cvs codebases
for the email package.  The former is now just a shell project that
has some extra files for packaging for independent use (e.g. setup.py
and README).

Added a compatibility layer so that the same API can be used in Python
2.1 and 2.2/2.3 with the major differences shuffled off into helper
modules (_compat21.py and _compat22.py).

Also bumped the package version number to 2.0.3 for some fixes to be
checked in momentarily.

											
										
										
											2002-05-19 20:44:19 -03:00
+								            return value[:-1]
 								        return value
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								import base64
 								# Intrapackage imports
-												Use absolute import paths for intrapackage imports.

											
										
										
											2002-06-02 16:07:16 -03:00
+								from email.Encoders import _bencode, _qencode
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
 								COMMASPACE = ', '
-												Oleg Broytmann's support for RFC 2231 encoded parameters, SF patch #549133

Specifically,

decode_rfc2231(), encode_rfc2231(): Functions to encode and decode RFC
2231 style parameters.

decode_params(): Function to decode a list of parameters.

											
										
										
											2002-06-29 02:58:04 -03:00
+								EMPTYSTRING = ''
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								UEMPTYSTRING = u''
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								CRLF = '\r\n'
-												specialsre, escapesre: In SF bug #663369, Matthew Woodcraft points out
that backslashes must be escaped in character sets.

											
										
										
											2003-03-10 15:20:18 -04:00
+								specialsre = re.compile(r'[][\\()<>@,:;".]')
 								escapesre = re.compile(r'[][\\()"]')
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
-												Give me back my page breaks.

											
										
										
											2001-10-04 14:05:11 -03:00
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								# Helpers
 								def _identity(s):
 								    return s
 								def _bdecode(s):
 								    # We can't quite use base64.encodestring() since it tacks on a "courtesy
 								    # newline".  Blech!
 								    if not s:
 								        return s
 								    value = base64.decodestring(s)
-												Use True/False everywhere, and other code cleanups.

											
										
										
											2002-09-28 17:49:57 -03:00
+								    if not s.endswith('\n') and value.endswith('\n'):
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								        return value[:-1]
 								    return value
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
 								def fix_eols(s):
 								    """Replace all line-ending characters with \r\n."""
 								    # Fix newlines with no preceding carriage return
 								    s = re.sub(r'(?<!\r)\n', CRLF, s)
 								    # Fix carriage returns with no following newline
 								    s = re.sub(r'\r(?!\n)', CRLF, s)
 								    return s
 								def formataddr(pair):
 								    """The inverse of parseaddr(), this takes a 2-tuple of the form
 								    (realname, email_address) and returns the string value suitable
-												Use True/False everywhere, and other code cleanups.

											
										
										
											2002-09-28 17:49:57 -03:00
+								    for an RFC 2822 From, To or Cc header.
-												Whitespace normalization.

											
										
										
											2002-05-23 12:15:30 -03:00
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								    If the first element of pair is false, then the second element is
 								    returned unmodified.
 								    """
 								    name, address = pair
 								    if name:
 								        quotes = ''
 								        if specialsre.search(name):
 								            quotes = '"'
 								        name = escapesre.sub(r'\\\g<0>', name)
 								        return '%s%s%s <%s>' % (quotes, name, quotes, address)
 								    return address
 								# For backwards compatibility
 								def dump_address_pair(pair):
 								    warnings.warn('Use email.Utils.formataddr() instead',
 								                  DeprecationWarning, 2)
 								    return formataddr(pair)
-												Give me back my page breaks.

											
										
										
											2001-10-04 14:05:11 -03:00
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								def getaddresses(fieldvalues):
 								    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
 								    all = COMMASPACE.join(fieldvalues)
-												AddrlistClass -> AddressList

											
										
										
											2002-04-12 17:50:05 -03:00
+								    a = _AddressList(all)
-												getaddresses(): Like the change in rfc822.py, this one needs to access
the AddressList.addresslist attribute directly.

Also, add a test case for the email.Utils.getaddresses() interface.

											
										
										
											2002-05-21 22:52:10 -03:00
+								    return a.addresslist
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
-												Give me back my page breaks.

											
										
										
											2001-10-04 14:05:11 -03:00
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								ecre = re.compile(r'''
 								  =\?                   # literal =?
 								  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
 								  \?                    # literal ?
 								  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
 								  \?                    # literal ?
 								  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
 								  \?=                   # literal ?=
 								  ''', re.VERBOSE | re.IGNORECASE)
 								def decode(s):
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								    """Return a decoded string according to RFC 2047, as a unicode string.
 								    NOTE: This function is deprecated.  Use Header.decode_header() instead.
 								    """
 								    warnings.warn('Use Header.decode_header() instead.', DeprecationWarning, 2)
 								    # Intra-package import here to avoid circular import problems.
-												Use absolute import paths for intrapackage imports.

											
										
										
											2002-06-02 16:07:16 -03:00
+								    from email.Header import decode_header
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								    L = decode_header(s)
 								    if not isinstance(L, ListType):
 								        # s wasn't decoded
 								        return s
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								    rtn = []
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								    for atom, charset in L:
 								        if charset is None:
 								            rtn.append(atom)
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								        else:
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								            # Convert the string to Unicode using the given encoding.  Leave
 								            # Unicode conversion errors to strict.
 								            rtn.append(unicode(atom, charset))
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								    # Now that we've decoded everything, we just need to join all the parts
 								    # together into the final string.
 								    return UEMPTYSTRING.join(rtn)
-												Give me back my page breaks.

											
										
										
											2001-10-04 14:05:11 -03:00
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								def encode(s, charset='iso-8859-1', encoding='q'):
 								    """Encode a string according to RFC 2047."""
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								    warnings.warn('Use Header.Header.encode() instead.', DeprecationWarning, 2)
-												decode(), encode(): Accepting the minor optimizations from SF patch
#486375, but not the rest of it, since that changes the documented
semantics of encode().

											
										
										
											2001-12-03 15:26:40 -04:00
+								    encoding = encoding.lower()
 								    if encoding == 'q':
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								        estr = _qencode(s)
-												decode(), encode(): Accepting the minor optimizations from SF patch
#486375, but not the rest of it, since that changes the documented
semantics of encode().

											
										
										
											2001-12-03 15:26:40 -04:00
+								    elif encoding == 'b':
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								        estr = _bencode(s)
 								    else:
 								        raise ValueError, 'Illegal encoding code: ' + encoding
-												decode(), encode(): Accepting the minor optimizations from SF patch
#486375, but not the rest of it, since that changes the documented
semantics of encode().

											
										
										
											2001-12-03 15:26:40 -04:00
+								    return '=?%s?%s?%s?=' % (charset.lower(), encoding, estr)
-												formatdate(): An implementation to replace the one borrowed from
rfc822.py.  The old rfc822.formatdate() produced date strings using
obsolete syntax.  The new version produces the preferred RFC 2822
dates.

Also, an optional argument `localtime' is added, which if true,
produces a date relative to the local timezone, with daylight savings
time properly taken into account.

											
										
										
											2001-11-09 12:59:56 -04:00
-												Use True/False everywhere, and other code cleanups.

											
										
										
											2002-09-28 17:49:57 -03:00
+								def formatdate(timeval=None, localtime=False):
-												formatdate(): A better docstring.

											
										
										
											2001-11-09 13:07:28 -04:00
+								    """Returns a date string as specified by RFC 2822, e.g.:
-												formatdate(): An implementation to replace the one borrowed from
rfc822.py.  The old rfc822.formatdate() produced date strings using
obsolete syntax.  The new version produces the preferred RFC 2822
dates.

Also, an optional argument `localtime' is added, which if true,
produces a date relative to the local timezone, with daylight savings
time properly taken into account.

											
										
										
											2001-11-09 12:59:56 -04:00
 								    Fri, 09 Nov 2001 01:08:47 -0000
-												formatdate(): A better docstring.

											
										
										
											2001-11-09 13:07:28 -04:00
+								    Optional timeval if given is a floating point time value as accepted by
 								    gmtime() and localtime(), otherwise the current time is used.
-												Use True/False everywhere, and other code cleanups.

											
										
										
											2002-09-28 17:49:57 -03:00
+								    Optional localtime is a flag that when True, interprets timeval, and
-												formatdate(): A better docstring.

											
										
										
											2001-11-09 13:07:28 -04:00
+								    returns a date relative to the local timezone instead of UTC, properly
 								    taking daylight savings time into account.
-												formatdate(): An implementation to replace the one borrowed from
rfc822.py.  The old rfc822.formatdate() produced date strings using
obsolete syntax.  The new version produces the preferred RFC 2822
dates.

Also, an optional argument `localtime' is added, which if true,
produces a date relative to the local timezone, with daylight savings
time properly taken into account.

											
										
										
											2001-11-09 12:59:56 -04:00
+								    """
 								    # Note: we cannot use strftime() because that honors the locale and RFC
 								    # 2822 requires that day and month names be the English abbreviations.
 								    if timeval is None:
 								        timeval = time.time()
 								    if localtime:
 								        now = time.localtime(timeval)
 								        # Calculate timezone offset, based on whether the local zone has
 								        # daylight savings time, and whether DST is in effect.
 								        if time.daylight and now[-1]:
 								            offset = time.altzone
 								        else:
 								            offset = time.timezone
-												formatdate(): Jason Mastaler correctly points out that divmod with a
negative modulus won't return the right values.  So always do positive
modulus on an absolute value and twiddle the sign as appropriate after
the fact.

											
										
										
											2001-11-19 14:36:43 -04:00
+								        hours, minutes = divmod(abs(offset), 3600)
 								        # Remember offset is in seconds west of UTC, but the timezone is in
 								        # minutes east of UTC, so the signs differ.
 								        if offset > 0:
 								            sign = '-'
 								        else:
 								            sign = '+'
 								        zone = '%s%02d%02d' % (sign, hours, minutes / 60)
-												formatdate(): An implementation to replace the one borrowed from
rfc822.py.  The old rfc822.formatdate() produced date strings using
obsolete syntax.  The new version produces the preferred RFC 2822
dates.

Also, an optional argument `localtime' is added, which if true,
produces a date relative to the local timezone, with daylight savings
time properly taken into account.

											
										
										
											2001-11-09 12:59:56 -04:00
+								    else:
 								        now = time.gmtime(timeval)
 								        # Timezone offset is always -0000
 								        zone = '-0000'
 								    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
 								        ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]],
 								        now[2],
 								        ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
 								         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
 								        now[0], now[3], now[4], now[5],
 								        zone)
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
 								def make_msgid(idstring=None):
 								    """Build a string usable as an RFC 2822 compliant Message-ID, e.g:

 								    <20020201195627.33539.96671@nightshade.la.mastaler.com>

 								    The id is made of the current UTC date and time, the process id, a
 								    random integer below 100000 and the fully qualified host name.
 								    idstring, when given, is a string appended (after a dot) before the
 								    `@' to strengthen the uniqueness of the message id.
 								    """
 								    stamp = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
 								    if idstring is not None:
 								        suffix = '.' + idstring
 								    else:
 								        suffix = ''
 								    return '<%s.%s.%s%s@%s>' % (
 								        stamp, os.getpid(), random.randrange(100000), suffix,
 								        socket.getfqdn())
 								# These functions are in the standalone mimelib version only because they've
 								# subsequently been fixed in the latest Python versions.  We use this to work
 								# around broken older Pythons.
 								def parsedate(data):
 								    """Return _parsedate(data), or None when data is empty or None."""
 								    if data:
 								        return _parsedate(data)
 								    return None
 								def parsedate_tz(data):
 								    """Return _parsedate_tz(data), or None when data is empty or None."""
 								    if data:
 								        return _parsedate_tz(data)
 								    return None
 								def parseaddr(addr):
 								    """Return the first address parsed from addr.

 								    The result is the first entry of the address list that _AddressList
 								    builds from addr, or the pair ('', '') when nothing was parsed.
 								    """
 								    # The addresslist attribute is read directly from a fresh
 								    # _AddressList instance.
 								    parsed = _AddressList(addr).addresslist
 								    if parsed:
 								        return parsed[0]
 								    return '', ''
-												Oleg Broytmann's support for RFC 2231 encoded parameters, SF patch #549133

Specifically,

decode_rfc2231(), encode_rfc2231(): Functions to encode and decode RFC
2231 style parameters.

decode_params(): Function to decode a list of parameters.

											
										
										
											2002-06-29 02:58:04 -03:00
-												rfc822.unquote() doesn't properly de-backslash-ify in Python prior to
2.3.  This patch (adapted from Quinn Dunkan's SF patch #573204) fixes
the problem and should get ported to rfc822.py.

											
										
										
											2002-09-10 23:22:48 -03:00
+								# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
 								def unquote(str):
 								    """Strip one pair of enclosing quotes or angle brackets from a string.

 								    A string wrapped in double quotes loses them and has its escaped
 								    backslashes and escaped double quotes unescaped.  A string wrapped in
 								    <> only loses the brackets.  Anything else, including strings shorter
 								    than two characters, is returned unchanged.
 								    """
 								    if len(str) <= 1:
 								        return str
 								    first, last = str[0], str[-1]
 								    if first == '"' and last == '"':
 								        # Backslash pairs are collapsed before escaped quotes.
 								        inner = str[1:-1].replace('\\\\', '\\')
 								        return inner.replace('\\"', '"')
 								    if first == '<' and last == '>':
 								        return str[1:-1]
 								    return str
-												Oleg Broytmann's support for RFC 2231 encoded parameters, SF patch #549133

Specifically,

decode_rfc2231(), encode_rfc2231(): Functions to encode and decode RFC
2231 style parameters.

decode_params(): Function to decode a list of parameters.

											
										
										
											2002-06-29 02:58:04 -03:00
 								# RFC2231-related functions - parameter encoding and decoding
 								def decode_rfc2231(s):
 								    """Decode string according to RFC 2231.

 								    s is expected in the form charset'language'value, with value
 								    %-encoded.  Returns the 3-tuple (charset, language, value) with the
 								    %XX escapes in value decoded.  When s does not hold both single quote
 								    delimiters, charset and language are None and all of s is decoded as
 								    the value.
 								    """
 								    # unquote() lives in urllib on Python 2 and in urllib.parse on Python 3.
 								    try:
 								        from urllib import unquote as _pct_decode
 								    except ImportError:
 								        from urllib.parse import unquote as _pct_decode
 								    parts = s.split("'", 2)
 								    # Fewer than three fields means no charset'language' prefix.  A
 								    # value holding a single stray quote yields two fields; unpacking it
 								    # into three names would raise ValueError.
 								    if len(parts) < 3:
 								        return None, None, _pct_decode(s)
 								    charset, language, value = parts
 								    return charset, language, _pct_decode(value)
-												Oleg Broytmann's support for RFC 2231 encoded parameters, SF patch #549133

Specifically,

decode_rfc2231(), encode_rfc2231(): Functions to encode and decode RFC
2231 style parameters.

decode_params(): Function to decode a list of parameters.

											
										
										
											2002-06-29 02:58:04 -03:00
 								def encode_rfc2231(s, charset=None, language=None):
 								    """Encode string according to RFC 2231.

 								    s is always %-quoted (no character is treated as safe).  If neither
 								    charset nor language is given, the quoted string is returned with no
 								    prefix.  Otherwise the result is charset'language'quoted-s, where a
 								    charset or language that was not given is encoded as the empty
 								    string.
 								    """
 								    # quote() lives in urllib on Python 2 and in urllib.parse on Python 3.
 								    try:
 								        from urllib import quote as _pct_encode
 								    except ImportError:
 								        from urllib.parse import quote as _pct_encode
 								    s = _pct_encode(s, safe='')
 								    if charset is None and language is None:
 								        return s
 								    # A missing field must be empty; formatting None with %s would put
 								    # the literal text "None" into the parameter value.
 								    if charset is None:
 								        charset = ''
 								    if language is None:
 								        language = ''
 								    return "%s'%s'%s" % (charset, language, s)
-												Oleg Broytmann's support for RFC 2231 encoded parameters, SF patch #549133

Specifically,

decode_rfc2231(), encode_rfc2231(): Functions to encode and decode RFC
2231 style parameters.

decode_params(): Function to decode a list of parameters.

											
										
										
											2002-06-29 02:58:04 -03:00
 								# Matches RFC 2231 parameter names: name*, name*N or name*N*.
 								rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
 								def decode_params(params):
 								    """Decode parameters list according to RFC 2231.

 								    params is a sequence of 2-tuples containing (content type, string value).

 								    The first pair is copied through unchanged.  Each later pair has its
 								    value passed through unquote().  Pairs whose name does not match
 								    rfc2231_continuation are returned as (name, '"value"') with the value
 								    re-quoted by quote().  Pairs whose name matches are grouped by base
 								    name, sorted by section number, joined, decoded with decode_rfc2231()
 								    and returned as (name, (charset, language, '"value"')).

 								    An empty params sequence yields an empty list.
 								    """
 								    # Nothing to decode; indexing params[0] below would raise IndexError.
 								    if not params:
 								        return []
 								    name, value = params[0]
 								    new_params = [(name, value)]
 								    # Maps a parameter's base name to its list of (num, value) pieces.
 								    rfc2231_params = {}
 								    # Cycle through each of the rest of the parameters.
 								    for name, value in params[1:]:
 								        value = unquote(value)
 								        mo = rfc2231_continuation.match(name)
 								        if mo is None:
 								            new_params.append((name, '"%s"' % quote(value)))
 								            continue
 								        name, num = mo.group('name', 'num')
 								        if num is not None:
 								            num = int(num)
 								        rfc2231_params.setdefault(name, []).append((num, value))
 								    for name, continuations in rfc2231_params.items():
 								        # Sorting the (num, value) pairs puts the pieces in num order.
 								        continuations.sort()
 								        joined = EMPTYSTRING.join([text for num, text in continuations])
 								        charset, language, value = decode_rfc2231(joined)
 								        new_params.append(
 								            (name, (charset, language, '"%s"' % quote(value))))
 								    return new_params