cpython/Lib/email/Utils.py

# Copyright (C) 2001 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)

"""Miscellaneous utilities.
"""

import base64
import re
import time

from quopri import decodestring as _qdecode
from rfc822 import unquote, quote, parseaddr
from rfc822 import dump_address_pair
from rfc822 import AddrlistClass as _AddrlistClass
from rfc822 import parsedate_tz, parsedate, mktime_tz

# Intrapackage imports
from Encoders import _bencode, _qencode

# Separator used by getaddresses() to merge several field values into a
# single address list before parsing.
COMMASPACE = ', '
# Empty unicode string; decode() joins its pieces with it.
UEMPTYSTRING = u''


# Helpers

def _identity(s):
    return s


def _bdecode(s):
    if not s:
        return s
    # We can't quite use base64.encodestring() since it tacks on a "courtesy
    # newline".  Blech!
    if not s:
        return s
    hasnewline = (s[-1] == '\n')
    value = base64.decodestring(s)
    if not hasnewline and value[-1] == '\n':
        return value[:-1]
    return value


def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) pairs parsed from `fieldvalues`.

    `fieldvalues` is a sequence of strings.
    """
    # Merge every field value into one comma-separated string, then let the
    # rfc822 address list parser split it into individual addresses.
    return _AddrlistClass(COMMASPACE.join(fieldvalues)).getaddrlist()


# Matches a single encoded word of the form =?charset?encoding?atom?=.
# decode() splits on this pattern, so the three groups (charset, encoding,
# atom) show up interleaved with the surrounding plain text in the result.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)


def decode(s):
    """Return a decoded string according to RFC 2047, as a unicode string.

    Every =?charset?encoding?atom?= encoded word found in `s` is decoded with
    the function selected by its encoding and converted to unicode using its
    charset.  Text outside of encoded words is passed through unchanged.
    """
    # ecre has three groups, so split() always returns 1 + 4*N items for N
    # encoded words:
    #
    #   [text, charset, encoding, atom, text, charset, encoding, atom, text...]
    #
    # Splitting once and walking the result in strides of four handles any
    # number of encoded words (none of them may be dropped).
    parts = ecre.split(s)
    # The first element is any non-encoded leading text
    rtn = [parts[0]]
    for i in range(1, len(parts), 4):
        charset, encoding, atom, trailer = parts[i:i+4]
        # The encoding must be either `q' or `b', case-insensitive
        encoding = encoding.lower()
        if encoding == 'q':
            func = _qdecode
        elif encoding == 'b':
            func = _bdecode
        else:
            # ecre only matches `q' or `b'; this is purely defensive
            func = _identity
        # Decode and get the unicode in the charset
        rtn.append(unicode(func(atom), charset))
        # Non-encoded text following this encoded word
        rtn.append(trailer)
    # Now that we've decoded everything, we just need to join all the parts
    # together into the final string.
    return UEMPTYSTRING.join(rtn)


def encode(s, charset='iso-8859-1', encoding='q'):
    """Encode a string according to RFC 2047.

    `s` is the string to encode and `charset` names its character set.
    `encoding` is either 'q' or 'b' (case-insensitive) and selects the
    _qencode() or _bencode() helper respectively.

    Returns a string of the form =?charset?encoding?encoded-text?= with the
    charset and encoding lower cased.  Raises ValueError for any other
    `encoding`.
    """
    code = encoding.lower()
    if code == 'q':
        estr = _qencode(s)
    elif code == 'b':
        estr = _bencode(s)
    else:
        # Report the value exactly as the caller passed it in
        raise ValueError('Illegal encoding code: ' + encoding)
    return '=?%s?%s?%s?=' % (charset.lower(), code, estr)


def formatdate(timeval=None, localtime=0):
    """Returns a date string as specified by RFC 2822, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    Optional timeval if given is a floating point time value as accepted by
    gmtime() and localtime(), otherwise the current time is used.

    Optional localtime is a flag that when true, interprets timeval, and
    returns a date relative to the local timezone instead of UTC, properly
    taking daylight savings time into account.
    """
    # Note: we cannot use strftime() because that honors the locale and RFC
    # 2822 requires that day and month names be the English abbreviations.
    if timeval is None:
        timeval = time.time()
    if localtime:
        now = time.localtime(timeval)
        # Calculate timezone offset, based on whether the local zone has
        # daylight savings time, and whether DST is in effect.
        if time.daylight and now[-1]:
            offset = time.altzone
        else:
            offset = time.timezone
        # Work on the magnitude so that zones which are not a whole number of
        # hours away from UTC (e.g. +0530, -0330) come out right; `minutes'
        # is still expressed in seconds at this point.
        hours, minutes = divmod(abs(offset), 3600)
        # Remember offset is in seconds west of UTC, but the timezone is
        # written as hours and minutes east of UTC, so the signs differ.
        if offset > 0:
            sign = '-'
        else:
            sign = '+'
        zone = '%s%02d%02d' % (sign, hours, minutes // 60)
    else:
        now = time.gmtime(timeval)
        # Timezone offset is always -0000
        zone = '-0000'
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]],
        now[2],
        ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
        now[0], now[3], now[4], now[5],
        zone)
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								# Copyright (C) 2001 Python Software Foundation
 								# Author: barry@zope.com (Barry Warsaw)
 								"""Miscellaneous utilities.
 								"""
 								import re
 								from rfc822 import unquote, quote, parseaddr
 								from rfc822 import dump_address_pair
 								from rfc822 import AddrlistClass as _AddrlistClass
-												formatdate(): An implementation to replace the one borrowed from
rfc822.py.  The old rfc822.formatdate() produced date strings using
obsolete syntax.  The new version produces the preferred RFC 2822
dates.

Also, an optional argument `localtime' is added, which if true,
produces a date relative to the local timezone, with daylight savings
time properly taken into account.

											
										
										
											2001-11-09 12:59:56 -04:00
+								from rfc822 import parsedate_tz, parsedate, mktime_tz
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
 								from quopri import decodestring as _qdecode
 								import base64
 								# Intrapackage imports
 								from Encoders import _bencode, _qencode
 								COMMASPACE = ', '
 								UEMPTYSTRING = u''
-												Give me back my page breaks.

											
										
										
											2001-10-04 14:05:11 -03:00
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								# Helpers
 								def _identity(s):
 								    return s
 								def _bdecode(s):
 								    if not s:
 								        return s
 								    # We can't quite use base64.encodestring() since it tacks on a "courtesy
 								    # newline".  Blech!
 								    if not s:
 								        return s
 								    hasnewline = (s[-1] == '\n')
 								    value = base64.decodestring(s)
 								    if not hasnewline and value[-1] == '\n':
 								        return value[:-1]
 								    return value
-												Give me back my page breaks.

											
										
										
											2001-10-04 14:05:11 -03:00
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								def getaddresses(fieldvalues):
 								    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
 								    all = COMMASPACE.join(fieldvalues)
 								    a = _AddrlistClass(all)
 								    return a.getaddrlist()
-												Give me back my page breaks.

											
										
										
											2001-10-04 14:05:11 -03:00
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								ecre = re.compile(r'''
 								  =\?                   # literal =?
 								  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
 								  \?                    # literal ?
 								  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
 								  \?                    # literal ?
 								  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
 								  \?=                   # literal ?=
 								  ''', re.VERBOSE | re.IGNORECASE)
 								def decode(s):
 								    """Return a decoded string according to RFC 2047, as a unicode string."""
 								    rtn = []
 								    parts = ecre.split(s, 1)
 								    while parts:
 								        # If there are less than 4 parts, it can't be encoded and we're done
 								        if len(parts) < 5:
 								            rtn.extend(parts)
 								            break
 								        # The first element is any non-encoded leading text
 								        rtn.append(parts[0])
 								        charset = parts[1]
 								        encoding = parts[2]
 								        atom = parts[3]
 								        # The next chunk to decode should be in parts[4]
 								        parts = ecre.split(parts[4])
 								        # The encoding must be either `q' or `b', case-insensitive
 								        if encoding.lower() == 'q':
 								            func = _qdecode
 								        elif encoding.lower() == 'b':
 								            func = _bdecode
 								        else:
 								            func = _identity
 								        # Decode and get the unicode in the charset
 								        rtn.append(unicode(func(atom), charset))
 								    # Now that we've decoded everything, we just need to join all the parts
 								    # together into the final string.
 								    return UEMPTYSTRING.join(rtn)
-												Give me back my page breaks.

											
										
										
											2001-10-04 14:05:11 -03:00
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								def encode(s, charset='iso-8859-1', encoding='q'):
 								    """Encode a string according to RFC 2047."""
 								    if encoding.lower() == 'q':
 								        estr = _qencode(s)
 								    elif encoding.lower() == 'b':
 								        estr = _bencode(s)
 								    else:
 								        raise ValueError, 'Illegal encoding code: ' + encoding
 								    return '=?%s?%s?%s?=' % (charset.lower(), encoding.lower(), estr)
-												formatdate(): An implementation to replace the one borrowed from
rfc822.py.  The old rfc822.formatdate() produced date strings using
obsolete syntax.  The new version produces the preferred RFC 2822
dates.

Also, an optional argument `localtime' is added, which if true,
produces a date relative to the local timezone, with daylight savings
time properly taken into account.

											
										
										
											2001-11-09 12:59:56 -04:00
 								def formatdate(timeval=None, localtime=0):
-												formatdate(): A better docstring.

											
										
										
											2001-11-09 13:07:28 -04:00
+								    """Returns a date string as specified by RFC 2822, e.g.:
-												formatdate(): An implementation to replace the one borrowed from
rfc822.py.  The old rfc822.formatdate() produced date strings using
obsolete syntax.  The new version produces the preferred RFC 2822
dates.

Also, an optional argument `localtime' is added, which if true,
produces a date relative to the local timezone, with daylight savings
time properly taken into account.

											
										
										
											2001-11-09 12:59:56 -04:00
 								    Fri, 09 Nov 2001 01:08:47 -0000
-												formatdate(): A better docstring.

											
										
										
											2001-11-09 13:07:28 -04:00
+								    Optional timeval if given is a floating point time value as accepted by
 								    gmtime() and localtime(), otherwise the current time is used.
 								    Optional localtime is a flag that when true, interprets timeval, and
 								    returns a date relative to the local timezone instead of UTC, properly
 								    taking daylight savings time into account.
-												formatdate(): An implementation to replace the one borrowed from
rfc822.py.  The old rfc822.formatdate() produced date strings using
obsolete syntax.  The new version produces the preferred RFC 2822
dates.

Also, an optional argument `localtime' is added, which if true,
produces a date relative to the local timezone, with daylight savings
time properly taken into account.

											
										
										
											2001-11-09 12:59:56 -04:00
+								    """
 								    # Note: we cannot use strftime() because that honors the locale and RFC
 								    # 2822 requires that day and month names be the English abbreviations.
 								    if timeval is None:
 								        timeval = time.time()
 								    if localtime:
 								        now = time.localtime(timeval)
 								        # Calculate timezone offset, based on whether the local zone has
 								        # daylight savings time, and whether DST is in effect.
 								        if time.daylight and now[-1]:
 								            offset = time.altzone
 								        else:
 								            offset = time.timezone
 								        zone = '%+03d%02d' % (offset / -3600, offset % 60)
 								    else:
 								        now = time.gmtime(timeval)
 								        # Timezone offset is always -0000
 								        zone = '-0000'
 								    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
 								        ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]],
 								        now[2],
 								        ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
 								         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
 								        now[0], now[3], now[4], now[5],
 								        zone)