cpython/Lib/email/utils.py

# Copyright (C) 2001-2006 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""Miscellaneous utilities."""

__all__ = [
    'collapse_rfc2231_value',
    'decode_params',
    'decode_rfc2231',
    'encode_rfc2231',
    'formataddr',
    'formatdate',
    'getaddresses',
    'make_msgid',
    'parseaddr',
    'parsedate',
    'parsedate_tz',
    'unquote',
    ]

import os
import re
import time
import base64
import random
import socket
import urllib
import warnings
from cStringIO import StringIO

from email._parseaddr import quote
from email._parseaddr import AddressList as _AddressList
from email._parseaddr import mktime_tz

# We need workarounds for bugs in these methods in older Pythons (see below)
from email._parseaddr import parsedate as _parsedate
from email._parseaddr import parsedate_tz as _parsedate_tz

from quopri import decodestring as _qdecode

# Intrapackage imports
from email.encoders import _bencode, _qencode

# String constants shared by the helpers in this module.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
CRLF = '\r\n'
# Single quote; decode_rfc2231() splits parameter values on this character.
TICK = "'"

# Characters whose presence in a display name makes formataddr() wrap the
# name in double quotes.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Characters that formataddr() prefixes with a backslash in a display name.
escapesre = re.compile(r'[][\\()"]')


# Helpers

def _identity(s):
    """Return s unchanged (a no-op transformation)."""
    return s


def _bdecode(s):
    # We can't quite use base64.encodestring() since it tacks on a "courtesy
    # newline".  Blech!
    if not s:
        return s
    value = base64.decodestring(s)
    if not s.endswith('\n') and value.endswith('\n'):
        return value[:-1]
    return value


def fix_eols(s):
    r"""Return s with every line ending normalized to CRLF (\r\n).

    Bare newlines and bare carriage returns are each replaced by CRLF;
    existing CRLF pairs are left alone.
    """
    # First pass: a newline that is not already preceded by a carriage return.
    without_bare_lf = re.sub(r'(?<!\r)\n', CRLF, s)
    # Second pass: a carriage return that is not followed by a newline.
    return re.sub(r'\r(?!\n)', CRLF, without_bare_lf)


def formataddr(pair):
    """Format a (realname, email_address) 2-tuple as a header value.

    This is the inverse of parseaddr(); the result is suitable for an
    RFC 2822 From, To or Cc header.

    When realname is false, email_address is returned unmodified.  Otherwise
    the result is 'realname <email_address>', where the name is wrapped in
    double quotes if it contains any character matched by specialsre, and
    every character matched by escapesre is preceded by a backslash.
    """
    name, address = pair
    if not name:
        return address
    # Decide on quoting from the raw name, before any backslashes are added.
    if specialsre.search(name):
        quotes = '"'
    else:
        quotes = ''
    escaped = escapesre.sub(r'\\\g<0>', name)
    return '%s%s%s <%s>' % (quotes, escaped, quotes, address)


def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue.

    The field values are joined with ', ' and parsed as one address list.
    """
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist


# Matches tokens of the form =?charset?encoding?atom?= where encoding is a
# single "q" or "b" in either case.  The named groups are 'charset',
# 'encoding' and 'atom'.
# NOTE(review): this looks like the RFC 2047 encoded-word syntax -- confirm.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)


def formatdate(timeval=None, localtime=False, usegmt=False):
    """Return a date string in the format specified by RFC 2822, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    timeval, if given, is a floating point time value as accepted by
    time.gmtime() and time.localtime(); when it is None the current time is
    used.

    localtime is a flag.  When true, timeval is interpreted in the local
    timezone and the zone is written as a numeric offset that accounts for
    daylight savings time.  When false, the date is expressed in UTC.

    usegmt is a flag that is consulted only when localtime is false.  When
    true the zone is written as the string "GMT" (needed for HTTP) rather
    than the numeric "-0000".
    """
    # strftime() is deliberately avoided: it honors the locale, while RFC
    # 2822 requires the English day and month abbreviations.
    day_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    if timeval is None:
        timeval = time.time()
    if not localtime:
        now = time.gmtime(timeval)
        if usegmt:
            zone = 'GMT'
        else:
            zone = '-0000'
    else:
        now = time.localtime(timeval)
        # Use the DST offset only if the local zone defines DST and the DST
        # flag (last field of the time tuple) is set for this moment.
        if time.daylight and now[-1]:
            offset = time.altzone
        else:
            offset = time.timezone
        # divmod on the absolute value; the sign is applied separately.
        hours, leftover_seconds = divmod(abs(offset), 3600)
        # offset counts seconds west of UTC while the printed zone is east
        # of UTC, so a positive offset yields a '-' sign.
        if offset > 0:
            sign = '-'
        else:
            sign = '+'
        zone = '%s%02d%02d' % (sign, hours, leftover_seconds // 60)
    year, month, day, hour, minute, second, weekday = now[:7]
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        day_names[weekday], day, month_names[month - 1],
        year, hour, minute, second, zone)


def make_msgid(idstring=None):
    """Return a string suitable for an RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    The id is built from the current UTC timestamp, the process id, a random
    integer below 100000 and the fully qualified host name.  idstring, if
    given, is a string appended (after a dot) to the local part to strengthen
    the uniqueness of the message id.
    """
    utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
    pid = os.getpid()
    randint = random.randrange(100000)
    if idstring is None:
        suffix = ''
    else:
        suffix = '.' + idstring
    return '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, suffix,
                                socket.getfqdn())


# These functions are in the standalone mimelib version only because they've
# subsequently been fixed in the latest Python versions.  We use this to work
# around broken older Pythons.
def parsedate(data):
    """Parse a date string with email._parseaddr.parsedate().

    Returns None without calling the parser when data is false (for example
    None or the empty string).
    """
    if data:
        return _parsedate(data)
    return None


def parsedate_tz(data):
    """Parse a date string with email._parseaddr.parsedate_tz().

    Returns None without calling the parser when data is false (for example
    None or the empty string).
    """
    if data:
        return _parsedate_tz(data)
    return None


def parseaddr(addr):
    """Parse addr and return the first address found in it.

    The result is the first entry of the parsed address list, or the pair
    ('', '') when the list is empty.
    """
    parsed = _AddressList(addr).addresslist
    if parsed:
        return parsed[0]
    return '', ''


# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Remove one layer of surrounding quotes from a string.

    A string enclosed in double quotes has them stripped, and the escape
    sequences for backslash and double quote inside it are unescaped.  A
    string enclosed in angle brackets just has the brackets stripped.
    Anything else, including strings shorter than two characters, is
    returned unchanged.
    """
    if len(str) <= 1:
        return str
    if str.startswith('"') and str.endswith('"'):
        inner = str[1:-1]
        # Collapse escaped backslashes first, then escaped double quotes.
        inner = inner.replace('\\\\', '\\')
        return inner.replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return str[1:-1]
    return str


# RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Decode string according to RFC 2231.

    s is split on at most its first two single quotes into the charset,
    language and value parts.

    Returns a 3-element sequence (charset, language, value).  When s holds
    fewer than two single quotes the result is the tuple (None, None, s);
    otherwise it is the list produced by the split.
    """
    parts = s.split(TICK, 2)
    if len(parts) <= 2:
        return None, None, s
    # With maxsplit=2 there are never more than three parts, and any further
    # single quotes stay inside the last part, so nothing needs re-joining.
    return parts


def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    s is always %-quoted with urllib.quote() (no characters are treated as
    safe).  If neither charset nor language is given, the quoted string is
    returned on its own.  Otherwise the result has the form
    charset'language'quoted-string; if charset is given but not language,
    the empty string is used for language.
    """
    import urllib
    quoted = urllib.quote(s, safe='')
    if charset is not None or language is not None:
        if language is None:
            language = ''
        return "%s'%s'%s" % (charset, language, quoted)
    return quoted


# Matches parameter names of the form NAME*, NAME*N and NAME*N* (N being a
# run of decimal digits).  Group 'name' is NAME; group 'num' is N, or None
# when there are no digits.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')

def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a list of 2-tuples containing (param name, string value); it
    is copied, not modified.  The first entry is passed through as-is.

    Returns a new list of 2-tuples.  Ordinary parameters come first, with
    their values unquoted and then re-quoted inside double quotes.
    Parameters whose names match rfc2231_continuation follow, one entry per
    base name, with their segments joined in sorted order.  If any segment
    of such a parameter was %-encoded (its name ended in '*'), the value is
    a (charset, language, '"value"') 3-tuple; otherwise it is the
    double-quoted string.
    """
    # Work on a copy so the caller's list is left alone.
    pending = params[:]
    first_name, first_value = pending.pop(0)
    decoded = [(first_name, first_value)]
    # Maps a base parameter name to its list of segments.  Each segment is a
    # 3-tuple of (section number or None, string value, %-encoded flag).
    segments_by_name = {}
    for name, value in pending:
        encoded = name.endswith('*')
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if not mo:
            decoded.append((name, '"%s"' % quote(value)))
            continue
        base_name, num = mo.group('name', 'num')
        if num is not None:
            num = int(num)
        segments_by_name.setdefault(base_name, []).append(
            (num, value, encoded))
    for name, continuations in segments_by_name.items():
        # Put the segments in numerical order.
        continuations.sort()
        pieces = []
        extended = False
        # Only the segments flagged as encoded are %-decoded.  If any segment
        # is encoded, the joined string is expected to start with the
        # charset and language specifiers.
        for _num, piece, is_encoded in continuations:
            if is_encoded:
                piece = urllib.unquote(piece)
                extended = True
            pieces.append(piece)
        joined = quote(EMPTYSTRING.join(pieces))
        if extended:
            charset, language, joined = decode_rfc2231(joined)
            decoded.append((name, (charset, language, '"%s"' % joined)))
        else:
            decoded.append((name, '"%s"' % joined))
    return decoded

def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse a parameter value into a single string.

    If value is not a tuple, it is simply unquoted and returned.

    If value is a tuple, its first item is taken as the charset ('us-ascii'
    when false) and its third item as the raw text.  The text is unquoted
    and decoded to unicode with that charset, using errors as the
    error-handling scheme.  If Python does not know the charset,
    fallback_charset is used instead.
    """
    if not isinstance(value, tuple):
        return unquote(value)
    rawval = unquote(value[2])
    charset = value[0] or 'us-ascii'
    try:
        decoded = unicode(rawval, charset, errors)
    except LookupError:
        # The charset is unknown to Python; retry with the fallback.
        decoded = unicode(rawval, fallback_charset, errors)
    return decoded
-												Merge email package 4.0 from the sandbox, including documentation, test cases,
and NEWS updates.

											
										
										
											2006-03-18 11:41:53 -04:00
+								# Copyright (C) 2001-2006 Python Software Foundation
-												Big email 3.0 API changes, with updated unit tests and documentation.
Briefly (from the NEWS file):

- Updates for the email package:
  + All deprecated APIs that in email 2.x issued warnings have been removed:
    _encoder argument to the MIMEText constructor, Message.add_payload(),
    Utils.dump_address_pair(), Utils.decode(), Utils.encode()
  + New deprecations: Generator.__call__(), Message.get_type(),
    Message.get_main_type(), Message.get_subtype(), the 'strict' argument to
    the Parser constructor.  These will be removed in email 3.1.
  + Support for Python earlier than 2.3 has been removed (see PEP 291).
  + All defect classes have been renamed to end in 'Defect'.
  + Some FeedParser fixes; also a MultipartInvariantViolationDefect will be
    added to messages that claim to be multipart but really aren't.
  + Updates to documentation.

											
										
										
											2004-10-03 00:16:19 -03:00
+								# Author: Barry Warsaw
 								# Contact: email-sig@python.org
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
-												Update to Python 2.3, getting rid of backward compatiblity crud.

											
										
										
											2004-05-09 00:55:11 -03:00
+								"""Miscellaneous utilities."""
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
-												Merge email package 4.0 from the sandbox, including documentation, test cases,
and NEWS updates.

											
										
										
											2006-03-18 11:41:53 -04:00
+								__all__ = [
 								    'collapse_rfc2231_value',
 								    'decode_params',
 								    'decode_rfc2231',
 								    'encode_rfc2231',
 								    'formataddr',
 								    'formatdate',
 								    'getaddresses',
 								    'make_msgid',
 								    'parseaddr',
 								    'parsedate',
 								    'parsedate_tz',
 								    'unquote',
 								    ]
-												Update to Python 2.3, getting rid of backward compatiblity crud.

											
										
										
											2004-05-09 00:55:11 -03:00
+								import os
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								import re
-												Update to Python 2.3, getting rid of backward compatiblity crud.

											
										
										
											2004-05-09 00:55:11 -03:00
+								import time
 								import base64
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								import random
-												Update to Python 2.3, getting rid of backward compatiblity crud.

											
										
										
											2004-05-09 00:55:11 -03:00
+								import socket
-												More RFC 2231 improvements for the email 4.0 package.  As Mark Sapiro rightly
points out there are really two types of continued headers defined in this
RFC (i.e. "encoded" parameters with the form "name*0*=" and unencoded
parameters with the form "name*0="), but we were were handling them both the
same way and that isn't correct.

This patch should be much more RFC compliant in that only encoded params are
%-decoded and the charset/language information is only extract if there are
any encoded params in the segments.  If there are no encoded params then the
RFC says that there will be no charset/language parts.

Note however that this will change the return value for Message.get_param() in
some cases.  For example, whereas before if you had all unencoded param
continuations you would have still gotten a 3-tuple back from this method
(with charset and language == None), you will now get just a string.  I don't
believe this is a backward incompatible change though because the
documentation for this method already indicates that either return value is
possible and that you must do an isinstance(val, tuple) check to discriminate
between the two.  (Yeah that API kind of sucks but we can't change /that/
without breaking code.)

Test cases, some documentation updates, and a NEWS item accompany this patch.

											
										
										
											2006-07-21 11:51:07 -03:00
+								import urllib
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								import warnings
 								from cStringIO import StringIO
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
-												Jason Mastaler's patch to break the dependence on rfc822.py for the
address parsing routines.  Closes SF patch #613434.

											
										
										
											2002-11-05 15:54:52 -04:00
+								from email._parseaddr import quote
 								from email._parseaddr import AddressList as _AddressList
 								from email._parseaddr import mktime_tz
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
 								# We need wormarounds for bugs in these methods in older Pythons (see below)
-												Jason Mastaler's patch to break the dependence on rfc822.py for the
address parsing routines.  Closes SF patch #613434.

											
										
										
											2002-11-05 15:54:52 -04:00
+								from email._parseaddr import parsedate as _parsedate
 								from email._parseaddr import parsedate_tz as _parsedate_tz
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
-												Update to Python 2.3, getting rid of backward compatiblity crud.

											
										
										
											2004-05-09 00:55:11 -03:00
+								from quopri import decodestring as _qdecode
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
 								# Intrapackage imports
-												Merge email package 4.0 from the sandbox, including documentation, test cases,
and NEWS updates.

											
										
										
											2006-03-18 11:41:53 -04:00
+								from email.encoders import _bencode, _qencode
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
 								COMMASPACE = ', '
-												Oleg Broytmann's support for RFC 2231 encoded parameters, SF patch #549133

Specifically,

decode_rfc2231(), encode_rfc2231(): Functions to encode and decode RFC
2231 style parameters.

decode_params(): Function to decode a list of parameters.

											
										
										
											2002-06-29 02:58:04 -03:00
+								EMPTYSTRING = ''
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								UEMPTYSTRING = u''
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								CRLF = '\r\n'
-												decode_rfc2231(): Be more robust against buggy RFC 2231 encodings.
Specifically, instead of raising a ValueError when there is a single tick in
the parameter, simply return that the entire string unquoted, with None for
both the charset and the language.  Also, if there are more than 2 ticks in
the parameter, interpret the first three parts as the standard RFC 2231 parts,
then the rest of the parts as the encoded string.

Test cases added.

Original fewer-than-3-parts fix by Tokio Kikuchi.

Resolves SF bug # 1218081.  I will back port the fix and tests to Python 2.4
(email 3.0) and Python 2.3 (email 2.5).

Also, bump the version number to email 4.0.1, removing the 'alpha' moniker.

											
										
										
											2006-07-17 20:07:51 -03:00
+								TICK = "'"
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
-												specialsre, escapesre: In SF bug #663369, Matthew Woodcraft points out
that backslashes must be escaped in character sets.

											
										
										
											2003-03-10 15:20:18 -04:00
+								specialsre = re.compile(r'[][\\()<>@,:;".]')
 								escapesre = re.compile(r'[][\\()"]')
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
-												Give me back my page breaks.

											
										
										
											2001-10-04 14:05:11 -03:00
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								# Helpers
 								def _identity(s):
 								    return s
 								def _bdecode(s):
 								    # We can't quite use base64.encodestring() since it tacks on a "courtesy
 								    # newline".  Blech!
 								    if not s:
 								        return s
 								    value = base64.decodestring(s)
-												Use True/False everywhere, and other code cleanups.

											
										
										
											2002-09-28 17:49:57 -03:00
+								    if not s.endswith('\n') and value.endswith('\n'):
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								        return value[:-1]
 								    return value
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
 								def fix_eols(s):
 								    """Replace all line-ending characters with \r\n."""
 								    # Fix newlines with no preceding carriage return
 								    s = re.sub(r'(?<!\r)\n', CRLF, s)
 								    # Fix carriage returns with no following newline
 								    s = re.sub(r'\r(?!\n)', CRLF, s)
 								    return s
 								def formataddr(pair):
 								    """The inverse of parseaddr(), this takes a 2-tuple of the form
 								    (realname, email_address) and returns the string value suitable
-												Use True/False everywhere, and other code cleanups.

											
										
										
											2002-09-28 17:49:57 -03:00
+								    for an RFC 2822 From, To or Cc header.
-												Whitespace normalization.

											
										
										
											2002-05-23 12:15:30 -03:00
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								    If the first element of pair is false, then the second element is
 								    returned unmodified.
 								    """
 								    name, address = pair
 								    if name:
 								        quotes = ''
 								        if specialsre.search(name):
 								            quotes = '"'
 								        name = escapesre.sub(r'\\\g<0>', name)
 								        return '%s%s%s <%s>' % (quotes, name, quotes, address)
 								    return address
-												Give me back my page breaks.

											
										
										
											2001-10-04 14:05:11 -03:00
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								def getaddresses(fieldvalues):
 								    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
 								    all = COMMASPACE.join(fieldvalues)
-												AddrlistClass -> AddressList

											
										
										
											2002-04-12 17:50:05 -03:00
+								    a = _AddressList(all)
-												getaddresses(): Like the change in rfc822.py, this one needs to access
the AddressList.addresslist attribute directly.

Also, add a test case for the email.Utils.getaddresses() interface.

											
										
										
											2002-05-21 22:52:10 -03:00
+								    return a.addresslist
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
-												Give me back my page breaks.

											
										
										
											2001-10-04 14:05:11 -03:00
-												The email package version 1.0, prototyped as mimelib
<http://sf.net/projects/mimelib>.  There /are/ API differences between
mimelib and email, but most of the implementations are shared (except
where cool Py2.2 stuff like generators are used).

											
										
										
											2001-09-23 00:17:28 -03:00
+								ecre = re.compile(r'''
 								  =\?                   # literal =?
 								  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
 								  \?                    # literal ?
 								  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
 								  \?                    # literal ?
 								  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
 								  \?=                   # literal ?=
 								  ''', re.VERBOSE | re.IGNORECASE)
-												formatdate(): An implementation to replace the one borrowed from
rfc822.py.  The old rfc822.formatdate() produced date strings using
obsolete syntax.  The new version produces the preferred RFC 2822
dates.

Also, an optional argument `localtime' is added, which if true,
produces a date relative to the local timezone, with daylight savings
time properly taken into account.

											
										
										
											2001-11-09 12:59:56 -04:00
-												Added a usegmt flag to email.Utils.formatdate - this allows it to be
used to replace rfc822.formatdate for protocols like HTTP (where 'GMT' must
be the timezone string).

											
										
										
											2004-10-11 10:53:08 -03:00
+								def formatdate(timeval=None, localtime=False, usegmt=False):
-												formatdate(): A better docstring.

											
										
										
											2001-11-09 13:07:28 -04:00
+								    """Returns a date string as specified by RFC 2822, e.g.:
-												formatdate(): An implementation to replace the one borrowed from
rfc822.py.  The old rfc822.formatdate() produced date strings using
obsolete syntax.  The new version produces the preferred RFC 2822
dates.

Also, an optional argument `localtime' is added, which if true,
produces a date relative to the local timezone, with daylight savings
time properly taken into account.

											
										
										
											2001-11-09 12:59:56 -04:00
 								    Fri, 09 Nov 2001 01:08:47 -0000
-												formatdate(): A better docstring.

											
										
										
											2001-11-09 13:07:28 -04:00
+								    Optional timeval if given is a floating point time value as accepted by
 								    gmtime() and localtime(), otherwise the current time is used.
-												Use True/False everywhere, and other code cleanups.

											
										
										
											2002-09-28 17:49:57 -03:00
+								    Optional localtime is a flag that when True, interprets timeval, and
-												formatdate(): A better docstring.

											
										
										
											2001-11-09 13:07:28 -04:00
+								    returns a date relative to the local timezone instead of UTC, properly
 								    taking daylight savings time into account.
-												Added a usegmt flag to email.Utils.formatdate - this allows it to be
used to replace rfc822.formatdate for protocols like HTTP (where 'GMT' must
be the timezone string).

											
										
										
											2004-10-11 10:53:08 -03:00
-												Whitespace normalization.

											
										
										
											2004-10-12 18:51:32 -03:00
+								    Optional argument usegmt means that the timezone is written out as
-												Added a usegmt flag to email.Utils.formatdate - this allows it to be
used to replace rfc822.formatdate for protocols like HTTP (where 'GMT' must
be the timezone string).

											
										
										
											2004-10-11 10:53:08 -03:00
+								    an ascii string, not numeric one (so "GMT" instead of "+0000"). This
 								    is needed for HTTP, and is only used when localtime==False.
-												formatdate(): An implementation to replace the one borrowed from
rfc822.py.  The old rfc822.formatdate() produced date strings using
obsolete syntax.  The new version produces the preferred RFC 2822
dates.

Also, an optional argument `localtime' is added, which if true,
produces a date relative to the local timezone, with daylight savings
time properly taken into account.

											
										
										
											2001-11-09 12:59:56 -04:00
+								    """
 								    # Note: we cannot use strftime() because that honors the locale and RFC
 								    # 2822 requires that day and month names be the English abbreviations.
 								    if timeval is None:
 								        timeval = time.time()
 								    if localtime:
 								        now = time.localtime(timeval)
 								        # Calculate timezone offset, based on whether the local zone has
 								        # daylight savings time, and whether DST is in effect.
 								        if time.daylight and now[-1]:
 								            offset = time.altzone
 								        else:
 								            offset = time.timezone
-												formatdate(): Jason Mastaler correctly points out that divmod with a
negative modulus won't return the right values.  So always do positive
modulus on an absolute value and twiddle the sign as appropriate after
the fact.

											
										
										
											2001-11-19 14:36:43 -04:00
+								        hours, minutes = divmod(abs(offset), 3600)
 								        # Remember offset is in seconds west of UTC, but the timezone is in
 								        # minutes east of UTC, so the signs differ.
 								        if offset > 0:
 								            sign = '-'
 								        else:
 								            sign = '+'
-												Big email 3.0 API changes, with updated unit tests and documentation.
Briefly (from the NEWS file):

- Updates for the email package:
  + All deprecated APIs that in email 2.x issued warnings have been removed:
    _encoder argument to the MIMEText constructor, Message.add_payload(),
    Utils.dump_address_pair(), Utils.decode(), Utils.encode()
  + New deprecations: Generator.__call__(), Message.get_type(),
    Message.get_main_type(), Message.get_subtype(), the 'strict' argument to
    the Parser constructor.  These will be removed in email 3.1.
  + Support for Python earlier than 2.3 has been removed (see PEP 291).
  + All defect classes have been renamed to end in 'Defect'.
  + Some FeedParser fixes; also a MultipartInvariantViolationDefect will be
    added to messages that claim to be multipart but really aren't.
  + Updates to documentation.

											
										
										
											2004-10-03 00:16:19 -03:00
+								        zone = '%s%02d%02d' % (sign, hours, minutes // 60)
-												formatdate(): An implementation to replace the one borrowed from
rfc822.py.  The old rfc822.formatdate() produced date strings using
obsolete syntax.  The new version produces the preferred RFC 2822
dates.

Also, an optional argument `localtime' is added, which if true,
produces a date relative to the local timezone, with daylight savings
time properly taken into account.

											
										
										
											2001-11-09 12:59:56 -04:00
+								    else:
 								        now = time.gmtime(timeval)
 								        # Timezone offset is always -0000
-												Added a usegmt flag to email.Utils.formatdate - this allows it to be
used to replace rfc822.formatdate for protocols like HTTP (where 'GMT' must
be the timezone string).

											
										
										
											2004-10-11 10:53:08 -03:00
+								        if usegmt:
 								            zone = 'GMT'
 								        else:
 								            zone = '-0000'
-												formatdate(): An implementation to replace the one borrowed from
rfc822.py.  The old rfc822.formatdate() produced date strings using
obsolete syntax.  The new version produces the preferred RFC 2822
dates.

Also, an optional argument `localtime' is added, which if true,
produces a date relative to the local timezone, with daylight savings
time properly taken into account.

											
										
										
											2001-11-09 12:59:56 -04:00
+								    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
 								        ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][now[6]],
 								        now[2],
 								        ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
 								         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
 								        now[0], now[3], now[4], now[5],
 								        zone)
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
 								def make_msgid(idstring=None):
-												Docstring consistency with the updated .tex files.

											
										
										
											2002-09-30 21:44:13 -03:00
+								    """Returns a string suitable for RFC 2822 compliant Message-ID, e.g:
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
 								    <20020201195627.33539.96671@nightshade.la.mastaler.com>
 								    Optional idstring if given is a string used to strengthen the
-												Docstring consistency with the updated .tex files.

											
										
										
											2002-09-30 21:44:13 -03:00
+								    uniqueness of the message id.
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								    """
 								    timeval = time.time()
 								    utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval))
 								    pid = os.getpid()
 								    randint = random.randrange(100000)
 								    if idstring is None:
 								        idstring = ''
 								    else:
 								        idstring = '.' + idstring
 								    idhost = socket.getfqdn()
 								    msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, idhost)
 								    return msgid
 								# These functions are in the standalone mimelib version only because they've
 								# subsequently been fixed in the latest Python versions.  We use this to work
 								# around broken older Pythons.
 								def parsedate(data):
 								    """Parse a date string, answering None for empty input.
 								    Falsy data (None or '') is never handed to the underlying parser;
 								    any other value is passed to email._parseaddr.parsedate() and that
 								    function's result is returned unchanged.
 								    """
 								    if data:
 								        return _parsedate(data)
 								    return None
 								def parsedate_tz(data):
 								    """Parse a date string with timezone, answering None for empty input.
 								    Falsy data (None or '') is never handed to the underlying parser;
 								    any other value is passed to email._parseaddr.parsedate_tz() and that
 								    function's result is returned unchanged.
 								    """
 								    if data:
 								        return _parsedate_tz(data)
 								    return None
 								def parseaddr(addr):
-												parseaddr(): Don't use rfc822.parseaddr() because this now implies a
double call to AddressList.getaddrlist(), and /that/ always returns an
empty list for the second and subsequent calls.

Instead, instantiate an AddressList directly, and get the parsed
addresses out of the addresslist attribute.

											
										
										
											2002-04-15 19:00:25 -03:00
+								    addrs = _AddressList(addr).addresslist
 								    if not addrs:
-												Sync'ing with standalone email package 2.0.1.  This adds support for
non-us-ascii character sets in headers and bodies.  Some API changes
(with DeprecationWarnings for the old APIs).  Better RFC-compliant
implementations of base64 and quoted-printable.

Updated test cases.  Documentation updates to follow (after I finish
writing them ;).

											
										
										
											2002-04-10 18:01:31 -03:00
+								        return '', ''
-												parseaddr(): Don't use rfc822.parseaddr() because this now implies a
double call to AddressList.getaddrlist(), and /that/ always returns an
empty list for the second and subsequent calls.

Instead, instantiate an AddressList directly, and get the parsed
addresses out of the addresslist attribute.

											
										
										
											2002-04-15 19:00:25 -03:00
+								    return addrs[0]
-												Oleg Broytmann's support for RFC 2231 encoded parameters, SF patch #549133

Specifically,

decode_rfc2231(), encode_rfc2231(): Functions to encode and decode RFC
2231 style parameters.

decode_params(): Function to decode a list of parameters.

											
										
										
											2002-06-29 02:58:04 -03:00
-												rfc822.unquote() doesn't properly de-backslash-ify in Python prior to
2.3.  This patch (adapted from Quinn Dunkan's SF patch #573204) fixes
the problem and should get ported to rfc822.py.

											
										
										
											2002-09-10 23:22:48 -03:00
+								# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
 								def unquote(str):
 								    """Strip one layer of enclosing quotes or angle brackets from a string.
 								    A value wrapped in double quotes loses them and has the backslash
 								    escapes for backslash and double quote undone, in that order.  A value
 								    wrapped in <...> just loses the brackets.  Strings shorter than two
 								    characters, or not wrapped in either pair, are returned unchanged.
 								    """
 								    # A single character cannot be both an opening and a closing delimiter.
 								    if len(str) <= 1:
 								        return str
 								    if str.startswith('"') and str.endswith('"'):
 								        inner = str[1:-1]
 								        # Collapse escaped backslashes first, then escaped quotes.
 								        inner = inner.replace('\\\\', '\\')
 								        return inner.replace('\\"', '"')
 								    if str.startswith('<') and str.endswith('>'):
 								        return str[1:-1]
 								    return str
-												Oleg Broytmann's support for RFC 2231 encoded parameters, SF patch #549133

Specifically,

decode_rfc2231(), encode_rfc2231(): Functions to encode and decode RFC
2231 style parameters.

decode_params(): Function to decode a list of parameters.

											
										
										
											2002-06-29 02:58:04 -03:00
 								# RFC2231-related functions - parameter encoding and decoding
 								def decode_rfc2231(s):
 								    """Decode string according to RFC 2231"""
-												decode_rfc2231(): Be more robust against buggy RFC 2231 encodings.
Specifically, instead of raising a ValueError when there is a single tick in
the parameter, simply return the entire string unquoted, with None for
both the charset and the language.  Also, if there are more than 2 ticks in
the parameter, interpret the first three parts as the standard RFC 2231 parts,
then the rest of the parts as the encoded string.

Test cases added.

Original fewer-than-3-parts fix by Tokio Kikuchi.

Resolves SF bug # 1218081.  I will back port the fix and tests to Python 2.4
(email 3.0) and Python 2.3 (email 2.5).

Also, bump the version number to email 4.0.1, removing the 'alpha' moniker.

											
										
										
											2006-07-17 20:07:51 -03:00
+								    parts = s.split(TICK, 2)
 								    if len(parts) <= 2:
-												More RFC 2231 improvements for the email 4.0 package.  As Mark Sapiro rightly
points out there are really two types of continued headers defined in this
RFC (i.e. "encoded" parameters with the form "name*0*=" and unencoded
parameters with the form "name*0="), but we were handling them both the
same way and that isn't correct.

This patch should be much more RFC compliant in that only encoded params are
%-decoded and the charset/language information is only extracted if there are
any encoded params in the segments.  If there are no encoded params then the
RFC says that there will be no charset/language parts.

Note however that this will change the return value for Message.get_param() in
some cases.  For example, whereas before if you had all unencoded param
continuations you would have still gotten a 3-tuple back from this method
(with charset and language == None), you will now get just a string.  I don't
believe this is a backward incompatible change though because the
documentation for this method already indicates that either return value is
possible and that you must do an isinstance(val, tuple) check to discriminate
between the two.  (Yeah that API kind of sucks but we can't change /that/
without breaking code.)

Test cases, some documentation updates, and a NEWS item accompany this patch.

											
										
										
											2006-07-21 11:51:07 -03:00
+								        return None, None, s
-												decode_rfc2231(): Be more robust against buggy RFC 2231 encodings.
Specifically, instead of raising a ValueError when there is a single tick in
the parameter, simply return the entire string unquoted, with None for
both the charset and the language.  Also, if there are more than 2 ticks in
the parameter, interpret the first three parts as the standard RFC 2231 parts,
then the rest of the parts as the encoded string.

Test cases added.

Original fewer-than-3-parts fix by Tokio Kikuchi.

Resolves SF bug # 1218081.  I will back port the fix and tests to Python 2.4
(email 3.0) and Python 2.3 (email 2.5).

Also, bump the version number to email 4.0.1, removing the 'alpha' moniker.

											
										
										
											2006-07-17 20:07:51 -03:00
+								    if len(parts) > 3:
 								        charset, language = parts[:2]
 								        s = TICK.join(parts[2:])
-												More RFC 2231 improvements for the email 4.0 package.  As Mark Sapiro rightly
points out there are really two types of continued headers defined in this
RFC (i.e. "encoded" parameters with the form "name*0*=" and unencoded
parameters with the form "name*0="), but we were handling them both the
same way and that isn't correct.

This patch should be much more RFC compliant in that only encoded params are
%-decoded and the charset/language information is only extracted if there are
any encoded params in the segments.  If there are no encoded params then the
RFC says that there will be no charset/language parts.

Note however that this will change the return value for Message.get_param() in
some cases.  For example, whereas before if you had all unencoded param
continuations you would have still gotten a 3-tuple back from this method
(with charset and language == None), you will now get just a string.  I don't
believe this is a backward incompatible change though because the
documentation for this method already indicates that either return value is
possible and that you must do an isinstance(val, tuple) check to discriminate
between the two.  (Yeah that API kind of sucks but we can't change /that/
without breaking code.)

Test cases, some documentation updates, and a NEWS item accompany this patch.

											
										
										
											2006-07-21 11:51:07 -03:00
+								        return charset, language, s
 								    return parts
-												Oleg Broytmann's support for RFC 2231 encoded parameters, SF patch #549133

Specifically,

decode_rfc2231(), encode_rfc2231(): Functions to encode and decode RFC
2231 style parameters.

decode_params(): Function to decode a list of parameters.

											
										
										
											2002-06-29 02:58:04 -03:00
 								def encode_rfc2231(s, charset=None, language=None):
-												Docstring consistency with the updated .tex files.

											
										
										
											2002-09-30 21:44:13 -03:00
+								    """Encode string according to RFC 2231.
 								    If neither charset nor language is given, then s is returned as-is.  If
 								    charset is given but not language, the string is encoded using the empty
 								    string for language.
 								    """
-												Oleg Broytmann's support for RFC 2231 encoded parameters, SF patch #549133

Specifically,

decode_rfc2231(), encode_rfc2231(): Functions to encode and decode RFC
2231 style parameters.

decode_params(): Function to decode a list of parameters.

											
										
										
											2002-06-29 02:58:04 -03:00
+								    import urllib
 								    s = urllib.quote(s, safe='')
 								    if charset is None and language is None:
 								        return s
-												Docstring consistency with the updated .tex files.

											
										
										
											2002-09-30 21:44:13 -03:00
+								    if language is None:
 								        language = ''
 								    return "%s'%s'%s" % (charset, language, s)
-												Oleg Broytmann's support for RFC 2231 encoded parameters, SF patch #549133

Specifically,

decode_rfc2231(), encode_rfc2231(): Functions to encode and decode RFC
2231 style parameters.

decode_params(): Function to decode a list of parameters.

											
										
										
											2002-06-29 02:58:04 -03:00
 								rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
 								def decode_params(params):
-												Docstring consistency with the updated .tex files.

											
										
										
											2002-09-30 21:44:13 -03:00
+								    """Decode parameters list according to RFC 2231.
-												More RFC 2231 improvements for the email 4.0 package.  As Mark Sapiro rightly
points out there are really two types of continued headers defined in this
RFC (i.e. "encoded" parameters with the form "name*0*=" and unencoded
parameters with the form "name*0="), but we were handling them both the
same way and that isn't correct.

This patch should be much more RFC compliant in that only encoded params are
%-decoded and the charset/language information is only extracted if there are
any encoded params in the segments.  If there are no encoded params then the
RFC says that there will be no charset/language parts.

Note however that this will change the return value for Message.get_param() in
some cases.  For example, whereas before if you had all unencoded param
continuations you would have still gotten a 3-tuple back from this method
(with charset and language == None), you will now get just a string.  I don't
believe this is a backward incompatible change though because the
documentation for this method already indicates that either return value is
possible and that you must do an isinstance(val, tuple) check to discriminate
between the two.  (Yeah that API kind of sucks but we can't change /that/
without breaking code.)

Test cases, some documentation updates, and a NEWS item accompany this patch.

											
										
										
											2006-07-21 11:51:07 -03:00
+								    params is a sequence of 2-tuples containing (param name, string value).
-												Docstring consistency with the updated .tex files.

											
										
										
											2002-09-30 21:44:13 -03:00
+								    """
-												More RFC 2231 improvements for the email 4.0 package.  As Mark Sapiro rightly
points out there are really two types of continued headers defined in this
RFC (i.e. "encoded" parameters with the form "name*0*=" and unencoded
parameters with the form "name*0="), but we were handling them both the
same way and that isn't correct.

This patch should be much more RFC compliant in that only encoded params are
%-decoded and the charset/language information is only extracted if there are
any encoded params in the segments.  If there are no encoded params then the
RFC says that there will be no charset/language parts.

Note however that this will change the return value for Message.get_param() in
some cases.  For example, whereas before if you had all unencoded param
continuations you would have still gotten a 3-tuple back from this method
(with charset and language == None), you will now get just a string.  I don't
believe this is a backward incompatible change though because the
documentation for this method already indicates that either return value is
possible and that you must do an isinstance(val, tuple) check to discriminate
between the two.  (Yeah that API kind of sucks but we can't change /that/
without breaking code.)

Test cases, some documentation updates, and a NEWS item accompany this patch.

											
										
										
											2006-07-21 11:51:07 -03:00
+								    # Copy params so we don't mess with the original
 								    params = params[:]
-												Oleg Broytmann's support for RFC 2231 encoded parameters, SF patch #549133

Specifically,

decode_rfc2231(), encode_rfc2231(): Functions to encode and decode RFC
2231 style parameters.

decode_params(): Function to decode a list of parameters.

											
										
										
											2002-06-29 02:58:04 -03:00
+								    new_params = []
-												More RFC 2231 improvements for the email 4.0 package.  As Mark Sapiro rightly
points out there are really two types of continued headers defined in this
RFC (i.e. "encoded" parameters with the form "name*0*=" and unencoded
parameters with the form "name*0="), but we were handling them both the
same way and that isn't correct.

This patch should be much more RFC compliant in that only encoded params are
%-decoded and the charset/language information is only extracted if there are
any encoded params in the segments.  If there are no encoded params then the
RFC says that there will be no charset/language parts.

Note however that this will change the return value for Message.get_param() in
some cases.  For example, whereas before if you had all unencoded param
continuations you would have still gotten a 3-tuple back from this method
(with charset and language == None), you will now get just a string.  I don't
believe this is a backward incompatible change though because the
documentation for this method already indicates that either return value is
possible and that you must do an isinstance(val, tuple) check to discriminate
between the two.  (Yeah that API kind of sucks but we can't change /that/
without breaking code.)

Test cases, some documentation updates, and a NEWS item accompany this patch.

											
										
										
											2006-07-21 11:51:07 -03:00
+								    # Map parameter's name to a list of continuations.  The values are a
 								    # 3-tuple of the continuation number, the string value, and a flag
 								    # specifying whether a particular segment is %-encoded.
-												Oleg Broytmann's support for RFC 2231 encoded parameters, SF patch #549133

Specifically,

decode_rfc2231(), encode_rfc2231(): Functions to encode and decode RFC
2231 style parameters.

decode_params(): Function to decode a list of parameters.

											
										
										
											2002-06-29 02:58:04 -03:00
+								    rfc2231_params = {}
-												More RFC 2231 improvements for the email 4.0 package.  As Mark Sapiro rightly
points out there are really two types of continued headers defined in this
RFC (i.e. "encoded" parameters with the form "name*0*=" and unencoded
parameters with the form "name*0="), but we were handling them both the
same way and that isn't correct.

This patch should be much more RFC compliant in that only encoded params are
%-decoded and the charset/language information is only extracted if there are
any encoded params in the segments.  If there are no encoded params then the
RFC says that there will be no charset/language parts.

Note however that this will change the return value for Message.get_param() in
some cases.  For example, whereas before if you had all unencoded param
continuations you would have still gotten a 3-tuple back from this method
(with charset and language == None), you will now get just a string.  I don't
believe this is a backward incompatible change though because the
documentation for this method already indicates that either return value is
possible and that you must do an isinstance(val, tuple) check to discriminate
between the two.  (Yeah that API kind of sucks but we can't change /that/
without breaking code.)

Test cases, some documentation updates, and a NEWS item accompany this patch.

											
										
										
											2006-07-21 11:51:07 -03:00
+								    name, value = params.pop(0)
-												Oleg Broytmann's support for RFC 2231 encoded parameters, SF patch #549133

Specifically,

decode_rfc2231(), encode_rfc2231(): Functions to encode and decode RFC
2231 style parameters.

decode_params(): Function to decode a list of parameters.

											
										
										
											2002-06-29 02:58:04 -03:00
+								    new_params.append((name, value))
-												More RFC 2231 improvements for the email 4.0 package.  As Mark Sapiro rightly
points out there are really two types of continued headers defined in this
RFC (i.e. "encoded" parameters with the form "name*0*=" and unencoded
parameters with the form "name*0="), but we were handling them both the
same way and that isn't correct.

This patch should be much more RFC compliant in that only encoded params are
%-decoded and the charset/language information is only extracted if there are
any encoded params in the segments.  If there are no encoded params then the
RFC says that there will be no charset/language parts.

Note however that this will change the return value for Message.get_param() in
some cases.  For example, whereas before if you had all unencoded param
continuations you would have still gotten a 3-tuple back from this method
(with charset and language == None), you will now get just a string.  I don't
believe this is a backward incompatible change though because the
documentation for this method already indicates that either return value is
possible and that you must do an isinstance(val, tuple) check to discriminate
between the two.  (Yeah that API kind of sucks but we can't change /that/
without breaking code.)

Test cases, some documentation updates, and a NEWS item accompany this patch.

											
										
										
											2006-07-21 11:51:07 -03:00
+								    while params:
 								        name, value = params.pop(0)
 								        if name.endswith('*'):
 								            encoded = True
 								        else:
 								            encoded = False
-												Oleg Broytmann's support for RFC 2231 encoded parameters, SF patch #549133

Specifically,

decode_rfc2231(), encode_rfc2231(): Functions to encode and decode RFC
2231 style parameters.

decode_params(): Function to decode a list of parameters.

											
										
										
											2002-06-29 02:58:04 -03:00
+								        value = unquote(value)
 								        mo = rfc2231_continuation.match(name)
 								        if mo:
 								            name, num = mo.group('name', 'num')
 								            if num is not None:
 								                num = int(num)
-												More RFC 2231 improvements for the email 4.0 package.  As Mark Sapiro rightly
points out there are really two types of continued headers defined in this
RFC (i.e. "encoded" parameters with the form "name*0*=" and unencoded
parameters with the form "name*0="), but we were handling them both the
same way and that isn't correct.

This patch should be much more RFC compliant in that only encoded params are
%-decoded and the charset/language information is only extracted if there are
any encoded params in the segments.  If there are no encoded params then the
RFC says that there will be no charset/language parts.

Note however that this will change the return value for Message.get_param() in
some cases.  For example, whereas before if you had all unencoded param
continuations you would have still gotten a 3-tuple back from this method
(with charset and language == None), you will now get just a string.  I don't
believe this is a backward incompatible change though because the
documentation for this method already indicates that either return value is
possible and that you must do an isinstance(val, tuple) check to discriminate
between the two.  (Yeah that API kind of sucks but we can't change /that/
without breaking code.)

Test cases, some documentation updates, and a NEWS item accompany this patch.

											
										
										
											2006-07-21 11:51:07 -03:00
+								            rfc2231_params.setdefault(name, []).append((num, value, encoded))
-												Oleg Broytmann's support for RFC 2231 encoded parameters, SF patch #549133

Specifically,

decode_rfc2231(), encode_rfc2231(): Functions to encode and decode RFC
2231 style parameters.

decode_params(): Function to decode a list of parameters.

											
										
										
											2002-06-29 02:58:04 -03:00
+								        else:
 								            new_params.append((name, '"%s"' % quote(value)))
 								    if rfc2231_params:
 								        for name, continuations in rfc2231_params.items():
 								            value = []
-												More RFC 2231 improvements for the email 4.0 package.  As Mark Sapiro rightly
points out there are really two types of continued headers defined in this
RFC (i.e. "encoded" parameters with the form "name*0*=" and unencoded
parameters with the form "name*0="), but we were handling them both the
same way and that isn't correct.

This patch should be much more RFC compliant in that only encoded params are
%-decoded and the charset/language information is only extracted if there are
any encoded params in the segments.  If there are no encoded params then the
RFC says that there will be no charset/language parts.

Note however that this will change the return value for Message.get_param() in
some cases.  For example, whereas before if you had all unencoded param
continuations you would have still gotten a 3-tuple back from this method
(with charset and language == None), you will now get just a string.  I don't
believe this is a backward incompatible change though because the
documentation for this method already indicates that either return value is
possible and that you must do an isinstance(val, tuple) check to discriminate
between the two.  (Yeah that API kind of sucks but we can't change /that/
without breaking code.)

Test cases, some documentation updates, and a NEWS item accompany this patch.

											
										
										
											2006-07-21 11:51:07 -03:00
+								            extended = False
-												Oleg Broytmann's support for RFC 2231 encoded parameters, SF patch #549133

Specifically,

decode_rfc2231(), encode_rfc2231(): Functions to encode and decode RFC
2231 style parameters.

decode_params(): Function to decode a list of parameters.

											
										
										
											2002-06-29 02:58:04 -03:00
+								            # Sort by number
 								            continuations.sort()
-												More RFC 2231 improvements for the email 4.0 package.  As Mark Sapiro rightly
points out there are really two types of continued headers defined in this
RFC (i.e. "encoded" parameters with the form "name*0*=" and unencoded
parameters with the form "name*0="), but we were handling them both the
same way and that isn't correct.

This patch should be much more RFC compliant in that only encoded params are
%-decoded and the charset/language information is only extracted if there are
any encoded params in the segments.  If there are no encoded params then the
RFC says that there will be no charset/language parts.

Note however that this will change the return value for Message.get_param() in
some cases.  For example, whereas before if you had all unencoded param
continuations you would have still gotten a 3-tuple back from this method
(with charset and language == None), you will now get just a string.  I don't
believe this is a backward incompatible change though because the
documentation for this method already indicates that either return value is
possible and that you must do an isinstance(val, tuple) check to discriminate
between the two.  (Yeah that API kind of sucks but we can't change /that/
without breaking code.)

Test cases, some documentation updates, and a NEWS item accompany this patch.

											
										
										
											2006-07-21 11:51:07 -03:00
+								            # And now append all values in numerical order, converting
 								            # %-encodings for the encoded segments.  If any of the
 								            # continuation names ends in a *, then the entire string, after
 								            # decoding segments and concatenating, must have the charset and
 								            # language specifiers at the beginning of the string.
 								            for num, s, encoded in continuations:
 								                if encoded:
 								                    s = urllib.unquote(s)
 								                    extended = True
 								                value.append(s)
 								            value = quote(EMPTYSTRING.join(value))
 								            if extended:
 								                charset, language, value = decode_rfc2231(value)
 								                new_params.append((name, (charset, language, '"%s"' % value)))
 								            else:
 								                new_params.append((name, '"%s"' % value))
-												Oleg Broytmann's support for RFC 2231 encoded parameters, SF patch #549133

Specifically,

decode_rfc2231(), encode_rfc2231(): Functions to encode and decode RFC
2231 style parameters.

decode_params(): Function to decode a list of parameters.

											
										
										
											2002-06-29 02:58:04 -03:00
+								    return new_params
-												Big email 3.0 API changes, with updated unit tests and documentation.
Briefly (from the NEWS file):

- Updates for the email package:
  + All deprecated APIs that in email 2.x issued warnings have been removed:
    _encoder argument to the MIMEText constructor, Message.add_payload(),
    Utils.dump_address_pair(), Utils.decode(), Utils.encode()
  + New deprecations: Generator.__call__(), Message.get_type(),
    Message.get_main_type(), Message.get_subtype(), the 'strict' argument to
    the Parser constructor.  These will be removed in email 3.1.
  + Support for Python earlier than 2.3 has been removed (see PEP 291).
  + All defect classes have been renamed to end in 'Defect'.
  + Some FeedParser fixes; also a MultipartInvariantViolationDefect will be
    added to messages that claim to be multipart but really aren't.
  + Updates to documentation.

											
										
										
											2004-10-03 00:16:19 -03:00
 								def collapse_rfc2231_value(value, errors='replace',
 								                           fallback_charset='us-ascii'):
 								    """Collapse a parameter value into a single unquoted string.
 								    value is either a plain string or a tuple whose first item is a
 								    charset name and whose third item is the (possibly quoted) text.  A
 								    plain string is simply unquoted and returned.  For a tuple, the text
 								    is unquoted and decoded to unicode using the given charset (us-ascii
 								    when the charset item is empty or None), with errors as the decoding
 								    error policy.  If Python does not know the charset, fallback_charset
 								    is used for the decode instead.
 								    """
 								    if not isinstance(value, tuple):
 								        return unquote(value)
 								    text = unquote(value[2])
 								    codec = value[0] or 'us-ascii'
 								    try:
 								        decoded = unicode(text, codec, errors)
 								    except LookupError:
 								        # XXX charset is unknown to Python.
 								        decoded = unicode(text, fallback_charset, errors)
 								    return decoded