2002-04-10 18:01:31 -03:00
|
|
|
|
# Copyright (C) 2001,2002 Python Software Foundation
|
2001-09-23 00:17:28 -03:00
|
|
|
|
# Author: barry@zope.com (Barry Warsaw)
|
|
|
|
|
|
|
|
|
|
"""Basic message object for the email package object model.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import re
|
2002-04-10 18:01:31 -03:00
|
|
|
|
import warnings
|
2001-09-23 00:17:28 -03:00
|
|
|
|
from cStringIO import StringIO
|
2002-06-29 02:56:15 -03:00
|
|
|
|
from types import ListType, TupleType, StringType
|
2001-09-23 00:17:28 -03:00
|
|
|
|
|
|
|
|
|
# Intrapackage imports
|
2002-06-02 16:05:51 -03:00
|
|
|
|
from email import Errors
|
|
|
|
|
from email import Utils
|
|
|
|
|
from email import Charset
|
2001-09-23 00:17:28 -03:00
|
|
|
|
|
2001-09-26 02:41:51 -03:00
|
|
|
|
# Separator used when joining a header's main value with its parameters.
SEMISPACE = '; '
|
2002-04-10 18:01:31 -03:00
|
|
|
|
|
2002-09-28 17:40:25 -03:00
|
|
|
|
# Compatibility shim: if looking up the names True and False raises NameError,
# integer stand-ins are bound at module level instead.
try:
    True, False
except NameError:
    True = 1
    False = 0
|
|
|
|
|
|
2002-04-10 18:01:31 -03:00
|
|
|
|
# Regular expression used to split header parameters. BAW: this may be too
|
|
|
|
|
# simple. It isn't strictly RFC 2045 (section 5.1) compliant, but it catches
|
|
|
|
|
# most headers found in the wild. We may eventually need a full fledged
|
|
|
|
|
# parser.
|
2001-10-25 19:43:46 -03:00
|
|
|
|
# Matches a semicolon together with any whitespace on either side of it.
paramre = re.compile(r'\s*;\s*')
|
2002-04-10 18:01:31 -03:00
|
|
|
|
# Regular expression that matches `special' characters in parameters, the
|
|
|
|
|
# existence of which forces quoting of the parameter value.
|
|
|
|
|
# The character class is the RFC 2045 `tspecials' set plus the space character.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2002-06-29 02:56:15 -03:00
|
|
|
|
# Helper functions
|
2002-09-28 17:40:25 -03:00
|
|
|
|
def _formatparam(param, value=None, quote=True):
    """Render one header parameter as `param', `param=value' or `param="value"'.

    A value of None or of zero length yields the bare parameter name.  A tuple
    value is treated as an RFC 2231 triple (charset, language, value), where
    charset is a string rather than a Charset instance; it is encoded with
    Utils.encode_rfc2231() and an asterisk is appended to the parameter name.
    The value is wrapped in double quotes when quote is true or when it
    contains one of the tspecials characters.
    """
    if value is None or len(value) == 0:
        return param
    if isinstance(value, TupleType):
        # Encode as per RFC 2231
        param = param + '*'
        value = Utils.encode_rfc2231(value[2], value[0], value[1])
    # BAW: Please check this.  I think that if quote is set it should force
    # quoting even if not necessary.
    needs_quotes = quote or tspecials.search(value)
    if not needs_quotes:
        return '%s=%s' % (param, value)
    return '%s="%s"' % (param, Utils.quote(value))
|
2001-09-26 02:41:51 -03:00
|
|
|
|
|
2001-09-23 00:17:28 -03:00
|
|
|
|
|
2002-06-29 02:56:15 -03:00
|
|
|
|
def _unquotevalue(value):
    """Unquote a parameter value with Utils.unquote().

    A plain value is unquoted directly.  For a tuple, only the third item is
    unquoted; the first two items are passed through unchanged and a 3-tuple
    is returned.
    """
    if not isinstance(value, TupleType):
        return Utils.unquote(value)
    return value[0], value[1], Utils.unquote(value[2])
|
2002-06-29 02:56:15 -03:00
|
|
|
|
|
|
|
|
|
|
2002-08-27 19:34:44 -03:00
|
|
|
|
|
2001-09-23 00:17:28 -03:00
|
|
|
|
class Message:
|
|
|
|
|
"""Basic message object for use inside the object tree.
|
|
|
|
|
|
|
|
|
|
A message object is defined as something that has a bunch of RFC 2822
|
|
|
|
|
headers and a payload. If the body of the message is a multipart, then
|
|
|
|
|
the payload is a list of Messages, otherwise it is a string.
|
|
|
|
|
|
|
|
|
|
These objects implement part of the `mapping' interface, which assumes
|
|
|
|
|
there is exactly one occurrence of the header per message. Some headers
|
2002-09-28 17:40:25 -03:00
|
|
|
|
do in fact appear multiple times (e.g. Received) and for those headers,
|
2001-09-23 00:17:28 -03:00
|
|
|
|
you must use the explicit API to set or get all the headers. Not all of
|
|
|
|
|
the mapping methods are implemented.
|
|
|
|
|
"""
|
|
|
|
|
def __init__(self):
    """Create an empty message with no headers, envelope line or payload."""
    # Headers are kept as an ordered list of (name, value) pairs.
    self._headers = []
    self._payload = None
    self._unixfrom = None
    self._charset = None
    # Defaults for multipart messages
    self.epilogue = None
    self.preamble = None
    # Content type reported when no Content-Type header is present
    self._default_type = 'text/plain'
|
2001-09-23 00:17:28 -03:00
|
|
|
|
|
|
|
|
|
def __str__(self):
    """Return the whole message flattened to a string.

    Equivalent to as_string(unixfrom=True), so the `unixfrom' line is
    requested along with the headers and body.
    """
    flattened = self.as_string(unixfrom=True)
    return flattened
|
2001-09-23 00:17:28 -03:00
|
|
|
|
|
2002-09-28 17:40:25 -03:00
|
|
|
|
def as_string(self, unixfrom=False):
    """Return the entire formatted message as a string.

    The message is flattened by an email.Generator.Generator writing into an
    in-memory buffer.  Optional `unixfrom' is forwarded to the generator's
    flatten() call; when True the Unix From_ envelope header is included.
    """
    # Generator is imported at call time rather than at module level.
    from email.Generator import Generator
    buf = StringIO()
    Generator(buf).flatten(self, unixfrom=unixfrom)
    return buf.getvalue()
|
|
|
|
|
|
|
|
|
|
def is_multipart(self):
    """Return True if the message consists of multiple parts.

    A message is multipart exactly when its payload is a list.  The check
    uses isinstance(), matching add_payload() and get_payload(), so a payload
    that is an instance of a list subclass also counts as multipart.
    """
    if isinstance(self._payload, ListType):
        return True
    return False
|
2001-09-23 00:17:28 -03:00
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
# Unix From_ line
|
|
|
|
|
#
|
|
|
|
|
def set_unixfrom(self, unixfrom):
    """Store `unixfrom' as the message's Unix From_ envelope line."""
    self._unixfrom = unixfrom
|
|
|
|
|
|
|
|
|
|
def get_unixfrom(self):
    """Return the stored Unix From_ envelope line (None until one is set)."""
    return self._unixfrom
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
# Payload manipulation.
|
|
|
|
|
#
|
|
|
|
|
def add_payload(self, payload):
    """Add the given payload to the current payload (deprecated).

    A DeprecationWarning pointing at attach() is always issued first.  Then:

    - with no current payload, the payload becomes the given value (a
      scalar);
    - with a list payload, the value is appended to it;
    - otherwise the existing scalar and the new value are combined into a
      two-element list, provided the main content type is `multipart' or
      missing.  For any other main type Errors.MultipartConversionError is
      raised.
    """
    warnings.warn('add_payload() is deprecated, use attach() instead.',
                  DeprecationWarning, 2)
    current = self._payload
    if current is None:
        self._payload = payload
        return
    if isinstance(current, ListType):
        current.append(payload)
        return
    if self.get_main_type() in (None, 'multipart'):
        self._payload = [current, payload]
        return
    raise Errors.MultipartConversionError(
        'Message main content type must be "multipart" or missing')
|
|
|
|
|
|
2002-04-10 18:01:31 -03:00
|
|
|
|
def attach(self, payload):
    """Append the given payload to the message's list of payloads.

    When there is no payload yet, a new one-element list is created;
    otherwise the value is appended to the existing payload via its append()
    method.  To set the payload to a scalar object (e.g. because you're
    attaching a message/rfc822 subpart), use set_payload() instead.
    """
    existing = self._payload
    if existing is not None:
        existing.append(payload)
        return
    self._payload = [payload]
|
2001-09-23 00:17:28 -03:00
|
|
|
|
|
2002-09-28 17:40:25 -03:00
|
|
|
|
def get_payload(self, i=None, decode=False):
    """Return a reference to the payload.

    The payload is typically either a list object or a string.  If you
    mutate the list object, you modify the message's payload in place.

    Optional i returns that index into the payload; TypeError is raised if i
    is given but the payload is not a list.

    Optional decode is a flag indicating whether the payload should be
    decoded according to the Content-Transfer-Encoding header.  When True
    and the message is not a multipart, the payload is decoded if the
    header's value (compared case-insensitively) is `quoted-printable' or
    `base64'.  If some other encoding is used, or the header is missing, the
    payload is returned as-is (undecoded).  If the message is a multipart
    and the decode flag is True, then None is returned.
    """
    if i is None:
        payload = self._payload
    elif isinstance(self._payload, ListType):
        payload = self._payload[i]
    else:
        # Indexing is only meaningful for a list payload.
        raise TypeError(i)
    if not decode:
        return payload
    if self.is_multipart():
        return None
    cte = self.get('content-transfer-encoding', '').lower()
    if cte == 'quoted-printable':
        return Utils._qdecode(payload)
    if cte == 'base64':
        return Utils._bdecode(payload)
    # Everything else, including encodings with 8bit or 7bit, is returned
    # unchanged.
    return payload
|
|
|
|
|
|
2002-04-10 18:01:31 -03:00
|
|
|
|
def set_payload(self, payload, charset=None):
    """Replace the payload with the given value.

    If charset is not None it is then handed to set_charset(), which
    documents the values it accepts.
    """
    self._payload = payload
    if charset is None:
        return
    self.set_charset(charset)
|
|
|
|
|
|
|
|
|
|
def set_charset(self, charset):
    """Set the charset of the payload to a given character set.

    charset can be a string or a Charset object.  If it is a string, it
    will be converted to a Charset object by calling Charset's constructor.
    If charset is None, the charset parameter will be removed from the
    Content-Type field and the stored charset cleared.  Anything else will
    generate a TypeError.

    The message will be assumed to be a text message encoded with
    charset.input_charset.  It will be converted to charset.output_charset
    and encoded properly, if needed, when generating the plain text
    representation of the message.  MIME headers (MIME-Version,
    Content-Type, Content-Transfer-Encoding) will be added as needed.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    if isinstance(charset, StringType):
        charset = Charset.Charset(charset)
    if not isinstance(charset, Charset.Charset):
        raise TypeError(charset)
    self._charset = charset
    if not self.has_key('MIME-Version'):
        self.add_header('MIME-Version', '1.0')
    if self.has_key('Content-Type'):
        self.set_param('charset', charset.get_output_charset())
    else:
        self.add_header('Content-Type', 'text/plain',
                        charset=charset.get_output_charset())
    if not self.has_key('Content-Transfer-Encoding'):
        cte = charset.get_body_encoding()
        if callable(cte):
            # A callable body encoding is invoked with the message itself
            # instead of being written into the header.
            cte(self)
        else:
            self.add_header('Content-Transfer-Encoding', cte)
|
2001-09-23 00:17:28 -03:00
|
|
|
|
|
2002-04-10 18:01:31 -03:00
|
|
|
|
def get_charset(self):
    """Return the charset stored for the payload by set_charset().

    This is None until a charset has been set, and again after
    set_charset(None).
    """
    return self._charset
|
2002-05-23 12:15:30 -03:00
|
|
|
|
|
2001-09-23 00:17:28 -03:00
|
|
|
|
#
|
|
|
|
|
# MAPPING INTERFACE (partial)
|
|
|
|
|
#
|
|
|
|
|
def __len__(self):
    """Return the number of stored headers, counting duplicates separately."""
    header_count = len(self._headers)
    return header_count
|
|
|
|
|
|
|
|
|
|
def __getitem__(self, name):
    """Get a header value.

    Delegates to get(), so None is returned when the header is missing
    rather than an exception being raised.

    If the header appears several times only one occurrence is returned.
    Use get_all() to get all the values matching a header field name.
    """
    value = self.get(name)
    return value
|
|
|
|
|
|
|
|
|
|
def __setitem__(self, name, val):
    """Append a header with the given field name and value.

    Note: an existing header with the same field name is not overwritten;
    the new pair is added after it.  Use __delitem__() first to delete any
    existing headers.
    """
    pair = (name, val)
    self._headers.append(pair)
|
|
|
|
|
|
|
|
|
|
def __delitem__(self, name):
    """Delete all occurrences of a header, if present.

    Field names are compared case-insensitively.  Does not raise an
    exception if the header is missing.
    """
    name = name.lower()
    # The header list is rebound to a new list holding the surviving pairs.
    self._headers = [(k, v) for k, v in self._headers if k.lower() != name]
|
|
|
|
|
|
|
|
|
|
def __contains__(self, key):
    """Return whether a header named `key' is present (case-insensitive)."""
    wanted = key.lower()
    present = [name.lower() for name, _ in self._headers]
    return wanted in present
|
|
|
|
|
|
|
|
|
|
def has_key(self, name):
    """Return true if the message contains the header."""
    # A fresh list serves as a unique sentinel, so a header whose stored
    # value is None is still reported as present.
    sentinel = []
    found = self.get(name, sentinel)
    return found is not sentinel
|
2001-09-23 00:17:28 -03:00
|
|
|
|
|
|
|
|
|
def keys(self):
    """Return a list of all the message's header field names.

    Names are listed in the order in which the headers are stored, and may
    contain duplicates.  Any fields deleted and re-inserted are always
    appended to the header list.
    """
    names = []
    for name, _ in self._headers:
        names.append(name)
    return names
|
|
|
|
|
|
|
|
|
|
def values(self):
    """Return a list of all the message's header values.

    Values are listed in the order in which the headers are stored, and may
    contain duplicates.  Any fields deleted and re-inserted are always
    appended to the header list.
    """
    collected = []
    for _, value in self._headers:
        collected.append(value)
    return collected
|
|
|
|
|
|
|
|
|
|
def items(self):
    """Get all the message's header fields and values.

    The result is a new list of (name, value) pairs in stored order, and may
    contain duplicates.  Any fields deleted and re-inserted are always
    appended to the header list.  Because a copy is returned, changing the
    returned list does not change the message's headers.
    """
    snapshot = list(self._headers)
    return snapshot
|
|
|
|
|
|
|
|
|
|
def get(self, name, failobj=None):
    """Get a header value.

    Field names are compared case-insensitively and the value of the first
    matching header is returned.  Like __getitem__() but return failobj
    instead of None when the field is missing.
    """
    wanted = name.lower()
    for field, value in self._headers:
        if field.lower() != wanted:
            continue
        return value
    return failobj
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
# Additional useful stuff
|
|
|
|
|
#
|
|
|
|
|
|
|
|
|
|
def get_all(self, name, failobj=None):
    """Return a list of all the values for the named field.

    Field names are compared case-insensitively.  Values are listed in the
    order in which the headers are stored, and may contain duplicates.  Any
    fields deleted and re-inserted are always appended to the header list.

    If no such fields exist, failobj is returned (defaults to None).
    """
    wanted = name.lower()
    matches = [v for k, v in self._headers if k.lower() == wanted]
    if matches:
        return matches
    return failobj
|
|
|
|
|
|
|
|
|
|
def add_header(self, _name, _value, **_params):
    """Extended header setting.

    _name is the header field to add and _value its main value; a _value of
    None is left out.  Keyword arguments set additional parameters for the
    header field, with underscores in their names converted to dashes.  A
    parameter whose value is None is added as the bare key; any other
    parameter is formatted by _formatparam() with its default quoting.  The
    pieces are joined with `; ' and appended as a new header.

    Example:

    msg.add_header('content-disposition', 'attachment', filename='bud.gif')
    """
    pieces = []
    if _value is not None:
        pieces.append(_value)
    for key, val in _params.items():
        dashed = key.replace('_', '-')
        if val is None:
            pieces.append(dashed)
        else:
            pieces.append(_formatparam(dashed, val))
    self._headers.append((_name, SEMISPACE.join(pieces)))
|
|
|
|
|
|
2002-09-06 00:38:12 -03:00
|
|
|
|
def replace_header(self, _name, _value):
    """Replace a header.

    Replace the first matching header found in the message, retaining
    header order and the original spelling of the field name.  Names are
    compared case-insensitively.  If no matching header was found, a
    KeyError carrying the lower-cased name is raised.
    """
    _name = _name.lower()
    for index in range(len(self._headers)):
        field = self._headers[index][0]
        if field.lower() == _name:
            self._headers[index] = (field, _value)
            return
    raise KeyError(_name)
|
|
|
|
|
|
2002-07-19 19:24:55 -03:00
|
|
|
|
#
|
|
|
|
|
# These methods are silently deprecated in favor of get_content_type() and
|
|
|
|
|
# friends (see below). They will be noisily deprecated in email 3.0.
|
|
|
|
|
#
|
|
|
|
|
|
2001-09-23 00:17:28 -03:00
|
|
|
|
def get_type(self, failobj=None):
    """Returns the message's content type.

    The value of the Content-Type header up to its first parameter
    separator is returned, coerced to lowercase and stripped of surrounding
    whitespace; normally this has the form `maintype/subtype'.  If there was
    no Content-Type header in the message, failobj is returned (defaults to
    None).
    """
    sentinel = []
    raw = self.get('content-type', sentinel)
    if raw is sentinel:
        return failobj
    first_piece = paramre.split(raw)[0]
    return first_piece.lower().strip()
|
2001-09-23 00:17:28 -03:00
|
|
|
|
|
|
|
|
|
def get_main_type(self, failobj=None):
    """Return the message's main content type if present.

    failobj is returned when get_type() finds no content type, or when the
    content type does not contain exactly one slash.
    """
    sentinel = []
    ctype = self.get_type(sentinel)
    if ctype is sentinel or ctype.count('/') != 1:
        return failobj
    return ctype.split('/')[0]
|
2001-09-23 00:17:28 -03:00
|
|
|
|
|
|
|
|
|
def get_subtype(self, failobj=None):
    """Return the message's content subtype if present.

    failobj is returned when get_type() finds no content type, or when the
    content type does not contain exactly one slash.
    """
    sentinel = []
    ctype = self.get_type(sentinel)
    if ctype is sentinel or ctype.count('/') != 1:
        return failobj
    return ctype.split('/')[1]
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
# Use these three methods instead of the three above.
|
|
|
|
|
#
|
|
|
|
|
|
|
|
|
|
def get_content_type(self):
    """Returns the message's content type.

    The returned string is coerced to lowercase and returned as a single
    string of the form `maintype/subtype'.  If there was no Content-Type
    header in the message, the default type as given by get_default_type()
    will be returned.  Since messages always have a default type this will
    always return a value.

    The current state of RFC standards define a message's default type to
    be text/plain unless it appears inside a multipart/digest container,
    in which case it would be message/rfc822.
    """
    sentinel = []
    raw = self.get('content-type', sentinel)
    if raw is sentinel:
        # This should have no parameters
        return self.get_default_type()
    ctype = paramre.split(raw)[0].lower().strip()
    # RFC 2045, section 5.2 says if it's invalid, use text/plain
    if ctype.count('/') == 1:
        return ctype
    return 'text/plain'
|
2002-07-19 19:24:55 -03:00
|
|
|
|
|
|
|
|
|
def get_content_maintype(self):
    """Returns the message's main content type.

    This is the `maintype' part of the string returned by
    get_content_type(), i.e. everything before the first slash.
    """
    maintype = self.get_content_type().split('/')[0]
    return maintype
|
|
|
|
|
|
|
|
|
|
def get_content_subtype(self):
    """Returns the message's sub content type.

    This is the `subtype' part of the string returned by
    get_content_type(), i.e. the piece following the first slash.
    """
    subtype = self.get_content_type().split('/')[1]
    return subtype
|
2001-09-23 00:17:28 -03:00
|
|
|
|
|
2002-07-08 23:46:12 -03:00
|
|
|
|
def get_default_type(self):
    """Return the `default' content type.

    Most messages have a default content type of text/plain, except for
    messages that are subparts of multipart/digest containers.  Such
    subparts then have a default content type of message/rfc822.  The value
    returned is whatever was last stored by set_default_type().
    """
    return self._default_type
|
|
|
|
|
|
|
|
|
|
def set_default_type(self, ctype):
    """Set the `default' content type.

    ctype should be either "text/plain" or "message/rfc822", although this
    is not enforced.  The default content type is kept on the message
    object only; it is not stored in the Content-Type header.
    """
    self._default_type = ctype
|
|
|
|
|
|
2001-09-26 02:41:51 -03:00
|
|
|
|
def _get_params_preserve(self, failobj, header):
    """Return the parameters of `header' with their quoting left intact.

    Like get_params() but preserves the quoting of values.  failobj is
    returned if the header is missing.  Otherwise the header value is split
    on semicolons and each piece becomes a (name, value) pair, split on its
    first `=' sign with both sides stripped; a piece without `=' is a bare
    attribute whose value is the empty string.  The list is passed through
    Utils.decode_params() before being returned.

    BAW: should this be part of the public interface?
    """
    sentinel = []
    raw = self.get(header, sentinel)
    if raw is sentinel:
        return failobj
    pairs = []
    for piece in paramre.split(raw):
        if '=' in piece:
            name, val = piece.split('=', 1)
            name = name.strip()
            val = val.strip()
        else:
            # Must have been a bare attribute
            name = piece.strip()
            val = ''
        pairs.append((name, val))
    return Utils.decode_params(pairs)
|
|
|
|
|
|
2002-09-28 17:40:25 -03:00
|
|
|
|
def get_params(self, failobj=None, header='content-type', unquote=True):
    """Return the message's Content-Type parameters, as a list.

    The elements of the returned list are 2-tuples of key/value pairs, as
    split on the `=' sign.  The left hand side of the `=' is the key, while
    the right hand side is the value.  If there is no `=' sign in the
    parameter the value is the empty string.  The value is as described in
    the get_param() method.

    Optional failobj is the object to return if there is no Content-Type
    header.  Optional header is the header to search instead of
    Content-Type.  If unquote is True, the value is unquoted.
    """
    sentinel = []
    preserved = self._get_params_preserve(sentinel, header)
    if preserved is sentinel:
        return failobj
    if not unquote:
        return preserved
    unquoted = []
    for key, val in preserved:
        unquoted.append((key, _unquotevalue(val)))
    return unquoted
|
2001-09-23 00:17:28 -03:00
|
|
|
|
|
2002-09-28 17:40:25 -03:00
|
|
|
|
def get_param(self, param, failobj=None, header='content-type',
              unquote=True):
    """Return the parameter value if found in the Content-Type header.

    Optional failobj is the object to return if there is no Content-Type
    header, or the Content-Type header has no such parameter.  Optional
    header is the header to search instead of Content-Type.

    Parameter keys are always compared case insensitively.  The return
    value can either be a string, or a 3-tuple if the parameter was RFC
    2231 encoded.  When it's a 3-tuple, the elements of the value are of
    the form (CHARSET, LANGUAGE, VALUE), where LANGUAGE may be the empty
    string.  Your application should be prepared to deal with these, and
    can convert the parameter to a Unicode string like so:

        param = msg.get_param('foo')
        if isinstance(param, tuple):
            param = unicode(param[2], param[0])

    In any case, the parameter value (either the returned string, or the
    VALUE item in the 3-tuple) is always unquoted, unless unquote is set
    to False.
    """
    if not self.has_key(header):
        return failobj
    for key, raw in self._get_params_preserve(failobj, header):
        if key.lower() != param.lower():
            continue
        if not unquote:
            return raw
        return _unquotevalue(raw)
    return failobj
|
|
|
|
|
|
2002-09-28 17:40:25 -03:00
|
|
|
|
def set_param(self, param, value, header='Content-Type', requote=True,
              charset=None, language=''):
    """Set a parameter in the Content-Type header.

    If the parameter already exists in the header, its value will be
    replaced with the new value.

    If header is Content-Type and has not yet been defined in this
    message, it will be set to "text/plain" and the new parameter and
    value will be appended, as per RFC 2045.

    An alternate header can be specified in the header argument, and all
    parameters will be quoted as appropriate unless requote is False.

    If charset is specified the parameter will be encoded according to RFC
    2231.  In this case language is optional.
    """
    if not isinstance(value, TupleType) and charset:
        value = (charset, language, value)

    if self.has_key(header) or header.lower() != 'content-type':
        ctype = self.get(header)
    else:
        ctype = 'text/plain'

    if self.get_param(param, header=header):
        # The parameter is already present with a non-empty value: rebuild
        # the whole header value, substituting the new value in place.
        rebuilt = ''
        for old_param, old_value in self.get_params(header=header,
                                                    unquote=requote):
            if old_param.lower() == param.lower():
                piece = _formatparam(param, value, requote)
            else:
                piece = _formatparam(old_param, old_value, requote)
            if rebuilt:
                rebuilt = SEMISPACE.join([rebuilt, piece])
            else:
                rebuilt = piece
        ctype = rebuilt
    elif ctype:
        ctype = SEMISPACE.join([ctype, _formatparam(param, value, requote)])
    else:
        ctype = _formatparam(param, value, requote)

    # The header is only rewritten when its value actually changed.
    if ctype != self.get(header):
        del self[header]
        self[header] = ctype
|
|
|
|
|
|
2002-09-28 17:40:25 -03:00
|
|
|
|
def del_param(self, param, header='content-type', requote=True):
    """Remove the given parameter completely from the Content-Type header.

    The header will be re-written in place without param or its value.
    All values will be quoted as appropriate unless requote is False.
    Optional header names the header to operate on instead of Content-Type;
    nothing happens if the message has no such header.  Parameter names are
    compared case-insensitively.
    """
    if not self.has_key(header):
        return
    new_ctype = ''
    # header must be passed by keyword: the first positional parameter of
    # get_params() is failobj, not header.
    for p, v in self.get_params(header=header, unquote=requote):
        if p.lower() != param.lower():
            if not new_ctype:
                new_ctype = _formatparam(p, v, requote)
            else:
                new_ctype = SEMISPACE.join([new_ctype,
                                            _formatparam(p, v, requote)])
    # The header is only rewritten when its value actually changed.
    if new_ctype != self.get(header):
        del self[header]
        self[header] = new_ctype
|
|
|
|
|
|
2002-09-28 17:40:25 -03:00
|
|
|
|
def set_type(self, type, header='Content-Type', requote=True):
    """Set the main type and subtype for the Content-Type header.

    type must be a string in the form "maintype/subtype", otherwise a
    ValueError is raised.

    This method replaces the Content-Type header, keeping all the
    parameters in place.  If requote is False, this leaves the existing
    header's quoting as is.  Otherwise, the parameters will be quoted (the
    default).

    An alternate header can be specified in the header argument.  When the
    Content-Type header is set, we'll always also add a MIME-Version
    header.
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError
    # Set the Content-Type, you get a MIME-Version
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if not self.has_key(header):
        self[header] = type
        return
    # header must be passed by keyword: the first positional parameter of
    # get_params() is failobj, not header.
    params = self.get_params(header=header, unquote=requote)
    del self[header]
    self[header] = type
    # Skip the first param; it's the old type.
    for p, v in params[1:]:
        self.set_param(p, v, header, requote)
|
|
|
|
|
|
2001-09-23 00:17:28 -03:00
|
|
|
|
def get_filename(self, failobj=None):
    """Return the filename associated with the payload if present.

    The filename is extracted from the Content-Disposition header's
    `filename' parameter, and it is unquoted.  failobj is returned when the
    header or the parameter is missing.  A parameter that comes back as a
    tuple (RFC 2231 encoded) is converted to a unicode string using the
    charset given as its first item.
    """
    sentinel = []
    filename = self.get_param('filename', sentinel, 'content-disposition')
    if filename is sentinel:
        return failobj
    if not isinstance(filename, TupleType):
        return _unquotevalue(filename.strip())
    # It's an RFC 2231 encoded parameter
    decoded = _unquotevalue(filename)
    return unicode(decoded[2], decoded[0])
|
2001-09-23 00:17:28 -03:00
|
|
|
|
|
|
|
|
|
def get_boundary(self, failobj=None):
    """Return the boundary associated with the payload if present.

    The boundary is extracted from the Content-Type header's `boundary'
    parameter, and it is unquoted.  failobj is returned when get_param()
    finds no such parameter.

    An RFC 2231 encoded boundary arrives as a tuple whose first item is
    the charset and whose third item is the raw value.  It is decoded with
    that charset (us-ascii when the charset was left blank) and encoded
    back to us-ascii, so a boundary that is not pure ASCII raises a
    Unicode error instead of being returned.
    """
    # A fresh list is identical only to itself, so it distinguishes "no
    # such parameter" from any value the header could really carry.
    missing = []
    boundary = self.get_param('boundary', missing)
    if boundary is missing:
        return failobj
    if isinstance(boundary, TupleType):
        # RFC 2231 encoded, so decode.  It better end up as ascii.  The
        # charset field may legally be empty, and unicode() cannot look up
        # an empty or None codec name, so assume us-ascii in that case.
        charset = boundary[0] or 'us-ascii'
        return unicode(boundary[2], charset).encode('us-ascii')
    return _unquotevalue(boundary.strip())
|
2001-09-23 00:17:28 -03:00
|
|
|
|
|
|
|
|
|
def set_boundary(self, boundary):
    """Set the boundary parameter in Content-Type to 'boundary'.

    Unlike deleting the Content-Type header and re-adding it with
    add_header(), this rewrites the header where it stands, so its
    position among the message's other headers is preserved.  The new
    boundary is always written surrounded by double quotes.

    HeaderParseError is raised if the message has no Content-Type header.
    """
    sentinel = []
    oldparams = self._get_params_preserve(sentinel, 'content-type')
    if oldparams is sentinel:
        # Without a Content-Type header there is no type to attach the
        # boundary to, and we cannot guess one.
        raise Errors.HeaderParseError('No Content-Type header found')
    quoted = '"%s"' % boundary
    # Rebuild the parameter list, swapping in the new boundary wherever
    # the old one appeared.
    rebuilt = []
    replaced = False
    for key, val in oldparams:
        if key.lower() == 'boundary':
            key, val = 'boundary', quoted
            replaced = True
        rebuilt.append((key, val))
    if not replaced:
        # The original Content-Type header had no boundary attribute, so
        # tack one on the end.  BAW: should we raise an exception instead?
        rebuilt.append(('boundary', quoted))
    # Render the parameters back into a header value; a parameter with an
    # empty value is written as the bare key.
    pieces = []
    for key, val in rebuilt:
        if val == '':
            pieces.append(key)
        else:
            pieces.append('%s=%s' % (key, val))
    newvalue = SEMISPACE.join(pieces)
    # Replace the existing Content-Type header with the new value, leaving
    # every other header untouched and in place.
    updated = []
    for name, value in self._headers:
        if name.lower() == 'content-type':
            updated.append((name, newvalue))
        else:
            updated.append((name, value))
    self._headers = updated
|
|
|
|
|
|
2002-05-19 20:44:19 -03:00
|
|
|
|
# walk() lives in a version-specific helper module.  Try the Python 2.2
# flavour first and fall back to the Python 2.1 one.
try:
    from email._compat22 import walk
except SyntaxError:
    # Importing _compat22 failed with a SyntaxError, so we must be running
    # on Python 2.1 (presumably _compat22 uses syntax 2.1 cannot parse).
    from email._compat21 import walk
|
2001-09-23 00:17:28 -03:00
|
|
|
|
|
2002-09-26 14:19:34 -03:00
|
|
|
|
def get_content_charset(self, failobj=None):
    """Return the charset parameter of the Content-Type header.

    If there is no Content-Type header, or if that header has no charset
    parameter, failobj is returned.

    An RFC 2231 encoded parameter arrives as a tuple whose first item is
    the encoding of the value and whose third item is the raw value.  It is
    decoded with that encoding (us-ascii when it was left blank) and
    encoded back to us-ascii, so a charset name that is not pure ASCII
    raises a Unicode error.  Any other value is returned unchanged.
    """
    # A fresh list is identical only to itself, so it distinguishes "no
    # such parameter" from any value the header could really carry.
    missing = []
    charset = self.get_param('charset', missing)
    if charset is missing:
        return failobj
    if isinstance(charset, TupleType):
        # RFC 2231 encoded, so decode it, and it better end up as ascii.
        # The encoding field may legally be empty, and unicode() cannot
        # look up an empty or None codec name, so assume us-ascii then.
        encoding = charset[0] or 'us-ascii'
        return unicode(charset[2], encoding).encode('us-ascii')
    return charset
|
|
|
|
|
|
2001-09-23 00:17:28 -03:00
|
|
|
|
def get_charsets(self, failobj=None):
    """Return a list containing the charset(s) used in this message.

    The message is traversed with walk(), and for every part visited the
    result of that part's get_content_charset(failobj) is collected: the
    charset parameter of its Content-Type header, or failobj (None by
    default) when the part declares no charset.

    walk() visits the container message itself as well as its subparts,
    so a non-multipart message still yields a list of length 1.
    """
    charsets = []
    for part in self.walk():
        charsets.append(part.get_content_charset(failobj))
    return charsets
|