cpython/Lib/email/Utils.py

105 lines
2.8 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Copyright (C) 2001 Python Software Foundation
# Author: barry@zope.com (Barry Warsaw)
"""Miscellaneous utilities.
"""
import re
from rfc822 import unquote, quote, parseaddr
from rfc822 import dump_address_pair
from rfc822 import AddrlistClass as _AddrlistClass
from rfc822 import parsedate_tz, parsedate, mktime_tz, formatdate
from quopri import decodestring as _qdecode
import base64
# Intrapackage imports
from Encoders import _bencode, _qencode
# Separator used by getaddresses() to join several field values into one
# address-list string.
COMMASPACE = ', '
# Empty unicode string; decode() joins its pieces on this so the result is
# a unicode object.
UEMPTYSTRING = u''
# Helpers
def _identity(s):
    """Return `s` unchanged (no-op decoder used as a fallback by decode())."""
    return s
def _bdecode(s):
    """Base64-decode `s`, dropping a trailing newline the input did not have.

    A false value (empty string, None) is returned unchanged.  Otherwise `s`
    is decoded with base64.decodestring(); if the decoded value ends with a
    newline while `s` itself does not, that one trailing newline is removed
    from the result.
    """
    if not s:
        return s
    # We can't quite use the plain base64 round trip, since the encoder
    # tacks on a "courtesy newline".  Blech!
    value = base64.decodestring(s)
    # endswith() instead of indexing [-1]: the decoded value can legitimately
    # be empty (e.g. whitespace-only input), and indexing would raise
    # IndexError in that case.
    if not s.endswith('\n') and value.endswith('\n'):
        return value[:-1]
    return value
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue.

    The field values are joined with ', ' into a single string, which is
    then parsed as one address list.
    """
    joined = COMMASPACE.join(fieldvalues)
    return _AddrlistClass(joined).getaddrlist()
# Matches one RFC 2047 encoded word of the form =?charset?encoding?atom?=
# and captures its three fields as the named groups `charset`, `encoding`
# and `atom`.  Because the pattern has three capturing groups, ecre.split()
# inserts those three values between the surrounding text pieces, which is
# what decode() relies on.  Matching is case-insensitive (so "Q"/"B" are
# accepted too); `.` does not match a newline since DOTALL is not set.
ecre = re.compile(r'''
=\? # literal =?
(?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
\? # literal ?
(?P<encoding>[qb]) # either a "q" or a "b", case insensitive
\? # literal ?
(?P<atom>.*?) # non-greedy up to the next ?= is the atom
\?= # literal ?=
''', re.VERBOSE | re.IGNORECASE)
def decode(s):
    """Return a decoded string according to RFC 2047, as a unicode string.

    `s` may mix plain text with any number of encoded words of the form
    =?charset?encoding?atom?=.  Text outside encoded words is passed through
    as is.  Each atom is decoded with quoted-printable (encoding "q") or
    base64 (encoding "b") and then converted to unicode using the word's
    charset.  All pieces are joined, in order, into a single unicode string.
    """
    rtn = []
    # Peel off one encoded word at a time.  With maxsplit=1 a match yields
    # exactly [leading text, charset, encoding, atom, remainder]; no match
    # yields a one-element list.
    parts = ecre.split(s, 1)
    while parts:
        # Fewer than 5 parts means no encoded word was found, so whatever is
        # left is plain text and we're done.
        if len(parts) < 5:
            rtn.extend(parts)
            break
        leading, charset, encoding, atom, remainder = parts
        # The first element is any non-encoded leading text
        rtn.append(leading)
        # The encoding must be either `q' or `b', case-insensitive
        code = encoding.lower()
        if code == 'q':
            func = _qdecode
        elif code == 'b':
            func = _bdecode
        else:
            # The regex only admits q/b, so this is purely defensive.
            func = _identity
        # Decode and get the unicode in the charset
        rtn.append(unicode(func(atom), charset))
        # Continue with the remainder, again splitting off only ONE encoded
        # word.  An unbounded split would return every remaining word at
        # once, and since only the first five elements are consumed per
        # pass, the third and later encoded words would be silently lost.
        parts = ecre.split(remainder, 1)
    # Now that we've decoded everything, we just need to join all the parts
    # together into the final string.
    return UEMPTYSTRING.join(rtn)
def encode(s, charset='iso-8859-1', encoding='q'):
    """Encode a string according to RFC 2047.

    `s` is the text to encode.  `charset` names its character set and is
    lower-cased in the output.  `encoding` selects the transfer encoding,
    case-insensitively: 'q' encodes with _qencode(), 'b' with _bencode().

    Returns the encoded word '=?charset?encoding?text?='.
    Raises ValueError if `encoding` is neither 'q' nor 'b'.
    """
    code = encoding.lower()
    if code == 'q':
        estr = _qencode(s)
    elif code == 'b':
        estr = _bencode(s)
    else:
        # Call form of raise: valid in every Python version, unlike the
        # old "raise Class, value" statement syntax.
        raise ValueError('Illegal encoding code: ' + encoding)
    return '=?%s?%s?%s?=' % (charset.lower(), code, estr)