mirror of https://github.com/python/cpython
442 lines
14 KiB
Python
442 lines
14 KiB
Python
|
# Copyright (C) 2002 Python Software Foundation
|
||
|
|
||
|
"""Email address parsing code.
|
||
|
|
||
|
Lifted directly from rfc822.py. This should eventually be rewritten.
|
||
|
"""
|
||
|
|
||
|
import calendar
import time
|
||
|
|
||
|
# Parse a date field
|
||
|
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
|
||
|
'aug', 'sep', 'oct', 'nov', 'dec',
|
||
|
'january', 'february', 'march', 'april', 'may', 'june', 'july',
|
||
|
'august', 'september', 'october', 'november', 'december']
|
||
|
|
||
|
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
|
||
|
|
||
|
# The timezone table does not include the military time zones defined
|
||
|
# in RFC822, other than Z. According to RFC1123, the description in
|
||
|
# RFC822 gets the signs wrong, so we can't rely on any such time
|
||
|
# zones. RFC1123 recommends that numeric timezone indicators be used
|
||
|
# instead of timezone names.
|
||
|
|
||
|
_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
|
||
|
'AST': -400, 'ADT': -300, # Atlantic (used in Canada)
|
||
|
'EST': -500, 'EDT': -400, # Eastern
|
||
|
'CST': -600, 'CDT': -500, # Central
|
||
|
'MST': -700, 'MDT': -600, # Mountain
|
||
|
'PST': -800, 'PDT': -700 # Pacific
|
||
|
}
|
||
|
|
||
|
|
||
|
def parsedate_tz(data):
|
||
|
"""Convert a date string to a time tuple.
|
||
|
|
||
|
Accounts for military timezones.
|
||
|
"""
|
||
|
data = data.split()
|
||
|
if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
|
||
|
# There's a dayname here. Skip it
|
||
|
del data[0]
|
||
|
if len(data) == 3: # RFC 850 date, deprecated
|
||
|
stuff = data[0].split('-')
|
||
|
if len(stuff) == 3:
|
||
|
data = stuff + data[1:]
|
||
|
if len(data) == 4:
|
||
|
s = data[3]
|
||
|
i = s.find('+')
|
||
|
if i > 0:
|
||
|
data[3:] = [s[:i], s[i+1:]]
|
||
|
else:
|
||
|
data.append('') # Dummy tz
|
||
|
if len(data) < 5:
|
||
|
return None
|
||
|
data = data[:5]
|
||
|
[dd, mm, yy, tm, tz] = data
|
||
|
mm = mm.lower()
|
||
|
if not mm in _monthnames:
|
||
|
dd, mm = mm, dd.lower()
|
||
|
if not mm in _monthnames:
|
||
|
return None
|
||
|
mm = _monthnames.index(mm)+1
|
||
|
if mm > 12: mm = mm - 12
|
||
|
if dd[-1] == ',':
|
||
|
dd = dd[:-1]
|
||
|
i = yy.find(':')
|
||
|
if i > 0:
|
||
|
yy, tm = tm, yy
|
||
|
if yy[-1] == ',':
|
||
|
yy = yy[:-1]
|
||
|
if not yy[0].isdigit():
|
||
|
yy, tz = tz, yy
|
||
|
if tm[-1] == ',':
|
||
|
tm = tm[:-1]
|
||
|
tm = tm.split(':')
|
||
|
if len(tm) == 2:
|
||
|
[thh, tmm] = tm
|
||
|
tss = '0'
|
||
|
elif len(tm) == 3:
|
||
|
[thh, tmm, tss] = tm
|
||
|
else:
|
||
|
return None
|
||
|
try:
|
||
|
yy = int(yy)
|
||
|
dd = int(dd)
|
||
|
thh = int(thh)
|
||
|
tmm = int(tmm)
|
||
|
tss = int(tss)
|
||
|
except ValueError:
|
||
|
return None
|
||
|
tzoffset = None
|
||
|
tz = tz.upper()
|
||
|
if _timezones.has_key(tz):
|
||
|
tzoffset = _timezones[tz]
|
||
|
else:
|
||
|
try:
|
||
|
tzoffset = int(tz)
|
||
|
except ValueError:
|
||
|
pass
|
||
|
# Convert a timezone offset into seconds ; -0500 -> -18000
|
||
|
if tzoffset:
|
||
|
if tzoffset < 0:
|
||
|
tzsign = -1
|
||
|
tzoffset = -tzoffset
|
||
|
else:
|
||
|
tzsign = 1
|
||
|
tzoffset = tzsign * ( (tzoffset/100)*3600 + (tzoffset % 100)*60)
|
||
|
tuple = (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
|
||
|
return tuple
|
||
|
|
||
|
|
||
|
def parsedate(data):
    """Convert a time string to a time tuple.

    Same as parsedate_tz() but with the trailing timezone offset dropped,
    leaving a 9-tuple.  Returns whatever parsedate_tz() returned (None)
    when the string could not be parsed.
    """
    t = parsedate_tz(data)
    if isinstance(t, tuple):
        return t[:9]
    return t
|
||
|
|
||
|
|
||
|
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    When the tuple carries no zone offset (data[9] is None) the fields are
    interpreted as local time.  Otherwise they are interpreted in the given
    zone and the result is seconds since the epoch in UTC.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    # Compute directly in UTC.  Going through time.mktime() and subtracting
    # time.timezone gives wrong answers whenever the local zone's offset at
    # that date differs from time.timezone.
    return calendar.timegm(data) - data[9]
|
||
|
|
||
|
|
||
|
def quote(str):
    """Escape a string for use inside a quoted string.

    Backslashes and double quotes are each prefixed with a backslash.
    The surrounding double quotes are NOT added; the caller supplies them.
    """
    # Backslashes first, so the ones added for the quotes are not doubled.
    escaped = str.replace('\\', '\\\\')
    return escaped.replace('"', '\\"')
|
||
|
|
||
|
|
||
|
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC-822 in front of you.

    The parser keeps a cursor (`pos') into the header text (`field').
    Each get*() method consumes one syntactic element starting at the
    cursor and leaves the cursor just past it.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        # Any of these characters terminates an atom.
        self.atomends = self.specials + self.LWS + self.CR
        self.field = field
        # Comments collected while parsing the current address.
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace and line breaks; comments met on the way are
        appended to self.commentlist.
        """
        skippable = self.LWS + '\n\r'
        while self.pos < len(self.field):
            if self.field[self.pos] in skippable:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, as (name, address)
        pairs.  An element that yields no address (e.g. a stray comma) is
        skipped and parsing continues with the rest of the field.
        """
        result = []
        # Iterate rather than recurse: one stack frame per address would hit
        # the recursion limit on long recipient lists.
        while self.pos < len(self.field):
            start = self.pos
            result.extend(self.getaddress())
            if self.pos == start:
                # Defensive: never spin without consuming input.
                break
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (name, address) pairs: one pair for a single
        address, several for a group, none when nothing could be parsed.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Take a copy: getphraselist() appends to self.commentlist in place,
        # and the addr-spec branch below re-parses from oldpos.  Restoring an
        # alias would leave those comments in the list to be collected twice.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < fieldlen:
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Unexpected special character: step over it.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None when the cursor is not on a '<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = 0
        self.pos += 1
        self.gotonext()
        adlist = ""
        while self.pos < len(self.field):
            if expectroute:
                # Domain of a source-route element; parsed and discarded.
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character that ended the addr-spec
                # (normally the closing '>').
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC-822 addr-spec."""
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos += 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            # Local part only; there is no domain to append.
            return ''.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC-822 comments
        are allowed within the parsed fragment.

        Returns the fragment's content without the delimiters; a backslash
        is dropped and the character after it is taken literally.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        escaped = 0
        self.pos += 1
        while self.pos < len(self.field):
            if escaped == 1:
                slist.append(self.field[self.pos])
                escaped = 0
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                # getcomment() has already moved the cursor past the nested
                # comment; advancing again would skip our own terminator.
                continue
            elif self.field[self.pos] == '\\':
                escaped = 1
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC-822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', 0)

    def getatom(self):
        """Parse an RFC-822 atom.

        Consumes characters up to (not including) the next character in
        self.atomends and returns them.
        """
        start = self.pos
        while self.pos < len(self.field):
            if self.field[self.pos] in self.atomends:
                break
            self.pos += 1
        return self.field[start:self.pos]

    def getphraselist(self):
        """Parse a sequence of RFC-822 phrases.

        A phrase is a sequence of words, which are in turn either
        RFC-822 atoms or quoted-strings.  Phrases are canonicalized
        by squeezing all runs of continuous whitespace into one space.

        Returns the list of words; comments met on the way are appended
        to self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                plist.append(self.getatom())

        return plist
|
||
|
|
||
|
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC822 addresses.

    The parsed (name, address) pairs are kept in self.addresslist.  The
    arithmetic operators treat address lists as sets of such pairs.
    """

    def __init__(self, field):
        """Parse `field'; an empty or None field gives an empty list."""
        AddrlistClass.__init__(self, field)
        if field:
            self.addresslist = self.getaddrlist()
        else:
            self.addresslist = []

    def __len__(self):
        return len(self.addresslist)

    def __str__(self):
        # Render each pair as '"name" <address>', or the bare address when
        # there is no name.  The name is re-escaped with quote() so that it
        # is valid inside the surrounding double quotes.
        parts = []
        for name, addr in self.addresslist:
            if name:
                parts.append('"%s" <%s>' % (quote(name), addr))
            else:
                parts.append(addr)
        return ", ".join(parts)

    def __add__(self, other):
        # Set union
        newaddr = AddressList(None)
        newaddr.addresslist = self.addresslist[:]
        for x in other.addresslist:
            if x not in self.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __iadd__(self, other):
        # Set union, in-place
        for x in other.addresslist:
            if x not in self.addresslist:
                self.addresslist.append(x)
        return self

    def __sub__(self, other):
        # Set difference
        newaddr = AddressList(None)
        for x in self.addresslist:
            if x not in other.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __isub__(self, other):
        # Set difference, in-place
        for x in other.addresslist:
            if x in self.addresslist:
                self.addresslist.remove(x)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
|