2011-07-26 13:01:08 -03:00
|
|
|
#!/usr/bin/env python3
|
2011-07-26 12:36:19 -03:00
|
|
|
"""Classes to parse mailer-daemon messages."""
|
1995-10-19 06:35:31 -03:00
|
|
|
|
1996-07-20 23:50:30 -03:00
|
|
|
import calendar
|
2008-06-12 19:23:59 -03:00
|
|
|
import email.message
|
1998-10-09 10:27:49 -03:00
|
|
|
import re
|
1995-10-19 06:35:31 -03:00
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
|
2011-07-26 12:36:19 -03:00
|
|
|
|
|
|
|
class Unparseable(Exception):
    """Raised when a message cannot be parsed for delivery errors."""
|
|
|
|
|
1995-10-19 06:35:31 -03:00
|
|
|
|
2008-06-12 19:23:59 -03:00
|
|
|
class ErrorMessage(email.message.Message):
    """A mail message that can classify itself and list its delivery errors.

    The ``sub`` attribute starts out empty; is_warning() stores the
    lower-cased subject in it when the message is not a warning, and
    get_errors() passes it on to the parsers.
    """

    def __init__(self):
        super().__init__()
        self.sub = ''

    def is_warning(self):
        """Return 1 if the subject marks a warning-only message, else 0.

        A subject that starts with 'waiting mail' or contains 'warning'
        (compared case-insensitively) counts as a warning.  For any other
        non-empty subject the lower-cased text is saved in ``self.sub``.
        A missing or empty subject yields 0 and leaves ``self.sub`` alone.
        """
        subject = self.get('Subject')
        if not subject:
            return 0
        subject = subject.lower()
        if subject.startswith('waiting mail') or 'warning' in subject:
            return 1
        self.sub = subject
        return 0

    def _body_text(self):
        """Return the text that follows the message headers."""
        payload = self.get_payload()
        if payload is None:
            return ''
        if isinstance(payload, str):
            return payload
        # Multipart message: concatenate the text of every part.  The MIME
        # boundary lines themselves are not reproduced.
        return '\n'.join(part.as_string() for part in payload)

    def get_errors(self):
        """Return the list of error strings found by the first parser that works.

        Each function in EMPARSERS is called as parser(fp, sub), where fp is
        a fresh file-like object positioned at the start of the body.

        Raises:
            Unparseable: if every parser raised Unparseable.
        """
        # email.message.Message keeps no file object and has no
        # rewindbody(), so give every parser its own in-memory stream.
        import io

        body = self._body_text()
        for parser in EMPARSERS:
            try:
                return parser(io.StringIO(body), self.sub)
            except Unparseable:
                pass
        raise Unparseable
|
1995-10-19 06:35:31 -03:00
|
|
|
|
1998-10-09 10:27:49 -03:00
|
|
|
# Patterns that locate the failed address(es) in a message.
# Each entry is either a regular expression or a tuple of two.
# A single expression must contain a group (?P<email>...) that captures
# the email address.  It may also contain a group (?P<reason>...) that
# captures the reason (error message); if no reason is captured, the
# emparse_list_reason list is used to find one.
# In a tuple, the first expression finds a location and the second one
# is then applied repeatedly to collect multiple email addresses.  The
# second expression is matched (not searched) where the previous match
# ended.
# The entries are written as strings and compiled in place below.
emparse_list_list = [
    'error: (?P<reason>unresolvable): (?P<email>.+)',
    ('----- The following addresses had permanent fatal errors -----\n',
     '(?P<email>[^ \n].*)\n( .*\n)?'),
    'remote execution.*\n.*rmail (?P<email>.+)',
    ('The following recipients did not receive your message:\n\n',
     ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
    '------- Failure Reasons --------\n\n(?P<reason>.*)\n(?P<email>.*)',
    '^<(?P<email>.*)>:\n(?P<reason>.*)',
    '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
    '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
    '^Original-Recipient: rfc822;(?P<email>.*)',
    '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
    '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
    '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
    '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
    '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
]

# Compile every entry with re.MULTILINE and store the result in place:
# a string becomes one pattern, a tuple becomes a tuple of patterns.
emparse_list_list[:] = [
    re.compile(entry, re.MULTILINE) if isinstance(entry, str)
    else tuple(re.compile(part, re.MULTILINE) for part in entry)
    for entry in emparse_list_list
]
|
|
|
|
|
|
|
|
# Patterns used to find a reason (error message) when the address
# pattern did not capture one.
# A plain string is a template: "<>" is replaced by a copy of the
# (escaped) email address before the string is compiled.
# emparse_list() walks the entries in order and stops at the first
# pre-compiled pattern that matches, so order is important.
emparse_list_reason = [
    r'^5\d{2} <>\.\.\. (?P<reason>.*)',
    r'<>\.\.\. (?P<reason>.*)',
    re.compile(r'^<<< 5\d{2} (?P<reason>.*)', flags=re.M),
    re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
    re.compile('^Diagnostic-Code: (?P<reason>.*)', flags=re.M),
]

# emparse_list() only looks for addresses in the text that precedes the
# first line starting with "From:" (in any letter case).
emparse_list_from = re.compile('^From:', flags=re.I | re.M)
|
|
|
|
def emparse_list(fp, sub):
    """Extract 'address: reason' strings from the message text read from fp.

    Addresses are looked for with the patterns in emparse_list_list, but
    only in the text before the first "From:" line.  The first pattern
    that matches decides the address list.  If that pattern captured no
    reason, the subject *sub* (minus a leading 'returned mail: ') is the
    fallback, and emparse_list_reason is consulted for something better.

    Args:
        fp: file-like object; fp.read() must return the text to scan.
        sub: lower-cased subject line, used as the fallback reason.

    Returns:
        A list of strings of the form 'address: reason' with runs of
        whitespace collapsed to single spaces.

    Raises:
        Unparseable: if no address was found.
    """
    data = fp.read()
    from_match = emparse_list_from.search(data)
    from_index = len(data) if from_match is None else from_match.start(0)

    errors = []
    emails = []
    reason = None
    for regexp in emparse_list_list:
        if isinstance(regexp, tuple):
            # First pattern finds the spot, second one is matched
            # back-to-back from there to collect every address.
            res = regexp[0].search(data, 0, from_index)
            if res is None:
                continue
            try:
                reason = res.group('reason')
            except IndexError:
                # the pattern has no 'reason' group
                pass
            while True:
                res = regexp[1].match(data, res.end(0), from_index)
                if res is None:
                    break
                emails.append(res.group('email'))
            break
        res = regexp.search(data, 0, from_index)
        if res is not None:
            emails.append(res.group('email'))
            try:
                reason = res.group('reason')
            except IndexError:
                # the pattern has no 'reason' group
                pass
            break
    if not emails:
        raise Unparseable

    if not reason:
        reason = sub
        if reason[:15] == 'returned mail: ':
            reason = reason[15:]
        for regexp in emparse_list_reason:
            if isinstance(regexp, str):
                # Template: look for a reason specific to each address.
                # Walk backwards so deleting an entry keeps the remaining
                # indexes valid.
                for i in range(len(emails) - 1, -1, -1):
                    address = emails[i]
                    exp = re.compile(
                        re.escape(address).join(regexp.split('<>')),
                        re.MULTILINE)
                    res = exp.search(data)
                    if res is not None:
                        errors.append(' '.join(
                            (address.strip() + ': ' + res.group('reason')).split()))
                        del emails[i]
                continue
            res = regexp.search(data)
            if res is not None:
                reason = res.group('reason')
                break

    # Whatever addresses are left share the common reason.
    for address in emails:
        errors.append(' '.join((address.strip() + ': ' + reason).split()))
    return errors
|
1995-10-19 06:35:31 -03:00
|
|
|
|
2011-07-26 12:36:19 -03:00
|
|
|
# Parser functions tried in order by ErrorMessage.get_errors().  Each one
# is called with the message's file object and subject; an Unparseable
# raised by a parser makes get_errors() move on to the next one.
EMPARSERS = [emparse_list]
|
1996-07-20 23:50:30 -03:00
|
|
|
|
|
|
|
def sort_numeric(a, b):
    """Compare a and b by their integer values, cmp-style.

    Both arguments are converted with int().  Returns -1 if a is smaller,
    1 if a is larger and 0 if the two are equal.
    """
    left, right = int(a), int(b)
    return (left > right) - (left < right)
|
1995-10-19 06:35:31 -03:00
|
|
|
|
|
|
|
def parsedir(dir, modify):
    """Parse the messages stored in folder *dir* and print a report.

    Every file in *dir* whose name consists only of digits is read as a
    mail message, in increasing numeric order.  One status line is printed
    per file, followed by totals and a list of the distinct errors with
    their count and the first and last file (and date) they were seen in.

    The current working directory is not changed, so several relative
    folder names can be processed one after the other.

    Args:
        dir: path of the folder that holds the message files.
        modify: if true, every file that was a warning or was parsed
            successfully is renamed to ',' + its name.  Unparseable files
            are never renamed.
    """
    from email.utils import parseaddr

    pat = re.compile('^[0-9]*$')
    errordict = {}     # error text -> number of occurrences
    errorfirst = {}    # error text -> 'file (date)' of the first occurrence
    errorlast = {}     # error text -> 'file (date)' of the last occurrence
    nok = nwarn = nbad = 0

    # find all numeric file names and sort them by numeric value
    files = [fn for fn in os.listdir(dir) if pat.match(fn) is not None]
    files.sort(key=int)

    for fn in files:
        path = os.path.join(dir, fn)
        # Lets try to parse the file.
        with open(path) as fp:
            m = email.message_from_file(fp, _class=ErrorMessage)
            sender = parseaddr(m.get('From', ''))
            print('%s\t%-40s\t' % (fn, sender[1]), end=' ')

            warning_only = m.is_warning()
            parsed = False
            errors = []
            if not warning_only:
                try:
                    errors = m.get_errors()
                except Unparseable:
                    pass
                else:
                    parsed = True
        # The file is closed at this point, before it may get renamed.

        if warning_only:
            print('warning only')
            nwarn += 1
        elif not parsed:
            print('** Not parseable')
            nbad += 1
            continue
        else:
            print(len(errors), 'errors')

            # Remember them
            where = '%s (%s)' % (fn, _short_date(m))
            for e in errors:
                if e not in errordict:
                    errordict[e] = 1
                    errorfirst[e] = where
                else:
                    errordict[e] += 1
                errorlast[e] = where
            nok += 1

        if modify:
            os.rename(path, os.path.join(dir, ',' + fn))

    print('--------------')
    print(nok, 'files parsed,', nwarn, 'files warning-only,', end=' ')
    print(nbad, 'files unparseable')
    print('--------------')
    summary = sorted(
        (errordict[e], errorfirst[e], errorlast[e], e) for e in errordict)
    for num, first, last, e in summary:
        print('%d %s - %s\t%s' % (num, first, last, e))


def _short_date(msg):
    """Return the Date header of msg as 'Mon DD', or '??????' if unusable."""
    from email.utils import parsedate

    header = msg.get('date')
    try:
        parsed = parsedate(header) if header else None
    except (TypeError, ValueError, IndexError):
        # best effort only: a malformed header must not abort the report
        parsed = None
    if not parsed:
        return '??????'
    mm, dd = parsed[1:3]
    return '%s %02d' % (calendar.month_abbr[mm], dd)
|
1995-10-19 06:35:31 -03:00
|
|
|
|
|
|
|
def main():
    """Run parsedir() on the folders named on the command line.

    A leading '-d' argument turns on modify mode and is removed from
    sys.argv.  Without folder arguments a built-in default folder is used.
    """
    args = sys.argv
    modify = 0
    if len(args) > 1 and args[1] == '-d':
        modify = 1
        del args[1]
    folders = args[1:] or ['/ufs/jack/Mail/errorsinbox']
    for folder in folders:
        parsedir(folder, modify)
|
1995-10-19 06:35:31 -03:00
|
|
|
|
|
|
|
# Run main() when executed as a script.  The second test also fires when
# the program name in sys.argv[0] is equal to this module's __name__.
if __name__ == '__main__' or sys.argv[0] == __name__:
    main()
|