mirror of
https://github.com/python/cpython.git
synced 2025-01-14 20:45:34 +08:00
44b548dda8
And most of the tools. Patch by Emanuel Barry, reviewed by me, Serhiy Storchaka, and Martin Panter.
247 lines
7.9 KiB
Python
Executable File
247 lines
7.9 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Classes to parse mailer-daemon messages."""
|
|
|
|
import calendar
import email.message
import email.utils
import io
import os
import re
import sys
|
|
|
|
|
|
class Unparseable(Exception):
    """Raised when no bounce-message parser can extract any failed address."""
|
|
|
|
|
|
class ErrorMessage(email.message.Message):
    """A mailer-daemon message that can classify itself and list its errors.

    Instances are meant to be created by the email parser, e.g.
    ``email.message_from_file(fp, _class=ErrorMessage)``.
    """

    def __init__(self):
        email.message.Message.__init__(self)
        # Lower-cased Subject, stored by is_warning() for messages that are
        # not warnings; get_errors() hands it to the parsers, which use it
        # as the reason of last resort.
        self.sub = ''

    def is_warning(self):
        """Return true if the Subject marks this as a warning, not a failure.

        As a side effect, the lower-cased subject of a non-warning message
        is remembered in ``self.sub``.
        """
        sub = self.get('Subject')
        if not sub:
            return False
        # str() also copes with a Header object, which has no lower().
        sub = str(sub).lower()
        if sub.startswith('waiting mail') or 'warning' in sub:
            return True
        self.sub = sub
        return False

    def _body_text(self):
        """Return everything after the message headers as a single string.

        email.message.Message keeps no file object to rewind, so the text
        is rebuilt from the payload; the sub-parts of a multipart message
        are flattened together with their own headers.
        """
        payload = self.get_payload()
        if isinstance(payload, list):
            return '\n'.join(part.as_string() for part in payload)
        if payload is None:
            return ''
        return str(payload)

    def get_errors(self):
        """Return the list of "address: reason" strings found in the body.

        Each parser in EMPARSERS is tried in turn; the result of the first
        one that does not raise Unparseable is returned.

        Raises:
            Unparseable: if every parser gives up.
        """
        body = self._body_text()
        for parser in EMPARSERS:
            try:
                # A fresh stream per parser, positioned at the body start.
                return parser(io.StringIO(body), self.sub)
            except Unparseable:
                continue
        raise Unparseable
|
|
|
|
# Table of patterns that locate the failed address(es) in a bounce body.
#
# An entry is either a single pattern or a 2-tuple of patterns.
#
# Single pattern: must define a group (?P<email>...) capturing the address
# and may define a group (?P<reason>...) capturing the error message.  When
# no reason is captured, the emparse_list_reason table is consulted instead.
#
# 2-tuple: the first pattern is searched for to find a location; the second
# is then matched (not searched) repeatedly, each time starting where the
# previous match ended, to pick up several addresses in a row.
#
# The entries are written as strings and replaced by compiled re objects
# immediately below.
emparse_list_list = [
    'error: (?P<reason>unresolvable): (?P<email>.+)',
    ('----- The following addresses had permanent fatal errors -----\n',
     '(?P<email>[^ \n].*)\n( .*\n)?'),
    'remote execution.*\n.*rmail (?P<email>.+)',
    ('The following recipients did not receive your message:\n\n',
     ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
    '------- Failure Reasons --------\n\n(?P<reason>.*)\n(?P<email>.*)',
    '^<(?P<email>.*)>:\n(?P<reason>.*)',
    '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
    '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
    '^Original-Recipient: rfc822;(?P<email>.*)',
    '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
    '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
    '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
    '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
    '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
]

# Compile every entry, storing the result back into the same list object.
# The comprehension keeps its loop variables out of the module namespace.
emparse_list_list[:] = [
    re.compile(entry, re.MULTILINE) if isinstance(entry, str)
    else tuple(re.compile(part, re.MULTILINE) for part in entry)
    for entry in emparse_list_list
]
|
|
|
|
# Table of patterns used to find the reason (error message) when the
# address pattern did not capture one.
#
# A plain string is a per-address template: every "<>" in it is replaced by
# the (escaped) email address before it is compiled.  A compiled pattern is
# address-independent.  Entries are tried in order and the search stops at
# the first compiled pattern that matches, so the order matters.
emparse_list_reason = [
    r'^5\d{2} <>\.\.\. (?P<reason>.*)',
    r'<>\.\.\. (?P<reason>.*)',
    re.compile(r'^<<< 5\d{2} (?P<reason>.*)', flags=re.MULTILINE),
    re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
    re.compile('^Diagnostic-Code: (?P<reason>.*)', flags=re.MULTILINE),
]

# Finds the first line that starts with "From:" (in any letter case).
emparse_list_from = re.compile('^From:', flags=re.MULTILINE | re.IGNORECASE)
|
|
def emparse_list(fp, sub):
    """Parse a bounce body into a list of "address: reason" strings.

    fp  -- readable object; its whole remaining content is the body text.
    sub -- fallback reason, used when the body yields none.

    Addresses are looked for only in the text before the first "From:"
    line, using the first entry of emparse_list_list that matches.  When
    that entry captured no reason, emparse_list_reason is consulted (over
    the whole text), first per address and then globally.

    Raises Unparseable if no address was found.
    """
    data = fp.read()
    from_match = emparse_list_from.search(data)
    from_index = len(data) if from_match is None else from_match.start(0)

    errors = []
    emails = []
    reason = None

    for pattern in emparse_list_list:
        if isinstance(pattern, tuple):
            locator, repeater = pattern[0], pattern[1]
            hit = locator.search(data, 0, from_index)
            if hit is None:
                continue
            try:
                reason = hit.group('reason')
            except IndexError:
                # The locator defines no 'reason' group.
                pass
            # Chain matches of the second pattern, each one starting
            # exactly where the previous match ended.
            hit = repeater.match(data, hit.end(0), from_index)
            while hit is not None:
                emails.append(hit.group('email'))
                hit = repeater.match(data, hit.end(0), from_index)
            break

        hit = pattern.search(data, 0, from_index)
        if hit is None:
            continue
        emails.append(hit.group('email'))
        try:
            reason = hit.group('reason')
        except IndexError:
            # The pattern defines no 'reason' group.
            pass
        break

    if not emails:
        raise Unparseable

    if not reason:
        reason = sub
        if reason[:15] == 'returned mail: ':
            reason = reason[15:]
        for finder in emparse_list_reason:
            if isinstance(finder, str):
                # Per-address template.  Walk the indices backwards so that
                # deleting a resolved address does not shift the others.
                for idx in reversed(range(len(emails))):
                    addr = emails[idx]
                    specific = re.compile(
                        finder.replace('<>', re.escape(addr)), re.MULTILINE)
                    found = specific.search(data)
                    if found is None:
                        continue
                    line = addr.strip() + ': ' + found.group('reason')
                    errors.append(' '.join(line.split()))
                    del emails[idx]
                continue
            found = finder.search(data)
            if found is not None:
                reason = found.group('reason')
                break

    # Whatever addresses are left share the common reason.
    errors.extend(' '.join((addr.strip() + ': ' + reason).split())
                  for addr in emails)
    return errors
|
|
|
|
# Parsers tried in order by ErrorMessage.get_errors(); each is called as
# parser(fp, sub) and signals failure by raising Unparseable.
EMPARSERS = [emparse_list]
|
|
|
|
def sort_numeric(a, b):
    """Compare *a* and *b* by integer value, cmp-style.

    Both arguments are converted with int().  Returns -1, 0 or 1 when *a*
    is respectively less than, equal to or greater than *b*.
    """
    left, right = int(a), int(b)
    return (left > right) - (left < right)
|
|
|
|
def _message_date(msg):
    """Return msg's Date header as 'Mon DD', or '??????' if it is unusable."""
    header = msg.get('Date')
    if not header:
        return '??????'
    parsed = email.utils.parsedate(str(header))
    if parsed is None:
        return '??????'
    try:
        return '%s %02d' % (calendar.month_abbr[parsed[1]], parsed[2])
    except (IndexError, TypeError):
        return '??????'


def parsedir(dir, modify):
    """Parse every numerically named file in *dir* and print a report.

    dir    -- directory holding the messages; it becomes the current
              working directory for the rest of the process.
    modify -- if true, every file that was handled (a warning, or parsed
              successfully) is renamed by prefixing its name with ','.

    One status line is printed per file, followed by totals and by one
    line per distinct error: its count and the first and last file (with
    date) in which it was seen.  Unparseable files are counted and left
    untouched.
    """
    os.chdir(dir)
    pat = re.compile('^[0-9]*$')
    errordict = {}      # error text -> number of occurrences
    errorfirst = {}     # error text -> "file (date)" of first occurrence
    errorlast = {}      # error text -> "file (date)" of last occurrence
    nok = nwarn = nbad = 0

    # Find all numeric file names and process them in numeric order.
    files = sorted((fn for fn in os.listdir('.') if pat.match(fn)), key=int)

    for fn in files:
        # The file is closed on leaving the with block, also on errors and
        # before any rename below.
        with open(fn) as fp:
            m = email.message_from_file(fp, _class=ErrorMessage)
            sender = email.utils.parseaddr(str(m.get('From', '')))
            print('%s\t%-40s\t'%(fn, sender[1]), end=' ')

            if m.is_warning():
                errors = None       # marks "warning only"
            else:
                try:
                    errors = m.get_errors()
                except Unparseable:
                    print('** Not parseable')
                    nbad += 1
                    continue

        if errors is None:
            print('warning only')
            nwarn += 1
        else:
            print(len(errors), 'errors')
            # The same for every error of this message, so compute it once.
            where = '%s (%s)' % (fn, _message_date(m))
            for e in errors:
                if e not in errordict:
                    errordict[e] = 1
                    errorfirst[e] = where
                else:
                    errordict[e] += 1
                errorlast[e] = where
            nok += 1

        if modify:
            os.rename(fn, ','+fn)

    print('--------------')
    print(nok, 'files parsed,',nwarn,'files warning-only,', end=' ')
    print(nbad,'files unparseable')
    print('--------------')
    summary = sorted((errordict[e], errorfirst[e], errorlast[e], e)
                     for e in errordict)
    for num, first, last, e in summary:
        print('%d %s - %s\t%s' % (num, first, last, e))
|
|
|
|
def main():
    """Command-line entry point: mailerdaemon [-d] [folder ...].

    A leading -d turns on modify mode and is removed from sys.argv.  Every
    remaining argument is parsed as a folder; without any, the built-in
    default folder is used.
    """
    modify = 0
    if sys.argv[1:2] == ['-d']:
        modify = 1
        del sys.argv[1]
    folders = sys.argv[1:] or ['/ufs/jack/Mail/errorsinbox']
    for folder in folders:
        parsedir(folder, modify)
|
|
|
|
# Run main() when executed as a script; the second test also fires when
# sys.argv[0] happens to equal this module's name.
if __name__ == '__main__' or sys.argv[0] == __name__:
    main()
|