1996-11-27 15:52:01 -04:00
|
|
|
#! /usr/bin/env python
|
1994-01-07 06:53:41 -04:00
|
|
|
|
|
|
|
# Extract statistics from ftp daemon log.
|
|
|
|
|
|
|
|
# Usage:
|
|
|
|
# ftpstats [-m maxitems] [-s search] [file]
|
|
|
|
# -m maxitems: restrict number of items in "top-N" lists, default 25.
|
|
|
|
# -s search: restrict statistics to lines containing this string.
|
2004-07-17 11:44:17 -03:00
|
|
|
# Default file is /usr/adm/ftpd; a "-" means read standard input.
|
1994-01-07 06:53:41 -04:00
|
|
|
|
|
|
|
# The script must be run on the host where the ftp daemon runs.
|
|
|
|
# (At CWI this is currently buizerd.)
|
|
|
|
|
|
|
|
import os
|
|
|
|
import sys
|
2006-03-16 02:50:13 -04:00
|
|
|
import re
|
1994-01-07 06:53:41 -04:00
|
|
|
import string
|
|
|
|
import getopt
|
|
|
|
|
2006-03-16 02:50:13 -04:00
|
|
|
# One log record is six '!'-separated fields.  The names in the comments
# below are the ones main() gives to the corresponding groups.
pat = (
    '^([a-zA-Z0-9 :]*)'   # 1: logtime
    '!(.*)'               # 2: loguser
    '!(.*)'               # 3: loghost
    '!([<>].*)'           # 4: logfile, prefixed by a '<' or '>' direction mark
    '!([0-9]+)'           # 5: logbytes
    '!([0-9]+)$'          # 6: a second numeric field (unused by the reports)
)
prog = re.compile(pat)
|
1994-01-07 06:53:41 -04:00
|
|
|
|
|
|
|
def main():
    """Parse the command line, tally the ftp log and print the reports.

    Options:
      -m maxitems  limit each "top-N" list to maxitems entries (default 25)
      -s search    only consider log lines that contain this substring

    The optional positional argument names the log file.  It defaults to
    /usr/adm/ftpd; "-" means read standard input.

    Exits with status 2 on a command-line error and with status 1 when the
    log file cannot be opened.
    """
    usage = 'usage: ftpstats [-m maxitems] [-s search] [file]'
    maxitems = 25
    search = None
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'm:s:')
    except getopt.error as msg:
        print(msg)
        print(usage)
        sys.exit(2)
    for o, a in opts:
        if o == '-m':
            try:
                maxitems = int(a)
            except ValueError:
                # Report a bad count as a usage error, not a traceback.
                print('-m requires an integer argument, not %r' % (a,))
                print(usage)
                sys.exit(2)
        elif o == '-s':
            search = a

    filename = '/usr/adm/ftpd'
    if args:
        filename = args[0]
    if filename == '-':
        f = sys.stdin
    else:
        try:
            f = open(filename, 'r')
        except IOError as msg:
            print('%s : %s' % (filename, msg))
            sys.exit(1)

    # Each table maps a key to the list of matching log records.
    bydate = {}
    bytime = {}
    byfile = {}
    bydir = {}
    byhost = {}
    byuser = {}
    bytype = {}
    lineno = 0
    try:
        for line in f:
            lineno += 1
            if search and search not in line:
                continue
            # re's match() returns None on failure; the groups live on the
            # returned match object, not on the compiled pattern.
            m = prog.match(line)
            if m is None:
                print('Bad line %d : %r' % (lineno, line))
                continue
            items = m.groups()
            logtime, loguser, loghost, logfile = items[:4]
            add(bydate, logtime[-4:] + ' ' + logtime[:6], items)
            add(bytime, logtime[7:9] + ':00-59', items)
            # The first character of the file field is the '<' or '>'
            # direction mark; the rest is the path.
            direction, logfile = logfile[0], logfile[1:]
            # The real path probably starts at the last //...
            i = logfile.rfind('//')
            if i >= 0:
                logfile = logfile[i + 1:]
            add(byfile, logfile + ' ' + direction, items)
            # Count the transfer for the file's directory and for every
            # ancestor directory; dirname() reaches a fixed point at the top.
            logdir = os.path.dirname(logfile)
            while 1:
                add(bydir, logdir + ' ' + direction, items)
                dirhead = os.path.dirname(logdir)
                if dirhead == logdir:
                    break
                logdir = dirhead
            add(byhost, loghost, items)
            add(byuser, loguser, items)
            add(bytype, direction, items)
    except KeyboardInterrupt:
        # Still report on whatever was read before the interrupt.
        print('Interrupted at line %d' % lineno)
    finally:
        if f is not sys.stdin:
            f.close()

    show(bytype, 'by transfer direction', maxitems)
    show(bydir, 'by directory', maxitems)
    show(byfile, 'by file', maxitems)
    show(byhost, 'by host', maxitems)
    show(byuser, 'by user', maxitems)
    showbar(bydate, 'by date')
    showbar(bytime, 'by time of day')
|
1994-01-07 06:53:41 -04:00
|
|
|
|
|
|
|
def showbar(dict, title):
    """Print a bar chart of the entries of dict, ordered by key.

    dict maps each key (a string) to a list of items; the length of that
    list is the key's count.  A centred '=' banner carrying title is
    printed first, then one line per key with the count, the key padded to
    the longest key, and a bar of '*' whose length is proportional to the
    count (the largest count gets the full bar width).
    """
    n = len(title)
    print('%s %s %s' % ('=' * ((70 - n) // 2), title, '=' * ((71 - n) // 2)))
    rows = [(len(dict[key]), key) for key in sorted(dict)]
    # The leading 0 keeps max() well-defined for an empty table.
    maxkeylength = max([0] + [len(key) for count, key in rows])
    maxcount = max([0] + [count for count, key in rows])
    # Room left on a 72-column line after the count column and the key.
    maxbarlength = 72 - maxkeylength - 7
    for count, key in rows:
        if maxcount:
            barlength = int(round(maxbarlength * float(count) / maxcount))
        else:
            # Every list is empty: avoid dividing by zero, draw no bar.
            barlength = 0
        print('%5d %-*s %s' % (count, maxkeylength, key, '*' * barlength))
|
1994-01-07 06:53:41 -04:00
|
|
|
|
|
|
|
def show(dict, title, maxitems):
    """Print a "top-N" table of the keys of dict, largest count first.

    dict maps each key to a list of items; the length of that list is the
    key's count.  A centred '=' banner carrying title is printed first
    (with ' (first N)' appended when the table has more than maxitems
    keys), followed by at most maxitems lines of "count key".  Keys with
    equal counts are listed in ascending key order.
    """
    if len(dict) > maxitems:
        title = title + ' (first %d)' % maxitems
    n = len(title)
    print('%s %s %s' % ('=' * ((70 - n) // 2), title, '=' * ((71 - n) // 2)))
    # Sorting on the negated count puts the biggest entries first while
    # leaving ties in ascending key order.
    ranked = sorted([(-len(dict[key]), key) for key in dict])
    for negcount, key in ranked[:maxitems]:
        print('%5d %s' % (-negcount, key))
|
1994-01-07 06:53:41 -04:00
|
|
|
|
|
|
|
def add(dict, key, item):
    """Append item to the list stored under key in dict.

    The list is created on first use, so dict ends up mapping every key to
    the list of all items added under it, in insertion order.
    """
    # setdefault replaces the has_key() test, which Python 3 removed.
    dict.setdefault(key, []).append(item)
|
1994-01-07 06:53:41 -04:00
|
|
|
|
2004-09-11 13:34:35 -03:00
|
|
|
# Produce the reports only when run as a script, not when imported.
if __name__ == '__main__':
    main()
|