#! /usr/bin/env python # Extract statistics from ftp daemon log. # Usage: # ftpstats [-m maxitems] [-s search] [file] # -m maxitems: restrict number of items in "top-N" lists, default 25. # -s string: restrict statistics to lines containing this string. # Default file is /usr/adm/ftpd; a "-" means read standard input. # The script must be run on the host where the ftp daemon runs. # (At CWI this is currently buizerd.) import os import sys import regex import string import getopt pat = '^\([a-zA-Z0-9 :]*\)!\(.*\)!\(.*\)!\([<>].*\)!\([0-9]+\)!\([0-9]+\)$' prog = regex.compile(pat) def main(): maxitems = 25 search = None try: opts, args = getopt.getopt(sys.argv[1:], 'm:s:') except getopt.error, msg: print msg print 'usage: ftpstats [-m maxitems] [file]' sys.exit(2) for o, a in opts: if o == '-m': maxitems = string.atoi(a) if o == '-s': search = a file = '/usr/adm/ftpd' if args: file = args[0] if file == '-': f = sys.stdin else: try: f = open(file, 'r') except IOError, msg: print file, ':', msg sys.exit(1) bydate = {} bytime = {} byfile = {} bydir = {} byhost = {} byuser = {} bytype = {} lineno = 0 try: while 1: line = f.readline() if not line: break lineno = lineno + 1 if search and string.find(line, search) < 0: continue if prog.match(line) < 0: print 'Bad line', lineno, ':', repr(line) continue items = prog.group(1, 2, 3, 4, 5, 6) (logtime, loguser, loghost, logfile, logbytes, logxxx2) = items ## print logtime ## print '-->', loguser ## print '--> -->', loghost ## print '--> --> -->', logfile ## print '--> --> --> -->', logbytes ## print '--> --> --> --> -->', logxxx2 ## for i in logtime, loghost, logbytes, logxxx2: ## if '!' in i: print '???', i add(bydate, logtime[-4:] + ' ' + logtime[:6], items) add(bytime, logtime[7:9] + ':00-59', items) direction, logfile = logfile[0], logfile[1:] # The real path probably starts at the last //... while 1: i = string.find(logfile, '//') if i < 0: break logfile = logfile[i+1:] add(byfile, logfile + ' ' + direction, items) logdir = os.path.dirname(logfile) ## logdir = os.path.normpath(logdir) + '/.' while 1: add(bydir, logdir + ' ' + direction, items) dirhead = os.path.dirname(logdir) if dirhead == logdir: break logdir = dirhead add(byhost, loghost, items) add(byuser, loguser, items) add(bytype, direction, items) except KeyboardInterrupt: print 'Interrupted at line', lineno show(bytype, 'by transfer direction', maxitems) show(bydir, 'by directory', maxitems) show(byfile, 'by file', maxitems) show(byhost, 'by host', maxitems) show(byuser, 'by user', maxitems) showbar(bydate, 'by date') showbar(bytime, 'by time of day') def showbar(dict, title): n = len(title) print '='*((70-n)/2), title, '='*((71-n)/2) list = [] keys = dict.keys() keys.sort() for key in keys: n = len(str(key)) list.append((len(dict[key]), key)) maxkeylength = 0 maxcount = 0 for count, key in list: maxkeylength = max(maxkeylength, len(key)) maxcount = max(maxcount, count) maxbarlength = 72 - maxkeylength - 7 for count, key in list: barlength = int(round(maxbarlength*float(count)/maxcount)) bar = '*'*barlength print '%5d %-*s %s' % (count, maxkeylength, key, bar) def show(dict, title, maxitems): if len(dict) > maxitems: title = title + ' (first %d)'%maxitems n = len(title) print '='*((70-n)/2), title, '='*((71-n)/2) list = [] keys = dict.keys() for key in keys: list.append((-len(dict[key]), key)) list.sort() for count, key in list[:maxitems]: print '%5d %s' % (-count, key) def add(dict, key, item): if dict.has_key(key): dict[key].append(item) else: dict[key] = [item] main()