#! /usr/bin/env python

"""Show file statistics by extension."""

import os
import sys


class Stats:
    """Collect per-extension file statistics and print them as a table.

    ``self.stats`` maps an extension string to a dict of
    ``{counter name: integer}``.  The empty string ``""`` is used both for
    files without an extension and for the non-file counters ("dirs",
    "links", "unknown", "unlistable").
    """

    def __init__(self):
        self.stats = {}

    def statargs(self, args):
        """Gather statistics for every path in *args*.

        Directories are walked recursively, regular files are examined, and
        anything else is reported on stderr and counted as "unknown".
        """
        for arg in args:
            if os.path.isdir(arg):
                self.statdir(arg)
            elif os.path.isfile(arg):
                self.statfile(arg)
            else:
                # Report the offending argument itself.
                sys.stderr.write("Can't find %s\n" % arg)
                self.addstats("", "unknown", 1)

    def statdir(self, dir):
        """Count directory *dir* and recurse into its entries in sorted order.

        Symbolic links are counted as "links" and not followed.  A directory
        that cannot be listed is reported on stderr and counted as
        "unlistable".
        """
        self.addstats("", "dirs", 1)
        try:
            names = os.listdir(dir)
        except OSError as err:
            sys.stderr.write("Can't list %s: %s\n" % (dir, err))
            # Directory-level counters live under the "" extension.
            self.addstats("", "unlistable", 1)
            return
        for name in sorted(names):
            full = os.path.join(dir, name)
            if os.path.islink(full):
                self.addstats("", "links", 1)
            elif os.path.isdir(full):
                self.statdir(full)
            else:
                self.statfile(full)

    def statfile(self, file):
        """Record statistics for the single file *file* under its extension.

        Always counts "files".  A file that cannot be opened is reported on
        stderr and counted as "unopenable".  Otherwise "bytes" is recorded;
        files containing a NUL byte are counted as "binary" and get no
        further counters, while other files also get "lines" and "words"
        (and "empty" when they have no content).
        """
        ext = os.path.splitext(file)[1]
        base = os.path.basename(file)
        if ext == base:
            ext = ""  # .cvsignore is deemed not to have an extension
        self.addstats(ext, "files", 1)
        try:
            f = open(file, "rb")
        except IOError as err:
            sys.stderr.write("Can't open %s: %s\n" % (file, err))
            self.addstats(ext, "unopenable", 1)
            return
        # The context manager closes the handle even if read() fails.
        with f:
            data = f.read()
        self.addstats(ext, "bytes", len(data))
        # The file was opened in binary mode, so compare against bytes.
        if b'\0' in data:
            self.addstats(ext, "binary", 1)
            return
        if not data:
            self.addstats(ext, "empty", 1)
        self.addstats(ext, "lines", len(data.splitlines()))
        self.addstats(ext, "words", len(data.split()))

    def addstats(self, ext, key, n):
        """Add *n* to counter *key* of extension *ext*, creating it if needed."""
        d = self.stats.setdefault(ext, {})
        d[key] = d.get(key, 0) + n

    def _writerow(self, cells):
        """Write one table row to stdout, cells separated by single spaces."""
        sys.stdout.write(" ".join(cells) + "\n")

    def report(self):
        """Print the collected statistics as a right-aligned table on stdout.

        There is one row per extension (sorted), one column per counter name
        (sorted), and a final "TOTAL" row.  Counters an extension does not
        have are left blank.
        """
        exts = sorted(self.stats)
        # Get the column keys: the union of counter names over all extensions.
        columns = {}
        for ext in exts:
            columns.update(self.stats[ext])
        cols = sorted(columns)
        minwidth = 7
        # "or [0]" keeps max() from failing when nothing was collected.
        extwidth = max([len(ext) for ext in exts] or [0])
        widths = dict((col, max(len(col), minwidth)) for col in cols)

        self._writerow(["%*s" % (extwidth, "ext")] +
                       ["%*s" % (widths[col], col) for col in cols])
        totals = {}
        for ext in exts:
            row = ["%*s" % (extwidth, ext)]
            for col in cols:
                value = self.stats[ext].get(col)
                if value is None:
                    s = ""
                else:
                    s = "%d" % value
                    totals[col] = totals.get(col, 0) + value
                row.append("%*s" % (widths[col], s))
            self._writerow(row)
        # Every column came from at least one extension, so totals[col] exists.
        self._writerow(["%*s" % (extwidth, "TOTAL")] +
                       ["%*s" % (widths[col], totals[col]) for col in cols])


def main():
    """Report statistics for the command-line paths (default: current dir)."""
    args = sys.argv[1:]
    if not args:
        args = [os.curdir]
    s = Stats()
    s.statargs(args)
    s.report()


if __name__ == "__main__":
    main()