cpython/Tools/scripts/byext.py

133 lines
3.8 KiB
Python
Raw Permalink Normal View History

#! /usr/bin/env python3
"""Show file statistics by extension."""
import os
import sys
2010-08-09 09:24:20 -03:00
class Stats:
    """Accumulate per-extension file statistics and print them as a table.

    ``stats`` maps a row label to a dict of ``counter name -> int``.  A row
    label is either a normalised file extension (e.g. ``".py"``) or one of
    the pseudo-extensions used by this class: ``"<dir>"`` for directories,
    ``"<lnk>"`` for symbolic links, ``"<none>"`` for files without an
    extension and ``"<???>"`` for arguments that could not be found.
    """

    def __init__(self):
        self.stats = {}

    def statargs(self, args):
        """Collect statistics for every path in *args*.

        Directories are scanned recursively, regular files are analysed
        directly, and anything else is reported on stderr and counted under
        the ``"<???>"`` row.
        """
        for arg in args:
            if os.path.isdir(arg):
                self.statdir(arg)
            elif os.path.isfile(arg):
                self.statfile(arg)
            else:
                sys.stderr.write("Can't find %s\n" % arg)
                self.addstats("<???>", "unknown", 1)

    def statdir(self, dir):
        """Recursively collect statistics for the directory *dir*.

        A directory that cannot be listed is reported on stderr and counted
        as "unlistable".  Symbolic links are counted but never followed.
        """
        self.addstats("<dir>", "dirs", 1)
        try:
            names = os.listdir(dir)
        except OSError as err:
            sys.stderr.write("Can't list %s: %s\n" % (dir, err))
            self.addstats("<dir>", "unlistable", 1)
            return
        for name in sorted(names):
            if name.startswith(".#"):
                continue  # Skip CVS temp files
            if name.endswith("~"):
                continue  # Skip Emacs backup files
            full = os.path.join(dir, name)
            # Test for links first: os.path.isdir() follows symlinks, and
            # descending into a linked directory could loop forever.
            if os.path.islink(full):
                self.addstats("<lnk>", "links", 1)
            elif os.path.isdir(full):
                self.statdir(full)
            else:
                self.statfile(full)

    def statfile(self, filename):
        """Collect statistics for the single file *filename*.

        The file is always counted under "files".  If it can be read, its
        size is added to "bytes"; files containing a NUL byte are counted as
        "binary" and get no line/word counts.  Unreadable files are reported
        on stderr and counted as "unopenable".
        """
        # os.path.splitext() already treats a leading-dot name such as
        # ".cvsignore" as having no extension, so no special case is needed.
        ext = os.path.normcase(os.path.splitext(filename)[1])
        if not ext:
            ext = "<none>"
        self.addstats(ext, "files", 1)
        try:
            with open(filename, "rb") as f:
                data = f.read()
        except OSError as err:
            sys.stderr.write("Can't open %s: %s\n" % (filename, err))
            self.addstats(ext, "unopenable", 1)
            return
        self.addstats(ext, "bytes", len(data))
        if b'\0' in data:
            self.addstats(ext, "binary", 1)
            return
        if not data:
            self.addstats(ext, "empty", 1)
        # latin-1 maps every byte to a character, so decoding cannot fail
        # whatever the file's real encoding is.
        lines = data.decode("latin-1").splitlines()
        self.addstats(ext, "lines", len(lines))
        del lines
        words = data.split()
        self.addstats(ext, "words", len(words))

    def addstats(self, ext, key, n):
        """Add *n* to counter *key* of row *ext*, creating both on demand."""
        d = self.stats.setdefault(ext, {})
        d[key] = d.get(key, 0) + n

    def report(self):
        """Print the collected statistics to stdout as an aligned table.

        One row is printed per extension (sorted), followed by a TOTAL row;
        the header line is repeated at the bottom.  ``self.stats`` is left
        untouched, so the report can be printed repeatedly and more
        statistics can be collected afterwards.
        """
        exts = sorted(self.stats)
        # The column set is the union of all counter names seen so far.
        cols = sorted({key for counters in self.stats.values()
                       for key in counters})
        minwidth = 6

        # The label column must fit the header and the TOTAL row as well as
        # every extension; this also keeps max() from seeing an empty list.
        labelwidth = max(len(label) for label in exts + ["ext", "TOTAL"])

        totals = {}
        colwidth = {}
        for col in cols:
            values = [self.stats[ext][col]
                      for ext in exts if col in self.stats[ext]]
            total = sum(values)
            widest = max(len(str(value)) for value in values + [total])
            colwidth[col] = max(minwidth, len(col), widest)
            totals[col] = total

        def printrow(label, cells):
            # Missing counters are rendered as blank, right-aligned cells.
            print("%*s" % (labelwidth, label), end=' ')
            for col in cols:
                print("%*s" % (colwidth[col], cells.get(col, "")), end=' ')
            print()

        header = {col: col for col in cols}
        printrow("ext", header)
        for ext in exts:
            printrow(ext, self.stats[ext])
        printrow("TOTAL", totals)
        printrow("ext", header)  # Another header at the bottom
def main():
    """Report statistics for the paths given on the command line.

    When no paths are given, the current directory is scanned instead.
    """
    targets = sys.argv[1:] or [os.curdir]
    collector = Stats()
    collector.statargs(targets)
    collector.report()
2010-08-09 09:24:20 -03:00
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()