Kill a couple of "<>"
[python.git] / Tools / scripts / byext.py
blob09610b00b9574cd7bbc5751b8134c4cbe9145971
1 #! /usr/bin/env python
3 """Show file statistics by extension."""
5 import os
6 import sys
class Stats:
    """Accumulate per-extension file statistics and print them as a table.

    ``self.stats`` maps an extension string (or one of the pseudo-extensions
    ``"<dir>"``, ``"<lnk>"``, ``"<none>"``, ``"<???>"``) to a dict that maps
    a counter name (``"files"``, ``"bytes"``, ``"lines"``, ...) to an integer.
    """

    def __init__(self):
        self.stats = {}

    def statargs(self, args):
        """Collect statistics for every path in *args*.

        Directories are walked recursively, regular files are examined
        directly, and anything else is reported on stderr and counted under
        the ``"<???>"`` pseudo-extension.
        """
        for arg in args:
            if os.path.isdir(arg):
                self.statdir(arg)
            elif os.path.isfile(arg):
                self.statfile(arg)
            else:
                sys.stderr.write("Can't find %s\n" % arg)
                self.addstats("<???>", "unknown", 1)

    def statdir(self, dir):
        """Recursively collect statistics for the directory *dir*.

        Symbolic links are counted under ``"<lnk>"`` and never followed.
        A directory that cannot be listed is reported on stderr and counted
        as ``"unlistable"``.
        """
        self.addstats("<dir>", "dirs", 1)
        try:
            names = os.listdir(dir)
        except OSError as err:  # os.error is an alias of OSError
            sys.stderr.write("Can't list %s: %s\n" % (dir, err))
            self.addstats("<dir>", "unlistable", 1)
            return
        for name in sorted(names):
            if name.startswith(".#"):
                continue  # Skip CVS temp files
            if name.endswith("~"):
                continue  # Skip Emacs backup files
            full = os.path.join(dir, name)
            # Test for a link first so that links to directories are not
            # descended into.
            if os.path.islink(full):
                self.addstats("<lnk>", "links", 1)
            elif os.path.isdir(full):
                self.statdir(full)
            else:
                self.statfile(full)

    def statfile(self, filename):
        """Collect statistics for the single file *filename*.

        Counts the file, its size in bytes and, unless it contains a NUL
        byte (then it is counted as ``"binary"``), its lines and words.
        A file that cannot be opened is reported on stderr and counted as
        ``"unopenable"``.
        """
        ext = os.path.splitext(filename)[1]
        base = os.path.split(filename)[1]
        if ext == base:
            ext = ""  # E.g. .cvsignore is deemed not to have an extension
        ext = os.path.normcase(ext)
        if not ext:
            ext = "<none>"
        self.addstats(ext, "files", 1)
        try:
            f = open(filename, "rb")
        except IOError as err:
            sys.stderr.write("Can't open %s: %s\n" % (filename, err))
            self.addstats(ext, "unopenable", 1)
            return
        # The context manager closes the file even when read() fails.
        with f:
            data = f.read()
        self.addstats(ext, "bytes", len(data))
        # The file is opened in binary mode, so compare against a bytes
        # literal; a str literal would raise TypeError on Python 3.
        if b"\0" in data:
            self.addstats(ext, "binary", 1)
            return
        if not data:
            self.addstats(ext, "empty", 1)
        self.addstats(ext, "lines", len(data.splitlines()))
        self.addstats(ext, "words", len(data.split()))

    def addstats(self, ext, key, n):
        """Add *n* to the counter *key* of extension *ext*."""
        d = self.stats.setdefault(ext, {})
        d[key] = d.get(key, 0) + n

    def report(self):
        """Print the collected statistics as an aligned table on stdout.

        The table has one row per extension (sorted), a final ``TOTAL`` row,
        and a header line both above and below the rows.  Cells are
        right-aligned and separated by a single space.

        ``self.stats`` is left untouched, so the method can be called
        repeatedly, and it also works when nothing has been collected yet.
        """
        exts = sorted(self.stats)
        # The column keys are the union of all counter names seen.
        cols = sorted(set(key for ext in exts for key in self.stats[ext]))

        # The first column must fit its header and the TOTAL label as well
        # as every extension; the extra entries also keep max() from failing
        # on an empty table.
        extwidth = max([len("ext"), len("TOTAL")] + [len(ext) for ext in exts])
        minwidth = 6
        colwidth = {}
        totals = {}
        for col in cols:
            total = 0
            cw = max(minwidth, len(col))
            for ext in exts:
                value = self.stats[ext].get(col)
                if value is not None:
                    total += value
                    cw = max(cw, len("%d" % value))
            colwidth[col] = max(cw, len(str(total)))
            totals[col] = total

        def formatrow(label, values):
            # Missing counters are rendered as blank cells.
            cells = ["%*s" % (extwidth, label)]
            for col in cols:
                cells.append("%*s" % (colwidth[col], values.get(col, "")))
            return " ".join(cells) + "\n"

        header = formatrow("ext", dict((col, col) for col in cols))
        out = sys.stdout.write
        out(header)
        for ext in exts:
            out(formatrow(ext, self.stats[ext]))
        out(formatrow("TOTAL", totals))
        out(header)  # Another header at the bottom
def main():
    """Report statistics for the paths given on the command line.

    When no paths are given, the current directory is examined.
    """
    targets = sys.argv[1:] or [os.curdir]
    collector = Stats()
    collector.statargs(targets)
    collector.report()


# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()