Various fixes and debug code removal.
[git-dm.git] / treeplot
blob9f7b03f088c7b47b8a13b88fb5f2221904952096
1 #!/usr/bin/python
3 # git log --pretty="%H %P" | this program
5 import sys, subprocess, argparse, pickle
6 import graphviz
7 import patterns
# Pattern objects from the local patterns module; Merge.getdesc() applies
# them, line by line, to a merge commit's log message.
Mergepat = patterns.patterns['ExtMerge']      # merge of an external tree
IntMerge = patterns.patterns['IntMerge']      # internal merge
IntMerge2 = patterns.patterns['IntMerge2']    # internal merge, alternative pattern
# Every Merge object, keyed by its id; Merge.__init__() registers itself here.
Mergelist = { }
class Merge:
    """One merge commit plus the commits and sub-merges it brought in.

    Attributes:
        id: commit ID of the merge ('mainline' is used for the root).
        commits: IDs of the ordinary commits attributed to this merge.
        merges: Merge objects for the merges attributed to this one.
        tree: name of the tree that was merged, or '?' when unknown.
        internal: True once the log message matched an internal-merge
            pattern.
    """

    def __init__(self, id, tree = None):
        """Create the merge and register it in Mergelist under *id*.

        When *tree* is not given, the merge's log message is examined
        (see getdesc()) to work out the tree name and internal flag.
        """
        self.id = id
        self.commits = [ ]
        self.merges = [ ]
        self.tree = tree or '?'
        self.internal = False
        if tree is None:
            self.getdesc()
        Mergelist[id] = self

    def normalize_tree(self, tree):
        """Return a shortened name for the tree URL *tree*.

        A leading 'git://' is dropped, and anything on git.kernel.org is
        reduced to '$KORG/' followed by its last two path components.
        """
        if tree.startswith('git://'):
            tree = tree[6:]
        if 'git.kernel.org' in tree:
            stree = tree.split('/')
            # Two components are needed to build the short form; with
            # fewer, keep the name as it is instead of raising IndexError.
            if len(stree) >= 2:
                return '$KORG/%s/%s' % (stree[-2], stree[-1])
        return tree

    def getdesc(self):
        """Classify this merge from the output of 'git log -1'.

        Lines are scanned in order and the scan stops at the first match:
        an external-merge match sets self.tree (normalized), an
        internal-merge match sets self.internal.  With no match at all
        both attributes keep the values set in __init__().
        """
        command = ['git', 'log', '-1', self.id]
        p = subprocess.Popen(command, cwd = Repo, stdout = subprocess.PIPE)
        try:
            for line in p.stdout.readlines():
                # Under Python 3 the pipe yields bytes; decode them so the
                # (presumably str) patterns can be applied, as find_merge()
                # does with its git output.  Python 2 lines are left alone.
                if not isinstance(line, str):
                    line = line.decode('utf8', 'replace')
                #
                # Maybe it's a merge of an external tree.
                #
                m = Mergepat.search(line)
                if m:
                    self.tree = self.normalize_tree(m.group(3))
                    self.internal = False
                    break
                #
                # Or maybe it's an internal merge.
                #
                m = IntMerge.search(line) or IntMerge2.search(line)
                if m:
                    self.internal = True
                    break
        finally:
            # Release the pipe and reap the child even if a pattern blows up.
            p.stdout.close()
            p.wait()

    def add_commit(self, id):
        """Record commit *id* as brought in by this merge."""
        self.commits.append(id)

    def add_merge(self, merge):
        """Record the Merge object *merge* as brought in by this merge."""
        self.merges.append(merge)
62 # Read the list of commits from the input stream and find which
63 # merge brought in each.
def ingest_commits(src):
    """Read 'commit parent...' lines from *src* and file each commit.

    Each non-blank line starts with a commit ID followed by its parent
    IDs (the output of git log --pretty="%H %P").  The commit is attached
    to the Merge that find_merge() reports for it: as a new sub-Merge when
    the line has more than one parent, as a plain commit otherwise.  A
    progress counter is written to stderr every 50 commits.

    Raises:
        KeyError: if find_merge() names a merge that is not in Mergelist.
    """
    count = 0
    for line in src:
        sline = line.split()
        if not sline:
            continue    # blank line; there is no commit ID to index
        commit = sline[0]
        mc = Mergelist[find_merge(commit)] # Needs try
        if len(sline) > 2: # is a merge
            mc.add_merge(Merge(commit))
        else:
            mc.add_commit(commit)
        count += 1
        if (count % 50) == 0:
            sys.stderr.write('\r%5d ' % (count))
            sys.stderr.flush()
    # Emit the trailing newline; this form behaves the same on Python 2 and 3.
    print('')
82 # Figure out which merge brought in a commit.
# Cache mapping the description of a merge (the part of the
# 'git describe --contains' output before the last ^) to the value
# find_merge() returned for it.
MergeIDs = { }

def find_merge(commit):
    """Return the ID of the merge that brought in *commit*.

    The result is a commit ID, or 'mainline' when the commit's description
    contains no ^ or resolves back to *commit* itself.
    """
    describe = subprocess.Popen(['git', 'describe', '--contains', commit],
                                cwd = Repo, stdout = subprocess.PIPE,
                                bufsize = 1)
    desc = describe.stdout.readline().decode('utf8')
    describe.wait()
    #
    # The description line has the form:
    #
    #      tag~N^M~n...
    #
    # Everything before the last ^ describes the merge we are after; with
    # no ^ at all, assume the commit is on the main branch.
    #
    caret = desc.rfind('^')
    if caret < 0:
        return 'mainline'
    mergedesc = desc[:caret]
    #
    # Perhaps this merge has been resolved before.
    #
    if mergedesc in MergeIDs:
        return MergeIDs[mergedesc]
    #
    # No such luck; ask git for the real commit ID.
    #
    lookup = subprocess.Popen(['git', 'log', '--pretty=%H', '-1', mergedesc],
                              cwd = Repo, stdout = subprocess.PIPE,
                              bufsize = 1)
    merge = lookup.stdout.readline().decode('utf8').strip()
    #
    # Getting the same commit back means we are looking at one of Linus's
    # version number tags.
    #
    if merge == commit:
        merge = 'mainline'
    MergeIDs[mergedesc] = merge
    lookup.wait()
    return merge
129 # Internal merges aren't interesting from our point of view. So go through,
130 # find them all, and move any commits from such into the parent.
def zorch_internals(merge):
    """Dissolve every internal merge found below *merge*.

    Works depth-first: each child is cleaned up before it is examined.
    An internal child's commits are appended to merge.commits and its own
    sub-merges take its place in merge.merges; other children are kept.
    """
    kept = [ ]
    for child in merge.merges:
        zorch_internals(child)
        if not child.internal:
            kept.append(child)
            continue
        # Internal: hoist its contents into the parent and drop it.
        merge.commits.extend(child.commits)
        kept.extend(child.merges)
    merge.merges = kept
144 # Figure out how many commits flowed at each stage.
def count_commits(merge):
    """Store in merge.ccount, and return, the commits that came via *merge*.

    The figure covers the merge itself, its own commits, and the totals
    of all of its sub-merges (whose ccount attributes are set as well).
    """
    below = sum(count_commits(sub) for sub in merge.merges)
    # The leading 1 counts the merge commit itself.
    merge.ccount = 1 + len(merge.commits) + below
    return merge.ccount
153 # ...and how many flowed between each pair of trees
# Treecounts[dest][src] is the number of commits that flowed from tree
# src into tree dest.
Treecounts = { }

def tree_stats(merge):
    """Add the flows below *merge* to Treecounts, recursively.

    Every sub-merge contributes its ccount to the entry for its tree under
    merge.tree.  An entry for merge.tree is created even when *merge* has
    no sub-merges.  count_commits() must have set ccount beforehand.
    """
    flows = Treecounts.setdefault(merge.tree, { })
    for sub in merge.merges:
        flows[sub.tree] = flows.get(sub.tree, 0) + sub.ccount
        tree_stats(sub)
168 # Maybe we only want so many top-level trees
def trim_trees(limit, treecounts = None):
    """Keep only the *limit* largest sources feeding 'mainline'.

    The remaining sources are removed and replaced by one entry named
    '<N> other trees' holding the sum of their counts.  When there is
    nothing to remove the table is left untouched, so no empty
    '0 other trees' entry appears.

    Args:
        limit: number of sources to keep; negative values count as 0.
        treecounts: flow table to trim in place; defaults to the
            module-level Treecounts.
    """
    if treecounts is None:
        treecounts = Treecounts
    srcs = treecounts['mainline']
    limit = max(limit, 0)
    # Largest first; sorted() is stable, so ties keep their dict order.
    srcnames = sorted(srcs, key = lambda name: srcs[name], reverse = True)
    extras = srcnames[limit:]
    if not extras:
        return
    zapped = 0
    for extra in extras:
        zapped += srcs.pop(extra)
    srcs['%d other trees' % (len(extras))] = zapped
181 # Take our map of the commit structure and boil it down to how many commits
182 # moved from one tree to the next.
def dumptree(start, indent = ''):
    """Debugging aid: print the merge tree rooted at *start*.

    One line per merge: an 'I: ' marker for internal merges, the first
    ten characters of the ID, the number of sub-merges and of commits,
    and the tree name.  Sub-merges follow with a deeper indent.
    """
    # Named "marker" rather than "int" so the builtin is not shadowed.
    marker = 'I: ' if start.internal else ''
    print('%s%s%s: %d/%d %s' % (indent, marker, start.id[:10],
                                len(start.merges), len(start.commits),
                                start.tree))
    for merge in start.merges:
        dumptree(merge, indent + ' ')
def dumpflow(tree, indent = '', seen = None):
    """Print the sources feeding *tree*, largest first, recursively.

    Args:
        tree: name of the tree whose Treecounts entry is printed.
        indent: prefix for each printed line; grows with the depth.
        seen: trees already on the path down to *tree*.  A source found
            in it is reported with a 'Skip' line and not descended into.

    Trees without a Treecounts entry print nothing.
    """
    if seen is None:    # no shared mutable default
        seen = [ ]
    try:
        srcs = Treecounts[tree]
    except KeyError:
        return
    # Largest first; sorted() is stable, so ties keep their dict order.
    for src in sorted(srcs, key = lambda name: srcs[name], reverse = True):
        if src in seen:
            print('Skip %s %s %s' % (src, srcs[src], seen))
        else:
            print('%s%4d %s' % (indent, srcs[src], src))
            dumpflow(src, indent = indent + ' ', seen = seen + [tree])
210 # Graphviz.
def GV_out(file):
    """Build the patch-flow graph with Graphviz, as a PNG named after *file*.

    The 'mainline' node is drawn as a red ellipse; nodes created afterwards
    use the four-sided black style.  Edges are added by GV_out_node(), and
    graph.view() is called at the end to produce and show the result.
    """
    graph = graphviz.Digraph('mainline', filename = file, format = 'png')
    for setting in ('label="Patch flow into the mainline"',
                    'concentrate=true',
                    'rankdir=LR'):
        graph.body.append(setting)
    # Node attributes apply to nodes created after the call, so the
    # mainline node must be created between the two attr() calls.
    graph.attr('node', fontsize="20", color="red", shape='ellipse')
    graph.node('mainline')
    graph.attr('node', fontsize="14", color="black", shape='polygon',
               sides='4')
    GV_out_node(graph, 'mainline')
    graph.view()
def GV_fixname(name):
    """Return *name* with each ':' replaced by '/', or Graphviz chokes."""
    return '/'.join(name.split(':'))
def GV_color(count):
    """Return the edge color for a flow of *count* commits.

    'red' at or above RedThresh, 'orange' at or above YellowThresh,
    'black' otherwise.
    """
    color = 'black'
    if count >= RedThresh:
        color = 'red'
    elif count >= YellowThresh:
        color = 'orange'
    return color
def GV_out_node(graph, node, seen = None):
    """Add to *graph* the edges for every tree feeding *node*, recursively.

    Args:
        graph: the graphviz graph being built.
        node: name of the destination tree.
        seen: trees already on the path down to *node*; a source found
            in it gets no edge and is not descended into.

    Each edge is labelled with its commit count and colored by GV_color().
    """
    if seen is None:    # no shared mutable default
        seen = [ ]
    try:
        srcs = Treecounts[node]
    except KeyError: # "applied by linus"
        return
    # Largest first; sorted() is stable, so ties keep their dict order.
    for src in sorted(srcs, key = lambda name: srcs[name], reverse = True):
        if src not in seen:
            graph.edge(GV_fixname(src), GV_fixname(node),
                       taillabel='%d' % srcs[src], labelfontsize="14",
                       color = GV_color(srcs[src]), penwidth='2')
            GV_out_node(graph, src, seen + [node])
248 # argument parsing stuff.
def setup_args():
    """Return the argparse parser describing the command-line options.

    All options are optional.  The first five take a string, the last
    three an integer.
    """
    parser = argparse.ArgumentParser()
    # (short flag, long flag, help text, default)
    string_options = (
        ('-d', '--dump', 'Dump merge list to file', ''),
        ('-g', '--gvoutput', 'Graphviz output', ''),
        ('-l', '--load', 'Load merge list from file', ''),
        ('-o', '--output', 'Output file', '-'),
        ('-r', '--repo', 'Repository location', '/home/corbet/kernel'),
    )
    integer_options = (
        ('-t', '--trim', 'Trim top level to this many trees', 0),
        ('-R', '--red', 'Red color threshold', 800),
        ('-Y', '--yellow', 'Yellow color threshold', 200),
    )
    for short, long_name, text, default in string_options:
        parser.add_argument(short, long_name, help = text, default = default)
    for short, long_name, text, default in integer_options:
        parser.add_argument(short, long_name, help = text, default = default,
                            type = int)
    return parser
#
# Main program: parse the options, obtain the merge list, then report
# (and optionally graph) how commits flowed into the mainline.
#
p = setup_args()
args = p.parse_args()
Repo = args.repo
RedThresh = args.red
YellowThresh = args.yellow
#
# Find our commits.
#
if args.load:
    # NOTE: unpickling can run arbitrary code; only load dump files that
    # this program wrote and that you trust.
    # Pickle data is binary, and "with" makes sure the file gets closed
    # (the earlier "dumpfile.close" never actually called close).
    with open(args.load, 'rb') as dumpfile:
        Mergelist = pickle.load(dumpfile)
    Mainline = Mergelist['mainline']
else:
    Mainline = Merge('mainline', tree = 'mainline')
    ingest_commits(sys.stdin)
    if args.dump:
        with open(args.dump, 'wb') as dumpfile:
            pickle.dump(Mergelist, dumpfile)
#
# Now generate the flow graph.
#
#dumptree(Mainline)
zorch_internals(Mainline)
#dumptree(Mainline)
# Commits sitting directly on the mainline get a source entry of their own.
Treecounts['mainline'] = { 'Applied by Linus': len(Mainline.commits) }
print('total commits %d' % count_commits(Mainline))
tree_stats(Mainline)
if args.trim:
    trim_trees(args.trim)
print('Tree flow')
dumpflow('mainline')
if args.gvoutput:
    GV_out(args.gvoutput)