treeplot: show signed trees in black
[git-dm.git] / treeplot
blob688c150f0a069e3694ec2c833612e5ab45cc4b01
1 #!/usr/bin/python
3 # git log --pretty="%H %P" | this program
4 # See option descriptions at bottom
6 # This little program cranks through a series of patches, trying to determine
7 # which trees each flowed through on its way to the mainline. It does a
8 # 'git describe' on each, so don't expect it to be fast for large numbers
9 # of patches.
11 # One warning: it is easily confused by local branches, tags, etc. For
12 # best results, run it on a mainline tree with no added frobs. Using
13 # "git clone --reference" is a relatively easy way to come up with such
14 # a tree without redownloading the whole mess.
16 import sys, subprocess, argparse, pickle
17 import graphviz
18 import patterns
# Pattern objects from the local patterns module.  Merge.getdesc() runs
# them against each line of a merge's log entry: ExtMerge for a pull from
# an external tree, IntMerge and IntMerge2 for internal merges.
Mergepat, IntMerge, IntMerge2 = (
    patterns.patterns[key] for key in ('ExtMerge', 'IntMerge', 'IntMerge2'))

# Every Merge object created so far, indexed by its ID; filled in by
# Merge.__init__().
Mergelist = dict()
class Merge:
    """One merge commit plus whatever arrived through it.

    Attributes:
        id:       commit ID of the merge (or a pseudo-ID such as 'mainline').
        commits:  IDs of the non-merge commits attributed to this merge.
        merges:   Merge objects for the merges found below this one.
        tree:     name of the tree that was pulled; '?' when unknown.
        internal: True unless the log message matched the external-merge
                  pattern.
        signed:   True when the git output contained a 'gpg:' line.
    """

    def __init__(self, id, tree = None):
        """Create the merge and register it in Mergelist under id.

        When no tree name is supplied, git is asked to describe the
        merge (see getdesc()).
        """
        self.id = id
        self.commits = [ ]
        self.merges = [ ]
        self.tree = tree or '?'
        self.internal = False
        self.signed = False
        if tree is None:
            self.getdesc()
        Mergelist[id] = self

    def normalize_tree(self, tree):
        """Return a shortened name for the tree URL found in a merge message.

        A leading 'scheme://' is dropped.  Anything mentioning
        git.kernel.org is reduced to '$KORG/' followed by its last two
        path components; every other name is returned as is.
        """
        colonslash = tree.find('://')
        if colonslash > 0:
            tree = tree[colonslash+3:]
        if 'git.kernel.org' in tree:
            stree = tree.split('/')
            # A name with no '/' has only one component; leave it alone
            # rather than blow up indexing stree[-2].
            if len(stree) >= 2:
                return '$KORG/%s/%s' % (stree[-2], stree[-1])
        return tree

    def getdesc(self):
        """Set tree, internal and signed from 'git log -1 --show-signature'."""
        command = ['git', 'log', '-1', '--show-signature', self.id]
        # No bufsize here: line buffering means nothing on a binary pipe
        # and we read the whole output in one go anyway.
        p = subprocess.Popen(command, cwd = Repo, stdout = subprocess.PIPE)
        lines = p.stdout.readlines()
        p.stdout.close()
        p.wait()
        #
        # Sometimes we don't match a pattern; that means that the
        # committer radically modified the merge message.  A certain
        # Eric makes them look like ordinary commits...  Others use
        # it to justify backmerges of the mainline.  Either way, the
        # best response is to treat it like an internal merge.
        #
        self.internal = True
        for raw in lines:
            # The pipe hands us bytes; decode them (as find_merge() does)
            # so the string tests below work on Python 3 as well.  Bad
            # bytes in a commit message must not abort the whole run.
            line = raw.decode('utf8', 'replace')
            #
            # Note if there's a GPG signature
            #
            if line.startswith('gpg:'):
                self.signed = True
                continue
            #
            # Maybe it's a merge of an external tree.
            #
            m = Mergepat.search(line)
            if m:
                self.tree = self.normalize_tree(m.group(3))
                self.internal = False
                break
            #
            # Or maybe it's an internal merge.
            #
            m = IntMerge.search(line) or IntMerge2.search(line)
            if m:
                self.internal = True
                break

    def add_commit(self, id):
        """Record a non-merge commit as having arrived via this merge."""
        self.commits.append(id)

    def add_merge(self, merge):
        """Record a Merge object as nested directly below this merge."""
        self.merges.append(merge)
#
# Read the list of commits from the input stream and find which
# merge brought in each.
#
def ingest_commits(src):
    """Attach every commit listed in src to the merge that brought it in.

    src is an open file whose lines look like the output of
    git log --pretty="%H %P": a commit ID followed by its parent IDs.
    A commit with more than one parent gets a new Merge object added to
    the merge that contains it; any other commit is added to that merge's
    commit list.  Progress is reported on stderr every 50 commits.

    Blank lines are ignored.  A commit whose containing merge is not in
    Mergelist is reported on stderr and left out.
    """
    count = 0
    expected = None         # first parent of the previous commit
    last_merge = None       # merge the previous commit was attached to
    for line in src:
        # split() already drops the newline; chopping the final character
        # by hand would eat part of an ID on an unterminated last line.
        sline = line.split()
        if not sline:
            continue
        commit = sline[0]
        is_merge = (len(sline) > 2)
        if (commit == expected) and not is_merge:
            # Still walking the same chain of commits, so the expensive
            # git lookup can be skipped.
            mc = last_merge
        else:
            mc = Mergelist.get(find_merge(commit))
            if mc is None:
                sys.stderr.write('\nNo known merge contains %s, skipping it\n'
                                 % (commit))
                expected = None     # force a fresh lookup for the next one
                continue
        if is_merge:
            mc.add_merge(Merge(commit))
        else:
            mc.add_commit(commit)
        count += 1
        if (count % 50) == 0:
            sys.stderr.write('\r%5d ' % (count))
            sys.stderr.flush()
        # A root commit has no parent to follow.
        expected = sline[1] if len(sline) > 1 else None
        last_merge = mc
    print('')
#
# Figure out which merge brought in a commit.
#
# Merge commit IDs that have already been looked up, keyed by the
# "tag~N..." description of the merge.
MergeIDs = dict()

def _git_first_line(command):
    """Run command in Repo and return the first line of its output, decoded."""
    proc = subprocess.Popen(command, cwd = Repo, stdout = subprocess.PIPE,
                            bufsize = 1)
    first = proc.stdout.readline().decode('utf8')
    proc.wait()
    return first

def find_merge(commit):
    """Return the ID of the merge that brought in commit, or 'mainline'."""
    desc = _git_first_line(['git', 'describe', '--contains', commit])
    #
    # The description line has the form:
    #
    #      tag~N^M~n...
    #
    # the portion up to the last ^ describes the merge we are after;
    # in the absence of an ^, assume it's on the main branch.
    #
    cut = desc.rfind('^')
    if cut < 0:
        return 'mainline'
    where = desc[:cut]
    #
    # OK, now get the real commit ID of the merge.  Maybe we have
    # it stashed?
    #
    known = MergeIDs.get(where)
    if known is not None:
        return known
    #
    # Nope, we have to dig it out the hard way.
    #
    merge = _git_first_line(['git', 'log', '--pretty=%H', '-1', where]).strip()
    #
    # If we get back the same commit, we're looking at one of Linus's
    # version number tags.
    #
    if merge == commit:
        merge = 'mainline'
    MergeIDs[where] = merge
    return merge
#
# Internal merges aren't interesting from our point of view.  So go through,
# find them all, and move any commits from such into the parent.
#
def zorch_internals(merge):
    """Fold every internal merge below merge into its parent, in place.

    Children are processed first, so by the time an internal child is
    dissolved its own merge list holds only external merges.
    """
    kept = [ ]
    for child in merge.merges:
        zorch_internals(child)
        if not child.internal:
            kept.append(child)
            continue
        # Dissolve the internal merge: its commits and its surviving
        # sub-merges now belong to the parent.
        merge.commits.extend(child.commits)
        kept.extend(child.merges)
    merge.merges = kept
#
# Figure out how many commits flowed at each stage.
#
def count_commits(merge):
    """Store in merge.ccount, and return, the commits under merge.

    The total is the merge's own commits, one for the merge itself, and
    the totals of all merges nested below it.
    """
    below = sum(count_commits(child) for child in merge.merges)
    merge.ccount = len(merge.commits) + 1 + below
    return merge.ccount
#
# ...and how many flowed between each pair of trees
#
# Treecounts[dest][src] is the number of commits that went from tree
# src into tree dest; SignedTrees holds the names of trees that had at
# least one signed merge.
Treecounts = dict()
SignedTrees = set()

def tree_stats(merge):
    """Add the flows found below merge to Treecounts and SignedTrees.

    Relies on the ccount attribute set by count_commits().
    """
    flows = Treecounts.setdefault(merge.tree, { })
    for child in merge.merges:
        if child.signed:
            SignedTrees.add(child.tree)
        flows[child.tree] = flows.get(child.tree, 0) + child.ccount
        tree_stats(child)
#
# Maybe we only want so many top-level trees
#
def trim_trees(limit):
    """Keep only the limit busiest trees feeding the mainline.

    The remaining trees are removed from Treecounts['mainline'] and
    replaced by one 'N other trees' entry carrying their combined commit
    count.  Nothing changes when there are no more than limit trees, so
    no empty or negative 'other trees' entry is ever created.
    """
    srcs = Treecounts['mainline']
    # Busiest first; trees with equal counts keep their existing order.
    ranked = sorted(srcs, key = srcs.get, reverse = True)
    extras = ranked[max(limit, 0):]
    if not extras:
        return
    zapped = 0
    for extra in extras:
        zapped += srcs.pop(extra)
    srcs['%d other trees' % (len(extras))] = zapped
217 # Take our map of the commit structure and boil it down to how many commits
218 # moved from one tree to the next.
def dumptree(start, indent = ''):
    """Print an indented outline of the merge tree rooted at start.

    Each line shows the first ten characters of the merge ID, the number
    of nested merges and of commits, and the tree name; internal merges
    are flagged with 'I: '.
    """
    flag = 'I: ' if start.internal else ''
    summary = (indent, flag, start.id[:10], len(start.merges),
               len(start.commits), start.tree)
    print('%s%s%s: %d/%d %s' % summary)
    deeper = indent + ' '
    for merge in start.merges:
        dumptree(merge, deeper)
def dumpflow(tree, indent = '', seen = []):
    """Print the trees that feed into tree, busiest first, recursively.

    Signed trees are marked with '**'.  seen lists the trees on the path
    from the top down to (not including) tree; a source already on that
    path is reported with a 'Skip' line rather than followed, which keeps
    loops in the flow from recursing forever.
    """
    srcs = Treecounts.get(tree)
    if srcs is None:
        return
    trail = seen + [tree]
    for src in sorted(srcs, key = srcs.get, reverse = True):
        flow = srcs[src]
        if src in seen:
            print('Skip %s %s %s' % (src, flow, seen))
            continue
        fmt = '%s%4d ** %s' if src in SignedTrees else '%s%4d %s'
        print(fmt % (indent, flow, src))
        dumpflow(src, indent = indent + ' ', seen = trail)
def SigStats(tree):
    """Print how many of the trees feeding tree are signed, and their commits.

    Two lines are printed: the number of source trees (total, signed,
    unsigned) and the number of commits that came from each kind.
    """
    srcs = Treecounts[tree]
    # Both tallies are indexed by "is this source tree signed?"
    pulls = { True: 0, False: 0 }
    commits = { True: 0, False: 0 }
    for src in srcs:
        signed = src in SignedTrees
        pulls[signed] += 1
        commits[signed] += srcs[src]
    print('%d repos total, %d signed, %d unsigned' % (pulls[True] + pulls[False],
                                                      pulls[True], pulls[False]))
    print(' %d commits from signed, %d from unsigned' % (commits[True],
                                                         commits[False]))
#
# Graphviz.
#
def GV_out(file):
    """Build the patch-flow graph as a graphviz Digraph and view it.

    file is handed to graphviz.Digraph as the output filename; the format
    is SVG.  Edges come from GV_out_node_signed() when DoSigned is set
    and from GV_out_node() otherwise.
    """
    graph = graphviz.Digraph('mainline', filename = file, format = 'svg')
    graph_settings = ('label="Patch flow into the mainline"',
                      'concentrate=true',
                      'rankdir=LR')
    graph.body.extend(graph_settings)
    # The mainline node is declared first, with its own look...
    graph.attr('node', fontsize="20", color="blue", penwidth='4',
               shape='ellipse')
    graph.node('mainline')
    # ...and these defaults apply to every node created after it.
    graph.attr('node', fontsize="14", color="black", shape='polygon',
               sides='4')
    emit = GV_out_node_signed if DoSigned else GV_out_node
    emit(graph, 'mainline')
    graph.view()
def GV_fixname(name):
    """Return name with every ':' turned into '/' (or Graphviz chokes)."""
    return '/'.join(name.split(':'))
def GV_color(count):
    """Return the edge color for a flow of count commits.

    'red' at RedThresh or above, 'orange' at YellowThresh or above,
    'black' otherwise.
    """
    for floor, color in ((RedThresh, 'red'), (YellowThresh, 'orange')):
        if count >= floor:
            return color
    return 'black'
#
# Output nodes with traffic coloring
#
def GV_out_node(graph, node, seen = []):
    """Add an edge into node from each tree feeding it, then recurse.

    Edges are labeled with the commit count and colored by GV_color().
    seen lists the trees on the path down to node; sources already on
    that path are left out.
    """
    srcs = Treecounts.get(node)
    if srcs is None:        # "applied by linus"
        return
    trail = seen + [node]
    for src in sorted(srcs, key = srcs.get, reverse = True):
        if src in seen:
            continue
        flow = srcs[src]
        graph.edge(GV_fixname(src), GV_fixname(node),
                   taillabel='%d' % flow, labelfontsize="14",
                   color = GV_color(flow), penwidth='2')
        GV_out_node(graph, src, trail)
#
# Output nodes showing signature status
#
def GV_out_node_signed(graph, node, seen = []):
    """Add an edge into node from each tree feeding it, then recurse.

    A source tree found in SignedTrees is drawn in black, any other in
    red; the color is set as the node default before the edge is added
    and is used for the edge as well.  seen lists the trees on the path
    down to node; sources already on that path are left out.
    """
    srcs = Treecounts.get(node)
    if srcs is None:        # "applied by linus"
        return
    trail = seen + [node]
    for src in sorted(srcs, key = srcs.get, reverse = True):
        if src in seen:
            continue
        color = 'black' if src in SignedTrees else 'red'
        graph.attr('node', color=color)
        graph.edge(GV_fixname(src), GV_fixname(node),
                   taillabel='%d' % srcs[src], labelfontsize="14",
                   color = color, penwidth='2')
        GV_out_node_signed(graph, src, trail)
#
# argument parsing stuff.
#
def setup_args():
    """Return the argparse parser describing the command-line options."""
    parser = argparse.ArgumentParser()
    # Options taking a string: (short, long, help text, default)
    string_opts = (
        ('-d', '--dump', 'Dump merge list to file', ''),
        ('-g', '--gvoutput', 'Graphviz output', ''),
        ('-l', '--load', 'Load merge list from file', ''),
        ('-o', '--output', 'Output file', '-'),
        ('-r', '--repo', 'Repository location', '/home/corbet/kernel'),
    )
    for short, full, text, fallback in string_opts:
        parser.add_argument(short, full, help = text, required = False,
                            default = fallback)
    # Options taking an integer, same layout
    int_opts = (
        ('-t', '--trim', 'Trim top level to this many trees', 0),
        ('-R', '--red', 'Red color threshold', 800),
        ('-Y', '--yellow', 'Yellow color threshold', 200),
    )
    for short, full, text, fallback in int_opts:
        parser.add_argument(short, full, help = text, required = False,
                            default = fallback, type = int)
    parser.add_argument('-s', '--signed', help = 'Display signed trees',
                        action = 'store_true', default = False)
    return parser
p = setup_args()
args = p.parse_args()
# These module-level names are read by find_merge(), Merge.getdesc(),
# GV_color() and GV_out().
Repo = args.repo
RedThresh = args.red
YellowThresh = args.yellow
DoSigned = args.signed
#
# Find our commits.
#
if args.load:
    # NOTE: unpickling runs whatever code the file asks for; only load
    # dump files that this program wrote itself.
    # Binary mode is what pickle wants on Python 3 and is harmless on 2.
    with open(args.load, 'rb') as dumpfile:
        Mergelist = pickle.load(dumpfile)
    Mainline = Mergelist['mainline']
else:
    Mainline = Merge('mainline', tree = 'mainline')
    ingest_commits(sys.stdin)
    if args.dump:
        with open(args.dump, 'wb') as dumpfile:
            pickle.dump(Mergelist, dumpfile)
#
# Now generate the flow graph.
#
#dumptree(Mainline)
zorch_internals(Mainline)
#dumptree(Mainline)
# Commits sitting directly on the mainline count as applied by Linus himself.
Treecounts['mainline'] = { 'Applied by Linus': len(Mainline.commits) }
print('total commits %d' % (count_commits(Mainline)))
tree_stats(Mainline)
if args.trim:
    trim_trees(args.trim)
print('Tree flow')
dumpflow('mainline')
if args.gvoutput:
    GV_out(args.gvoutput)
if DoSigned:
    SigStats('mainline')