Add an introductory comment.
[git-dm.git] / treeplot
blobe8bfe4b8e957c1ad4803e7777db6ab6842d7296b
1 #!/usr/bin/python
3 # git log --pretty="%H %P" | this program
4 # See option descriptions at bottom
6 # This little program cranks through a series of patches, trying to determine
7 # which trees each flowed through on its way to the mainline. It does a
8 # 'git describe' on each, so don't expect it to be fast for large numbers
9 # of patches.
11 # One warning: it is easily confused by local branches, tags, etc. For
12 # best results, run it on a mainline tree with no added frobs. Using
13 # "git clone --reference" is a relatively easy way to come up with such
14 # a tree without redownloading the whole mess.
16 import sys, subprocess, argparse, pickle
17 import graphviz
18 import patterns
20 Mergepat = patterns.patterns['ExtMerge']
21 IntMerge = patterns.patterns['IntMerge']
22 IntMerge2 = patterns.patterns['IntMerge2']
23 Mergelist = { }
25 class Merge:
26 def __init__(self, id, tree = None):
27 self.id = id
28 self.commits = [ ]
29 self.merges = [ ]
30 self.tree = tree or '?'
31 self.internal = False
32 if tree is None:
33 self.getdesc()
34 Mergelist[id] = self
36 def normalize_tree(self, tree):
37 if tree[:6] == 'git://':
38 tree = tree[6:]
39 if tree.find('git.kernel.org') >= 0:
40 stree = tree.split('/')
41 return '$KORG/%s/%s' % (stree[-2], stree[-1])
42 return tree
44 def getdesc(self):
45 command = ['git', 'log', '-1', self.id]
46 p = subprocess.Popen(command, cwd = Repo, stdout = subprocess.PIPE,
47 bufsize = 1)
48 for line in p.stdout.readlines():
50 # Maybe it's a merge of an external tree.
52 m = Mergepat.search(line)
53 if m:
54 self.tree = self.normalize_tree(m.group(3))
55 self.internal = False
56 break
58 # Or maybe it's an internal merge.
60 m = IntMerge.search(line) or IntMerge2.search(line)
61 if m:
62 self.internal = True
63 break
64 p.wait()
66 def add_commit(self, id):
67 self.commits.append(id)
69 def add_merge(self, merge):
70 self.merges.append(merge)
73 # Read the list of commits from the input stream and find which
74 # merge brought in each.
76 def ingest_commits(src):
77 count = 0
78 for line in src.readlines():
79 sline = line.split()
80 commit = sline[0]
81 mc = Mergelist[find_merge(sline[0])] # Needs try
82 if len(sline) > 2: # is a merge
83 mc.add_merge(Merge(commit))
84 else:
85 mc.add_commit(commit)
86 count += 1
87 if (count % 50) == 0:
88 sys.stderr.write('\r%5d ' % (count))
89 sys.stderr.flush()
90 print
93 # Figure out which merge brought in a commit.
95 MergeIDs = { }
97 def find_merge(commit):
98 command = ['git', 'describe', '--contains', commit]
99 p = subprocess.Popen(command, cwd = Repo, stdout = subprocess.PIPE,
100 bufsize = 1)
101 desc = p.stdout.readline().decode('utf8')
102 p.wait()
104 # The description line has the form:
106 # tag~N^M~n...
108 # the portion up to the last ^ describes the merge we are after;
109 # in the absence of an ^, assume it's on the main branch.
111 uparrow = desc.rfind('^')
112 if uparrow < 0:
113 return 'mainline'
115 # OK, now get the real commit ID of the merge. Maybe we have
116 # it stashed?
118 try:
119 return MergeIDs[desc[:uparrow]]
120 except KeyError:
121 pass
123 # Nope, we have to dig it out the hard way.
125 command = ['git', 'log', '--pretty=%H', '-1', desc[:uparrow]]
126 p = subprocess.Popen(command, cwd = Repo, stdout = subprocess.PIPE,
127 bufsize = 1)
128 merge = p.stdout.readline().decode('utf8').strip()
130 # If we get back the same commit, we're looking at one of Linus's
131 # version number tags.
133 if merge == commit:
134 merge = 'mainline'
135 MergeIDs[desc[:uparrow]] = merge
136 p.wait()
137 return merge
140 # Internal merges aren't interesting from our point of view. So go through,
141 # find them all, and move any commits from such into the parent.
143 def zorch_internals(merge):
144 new_merges = [ ]
145 for m in merge.merges:
146 zorch_internals(m)
147 if m.internal:
148 merge.commits += m.commits
149 new_merges += m.merges
150 else:
151 new_merges.append(m)
152 merge.merges = new_merges
155 # Figure out how many commits flowed at each stage.
157 def count_commits(merge):
158 merge.ccount = len(merge.commits) + 1 # +1 to count the merge itself
159 for m in merge.merges:
160 merge.ccount += count_commits(m)
161 return merge.ccount
164 # ...and how many flowed between each pair of trees
166 Treecounts = { }
168 def tree_stats(merge):
169 try:
170 tcount = Treecounts[merge.tree]
171 except KeyError:
172 tcount = Treecounts[merge.tree] = { }
173 for m in merge.merges:
174 mcount = tcount.get(m.tree, 0)
175 tcount[m.tree] = mcount + m.ccount
176 tree_stats(m)
179 # Maybe we only want so many top-level trees
181 def trim_trees(limit):
182 srcs = Treecounts['mainline']
183 srcnames = srcs.keys()
184 srcnames.sort(lambda t1, t2: srcs[t2] - srcs[t1])
185 nextra = len(srcnames) - limit
186 zapped = 0
187 for extra in srcnames[limit:]:
188 zapped += srcs[extra]
189 del srcs[extra]
190 srcs['%d other trees' % (nextra)] = zapped
192 # Take our map of the commit structure and boil it down to how many commits
193 # moved from one tree to the next.
196 def dumptree(start, indent = ''):
197 int = ''
198 if start.internal:
199 int = 'I: '
200 print '%s%s%s: %d/%d %s' % (indent, int, start.id[:10],
201 len(start.merges), len(start.commits),
202 start.tree)
203 for merge in start.merges:
204 dumptree(merge, indent + ' ')
206 def dumpflow(tree, indent = '', seen = []):
207 try:
208 srcs = Treecounts[tree]
209 except KeyError:
210 return
211 srctrees = srcs.keys()
212 srctrees.sort(lambda t1, t2: srcs[t2] - srcs[t1])
213 for src in srctrees:
214 if src in seen:
215 print 'Skip', src, srcs[src], seen
216 else:
217 print '%s%4d %s' % (indent, srcs[src], src)
218 dumpflow(src, indent = indent + ' ', seen = seen + [tree])
221 # Graphviz.
223 def GV_out(file):
224 graph = graphviz.Digraph('mainline', filename = file, format = 'png')
225 graph.body.extend(['label="Patch flow into the mainline"',
226 'concentrate=true',
227 'rankdir=LR' ])
228 graph.attr('node', fontsize="20", color="red", shape='ellipse')
229 graph.node('mainline')
230 graph.attr('node', fontsize="14", color="black", shape='polygon',
231 sides='4')
232 GV_out_node(graph, 'mainline')
233 graph.view()
235 def GV_fixname(name):
236 return name.replace(':', '/') # or Graphviz chokes
238 def GV_color(count):
239 if count >= RedThresh:
240 return 'red'
241 if count >= YellowThresh:
242 return 'orange'
243 return 'black'
245 def GV_out_node(graph, node, seen = []):
246 try:
247 srcs = Treecounts[node]
248 except KeyError: # "applied by linus"
249 return
250 srctrees = srcs.keys()
251 srctrees.sort(lambda t1, t2: srcs[t2] - srcs[t1])
252 for src in srctrees:
253 if src not in seen:
254 graph.edge(GV_fixname(src), GV_fixname(node),
255 taillabel='%d' % srcs[src], labelfontsize="14",
256 color = GV_color(srcs[src]), penwidth='2')
257 GV_out_node(graph, src, seen + [node])
259 # argument parsing stuff.
261 def setup_args():
262 p = argparse.ArgumentParser()
263 p.add_argument('-d', '--dump', help = 'Dump merge list to file',
264 required = False, default = '')
265 p.add_argument('-g', '--gvoutput', help = 'Graphviz output',
266 required = False, default = '')
267 p.add_argument('-l', '--load', help = 'Load merge list from file',
268 required = False, default = '')
269 p.add_argument('-o', '--output', help = 'Output file',
270 required = False, default = '-')
271 p.add_argument('-r', '--repo', help = 'Repository location',
272 required = False, default = '/home/corbet/kernel')
273 p.add_argument('-t', '--trim', help = 'Trim top level to this many trees',
274 required = False, default = 0, type = int)
275 p.add_argument('-R', '--red', help = 'Red color threshold',
276 required = False, default = 800, type = int)
277 p.add_argument('-Y', '--yellow', help = 'Yellow color threshold',
278 required = False, default = 200, type = int)
279 return p
282 p = setup_args()
283 args = p.parse_args()
284 Repo = args.repo
285 RedThresh = args.red
286 YellowThresh = args.yellow
288 # Find our commits.
290 if args.load:
291 dumpfile = open(args.load, 'r')
292 Mergelist = pickle.loads(dumpfile.read())
293 dumpfile.close
294 Mainline = Mergelist['mainline']
295 else:
296 Mainline = Merge('mainline', tree = 'mainline')
297 ingest_commits(sys.stdin)
298 if args.dump:
299 dumpfile = open(args.dump, 'w')
300 dumpfile.write(pickle.dumps(Mergelist))
301 dumpfile.close()
303 # Now generate the flow graph.
305 #dumptree(Mainline)
306 zorch_internals(Mainline)
307 #dumptree(Mainline)
308 Treecounts['mainline'] = { 'Applied by Linus': len(Mainline.commits) }
309 print 'total commits', count_commits(Mainline)
310 tree_stats(Mainline)
311 if args.trim:
312 trim_trees(args.trim)
313 print 'Tree flow'
314 dumpflow('mainline')
315 if args.gvoutput:
316 GV_out(args.gvoutput)