Replace treeplot with a better version
[git-dm.git] / treeplot
blobf5ba9a85e387deab55077d95ba213d2a2f4054e6
1 #!/usr/bin/python
3 # git log --pretty="%H %P" | this program
5 import sys, subprocess, argparse, pickle
6 import graphviz
7 import patterns
9 Mergepat = patterns.patterns['ExtMerge']
10 IntMerge = patterns.patterns['IntMerge']
11 IntMerge2 = patterns.patterns['IntMerge2']
12 Mergelist = { }
14 class Merge:
15 def __init__(self, id, tree = None):
16 self.id = id
17 self.commits = [ ]
18 self.merges = [ ]
19 self.tree = tree or '?'
20 self.internal = False
21 if tree is None:
22 self.getdesc()
23 Mergelist[id] = self
25 def normalize_tree(self, tree):
26 if tree[:6] == 'git://':
27 tree = tree[6:]
28 if tree.find('git.kernel.org') >= 0:
29 stree = tree.split('/')
30 return '$KORG/%s/%s' % (stree[-2], stree[-1])
31 return tree
33 def getdesc(self):
34 command = ['git', 'log', '-1', self.id]
35 p = subprocess.Popen(command, cwd = Repo, stdout = subprocess.PIPE,
36 bufsize = 1)
37 for line in p.stdout.readlines():
39 # Maybe it's a merge of an external tree.
41 m = Mergepat.search(line)
42 if m:
43 self.tree = self.normalize_tree(m.group(3))
44 self.internal = False
45 break
47 # Or maybe it's an internal merge.
49 m = IntMerge.search(line) or IntMerge2.search(line)
50 if m:
51 self.internal = True
52 break
53 p.wait()
55 def add_commit(self, id):
56 self.commits.append(id)
58 def add_merge(self, merge):
59 self.merges.append(merge)
62 # Read the list of commits from the input stream and find which
63 # merge brought in each.
65 def ingest_commits(src):
66 count = 0
67 for line in src.readlines():
68 sline = line.split()
69 commit = sline[0]
70 mc = Mergelist[find_merge(sline[0])] # Needs try
71 if len(sline) > 2: # is a merge
72 mc.add_merge(Merge(commit))
73 else:
74 mc.add_commit(commit)
75 count += 1
76 if (count % 50) == 0:
77 print '\r%5d ' % (count),
78 sys.stdout.flush()
79 print
82 # Figure out which merge brought in a commit.
84 MergeIDs = { }
86 def find_merge(commit):
87 command = ['git', 'describe', '--contains', commit]
88 p = subprocess.Popen(command, cwd = Repo, stdout = subprocess.PIPE,
89 bufsize = 1)
90 desc = p.stdout.readline().decode('utf8')
91 p.wait()
93 # The description line has the form:
95 # tag~N^M~n...
97 # the portion up to the last ^ describes the merge we are after;
98 # in the absence of an ^, assume it's on the main branch.
100 uparrow = desc.rfind('^')
101 if uparrow < 0:
102 return 'mainline'
104 # OK, now get the real commit ID of the merge. Maybe we have
105 # it stashed?
107 try:
108 return MergeIDs[desc[:uparrow]]
109 except KeyError:
110 pass
112 # Nope, we have to dig it out the hard way.
114 command = ['git', 'log', '--pretty=%H', '-1', desc[:uparrow]]
115 p = subprocess.Popen(command, cwd = Repo, stdout = subprocess.PIPE,
116 bufsize = 1)
117 merge = p.stdout.readline().decode('utf8').strip()
119 # If we get back the same commit, we're looking at one of Linus's
120 # version number tags.
122 if merge == commit:
123 merge = 'mainline'
124 MergeIDs[desc[:uparrow]] = merge
125 p.wait()
126 return merge
129 # Internal merges aren't interesting from our point of view. So go through,
130 # find them all, and move any commits from such into the parent.
132 def zorch_internals(merge):
133 new_merges = [ ]
134 for m in merge.merges:
135 zorch_internals(m)
136 if m.internal:
137 merge.commits += m.commits
138 new_merges += m.merges
139 else:
140 new_merges.append(m)
141 merge.merges = new_merges
144 # Figure out how many commits flowed at each stage.
146 def count_commits(merge):
147 merge.ccount = len(merge.commits)
148 for m in merge.merges:
149 merge.ccount += count_commits(m)
150 return merge.ccount
153 # ...and how many flowed between each pair of trees
155 Treecounts = { }
157 def tree_stats(merge):
158 try:
159 tcount = Treecounts[merge.tree]
160 except KeyError:
161 tcount = Treecounts[merge.tree] = { }
162 for m in merge.merges:
163 mcount = tcount.get(m.tree, 0)
164 try:
165 tcount[m.tree] = mcount + m.ccount
166 except AttributeError:
167 print 'AError', m.id, m.tree, mcount
168 continue
169 tree_stats(m)
172 # Maybe we only want so many top-level trees
174 def trim_trees(limit):
175 srcs = Treecounts['mainline']
176 srcnames = srcs.keys()
177 srcnames.sort(lambda t1, t2: srcs[t2] - srcs[t1])
178 nextra = len(srcnames) - limit
179 zapped = 0
180 for extra in srcnames[limit:]:
181 zapped += srcs[extra]
182 del srcs[extra]
183 srcs['%d other trees' % (nextra)] = zapped
185 # Take our map of the commit structure and boil it down to how many commits
186 # moved from one tree to the next.
189 def dumptree(start, indent = ''):
190 int = ''
191 if start.internal:
192 int = 'I: '
193 print '%s%s%s: %d/%d %s' % (indent, int, start.id[:10],
194 len(start.merges), len(start.commits),
195 start.tree)
196 for merge in start.merges:
197 dumptree(merge, indent + ' ')
199 def dumpflow(tree, indent = '', seen = []):
200 try:
201 srcs = Treecounts[tree]
202 except KeyError:
203 return
204 srctrees = srcs.keys()
205 srctrees.sort(lambda t1, t2: srcs[t2] - srcs[t1])
206 for src in srctrees:
207 if src in seen:
208 print 'Skip', src, srcs[src], seen
209 else:
210 print '%s%4d %s' % (indent, srcs[src], src)
211 dumpflow(src, indent = indent + ' ', seen = seen + [tree])
214 # Graphviz.
216 def GV_out(file):
217 graph = graphviz.Digraph('mainline', filename = file)
218 graph.body.extend(['label="Patch flow into the mainline"',
219 'concentrate=true',
220 'rankdir=LR' ])
221 graph.attr('node', fontsize="20", color="red", shape='ellipse')
222 graph.node('mainline')
223 graph.attr('node', fontsize="14", color="black", shape='polygon',
224 sides='4')
225 GV_out_node(graph, 'mainline')
226 graph.view()
228 def GV_out_node(graph, node, seen = []):
229 try:
230 srcs = Treecounts[node]
231 except KeyError: # "applied by linus"
232 return
233 srctrees = srcs.keys()
234 srctrees.sort(lambda t1, t2: srcs[t2] - srcs[t1])
235 for src in srctrees:
236 if src not in seen:
237 graph.edge(src, node, taillabel='%d' % srcs[src],
238 labelfontsize="14")
239 GV_out_node(graph, src, seen + [node])
241 # argument parsing stuff.
243 def setup_args():
244 p = argparse.ArgumentParser()
245 p.add_argument('-d', '--dump', help = 'Dump merge list to file',
246 required = False, default = '')
247 p.add_argument('-g', '--gvoutput', help = 'Graphviz output',
248 required = False, default = '')
249 p.add_argument('-l', '--load', help = 'Load merge list from file',
250 required = False, default = '')
251 p.add_argument('-o', '--output', help = 'Output file',
252 required = False, default = '-')
253 p.add_argument('-r', '--repo', help = 'Repository location',
254 required = False, default = '/home/corbet/kernel')
255 p.add_argument('-t', '--trim', help = 'Trim top level to this many trees',
256 required = False, default = 0, type = int)
257 return p
260 p = setup_args()
261 args = p.parse_args()
262 Repo = args.repo
265 # Find our commits.
267 if args.load:
268 dumpfile = open(args.load, 'r')
269 Mergelist = pickle.loads(dumpfile.read())
270 dumpfile.close
271 Mainline = Mergelist['mainline']
272 else:
273 Mainline = Merge('mainline', tree = 'mainline')
274 ingest_commits(sys.stdin)
275 if args.dump:
276 dumpfile = open(args.dump, 'w')
277 dumpfile.write(pickle.dumps(Mergelist))
278 dumpfile.close()
280 # Now generate the flow graph.
282 zorch_internals(Mainline)
283 # dumptree(Mainline)
284 Treecounts['mainline'] = { 'Applied by Linus': len(Mainline.commits) }
285 print 'total commits', count_commits(Mainline)
286 tree_stats(Mainline)
287 if args.trim:
288 trim_trees(args.trim)
289 print 'Tree flow'
290 dumpflow('mainline')
291 if args.gvoutput:
292 GV_out(args.gvoutput)