Hacky stuff for 4.0
[git-dm.git] / treeplot
blobb6eedcb9d7686d58d6af2236127b3f7ad157d37c
1 #!/usr/bin/python
3 # Create a graph of patch flow into the mainline.
5 # This code is part of the LWN git data miner.
7 # Copyright 2007-11 Eklektix, Inc.
8 # Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
10 # This file may be distributed under the terms of the GNU General
11 # Public License, version 2.
13 import sys
14 from patterns import patterns
17 # The various types of commit we understand.
19 class Commit:
20 def __init__(self, id, parent):
21 self.id = id
22 self.parent = parent
23 self.ismerge = 0
24 self.treepriority = 0
26 # Merges are special
28 class Merge (Commit):
29 def __init__(self, id, parent):
30 Commit.__init__(self, id, parent)
31 self.ismerge = 1
32 self.internal = 1 # Two branches within a repo?
33 self.parents = [ parent ]
35 def addparent(self, parentid):
36 self.parents.append(parentid)
38 def addtree(self, tree):
39 self.tree = tree
40 self.internal = 0
43 # Trees: where the commits come from.
45 class Tree:
46 def __init__(self, name, url):
47 self.name = name
48 self.url = url
49 self.inputs = [ ]
50 self.commits = [ ]
52 def addcommit(self, id):
53 self.commits.append(id)
55 def addinput(self, tree):
56 if tree not in self.inputs:
57 self.inputs.append(tree)
58 # print '%s -> %s' % (tree.name, self.name)
60 Mainline = Tree('Mainline',
61 'git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git')
62 KnownTrees = { Mainline.url: Mainline }
64 def NormalizeURL(url):
65 if url[:4] == 'git:':
66 return url
67 if url == '../net-2.6/':
68 url = 'git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6'
69 url = url.replace('master.kernel.org:', 'git://git.kernel.org')
70 if url[-18:] == 'torvalds/linux-2.6':
71 url += '.git'
72 if url[:8] == '/pub/scm':
73 url = 'git://git.kernel.org' + url
74 return url
76 def LookupTree(url):
77 url = NormalizeURL(url)
78 try:
79 return KnownTrees[url]
80 except KeyError:
81 tree = Tree(url, url)
82 KnownTrees[url] = tree
83 return tree
86 # We track which tree every commit belongs to.
88 CommitTrees = { }
89 class CTEntry:
90 def __init__ (self, tree, priority, path):
91 self.tree = tree
92 self.priority = priority
93 self.path = path
95 def AddCommitTree(id, entry):
96 # print 'add: ', id, '[',
97 # for tree in entry.path:
98 # print tree.name,
99 # print ']'
100 try:
101 oldentry = CommitTrees[id]
102 if entry.priority < oldentry.priority:
103 CommitTrees[id] = entry
104 except KeyError:
105 CommitTrees[id] = entry
108 def LookupCommitTree(id):
109 try:
110 return CommitTrees[id]
111 except KeyError:
112 print 'Unfound commit %s' % (id)
113 return CTEntry (Mainline, 0, [])
116 # Input handling with one-line pushback.
118 SavedLine = None
119 Input = sys.stdin
121 def GetLine():
122 global SavedLine
123 if SavedLine:
124 ret = SavedLine
125 SavedLine = None
126 return ret
127 return Input.readline()
129 def SaveLine(line):
130 global SavedLine
131 SavedLine = line
134 # Pull in a commit and see what it is.
136 def GetCommit():
138 # Skip junk up to the next commit.
140 while 1:
141 line = GetLine()
142 if not line:
143 return None
144 m = patterns['commit'].match(line)
145 if m:
146 break
149 # Look at the commit and see how many parents we have.
151 ids = m.group(1).split()
152 if len(ids) <= 1:
153 if len(CommitTrees.values()) > 0:
154 print 'No-Parent commit:', ids[0]
155 return GetCommit()
156 print 'Did you run git with --parents?'
157 print ids
158 sys.exit(1)
159 if len(ids) == 2: # Simple commit
160 return Commit(ids[0], ids[1])
162 # OK, we have a merge.
164 merge = Merge(ids[0], ids[1])
165 for id in ids[2:]:
166 merge.addparent(id)
168 # We need to figure out what kind of merge it is, so read through the
169 # descriptive text to the merge line.
171 while 1:
172 line = GetLine()
173 if not line:
174 print 'EOF looking for merge line'
175 return None
177 # Maybe it's an external merge?
179 m = patterns['ExtMerge'].match(line)
180 if m:
181 merge.addtree(LookupTree(m.group(3)))
182 return merge
184 # OK, maybe it's internal
186 if patterns['IntMerge'].match(line) or patterns['IntMerge2'].match(line):
187 #print 'Internal:', line[:-1]
188 merge.internal = 1
189 return merge
190 m = patterns['commit'].match(line)
191 if m:
192 print 'Hit next commit (%s) looking for merge line' % (m.group(1))
193 SaveLine(line)
194 return GetCommit()
197 # Print out a tree and its inputs
199 def PrintTree(tree, indent = ''):
200 print '%s%4d %s' % (indent, len(tree.commits), tree.name)
201 for input in tree.inputs:
202 PrintTree(input, indent + ' ')
205 # Let's try to build a data structure giving the patch flows.
207 class FlowNode:
208 def __init__(self, tree):
209 self.tree = tree
210 self.inputs = { }
211 self.commits = 0
213 def BuildFlowTree():
214 rootnode = FlowNode(Mainline)
215 notree = Tree('[No tree]', '')
216 for centry in CommitTrees.values():
217 path = centry.path
218 if not path:
219 path = [ notree ]
220 FillFlowPath(path, rootnode)
221 return rootnode
223 def FillFlowPath(path, node):
224 node.commits += 1
225 if len(path) == 0:
226 return
227 next, rest = path[0], path[1:]
228 try:
229 nextnode = node.inputs[next.name]
230 except KeyError:
231 nextnode = node.inputs[next.name] = FlowNode(next)
232 return FillFlowPath(rest, nextnode)
234 def PrintFlowTree(ftree, indent = ''):
235 print '%s%3d %s' % (indent, ftree.commits, ftree.tree.name)
236 inputs = ftree.inputs.values()
237 inputs.sort(GVSort)
238 for input in inputs:
239 PrintFlowTree(input, indent + ' ')
242 # Something for graphviz
244 GVHeader = '''digraph "runtree" {
245 graph [ label = "Patch flow into the mainline",
246 concentrate = true,
247 nodesep = 0.1,
248 rankdir = LR ];
249 node [shape = polygon,
250 sides = 4,
251 height = 0.3
252 fontsize = 8];
256 MainlineCommits = 0
258 def GVTree(ftree):
259 global MainlineCommits
260 MainlineCommits = ftree.commits
261 gvf = open('runtree.gv', 'w')
262 gvf.write(GVHeader)
263 inputs = ftree.inputs.values()
264 inputs.sort(GVSort)
265 for input in inputs:
266 GVPrintNode(gvf, input, 'Mainline')
267 gvf.write('}\n')
269 def GVNodeName(treename):
270 sname = treename.split('/')
271 if treename.find('kernel.org') >= 0:
272 return '%s/%s' % (sname[-2], sname[-1])
273 sep = treename.find ('://')
274 if sep > 0:
275 return treename[sep+3:]
276 return treename
278 def GVSort(n1, n2):
279 return n2.commits - n1.commits
281 def GVPrintNode(gvf, node, parent):
282 name = GVNodeName(node.tree.name)
283 gvf.write ('"%s" -> "%s" [taillabel = "%d", labelfontsize = 8' % (name, parent, node.commits))
284 gvf.write (', arrowsize = 0.5')
285 if MainlineCommits/node.commits < 20:
286 gvf.write(', color = red')
287 elif MainlineCommits/node.commits < 100:
288 gvf.write(', color = orange');
289 gvf.write(']\n')
290 inputs = node.inputs.values()
291 if inputs:
292 inputs.sort(GVSort)
293 for input in inputs:
294 GVPrintNode(gvf, input, name)
297 # Main code.
299 commit = GetCommit()
300 ncommits = 0
301 while commit:
302 ncommits += 1
303 entry = LookupCommitTree(commit.id)
304 tree = entry.tree
305 priority = entry.priority
306 tree.addcommit(commit.id)
308 # For regular commits, just remember the tree involved
310 if not commit.ismerge:
311 AddCommitTree(commit.parent, entry)
313 # For merges we have to deal with all the parents.
315 else:
316 AddCommitTree(commit.parents[0], CTEntry (tree, priority, entry.path))
317 if commit.internal:
318 for p in commit.parents[1:]:
319 path = entry.path + [tree]
320 AddCommitTree(p, CTEntry (tree, priority, entry.path))
321 else:
322 for p in commit.parents[1:]:
323 path = entry.path + [commit.tree]
324 AddCommitTree(p, CTEntry (commit.tree, priority + 1, path))
325 if commit.tree is not Mainline:
326 tree.addinput(commit.tree)
327 commit = GetCommit()
329 #PrintTree(Mainline)
330 ftree = BuildFlowTree()
331 PrintFlowTree(ftree)
332 GVTree(ftree)
333 print '%d commits total, %d trees' % (MainlineCommits, len (KnownTrees.keys()))