treeplot: show signed trees in black
[git-dm.git] / database.py
blobbf13227841d69e17403b6a63e3a1c93d26289b98
2 # The "database".
4 # This code is part of the LWN git data miner.
6 # Copyright 2007-11 Eklektix, Inc.
7 # Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
9 # This file may be distributed under the terms of the GNU General
10 # Public License, version 2.
12 import sys, datetime
class Hacker:
    """One developer, with the addresses, employers and credits we know of.

    Attributes:
        name, id      -- display name and numeric identifier.
        email         -- list of addresses this person has used.
        employer      -- list parallel to ``email``; entry i is the employer
                         history for email[i], iterated as (date, employer)
                         pairs.
        added, removed, changed -- line counts summed over ``patches``.
        patches, signoffs, reviews, tested, reports -- lists of patches.
        testcred, repcred -- counters bumped by testcredit/reportcredit.
        activity_start, activity_end -- earliest / latest patch date seen.
        versions      -- releases this person is represented in.
    """

    def __init__(self, name, id, elist, email):
        """Create a hacker known by one address.

        ``elist`` is the employer history that goes with ``email``.
        """
        self.name = name
        self.id = id
        self.employer = [elist]
        self.email = [email]
        self.changed = self.added = self.removed = 0
        self.patches = []
        self.signoffs = []
        self.reviews = []
        self.tested = []
        self.reports = []
        self.testcred = self.repcred = 0
        # Start with an inverted range so the first patch sets both ends.
        self.activity_start = datetime.date.max
        self.activity_end = datetime.date.min
        self.versions = []

    def addemail(self, email, elist):
        """Record another address (with its employer history) for this person.

        The address is also registered in the module-level HackersByEmail
        index.
        """
        self.email.append(email)
        self.employer.append(elist)
        HackersByEmail[email] = self

    def emailemployer(self, email, date):
        """Return the employer associated with ``email`` at ``date``.

        Each matching address's history is scanned in order and the first
        entry whose date is strictly later than ``date`` wins.  Passing
        ``email=None`` matches every address.  If nothing matches, a
        diagnostic line is written to stdout and None is returned.
        """
        for address, history in zip(self.email, self.employer):
            if email is None or address == email:
                for edate, empl in history:
                    if edate > date:
                        return empl
        # Written with sys.stdout.write rather than a print statement so the
        # module parses on Python 3 as well; the text emitted is unchanged.
        sys.stdout.write('OOPS.  %s %s %s %s %s\n'
                         % (self.name, self.employer, self.email, email, date))
        return None  # Should not happen

    def addpatch(self, patch):
        """Credit ``patch`` to this person and widen the activity window."""
        self.added += patch.added
        self.removed += patch.removed
        self.changed += max(patch.added, patch.removed)
        self.patches.append(patch)
        if patch.date < self.activity_start:
            self.activity_start = patch.date
        if patch.date > self.activity_end:
            self.activity_end = patch.date

    def addversion(self, release):
        """Note that the author is represented in this release (no dups)."""
        if release not in self.versions:
            self.versions.append(release)

    #
    # There's got to be a better way.
    #
    def addsob(self, patch):
        """Record a patch this person signed off on."""
        self.signoffs.append(patch)

    def addreview(self, patch):
        """Record a patch this person reviewed."""
        self.reviews.append(patch)

    def addtested(self, patch):
        """Record a patch this person tested."""
        self.tested.append(patch)

    def addreport(self, patch):
        """Record a patch that came from this person's report."""
        self.reports.append(patch)

    def reportcredit(self, patch):
        """Bump the report-credit counter; ``patch`` itself is not stored."""
        self.repcred += 1

    def testcredit(self, patch):
        """Bump the test-credit counter; ``patch`` itself is not stored."""
        self.testcred += 1
# Three indexes onto the same set of Hacker objects, keyed by display
# name, by email address and by numeric ID respectively.
HackersByName = {}
HackersByEmail = {}
HackersByID = {}

# Next ID to hand out; StoreHacker() advances it.
MaxID = 0
def StoreHacker(name, elist, email):
    """Create a Hacker, file it under its name, email and ID, and return it.

    The ID is the current value of the module-level MaxID counter, which
    is advanced before the Hacker is constructed.
    """
    global MaxID

    new_id, MaxID = MaxID, MaxID + 1
    hacker = Hacker(name, new_id, elist, email)
    for index, key in ((HackersByName, name),
                       (HackersByEmail, email),
                       (HackersByID, new_id)):
        index[key] = hacker
    return hacker
def LookupEmail(addr):
    """Return the Hacker registered under ``addr``, or None if unknown."""
    return HackersByEmail.get(addr)
def LookupName(name):
    """Return the Hacker registered under ``name``, or None if unknown."""
    return HackersByName.get(name)
def LookupID(id):
    """Return the Hacker with numeric identifier ``id``, or None if unknown."""
    return HackersByID.get(id)
def LookupStoreHacker(name, email, mapunknown = True):
    """Find the Hacker for ``name``/``email``, creating one if necessary.

    The address is first canonicalised with RemapEmail().  A hit on the
    address wins outright; otherwise a hit on the name gains the new
    address; otherwise a brand-new Hacker is stored.  ``mapunknown`` is
    passed through to LookupEmployer().
    """
    canonical = RemapEmail(email)

    by_address = LookupEmail(canonical)
    if by_address:  # already there
        return by_address

    history = LookupEmployer(canonical, mapunknown)
    by_name = LookupName(name)
    if not by_name:
        return StoreHacker(name, history, canonical)

    # Known person, new email.
    by_name.addemail(canonical, history)
    return by_name
def AllHackers():
    """Return a list of every stored Hacker.

    The result is an independent list, so callers may sort it in place or
    keep it around while the registry changes.  On Python 2 this is what
    dict.values() already returns; on Python 3 a bare values() would be a
    live view instead.
    """
    return list(HackersByID.values())
def DumpDB():
    """Write a human-readable dump of every hacker to 'database.dump'.

    Hackers are listed in name order.  Each gets a summary line (ID, name,
    patch count, lines added/removed, signoff count), then one line per
    email address followed by that address's employer history, and finally
    the list of versions if there is one.
    """
    # The with-block guarantees the file is closed even if a write fails.
    with open('database.dump', 'w') as out:
        for name in sorted(HackersByName):
            h = HackersByName[name]
            out.write('%4d %s %d p (+%d -%d) sob: %d\n'
                      % (h.id, h.name, len(h.patches),
                         h.added, h.removed, len(h.signoffs)))
            # h.email and h.employer are parallel lists.
            for address, history in zip(h.email, h.employer):
                out.write('\t%s -> \n' % (address))
                for date, empl in history:
                    out.write('\t\t %d-%d-%d %s\n'
                              % (date.year, date.month, date.day, empl.name))
            if h.versions:
                out.write('\tVersions: %s\n' % ','.join(h.versions))
#
# Hack: the first visible tag shows up some way into the stream; once we
# see it, push it backward through the changes we have already seen.
#
def ApplyFirstTag(tag):
    """Replace every non-empty versions list with a fresh ``[tag]``."""
    for hacker in HackersByName.values():
        if hacker.versions:
            hacker.versions = [tag]
157 # Employer info.
class Employer:
    """Running totals of the work credited to one employer.

    Attributes:
        name    -- the employer's name.
        added, removed, changed -- line counts summed over changesets.
        count   -- number of changesets.
        sobs    -- number of signoffs.
        hackers -- distinct patch authors, in first-seen order.
    """

    def __init__(self, name):
        self.name = name
        self.added = 0
        self.removed = 0
        self.count = 0
        self.changed = 0
        self.sobs = 0
        self.hackers = []

    def AddCSet(self, patch):
        """Fold one changeset into the totals and remember its author."""
        plus, minus = patch.added, patch.removed
        self.added += plus
        self.removed += minus
        # "Changed" counts the larger side of the diff.
        self.changed += max(plus, minus)
        self.count += 1
        author = patch.author
        if author not in self.hackers:
            self.hackers.append(author)

    def AddSOB(self):
        """Count one more signoff for this employer."""
        self.sobs += 1
# Every known employer object, keyed by employer name.
Employers = {}
def GetEmployer(name):
    """Return the employer registered as ``name``, creating it on first use."""
    if name not in Employers:
        Employers[name] = Employer(name)
    return Employers[name]
def AllEmployers():
    """Return a list of every registered employer object.

    The result is an independent list rather than a live view of the
    registry, so it stays safe to iterate even if employers are created
    (e.g. through GetEmployer) while the caller is looping.  On Python 2
    this is what dict.values() already returns.
    """
    return list(Employers.values())
#
# Certain obnoxious developers, who will remain nameless (because we
# would never want to run afoul of Thomas) want their work split among
# multiple companies.  Let's try to cope with that.  Let's also hope
# this doesn't spread.
#
class VirtualEmployer(Employer):
    """A stand-in employer whose credit is shared out among real ones.

    ``splits`` is a list of (employer name, fraction) pairs.
    """

    def __init__(self, name):
        Employer.__init__(self, name)
        self.splits = []

    def addsplit(self, name, fraction):
        """Give ``fraction`` of this employer's credit to employer ``name``."""
        self.splits.append((name, fraction))

    def applysplits(self):
        """Destructively move our credits to the real employers.

        Each target receives int(total * fraction) of the added, removed,
        changed and count totals.  Only one level of weirdness is
        supported: targets are credited directly and are not themselves
        split further here.  Afterwards this object is re-initialised
        under its own name, which zeroes its totals and clears its
        splits.
        """
        for name, fraction in self.splits:
            real = GetEmployer(name)
            real.added += int(self.added * fraction)
            real.removed += int(self.removed * fraction)
            real.changed += int(self.changed * fraction)
            real.count += int(self.count * fraction)
        # Reset counts just in case.  Use our own name here, not the loop
        # variable: that would rename us after a split target and would
        # not even be defined when there are no splits.
        self.__init__(self.name)

    def store(self):
        """Register this object in Employers under its name.

        Warnings go to stderr if that replaces an existing entry or if no
        splits have been added; the replaced entry is also echoed to
        stdout.
        """
        if self.name in Employers:
            sys.stdout.write('%s\n' % (Employers[self.name],))
            sys.stderr.write('WARNING: Virtual empl %s overwrites another\n'
                             % (self.name))
        if len(self.splits) == 0:
            sys.stderr.write('WARNING: Virtual empl %s has no splits\n'
                             % (self.name))
        # Should check that they add up too, but I'm lazy
        Employers[self.name] = self
class FileType:
    """Classify file names by matching them against compiled patterns.

    ``patterns`` maps a file-type name to an iterable of objects with a
    ``search(filename)`` method (e.g. compiled regular expressions);
    ``order`` lists the type names in the order they should be tried.
    """

    def __init__(self, patterns=None, order=None):
        # Build fresh containers per instance.  Literal {} / [] defaults
        # would be shared by every instance created without arguments.
        # Containers passed in by the caller are stored as-is, not copied.
        self.patterns = {} if patterns is None else patterns
        self.order = [] if order is None else order

    def guess_file_type(self, filename, patterns=None, order=None):
        """Return the first type in ``order`` with a pattern matching
        ``filename``, or 'unknown' if none does.

        ``patterns`` and ``order`` override the instance's own tables when
        given and non-empty.  Types named in ``order`` that have no entry
        in ``patterns`` are skipped.
        """
        patterns = patterns or self.patterns
        order = order or self.order

        for file_type in order:
            for patt in patterns.get(file_type, ()):
                if patt.search(filename):
                    return file_type

        return 'unknown'
#
# The module-wide classifier.  It starts out with no patterns and no
# ordering, so by default we recognize nothing.
#
FileTypes = FileType(patterns={}, order=[])
def MixVirtuals():
    """Mix all the virtual employers into their real destinations."""
    virtuals = [empl for empl in AllEmployers()
                if isinstance(empl, VirtualEmployer)]
    for virtual in virtuals:
        virtual.applysplits()
#
# The email map: variant address -> canonical address.
#
EmailAliases = {}
def AddEmailAlias(variant, canonical):
    """Record that address ``variant`` should be treated as ``canonical``.

    A second alias for the same variant produces a warning on stderr and
    then replaces the earlier one.
    """
    # "in" instead of dict.has_key(), which no longer exists on Python 3.
    if variant in EmailAliases:
        sys.stderr.write('Duplicate email alias for %s\n' % (variant))
    EmailAliases[variant] = canonical
def RemapEmail(email):
    """Lower-case ``email`` and return its alias target if it has one.

    Addresses without an entry in EmailAliases come back lower-cased but
    otherwise unchanged.
    """
    lowered = email.lower()
    return EmailAliases.get(lowered, lowered)
#
# Email-to-employer mapping.  Keys are lower-cased addresses or domain
# names; each value is a list of (end date, employer) pairs.
#
EmailToEmployer = {}

# 365 days after the day this module was loaded; used as the end date
# for employer mappings that have no explicit end.
nextyear = datetime.date.today() + datetime.timedelta(days=365)
def AddEmailEmployerMapping(email, employer, end = nextyear):
    """Record that ``email`` maps to ``employer`` until ``end``.

    email    -- address or domain; stored lower-cased.
    employer -- employer name, resolved (or created) via GetEmployer().
    end      -- last date the mapping applies; None means ``nextyear``.

    The entries for an address are kept in ascending end-date order: the
    new pair goes in front of the first existing entry with a later end
    date, or at the tail if there is none.  An existing entry with the
    same end date draws a warning on stdout but does not stop the
    insertion.
    """
    if end is None:
        end = nextyear
    email = email.lower()
    empl = GetEmployer(employer)

    history = EmailToEmployer.get(email)
    if history is None:
        # First mapping for this address.
        EmailToEmployer[email] = [(end, empl)]
        return

    for i, (date, _) in enumerate(history):
        if date == end:  # probably both nextyear
            # sys.stdout.write instead of a print statement so the module
            # parses on Python 3 too; the text emitted is unchanged.
            sys.stdout.write('WARNING: duplicate email/empl for %s\n'
                             % (email))
        if date > end:
            history.insert(i, (end, empl))
            return
    history.append((end, empl))
def MapToEmployer(email, unknown = 0):
    """Return the employer history list for ``email``.

    Lookup order:
      1. the whole (lower-cased) address;
      2. suffixes of the domain part, starting with the last two labels
         and growing leftward to the full domain.
    When nothing matches, a one-entry list ending at ``nextyear`` is
    returned: the employer '(Unknown)' if ``unknown`` is true, otherwise
    an employer named after the address itself.  An address without an
    '@' is reported on stdout and credited to the employer 'Funky'.
    """
    # Somebody sometimes does s/@/ at /; let's fix it.
    email = email.lower().replace(' at ', '@')
    try:
        return EmailToEmployer[email]
    except KeyError:
        pass

    namedom = email.split('@')
    if len(namedom) < 2:
        # sys.stdout.write instead of a print statement so the module
        # parses on Python 3 too; the text emitted is unchanged.
        sys.stdout.write('Oops...funky email %s\n' % email)
        return [(nextyear, GetEmployer('Funky'))]

    s = namedom[1].split('.')
    for dots in range(len(s) - 2, -1, -1):
        addr = '.'.join(s[dots:])
        try:
            return EmailToEmployer[addr]
        except KeyError:
            pass
    #
    # We don't know who they work for.
    #
    if unknown:
        return [(nextyear, GetEmployer('(Unknown)'))]
    return [(nextyear, GetEmployer(email))]
def LookupEmployer(email, mapunknown = 0):
    """Return the employer history list for ``email``.

    Thin wrapper around MapToEmployer(); ``mapunknown`` is passed through
    as its ``unknown`` flag.
    """
    return MapToEmployer(email, mapunknown)