Various fixes and debug code removal.
[git-dm.git] / database.py
blobca483458696a28dbb38d1622540d11a9c0127998
2 # The "database".
4 # This code is part of the LWN git data miner.
6 # Copyright 2007-11 Eklektix, Inc.
7 # Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
9 # This file may be distributed under the terms of the GNU General
10 # Public License, version 2.
12 import sys, datetime
class Hacker:
    """One contributor and everything credited to them.

    ``email`` and ``employer`` are parallel lists: ``employer[i]`` is the
    list of ``(end_date, employer)`` pairs that goes with ``email[i]``.
    """

    def __init__(self, name, id, elist, email):
        """Create a hacker known by one address.

        name  -- display name
        id    -- numeric identifier assigned by the caller
        elist -- list of (end_date, employer) pairs for ``email``
        email -- the first known address
        """
        self.name = name
        self.id = id
        self.employer = [elist]
        self.email = [email]
        self.added = self.removed = 0
        self.patches = []
        self.signoffs = []
        self.reviews = []
        self.tested = []
        self.reports = []
        self.testcred = self.repcred = 0
        self.versions = []

    def addemail(self, email, elist):
        """Attach another address (with its employer list) to this hacker."""
        self.email.append(email)
        self.employer.append(elist)
        HackersByEmail[email] = self

    def emailemployer(self, email, date):
        """Return the employer in effect for ``email`` at ``date``.

        The first entry whose end date is later than ``date`` wins.  If the
        address is unknown, or no entry ends after ``date``, a diagnostic is
        printed and None is returned.
        """
        for addr, elist in zip(self.email, self.employer):
            if addr != email:
                continue
            for edate, empl in elist:
                if edate > date:
                    return empl
        # Single-argument print() behaves the same on Python 2 and 3.
        print(' '.join(str(item) for item in
                       ('OOPS. ', self.name, self.employer, self.email,
                        email, date)))
        return None  # Should not happen

    def addpatch(self, patch):
        """Credit a patch, accumulating its added/removed line counts."""
        self.added += patch.added
        self.removed += patch.removed
        self.patches.append(patch)

    #
    # Note that the author is represented in this release.
    #
    def addversion(self, release):
        if release not in self.versions:
            self.versions.append(release)

    #
    # There's got to be a better way.
    #
    def addsob(self, patch):
        self.signoffs.append(patch)

    def addreview(self, patch):
        self.reviews.append(patch)

    def addtested(self, patch):
        self.tested.append(patch)

    def addreport(self, patch):
        self.reports.append(patch)

    def reportcredit(self, patch):
        """Count one report credit; ``patch`` itself is not stored."""
        self.repcred += 1

    def testcredit(self, patch):
        """Count one testing credit; ``patch`` itself is not stored."""
        self.testcred += 1
# Registries of every known hacker, indexed by name, address and ID.
HackersByName = {}
HackersByEmail = {}
HackersByID = {}
MaxID = 0  # the ID the next stored hacker will receive


def StoreHacker(name, elist, email):
    """Create a Hacker under the next free ID and enter it in all registries.

    Returns the newly created Hacker.
    """
    global MaxID

    new_id, MaxID = MaxID, MaxID + 1
    hacker = Hacker(name, new_id, elist, email)
    HackersByID[new_id] = hacker
    HackersByEmail[email] = hacker
    HackersByName[name] = hacker
    return hacker
def LookupEmail(addr):
    """Return the hacker registered under ``addr``, or None if unknown."""
    return HackersByEmail.get(addr)
def LookupName(name):
    """Return the hacker registered under ``name``, or None if unknown."""
    return HackersByName.get(name)
def LookupID(id):
    """Return the hacker registered under numeric ``id``, or None."""
    return HackersByID.get(id)
def LookupStoreHacker(name, email, mapunknown = True):
    """Find the hacker for ``email``, creating or extending one if needed.

    The address is first passed through RemapEmail.  If it is already
    registered, that hacker is returned.  Otherwise a hacker already known
    by ``name`` gains the new address; failing that, a new hacker is stored.
    ``mapunknown`` is handed on to LookupEmployer.
    """
    canonical = RemapEmail(email)
    known = LookupEmail(canonical)
    if known:  # already there
        return known

    employers = LookupEmployer(canonical, mapunknown)
    namesake = LookupName(name)
    if not namesake:
        return StoreHacker(name, employers, canonical)

    # Same person, new email
    namesake.addemail(canonical, employers)
    return namesake
def AllHackers():
    """Return a list of every registered hacker.

    A list is built explicitly so callers get an independent snapshot on
    Python 3 as well, where dict.values() is a live view that cannot be
    sorted in place or iterated while the registry grows.
    """
    return list(HackersByID.values())
def DumpDB():
    """Write a readable dump of the hacker registry to 'database.dump'.

    Hackers are listed in name order: a summary line, then each address
    with its dated employer entries, then any versions recorded for them.
    """
    # 'with' closes the file even if a write fails part-way through.
    with open('database.dump', 'w') as out:
        # sorted() works whether keys() is a list or a view.
        for name in sorted(HackersByName):
            h = HackersByName[name]
            out.write('%4d %s %d p (+%d -%d) sob: %d\n' % (h.id, h.name,
                                                            len(h.patches),
                                                            h.added, h.removed,
                                                            len(h.signoffs)))
            for addr, elist in zip(h.email, h.employer):
                out.write('\t%s -> \n' % (addr,))
                for date, empl in elist:
                    out.write('\t\t %d-%d-%d %s\n' % (date.year, date.month,
                                                      date.day, empl.name))
            if h.versions:
                out.write('\tVersions: %s\n' % ','.join(h.versions))
141 # Hack: The first visible tag comes a ways into the stream; when we see it,
142 # push it backward through the changes we've already seen.
def ApplyFirstTag(tag):
    """Replace the version list of every hacker who has one with [tag]."""
    for hacker in HackersByName.values():
        if not hacker.versions:
            continue
        hacker.versions = [tag]
150 # Employer info.
class Employer:
    """Running totals of the work credited to one employer."""

    def __init__(self, name):
        self.name = name
        self.added = 0
        self.removed = 0
        self.count = 0
        self.changed = 0
        self.sobs = 0
        self.hackers = []

    def AddCSet(self, patch):
        """Credit one changeset and remember its author (once)."""
        self.count += 1
        self.added += patch.added
        self.removed += patch.removed
        # "changed" counts the larger of the two line counts per patch.
        self.changed += max(patch.added, patch.removed)
        author = patch.author
        if author in self.hackers:
            return
        self.hackers.append(author)

    def AddSOB(self):
        """Count one signoff."""
        self.sobs += 1
# Every employer seen so far, keyed by name.
Employers = {}


def GetEmployer(name):
    """Return the employer called ``name``, creating it on first use."""
    if name not in Employers:
        Employers[name] = Employer(name)
    return Employers[name]
def AllEmployers():
    """Return a list of every registered employer.

    A list is built explicitly so callers get an independent snapshot on
    Python 3 as well, where dict.values() is a live view that breaks if the
    registry gains entries during iteration.
    """
    return list(Employers.values())
184 # Certain obnoxious developers, who will remain nameless (because we
185 # would never want to run afoul of Thomas) want their work split among
186 # multiple companies. Let's try to cope with that. Let's also hope
187 # this doesn't spread.
class VirtualEmployer(Employer):
    """An employer whose credit is divided among real employers."""

    def __init__(self, name):
        Employer.__init__(self, name)
        self.splits = []  # list of (real employer name, fraction)

    def addsplit(self, name, fraction):
        """Send ``fraction`` of this employer's credit to employer ``name``."""
        self.splits.append((name, fraction))

    #
    # Go through and (destructively) apply our credits to the
    # real employer.  Only one level of weirdness is supported.
    #
    def applysplits(self):
        """Distribute the accumulated counts to the real employers.

        Each share is truncated to an integer.  Afterwards this object is
        re-initialised, which clears its counts, hackers and splits.
        """
        for name, fraction in self.splits:
            real = GetEmployer(name)
            real.added += int(self.added*fraction)
            real.removed += int(self.removed*fraction)
            real.changed += int(self.changed*fraction)
            real.count += int(self.count*fraction)
        # Reset counts just in case.  Use our own name: the loop variable
        # would rename us to the last split target, and is undefined when
        # there are no splits at all.
        self.__init__(self.name)

    def store(self):
        """Register this employer, warning on overwrite or missing splits."""
        if self.name in Employers:
            print(Employers[self.name])
            sys.stderr.write('WARNING: Virtual empl %s overwrites another\n'
                             % (self.name))
        if len(self.splits) == 0:
            sys.stderr.write('WARNING: Virtual empl %s has no splits\n'
                             % (self.name))
            # Should check that they add up too, but I'm lazy
        Employers[self.name] = self
class FileType:
    """Classifies file names by matching them against regex patterns."""

    def __init__(self, patterns=None, order=None):
        """Set up the classifier.

        patterns -- dict mapping a type name to a list of objects with a
                    ``search`` method (e.g. compiled regexes)
        order    -- list of type names, in the order they are tried

        Both default to fresh empty containers; a shared mutable default
        would leak additions from one instance into every other.
        """
        self.patterns = {} if patterns is None else patterns
        self.order = [] if order is None else order

    def guess_file_type(self, filename, patterns=None, order=None):
        """Return the first type in ``order`` with a pattern matching
        ``filename``, or 'unknown' if nothing matches.

        ``patterns`` and ``order`` override the instance's own settings
        when given and non-empty.
        """
        patterns = patterns or self.patterns
        order = order or self.order

        for file_type in order:
            # Types listed in the order but lacking patterns are skipped.
            for patt in patterns.get(file_type, ()):
                if patt.search(filename):
                    return file_type

        return 'unknown'

#
# By default we recognize nothing.
#
FileTypes = FileType({}, [])
244 # Mix all the virtual employers into their real destinations.
def MixVirtuals():
    """Fold every virtual employer's credit into its real employers."""
    # Collect the virtual employers first: applysplits() calls GetEmployer,
    # which may add entries to the registry, and the registry must not
    # change size while it is being iterated.
    virtuals = [empl for empl in list(AllEmployers())
                if isinstance(empl, VirtualEmployer)]
    for empl in virtuals:
        empl.applysplits()
252 # The email map.
# Maps a variant address to its canonical form.
EmailAliases = {}


def AddEmailAlias(variant, canonical):
    """Record that ``variant`` should be treated as ``canonical``.

    A repeated variant produces a warning on stderr and the newer mapping
    replaces the older one.
    """
    # 'in' replaces dict.has_key, which Python 3 no longer provides.
    if variant in EmailAliases:
        sys.stderr.write('Duplicate email alias for %s\n' % (variant))
    EmailAliases[variant] = canonical
def RemapEmail(email):
    """Lower-case ``email`` and return its alias target if one is recorded,
    otherwise the lower-cased address itself."""
    lowered = email.lower()
    return EmailAliases.get(lowered, lowered)
269 # Email-to-employer mapping.
# Maps an address or domain to a list of (end_date, employer) pairs,
# kept in ascending end-date order.
EmailToEmployer = {}
# Stand-in end date for mappings that have no explicit end.
nextyear = datetime.date.today() + datetime.timedelta(days=365)


def AddEmailEmployerMapping(email, employer, end = nextyear):
    """Record that ``email`` belongs to ``employer`` until ``end``.

    email    -- address or domain; stored lower-cased
    employer -- employer name, resolved through GetEmployer
    end      -- end date of the association; None means ``nextyear``

    The new entry goes in front of the first existing entry with a later
    end date.  An existing entry with the same end date triggers a warning
    but the new entry is still added.
    """
    if end is None:
        end = nextyear
    email = email.lower()
    empl = GetEmployer(employer)

    entries = EmailToEmployer.get(email)
    if entries is None:
        EmailToEmployer[email] = [(end, empl)]
        return

    for i, (date, _) in enumerate(entries):
        if date == end:  # probably both nextyear
            print('WARNING: duplicate email/empl for %s' % (email))
        if date > end:
            entries.insert(i, (end, empl))
            return
    entries.append((end, empl))
def MapToEmployer(email, unknown = 0):
    """Return the (end_date, employer) list that applies to ``email``.

    Lookup order:
      1. the full address;
      2. suffixes of the domain, from the last two labels up to the whole
         domain;
      3. a fallback: the '(Unknown)' employer if ``unknown`` is true,
         otherwise an employer named after the address itself.

    An address with no '@' is reported and mapped to the 'Funky' employer.
    """
    # Somebody sometimes does s/@/ at /; let's fix it.
    email = email.lower().replace(' at ', '@')
    if email in EmailToEmployer:
        return EmailToEmployer[email]

    namedom = email.split('@')
    if len(namedom) < 2:
        print('Oops...funky email %s' % email)
        return [(nextyear, GetEmployer('Funky'))]

    labels = namedom[1].split('.')
    for start in range(len(labels) - 2, -1, -1):
        domain = '.'.join(labels[start:])
        if domain in EmailToEmployer:
            return EmailToEmployer[domain]

    #
    # We don't know who they work for.
    #
    if unknown:
        return [(nextyear, GetEmployer('(Unknown)'))]
    return [(nextyear, GetEmployer(email))]
def LookupEmployer(email, mapunknown = 0):
    """Return the (end_date, employer) list for ``email``.

    Thin wrapper around MapToEmployer; ``mapunknown`` is passed through as
    its ``unknown`` flag.
    """
    return MapToEmployer(email, mapunknown)