Hacky stuff for 4.0
[git-dm.git] / database.py
blobcb242c1466dc8b4f7067e01940a6ccc1da9ac56a
# The "database".
# This code is part of the LWN git data miner.
# Copyright 2007-11 Eklektix, Inc.
# Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
# This file may be distributed under the terms of the GNU General
# Public License, version 2.
12 import sys, datetime
class Hacker:
    """One developer, plus the running totals credited to them.

    ``email`` and ``employer`` are parallel lists: ``employer[i]`` is the
    employer history recorded for ``email[i]``.  A history is iterated as
    ``(end_date, employer)`` pairs by ``emailemployer``.
    """

    def __init__(self, name, id, elist, email):
        """Create a hacker known by one email address and its employer history."""
        self.name = name
        self.id = id
        self.employer = [elist]
        self.email = [email]
        # Line counts accumulated by addpatch().
        self.changed = self.added = self.removed = 0
        self.patches = []
        self.signoffs = []
        self.reviews = []
        self.tested = []
        self.reports = []
        # Counters bumped by testcredit() / reportcredit().
        self.testcred = self.repcred = 0
        self.versions = []

    def addemail(self, email, elist):
        """Attach another address (with its employer history) to this hacker.

        The address is also registered in the module-level HackersByEmail map.
        """
        self.email.append(email)
        self.employer.append(elist)
        HackersByEmail[email] = self

    def emailemployer(self, email, date):
        """Return the employer that goes with ``email`` at ``date``.

        For each stored address equal to ``email``, the first history entry
        whose end date is later than ``date`` is returned.  If nothing
        matches, a diagnostic line is printed and None is returned.
        """
        for addr, history in zip(self.email, self.employer):
            if addr != email:
                continue
            for edate, empl in history:
                if edate > date:
                    return empl
        # One pre-formatted string prints the same text as the old
        # comma-separated print statement and is valid on Python 2 and 3.
        print('%s %s %s %s %s %s' % ('OOPS. ', self.name, self.employer,
                                     self.email, email, date))
        return None  # Should not happen

    def addpatch(self, patch):
        """Record ``patch`` and add its line counts to the totals.

        ``changed`` grows by the larger of the patch's added/removed counts.
        """
        self.added += patch.added
        self.removed += patch.removed
        self.changed += max(patch.added, patch.removed)
        self.patches.append(patch)

    #
    # Note that the author is represented in this release.
    #
    def addversion(self, release):
        """Remember ``release`` once; repeated calls do not add duplicates."""
        if release not in self.versions:
            self.versions.append(release)

    #
    # There's got to be a better way.
    #
    def addsob(self, patch):
        """Append ``patch`` to the signoffs list."""
        self.signoffs.append(patch)

    def addreview(self, patch):
        """Append ``patch`` to the reviews list."""
        self.reviews.append(patch)

    def addtested(self, patch):
        """Append ``patch`` to the tested list."""
        self.tested.append(patch)

    def addreport(self, patch):
        """Append ``patch`` to the reports list."""
        self.reports.append(patch)

    def reportcredit(self, patch):
        """Increment the report-credit counter (``patch`` itself is not stored)."""
        self.repcred += 1

    def testcredit(self, patch):
        """Increment the test-credit counter (``patch`` itself is not stored)."""
        self.testcred += 1
# Module-wide registries of Hacker objects, keyed three different ways.
HackersByName = dict()
HackersByEmail = dict()
HackersByID = dict()
# Numeric id counter; starts at zero.
MaxID = 0
def StoreHacker(name, elist, email):
    """Create a new Hacker, index it by name, email and id, and return it.

    The id is the current value of the module-level MaxID counter, which is
    advanced by one before the Hacker is constructed.
    """
    global MaxID

    new_id = MaxID
    MaxID = new_id + 1
    hacker = Hacker(name, new_id, elist, email)
    HackersByName[name] = hacker
    HackersByEmail[email] = hacker
    HackersByID[new_id] = hacker
    return hacker
def LookupEmail(addr):
    """Return the Hacker registered under email ``addr``, or None if absent."""
    return HackersByEmail.get(addr)
def LookupName(name):
    """Return the Hacker registered under ``name``, or None if absent."""
    return HackersByName.get(name)
def LookupID(id):
    """Return the Hacker registered under numeric ``id``, or None if absent."""
    return HackersByID.get(id)
def LookupStoreHacker(name, email, mapunknown = True):
    """Find the Hacker for ``name``/``email``, creating one when needed.

    The address is first passed through RemapEmail.  Lookup order:
      1. by (remapped) email - returned as is;
      2. by name - the new address and its employer list are attached;
      3. otherwise a brand new Hacker is stored.
    ``mapunknown`` is handed to LookupEmployer unchanged.
    """
    email = RemapEmail(email)

    by_email = LookupEmail(email)
    if by_email:  # already there
        return by_email

    elist = LookupEmployer(email, mapunknown)
    by_name = LookupName(name)
    if not by_name:
        return StoreHacker(name, elist, email)

    # Known person, new email
    by_name.addemail(email, elist)
    return by_name
def AllHackers():
    """Return the values of the by-id registry, i.e. every stored Hacker."""
    everyone = HackersByID.values()
    return everyone
def DumpDB():
    """Write a text dump of every hacker to the file ``database.dump``.

    Hackers are written in sorted-name order.  For each one the dump holds a
    summary line (id, name, patch count, lines added/removed, signoff count),
    then every email address followed by its dated employer entries, and
    finally the list of versions when there is one.

    The file is opened with ``with`` so that it is closed (and flushed) even
    if a write fails; previously the handle was never closed.
    """
    with open('database.dump', 'w') as out:
        # sorted() replaces keys()/.sort(), which only works on Python 2 lists.
        for name in sorted(HackersByName):
            h = HackersByName[name]
            out.write('%4d %s %d p (+%d -%d) sob: %d\n' % (h.id, h.name,
                                                            len(h.patches),
                                                            h.added, h.removed,
                                                            len(h.signoffs)))
            # email and employer are parallel lists on the hacker object.
            for addr, history in zip(h.email, h.employer):
                out.write('\t%s -> \n' % (addr))
                for date, empl in history:
                    out.write('\t\t %d-%d-%d %s\n' % (date.year, date.month,
                                                      date.day, empl.name))
            if h.versions:
                out.write('\tVersions: %s\n' % ','.join(h.versions))
#
# Hack: The first visible tag comes a ways into the stream; when we see it,
# push it backward through the changes we've already seen.
#
def ApplyFirstTag(tag):
    """Replace every non-empty hacker version list with ``[tag]``.

    Hackers whose version list is still empty are left untouched.
    """
    for hacker in HackersByName.values():
        if hacker.versions:
            hacker.versions = [tag]
#
# Employer info.
#
class Employer:
    """Running totals for a single employer."""

    def __init__(self, name):
        """Start an employer called ``name`` with every counter at zero."""
        self.name = name
        self.added = 0
        self.removed = 0
        self.count = 0
        self.changed = 0
        self.sobs = 0
        self.hackers = []

    def AddCSet(self, patch):
        """Credit one changeset to this employer.

        Adds the patch's added/removed line counts, grows ``changed`` by the
        larger of the two, bumps the changeset count and remembers the
        patch author if not already listed.
        """
        added, removed = patch.added, patch.removed
        self.added += added
        self.removed += removed
        self.changed += max(added, removed)
        self.count += 1
        author = patch.author
        if author not in self.hackers:
            self.hackers.append(author)

    def AddSOB(self):
        """Count one more signoff for this employer."""
        self.sobs += 1
# Registry of employer objects, keyed by employer name.
Employers = dict()
def GetEmployer(name):
    """Return the employer registered as ``name``, creating it on first use."""
    if name not in Employers:
        Employers[name] = Employer(name)
    return Employers[name]
def AllEmployers():
    """Return the values of the Employers registry."""
    everyone = Employers.values()
    return everyone
#
# Certain obnoxious developers, who will remain nameless (because we
# would never want to run afoul of Thomas) want their work split among
# multiple companies.  Let's try to cope with that.  Let's also hope
# this doesn't spread.
#
class VirtualEmployer(Employer):
    """An employer whose accumulated totals are divided among real employers.

    ``splits`` is a list of ``(employer_name, fraction)`` pairs.
    """

    def __init__(self, name):
        Employer.__init__(self, name)
        self.splits = []

    def addsplit(self, name, fraction):
        """Assign ``fraction`` of this employer's totals to employer ``name``."""
        self.splits.append((name, fraction))

    #
    # Go through and (destructively) apply our credits to the
    # real employer.  Only one level of weirdness is supported.
    #
    def applysplits(self):
        """Add each split's share of our totals to its real employer, then reset.

        The added, removed, changed and count totals are each scaled by the
        split's fraction and truncated with int(); signoffs are not
        distributed.  Afterwards this object is re-initialised, which zeroes
        its counters and also empties its hacker and split lists.
        """
        for name, fraction in self.splits:
            real = GetEmployer(name)
            real.added += int(self.added*fraction)
            real.removed += int(self.removed*fraction)
            real.changed += int(self.changed*fraction)
            real.count += int(self.count*fraction)
        # Reset counts just in case.  Re-initialise under our own name: using
        # the loop variable here renamed us to the last split target and
        # raised NameError when there were no splits at all.
        self.__init__(self.name)

    def store(self):
        """Register this virtual employer in Employers under its own name.

        Warnings go to stderr when an existing entry is being overwritten
        (that entry is printed first) and when no splits have been added.
        """
        if self.name in Employers:
            print(Employers[self.name])
            sys.stderr.write('WARNING: Virtual empl %s overwrites another\n'
                             % (self.name))
        if len(self.splits) == 0:
            sys.stderr.write('WARNING: Virtual empl %s has no splits\n'
                             % (self.name))
        # Should check that they add up too, but I'm lazy
        Employers[self.name] = self
class FileType:
    """Classify file names by matching them against per-type patterns.

    ``patterns`` maps a file-type name to a list of objects with a
    ``search(filename)`` method; ``order`` lists the file-type names in the
    order in which they are tried.
    """

    def __init__(self, patterns=None, order=None):
        """Store the pattern table and the search order.

        Omitted arguments get a fresh empty dict / list per instance rather
        than one mutable default shared by every instance.  Objects passed
        in by the caller are stored as given, not copied.
        """
        self.patterns = {} if patterns is None else patterns
        self.order = [] if order is None else order

    def guess_file_type(self, filename, patterns=None, order=None):
        """Return the first type in ``order`` with a pattern matching ``filename``.

        ``patterns`` and ``order`` override the instance's own tables when
        given and non-empty.  Types named in ``order`` but missing from
        ``patterns`` are skipped.  Returns 'unknown' when nothing matches.
        """
        patterns = patterns or self.patterns
        order = order or self.order

        for file_type in order:
            if file_type in patterns:
                for patt in patterns[file_type]:
                    if patt.search(filename):
                        return file_type

        return 'unknown'
#
# By default we recognize nothing.
#
FileTypes = FileType(patterns={}, order=[])
#
# Mix all the virtual employers into their real destinations.
#
def MixVirtuals():
    """Call applysplits() on every VirtualEmployer returned by AllEmployers()."""
    for employer in AllEmployers():
        if not isinstance(employer, VirtualEmployer):
            continue
        employer.applysplits()
#
# The email map.
#
EmailAliases = dict()
def AddEmailAlias(variant, canonical):
    """Map the address ``variant`` to ``canonical`` in EmailAliases.

    A duplicate ``variant`` is reported on stderr; the new mapping then
    replaces the old one.
    """
    # Membership test instead of dict.has_key(), which Python 3 removed.
    if variant in EmailAliases:
        sys.stderr.write('Duplicate email alias for %s\n' % (variant))
    EmailAliases[variant] = canonical
def RemapEmail(email):
    """Lower-case ``email`` and return its alias target, or itself if unaliased."""
    lowered = email.lower()
    return EmailAliases.get(lowered, lowered)
#
# Email-to-employer mapping.
#
EmailToEmployer = dict()
# Date 365 days after the day this module is loaded.
nextyear = datetime.date.today() + datetime.timedelta(365)
def AddEmailEmployerMapping(email, employer, end = nextyear):
    """Record that ``email`` belongs to ``employer`` until ``end``.

    ``email`` is lower-cased and ``employer`` is resolved through
    GetEmployer.  Each address maps to a list of ``(end_date, employer)``
    pairs; the new pair is inserted in front of the first existing entry
    with a later end date, or appended when there is none.  An existing
    entry with the same end date only triggers a printed warning - the new
    pair is still added.  ``end`` of None means ``nextyear``.
    """
    if end is None:
        end = nextyear
    email = email.lower()
    empl = GetEmployer(employer)

    history = EmailToEmployer.get(email)
    if history is None:
        # First mapping seen for this address.
        EmailToEmployer[email] = [(end, empl)]
        return

    for i, (date, _) in enumerate(history):
        if date == end:  # probably both nextyear
            # Single-argument print is valid on Python 2 and 3 alike.
            print('WARNING: duplicate email/empl for %s' % (email))
        if date > end:
            history.insert(i, (end, empl))
            return
    history.append((end, empl))
def MapToEmployer(email, unknown = 0):
    """Return the ``(end_date, employer)`` list that applies to ``email``.

    Lookup order:
      1. the full (lower-cased) address in EmailToEmployer;
      2. suffixes of the address's domain, starting with its last two
         labels and growing toward the whole domain;
      3. a single-entry fallback list ending at ``nextyear``: the
         '(Unknown)' employer when ``unknown`` is true, otherwise an
         employer named after the address itself.
    An address with no '@' is reported and mapped to the 'Funky' employer.
    """
    # Somebody sometimes does s/@/ at /; let's fix it.
    email = email.lower().replace(' at ', '@')
    if email in EmailToEmployer:
        return EmailToEmployer[email]

    namedom = email.split('@')
    if len(namedom) < 2:
        # Single-argument print is valid on Python 2 and 3 alike.
        print('Oops...funky email %s' % email)
        return [(nextyear, GetEmployer('Funky'))]

    labels = namedom[1].split('.')
    for start in range(len(labels) - 2, -1, -1):
        suffix = '.'.join(labels[start:])
        if suffix in EmailToEmployer:
            return EmailToEmployer[suffix]

    #
    # We don't know who they work for.
    #
    if unknown:
        return [(nextyear, GetEmployer('(Unknown)'))]
    return [(nextyear, GetEmployer(email))]
def LookupEmployer(email, mapunknown = 0):
    """Return MapToEmployer's result for ``email``; ``mapunknown`` is passed through."""
    return MapToEmployer(email, mapunknown)