Make tag matching stricter
[git-dm.git] / database.py
blobf15cc7a802e8ef3f17c47049744d678350c327ba
2 # The "database".
4 # This code is part of the LWN git data miner.
6 # Copyright 2007-8 LWN.net
7 # Copyright 2007-8 Jonathan Corbet <corbet@lwn.net>
9 # This file may be distributed under the terms of the GNU General
10 # Public License, version 2.
12 import sys, datetime
class Hacker:
    """One contributor: identity, known addresses and activity tallies.

    Attributes set by the constructor:
      name, id        -- stored exactly as passed in.
      email           -- list of addresses known for this person.
      employer        -- list kept parallel to ``email``; entry i is the
                         employer list that was supplied with email[i].
      added, removed  -- running line counts, summed by addpatch().
      patches, signoffs, reviews, tested, reports
                      -- lists of patch objects appended by the add* methods.
      testcred, repcred
                      -- counters bumped by testcredit() / reportcredit().
    """

    def __init__(self, name, id, elist, email):
        self.name = name
        self.id = id
        self.employer = [elist]
        self.email = [email]
        self.added = self.removed = 0
        self.patches = []
        self.signoffs = []
        self.reviews = []
        self.tested = []
        self.reports = []
        self.testcred = self.repcred = 0

    def addemail(self, email, elist):
        """Record another address (with its employer list) for this hacker.

        Also points the module-level HackersByEmail index at this object.
        """
        self.email.append(email)
        self.employer.append(elist)
        HackersByEmail[email] = self

    def emailemployer(self, email, date):
        """Return the employer associated with *email* at *date*.

        The employer list stored for a matching address is scanned in
        order as (date, employer) pairs, and the employer of the first
        pair whose date is strictly later than *date* is returned.  When
        nothing matches, a diagnostic line is printed and None is returned.
        """
        for known, periods in zip(self.email, self.employer):
            if known != email:
                continue
            for edate, empl in periods:
                if edate > date:
                    return empl
        # Join the fields ourselves and print one string: that yields the
        # same output line whether ``print`` is the Python 2 statement or
        # the Python 3 function.
        print(' '.join(str(field) for field in
                       ('OOPS. ', self.name, self.employer, self.email,
                        email, date)))
        return None  # Should not happen

    def addpatch(self, patch):
        """Add the patch's added/removed counts and remember the patch."""
        self.added += patch.added
        self.removed += patch.removed
        self.patches.append(patch)

    #
    # There's got to be a better way.
    #
    def addsob(self, patch):
        """Remember *patch* in the signoffs list."""
        self.signoffs.append(patch)

    def addreview(self, patch):
        """Remember *patch* in the reviews list."""
        self.reviews.append(patch)

    def addtested(self, patch):
        """Remember *patch* in the tested list."""
        self.tested.append(patch)

    def addreport(self, patch):
        """Remember *patch* in the reports list."""
        self.reports.append(patch)

    def reportcredit(self, patch):
        """Bump the report-credit counter; *patch* itself is not used."""
        self.repcred += 1

    def testcredit(self, patch):
        """Bump the test-credit counter; *patch* itself is not used."""
        self.testcred += 1
# Module-wide hacker indexes, keyed by name, email address and numeric id.
HackersByName = {}
HackersByEmail = {}
HackersByID = {}
# Identifier that the next StoreHacker() call will assign.
MaxID = 0
def StoreHacker(name, elist, email):
    """Create a Hacker, enter it in all three indexes and return it.

    The new object is given the current value of MaxID as its id, and
    MaxID is advanced by one.
    """
    global MaxID

    new_id = MaxID
    MaxID = new_id + 1
    hacker = Hacker(name, new_id, elist, email)
    HackersByName[name] = hacker
    HackersByEmail[email] = hacker
    HackersByID[new_id] = hacker
    return hacker
def LookupEmail(addr):
    """Return the Hacker indexed under *addr*, or None when there is none."""
    return HackersByEmail.get(addr)
def LookupName(name):
    """Return the Hacker indexed under *name*, or None when there is none."""
    return HackersByName.get(name)
def LookupID(id):
    """Return the Hacker indexed under *id*, or None when there is none."""
    return HackersByID.get(id)
def AllHackers():
    """Return the values of the HackersByID index (every stored Hacker)."""
    # The original carried a disabled variant that would have kept only
    # hackers with (added + removed) > 0; no such filtering is done here.
    everyone = HackersByID.values()
    return everyone
def DumpDB():
    """Write a text dump of the hacker database to 'database.dump'.

    Hackers are written in sorted-name order.  Each gets a summary line
    (id, name, patch count, lines added/removed, signoff count), followed
    by one line per known email address and, under each address, one line
    per (date, employer) entry of the matching employer list.

    The file is created (or truncated) in the current working directory
    and is closed before returning, even if a write fails.
    """
    with open('database.dump', 'w') as out:
        for name in sorted(HackersByName):
            h = HackersByName[name]
            out.write('%4d %s %d p (+%d -%d) sob: %d\n' % (h.id, h.name,
                                                           len(h.patches),
                                                           h.added, h.removed,
                                                           len(h.signoffs)))
            # h.email and h.employer are parallel lists.
            for addr, periods in zip(h.email, h.employer):
                out.write('\t%s -> \n' % addr)
                for date, empl in periods:
                    out.write('\t\t %d-%d-%d %s\n' % (date.year, date.month,
                                                      date.day, empl.name))
120 # Employer info.
class Employer:
    """Running totals for one employer.

    Attributes:
      name     -- stored as passed to the constructor.
      added, removed -- sums of patch.added / patch.removed over AddCSet calls.
      changed  -- sum of max(patch.added, patch.removed) over AddCSet calls.
      count    -- number of AddCSet calls.
      sobs     -- number of AddSOB calls.
      hackers  -- distinct patch authors seen by AddCSet, in first-seen order.
    """

    def __init__(self, name):
        self.name = name
        self.added = 0
        self.removed = 0
        self.count = 0
        self.changed = 0
        self.sobs = 0
        self.hackers = []

    def AddCSet(self, patch):
        """Fold one changeset into the totals and note its author."""
        self.added += patch.added
        self.removed += patch.removed
        self.changed += max(patch.added, patch.removed)
        self.count += 1
        if patch.author in self.hackers:
            return
        self.hackers.append(patch.author)

    def AddSOB(self):
        """Count one more signoff for this employer."""
        self.sobs += 1
# Registry of Employer objects, keyed by employer name.
Employers = {}
def GetEmployer(name):
    """Return the Employer registered as *name*, creating it on first use."""
    found = Employers.get(name)
    if found is None:
        found = Employers[name] = Employer(name)
    return found
def AllEmployers():
    """Return the values of the Employers registry."""
    everyone = Employers.values()
    return everyone
154 # The email map.
# Maps a variant email address to its canonical form.
EmailAliases = {}
def AddEmailAlias(variant, canonical):
    """Register *variant* as an alias for the address *canonical*.

    If *variant* already has an alias, a warning is written to stderr and
    the existing entry is then overwritten with the new one.
    """
    # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
    if variant in EmailAliases:
        sys.stderr.write('Duplicate email alias for %s\n' % (variant))
    EmailAliases[variant] = canonical
def RemapEmail(email):
    """Lower-case *email* and return its alias target, if one is registered.

    When no alias exists, the lower-cased address itself is returned.
    """
    lowered = email.lower()
    return EmailAliases.get(lowered, lowered)
171 # Email-to-employer mapping.
# Maps an email address (or a bare domain) to a list of
# (end date, Employer) pairs.
EmailToEmployer = {}
# Default end date for a mapping: 365 days after the day the module loads.
nextyear = datetime.date.today() + datetime.timedelta(365)
def AddEmailEmployerMapping(email, employer, end=nextyear):
    """Record that *email* maps to *employer* up to the date *end*.

    email    -- address (or domain) key; it is lower-cased before use.
    employer -- employer name, resolved through GetEmployer().
    end      -- end date of the association; None means the same as the
                default, ``nextyear``.

    The per-address list of (end date, Employer) pairs is kept in
    ascending date order: the new pair is inserted in front of the first
    entry with a later date, or appended when there is none.  An existing
    entry with the same end date produces a printed warning, but the new
    pair is still added.
    """
    if end is None:
        end = nextyear
    email = email.lower()
    empl = GetEmployer(employer)
    # Only the lookup can raise KeyError, so only the lookup is guarded.
    try:
        periods = EmailToEmployer[email]
    except KeyError:
        EmailToEmployer[email] = [(end, empl)]
        return
    for pos, (date, _) in enumerate(periods):
        if date == end:  # probably both nextyear
            print('WARNING: duplicate email/empl for %s' % email)
        if date > end:
            periods.insert(pos, (end, empl))
            return
    periods.append((end, empl))
def MapToEmployer(email, unknown=0):
    """Return the (end date, Employer) list that applies to *email*.

    Lookup order:
      1. the full (lower-cased) address in EmailToEmployer;
      2. suffixes of the domain, shortest first -- the last two labels,
         then the last three, and so on up to the whole domain;
      3. a fallback single-entry list ending at ``nextyear``, whose
         employer is '(Unknown)' when *unknown* is true, otherwise an
         employer named after the address itself.

    An address with no '@' is reported on stdout and mapped to the
    'Funky' employer.
    """
    # Somebody sometimes does s/@/ at /; let's fix it.
    email = email.lower().replace(' at ', '@')
    try:
        return EmailToEmployer[email]
    except KeyError:
        pass
    namedom = email.split('@')
    if len(namedom) < 2:
        print('Oops...funky email %s' % email)
        return [(nextyear, GetEmployer('Funky'))]
    labels = namedom[1].split('.')
    # Start at the two-label suffix and widen toward the full domain; a
    # single-label domain yields an empty range and is never looked up.
    for start in range(len(labels) - 2, -1, -1):
        suffix = '.'.join(labels[start:])
        if suffix in EmailToEmployer:
            return EmailToEmployer[suffix]
    if unknown:
        return [(nextyear, GetEmployer('(Unknown)'))]
    return [(nextyear, GetEmployer(email))]
def LookupEmployer(email, mapunknown=0):
    """Return MapToEmployer's employer list for *email*.

    *mapunknown* is handed through as MapToEmployer's ``unknown`` flag.
    """
    return MapToEmployer(email, mapunknown)