4 # This code is part of the LWN git data miner.
6 # Copyright 2007-11 Eklektix, Inc.
7 # Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
9 # This file may be distributed under the terms of the GNU General
10 # Public License, version 2.
16 def __init__ (self
, name
, id, elist
, email
):
19 self
.employer
= [ elist
]
20 self
.email
= [ email
]
21 self
.changed
= self
.added
= self
.removed
= 0
27 self
.testcred
= self
.repcred
= 0
28 self
.activity_start
= datetime
.date
.max
29 self
.activity_end
= datetime
.date
.min
def addemail(self, email, elist):
    """Attach another address, and its employer list, to this hacker.

    ``self.email`` and ``self.employer`` are parallel lists: the entry
    appended to one lines up, by index, with the entry appended to the
    other.  The hacker is also indexed under the new address in the
    module-level ``HackersByEmail`` table.
    """
    addresses, employers = self.email, self.employer
    addresses.append(email)
    employers.append(elist)
    HackersByEmail[email] = self
37 def emailemployer (self
, email
, date
):
38 for i
in range (0, len (self
.email
)):
39 if (email
is None) or (self
.email
[i
] == email
):
40 for edate
, empl
in self
.employer
[i
]:
43 print 'OOPS. ', self
.name
, self
.employer
, self
.email
, email
, date
44 return None # Should not happen
def addpatch(self, patch):
    """Fold one patch into this hacker's running totals.

    The added/removed line counts are accumulated, ``changed`` grows by
    the larger of the two, the patch is kept in ``self.patches``, and
    the activity window is widened so that it covers ``patch.date``.
    """
    lines_in, lines_out = patch.added, patch.removed
    self.added += lines_in
    self.removed += lines_out
    self.changed += max(lines_in, lines_out)
    self.patches.append(patch)

    # Stretch the [activity_start, activity_end] window around this patch.
    when = patch.date
    self.activity_start = min(self.activity_start, when)
    self.activity_end = max(self.activity_end, when)
57 # Note that the author is represented in this release.
def addversion(self, release):
    """Remember that this hacker appears in ``release`` (once only)."""
    if release in self.versions:
        return
    self.versions.append(release)
63 # There's got to be a better way.
def addsob(self, patch):
    """Append ``patch`` to this hacker's ``signoffs`` list."""
    signoffs = self.signoffs
    signoffs.append(patch)
def addreview(self, patch):
    """Append ``patch`` to this hacker's ``reviews`` list."""
    reviews = self.reviews
    reviews.append(patch)
def addtested(self, patch):
    """Append ``patch`` to this hacker's ``tested`` list."""
    tested = self.tested
    tested.append(patch)
def addreport(self, patch):
    """Append ``patch`` to this hacker's ``reports`` list."""
    reports = self.reports
    reports.append(patch)
74 def reportcredit (self
, patch
):
76 def testcredit (self
, patch
):
84 def StoreHacker (name
, elist
, email
):
89 h
= Hacker (name
, id, elist
, email
)
90 HackersByName
[name
] = h
91 HackersByEmail
[email
] = h
95 def LookupEmail (addr
):
97 return HackersByEmail
[addr
]
101 def LookupName (name
):
103 return HackersByName
[name
]
109 return HackersByID
[id]
113 def LookupStoreHacker(name
, email
, mapunknown
= True):
114 email
= RemapEmail(email
)
115 h
= LookupEmail(email
)
116 if h
: # already there
118 elist
= LookupEmployer(email
, mapunknown
)
121 h
.addemail(email
, elist
)
123 return StoreHacker(name
, elist
, email
)
127 return HackersByID
.values ()
130 out
= open ('database.dump', 'w')
131 names
= HackersByName
.keys ()
134 h
= HackersByName
[name
]
135 out
.write ('%4d %s %d p (+%d -%d) sob: %d\n' % (h
.id, h
.name
,
139 for i
in range (0, len (h
.email
)):
140 out
.write ('\t%s -> \n' % (h
.email
[i
]))
141 for date
, empl
in h
.employer
[i
]:
142 out
.write ('\t\t %d-%d-%d %s\n' % (date
.year
, date
.month
, date
.day
,
145 out
.write ('\tVersions: %s\n' % ','.join (h
.versions
))
148 # Hack: The first visible tag comes a ways into the stream; when we see it,
149 # push it backward through the changes we've already seen.
def ApplyFirstTag(tag):
    """Rewrite already-recorded version lists to contain only ``tag``.

    Every hacker in ``HackersByName`` whose ``versions`` list is
    non-empty gets it replaced by a fresh one-element list ``[tag]``;
    hackers with no versions recorded are left alone.
    """
    for hacker in HackersByName.values():
        if hacker.versions:
            hacker.versions = [tag]
160 def __init__ (self
, name
):
162 self
.added
= self
.removed
= self
.count
= self
.changed
= 0
166 def AddCSet (self
, patch
):
167 self
.added
+= patch
.added
168 self
.removed
+= patch
.removed
169 self
.changed
+= max(patch
.added
, patch
.removed
)
171 if patch
.author
not in self
.hackers
:
172 self
.hackers
.append (patch
.author
)
179 def GetEmployer (name
):
181 return Employers
[name
]
188 return Employers
.values ()
191 # Certain obnoxious developers, who will remain nameless (because we
192 # would never want to run afoul of Thomas) want their work split among
193 # multiple companies. Let's try to cope with that. Let's also hope
194 # this doesn't spread.
196 class VirtualEmployer (Employer
):
197 def __init__ (self
, name
):
198 Employer
.__init
__ (self
, name
)
def addsplit(self, name, fraction):
    """Record that ``fraction`` of this employer's work goes to ``name``."""
    share = (name, fraction)
    self.splits.append(share)
205 # Go through and (destructively) apply our credits to the
206 # real employer. Only one level of weirdness is supported.
def applysplits(self):
    """Credit each split target with its share of our totals, then reset.

    For every ``(name, fraction)`` pair in ``self.splits`` the employer
    returned by ``GetEmployer(name)`` has its ``added``, ``removed``,
    ``changed`` and ``count`` totals increased by the corresponding
    total of this object scaled by ``fraction`` and truncated with
    ``int()``.  Only one level of indirection is supported: the target
    is credited directly and its own splits, if any, are not followed.

    Afterwards the object is re-initialised so the same credits cannot
    be applied a second time.
    """
    for name, fraction in self.splits:
        real = GetEmployer(name)
        real.added += int(self.added * fraction)
        real.removed += int(self.removed * fraction)
        real.changed += int(self.changed * fraction)
        real.count += int(self.count * fraction)
    # Reset counts just in case.  Re-initialise under our *own* name:
    # using the loop variable here would rename this virtual employer to
    # its last split target, and would raise NameError when there are no
    # splits at all.
    self.__init__(self.name)
218 if Employers
.has_key (self
.name
):
219 print Employers
[self
.name
]
220 sys
.stderr
.write ('WARNING: Virtual empl %s overwrites another\n'
222 if len (self
.splits
) == 0:
223 sys
.stderr
.write ('WARNING: Virtual empl %s has no splits\n'
225 # Should check that they add up too, but I'm lazy
226 Employers
[self
.name
] = self
229 def __init__ (self
, patterns
={}, order
=[]):
230 self
.patterns
= patterns
233 def guess_file_type (self
, filename
, patterns
=None, order
=None):
234 patterns
= patterns
or self
.patterns
235 order
= order
or self
.order
237 for file_type
in order
:
238 if patterns
.has_key (file_type
):
239 for patt
in patterns
[file_type
]:
240 if patt
.search (filename
):
246 # By default we recognize nothing.
248 FileTypes
= FileType ({}, [])
251 # Mix all the virtual employers into their real destinations.
254 for empl
in AllEmployers ():
255 if isinstance (empl
, VirtualEmployer
):
def AddEmailAlias(variant, canonical):
    """Map the address ``variant`` onto ``canonical`` in ``EmailAliases``.

    If ``variant`` already has an alias, a warning is written to stderr
    and the existing entry is overwritten with the new one.
    """
    # ``in`` works on both Python 2 and 3; dict.has_key() is Python 2 only.
    if variant in EmailAliases:
        sys.stderr.write('Duplicate email alias for %s\n' % (variant,))
    EmailAliases[variant] = canonical
268 def RemapEmail (email
):
269 email
= email
.lower ()
271 return EmailAliases
[email
]
276 # Email-to-employer mapping.
278 EmailToEmployer
= { }
279 nextyear
= datetime
.date
.today () + datetime
.timedelta (days
= 365)
281 def AddEmailEmployerMapping (email
, employer
, end
= nextyear
):
284 email
= email
.lower ()
285 empl
= GetEmployer (employer
)
287 l
= EmailToEmployer
[email
]
288 for i
in range (0, len(l
)):
290 if date
== end
: # probably both nextyear
291 print 'WARNING: duplicate email/empl for %s' % (email
)
293 l
.insert (i
, (end
, empl
))
295 l
.append ((end
, empl
))
297 EmailToEmployer
[email
] = [(end
, empl
)]
299 def MapToEmployer (email
, unknown
= 0):
300 # Somebody sometimes does s/@/ at /; let's fix it.
301 email
= email
.lower ().replace (' at ', '@')
303 return EmailToEmployer
[email
]
306 namedom
= email
.split ('@')
307 if len (namedom
) < 2:
308 print 'Oops...funky email %s' % email
309 return [(nextyear
, GetEmployer ('Funky'))]
310 s
= namedom
[1].split ('.')
311 for dots
in range (len (s
) - 2, -1, -1):
312 addr
= '.'.join (s
[dots
:])
314 return EmailToEmployer
[addr
]
318 # We don't know who they work for.
321 return [(nextyear
, GetEmployer ('(Unknown)'))]
322 return [(nextyear
, GetEmployer (email
))]
def LookupEmployer(email, mapunknown=0):
    """Return the result of ``MapToEmployer(email, mapunknown)`` unchanged."""
    return MapToEmployer(email, mapunknown)