4 # This code is part of the LWN git data miner.
6 # Copyright 2007-11 Eklektix, Inc.
7 # Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
9 # This file may be distributed under the terms of the GNU General
10 # Public License, version 2.
16 def __init__ (self
, name
, id, elist
, email
):
19 self
.employer
= [ elist
]
20 self
.email
= [ email
]
21 self
.changed
= self
.added
= self
.removed
= 0
27 self
.testcred
= self
.repcred
= 0
def addemail(self, email, elist):
    """Attach one more email address to this hacker.

    ``email`` is appended to ``self.email`` and ``elist`` to
    ``self.employer`` in the same call, so the two lists grow in step.
    The hacker is then registered under ``email`` in the module-level
    ``HackersByEmail`` table.
    """
    self.email.append(email)
    self.employer.append(elist)
    HackersByEmail[email] = self
35 def emailemployer (self
, email
, date
):
36 for i
in range (0, len (self
.email
)):
37 if self
.email
[i
] == email
:
38 for edate
, empl
in self
.employer
[i
]:
41 print 'OOPS. ', self
.name
, self
.employer
, self
.email
, email
, date
42 return None # Should not happen
def addpatch(self, patch):
    """Fold one patch into this hacker's running totals.

    ``patch.added`` and ``patch.removed`` are added to the matching
    counters, ``self.changed`` grows by the larger of the two, and the
    patch itself is appended to ``self.patches``.
    """
    gained = patch.added
    lost = patch.removed
    self.added += gained
    self.removed += lost
    # "changed" counts the larger of the two line counts for each patch.
    self.changed += max(gained, lost)
    self.patches.append(patch)
51 # Note that the author is represented in this release.
def addversion(self, release):
    """Record ``release`` in ``self.versions`` unless it is already there."""
    if release in self.versions:
        return
    self.versions.append(release)
57 # There's got to be a better way.
def addsob(self, patch):
    """Append ``patch`` to this hacker's ``signoffs`` list."""
    signoffs = self.signoffs
    signoffs.append(patch)
def addreview(self, patch):
    """Append ``patch`` to this hacker's ``reviews`` list."""
    reviews = self.reviews
    reviews.append(patch)
def addtested(self, patch):
    """Append ``patch`` to this hacker's ``tested`` list."""
    tested = self.tested
    tested.append(patch)
def addreport(self, patch):
    """Append ``patch`` to this hacker's ``reports`` list."""
    reports = self.reports
    reports.append(patch)
68 def reportcredit (self
, patch
):
70 def testcredit (self
, patch
):
78 def StoreHacker (name
, elist
, email
):
83 h
= Hacker (name
, id, elist
, email
)
84 HackersByName
[name
] = h
85 HackersByEmail
[email
] = h
89 def LookupEmail (addr
):
91 return HackersByEmail
[addr
]
95 def LookupName (name
):
97 return HackersByName
[name
]
103 return HackersByID
[id]
107 def LookupStoreHacker(name
, email
, mapunknown
= True):
108 email
= RemapEmail(email
)
109 h
= LookupEmail(email
)
110 if h
: # already there
112 elist
= LookupEmployer(email
, mapunknown
)
115 h
.addemail(email
, elist
)
117 return StoreHacker(name
, elist
, email
)
121 return HackersByID
.values ()
124 out
= open ('database.dump', 'w')
125 names
= HackersByName
.keys ()
128 h
= HackersByName
[name
]
129 out
.write ('%4d %s %d p (+%d -%d) sob: %d\n' % (h
.id, h
.name
,
133 for i
in range (0, len (h
.email
)):
134 out
.write ('\t%s -> \n' % (h
.email
[i
]))
135 for date
, empl
in h
.employer
[i
]:
136 out
.write ('\t\t %d-%d-%d %s\n' % (date
.year
, date
.month
, date
.day
,
139 out
.write ('\tVersions: %s\n' % ','.join (h
.versions
))
142 # Hack: The first visible tag comes a ways into the stream; when we see it,
143 # push it backward through the changes we've already seen.
def ApplyFirstTag(tag):
    """Rewrite already-recorded version lists to hold only ``tag``.

    Every entry in ``HackersByName`` whose ``versions`` list is non-empty
    has that list replaced by ``[tag]``; entries with an empty list are
    left untouched.
    """
    for hacker in HackersByName.values():
        if hacker.versions:
            hacker.versions = [tag]
154 def __init__ (self
, name
):
156 self
.added
= self
.removed
= self
.count
= self
.changed
= 0
160 def AddCSet (self
, patch
):
161 self
.added
+= patch
.added
162 self
.removed
+= patch
.removed
163 self
.changed
+= max(patch
.added
, patch
.removed
)
165 if patch
.author
not in self
.hackers
:
166 self
.hackers
.append (patch
.author
)
173 def GetEmployer (name
):
175 return Employers
[name
]
182 return Employers
.values ()
185 # Certain obnoxious developers, who will remain nameless (because we
186 # would never want to run afoul of Thomas) want their work split among
187 # multiple companies. Let's try to cope with that. Let's also hope
188 # this doesn't spread.
190 class VirtualEmployer (Employer
):
191 def __init__ (self
, name
):
192 Employer
.__init
__ (self
, name
)
def addsplit(self, name, fraction):
    """Queue a ``(name, fraction)`` pair on ``self.splits``."""
    share = (name, fraction)
    self.splits.append(share)
199 # Go through and (destructively) apply our credits to the
200 # real employer. Only one level of weirdness is supported.
def applysplits(self):
    """Destructively hand this virtual employer's counts to the real ones.

    For each ``(name, fraction)`` pair in ``self.splits``, the employer
    returned by ``GetEmployer(name)`` has its ``added``, ``removed``,
    ``changed`` and ``count`` totals increased by ``int(total * fraction)``
    of this object's corresponding total.  Afterwards this object is
    re-initialised so its own counts cannot be applied a second time.
    """
    for name, fraction in self.splits:
        real = GetEmployer(name)
        real.added += int(self.added * fraction)
        real.removed += int(self.removed * fraction)
        real.changed += int(self.changed * fraction)
        real.count += int(self.count * fraction)
    # Reset counts just in case.  Re-initialise under our own name: using
    # the loop variable here would rename this employer after the last
    # split target, and would raise when there are no splits at all
    # because the variable is then never bound.
    self.__init__(self.name)
212 if Employers
.has_key (self
.name
):
213 print Employers
[self
.name
]
214 sys
.stderr
.write ('WARNING: Virtual empl %s overwrites another\n'
216 if len (self
.splits
) == 0:
217 sys
.stderr
.write ('WARNING: Virtual empl %s has no splits\n'
219 # Should check that they add up too, but I'm lazy
220 Employers
[self
.name
] = self
223 def __init__ (self
, patterns
={}, order
=[]):
224 self
.patterns
= patterns
227 def guess_file_type (self
, filename
, patterns
=None, order
=None):
228 patterns
= patterns
or self
.patterns
229 order
= order
or self
.order
231 for file_type
in order
:
232 if patterns
.has_key (file_type
):
233 for patt
in patterns
[file_type
]:
234 if patt
.search (filename
):
240 # By default we recognize nothing.
242 FileTypes
= FileType ({}, [])
245 # Mix all the virtual employers into their real destinations.
248 for empl
in AllEmployers ():
249 if isinstance (empl
, VirtualEmployer
):
def AddEmailAlias(variant, canonical):
    """Map the address ``variant`` onto ``canonical`` in ``EmailAliases``.

    A variant that is already present is reported on stderr and then
    overwritten with the new canonical address.
    """
    if variant in EmailAliases:
        sys.stderr.write('Duplicate email alias for %s\n' % (variant))
    EmailAliases[variant] = canonical
262 def RemapEmail (email
):
263 email
= email
.lower ()
265 return EmailAliases
[email
]
270 # Email-to-employer mapping.
# Lookup table filled in by AddEmailEmployerMapping; each value is a list of
# (end date, employer) tuples.
EmailToEmployer = dict()

# A date one year (365 days) from today, used as the default end date for
# email/employer mappings.
nextyear = datetime.date.today() + datetime.timedelta(days=365)
275 def AddEmailEmployerMapping (email
, employer
, end
= nextyear
):
278 email
= email
.lower ()
279 empl
= GetEmployer (employer
)
281 l
= EmailToEmployer
[email
]
282 for i
in range (0, len(l
)):
284 if date
== end
: # probably both nextyear
285 print 'WARNING: duplicate email/empl for %s' % (email
)
287 l
.insert (i
, (end
, empl
))
289 l
.append ((end
, empl
))
291 EmailToEmployer
[email
] = [(end
, empl
)]
293 def MapToEmployer (email
, unknown
= 0):
294 # Somebody sometimes does s/@/ at /; let's fix it.
295 email
= email
.lower ().replace (' at ', '@')
297 return EmailToEmployer
[email
]
300 namedom
= email
.split ('@')
301 if len (namedom
) < 2:
302 print 'Oops...funky email %s' % email
303 return [(nextyear
, GetEmployer ('Funky'))]
304 s
= namedom
[1].split ('.')
305 for dots
in range (len (s
) - 2, -1, -1):
306 addr
= '.'.join (s
[dots
:])
308 return EmailToEmployer
[addr
]
312 # We don't know who they work for.
315 return [(nextyear
, GetEmployer ('(Unknown)'))]
316 return [(nextyear
, GetEmployer (email
))]
def LookupEmployer(email, mapunknown=0):
    """Return whatever ``MapToEmployer(email, mapunknown)`` produces."""
    return MapToEmployer(email, mapunknown)