4 # This code is part of the LWN git data miner.
6 # Copyright 2007-11 Eklektix, Inc.
7 # Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
9 # This file may be distributed under the terms of the GNU General
10 # Public License, version 2.
16 def __init__ (self
, name
, id, elist
, email
):
19 self
.employer
= [ elist
]
20 self
.email
= [ email
]
21 self
.added
= self
.removed
= 0
27 self
.testcred
= self
.repcred
= 0
def addemail(self, email, elist):
    """Record an additional email address for this hacker.

    `email` and `elist` are appended to self.email and self.employer
    respectively, so both entries end up at the same index in the two
    lists.  The hacker is then registered under `email` in the
    HackersByEmail mapping, replacing any previous entry for that key.
    """
    self.email.append(email)
    self.employer.append(elist)
    HackersByEmail[email] = self
35 def emailemployer (self
, email
, date
):
36 for i
in range (0, len (self
.email
)):
37 if self
.email
[i
] == email
:
38 for edate
, empl
in self
.employer
[i
]:
41 print 'OOPS. ', self
.name
, self
.employer
, self
.email
, email
, date
42 return None # Should not happen
def addpatch(self, patch):
    """Fold one patch into this hacker's running totals.

    Adds patch.added and patch.removed to self.added and self.removed,
    and appends the patch object itself to self.patches.
    """
    self.added += patch.added
    self.removed += patch.removed
    self.patches.append(patch)
50 # Note that the author is represented in this release.
def addversion(self, release):
    """Note that this hacker is represented in `release`.

    `release` is appended to self.versions only if it is not already
    there, so each release appears at most once and in first-seen order.
    """
    if release not in self.versions:
        self.versions.append(release)
56 # There's got to be a better way.
def addsob(self, patch):
    """Append `patch` to self.signoffs."""
    # NOTE(review): "sob" presumably stands for Signed-off-by -- confirm.
    self.signoffs.append(patch)
def addreview(self, patch):
    """Append `patch` to self.reviews."""
    self.reviews.append(patch)
def addtested(self, patch):
    """Append `patch` to self.tested."""
    self.tested.append(patch)
def addreport(self, patch):
    """Append `patch` to self.reports."""
    self.reports.append(patch)
67 def reportcredit (self
, patch
):
69 def testcredit (self
, patch
):
77 def StoreHacker (name
, elist
, email
):
82 h
= Hacker (name
, id, elist
, email
)
83 HackersByName
[name
] = h
84 HackersByEmail
[email
] = h
88 def LookupEmail (addr
):
90 return HackersByEmail
[addr
]
94 def LookupName (name
):
96 return HackersByName
[name
]
102 return HackersByID
[id]
106 def LookupStoreHacker(name
, email
, mapunknown
= True):
107 email
= RemapEmail(email
)
108 h
= LookupEmail(email
)
109 if h
: # already there
111 elist
= LookupEmployer(email
, mapunknown
)
114 h
.addemail(email
, elist
)
116 return StoreHacker(name
, elist
, email
)
120 return HackersByID
.values ()
123 out
= open ('database.dump', 'w')
124 names
= HackersByName
.keys ()
127 h
= HackersByName
[name
]
128 out
.write ('%4d %s %d p (+%d -%d) sob: %d\n' % (h
.id, h
.name
,
132 for i
in range (0, len (h
.email
)):
133 out
.write ('\t%s -> \n' % (h
.email
[i
]))
134 for date
, empl
in h
.employer
[i
]:
135 out
.write ('\t\t %d-%d-%d %s\n' % (date
.year
, date
.month
, date
.day
,
138 out
.write ('\tVersions: %s\n' % ','.join (h
.versions
))
141 # Hack: The first visible tag comes a ways into the stream; when we see it,
142 # push it backward through the changes we've already seen.
def ApplyFirstTag(tag):
    """Retroactively attribute already-seen changes to the first tag.

    For every hacker in HackersByName whose `versions` list is non-empty,
    that list is replaced by a fresh single-element list `[tag]`.
    Hackers with an empty `versions` list are left untouched.
    """
    # Iterate the hacker objects directly; only attributes are rebound,
    # the dictionary itself is not modified during the loop.
    for hacker in HackersByName.values():
        if hacker.versions:
            hacker.versions = [tag]
153 def __init__ (self
, name
):
155 self
.added
= self
.removed
= self
.count
= self
.changed
= 0
159 def AddCSet (self
, patch
):
160 self
.added
+= patch
.added
161 self
.removed
+= patch
.removed
162 self
.changed
+= max(patch
.added
, patch
.removed
)
164 if patch
.author
not in self
.hackers
:
165 self
.hackers
.append (patch
.author
)
172 def GetEmployer (name
):
174 return Employers
[name
]
181 return Employers
.values ()
184 # Certain obnoxious developers, who will remain nameless (because we
185 # would never want to run afoul of Thomas) want their work split among
186 # multiple companies. Let's try to cope with that. Let's also hope
187 # this doesn't spread.
189 class VirtualEmployer (Employer
):
190 def __init__ (self
, name
):
191 Employer
.__init
__ (self
, name
)
194 def addsplit (self
, name
, fraction
):
195 self
.splits
.append ((name
, fraction
))
198 # Go through and (destructively) apply our credits to the
199 # real employer. Only one level of weirdness is supported.
201 def applysplits (self
):
202 for name
, fraction
in self
.splits
:
203 real
= GetEmployer (name
)
204 real
.added
+= int (self
.added
*fraction
)
205 real
.removed
+= int (self
.removed
*fraction
)
206 real
.changed
+= int (self
.changed
*fraction
)
207 real
.count
+= int (self
.count
*fraction
)
208 self
.__init
__ (name
) # Reset counts just in case
211 if Employers
.has_key (self
.name
):
212 print Employers
[self
.name
]
213 sys
.stderr
.write ('WARNING: Virtual empl %s overwrites another\n'
215 if len (self
.splits
) == 0:
216 sys
.stderr
.write ('WARNING: Virtual empl %s has no splits\n'
218 # Should check that they add up too, but I'm lazy
219 Employers
[self
.name
] = self
222 def __init__ (self
, patterns
={}, order
=[]):
223 self
.patterns
= patterns
226 def guess_file_type (self
, filename
, patterns
=None, order
=None):
227 patterns
= patterns
or self
.patterns
228 order
= order
or self
.order
230 for file_type
in order
:
231 if patterns
.has_key (file_type
):
232 for patt
in patterns
[file_type
]:
233 if patt
.search (filename
):
239 # By default we recognize nothing.
# Module-level default: a FileType built with no patterns and no ordering.
FileTypes = FileType({}, [])
244 # Mix all the virtual employers into their real destinations.
247 for empl
in AllEmployers ():
248 if isinstance (empl
, VirtualEmployer
):
def AddEmailAlias(variant, canonical):
    """Register `canonical` as the address that `variant` maps to.

    If `variant` already has an alias, a warning is written to stderr
    and the old entry is overwritten, so the most recent call wins.
    """
    # `in` replaces dict.has_key(), which is deprecated and absent from
    # Python 3; the membership test is equivalent for a dict.
    if variant in EmailAliases:
        sys.stderr.write('Duplicate email alias for %s\n' % (variant,))
    EmailAliases[variant] = canonical
261 def RemapEmail (email
):
262 email
= email
.lower ()
264 return EmailAliases
[email
]
269 # Email-to-employer mapping.
# Maps a (lower-cased) email address or domain to a list of
# (end date, employer) tuples.
EmailToEmployer = {}

# Sentinel "far enough in the future" end date, 365 days from the day the
# module is imported.  It is evaluated once, at import time, and is used
# as the default end date for employer mappings.
nextyear = datetime.date.today() + datetime.timedelta(days=365)
274 def AddEmailEmployerMapping (email
, employer
, end
= nextyear
):
277 email
= email
.lower ()
278 empl
= GetEmployer (employer
)
280 l
= EmailToEmployer
[email
]
281 for i
in range (0, len(l
)):
283 if date
== end
: # probably both nextyear
284 print 'WARNING: duplicate email/empl for %s' % (email
)
286 l
.insert (i
, (end
, empl
))
288 l
.append ((end
, empl
))
290 EmailToEmployer
[email
] = [(end
, empl
)]
292 def MapToEmployer (email
, unknown
= 0):
293 # Somebody sometimes does s/@/ at /; let's fix it.
294 email
= email
.lower ().replace (' at ', '@')
296 return EmailToEmployer
[email
]
299 namedom
= email
.split ('@')
300 if len (namedom
) < 2:
301 print 'Oops...funky email %s' % email
302 return [(nextyear
, GetEmployer ('Funky'))]
303 s
= namedom
[1].split ('.')
304 for dots
in range (len (s
) - 2, -1, -1):
305 addr
= '.'.join (s
[dots
:])
307 return EmailToEmployer
[addr
]
311 # We don't know who they work for.
314 return [(nextyear
, GetEmployer ('(Unknown)'))]
315 return [(nextyear
, GetEmployer (email
))]
def LookupEmployer(email, mapunknown=0):
    """Return the employer list for `email`.

    Thin wrapper around MapToEmployer: `mapunknown` is forwarded as that
    function's `unknown` argument and its result is returned unchanged
    (a list of (end date, employer) tuples).
    """
    return MapToEmployer(email, mapunknown)