4 # This code is part of the LWN git data miner.
6 # Copyright 2007-11 Eklektix, Inc.
7 # Copyright 2007-11 Jonathan Corbet <corbet@lwn.net>
9 # This file may be distributed under the terms of the GNU General
10 # Public License, version 2.
16 def __init__(self
, name
, id, elist
, email
):
19 self
.employer
= [ elist
]
20 self
.email
= [ email
]
21 self
.changed
= self
.added
= self
.removed
= 0
27 self
.testcred
= self
.repcred
= 0
28 self
.activity_start
= datetime
.date
.max
29 self
.activity_end
= datetime
.date
.min
def addemail(self, email, elist):
    """Attach an additional email address to this developer.

    ``email`` and ``elist`` are appended to the parallel ``self.email`` and
    ``self.employer`` lists, so that index *i* of one corresponds to index
    *i* of the other.  The module-level ``HackersByEmail`` table is then
    pointed at this object for the new address.
    """
    addresses, employers = self.email, self.employer
    addresses.append(email)
    employers.append(elist)
    # Make the new address resolve to this developer.
    HackersByEmail[email] = self
37 def emailemployer(self
, email
, date
):
38 for i
in range(0, len(self
.email
)):
39 if (email
is None) or (self
.email
[i
] == email
):
40 for edate
, empl
in self
.employer
[i
]:
43 print('OOPS. ', self
.name
, self
.employer
, self
.email
, email
, date
)
44 return None # Should not happen
def addpatch(self, patch):
    """Fold one patch into this developer's running totals.

    Reads ``patch.added``, ``patch.removed`` and ``patch.date``.  The line
    counters are incremented, the patch is appended to ``self.patches``,
    and the ``activity_start`` / ``activity_end`` window is widened to
    include the patch date when it falls outside the current window.
    """
    lines_in = patch.added
    lines_out = patch.removed
    self.added += lines_in
    self.removed += lines_out
    # "changed" counts the larger of the two figures for each patch.
    self.changed += max(lines_in, lines_out)
    self.patches.append(patch)

    when = patch.date
    self.activity_start = min(self.activity_start, when)
    self.activity_end = max(self.activity_end, when)
57 # Note that the author is represented in this release.
def addversion(self, release):
    """Add ``release`` to ``self.versions`` unless it is already listed."""
    if release in self.versions:
        return
    self.versions.append(release)
63 # There's got to be a better way.
def addsob(self, patch):
    """Append ``patch`` to this object's ``signoffs`` list."""
    signoffs = self.signoffs
    signoffs.append(patch)
def addreview(self, patch):
    """Append ``patch`` to this object's ``reviews`` list."""
    reviews = self.reviews
    reviews.append(patch)
def addtested(self, patch):
    """Append ``patch`` to this object's ``tested`` list."""
    tested = self.tested
    tested.append(patch)
def addreport(self, patch):
    """Append ``patch`` to this object's ``reports`` list."""
    reports = self.reports
    reports.append(patch)
74 def reportcredit(self
, patch
):
76 def testcredit(self
, patch
):
84 def StoreHacker(name
, elist
, email
):
89 h
= Hacker(name
, id, elist
, email
)
90 HackersByName
[name
] = h
91 HackersByEmail
[email
] = h
95 def LookupEmail(addr
):
97 return HackersByEmail
[addr
]
101 def LookupName(name
):
103 return HackersByName
[name
]
109 return HackersByID
[id]
113 def LookupStoreHacker(name
, email
, mapunknown
= True):
115 # See if we already know about this email address.
117 email
= RemapEmail(email
)
118 h
= LookupEmail(email
)
119 if h
: # already there
122 # OK, see if we can map an employer to the domain, and try a
125 elist
= LookupEmployer(email
, mapunknown
)
128 h
.addemail(email
, elist
)
131 # Something new, remember it.
133 return StoreHacker(name
, elist
, email
)
137 return list(HackersByID
.values())
140 out
= open('database.dump', 'w')
141 names
= sorted(HackersByName
)
143 h
= HackersByName
[name
]
144 out
.write('%4d %s %d p (+%d -%d) sob: %d\n' % (h
.id, h
.name
,
148 for i
in range(0, len(h
.email
)):
149 out
.write('\t%s -> \n' % (h
.email
[i
]))
150 for date
, empl
in h
.employer
[i
]:
151 out
.write('\t\t %d-%d-%d %s\n' % (date
.year
, date
.month
, date
.day
,
154 out
.write('\tVersions: %s\n' % ','.join(h
.versions
))
157 # Hack: The first visible tag comes a ways into the stream; when we see it,
158 # push it backward through the changes we've already seen.
def ApplyFirstTag(tag):
    """Back-fill the first release tag onto developers already seen.

    Every entry in ``HackersByName`` whose ``versions`` list is non-empty
    has that list replaced by a fresh single-element list ``[tag]``.
    Entries with an empty ``versions`` list are left alone.
    """
    for hacker in HackersByName.values():
        if not hacker.versions:
            continue
        hacker.versions = [tag]
169 def __init__(self
, name
):
171 self
.added
= self
.removed
= self
.count
= self
.changed
= 0
175 def AddCSet(self
, patch
):
176 self
.added
+= patch
.added
177 self
.removed
+= patch
.removed
178 self
.changed
+= max(patch
.added
, patch
.removed
)
180 if patch
.author
not in self
.hackers
:
181 self
.hackers
.append(patch
.author
)
188 def GetEmployer(name
):
190 return Employers
[name
]
197 return list(Employers
.values())
200 # Certain obnoxious developers, who will remain nameless (because we
201 # would never want to run afoul of Thomas) want their work split among
202 # multiple companies. Let's try to cope with that. Let's also hope
203 # this doesn't spread.
205 class VirtualEmployer(Employer
):
206 def __init__(self
, name
):
207 Employer
.__init
__(self
, name
)
def addsplit(self, name, fraction):
    """Record that ``fraction`` of this employer's totals goes to ``name``.

    The pair is stored as a ``(name, fraction)`` tuple at the end of
    ``self.splits``.
    """
    share = (name, fraction)
    self.splits.append(share)
214 # Go through and (destructively) apply our credits to the
215 # real employer. Only one level of weirdness is supported.
def applysplits(self):
    """Destructively hand this virtual employer's totals to the real ones.

    For each ``(name, fraction)`` pair in ``self.splits``, the employer
    returned by ``GetEmployer(name)`` has its ``added``, ``removed``,
    ``changed`` and ``count`` attributes increased by the corresponding
    total held here, scaled by ``fraction`` and truncated with ``int()``.
    Afterwards this object is re-initialised to reset its counts.
    """
    for name, fraction in self.splits:
        real = GetEmployer(name)
        real.added += int(self.added * fraction)
        real.removed += int(self.removed * fraction)
        real.changed += int(self.changed * fraction)
        real.count += int(self.count * fraction)
    # Reset counts just in case.  Re-initialise under our *own* name: the
    # loop variable ``name`` is unbound when there are no splits, and
    # otherwise holds the last real employer's name, which would rename
    # this object.
    self.__init__(self.name)
227 if self
.name
in Employers
:
228 print(Employers
[self
.name
])
229 print('WARNING: Virtual empl %s overwrites another' % (self
.name
),
231 if len(self
.splits
) == 0:
232 print('WARNING: Virtual empl %s has no splits' % (self
.name
),
234 # Should check that they add up too, but I'm lazy
235 Employers
[self
.name
] = self
238 def __init__(self
, patterns
={}, order
=[]):
239 self
.patterns
= patterns
242 def guess_file_type(self
, filename
, patterns
=None, order
=None):
243 patterns
= patterns
or self
.patterns
244 order
= order
or self
.order
246 for file_type
in order
:
247 if file_type
in patterns
:
248 for patt
in patterns
[file_type
]:
249 if patt
.search(filename
):
255 # By default we recognize nothing.
257 FileTypes
= FileType({}, [])
260 # Mix all the virtual employers into their real destinations.
263 for empl
in AllEmployers():
264 if isinstance(empl
, VirtualEmployer
):
def AddEmailAlias(variant, canonical):
    """Register ``canonical`` as the address that ``variant`` maps to.

    If ``variant`` already has an entry in ``EmailAliases`` a message is
    written to standard error; the entry is then overwritten regardless.
    """
    already_known = variant in EmailAliases
    if already_known:
        message = 'Duplicate email alias for %s\n' % (variant)
        sys.stderr.write(message)
    EmailAliases[variant] = canonical
278 def RemapEmail(email
):
279 email
= email
.lower()
281 return EmailAliases
[email
]
283 return RXRemapEmail(email
)
def AddRXEmailAlias(regex, canonical):
    """Append a ``(regex, canonical)`` pair to ``RXEmailAliases``."""
    rule = (regex, canonical)
    RXEmailAliases.append(rule)
288 def RXRemapEmail(email
):
289 for regex
, canonical
in RXEmailAliases
:
290 if regex
.match(email
):
295 # Email-to-employer mapping.
297 EmailToEmployer
= { }
298 nextyear
= datetime
.date
.today() + datetime
.timedelta(days
= 365)
300 def AddEmailEmployerMapping(email
, employer
, end
= nextyear
):
303 email
= email
.lower()
304 empl
= GetEmployer(employer
)
306 l
= EmailToEmployer
[email
]
307 for i
in range(0, len(l
)):
309 if date
== end
: # probably both nextyear
310 print('WARNING: duplicate email/empl for %s' % (email
))
312 l
.insert(i
,(end
, empl
))
314 l
.append((end
, empl
))
316 EmailToEmployer
[email
] = [(end
, empl
)]
318 def MapToEmployer(email
, unknown
= 0):
319 # Somebody sometimes does s/@/ at /; let's fix it.
320 email
= email
.lower().replace(' at ', '@')
322 return EmailToEmployer
[email
]
325 namedom
= email
.split('@')
327 print('Oops...funky email %s' % email
)
328 return [(nextyear
, GetEmployer('Funky'))]
329 s
= namedom
[1].split('.')
330 for dots
in range(len(s
) - 2, -1, -1):
331 addr
= '.'.join(s
[dots
:])
333 return EmailToEmployer
[addr
]
337 # We don't know who they work for.
340 return [(nextyear
, GetEmployer('(Unknown)'))]
341 return [(nextyear
, GetEmployer(email
))]
def LookupEmployer(email, mapunknown = 0):
    """Return whatever ``MapToEmployer(email, mapunknown)`` returns.

    This is a thin pass-through; both arguments are forwarded unchanged.
    """
    return MapToEmployer(email, mapunknown)
349 # Make sure aliases don't mask other entries.
352 for email
in EmailToEmployer
:
353 remapped
= RemapEmail(email
)
354 if email
!= remapped
:
355 print(f
'WARNING: {email} is masked by an alias entry ({remapped})')