3 # Plow through the maintainers file and the repository to find
4 # unmaintained subsystems.
6 # Copyright 2020 Jonathan Corbet <corbet@lwn.net>
7 # distributable under the GNU General Public License v2
9 # Basic operation is as follows. Start by creating a processed MAINTAINERS
10 # database with something like:
12 # ./missingmaints analyze \
13 # -r /path/to/kernel/repo \
14 # -j 4 # Number of jobs to run simultaneously
15 # -g /path/to/aliases/file
16 # -o /processed/data # Where the output goes
18 # This will take a while.
22 # ./missingmaints dump [subsystem ...] -l /path/to/processed/data
25 # -a to see subsystems with no listed maintainers
26 # -H to get output in HTML (useful for clicking on commits)
27 # --nf to include subsystems with no files
28 # -o to sort by oldest first
29 # --select=never to list subsystems w/no maint activity at all
30 # --select=nomaints to see subsystems with no listed maintainer
32 # Finally, look for subsystems with no maintainer activity but a lot of patches:
34 # ./missingmaints urgent -l /path/to/processed/data -r /path/to/repository
37 # -c <commits> # of commits since given ref
38 # --ref <release> # when to start counting commits
39 # -m <months> # minimum months since maintainer seen
42 import os, argparse, sys, re, subprocess, datetime, pickle
43 from concurrent.futures import ThreadPoolExecutor, as_completed
46 # email aliases management
50 def add_alias(addr1, addr2):
51 aliases = EmailAliases.get(addr1, [ addr1 ])
52 for alias in EmailAliases.get(addr2, [ addr2 ]):
53 if alias not in aliases:
56 EmailAliases[alias] = aliases
def get_aliases(email):
    """Return the list of addresses recorded as aliases of *email*.

    An address with no entry in EmailAliases yields a fresh one-element
    list holding only the address itself.
    """
    try:
        known = EmailAliases[email]
    except KeyError:
        # Nothing recorded for this address: it is its own only alias.
        known = [email]
    return known
62 # Load a gitdm-style email aliases file
64 def load_gitdm_aliases(file):
65 with open(file, 'r') as f:
66 for line in f.readlines():
68 if not line or line[0] == '#':
71 add_alias(sline[0], sline[1])
74 # Load a kernel mailmap file.
# Pattern for a mailmap line of the form "Name <addr1> <addr2>", anchored at
# both ends of the line.  Group 1 is the first bracketed address, group 2 the
# second.  Lines that carry a second name between the two addresses do not
# match.
76 mmap_alias = re.compile(r'^[^<]+<([^>]+)>\s+<([^>]+)>$')
78 def load_mailmap(file):
79 with open(file, 'r') as f:
80 for line in f.readlines():
81 m = mmap_alias.match(line)
83 add_alias(m.group(1), m.group(2))
89 # Manage a list of subsystems.
96 if (latest is None) or (d[role][1] > latest):
101 def __init__(self, name):
103 self.maints = [ ] # Don't really need this
106 self.status = 'unknown'
107 self.last_activity = None
109 def format_maint(self, maint):
110 minfo = self.mdata[maint]
112 return ' %s: (idle)' % (maint)
113 ret = [' ' + maint + ':']
115 ret.append(' Author %s %s' % minfo['author'])
116 if minfo['committer']:
117 ret.append(' Committer %s %s' % minfo['committer'])
119 ret.append(' Tags %s %s' % minfo['tags'])
120 return '\n'.join(ret)
123 ret = ['Subsystem %s' % (self.name)]
124 if not self.last_activity:
125 ret.append(' (No activity)')
127 ret.append(' Last activity: ' + self.last_activity.strftime('%Y-%m-%d'))
128 for maint in self.maints:
129 ret.append(self.format_maint(maint))
130 return '\n'.join(ret)
133 return self.__repr__()
def add_maintainer(self, maint):
    """Record *maint* as a maintainer of this subsystem.

    The entry is appended to ``self.maints`` and given a ``None``
    placeholder in ``self.mdata``.
    """
    roster = self.maints
    roster.append(maint)
    # No activity information has been looked up for this entry yet.
    self.mdata[maint] = None
138 def store_minfo(self, maint, info):
139 self.mdata[maint] = info
141 latest = latest_act(info)
143 if (not self.last_activity) or latest > self.last_activity:
144 self.last_activity = latest
def add_file(self, file):
    """Append *file* to the list of paths kept in ``self.files``."""
    paths = self.files
    paths.append(file)
147 def set_status(self, status):
151 # Management of the MAINTAINERS file.
155 def load_maintainers():
156 with open('MAINTAINERS', 'r') as mf:
158 # We "know" that 3c59x is the first entry in the file. That could
159 # change, but it's been that way for a long time :)
161 line = mf.readline().strip()
162 while (line is not None) and not line.startswith('3C59X'):
163 line = mf.readline().strip()
165 die("Bummer, couldn't find the first MAINTAINERS section")
167 # OK, soak everything up.
170 ss = load_subsystem(mf, line)
171 if ss.name == 'THE REST':
173 Subsystems[ss.name] = ss
174 line = mf.readline().strip()
175 while (line is not None) and (len(line) == 0):
176 line = mf.readline().strip()
177 print('Loaded %d subsystems' % len(Subsystems))
# Pattern for a "Name <address>" entry.  Group 1 is the text before the
# bracketed address (an opening double quote, if any, is skipped) and
# group 2 is the address itself.
# NOTE(review): group 1 is greedy, so for a quoted name such as
# '"John Doe" <j@d>' the closing quote ends up inside group 1.
179 emailpat = re.compile(r'"?([^<]+)"? +<([^>]+)>')
180 def load_subsystem(mf, name):
182 line = mf.readline().strip()
185 pass # print('Funky line %s in %s' % (line, name))
188 value = line[2:].strip()
190 # Filter out mailing-list entries
191 m = emailpat.search(value)
193 ss.add_maintainer(value)
198 line = mf.readline().strip()
202 # Get info about a subsystem.
def get_subsys_info(subsys):
    """Look up and store activity information for each maintainer of *subsys*.

    Every entry in ``subsys.maints`` is passed to lookup_maintainer()
    together with the subsystem's file list, and the result is handed to
    ``subsys.store_minfo()``.  A "Done:" line naming the subsystem is
    printed once all maintainers have been processed.
    """
    for maintainer in subsys.maints:
        activity = lookup_maintainer(subsys, maintainer, subsys.files)
        subsys.store_minfo(maintainer, activity)
    print('Done:', subsys.name)
210 def get_all_subsys_info(jobs):
211 names = list(Subsystems.keys())
212 with ThreadPoolExecutor(max_workers = jobs) as tpe:
213 futures = [tpe.submit(get_subsys_info, Subsystems[name]) for name in names]
214 for future in futures:
219 # Look up what a maintainer has been doing.
221 def lookup_maintainer(subsys, maint, files):
222 m = emailpat.search(maint)
224 # print('Funky maintainer line:', subsys.name, maint)
227 # print('Subsys %s has no files' % subsys.name)
231 'author': git_search(files, alias_args('--author=%s', email)),
232 'committer': git_search(files, alias_args('--committer=%s', email), cdate = True),
233 'tags': git_search(files, alias_args('--grep=by:.*%s', email)),
def alias_args(arg, email):
    """Build one formatted argument per known alias of *email*.

    *arg* is a %-style template with a single placeholder; it is filled in
    once for each address returned by get_aliases(email), in that order.
    """
    expanded = []
    for address in get_aliases(email):
        expanded.append(arg % address)
    return expanded
def decode_date(date):
    """Parse a ``YYYY-MM-DD`` string into a naive ``datetime.datetime``.

    The time-of-day fields of the result are all zero.  Raises ValueError
    if *date* does not match the expected format.
    """
    parse = datetime.datetime.strptime
    return parse(date, '%Y-%m-%d')
243 def git_search(files, tests, cdate = False):
244 command = ['git', 'log', '-1', '--pretty=format:%h %as %cs'] + tests + ['--'] + files
245 with subprocess.Popen(command, stdout = subprocess.PIPE) as p:
246 results = p.stdout.readline().decode('utf8')
250 commit, adate, cdate = results.strip().split()
252 return (commit, decode_date(cdate))
254 return (commit, decode_date(adate))
258 sys.stderr.write(string + '\n')
264 p = argparse.ArgumentParser()
265 subs = p.add_subparsers()
269 sp = subs.add_parser('analyze')
270 sp.add_argument('-g', '--gitdm-aliases', help = 'Load gitdm-style email aliases file',
272 sp.add_argument('-j', '--jobs', help = 'Number of threads to run', type = int,
274 sp.add_argument('-o', '--output', help = 'Name of output database file',
275 default = 'maintainers.pickle')
276 sp.add_argument('-r', '--repository', help = 'Repository location',
278 sp.add_argument('-s', '--subsystem', help = 'Look at this subsystem only',
280 sp.set_defaults(handler = cmd_analyze)
284 sp = subs.add_parser('dump')
285 sp.add_argument('subsys', nargs = '*')
286 sp.add_argument('-a', '--all', help = 'Dump maintainerless entries too',
287 action = 'store_true', default = False)
288 sp.add_argument('-H', '--html', help = 'Dump in HTML', action = 'store_true',
290 sp.add_argument('-l', '--load', help = 'Load data from pickle file',
291 default = 'maintainers.pickle')
292 sp.add_argument('--nf', help = 'Include subsystems with no files',
293 action = 'store_true', default = False)
294 sp.add_argument('-o', '--oldest', help = 'Sort oldest first', action = 'store_true',
296 sp.add_argument('-s', '--select', help = 'Filter for subsys to display',
297 choices = ['never', 'nomaints'], default = None)
298 sp.set_defaults(handler = cmd_dump)
300 # urgent - find unmaintained subsystems with activity
302 sp = subs.add_parser('urgent')
303 sp.add_argument('-c', '--commits', help = 'How many commits since ref',
304 default = 42, type = int)
305 sp.add_argument('-H', '--html', help = 'Dump in HTML', action = 'store_true',
307 sp.add_argument('-l', '--load', help = 'Load data from pickle file',
308 default = 'maintainers.pickle')
309 sp.add_argument('-m', '--months', type = int,
310 help = 'months of maint inactivity', default = 12)
311 sp.add_argument('--ref', help = 'Git ref to start patch count',
313 sp.add_argument('-r', '--repository', help = 'Repository location',
315 sp.set_defaults(handler = cmd_urgent)
316 return p.parse_args()
320 # Analyze the maintainers file.
322 def cmd_analyze(args):
324 with open(args.output, 'wb') as f:
326 f.write(pickle.dumps(Subsystems))
328 die(f'Unable to open output file {args.output}')
330 def do_analyze(args):
331 os.chdir(args.repository)
333 # Snag email alias information.
335 if args.gitdm_aliases:
336 load_gitdm_aliases(args.gitdm_aliases)
337 load_mailmap('.mailmap')
339 # Get the maintainers file, then crank.
342 print('Cranking all (%d subsystems, %d jobs)...go out for dinner...' %
343 (len(Subsystems), args.jobs))
344 get_all_subsys_info(args.jobs)
347 return Subsystems[s].last_activity or datetime.datetime(1990, 1, 1)
349 # Dump out some info.
351 dump_html_header = '''
353 <tr><th>Subsystem</th>
361 dump_html_footer = '</table>'
363 def dump_pdate(date):
365 return date.strftime("%Y-%m-%d")
369 return 'https://git.kernel.org/linus/' + commit
# Pattern that pulls the name out of a maintainer entry.  Group 1 is the
# run of characters containing neither '<' nor '"' (optionally wrapped in
# double quotes); it must be followed by whitespace.  Group 2 is the
# optional bracketed "<...>" part that follows.
371 nameonly = re.compile(r'"?([^<"]+)"?\s+(<.*>)?')
372 def fixup_maint_name(name):
373 m = nameonly.match(name)
387 def dump_subsys_html(ss):
388 span = max(1, len(ss.maints))
390 print(f'''<tr class={rc}>
391 <td valign="top" rowspan={span}>{ss.name}</td>
392 <td valign="top" rowspan={span}>{dump_pdate(ss.last_activity)}</td>''')
394 print('<td>(no maintainers)</td><td colspan=3></td></tr>')
400 mi = { 'author': None, 'committer': None, 'tags': None}
401 print(f'\t{rowstart}<td valign="top">{fixup_maint_name(m)}</td>')
402 for type in ['author', 'committer', 'tags']:
404 commit, date = mi[type]
405 print(f'\t <td valign="top"><a href="{git_url(commit)}">{dump_pdate(date)}</a></td>')
407 print('\t <td valign="top">——</td>')
409 rowstart = f'<tr class={rc}>'
411 def load_pickle(pfile):
415 with open(pfile, 'rb') as f:
416 Subsystems = pickle.loads(f.read())
418 die(f'Unable to open pickle file {pfile}')
421 load_pickle(args.load)
422 subs = args.subsys or Subsystems.keys()
424 subs = [sub for sub in subs if Subsystems[sub].files]
425 if args.select == 'never':
426 subs = [sub for sub in subs if Subsystems[sub].last_activity is None]
427 elif args.select == 'nomaints':
428 subs = [sub for sub in subs if not Subsystems[sub].maints]
430 subs = sorted(subs, key = date_key)
432 print(dump_html_header)
437 die("No such subsystem: %s" % (sub))
438 if not (args.all or s.maints):
445 print(dump_html_footer)
448 # urgent - find unmaintained subsystems with activity
450 def unmaintained_for(subsys, delta):
451 if subsys.last_activity is None:
453 return (datetime.datetime.now() - subsys.last_activity) >= delta
455 def get_commit_count(subsys, ref):
456 cmd = ['git', 'log', '--oneline', f'{ref}..', '--'] + subsys.files
457 with subprocess.Popen(cmd, stdout = subprocess.PIPE) as p:
459 for line in p.stdout.readlines():
464 def cmd_urgent(args):
465 os.chdir(args.repository)
466 load_pickle(args.load)
468 # Get the list of unmaintained subsystems.
470 delta = datetime.timedelta(days = args.months*30)
471 subs = [sub for sub in Subsystems.keys()
472 if (Subsystems[sub].files and
473 unmaintained_for(Subsystems[sub], delta))]
475 # Now, for each one, see how many patches exist during the ref period.
478 print('<table class="OddEven">')
479 print('<tr><th>Subsystem</th><th>Activity</th><th>Commits</th></tr>')
482 commits = get_commit_count(ss, args.ref)
483 if commits >= args.commits:
485 activity = ss.last_activity.strftime('%Y-%m-%d')
489 print(f'<tr><td>{ss.name}</td><td>{activity}</td><td>{commits}</td></tr>')
491 print(f'{ss.name}: {activity} {commits}')