utils/demangle_tree.py

   1 # Given a path to llvm-objdump and a directory tree, spider the directory tree
   2 # dumping every object file encountered with correct options needed to demangle
   3 # symbols in the object file, and collect statistics about failed / crashed
   4 # demanglings.  Useful for stress testing the demangler against a large corpus
   5 # of inputs.
   6
   7 from __future__ import print_function
   8
   9 import argparse
  10 import functools
  11 import os
  12 import re
  13 import sys
  14 import subprocess
  15 import traceback
  16 from multiprocessing import Pool
  17 import multiprocessing
  18
  19 args = None
  20
  21 def parse_line(line):
  22     question = line.find('?')
  23     if question == -1:
  24         return None, None
  25
  26     open_paren = line.find('(', question)
  27     if open_paren == -1:
  28         return None, None
  29     close_paren = line.rfind(')', open_paren)
  30     if open_paren == -1:
  31         return None, None
  32     mangled = line[question : open_paren]
  33     demangled = line[open_paren+1 : close_paren]
  34     return mangled.strip(), demangled.strip()
  35
  36 class Result(object):
  37     def __init__(self):
  38         self.crashed = []
  39         self.file = None
  40         self.nsymbols = 0
  41         self.errors = set()
  42         self.nfiles = 0
  43
  44 class MapContext(object):
  45     def __init__(self):
  46         self.rincomplete = None
  47         self.rcumulative = Result()
  48         self.pending_objs = []
  49         self.npending = 0
  50
  51 def process_file(path, objdump):
  52     r = Result()
  53     r.file = path
  54
  55     popen_args = [objdump, '-t', '-demangle', path]
  56     p = subprocess.Popen(popen_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  57     stdout, stderr = p.communicate()
  58     if p.returncode != 0:
  59         r.crashed = [r.file]
  60         return r
  61
  62     output = stdout.decode('utf-8')
  63
  64     for line in output.splitlines():
  65         mangled, demangled = parse_line(line)
  66         if mangled is None:
  67             continue
  68         r.nsymbols += 1
  69         if "invalid mangled name" in demangled:
  70             r.errors.add(mangled)
  71     return r
  72
  73 def add_results(r1, r2):
  74     r1.crashed.extend(r2.crashed)
  75     r1.errors.update(r2.errors)
  76     r1.nsymbols += r2.nsymbols
  77     r1.nfiles += r2.nfiles
  78
  79 def print_result_row(directory, result):
  80     print("[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'".format(
  81         result.nfiles, len(result.crashed), len(result.errors), result.nsymbols, directory))
  82
  83 def process_one_chunk(pool, chunk_size, objdump, context):
  84     objs = []
  85
  86     incomplete = False
  87     dir_results = {}
  88     ordered_dirs = []
  89     while context.npending > 0 and len(objs) < chunk_size:
  90         this_dir = context.pending_objs[0][0]
  91         ordered_dirs.append(this_dir)
  92         re = Result()
  93         if context.rincomplete is not None:
  94             re = context.rincomplete
  95             context.rincomplete = None
  96
  97         dir_results[this_dir] = re
  98         re.file = this_dir
  99
 100         nneeded = chunk_size - len(objs)
 101         objs_this_dir = context.pending_objs[0][1]
 102         navail = len(objs_this_dir)
 103         ntaken = min(nneeded, navail)
 104         objs.extend(objs_this_dir[0:ntaken])
 105         remaining_objs_this_dir = objs_this_dir[ntaken:]
 106         context.pending_objs[0] = (context.pending_objs[0][0], remaining_objs_this_dir)
 107         context.npending -= ntaken
 108         if ntaken == navail:
 109             context.pending_objs.pop(0)
 110         else:
 111             incomplete = True
 112
 113         re.nfiles += ntaken
 114
 115     assert(len(objs) == chunk_size or context.npending == 0)
 116
 117     copier = functools.partial(process_file, objdump=objdump)
 118     mapped_results = list(pool.map(copier, objs))
 119
 120     for mr in mapped_results:
 121         result_dir = os.path.dirname(mr.file)
 122         result_entry = dir_results[result_dir]
 123         add_results(result_entry, mr)
 124
 125     # It's only possible that a single item is incomplete, and it has to be the
 126     # last item.
 127     if incomplete:
 128         context.rincomplete = dir_results[ordered_dirs[-1]]
 129         ordered_dirs.pop()
 130
 131     # Now ordered_dirs contains a list of all directories which *did* complete.
 132     for c in ordered_dirs:
 133         re = dir_results[c]
 134         add_results(context.rcumulative, re)
 135         print_result_row(c, re)
 136
 137 def process_pending_files(pool, chunk_size, objdump, context):
 138     while context.npending >= chunk_size:
 139         process_one_chunk(pool, chunk_size, objdump, context)
 140
 141 def go():
 142     global args
 143
 144     obj_dir = args.dir
 145     extensions = args.extensions.split(',')
 146     extensions = [x if x[0] == '.' else '.' + x for x in extensions]
 147
 148
 149     pool_size = 48
 150     pool = Pool(processes=pool_size)
 151
 152     try:
 153         nfiles = 0
 154         context = MapContext()
 155
 156         for root, dirs, files in os.walk(obj_dir):
 157             root = os.path.normpath(root)
 158             pending = []
 159             for f in files:
 160                 file, ext = os.path.splitext(f)
 161                 if not ext in extensions:
 162                     continue
 163
 164                 nfiles += 1
 165                 full_path = os.path.join(root, f)
 166                 full_path = os.path.normpath(full_path)
 167                 pending.append(full_path)
 168
 169             # If this directory had no object files, just print a default
 170             # status line and continue with the next dir
 171             if len(pending) == 0:
 172                 print_result_row(root, Result())
 173                 continue
 174
 175             context.npending += len(pending)
 176             context.pending_objs.append((root, pending))
 177             # Drain the tasks, `pool_size` at a time, until we have less than
 178             # `pool_size` tasks remaining.
 179             process_pending_files(pool, pool_size, args.objdump, context)
 180
 181         assert(context.npending < pool_size);
 182         process_one_chunk(pool, pool_size, args.objdump, context)
 183
 184         total = context.rcumulative
 185         nfailed = len(total.errors)
 186         nsuccess = total.nsymbols - nfailed
 187         ncrashed = len(total.crashed)
 188
 189         if (nfailed > 0):
 190             print("Failures:")
 191             for m in sorted(total.errors):
 192                 print("  " + m)
 193         if (ncrashed > 0):
 194             print("Crashes:")
 195             for f in sorted(total.crashed):
 196                 print("  " + f)
 197         print("Summary:")
 198         spct = float(nsuccess)/float(total.nsymbols)
 199         fpct = float(nfailed)/float(total.nsymbols)
 200         cpct = float(ncrashed)/float(nfiles)
 201         print("Processed {0} object files.".format(nfiles))
 202         print("{0}/{1} symbols successfully demangled ({2:.4%})".format(nsuccess, total.nsymbols, spct))
 203         print("{0} symbols could not be demangled ({1:.4%})".format(nfailed, fpct))
 204         print("{0} files crashed while demangling ({1:.4%})".format(ncrashed, cpct))
 205
 206     except:
 207         traceback.print_exc()
 208
 209     pool.close()
 210     pool.join()
 211
 212 if __name__ == "__main__":
 213     def_obj = 'obj' if sys.platform == 'win32' else 'o'
 214
 215     parser = argparse.ArgumentParser(description='Demangle all symbols in a tree of object files, looking for failures.')
 216     parser.add_argument('dir', type=str, help='the root directory at which to start crawling')
 217     parser.add_argument('--objdump', type=str, help='path to llvm-objdump.  If not specified ' +
 218                         'the tool is located as if by `which llvm-objdump`.')
 219     parser.add_argument('--extensions', type=str, default=def_obj,
 220                         help='comma separated list of extensions to demangle (e.g. `o,obj`).  ' +
 221                         'By default this will be `obj` on Windows and `o` otherwise.')
 222
 223     args = parser.parse_args()
 224
 225
 226     multiprocessing.freeze_support()
 227     go()
 228