1 # Given a path to llvm-objdump and a directory tree, spider the directory tree
2 # dumping every object file encountered with correct options needed to demangle
3 # symbols in the object file, and collect statistics about failed / crashed
4 # demanglings. Useful for stress testing the demangler against a large corpus
7 from __future__
import print_function
16 from multiprocessing
import Pool
17 import multiprocessing
22 question
= line
.find('?')
26 open_paren
= line
.find('(', question
)
29 close_paren
= line
.rfind(')', open_paren
)
32 mangled
= line
[question
: open_paren
]
33 demangled
= line
[open_paren
+1 : close_paren
]
34 return mangled
.strip(), demangled
.strip()
44 class MapContext(object):
46 self
.rincomplete
= None
47 self
.rcumulative
= Result()
48 self
.pending_objs
= []
51 def process_file(path
, objdump
):
55 popen_args
= [objdump
, '-t', '-demangle', path
]
56 p
= subprocess
.Popen(popen_args
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
)
57 stdout
, stderr
= p
.communicate()
62 output
= stdout
.decode('utf-8')
64 for line
in output
.splitlines():
65 mangled
, demangled
= parse_line(line
)
69 if "invalid mangled name" in demangled
:
def add_results(r1, r2):
    """Fold the statistics held by ``r2`` into ``r1``, modifying ``r1`` in place.

    Both arguments are result records exposing ``crashed``, ``errors``,
    ``nsymbols`` and ``nfiles``.  ``r2`` is only read, never modified.

    Returns None; the merged totals live in ``r1``.
    """
    # `crashed` is appended to, so duplicates and ordering are preserved.
    r1.crashed.extend(r2.crashed)
    # `errors` is merged with update(), so it must be a container that
    # supports it (presumably a set of mangled names -- confirm against the
    # result class definition).
    r1.errors.update(r2.errors)
    r1.nsymbols += r2.nsymbols
    r1.nfiles += r2.nfiles
def print_result_row(directory, result):
    """Print a one-line summary of ``result`` for ``directory``.

    The line reports, in order: the file count, the number of entries in
    ``result.crashed``, the number of entries in ``result.errors``, the
    symbol count, and finally the directory wrapped in single quotes.
    """
    summary = "[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'".format(
        result.nfiles,
        len(result.crashed),
        len(result.errors),
        result.nsymbols,
        directory,
    )
    print(summary)
83 def process_one_chunk(pool
, chunk_size
, objdump
, context
):
89 while context
.npending
> 0 and len(objs
) < chunk_size
:
90 this_dir
= context
.pending_objs
[0][0]
91 ordered_dirs
.append(this_dir
)
93 if context
.rincomplete
is not None:
94 re
= context
.rincomplete
95 context
.rincomplete
= None
97 dir_results
[this_dir
] = re
100 nneeded
= chunk_size
- len(objs
)
101 objs_this_dir
= context
.pending_objs
[0][1]
102 navail
= len(objs_this_dir
)
103 ntaken
= min(nneeded
, navail
)
104 objs
.extend(objs_this_dir
[0:ntaken
])
105 remaining_objs_this_dir
= objs_this_dir
[ntaken
:]
106 context
.pending_objs
[0] = (context
.pending_objs
[0][0], remaining_objs_this_dir
)
107 context
.npending
-= ntaken
109 context
.pending_objs
.pop(0)
115 assert(len(objs
) == chunk_size
or context
.npending
== 0)
117 copier
= functools
.partial(process_file
, objdump
=objdump
)
118 mapped_results
= list(pool
.map(copier
, objs
))
120 for mr
in mapped_results
:
121 result_dir
= os
.path
.dirname(mr
.file)
122 result_entry
= dir_results
[result_dir
]
123 add_results(result_entry
, mr
)
125 # It's only possible that a single item is incomplete, and it has to be the
128 context
.rincomplete
= dir_results
[ordered_dirs
[-1]]
131 # Now ordered_dirs contains a list of all directories which *did* complete.
132 for c
in ordered_dirs
:
134 add_results(context
.rcumulative
, re
)
135 print_result_row(c
, re
)
def process_pending_files(pool, chunk_size, objdump, context):
    """Process queued object files in full chunks of ``chunk_size``.

    Calls ``process_one_chunk`` repeatedly for as long as
    ``context.npending`` is at least ``chunk_size``.  When fewer than
    ``chunk_size`` files are pending this returns without doing any work,
    leaving the remainder queued on ``context`` for the caller to flush.

    ``pool``, ``chunk_size``, ``objdump`` and ``context`` are forwarded to
    ``process_one_chunk`` unchanged.
    """
    # Termination relies on process_one_chunk reducing context.npending.
    while context.npending >= chunk_size:
        process_one_chunk(pool, chunk_size, objdump, context)
145 extensions
= args
.extensions
.split(',')
146 extensions
= [x
if x
[0] == '.' else '.' + x
for x
in extensions
]
150 pool
= Pool(processes
=pool_size
)
154 context
= MapContext()
156 for root
, dirs
, files
in os
.walk(obj_dir
):
157 root
= os
.path
.normpath(root
)
160 file, ext
= os
.path
.splitext(f
)
161 if not ext
in extensions
:
165 full_path
= os
.path
.join(root
, f
)
166 full_path
= os
.path
.normpath(full_path
)
167 pending
.append(full_path
)
169 # If this directory had no object files, just print a default
170 # status line and continue with the next dir
171 if len(pending
) == 0:
172 print_result_row(root
, Result())
175 context
.npending
+= len(pending
)
176 context
.pending_objs
.append((root
, pending
))
177 # Drain the tasks, `pool_size` at a time, until we have less than
178 # `pool_size` tasks remaining.
179 process_pending_files(pool
, pool_size
, args
.objdump
, context
)
181 assert(context
.npending
< pool_size
);
182 process_one_chunk(pool
, pool_size
, args
.objdump
, context
)
184 total
= context
.rcumulative
185 nfailed
= len(total
.errors
)
186 nsuccess
= total
.nsymbols
- nfailed
187 ncrashed
= len(total
.crashed
)
191 for m
in sorted(total
.errors
):
195 for f
in sorted(total
.crashed
):
198 spct
= float(nsuccess
)/float(total
.nsymbols
)
199 fpct
= float(nfailed
)/float(total
.nsymbols
)
200 cpct
= float(ncrashed
)/float(nfiles
)
201 print("Processed {0} object files.".format(nfiles
))
202 print("{0}/{1} symbols successfully demangled ({2:.4%})".format(nsuccess
, total
.nsymbols
, spct
))
203 print("{0} symbols could not be demangled ({1:.4%})".format(nfailed
, fpct
))
204 print("{0} files crashed while demangling ({1:.4%})".format(ncrashed
, cpct
))
207 traceback
.print_exc()
212 if __name__
== "__main__":
213 def_obj
= 'obj' if sys
.platform
== 'win32' else 'o'
215 parser
= argparse
.ArgumentParser(description
='Demangle all symbols in a tree of object files, looking for failures.')
216 parser
.add_argument('dir', type=str, help='the root directory at which to start crawling')
217 parser
.add_argument('--objdump', type=str, help='path to llvm-objdump. If not specified ' +
218 'the tool is located as if by `which llvm-objdump`.')
219 parser
.add_argument('--extensions', type=str, default
=def_obj
,
220 help='comma separated list of extensions to demangle (e.g. `o,obj`). ' +
221 'By default this will be `obj` on Windows and `o` otherwise.')
223 args
= parser
.parse_args()
226 multiprocessing
.freeze_support()