Improve llvm-symbolizer discovery in asan_symbolize.py
[blocksruntime.git] / lib / asan / scripts / asan_symbolize.py
blob49bc1c02cb8a93d15c840fc65f4f27bce139c96d
1 #!/usr/bin/env python
2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
4 # The LLVM Compiler Infrastructure
6 # This file is distributed under the University of Illinois Open Source
7 # License. See LICENSE.TXT for details.
9 #===------------------------------------------------------------------------===#
10 import bisect
11 import getopt
12 import os
13 import pty
14 import re
15 import subprocess
16 import sys
17 import termios
# Per-binary cache of symbolizer chains, keyed by the binary's path.
symbolizers = dict()
# When true, the external commands and the data sent to them are echoed.
DEBUG = False
# Whether the external tools are asked to demangle symbol names; switched on
# by the -d/--demangle command-line option.
demangle = False
# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
  """Shorten a source location string for display.

  Every non-option command-line argument is used as a regular expression
  naming a path prefix: everything in file_name up to and including the last
  match of that expression is removed. Afterwards locations inside the ASan
  runtime sources are collapsed to '_asan_rtl_' and 'crtstuff.c:0' is
  replaced with '???:0'.

  Args:
    file_name: location string such as '/path/to/file.cc:12'.
  Returns:
    The shortened location string.
  """
  for path_to_cut in sys.argv[1:]:
    # Option flags (e.g. -d / --demangle) are parsed separately by getopt in
    # the script's main block. They are not path prefixes, and using them as
    # patterns would truncate any path that happens to contain e.g. "-d".
    if path_to_cut.startswith('-'):
      continue
    file_name = re.sub('.*' + path_to_cut, '', file_name)
  file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
  file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
  return file_name
def GuessArch(addr):
  """Guess the architecture from the textual width of an address.

  Args:
    addr: address string as it appears in the report, e.g. '0x7f6e35cf2e45'.
  Returns:
    'x86_64' when the string is longer than a 32-bit address can be,
    otherwise 'i386'.
  """
  # A 32-bit address needs at most len('0x') + 8 hex digits == 10 characters.
  return 'x86_64' if len(addr) > 10 else 'i386'
class Symbolizer(object):
  """Common interface of all symbolizers; this base version resolves nothing."""

  def __init__(self):
    """Nothing to set up in the base class."""

  def symbolize(self, addr, binary, offset):
    """Symbolize the given address (pair of binary and offset).

    Overridden in subclasses.
    Args:
        addr: virtual address of an instruction.
        binary: path to executable/shared object containing this instruction.
        offset: instruction offset in the @binary.
    Returns:
        list of strings (one string for each inlined frame) describing
        the code locations for this instruction (that is, function name, file
        name, line and column numbers). The base implementation always
        returns None.
    """
    return None
class LLVMSymbolizer(Symbolizer):
  """Symbolizer that talks to a single llvm-symbolizer child over pipes."""

  def __init__(self, symbolizer_path, addr):
    """Remember the tool path, guess the default arch from addr, start the tool."""
    super(LLVMSymbolizer, self).__init__()
    self.symbolizer_path = symbolizer_path
    self.default_arch = GuessArch(addr)
    self.pipe = self.open_llvm_symbolizer()

  def open_llvm_symbolizer(self):
    """Launch llvm-symbolizer.

    Returns:
      The subprocess.Popen object, or None when launching raised OSError.
    """
    options = ['--use-symbol-table=true',
               '--demangle=%s' % demangle,
               '--functions=true',
               '--inlining=true',
               '--default-arch=%s' % self.default_arch]
    cmd = [self.symbolizer_path] + options
    if DEBUG:
      print(' '.join(cmd))
    try:
      return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE)
    except OSError:
      return None

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize."""
    if not self.pipe:
      return None
    frames = []
    try:
      request = '%s %s' % (binary, offset)
      if DEBUG:
        print(request)
      self.pipe.stdin.write(request + '\n')
      # The reply is read as (function name, file name) line pairs; an empty
      # function-name line ends the reply.
      while True:
        function_name = self.pipe.stdout.readline().rstrip()
        if not function_name:
          break
        file_name = fix_filename(self.pipe.stdout.readline().rstrip())
        if function_name.startswith('??') or file_name.startswith('??'):
          # Unresolved frame: leave it out.
          continue
        frames.append('%s in %s %s' % (addr, function_name, file_name))
    except Exception:
      # Any failure discards whatever was collected so far.
      frames = []
    return frames or None
def LLVMSymbolizerFactory(system, addr):
  """Create an LLVMSymbolizer, locating the tool through the environment.

  The path is taken from LLVM_SYMBOLIZER_PATH, then ASAN_SYMBOLIZER_PATH;
  when neither is set to a non-empty value, 'llvm-symbolizer' is used and is
  assumed to be in PATH.

  Args:
    system: not used by this function.
    addr: address string forwarded to LLVMSymbolizer.
  Returns:
    A new LLVMSymbolizer.
  """
  symbolizer_path = (os.getenv('LLVM_SYMBOLIZER_PATH') or
                     os.getenv('ASAN_SYMBOLIZER_PATH') or
                     'llvm-symbolizer')
  return LLVMSymbolizer(symbolizer_path, addr)
class Addr2LineSymbolizer(Symbolizer):
  """Symbolizer that drives one addr2line child process for one binary."""

  def __init__(self, binary):
    """Remember the binary and start addr2line for it."""
    super(Addr2LineSymbolizer, self).__init__()
    self.binary = binary
    self.pipe = self.open_addr2line()

  def open_addr2line(self):
    """Start 'addr2line -f [--demangle] -e <binary>' and return the Popen."""
    demangle_args = ['--demangle'] if demangle else []
    cmd = ['addr2line', '-f'] + demangle_args + ['-e', self.binary]
    if DEBUG:
      print(' '.join(cmd))
    return subprocess.Popen(cmd,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize."""
    if binary != self.binary:
      return None
    try:
      self.pipe.stdin.write('%s\n' % offset)
      function_name = self.pipe.stdout.readline().rstrip()
      file_name = self.pipe.stdout.readline().rstrip()
    except Exception:
      # On any failure both fields are reported empty.
      function_name = file_name = ''
    return ['%s in %s %s' % (addr, function_name, fix_filename(file_name))]
class UnbufferedLineConverter(object):
  """
  Wrap a child process that responds to each line of input with one line of
  output.  Uses pty to trick the child into providing unbuffered output.
  """
  def __init__(self, args, close_stderr=False):
    """Fork a child on a pty and exec the command in it.

    Args:
      args: command line as a list; args[0] is looked up via execvp.
      close_stderr: when true, the child's fd 2 is replaced with a
        descriptor opened on /dev/null.
    """
    pid, fd = pty.fork()
    if pid == 0:
      # We're the child. Transfer control to command.
      try:
        if close_stderr:
          dev_null = os.open('/dev/null', 0)
          os.dup2(dev_null, 2)
        os.execvp(args[0], args)
      finally:
        # execvp does not return on success, so this line is reached only
        # when setting up or exec'ing the command failed. Leave immediately:
        # letting the exception propagate would make the forked child carry
        # on running the rest of this script as a second copy of the parent.
        os._exit(127)
    else:
      # Disable echoing.
      attr = termios.tcgetattr(fd)
      attr[3] = attr[3] & ~termios.ECHO
      termios.tcsetattr(fd, termios.TCSANOW, attr)
      # Set up a file()-like interface to the child process
      self.r = os.fdopen(fd, "r", 1)
      self.w = os.fdopen(os.dup(fd), "w", 1)

  def convert(self, line):
    """Send one line to the child and return its next line of output."""
    self.w.write(line + "\n")
    return self.readline()

  def readline(self):
    """Read one line from the child, with trailing whitespace stripped."""
    return self.r.readline().rstrip()
class DarwinSymbolizer(Symbolizer):
  """Symbolizer that queries an 'atos' child process for one binary."""

  def __init__(self, addr, binary):
    """Remember the binary, guess the arch from addr and start atos."""
    super(DarwinSymbolizer, self).__init__()
    self.binary = binary
    self.arch = GuessArch(addr)
    self.open_atos()

  def open_atos(self):
    """Start 'atos -o <binary> -arch <arch>' behind an UnbufferedLineConverter."""
    if DEBUG:
      print('atos -o %s -arch %s' % (self.binary, self.arch))
    self.atos = UnbufferedLineConverter(
        ['atos', '-o', self.binary, '-arch', self.arch], close_stderr=True)

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize."""
    if binary != self.binary:
      return None
    atos_line = self.atos.convert('0x%x' % int(offset, 16))
    # Skip over any "got symbolicator for" lines that precede the answer.
    while "got symbolicator for" in atos_line:
      atos_line = self.atos.readline()
    # A well-formed atos response looks like this:
    #   foo(type1, type2) (in object.name) (filename.cc:80)
    match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
    if DEBUG:
      print('%s %s' % ('atos_line: ', atos_line))
    if not match:
      # Unrecognised response: pass the raw line through.
      return ['%s in %s' % (addr, atos_line)]
    # Drop parenthesised parts (e.g. the argument list) from the function name.
    function_name = re.sub(r'\(.*?\)', '', match.group(1))
    file_name = fix_filename(match.group(3))
    return ['%s in %s %s' % (addr, function_name, file_name)]
# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
  """Tries a list of symbolizers in order and returns the first answer."""

  def __init__(self, symbolizer_list):
    """Keep a reference to symbolizer_list; falsy entries are skipped later."""
    super(ChainSymbolizer, self).__init__()
    self.symbolizer_list = symbolizer_list

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize."""
    for candidate in self.symbolizer_list:
      if not candidate:
        continue
      frames = candidate.symbolize(addr, binary, offset)
      if frames:
        return frames
    return None

  def append_symbolizer(self, symbolizer):
    """Add symbolizer to the end of the chain."""
    self.symbolizer_list.append(symbolizer)
def BreakpadSymbolizerFactory(binary):
  """Create a BreakpadSymbolizer for binary if a symbol file is available.

  The symbol file name is the binary path with the value of the
  BREAKPAD_SUFFIX environment variable appended.

  Args:
    binary: path of the binary.
  Returns:
    A BreakpadSymbolizer, or None when BREAKPAD_SUFFIX is unset/empty or the
    symbol file does not exist.
  """
  suffix = os.getenv('BREAKPAD_SUFFIX')
  if not suffix:
    return None
  filename = binary + suffix
  if not os.access(filename, os.F_OK):
    return None
  return BreakpadSymbolizer(filename)
def SystemSymbolizerFactory(system, addr, binary):
  """Create the symbolizer that wraps the platform's own tool.

  Args:
    system: system name; 'Linux' and 'Darwin' are recognised.
    addr: address string (only used for the Darwin symbolizer).
    binary: path of the binary to symbolize.
  Returns:
    An Addr2LineSymbolizer on Linux, a DarwinSymbolizer on Darwin, and None
    for any other system name.
  """
  if system == 'Linux':
    return Addr2LineSymbolizer(binary)
  if system == 'Darwin':
    return DarwinSymbolizer(addr, binary)
  return None
class BreakpadSymbolizer(Symbolizer):
  """Symbolizer that answers from a Breakpad text symbol file loaded in memory.

  Attributes (filled in by __init__ / parse_lines):
    files: source file names, indexed by FILE record number.
    symbols: dict mapping a symbol address to its name.
    address_list: sorted start addresses of all line records.
    addresses: dict mapping a line record's start address to the tuple
      (symbol address, size, line, file number).
  """

  def __init__(self, filename):
    """Load and parse the symbol file.

    Args:
      filename: path of the Breakpad symbol file.
    Raises:
      IndexError: if the file is empty or its first line has fewer than four
        whitespace-separated fields.
    """
    super(BreakpadSymbolizer, self).__init__()
    self.filename = filename
    # Read everything up front and close the file right away.
    with open(filename) as symbol_file:
      lines = symbol_file.readlines()
    self.files = []
    self.symbols = {}
    self.address_list = []
    self.addresses = {}
    # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
    fragments = lines[0].rstrip().split()
    self.arch = fragments[2]
    self.debug_id = fragments[3]
    self.binary = ' '.join(fragments[4:])
    self.parse_lines(lines[1:])

  def parse_lines(self, lines):
    """Parse every record after the MODULE line into the lookup tables.

    FILE, PUBLIC and FUNC records are stored, CFI and STACK records are
    ignored, and any other non-blank line is parsed as a line record that
    starts with a hexadecimal address.
    """
    cur_function_addr = ''
    for line in lines:
      fragments = line.split()
      if not fragments:
        # Blank line: nothing to parse.
        continue
      record = fragments[0]
      if record == 'FILE':
        assert int(fragments[1]) == len(self.files)
        self.files.append(' '.join(fragments[2:]))
      elif record == 'PUBLIC':
        self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
      elif record in ('CFI', 'STACK'):
        pass
      elif record == 'FUNC':
        cur_function_addr = int(fragments[1], 16)
        # A name already recorded for this address (from PUBLIC) wins.
        if cur_function_addr not in self.symbols:
          self.symbols[cur_function_addr] = ' '.join(fragments[4:])
      else:
        # Line starting with an address.
        addr = int(record, 16)
        self.address_list.append(addr)
        # Tuple of symbol address, size, line, file number.
        self.addresses[addr] = (cur_function_addr,
                                int(fragments[1], 16),
                                int(fragments[2]),
                                int(fragments[3]))
    self.address_list.sort()

  def get_sym_file_line(self, addr):
    """Find the line record covering addr.

    Args:
      addr: integer offset to look up.
    Returns:
      Tuple (symbol name, file name, line number), or None when no line
      record covers addr.
    """
    if addr in self.addresses:
      key = addr
    else:
      # Closest line record that starts below addr, if there is one.
      index = bisect.bisect_left(self.address_list, addr)
      if index == 0:
        return None
      key = self.address_list[index - 1]
    sym_id, size, line_no, file_no = self.addresses[key]
    symbol = self.symbols[sym_id]
    filename = self.files[file_no]
    if addr < key + size:
      return symbol, filename, line_no
    return None

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize."""
    if self.binary != binary:
      return None
    res = self.get_sym_file_line(int(offset, 16))
    if not res:
      return None
    function_name, file_name, line_no = res
    result = ['%s in %s %s:%d' % (
        addr, function_name, file_name, line_no)]
    # Echo the raw result only in debug mode; printing it unconditionally
    # would interleave a Python list repr with the symbolized report.
    if DEBUG:
      print(result)
    return result
class SymbolizationLoop(object):
  """Copies a report from stdin to stdout, replacing stack frames that have
  the form '#N 0xADDR (binary+0xOFFSET)' with symbolized frames."""

  # Matches a stack frame line such as:
  #   #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
  # Compiled once here rather than for every input line.
  _STACK_TRACE_LINE_RE = re.compile(
      r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')

  def __init__(self, binary_name_filter=None):
    """Set up the loop.

    Args:
      binary_name_filter: optional callable mapping the binary name found in
        the report to the name that should be symbolized instead.
    Raises:
      Exception: if the system is neither Linux nor Darwin.
    """
    # Used by clients who may want to supply a different binary name.
    # E.g. in Chrome several binaries may share a single .dSYM.
    self.binary_name_filter = binary_name_filter
    self.system = os.uname()[0]
    if self.system not in ['Linux', 'Darwin']:
      raise Exception('Unknown system')
    self.llvm_symbolizer = None
    # State used while printing; (re)set by process_stdin().
    self.frame_no = 0
    self.current_line = ''

  def symbolize_address(self, addr, binary, offset):
    """Symbolize one frame using the per-binary chain of symbolizers.

    Returns:
      Non-empty list of frame description strings.
    """
    # Initialize llvm-symbolizer lazily.
    if not self.llvm_symbolizer:
      self.llvm_symbolizer = LLVMSymbolizerFactory(self.system, addr)
    # Use the chain of symbolizers:
    # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
    # (fall back to next symbolizer if the previous one fails).
    if binary not in symbolizers:
      symbolizers[binary] = ChainSymbolizer(
          [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer])
    result = symbolizers[binary].symbolize(addr, binary, offset)
    if result is None:
      # Initialize system symbolizer only if other symbolizers failed.
      symbolizers[binary].append_symbolizer(
          SystemSymbolizerFactory(self.system, addr, binary))
      result = symbolizers[binary].symbolize(addr, binary, offset)
    # The system symbolizer must produce some result.
    assert result
    return result

  def print_symbolized_lines(self, symbolized_lines):
    """Print the symbolized frames, or the current input line if there are none.

    Each printed frame gets the next value of the running frame counter.
    """
    if not symbolized_lines:
      print(self.current_line)
      return
    for symbolized_frame in symbolized_lines:
      print(' #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip())
      self.frame_no += 1

  def process_stdin(self):
    """Read stdin line by line until EOF, printing each line or its
    symbolized replacement."""
    self.frame_no = 0
    while True:
      line = sys.stdin.readline()
      if not line:
        break
      self.current_line = line.rstrip()
      match = self._STACK_TRACE_LINE_RE.match(line)
      if not match:
        # Not a stack frame: pass the line through.
        print(self.current_line)
        continue
      if DEBUG:
        print(line)
      _, frameno_str, addr, binary, offset = match.groups()
      if frameno_str == '0':
        # Assume that frame #0 is the first frame of new stack trace.
        self.frame_no = 0
      original_binary = binary
      if self.binary_name_filter:
        binary = self.binary_name_filter(binary)
      symbolized_line = self.symbolize_address(addr, binary, offset)
      if not symbolized_line and original_binary != binary:
        # The filtered name gave nothing: retry with the name that was
        # actually in the report (not the filtered name again).
        symbolized_line = self.symbolize_address(addr, original_binary,
                                                 offset)
      self.print_symbolized_lines(symbolized_line)
if __name__ == '__main__':
  # Only -d / --demangle is recognised as an option.
  opts, args = getopt.getopt(sys.argv[1:], "d", ["demangle"])
  if any(flag in ("-d", "--demangle") for flag, _ in opts):
    demangle = True
  # Symbolize everything arriving on stdin.
  loop = SymbolizationLoop()
  loop.process_stdin()