2 #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
4 # The LLVM Compiler Infrastructure
6 # This file is distributed under the University of Illinois Open Source
7 # License. See LICENSE.TXT for details.
9 #===------------------------------------------------------------------------===#
24 # FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Normalize a "file:line" location string reported by a symbolizer.

    Three rewrites are applied in order:
      * every command-line argument (sys.argv[1:]) is used as a regular
        expression; everything up to and including its match is removed;
      * a location inside an ASan runtime source (asan_*.cc:N) is replaced
        by the marker '_asan_rtl_';
      * 'crtstuff.c:0' is replaced by '???:0'.

    Args:
        file_name: location string to clean up.

    Returns:
        The rewritten location string.
    """
    for path_to_cut in sys.argv[1:]:
        file_name = re.sub('.*' + path_to_cut, '', file_name)
    file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
    file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
    # Callers assign the result back, so the cleaned name must be returned.
    return file_name
33 # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
class Symbolizer(object):
    """Common interface implemented by every symbolizer in this file."""

    def __init__(self):
        pass

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Overridden in subclasses.

        Args:
            addr: virtual address of an instruction.
            binary: path to executable/shared object containing this
                instruction.
            offset: instruction offset in the @binary.

        Returns:
            list of strings (one string for each inlined frame) describing
            the code locations for this instruction (that is, function name,
            file name, line and column numbers). This base implementation
            returns None.
        """
        return None
class LLVMSymbolizer(Symbolizer):
    """Symbolizer backed by a long-lived llvm-symbolizer child process.

    Each request is written to the child's stdin as a "<binary> <offset>"
    line; the reply is read from its stdout as pairs of lines (function name,
    then file name).
    """

    def __init__(self, symbolizer_path, addr):
        """Launch the symbolizer.

        Args:
            symbolizer_path: executable to run.
            addr: an address string; handed to GuessArch() to choose the
                value of --default-arch.
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.default_arch = GuessArch(addr)
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Start the child process.

        Returns:
            The subprocess.Popen object, or None when the executable could
            not be started (symbolize() then reports failure so a chained
            symbolizer can take over).
        """
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=%s' % demangle,
               '--default-arch=%s' % self.default_arch]
        try:
            # Text-mode pipes keep the str-based line protocol working on
            # both Python 2 and Python 3.
            result = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      universal_newlines=True)
        except OSError:
            result = None
        return result

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '%s %s' % (binary, offset)
            # Debug echo only: printing it unconditionally would interleave
            # requests with the symbolized output. The DEBUG flag is looked
            # up defensively because its definition is not part of this block.
            if globals().get('DEBUG'):
                print(symbolizer_input)
            self.pipe.stdin.write(symbolizer_input + '\n')
            self.pipe.stdin.flush()
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    # Empty line (or EOF) terminates the reply.
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if (not function_name.startswith('??') and
                        not file_name.startswith('??')):
                    # Append only valid frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except (IOError, OSError, ValueError):
            # The child went away or the pipe is closed: report failure.
            result = []
        if not result:
            result = None
        return result
def LLVMSymbolizerFactory(system, addr):
    """Build an LLVMSymbolizer, locating the executable via the environment.

    The path is taken from LLVM_SYMBOLIZER_PATH, then ASAN_SYMBOLIZER_PATH;
    when neither is set (or both are empty) 'llvm-symbolizer' is used, which
    assumes the tool is in PATH.

    Args:
        system: unused by this factory.
        addr: address string forwarded to LLVMSymbolizer.

    Returns:
        A new LLVMSymbolizer instance.
    """
    symbolizer_path = (os.getenv('LLVM_SYMBOLIZER_PATH')
                       or os.getenv('ASAN_SYMBOLIZER_PATH')
                       or 'llvm-symbolizer')
    return LLVMSymbolizer(symbolizer_path, addr)
class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer backed by an `addr2line -f` child process for one binary."""

    def __init__(self, binary):
        """Start addr2line for @binary.

        Args:
            binary: path of the executable/shared object to symbolize.
        """
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Launch addr2line and return the subprocess.Popen object."""
        cmd = ['addr2line', '-f']
        if demangle:
            cmd += ['--demangle']
        cmd += ['-e', self.binary]
        # Text-mode pipes keep the str-based line protocol working on both
        # Python 2 and Python 3.
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            # This instance only serves the binary it was created for.
            return None
        try:
            self.pipe.stdin.write('%s\n' % offset)
            self.pipe.stdin.flush()
            function_name = self.pipe.stdout.readline().rstrip()
            file_name = self.pipe.stdout.readline().rstrip()
        except (IOError, OSError, ValueError):
            # Still produce a frame so the caller always gets a result.
            function_name = ''
            file_name = ''
        file_name = fix_filename(file_name)
        return ['%s in %s %s' % (addr, function_name, file_name)]
class UnbufferedLineConverter(object):
    """Wrap a child process that responds to each line of input with one line
    of output. Uses pty to trick the child into providing unbuffered output.
    """

    def __init__(self, args, close_stderr=False):
        """Fork the child on a pseudo-terminal and wrap its descriptor.

        Args:
            args: command line; args[0] is the program passed to os.execvp.
            close_stderr: when true, the child's stderr (fd 2) is redirected
                to /dev/null before the command is executed.
        """
        # Imported locally so the block does not depend on a file-level
        # import that is not visible from here.
        import pty
        pid, fd = pty.fork()
        if pid == 0:
            # We're the child. Transfer control to command.
            if close_stderr:
                dev_null = os.open('/dev/null', 0)
                os.dup2(dev_null, 2)
            os.execvp(args[0], args)
        else:
            # Disable echoing so written requests are not read back as output.
            attr = termios.tcgetattr(fd)
            attr[3] = attr[3] & ~termios.ECHO
            termios.tcsetattr(fd, termios.TCSANOW, attr)
            # Set up a file()-like interface to the child process
            self.r = os.fdopen(fd, "r", 1)
            self.w = os.fdopen(os.dup(fd), "w", 1)

    def convert(self, line):
        """Send @line to the child and return its next line of output."""
        self.w.write(line + "\n")
        return self.readline()

    def readline(self):
        """Return the next output line with trailing whitespace removed."""
        return self.r.readline().rstrip()
class DarwinSymbolizer(Symbolizer):
    """Symbolizer backed by an `atos` child process for one binary."""

    def __init__(self, addr, binary):
        """Start atos for @binary.

        Args:
            addr: address string; handed to GuessArch() to choose -arch.
            binary: path of the executable/shared object to symbolize.
        """
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        self.arch = GuessArch(addr)
        self.open_atos()

    def open_atos(self):
        """Launch atos behind an UnbufferedLineConverter (stored in self.atos)."""
        # Debug output only; DEBUG is looked up defensively because its
        # definition is not part of this block.
        if globals().get('DEBUG'):
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            # This instance only serves the binary it was created for.
            return None
        atos_line = self.atos.convert('0x%x' % int(offset, 16))
        # Skip informational lines that precede the actual answer.
        while "got symbolicator for" in atos_line:
            atos_line = self.atos.readline()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if globals().get('DEBUG'):
            print('atos_line:  %s' % atos_line)
        if match:
            function_name = match.group(1)
            # Drop the parenthesised argument list from the function name.
            function_name = re.sub(r'\(.*?\)', '', function_name)
            file_name = fix_filename(match.group(3))
            return ['%s in %s %s' % (addr, function_name, file_name)]
        # Unrecognised response: pass the raw atos line through.
        return ['%s in %s' % (addr, atos_line)]
215 # Chain several symbolizers so that if one symbolizer fails, we fall back
216 # to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Try a list of symbolizers in order and use the first one that succeeds."""

    def __init__(self, symbolizer_list):
        """Store the chain.

        Args:
            symbolizer_list: list of symbolizers to consult in order; entries
                may be None (e.g. a factory that found nothing to build) and
                are skipped.
        """
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        for symbolizer in self.symbolizer_list:
            if not symbolizer:
                continue
            result = symbolizer.symbolize(addr, binary, offset)
            if result:
                return result
        # Every symbolizer in the chain failed.
        return None

    def append_symbolizer(self, symbolizer):
        """Add @symbolizer to the end of the chain."""
        self.symbolizer_list.append(symbolizer)
def BreakpadSymbolizerFactory(binary):
    """Build a BreakpadSymbolizer for @binary if a symbol file exists.

    The symbol file name is @binary with the value of the BREAKPAD_SUFFIX
    environment variable appended.

    Args:
        binary: path of the executable/shared object.

    Returns:
        A BreakpadSymbolizer, or None when BREAKPAD_SUFFIX is unset/empty or
        the symbol file does not exist.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    if not suffix:
        # Without the guard `binary + None` would raise TypeError.
        return None
    filename = binary + suffix
    if os.access(filename, os.F_OK):
        return BreakpadSymbolizer(filename)
    return None
def SystemSymbolizerFactory(system, addr, binary):
    """Create the platform's native symbolizer for @binary.

    Args:
        system: platform name; 'Darwin' and 'Linux' are recognised.
        addr: address string, forwarded to DarwinSymbolizer.
        binary: path of the executable/shared object.

    Returns:
        A DarwinSymbolizer on 'Darwin', an Addr2LineSymbolizer on 'Linux',
        and None for any other value of @system.
    """
    if system == 'Linux':
        return Addr2LineSymbolizer(binary)
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    return None
class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers from a Breakpad text symbol file.

    The whole file is parsed once in the constructor into:
      files:        list of source file names, indexed by file number;
      symbols:      dict mapping a symbol's start address to its name;
      addresses:    dict mapping a line record's start address to the tuple
                    (symbol address, size, line, file number);
      address_list: sorted list of the keys of `addresses`.
    """

    def __init__(self, filename):
        """Load and parse the symbol file.

        Args:
            filename: path of the Breakpad symbol file.
        """
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # `with` closes the handle; the file() builtin was Python 2 only.
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []
        self.symbols = {}
        self.address_list = []
        self.addresses = {}
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Fill files/symbols/addresses/address_list from the record lines.

        Args:
            lines: iterable of record lines (everything after the MODULE
                header). Blank lines are ignored.
        """
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to parse.
                continue
            kind = fragments[0]
            if kind == 'FILE':
                # File numbers are expected to be consecutive from 0.
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif kind == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif kind in ['CFI', 'STACK']:
                pass
            elif kind == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # A PUBLIC record seen earlier for the same address wins.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Look up the line record covering @addr.

        Args:
            addr: integer offset inside the binary.

        Returns:
            (symbol name, file name, line number), or None when no line
            record covers @addr.
        """
        if addr in self.addresses:
            key = addr
        else:
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                # Below the first record; index - 1 would wrap to the last.
                return None
            key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            # The symbol file describes a different module.
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        result = ['%s in %s %s:%d' % (
            addr, function_name, file_name, line_no)]
        return result
class SymbolizationLoop(object):
    """Read a report from stdin and print it with stack frames symbolized."""

    def __init__(self, binary_name_filter=None):
        """Set up the loop.

        Args:
            binary_name_filter: optional callable mapping the binary name
                found in a frame to the name that should be symbolized.

        Raises:
            Exception: when os.uname() reports a system other than 'Linux'
                or 'Darwin'.
        """
        # Used by clients who may want to supply a different binary name.
        # E.g. in Chrome several binaries may share a single .dSYM.
        self.binary_name_filter = binary_name_filter
        self.system = os.uname()[0]
        if self.system not in ['Linux', 'Darwin']:
            raise Exception('Unknown system')
        self.llvm_symbolizer = None
        # Number printed for the next symbolized frame.
        self.frame_no = 0

    def symbolize_address(self, addr, binary, offset):
        """Symbolize one frame through the per-binary symbolizer chain.

        Args:
            addr: address string from the frame.
            binary: path of the module the frame belongs to.
            offset: offset string inside @binary.

        Returns:
            Non-empty list of frame description strings.
        """
        # Initialize llvm-symbolizer lazily.
        if not self.llvm_symbolizer:
            self.llvm_symbolizer = LLVMSymbolizerFactory(self.system, addr)
        # Use the chain of symbolizers:
        # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
        # (fall back to next symbolizer if the previous one fails).
        if binary not in symbolizers:
            symbolizers[binary] = ChainSymbolizer(
                [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer])
        result = symbolizers[binary].symbolize(addr, binary, offset)
        if result is None:
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def print_symbolized_lines(self, symbolized_lines):
        """Print the symbolized frames, or the raw line when there are none.

        Args:
            symbolized_lines: list of frame descriptions, or a falsy value
                to echo self.current_line unchanged.
        """
        if not symbolized_lines:
            print(self.current_line)
            return
        for symbolized_frame in symbolized_lines:
            print(' #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip())
            # Inlined frames get consecutive numbers of their own.
            self.frame_no += 1

    def process_stdin(self):
        """Copy stdin to stdout, replacing stack-trace lines by symbolized ones."""
        #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
        stack_trace_line_format = re.compile(
            r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
        self.frame_no = 0
        while True:
            line = sys.stdin.readline()
            if not line:
                # End of input.
                break
            self.current_line = line.rstrip()
            match = stack_trace_line_format.match(line)
            if not match:
                # Not a stack frame: pass the line through untouched.
                print(self.current_line)
                continue
            _, frameno_str, addr, binary, offset = match.groups()
            if frameno_str == '0':
                # Assume that frame #0 is the first frame of new stack trace.
                self.frame_no = 0
            original_binary = binary
            if self.binary_name_filter:
                binary = self.binary_name_filter(binary)
            symbolized_line = self.symbolize_address(addr, binary, offset)
            if not symbolized_line:
                if original_binary != binary:
                    # The filtered name failed; retry with the unfiltered
                    # one (repeating the filtered name would change nothing).
                    symbolized_line = self.symbolize_address(
                        addr, original_binary, offset)
            self.print_symbolized_lines(symbolized_line)
393 if __name__
== '__main__':
394 opts
, args
= getopt
.getopt(sys
.argv
[1:], "d", ["demangle"])
396 if o
in ("-d", "--demangle"):
398 loop
= SymbolizationLoop()