//===-- sanitizer_symbolizer_posix_libcdep.cc -----------------------------===//
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is shared between AddressSanitizer and ThreadSanitizer
// run-time libraries.
// POSIX-specific implementation of symbolizer parts.
//===----------------------------------------------------------------------===//

#include "sanitizer_platform.h"
#if SANITIZER_POSIX
#include "sanitizer_allocator_internal.h"
#include "sanitizer_common.h"
#include "sanitizer_internal_defs.h"
#include "sanitizer_linux.h"
#include "sanitizer_placement_new.h"
#include "sanitizer_procmaps.h"
#include "sanitizer_symbolizer.h"
#include "sanitizer_symbolizer_libbacktrace.h"

#include <errno.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>

29 // C++ demangling function, as required by Itanium C++ ABI. This is weak,
30 // because we do not require a C++ ABI library to be linked to a program
31 // using sanitizers; if it's not present, we'll just use the mangled name.
32 namespace __cxxabiv1
{
33 extern "C" SANITIZER_WEAK_ATTRIBUTE
34 char *__cxa_demangle(const char *mangled
, char *buffer
,
35 size_t *length
, int *status
);
38 namespace __sanitizer
{
40 // Attempts to demangle the name via __cxa_demangle from __cxxabiv1.
41 static const char *DemangleCXXABI(const char *name
) {
42 // FIXME: __cxa_demangle aggressively insists on allocating memory.
43 // There's not much we can do about that, short of providing our
44 // own demangler (libc++abi's implementation could be adapted so that
45 // it does not allocate). For now, we just call it anyway, and we leak
46 // the returned value.
47 if (__cxxabiv1::__cxa_demangle
)
48 if (const char *demangled_name
=
49 __cxxabiv1::__cxa_demangle(name
, 0, 0, 0))
50 return demangled_name
;
// Architecture flag passed as the single extra argument when the external
// symbolizer is exec'ed; chosen at compile time from the target macros.
#if defined(__x86_64__)
static const char* const kSymbolizerArch = "--default-arch=x86_64";
#elif defined(__i386__)
static const char* const kSymbolizerArch = "--default-arch=i386";
#elif defined(__powerpc64__)
static const char* const kSymbolizerArch = "--default-arch=powerpc64";
#else
static const char* const kSymbolizerArch = "--default-arch=unknown";
#endif

// How long the parent sleeps after fork() before polling (with WNOHANG)
// whether the symbolizer child has already exited.
static const int kSymbolizerStartupTimeMillis = 10;

67 // Creates external symbolizer connected via pipe, user should write
68 // to output_fd and read from input_fd.
69 static bool StartSymbolizerSubprocess(const char *path_to_symbolizer
,
70 int *input_fd
, int *output_fd
) {
71 if (!FileExists(path_to_symbolizer
)) {
72 Report("WARNING: invalid path to external symbolizer!\n");
78 // The client program may close its stdin and/or stdout and/or stderr
79 // thus allowing socketpair to reuse file descriptors 0, 1 or 2.
80 // In this case the communication between the forked processes may be
81 // broken if either the parent or the child tries to close or duplicate
82 // these descriptors. The loop below produces two pairs of file
83 // descriptors, each greater than 2 (stderr).
85 for (int i
= 0; i
< 5; i
++) {
86 if (pipe(sock_pair
[i
]) == -1) {
87 for (int j
= 0; j
< i
; j
++) {
88 internal_close(sock_pair
[j
][0]);
89 internal_close(sock_pair
[j
][1]);
91 Report("WARNING: Can't create a socket pair to start "
92 "external symbolizer (errno: %d)\n", errno
);
94 } else if (sock_pair
[i
][0] > 2 && sock_pair
[i
][1] > 2) {
99 for (int j
= 0; j
< i
; j
++) {
100 if (sock_pair
[j
] == infd
) continue;
101 internal_close(sock_pair
[j
][0]);
102 internal_close(sock_pair
[j
][1]);
114 internal_close(infd
[0]);
115 internal_close(infd
[1]);
116 internal_close(outfd
[0]);
117 internal_close(outfd
[1]);
118 Report("WARNING: failed to fork external symbolizer "
119 " (errno: %d)\n", errno
);
121 } else if (pid
== 0) {
123 internal_close(STDOUT_FILENO
);
124 internal_close(STDIN_FILENO
);
125 internal_dup2(outfd
[0], STDIN_FILENO
);
126 internal_dup2(infd
[1], STDOUT_FILENO
);
127 internal_close(outfd
[0]);
128 internal_close(outfd
[1]);
129 internal_close(infd
[0]);
130 internal_close(infd
[1]);
131 for (int fd
= getdtablesize(); fd
> 2; fd
--)
133 execl(path_to_symbolizer
, path_to_symbolizer
, kSymbolizerArch
, (char*)0);
137 // Continue execution in parent process.
138 internal_close(outfd
[0]);
139 internal_close(infd
[1]);
141 *output_fd
= outfd
[1];
143 // Check that symbolizer subprocess started successfully.
145 SleepForMillis(kSymbolizerStartupTimeMillis
);
146 int exited_pid
= waitpid(pid
, &pid_status
, WNOHANG
);
147 if (exited_pid
!= 0) {
148 // Either waitpid failed, or child has already exited.
149 Report("WARNING: external symbolizer didn't start up correctly!\n");
156 // Extracts the prefix of "str" that consists of any characters not
157 // present in "delims" string, and copies this prefix to "result", allocating
159 // Returns a pointer to "str" after skipping extracted prefix and first
161 static const char *ExtractToken(const char *str
, const char *delims
,
163 uptr prefix_len
= internal_strcspn(str
, delims
);
164 *result
= (char*)InternalAlloc(prefix_len
+ 1);
165 internal_memcpy(*result
, str
, prefix_len
);
166 (*result
)[prefix_len
] = '\0';
167 const char *prefix_end
= str
+ prefix_len
;
168 if (*prefix_end
!= '\0') prefix_end
++;
172 // Same as ExtractToken, but converts extracted token to integer.
173 static const char *ExtractInt(const char *str
, const char *delims
,
176 const char *ret
= ExtractToken(str
, delims
, &buff
);
178 *result
= (int)internal_atoll(buff
);
184 static const char *ExtractUptr(const char *str
, const char *delims
,
187 const char *ret
= ExtractToken(str
, delims
, &buff
);
189 *result
= (uptr
)internal_atoll(buff
);
195 // ExternalSymbolizer encapsulates communication between the tool and
196 // external symbolizer program, running in a different subprocess,
197 // For now we assume the following protocol:
198 // For each request of the form
199 // <module_name> <module_offset>
200 // passed to STDIN, external symbolizer prints to STDOUT response:
202 // <file_name>:<line_number>:<column_number>
204 // <file_name>:<line_number>:<column_number>
207 // ExternalSymbolizer may not be used from two threads simultaneously.
208 class ExternalSymbolizer
{
210 explicit ExternalSymbolizer(const char *path
)
212 input_fd_(kInvalidFd
),
213 output_fd_(kInvalidFd
),
215 failed_to_start_(false) {
217 CHECK_NE(path
[0], '\0');
220 char *SendCommand(bool is_data
, const char *module_name
, uptr module_offset
) {
221 for (; times_restarted_
< kMaxTimesRestarted
; times_restarted_
++) {
222 // Start or restart symbolizer if we failed to send command to it.
223 if (char *res
= SendCommandImpl(is_data
, module_name
, module_offset
))
227 if (!failed_to_start_
) {
228 Report("WARNING: Failed to use and restart external symbolizer!\n");
229 failed_to_start_
= true;
239 if (input_fd_
!= kInvalidFd
)
240 internal_close(input_fd_
);
241 if (output_fd_
!= kInvalidFd
)
242 internal_close(output_fd_
);
243 return StartSymbolizerSubprocess(path_
, &input_fd_
, &output_fd_
);
246 char *SendCommandImpl(bool is_data
, const char *module_name
,
247 uptr module_offset
) {
248 if (input_fd_
== kInvalidFd
|| output_fd_
== kInvalidFd
)
251 internal_snprintf(buffer_
, kBufferSize
, "%s\"%s\" 0x%zx\n",
252 is_data
? "DATA " : "", module_name
, module_offset
);
253 if (!writeToSymbolizer(buffer_
, internal_strlen(buffer_
)))
255 if (!readFromSymbolizer(buffer_
, kBufferSize
))
260 bool readFromSymbolizer(char *buffer
, uptr max_length
) {
265 uptr just_read
= internal_read(input_fd_
, buffer
+ read_len
,
266 max_length
- read_len
);
267 // We can't read 0 bytes, as we don't expect external symbolizer to close
269 if (just_read
== 0 || just_read
== (uptr
)-1) {
270 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_
);
273 read_len
+= just_read
;
274 // Empty line marks the end of symbolizer output.
275 if (read_len
>= 2 && buffer
[read_len
- 1] == '\n' &&
276 buffer
[read_len
- 2] == '\n') {
283 bool writeToSymbolizer(const char *buffer
, uptr length
) {
286 uptr write_len
= internal_write(output_fd_
, buffer
, length
);
287 if (write_len
== 0 || write_len
== (uptr
)-1) {
288 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_
);
298 static const uptr kBufferSize
= 16 * 1024;
299 char buffer_
[kBufferSize
];
301 static const uptr kMaxTimesRestarted
= 5;
302 uptr times_restarted_
;
303 bool failed_to_start_
;
306 #if SANITIZER_SUPPORTS_WEAK_HOOKS
308 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
309 bool __sanitizer_symbolize_code(const char *ModuleName
, u64 ModuleOffset
,
310 char *Buffer
, int MaxLength
);
311 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
312 bool __sanitizer_symbolize_data(const char *ModuleName
, u64 ModuleOffset
,
313 char *Buffer
, int MaxLength
);
314 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
315 void __sanitizer_symbolize_flush();
316 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
317 int __sanitizer_symbolize_demangle(const char *Name
, char *Buffer
,
321 class InternalSymbolizer
{
323 typedef bool (*SanitizerSymbolizeFn
)(const char*, u64
, char*, int);
325 static InternalSymbolizer
*get(LowLevelAllocator
*alloc
) {
326 if (__sanitizer_symbolize_code
!= 0 &&
327 __sanitizer_symbolize_data
!= 0) {
328 return new(*alloc
) InternalSymbolizer();
333 char *SendCommand(bool is_data
, const char *module_name
, uptr module_offset
) {
334 SanitizerSymbolizeFn symbolize_fn
= is_data
? __sanitizer_symbolize_data
335 : __sanitizer_symbolize_code
;
336 if (symbolize_fn(module_name
, module_offset
, buffer_
, kBufferSize
))
342 if (__sanitizer_symbolize_flush
)
343 __sanitizer_symbolize_flush();
346 const char *Demangle(const char *name
) {
347 if (__sanitizer_symbolize_demangle
) {
348 for (uptr res_length
= 1024;
349 res_length
<= InternalSizeClassMap::kMaxSize
;) {
350 char *res_buff
= static_cast<char*>(InternalAlloc(res_length
));
352 __sanitizer_symbolize_demangle(name
, res_buff
, res_length
);
353 if (req_length
> res_length
) {
354 res_length
= req_length
+ 1;
355 InternalFree(res_buff
);
365 InternalSymbolizer() { }
367 static const int kBufferSize
= 16 * 1024;
368 static const int kMaxDemangledNameSize
= 1024;
369 char buffer_
[kBufferSize
];
371 #else // SANITIZER_SUPPORTS_WEAK_HOOKS
373 class InternalSymbolizer
{
375 static InternalSymbolizer
*get(LowLevelAllocator
*alloc
) { return 0; }
376 char *SendCommand(bool is_data
, const char *module_name
, uptr module_offset
) {
380 const char *Demangle(const char *name
) { return name
; }
383 #endif // SANITIZER_SUPPORTS_WEAK_HOOKS
385 class POSIXSymbolizer
: public Symbolizer
{
387 POSIXSymbolizer(ExternalSymbolizer
*external_symbolizer
,
388 InternalSymbolizer
*internal_symbolizer
,
389 LibbacktraceSymbolizer
*libbacktrace_symbolizer
)
391 external_symbolizer_(external_symbolizer
),
392 internal_symbolizer_(internal_symbolizer
),
393 libbacktrace_symbolizer_(libbacktrace_symbolizer
) {}
395 uptr
SymbolizeCode(uptr addr
, AddressInfo
*frames
, uptr max_frames
) {
396 BlockingMutexLock
l(&mu_
);
399 LoadedModule
*module
= FindModuleForAddress(addr
);
402 const char *module_name
= module
->full_name();
403 uptr module_offset
= addr
- module
->base_address();
404 // First, try to use libbacktrace symbolizer (if it's available).
405 if (libbacktrace_symbolizer_
!= 0) {
407 uptr res
= libbacktrace_symbolizer_
->SymbolizeCode(
408 addr
, frames
, max_frames
, module_name
, module_offset
);
412 const char *str
= SendCommand(false, module_name
, module_offset
);
414 // Symbolizer was not initialized or failed. Fill only data
415 // about module name and offset.
416 AddressInfo
*info
= &frames
[0];
418 info
->FillAddressAndModuleInfo(addr
, module_name
, module_offset
);
422 for (frame_id
= 0; frame_id
< max_frames
; frame_id
++) {
423 AddressInfo
*info
= &frames
[frame_id
];
424 char *function_name
= 0;
425 str
= ExtractToken(str
, "\n", &function_name
);
426 CHECK(function_name
);
427 if (function_name
[0] == '\0') {
428 // There are no more frames.
432 info
->FillAddressAndModuleInfo(addr
, module_name
, module_offset
);
433 info
->function
= function_name
;
434 // Parse <file>:<line>:<column> buffer.
435 char *file_line_info
= 0;
436 str
= ExtractToken(str
, "\n", &file_line_info
);
437 CHECK(file_line_info
);
438 const char *line_info
= ExtractToken(file_line_info
, ":", &info
->file
);
439 line_info
= ExtractInt(line_info
, ":", &info
->line
);
440 line_info
= ExtractInt(line_info
, "", &info
->column
);
441 InternalFree(file_line_info
);
443 // Functions and filenames can be "??", in which case we write 0
444 // to address info to mark that names are unknown.
445 if (0 == internal_strcmp(info
->function
, "??")) {
446 InternalFree(info
->function
);
449 if (0 == internal_strcmp(info
->file
, "??")) {
450 InternalFree(info
->file
);
455 // Make sure we return at least one frame.
456 AddressInfo
*info
= &frames
[0];
458 info
->FillAddressAndModuleInfo(addr
, module_name
, module_offset
);
464 bool SymbolizeData(uptr addr
, DataInfo
*info
) {
465 BlockingMutexLock
l(&mu_
);
466 LoadedModule
*module
= FindModuleForAddress(addr
);
469 const char *module_name
= module
->full_name();
470 uptr module_offset
= addr
- module
->base_address();
471 internal_memset(info
, 0, sizeof(*info
));
472 info
->address
= addr
;
473 info
->module
= internal_strdup(module_name
);
474 info
->module_offset
= module_offset
;
475 if (libbacktrace_symbolizer_
!= 0) {
477 if (libbacktrace_symbolizer_
->SymbolizeData(info
))
480 const char *str
= SendCommand(true, module_name
, module_offset
);
483 str
= ExtractToken(str
, "\n", &info
->name
);
484 str
= ExtractUptr(str
, " ", &info
->start
);
485 str
= ExtractUptr(str
, "\n", &info
->size
);
486 info
->start
+= module
->base_address();
491 return internal_symbolizer_
!= 0 || external_symbolizer_
!= 0 ||
492 libbacktrace_symbolizer_
!= 0;
495 bool IsExternalAvailable() {
496 return external_symbolizer_
!= 0;
500 BlockingMutexLock
l(&mu_
);
501 if (internal_symbolizer_
!= 0) {
502 SymbolizerScope
sym_scope(this);
503 internal_symbolizer_
->Flush();
505 if (external_symbolizer_
!= 0)
506 external_symbolizer_
->Flush();
509 const char *Demangle(const char *name
) {
510 BlockingMutexLock
l(&mu_
);
511 // Run hooks even if we don't use internal symbolizer, as cxxabi
512 // demangle may call system functions.
513 SymbolizerScope
sym_scope(this);
514 if (internal_symbolizer_
!= 0)
515 return internal_symbolizer_
->Demangle(name
);
516 if (libbacktrace_symbolizer_
!= 0) {
517 const char *demangled
= libbacktrace_symbolizer_
->Demangle(name
);
521 return DemangleCXXABI(name
);
524 void PrepareForSandboxing() {
525 #if SANITIZER_LINUX && !SANITIZER_ANDROID
526 BlockingMutexLock
l(&mu_
);
527 // Cache /proc/self/exe on Linux.
533 char *SendCommand(bool is_data
, const char *module_name
, uptr module_offset
) {
535 // First, try to use internal symbolizer.
536 if (internal_symbolizer_
) {
537 SymbolizerScope
sym_scope(this);
538 return internal_symbolizer_
->SendCommand(is_data
, module_name
,
541 // Otherwise, fall back to external symbolizer.
542 if (external_symbolizer_
) {
543 return external_symbolizer_
->SendCommand(is_data
, module_name
,
549 LoadedModule
*FindModuleForAddress(uptr address
) {
551 bool modules_were_reloaded
= false;
552 if (modules_
== 0 || !modules_fresh_
) {
553 modules_
= (LoadedModule
*)(symbolizer_allocator_
.Allocate(
554 kMaxNumberOfModuleContexts
* sizeof(LoadedModule
)));
556 n_modules_
= GetListOfModules(modules_
, kMaxNumberOfModuleContexts
,
558 // FIXME: Return this check when GetListOfModules is implemented on Mac.
559 // CHECK_GT(n_modules_, 0);
560 CHECK_LT(n_modules_
, kMaxNumberOfModuleContexts
);
561 modules_fresh_
= true;
562 modules_were_reloaded
= true;
564 for (uptr i
= 0; i
< n_modules_
; i
++) {
565 if (modules_
[i
].containsAddress(address
)) {
569 // Reload the modules and look up again, if we haven't tried it yet.
570 if (!modules_were_reloaded
) {
571 // FIXME: set modules_fresh_ from dlopen()/dlclose() interceptors.
572 // It's too aggressive to reload the list of modules each time we fail
573 // to find a module for a given address.
574 modules_fresh_
= false;
575 return FindModuleForAddress(address
);
580 // 16K loaded modules should be enough for everyone.
581 static const uptr kMaxNumberOfModuleContexts
= 1 << 14;
582 LoadedModule
*modules_
; // Array of module descriptions is leaked.
584 // If stale, need to reload the modules before looking up addresses.
588 ExternalSymbolizer
*external_symbolizer_
; // Leaked.
589 InternalSymbolizer
*const internal_symbolizer_
; // Leaked.
590 LibbacktraceSymbolizer
*libbacktrace_symbolizer_
; // Leaked.
593 Symbolizer
*Symbolizer::PlatformInit(const char *path_to_external
) {
594 InternalSymbolizer
* internal_symbolizer
=
595 InternalSymbolizer::get(&symbolizer_allocator_
);
596 ExternalSymbolizer
*external_symbolizer
= 0;
597 LibbacktraceSymbolizer
*libbacktrace_symbolizer
= 0;
599 if (!internal_symbolizer
) {
600 libbacktrace_symbolizer
=
601 LibbacktraceSymbolizer::get(&symbolizer_allocator_
);
602 if (!libbacktrace_symbolizer
) {
603 // Find path to llvm-symbolizer if it's not provided.
604 if (!path_to_external
)
605 path_to_external
= FindPathToBinary("llvm-symbolizer");
606 if (path_to_external
&& path_to_external
[0] != '\0')
607 external_symbolizer
= new(symbolizer_allocator_
)
608 ExternalSymbolizer(path_to_external
);
612 return new(symbolizer_allocator_
) POSIXSymbolizer(
613 external_symbolizer
, internal_symbolizer
, libbacktrace_symbolizer
);
616 } // namespace __sanitizer
618 #endif // SANITIZER_POSIX