Merge trunk version 209848 into gupc branch.
[official-gcc.git] / libsanitizer / sanitizer_common / sanitizer_symbolizer_posix_libcdep.cc
blob7aead970382248df8f1ce8c198cf7f0d11c5a8a2
1 //===-- sanitizer_symbolizer_posix_libcdep.cc -----------------------------===//
2 //
3 // This file is distributed under the University of Illinois Open Source
4 // License. See LICENSE.TXT for details.
5 //
6 //===----------------------------------------------------------------------===//
7 //
8 // This file is shared between AddressSanitizer and ThreadSanitizer
9 // run-time libraries.
10 // POSIX-specific implementation of symbolizer parts.
11 //===----------------------------------------------------------------------===//
13 #include "sanitizer_platform.h"
14 #if SANITIZER_POSIX
15 #include "sanitizer_allocator_internal.h"
16 #include "sanitizer_common.h"
17 #include "sanitizer_internal_defs.h"
18 #include "sanitizer_linux.h"
19 #include "sanitizer_placement_new.h"
20 #include "sanitizer_procmaps.h"
21 #include "sanitizer_symbolizer.h"
22 #include "sanitizer_symbolizer_libbacktrace.h"
24 #include <errno.h>
25 #include <stdlib.h>
26 #include <sys/wait.h>
27 #include <unistd.h>
29 // C++ demangling function, as required by Itanium C++ ABI. This is weak,
30 // because we do not require a C++ ABI library to be linked to a program
31 // using sanitizers; if it's not present, we'll just use the mangled name.
32 namespace __cxxabiv1 {
33 extern "C" SANITIZER_WEAK_ATTRIBUTE
34 char *__cxa_demangle(const char *mangled, char *buffer,
35 size_t *length, int *status);
38 namespace __sanitizer {
40 // Attempts to demangle the name via __cxa_demangle from __cxxabiv1.
41 static const char *DemangleCXXABI(const char *name) {
42 // FIXME: __cxa_demangle aggressively insists on allocating memory.
43 // There's not much we can do about that, short of providing our
44 // own demangler (libc++abi's implementation could be adapted so that
45 // it does not allocate). For now, we just call it anyway, and we leak
46 // the returned value.
47 if (__cxxabiv1::__cxa_demangle)
48 if (const char *demangled_name =
49 __cxxabiv1::__cxa_demangle(name, 0, 0, 0))
50 return demangled_name;
52 return name;
// Command-line argument handed to the external symbolizer (see the execl()
// call in StartSymbolizerSubprocess); the value is chosen from the target
// architecture macros at compile time.
static const char* const kSymbolizerArch =
#if defined(__x86_64__)
    "--default-arch=x86_64";
#elif defined(__i386__)
    "--default-arch=i386";
#elif defined(__powerpc64__)
    "--default-arch=powerpc64";
#else
    "--default-arch=unknown";
#endif

// How long (in milliseconds) the parent sleeps after fork() before checking
// whether the symbolizer child is still running.
static const int kSymbolizerStartupTimeMillis = 10;
67 // Creates external symbolizer connected via pipe, user should write
68 // to output_fd and read from input_fd.
69 static bool StartSymbolizerSubprocess(const char *path_to_symbolizer,
70 int *input_fd, int *output_fd) {
71 if (!FileExists(path_to_symbolizer)) {
72 Report("WARNING: invalid path to external symbolizer!\n");
73 return false;
76 int *infd = NULL;
77 int *outfd = NULL;
78 // The client program may close its stdin and/or stdout and/or stderr
79 // thus allowing socketpair to reuse file descriptors 0, 1 or 2.
80 // In this case the communication between the forked processes may be
81 // broken if either the parent or the child tries to close or duplicate
82 // these descriptors. The loop below produces two pairs of file
83 // descriptors, each greater than 2 (stderr).
84 int sock_pair[5][2];
85 for (int i = 0; i < 5; i++) {
86 if (pipe(sock_pair[i]) == -1) {
87 for (int j = 0; j < i; j++) {
88 internal_close(sock_pair[j][0]);
89 internal_close(sock_pair[j][1]);
91 Report("WARNING: Can't create a socket pair to start "
92 "external symbolizer (errno: %d)\n", errno);
93 return false;
94 } else if (sock_pair[i][0] > 2 && sock_pair[i][1] > 2) {
95 if (infd == NULL) {
96 infd = sock_pair[i];
97 } else {
98 outfd = sock_pair[i];
99 for (int j = 0; j < i; j++) {
100 if (sock_pair[j] == infd) continue;
101 internal_close(sock_pair[j][0]);
102 internal_close(sock_pair[j][1]);
104 break;
108 CHECK(infd);
109 CHECK(outfd);
111 int pid = fork();
112 if (pid == -1) {
113 // Fork() failed.
114 internal_close(infd[0]);
115 internal_close(infd[1]);
116 internal_close(outfd[0]);
117 internal_close(outfd[1]);
118 Report("WARNING: failed to fork external symbolizer "
119 " (errno: %d)\n", errno);
120 return false;
121 } else if (pid == 0) {
122 // Child subprocess.
123 internal_close(STDOUT_FILENO);
124 internal_close(STDIN_FILENO);
125 internal_dup2(outfd[0], STDIN_FILENO);
126 internal_dup2(infd[1], STDOUT_FILENO);
127 internal_close(outfd[0]);
128 internal_close(outfd[1]);
129 internal_close(infd[0]);
130 internal_close(infd[1]);
131 for (int fd = getdtablesize(); fd > 2; fd--)
132 internal_close(fd);
133 execl(path_to_symbolizer, path_to_symbolizer, kSymbolizerArch, (char*)0);
134 internal__exit(1);
137 // Continue execution in parent process.
138 internal_close(outfd[0]);
139 internal_close(infd[1]);
140 *input_fd = infd[0];
141 *output_fd = outfd[1];
143 // Check that symbolizer subprocess started successfully.
144 int pid_status;
145 SleepForMillis(kSymbolizerStartupTimeMillis);
146 int exited_pid = waitpid(pid, &pid_status, WNOHANG);
147 if (exited_pid != 0) {
148 // Either waitpid failed, or child has already exited.
149 Report("WARNING: external symbolizer didn't start up correctly!\n");
150 return false;
153 return true;
156 // Extracts the prefix of "str" that consists of any characters not
157 // present in "delims" string, and copies this prefix to "result", allocating
158 // space for it.
159 // Returns a pointer to "str" after skipping extracted prefix and first
160 // delimiter char.
161 static const char *ExtractToken(const char *str, const char *delims,
162 char **result) {
163 uptr prefix_len = internal_strcspn(str, delims);
164 *result = (char*)InternalAlloc(prefix_len + 1);
165 internal_memcpy(*result, str, prefix_len);
166 (*result)[prefix_len] = '\0';
167 const char *prefix_end = str + prefix_len;
168 if (*prefix_end != '\0') prefix_end++;
169 return prefix_end;
172 // Same as ExtractToken, but converts extracted token to integer.
173 static const char *ExtractInt(const char *str, const char *delims,
174 int *result) {
175 char *buff;
176 const char *ret = ExtractToken(str, delims, &buff);
177 if (buff != 0) {
178 *result = (int)internal_atoll(buff);
180 InternalFree(buff);
181 return ret;
184 static const char *ExtractUptr(const char *str, const char *delims,
185 uptr *result) {
186 char *buff;
187 const char *ret = ExtractToken(str, delims, &buff);
188 if (buff != 0) {
189 *result = (uptr)internal_atoll(buff);
191 InternalFree(buff);
192 return ret;
195 // ExternalSymbolizer encapsulates communication between the tool and
196 // external symbolizer program, running in a different subprocess,
197 // For now we assume the following protocol:
198 // For each request of the form
199 // <module_name> <module_offset>
200 // passed to STDIN, external symbolizer prints to STDOUT response:
201 // <function_name>
202 // <file_name>:<line_number>:<column_number>
203 // <function_name>
204 // <file_name>:<line_number>:<column_number>
205 // ...
206 // <empty line>
207 // ExternalSymbolizer may not be used from two threads simultaneously.
208 class ExternalSymbolizer {
209 public:
210 explicit ExternalSymbolizer(const char *path)
211 : path_(path),
212 input_fd_(kInvalidFd),
213 output_fd_(kInvalidFd),
214 times_restarted_(0),
215 failed_to_start_(false) {
216 CHECK(path_);
217 CHECK_NE(path[0], '\0');
220 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
221 for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) {
222 // Start or restart symbolizer if we failed to send command to it.
223 if (char *res = SendCommandImpl(is_data, module_name, module_offset))
224 return res;
225 Restart();
227 if (!failed_to_start_) {
228 Report("WARNING: Failed to use and restart external symbolizer!\n");
229 failed_to_start_ = true;
231 return 0;
234 void Flush() {
237 private:
238 bool Restart() {
239 if (input_fd_ != kInvalidFd)
240 internal_close(input_fd_);
241 if (output_fd_ != kInvalidFd)
242 internal_close(output_fd_);
243 return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_);
246 char *SendCommandImpl(bool is_data, const char *module_name,
247 uptr module_offset) {
248 if (input_fd_ == kInvalidFd || output_fd_ == kInvalidFd)
249 return 0;
250 CHECK(module_name);
251 internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n",
252 is_data ? "DATA " : "", module_name, module_offset);
253 if (!writeToSymbolizer(buffer_, internal_strlen(buffer_)))
254 return 0;
255 if (!readFromSymbolizer(buffer_, kBufferSize))
256 return 0;
257 return buffer_;
260 bool readFromSymbolizer(char *buffer, uptr max_length) {
261 if (max_length == 0)
262 return true;
263 uptr read_len = 0;
264 while (true) {
265 uptr just_read = internal_read(input_fd_, buffer + read_len,
266 max_length - read_len);
267 // We can't read 0 bytes, as we don't expect external symbolizer to close
268 // its stdout.
269 if (just_read == 0 || just_read == (uptr)-1) {
270 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
271 return false;
273 read_len += just_read;
274 // Empty line marks the end of symbolizer output.
275 if (read_len >= 2 && buffer[read_len - 1] == '\n' &&
276 buffer[read_len - 2] == '\n') {
277 break;
280 return true;
283 bool writeToSymbolizer(const char *buffer, uptr length) {
284 if (length == 0)
285 return true;
286 uptr write_len = internal_write(output_fd_, buffer, length);
287 if (write_len == 0 || write_len == (uptr)-1) {
288 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
289 return false;
291 return true;
294 const char *path_;
295 int input_fd_;
296 int output_fd_;
298 static const uptr kBufferSize = 16 * 1024;
299 char buffer_[kBufferSize];
301 static const uptr kMaxTimesRestarted = 5;
302 uptr times_restarted_;
303 bool failed_to_start_;
306 #if SANITIZER_SUPPORTS_WEAK_HOOKS
307 extern "C" {
308 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
309 bool __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset,
310 char *Buffer, int MaxLength);
311 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
312 bool __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset,
313 char *Buffer, int MaxLength);
314 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
315 void __sanitizer_symbolize_flush();
316 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
317 int __sanitizer_symbolize_demangle(const char *Name, char *Buffer,
318 int MaxLength);
319 } // extern "C"
321 class InternalSymbolizer {
322 public:
323 typedef bool (*SanitizerSymbolizeFn)(const char*, u64, char*, int);
325 static InternalSymbolizer *get(LowLevelAllocator *alloc) {
326 if (__sanitizer_symbolize_code != 0 &&
327 __sanitizer_symbolize_data != 0) {
328 return new(*alloc) InternalSymbolizer();
330 return 0;
333 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
334 SanitizerSymbolizeFn symbolize_fn = is_data ? __sanitizer_symbolize_data
335 : __sanitizer_symbolize_code;
336 if (symbolize_fn(module_name, module_offset, buffer_, kBufferSize))
337 return buffer_;
338 return 0;
341 void Flush() {
342 if (__sanitizer_symbolize_flush)
343 __sanitizer_symbolize_flush();
346 const char *Demangle(const char *name) {
347 if (__sanitizer_symbolize_demangle) {
348 for (uptr res_length = 1024;
349 res_length <= InternalSizeClassMap::kMaxSize;) {
350 char *res_buff = static_cast<char*>(InternalAlloc(res_length));
351 uptr req_length =
352 __sanitizer_symbolize_demangle(name, res_buff, res_length);
353 if (req_length > res_length) {
354 res_length = req_length + 1;
355 InternalFree(res_buff);
356 continue;
358 return res_buff;
361 return name;
364 private:
365 InternalSymbolizer() { }
367 static const int kBufferSize = 16 * 1024;
368 static const int kMaxDemangledNameSize = 1024;
369 char buffer_[kBufferSize];
371 #else // SANITIZER_SUPPORTS_WEAK_HOOKS
373 class InternalSymbolizer {
374 public:
375 static InternalSymbolizer *get(LowLevelAllocator *alloc) { return 0; }
376 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
377 return 0;
379 void Flush() { }
380 const char *Demangle(const char *name) { return name; }
383 #endif // SANITIZER_SUPPORTS_WEAK_HOOKS
385 class POSIXSymbolizer : public Symbolizer {
386 public:
387 POSIXSymbolizer(ExternalSymbolizer *external_symbolizer,
388 InternalSymbolizer *internal_symbolizer,
389 LibbacktraceSymbolizer *libbacktrace_symbolizer)
390 : Symbolizer(),
391 external_symbolizer_(external_symbolizer),
392 internal_symbolizer_(internal_symbolizer),
393 libbacktrace_symbolizer_(libbacktrace_symbolizer) {}
395 uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) {
396 BlockingMutexLock l(&mu_);
397 if (max_frames == 0)
398 return 0;
399 LoadedModule *module = FindModuleForAddress(addr);
400 if (module == 0)
401 return 0;
402 const char *module_name = module->full_name();
403 uptr module_offset = addr - module->base_address();
404 // First, try to use libbacktrace symbolizer (if it's available).
405 if (libbacktrace_symbolizer_ != 0) {
406 mu_.CheckLocked();
407 uptr res = libbacktrace_symbolizer_->SymbolizeCode(
408 addr, frames, max_frames, module_name, module_offset);
409 if (res > 0)
410 return res;
412 const char *str = SendCommand(false, module_name, module_offset);
413 if (str == 0) {
414 // Symbolizer was not initialized or failed. Fill only data
415 // about module name and offset.
416 AddressInfo *info = &frames[0];
417 info->Clear();
418 info->FillAddressAndModuleInfo(addr, module_name, module_offset);
419 return 1;
421 uptr frame_id = 0;
422 for (frame_id = 0; frame_id < max_frames; frame_id++) {
423 AddressInfo *info = &frames[frame_id];
424 char *function_name = 0;
425 str = ExtractToken(str, "\n", &function_name);
426 CHECK(function_name);
427 if (function_name[0] == '\0') {
428 // There are no more frames.
429 break;
431 info->Clear();
432 info->FillAddressAndModuleInfo(addr, module_name, module_offset);
433 info->function = function_name;
434 // Parse <file>:<line>:<column> buffer.
435 char *file_line_info = 0;
436 str = ExtractToken(str, "\n", &file_line_info);
437 CHECK(file_line_info);
438 const char *line_info = ExtractToken(file_line_info, ":", &info->file);
439 line_info = ExtractInt(line_info, ":", &info->line);
440 line_info = ExtractInt(line_info, "", &info->column);
441 InternalFree(file_line_info);
443 // Functions and filenames can be "??", in which case we write 0
444 // to address info to mark that names are unknown.
445 if (0 == internal_strcmp(info->function, "??")) {
446 InternalFree(info->function);
447 info->function = 0;
449 if (0 == internal_strcmp(info->file, "??")) {
450 InternalFree(info->file);
451 info->file = 0;
454 if (frame_id == 0) {
455 // Make sure we return at least one frame.
456 AddressInfo *info = &frames[0];
457 info->Clear();
458 info->FillAddressAndModuleInfo(addr, module_name, module_offset);
459 frame_id = 1;
461 return frame_id;
464 bool SymbolizeData(uptr addr, DataInfo *info) {
465 BlockingMutexLock l(&mu_);
466 LoadedModule *module = FindModuleForAddress(addr);
467 if (module == 0)
468 return false;
469 const char *module_name = module->full_name();
470 uptr module_offset = addr - module->base_address();
471 internal_memset(info, 0, sizeof(*info));
472 info->address = addr;
473 info->module = internal_strdup(module_name);
474 info->module_offset = module_offset;
475 if (libbacktrace_symbolizer_ != 0) {
476 mu_.CheckLocked();
477 if (libbacktrace_symbolizer_->SymbolizeData(info))
478 return true;
480 const char *str = SendCommand(true, module_name, module_offset);
481 if (str == 0)
482 return true;
483 str = ExtractToken(str, "\n", &info->name);
484 str = ExtractUptr(str, " ", &info->start);
485 str = ExtractUptr(str, "\n", &info->size);
486 info->start += module->base_address();
487 return true;
490 bool IsAvailable() {
491 return internal_symbolizer_ != 0 || external_symbolizer_ != 0 ||
492 libbacktrace_symbolizer_ != 0;
495 bool IsExternalAvailable() {
496 return external_symbolizer_ != 0;
499 void Flush() {
500 BlockingMutexLock l(&mu_);
501 if (internal_symbolizer_ != 0) {
502 SymbolizerScope sym_scope(this);
503 internal_symbolizer_->Flush();
505 if (external_symbolizer_ != 0)
506 external_symbolizer_->Flush();
509 const char *Demangle(const char *name) {
510 BlockingMutexLock l(&mu_);
511 // Run hooks even if we don't use internal symbolizer, as cxxabi
512 // demangle may call system functions.
513 SymbolizerScope sym_scope(this);
514 if (internal_symbolizer_ != 0)
515 return internal_symbolizer_->Demangle(name);
516 if (libbacktrace_symbolizer_ != 0) {
517 const char *demangled = libbacktrace_symbolizer_->Demangle(name);
518 if (demangled)
519 return demangled;
521 return DemangleCXXABI(name);
524 void PrepareForSandboxing() {
525 #if SANITIZER_LINUX && !SANITIZER_ANDROID
526 BlockingMutexLock l(&mu_);
527 // Cache /proc/self/exe on Linux.
528 CacheBinaryName();
529 #endif
532 private:
533 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
534 mu_.CheckLocked();
535 // First, try to use internal symbolizer.
536 if (internal_symbolizer_) {
537 SymbolizerScope sym_scope(this);
538 return internal_symbolizer_->SendCommand(is_data, module_name,
539 module_offset);
541 // Otherwise, fall back to external symbolizer.
542 if (external_symbolizer_) {
543 return external_symbolizer_->SendCommand(is_data, module_name,
544 module_offset);
546 return 0;
549 LoadedModule *FindModuleForAddress(uptr address) {
550 mu_.CheckLocked();
551 bool modules_were_reloaded = false;
552 if (modules_ == 0 || !modules_fresh_) {
553 modules_ = (LoadedModule*)(symbolizer_allocator_.Allocate(
554 kMaxNumberOfModuleContexts * sizeof(LoadedModule)));
555 CHECK(modules_);
556 n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts,
557 /* filter */ 0);
558 // FIXME: Return this check when GetListOfModules is implemented on Mac.
559 // CHECK_GT(n_modules_, 0);
560 CHECK_LT(n_modules_, kMaxNumberOfModuleContexts);
561 modules_fresh_ = true;
562 modules_were_reloaded = true;
564 for (uptr i = 0; i < n_modules_; i++) {
565 if (modules_[i].containsAddress(address)) {
566 return &modules_[i];
569 // Reload the modules and look up again, if we haven't tried it yet.
570 if (!modules_were_reloaded) {
571 // FIXME: set modules_fresh_ from dlopen()/dlclose() interceptors.
572 // It's too aggressive to reload the list of modules each time we fail
573 // to find a module for a given address.
574 modules_fresh_ = false;
575 return FindModuleForAddress(address);
577 return 0;
580 // 16K loaded modules should be enough for everyone.
581 static const uptr kMaxNumberOfModuleContexts = 1 << 14;
582 LoadedModule *modules_; // Array of module descriptions is leaked.
583 uptr n_modules_;
584 // If stale, need to reload the modules before looking up addresses.
585 bool modules_fresh_;
586 BlockingMutex mu_;
588 ExternalSymbolizer *external_symbolizer_; // Leaked.
589 InternalSymbolizer *const internal_symbolizer_; // Leaked.
590 LibbacktraceSymbolizer *libbacktrace_symbolizer_; // Leaked.
593 Symbolizer *Symbolizer::PlatformInit(const char *path_to_external) {
594 InternalSymbolizer* internal_symbolizer =
595 InternalSymbolizer::get(&symbolizer_allocator_);
596 ExternalSymbolizer *external_symbolizer = 0;
597 LibbacktraceSymbolizer *libbacktrace_symbolizer = 0;
599 if (!internal_symbolizer) {
600 libbacktrace_symbolizer =
601 LibbacktraceSymbolizer::get(&symbolizer_allocator_);
602 if (!libbacktrace_symbolizer) {
603 // Find path to llvm-symbolizer if it's not provided.
604 if (!path_to_external)
605 path_to_external = FindPathToBinary("llvm-symbolizer");
606 if (path_to_external && path_to_external[0] != '\0')
607 external_symbolizer = new(symbolizer_allocator_)
608 ExternalSymbolizer(path_to_external);
612 return new(symbolizer_allocator_) POSIXSymbolizer(
613 external_symbolizer, internal_symbolizer, libbacktrace_symbolizer);
616 } // namespace __sanitizer
618 #endif // SANITIZER_POSIX