1 //===-- sanitizer_symbolizer_libcdep.cc -----------------------------------===//
3 // This file is distributed under the University of Illinois Open Source
4 // License. See LICENSE.TXT for details.
6 //===----------------------------------------------------------------------===//
8 // This file is shared between AddressSanitizer and ThreadSanitizer
10 //===----------------------------------------------------------------------===//
12 #include "sanitizer_allocator_internal.h"
13 #include "sanitizer_internal_defs.h"
14 #include "sanitizer_symbolizer_internal.h"
16 namespace __sanitizer
{
18 Symbolizer
*Symbolizer::GetOrInit() {
19 SpinMutexLock
l(&init_mu_
);
22 symbolizer_
= PlatformInit();
27 // See sanitizer_symbolizer_fuchsia.cc.
28 #if !SANITIZER_FUCHSIA
30 const char *ExtractToken(const char *str
, const char *delims
, char **result
) {
31 uptr prefix_len
= internal_strcspn(str
, delims
);
32 *result
= (char*)InternalAlloc(prefix_len
+ 1);
33 internal_memcpy(*result
, str
, prefix_len
);
34 (*result
)[prefix_len
] = '\0';
35 const char *prefix_end
= str
+ prefix_len
;
36 if (*prefix_end
!= '\0') prefix_end
++;
40 const char *ExtractInt(const char *str
, const char *delims
, int *result
) {
42 const char *ret
= ExtractToken(str
, delims
, &buff
);
44 *result
= (int)internal_atoll(buff
);
50 const char *ExtractUptr(const char *str
, const char *delims
, uptr
*result
) {
52 const char *ret
= ExtractToken(str
, delims
, &buff
);
54 *result
= (uptr
)internal_atoll(buff
);
60 const char *ExtractTokenUpToDelimiter(const char *str
, const char *delimiter
,
62 const char *found_delimiter
= internal_strstr(str
, delimiter
);
64 found_delimiter
? found_delimiter
- str
: internal_strlen(str
);
65 *result
= (char *)InternalAlloc(prefix_len
+ 1);
66 internal_memcpy(*result
, str
, prefix_len
);
67 (*result
)[prefix_len
] = '\0';
68 const char *prefix_end
= str
+ prefix_len
;
69 if (*prefix_end
!= '\0') prefix_end
+= internal_strlen(delimiter
);
73 SymbolizedStack
*Symbolizer::SymbolizePC(uptr addr
) {
74 BlockingMutexLock
l(&mu_
);
75 const char *module_name
;
78 SymbolizedStack
*res
= SymbolizedStack::New(addr
);
79 if (!FindModuleNameAndOffsetForAddress(addr
, &module_name
, &module_offset
,
82 // Always fill data about module name and offset.
83 res
->info
.FillModuleInfo(module_name
, module_offset
, arch
);
84 for (auto &tool
: tools_
) {
85 SymbolizerScope
sym_scope(this);
86 if (tool
.SymbolizePC(addr
, res
)) {
93 bool Symbolizer::SymbolizeData(uptr addr
, DataInfo
*info
) {
94 BlockingMutexLock
l(&mu_
);
95 const char *module_name
;
98 if (!FindModuleNameAndOffsetForAddress(addr
, &module_name
, &module_offset
,
102 info
->module
= internal_strdup(module_name
);
103 info
->module_offset
= module_offset
;
104 info
->module_arch
= arch
;
105 for (auto &tool
: tools_
) {
106 SymbolizerScope
sym_scope(this);
107 if (tool
.SymbolizeData(addr
, info
)) {
114 bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc
, const char **module_name
,
115 uptr
*module_address
) {
116 BlockingMutexLock
l(&mu_
);
117 const char *internal_module_name
= nullptr;
119 if (!FindModuleNameAndOffsetForAddress(pc
, &internal_module_name
,
120 module_address
, &arch
))
124 *module_name
= module_names_
.GetOwnedCopy(internal_module_name
);
128 void Symbolizer::Flush() {
129 BlockingMutexLock
l(&mu_
);
130 for (auto &tool
: tools_
) {
131 SymbolizerScope
sym_scope(this);
136 const char *Symbolizer::Demangle(const char *name
) {
137 BlockingMutexLock
l(&mu_
);
138 for (auto &tool
: tools_
) {
139 SymbolizerScope
sym_scope(this);
140 if (const char *demangled
= tool
.Demangle(name
))
143 return PlatformDemangle(name
);
146 void Symbolizer::PrepareForSandboxing() {
147 BlockingMutexLock
l(&mu_
);
148 PlatformPrepareForSandboxing();
151 bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address
,
152 const char **module_name
,
154 ModuleArch
*module_arch
) {
155 const LoadedModule
*module
= FindModuleForAddress(address
);
156 if (module
== nullptr)
158 *module_name
= module
->full_name();
159 *module_offset
= address
- module
->base_address();
160 *module_arch
= module
->arch();
164 void Symbolizer::RefreshModules() {
166 fallback_modules_
.fallbackInit();
167 RAW_CHECK(modules_
.size() > 0);
168 modules_fresh_
= true;
171 static const LoadedModule
*SearchForModule(const ListOfModules
&modules
,
173 for (uptr i
= 0; i
< modules
.size(); i
++) {
174 if (modules
[i
].containsAddress(address
)) {
181 const LoadedModule
*Symbolizer::FindModuleForAddress(uptr address
) {
182 bool modules_were_reloaded
= false;
183 if (!modules_fresh_
) {
185 modules_were_reloaded
= true;
187 const LoadedModule
*module
= SearchForModule(modules_
, address
);
188 if (module
) return module
;
190 // dlopen/dlclose interceptors invalidate the module list, but when
191 // interception is disabled, we need to retry if the lookup fails in
192 // case the module list changed.
193 #if !SANITIZER_INTERCEPT_DLOPEN_DLCLOSE
194 if (!modules_were_reloaded
) {
196 module
= SearchForModule(modules_
, address
);
197 if (module
) return module
;
201 if (fallback_modules_
.size()) {
202 module
= SearchForModule(fallback_modules_
, address
);
207 // For now we assume the following protocol:
208 // For each request of the form
209 // <module_name> <module_offset>
210 // passed to STDIN, external symbolizer prints to STDOUT response:
212 // <file_name>:<line_number>:<column_number>
214 // <file_name>:<line_number>:<column_number>
217 class LLVMSymbolizerProcess
: public SymbolizerProcess
{
219 explicit LLVMSymbolizerProcess(const char *path
) : SymbolizerProcess(path
) {}
222 bool ReachedEndOfOutput(const char *buffer
, uptr length
) const override
{
223 // Empty line marks the end of llvm-symbolizer output.
224 return length
>= 2 && buffer
[length
- 1] == '\n' &&
225 buffer
[length
- 2] == '\n';
228 // When adding a new architecture, don't forget to also update
229 // script/asan_symbolize.py and sanitizer_common.h.
230 void GetArgV(const char *path_to_binary
,
231 const char *(&argv
)[kArgVMax
]) const override
{
232 #if defined(__x86_64h__)
233 const char* const kSymbolizerArch
= "--default-arch=x86_64h";
234 #elif defined(__x86_64__)
235 const char* const kSymbolizerArch
= "--default-arch=x86_64";
236 #elif defined(__i386__)
237 const char* const kSymbolizerArch
= "--default-arch=i386";
238 #elif defined(__aarch64__)
239 const char* const kSymbolizerArch
= "--default-arch=arm64";
240 #elif defined(__arm__)
241 const char* const kSymbolizerArch
= "--default-arch=arm";
242 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
243 const char* const kSymbolizerArch
= "--default-arch=powerpc64";
244 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
245 const char* const kSymbolizerArch
= "--default-arch=powerpc64le";
246 #elif defined(__s390x__)
247 const char* const kSymbolizerArch
= "--default-arch=s390x";
248 #elif defined(__s390__)
249 const char* const kSymbolizerArch
= "--default-arch=s390";
251 const char* const kSymbolizerArch
= "--default-arch=unknown";
254 const char *const inline_flag
= common_flags()->symbolize_inline_frames
256 : "--inlining=false";
258 argv
[i
++] = path_to_binary
;
259 argv
[i
++] = inline_flag
;
260 argv
[i
++] = kSymbolizerArch
;
265 LLVMSymbolizer::LLVMSymbolizer(const char *path
, LowLevelAllocator
*allocator
)
266 : symbolizer_process_(new(*allocator
) LLVMSymbolizerProcess(path
)) {}
// Parse a <file>:<line>[:<column>] buffer. The file path may contain colons on
// Windows, so extract tokens from the right hand side first. The column info is
// optional.
271 static const char *ParseFileLineInfo(AddressInfo
*info
, const char *str
) {
272 char *file_line_info
= 0;
273 str
= ExtractToken(str
, "\n", &file_line_info
);
274 CHECK(file_line_info
);
276 if (uptr size
= internal_strlen(file_line_info
)) {
277 char *back
= file_line_info
+ size
- 1;
278 for (int i
= 0; i
< 2; ++i
) {
279 while (back
> file_line_info
&& IsDigit(*back
)) --back
;
280 if (*back
!= ':' || !IsDigit(back
[1])) break;
281 info
->column
= info
->line
;
282 info
->line
= internal_atoll(back
+ 1);
283 // Truncate the string at the colon to keep only filename.
287 ExtractToken(file_line_info
, "", &info
->file
);
290 InternalFree(file_line_info
);
// Parses one or more two-line strings in the following format:
//   <function_name>
//   <file_name>:<line_number>[:<column_number>]
// Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
// them use the same output format.
299 void ParseSymbolizePCOutput(const char *str
, SymbolizedStack
*res
) {
300 bool top_frame
= true;
301 SymbolizedStack
*last
= res
;
303 char *function_name
= 0;
304 str
= ExtractToken(str
, "\n", &function_name
);
305 CHECK(function_name
);
306 if (function_name
[0] == '\0') {
307 // There are no more frames.
308 InternalFree(function_name
);
311 SymbolizedStack
*cur
;
316 cur
= SymbolizedStack::New(res
->info
.address
);
317 cur
->info
.FillModuleInfo(res
->info
.module
, res
->info
.module_offset
,
318 res
->info
.module_arch
);
323 AddressInfo
*info
= &cur
->info
;
324 info
->function
= function_name
;
325 str
= ParseFileLineInfo(info
, str
);
327 // Functions and filenames can be "??", in which case we write 0
328 // to address info to mark that names are unknown.
329 if (0 == internal_strcmp(info
->function
, "??")) {
330 InternalFree(info
->function
);
333 if (0 == internal_strcmp(info
->file
, "??")) {
334 InternalFree(info
->file
);
// Parses a two-line string in the following format:
//   <symbol_name>
//   <start_address> <size>
// Used by LLVMSymbolizer and InternalSymbolizer.
344 void ParseSymbolizeDataOutput(const char *str
, DataInfo
*info
) {
345 str
= ExtractToken(str
, "\n", &info
->name
);
346 str
= ExtractUptr(str
, " ", &info
->start
);
347 str
= ExtractUptr(str
, "\n", &info
->size
);
350 bool LLVMSymbolizer::SymbolizePC(uptr addr
, SymbolizedStack
*stack
) {
351 AddressInfo
*info
= &stack
->info
;
352 const char *buf
= FormatAndSendCommand(
353 /*is_data*/ false, info
->module
, info
->module_offset
, info
->module_arch
);
355 ParseSymbolizePCOutput(buf
, stack
);
361 bool LLVMSymbolizer::SymbolizeData(uptr addr
, DataInfo
*info
) {
362 const char *buf
= FormatAndSendCommand(
363 /*is_data*/ true, info
->module
, info
->module_offset
, info
->module_arch
);
365 ParseSymbolizeDataOutput(buf
, info
);
366 info
->start
+= (addr
- info
->module_offset
); // Add the base address.
372 const char *LLVMSymbolizer::FormatAndSendCommand(bool is_data
,
373 const char *module_name
,
377 const char *is_data_str
= is_data
? "DATA " : "";
378 if (arch
== kModuleArchUnknown
) {
379 if (internal_snprintf(buffer_
, kBufferSize
, "%s\"%s\" 0x%zx\n", is_data_str
,
381 module_offset
) >= static_cast<int>(kBufferSize
)) {
382 Report("WARNING: Command buffer too small");
386 if (internal_snprintf(buffer_
, kBufferSize
, "%s\"%s:%s\" 0x%zx\n",
387 is_data_str
, module_name
, ModuleArchToString(arch
),
388 module_offset
) >= static_cast<int>(kBufferSize
)) {
389 Report("WARNING: Command buffer too small");
393 return symbolizer_process_
->SendCommand(buffer_
);
396 SymbolizerProcess::SymbolizerProcess(const char *path
, bool use_forkpty
)
398 input_fd_(kInvalidFd
),
399 output_fd_(kInvalidFd
),
401 failed_to_start_(false),
402 reported_invalid_path_(false),
403 use_forkpty_(use_forkpty
) {
405 CHECK_NE(path_
[0], '\0');
408 static bool IsSameModule(const char* path
) {
409 if (const char* ProcessName
= GetProcessName()) {
410 if (const char* SymbolizerName
= StripModuleName(path
)) {
411 return !internal_strcmp(ProcessName
, SymbolizerName
);
417 const char *SymbolizerProcess::SendCommand(const char *command
) {
418 if (failed_to_start_
)
420 if (IsSameModule(path_
)) {
421 Report("WARNING: Symbolizer was blocked from starting itself!\n");
422 failed_to_start_
= true;
425 for (; times_restarted_
< kMaxTimesRestarted
; times_restarted_
++) {
426 // Start or restart symbolizer if we failed to send command to it.
427 if (const char *res
= SendCommandImpl(command
))
431 if (!failed_to_start_
) {
432 Report("WARNING: Failed to use and restart external symbolizer!\n");
433 failed_to_start_
= true;
438 const char *SymbolizerProcess::SendCommandImpl(const char *command
) {
439 if (input_fd_
== kInvalidFd
|| output_fd_
== kInvalidFd
)
441 if (!WriteToSymbolizer(command
, internal_strlen(command
)))
443 if (!ReadFromSymbolizer(buffer_
, kBufferSize
))
448 bool SymbolizerProcess::Restart() {
449 if (input_fd_
!= kInvalidFd
)
450 CloseFile(input_fd_
);
451 if (output_fd_
!= kInvalidFd
)
452 CloseFile(output_fd_
);
453 return StartSymbolizerSubprocess();
456 bool SymbolizerProcess::ReadFromSymbolizer(char *buffer
, uptr max_length
) {
462 bool success
= ReadFromFile(input_fd_
, buffer
+ read_len
,
463 max_length
- read_len
- 1, &just_read
);
464 // We can't read 0 bytes, as we don't expect external symbolizer to close
466 if (!success
|| just_read
== 0) {
467 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_
);
470 read_len
+= just_read
;
471 if (ReachedEndOfOutput(buffer
, read_len
))
473 if (read_len
+ 1 == max_length
) {
474 Report("WARNING: Symbolizer buffer too small\n");
479 buffer
[read_len
] = '\0';
483 bool SymbolizerProcess::WriteToSymbolizer(const char *buffer
, uptr length
) {
487 bool success
= WriteToFile(output_fd_
, buffer
, length
, &write_len
);
488 if (!success
|| write_len
!= length
) {
489 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_
);
495 #endif // !SANITIZER_FUCHSIA
497 } // namespace __sanitizer