//===-- sanitizer_symbolizer_libcdep.cpp ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is shared between AddressSanitizer and ThreadSanitizer
// run-time libraries.
//===----------------------------------------------------------------------===//
13 #include "sanitizer_allocator_internal.h"
14 #include "sanitizer_internal_defs.h"
15 #include "sanitizer_symbolizer_internal.h"
17 namespace __sanitizer
{
19 Symbolizer
*Symbolizer::GetOrInit() {
20 SpinMutexLock
l(&init_mu_
);
23 symbolizer_
= PlatformInit();
28 // See sanitizer_symbolizer_markup.cpp.
29 #if !SANITIZER_SYMBOLIZER_MARKUP
31 const char *ExtractToken(const char *str
, const char *delims
, char **result
) {
32 uptr prefix_len
= internal_strcspn(str
, delims
);
33 *result
= (char*)InternalAlloc(prefix_len
+ 1);
34 internal_memcpy(*result
, str
, prefix_len
);
35 (*result
)[prefix_len
] = '\0';
36 const char *prefix_end
= str
+ prefix_len
;
37 if (*prefix_end
!= '\0') prefix_end
++;
41 const char *ExtractInt(const char *str
, const char *delims
, int *result
) {
43 const char *ret
= ExtractToken(str
, delims
, &buff
);
45 *result
= (int)internal_atoll(buff
);
51 const char *ExtractUptr(const char *str
, const char *delims
, uptr
*result
) {
53 const char *ret
= ExtractToken(str
, delims
, &buff
);
55 *result
= (uptr
)internal_atoll(buff
);
61 const char *ExtractSptr(const char *str
, const char *delims
, sptr
*result
) {
63 const char *ret
= ExtractToken(str
, delims
, &buff
);
65 *result
= (sptr
)internal_atoll(buff
);
71 const char *ExtractTokenUpToDelimiter(const char *str
, const char *delimiter
,
73 const char *found_delimiter
= internal_strstr(str
, delimiter
);
75 found_delimiter
? found_delimiter
- str
: internal_strlen(str
);
76 *result
= (char *)InternalAlloc(prefix_len
+ 1);
77 internal_memcpy(*result
, str
, prefix_len
);
78 (*result
)[prefix_len
] = '\0';
79 const char *prefix_end
= str
+ prefix_len
;
80 if (*prefix_end
!= '\0') prefix_end
+= internal_strlen(delimiter
);
84 SymbolizedStack
*Symbolizer::SymbolizePC(uptr addr
) {
85 BlockingMutexLock
l(&mu_
);
86 const char *module_name
= nullptr;
89 SymbolizedStack
*res
= SymbolizedStack::New(addr
);
90 if (!FindModuleNameAndOffsetForAddress(addr
, &module_name
, &module_offset
,
93 // Always fill data about module name and offset.
94 res
->info
.FillModuleInfo(module_name
, module_offset
, arch
);
95 for (auto &tool
: tools_
) {
96 SymbolizerScope
sym_scope(this);
97 if (tool
.SymbolizePC(addr
, res
)) {
104 bool Symbolizer::SymbolizeData(uptr addr
, DataInfo
*info
) {
105 BlockingMutexLock
l(&mu_
);
106 const char *module_name
= nullptr;
109 if (!FindModuleNameAndOffsetForAddress(addr
, &module_name
, &module_offset
,
113 info
->module
= internal_strdup(module_name
);
114 info
->module_offset
= module_offset
;
115 info
->module_arch
= arch
;
116 for (auto &tool
: tools_
) {
117 SymbolizerScope
sym_scope(this);
118 if (tool
.SymbolizeData(addr
, info
)) {
125 bool Symbolizer::SymbolizeFrame(uptr addr
, FrameInfo
*info
) {
126 BlockingMutexLock
l(&mu_
);
127 const char *module_name
= nullptr;
128 if (!FindModuleNameAndOffsetForAddress(
129 addr
, &module_name
, &info
->module_offset
, &info
->module_arch
))
131 info
->module
= internal_strdup(module_name
);
132 for (auto &tool
: tools_
) {
133 SymbolizerScope
sym_scope(this);
134 if (tool
.SymbolizeFrame(addr
, info
)) {
141 bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc
, const char **module_name
,
142 uptr
*module_address
) {
143 BlockingMutexLock
l(&mu_
);
144 const char *internal_module_name
= nullptr;
146 if (!FindModuleNameAndOffsetForAddress(pc
, &internal_module_name
,
147 module_address
, &arch
))
151 *module_name
= module_names_
.GetOwnedCopy(internal_module_name
);
155 void Symbolizer::Flush() {
156 BlockingMutexLock
l(&mu_
);
157 for (auto &tool
: tools_
) {
158 SymbolizerScope
sym_scope(this);
163 const char *Symbolizer::Demangle(const char *name
) {
164 BlockingMutexLock
l(&mu_
);
165 for (auto &tool
: tools_
) {
166 SymbolizerScope
sym_scope(this);
167 if (const char *demangled
= tool
.Demangle(name
))
170 return PlatformDemangle(name
);
173 bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address
,
174 const char **module_name
,
176 ModuleArch
*module_arch
) {
177 const LoadedModule
*module
= FindModuleForAddress(address
);
180 *module_name
= module
->full_name();
181 *module_offset
= address
- module
->base_address();
182 *module_arch
= module
->arch();
186 void Symbolizer::RefreshModules() {
188 fallback_modules_
.fallbackInit();
189 RAW_CHECK(modules_
.size() > 0);
190 modules_fresh_
= true;
193 static const LoadedModule
*SearchForModule(const ListOfModules
&modules
,
195 for (uptr i
= 0; i
< modules
.size(); i
++) {
196 if (modules
[i
].containsAddress(address
)) {
203 const LoadedModule
*Symbolizer::FindModuleForAddress(uptr address
) {
204 bool modules_were_reloaded
= false;
205 if (!modules_fresh_
) {
207 modules_were_reloaded
= true;
209 const LoadedModule
*module
= SearchForModule(modules_
, address
);
210 if (module
) return module
;
212 // dlopen/dlclose interceptors invalidate the module list, but when
213 // interception is disabled, we need to retry if the lookup fails in
214 // case the module list changed.
215 #if !SANITIZER_INTERCEPT_DLOPEN_DLCLOSE
216 if (!modules_were_reloaded
) {
218 module
= SearchForModule(modules_
, address
);
219 if (module
) return module
;
223 if (fallback_modules_
.size()) {
224 module
= SearchForModule(fallback_modules_
, address
);
229 // For now we assume the following protocol:
230 // For each request of the form
231 // <module_name> <module_offset>
232 // passed to STDIN, external symbolizer prints to STDOUT response:
234 // <file_name>:<line_number>:<column_number>
236 // <file_name>:<line_number>:<column_number>
239 class LLVMSymbolizerProcess
: public SymbolizerProcess
{
241 explicit LLVMSymbolizerProcess(const char *path
)
242 : SymbolizerProcess(path
, /*use_posix_spawn=*/SANITIZER_MAC
) {}
245 bool ReachedEndOfOutput(const char *buffer
, uptr length
) const override
{
246 // Empty line marks the end of llvm-symbolizer output.
247 return length
>= 2 && buffer
[length
- 1] == '\n' &&
248 buffer
[length
- 2] == '\n';
251 // When adding a new architecture, don't forget to also update
252 // script/asan_symbolize.py and sanitizer_common.h.
253 void GetArgV(const char *path_to_binary
,
254 const char *(&argv
)[kArgVMax
]) const override
{
255 #if defined(__x86_64h__)
256 const char* const kSymbolizerArch
= "--default-arch=x86_64h";
257 #elif defined(__x86_64__)
258 const char* const kSymbolizerArch
= "--default-arch=x86_64";
259 #elif defined(__i386__)
260 const char* const kSymbolizerArch
= "--default-arch=i386";
261 #elif defined(__aarch64__)
262 const char* const kSymbolizerArch
= "--default-arch=arm64";
263 #elif defined(__arm__)
264 const char* const kSymbolizerArch
= "--default-arch=arm";
265 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
266 const char* const kSymbolizerArch
= "--default-arch=powerpc64";
267 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
268 const char* const kSymbolizerArch
= "--default-arch=powerpc64le";
269 #elif defined(__s390x__)
270 const char* const kSymbolizerArch
= "--default-arch=s390x";
271 #elif defined(__s390__)
272 const char* const kSymbolizerArch
= "--default-arch=s390";
274 const char* const kSymbolizerArch
= "--default-arch=unknown";
277 const char *const inline_flag
= common_flags()->symbolize_inline_frames
279 : "--inlining=false";
281 argv
[i
++] = path_to_binary
;
282 argv
[i
++] = inline_flag
;
283 argv
[i
++] = kSymbolizerArch
;
288 LLVMSymbolizer::LLVMSymbolizer(const char *path
, LowLevelAllocator
*allocator
)
289 : symbolizer_process_(new(*allocator
) LLVMSymbolizerProcess(path
)) {}
291 // Parse a <file>:<line>[:<column>] buffer. The file path may contain colons on
292 // Windows, so extract tokens from the right hand side first. The column info is
294 static const char *ParseFileLineInfo(AddressInfo
*info
, const char *str
) {
295 char *file_line_info
= nullptr;
296 str
= ExtractToken(str
, "\n", &file_line_info
);
297 CHECK(file_line_info
);
299 if (uptr size
= internal_strlen(file_line_info
)) {
300 char *back
= file_line_info
+ size
- 1;
301 for (int i
= 0; i
< 2; ++i
) {
302 while (back
> file_line_info
&& IsDigit(*back
)) --back
;
303 if (*back
!= ':' || !IsDigit(back
[1])) break;
304 info
->column
= info
->line
;
305 info
->line
= internal_atoll(back
+ 1);
306 // Truncate the string at the colon to keep only filename.
310 ExtractToken(file_line_info
, "", &info
->file
);
313 InternalFree(file_line_info
);
317 // Parses one or more two-line strings in the following format:
319 // <file_name>:<line_number>[:<column_number>]
320 // Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
321 // them use the same output format.
322 void ParseSymbolizePCOutput(const char *str
, SymbolizedStack
*res
) {
323 bool top_frame
= true;
324 SymbolizedStack
*last
= res
;
326 char *function_name
= nullptr;
327 str
= ExtractToken(str
, "\n", &function_name
);
328 CHECK(function_name
);
329 if (function_name
[0] == '\0') {
330 // There are no more frames.
331 InternalFree(function_name
);
334 SymbolizedStack
*cur
;
339 cur
= SymbolizedStack::New(res
->info
.address
);
340 cur
->info
.FillModuleInfo(res
->info
.module
, res
->info
.module_offset
,
341 res
->info
.module_arch
);
346 AddressInfo
*info
= &cur
->info
;
347 info
->function
= function_name
;
348 str
= ParseFileLineInfo(info
, str
);
350 // Functions and filenames can be "??", in which case we write 0
351 // to address info to mark that names are unknown.
352 if (0 == internal_strcmp(info
->function
, "??")) {
353 InternalFree(info
->function
);
356 if (0 == internal_strcmp(info
->file
, "??")) {
357 InternalFree(info
->file
);
363 // Parses a two-line string in the following format:
365 // <start_address> <size>
366 // Used by LLVMSymbolizer and InternalSymbolizer.
367 void ParseSymbolizeDataOutput(const char *str
, DataInfo
*info
) {
368 str
= ExtractToken(str
, "\n", &info
->name
);
369 str
= ExtractUptr(str
, " ", &info
->start
);
370 str
= ExtractUptr(str
, "\n", &info
->size
);
373 static void ParseSymbolizeFrameOutput(const char *str
,
374 InternalMmapVector
<LocalInfo
> *locals
) {
375 if (internal_strncmp(str
, "??", 2) == 0)
380 str
= ExtractToken(str
, "\n", &local
.function_name
);
381 str
= ExtractToken(str
, "\n", &local
.name
);
384 str
= ParseFileLineInfo(&addr
, str
);
385 local
.decl_file
= addr
.file
;
386 local
.decl_line
= addr
.line
;
388 local
.has_frame_offset
= internal_strncmp(str
, "??", 2) != 0;
389 str
= ExtractSptr(str
, " ", &local
.frame_offset
);
391 local
.has_size
= internal_strncmp(str
, "??", 2) != 0;
392 str
= ExtractUptr(str
, " ", &local
.size
);
394 local
.has_tag_offset
= internal_strncmp(str
, "??", 2) != 0;
395 str
= ExtractUptr(str
, "\n", &local
.tag_offset
);
397 locals
->push_back(local
);
401 bool LLVMSymbolizer::SymbolizePC(uptr addr
, SymbolizedStack
*stack
) {
402 AddressInfo
*info
= &stack
->info
;
403 const char *buf
= FormatAndSendCommand(
404 "CODE", info
->module
, info
->module_offset
, info
->module_arch
);
407 ParseSymbolizePCOutput(buf
, stack
);
411 bool LLVMSymbolizer::SymbolizeData(uptr addr
, DataInfo
*info
) {
412 const char *buf
= FormatAndSendCommand(
413 "DATA", info
->module
, info
->module_offset
, info
->module_arch
);
416 ParseSymbolizeDataOutput(buf
, info
);
417 info
->start
+= (addr
- info
->module_offset
); // Add the base address.
421 bool LLVMSymbolizer::SymbolizeFrame(uptr addr
, FrameInfo
*info
) {
422 const char *buf
= FormatAndSendCommand(
423 "FRAME", info
->module
, info
->module_offset
, info
->module_arch
);
426 ParseSymbolizeFrameOutput(buf
, &info
->locals
);
430 const char *LLVMSymbolizer::FormatAndSendCommand(const char *command_prefix
,
431 const char *module_name
,
436 if (arch
== kModuleArchUnknown
)
437 size_needed
= internal_snprintf(buffer_
, kBufferSize
, "%s \"%s\" 0x%zx\n",
438 command_prefix
, module_name
, module_offset
);
440 size_needed
= internal_snprintf(buffer_
, kBufferSize
,
441 "%s \"%s:%s\" 0x%zx\n", command_prefix
,
442 module_name
, ModuleArchToString(arch
),
445 if (size_needed
>= static_cast<int>(kBufferSize
)) {
446 Report("WARNING: Command buffer too small");
450 return symbolizer_process_
->SendCommand(buffer_
);
453 SymbolizerProcess::SymbolizerProcess(const char *path
, bool use_posix_spawn
)
455 input_fd_(kInvalidFd
),
456 output_fd_(kInvalidFd
),
458 failed_to_start_(false),
459 reported_invalid_path_(false),
460 use_posix_spawn_(use_posix_spawn
) {
462 CHECK_NE(path_
[0], '\0');
465 static bool IsSameModule(const char* path
) {
466 if (const char* ProcessName
= GetProcessName()) {
467 if (const char* SymbolizerName
= StripModuleName(path
)) {
468 return !internal_strcmp(ProcessName
, SymbolizerName
);
474 const char *SymbolizerProcess::SendCommand(const char *command
) {
475 if (failed_to_start_
)
477 if (IsSameModule(path_
)) {
478 Report("WARNING: Symbolizer was blocked from starting itself!\n");
479 failed_to_start_
= true;
482 for (; times_restarted_
< kMaxTimesRestarted
; times_restarted_
++) {
483 // Start or restart symbolizer if we failed to send command to it.
484 if (const char *res
= SendCommandImpl(command
))
488 if (!failed_to_start_
) {
489 Report("WARNING: Failed to use and restart external symbolizer!\n");
490 failed_to_start_
= true;
495 const char *SymbolizerProcess::SendCommandImpl(const char *command
) {
496 if (input_fd_
== kInvalidFd
|| output_fd_
== kInvalidFd
)
498 if (!WriteToSymbolizer(command
, internal_strlen(command
)))
500 if (!ReadFromSymbolizer(buffer_
, kBufferSize
))
505 bool SymbolizerProcess::Restart() {
506 if (input_fd_
!= kInvalidFd
)
507 CloseFile(input_fd_
);
508 if (output_fd_
!= kInvalidFd
)
509 CloseFile(output_fd_
);
510 return StartSymbolizerSubprocess();
513 bool SymbolizerProcess::ReadFromSymbolizer(char *buffer
, uptr max_length
) {
519 bool success
= ReadFromFile(input_fd_
, buffer
+ read_len
,
520 max_length
- read_len
- 1, &just_read
);
521 // We can't read 0 bytes, as we don't expect external symbolizer to close
523 if (!success
|| just_read
== 0) {
524 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_
);
527 read_len
+= just_read
;
528 if (ReachedEndOfOutput(buffer
, read_len
))
530 if (read_len
+ 1 == max_length
) {
531 Report("WARNING: Symbolizer buffer too small\n");
536 buffer
[read_len
] = '\0';
540 bool SymbolizerProcess::WriteToSymbolizer(const char *buffer
, uptr length
) {
544 bool success
= WriteToFile(output_fd_
, buffer
, length
, &write_len
);
545 if (!success
|| write_len
!= length
) {
546 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_
);
552 #endif // !SANITIZER_SYMBOLIZER_MARKUP
554 } // namespace __sanitizer