1 //===-- sanitizer_symbolizer_libcdep.cc -----------------------------------===//
3 // This file is distributed under the University of Illinois Open Source
4 // License. See LICENSE.TXT for details.
6 //===----------------------------------------------------------------------===//
8 // This file is shared between AddressSanitizer and ThreadSanitizer
10 //===----------------------------------------------------------------------===//
12 #include "sanitizer_allocator_internal.h"
13 #include "sanitizer_internal_defs.h"
14 #include "sanitizer_symbolizer_internal.h"
16 namespace __sanitizer
{
// ExtractToken: copies the first `prefix_len` bytes of `str` into a freshly
// InternalAlloc'ed, NUL-terminated buffer and stores that buffer in `*result`.
// `prefix_len` is whatever internal_strcspn(str, delims) reports (presumably
// the length of the leading segment that contains no character from `delims`,
// as with libc strcspn -- confirm).
// `prefix_end` is the position right after the copied prefix, advanced by one
// extra character unless the prefix ended at the terminating NUL.
// NOTE(review): the return statement and closing brace are not visible in
// this excerpt; presumably `prefix_end` is returned -- confirm.
18 const char *ExtractToken(const char *str
, const char *delims
, char **result
) {
19 uptr prefix_len
= internal_strcspn(str
, delims
);
// One extra byte is allocated for the terminating NUL written below.
20 *result
= (char*)InternalAlloc(prefix_len
+ 1);
21 internal_memcpy(*result
, str
, prefix_len
);
22 (*result
)[prefix_len
] = '\0';
23 const char *prefix_end
= str
+ prefix_len
;
// Step over a single delimiter character, but never past the end of `str`.
24 if (*prefix_end
!= '\0') prefix_end
++;
// ExtractInt: pulls one token out of `str` with ExtractToken (using `delims`)
// and stores its value, converted by internal_atoll and cast to int, in
// `*result`. `ret` holds the position ExtractToken handed back.
// NOTE(review): the declaration of `buff`, any release of the token buffer
// and the return statement are not visible in this excerpt.
28 const char *ExtractInt(const char *str
, const char *delims
, int *result
) {
30 const char *ret
= ExtractToken(str
, delims
, &buff
);
// The cast narrows the value produced by internal_atoll to int.
32 *result
= (int)internal_atoll(buff
);
// ExtractUptr: same shape as the int variant, but the token converted by
// internal_atoll is cast to uptr before being stored in `*result`.
// `ret` holds the position ExtractToken handed back.
// NOTE(review): the declaration of `buff`, any release of the token buffer
// and the return statement are not visible in this excerpt.
38 const char *ExtractUptr(const char *str
, const char *delims
, uptr
*result
) {
40 const char *ret
= ExtractToken(str
, delims
, &buff
);
42 *result
= (uptr
)internal_atoll(buff
);
// ExtractTokenUpToDelimiter: like a single-token extraction, but the token
// ends at the first occurrence of the whole string `delimiter` (located with
// internal_strstr) instead of at any one of several delimiter characters.
// The token is copied into a fresh InternalAlloc'ed, NUL-terminated buffer
// stored in `*result`.
// NOTE(review): the third parameter (presumably `char **result`), the
// declaration that receives the length expression below (used later as
// `prefix_len`), and the return statement are not visible in this excerpt.
48 const char *ExtractTokenUpToDelimiter(const char *str
, const char *delimiter
,
50 const char *found_delimiter
= internal_strstr(str
, delimiter
);
// Token length: distance to the delimiter if one was found, otherwise the
// whole remaining string.
52 found_delimiter
? found_delimiter
- str
: internal_strlen(str
);
53 *result
= (char *)InternalAlloc(prefix_len
+ 1);
54 internal_memcpy(*result
, str
, prefix_len
);
55 (*result
)[prefix_len
] = '\0';
56 const char *prefix_end
= str
+ prefix_len
;
// Skip the full delimiter string, unless the token ran to the end of `str`.
57 if (*prefix_end
!= '\0') prefix_end
+= internal_strlen(delimiter
);
// Symbolizer::SymbolizePC: builds a SymbolizedStack for `addr`.
// Holds `mu_` for the duration of the call. After creating the result with
// SymbolizedStack::New(addr) it resolves the module name/offset for the
// address, records them in res->info, and then offers the address to each
// entry of `tools_` in order, constructing a SymbolizerScope around each
// attempt.
// NOTE(review): the declaration of `module_offset`, the statement executed
// when the module lookup fails, the body of the success branch inside the
// loop and the final return are not visible in this excerpt.
61 SymbolizedStack
*Symbolizer::SymbolizePC(uptr addr
) {
62 BlockingMutexLock
l(&mu_
);
63 const char *module_name
;
65 SymbolizedStack
*res
= SymbolizedStack::New(addr
);
66 if (!FindModuleNameAndOffsetForAddress(addr
, &module_name
, &module_offset
))
68 // Always fill data about module name and offset.
69 res
->info
.FillModuleInfo(module_name
, module_offset
);
70 for (auto &tool
: tools_
) {
71 SymbolizerScope
sym_scope(this);
72 if (tool
.SymbolizePC(addr
, res
)) {
// Symbolizer::SymbolizeData: fills `info` for the data address `addr`.
// Holds `mu_` for the duration of the call. The module name is duplicated
// with internal_strdup into info->module, the offset is stored in
// info->module_offset, and then each entry of `tools_` is tried in order,
// with a SymbolizerScope constructed around each attempt.
// NOTE(review): the declaration of `module_offset`, the statement executed
// when the module lookup fails, the body of the success branch inside the
// loop and the final return are not visible in this excerpt.
79 bool Symbolizer::SymbolizeData(uptr addr
, DataInfo
*info
) {
80 BlockingMutexLock
l(&mu_
);
81 const char *module_name
;
83 if (!FindModuleNameAndOffsetForAddress(addr
, &module_name
, &module_offset
))
86 info
->module
= internal_strdup(module_name
);
87 info
->module_offset
= module_offset
;
88 for (auto &tool
: tools_
) {
89 SymbolizerScope
sym_scope(this);
90 if (tool
.SymbolizeData(addr
, info
)) {
// Symbolizer::GetModuleNameAndOffsetForPC: looks up the module containing
// `pc` while holding `mu_`. The name handed to the caller through
// `*module_name` is the copy produced by module_names_.GetOwnedCopy(), not
// the internal pointer obtained from the lookup.
// NOTE(review): the remaining arguments of the lookup call, the handling of
// a failed lookup, any use of `module_address` and the return statements are
// not visible in this excerpt.
97 bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc
, const char **module_name
,
98 uptr
*module_address
) {
99 BlockingMutexLock
l(&mu_
);
100 const char *internal_module_name
= nullptr;
101 if (!FindModuleNameAndOffsetForAddress(pc
, &internal_module_name
,
106 *module_name
= module_names_
.GetOwnedCopy(internal_module_name
);
// Symbolizer::Flush: walks every entry of `tools_` while holding `mu_`,
// constructing a SymbolizerScope for each iteration.
// NOTE(review): the per-tool call made inside the loop is not visible in
// this excerpt (presumably it flushes the tool -- confirm).
110 void Symbolizer::Flush() {
111 BlockingMutexLock
l(&mu_
);
112 for (auto &tool
: tools_
) {
113 SymbolizerScope
sym_scope(this);
// Symbolizer::Demangle: asks each entry of `tools_` in turn to demangle
// `name` (holding `mu_`, with a SymbolizerScope per attempt). If the loop
// finishes without returning, the result of PlatformDemangle(name) is
// returned instead.
// NOTE(review): the statement executed when a tool yields a non-null
// `demangled` is not visible here; presumably it returns that value.
118 const char *Symbolizer::Demangle(const char *name
) {
119 BlockingMutexLock
l(&mu_
);
120 for (auto &tool
: tools_
) {
121 SymbolizerScope
sym_scope(this);
122 if (const char *demangled
= tool
.Demangle(name
))
125 return PlatformDemangle(name
);
// Symbolizer::PrepareForSandboxing: takes `mu_` and forwards to
// PlatformPrepareForSandboxing().
128 void Symbolizer::PrepareForSandboxing() {
129 BlockingMutexLock
l(&mu_
);
130 PlatformPrepareForSandboxing();
// Symbolizer::FindModuleNameAndOffsetForAddress: locates the loaded module
// that contains `address`. On a hit, `*module_name` receives the module's
// full_name() and `*module_offset` receives `address` minus the module's
// base_address().
// This function does not take `mu_` itself in the visible code; the visible
// callers in this file acquire it first.
// NOTE(review): the statement taken when no module is found and the final
// return are not visible in this excerpt (presumably false / true).
133 bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address
,
134 const char **module_name
,
135 uptr
*module_offset
) {
136 const LoadedModule
*module
= FindModuleForAddress(address
);
137 if (module
== nullptr)
139 *module_name
= module
->full_name();
140 *module_offset
= address
- module
->base_address();
// Symbolizer::FindModuleForAddress: scans `modules_` for an entry whose
// containsAddress(address) is true.
// When `modules_fresh_` is false the module list is refreshed first (the
// refresh call itself is not visible in this excerpt), it is checked to be
// non-empty, and `modules_were_reloaded` is set. If the scan fails and the
// list was not reloaded during this call, `modules_fresh_` is cleared and
// the function calls itself once more so that the lookup runs against a
// refreshed list; `modules_were_reloaded` prevents a further retry.
// NOTE(review): the statement executed on a match and the final return for
// the not-found case are not visible in this excerpt.
144 const LoadedModule
*Symbolizer::FindModuleForAddress(uptr address
) {
145 bool modules_were_reloaded
= false;
146 if (!modules_fresh_
) {
148 RAW_CHECK(modules_
.size() > 0);
149 modules_fresh_
= true;
150 modules_were_reloaded
= true;
152 for (uptr i
= 0; i
< modules_
.size(); i
++) {
153 if (modules_
[i
].containsAddress(address
)) {
157 // Reload the modules and look up again, if we haven't tried it yet.
158 if (!modules_were_reloaded
) {
159 // FIXME: set modules_fresh_ from dlopen()/dlclose() interceptors.
160 // It's too aggressive to reload the list of modules each time we fail
161 // to find a module for a given address.
162 modules_fresh_
= false;
163 return FindModuleForAddress(address
);
// Symbolizer::GetOrInit: under `init_mu_`, assigns the result of
// PlatformInit() to `symbolizer_`.
// NOTE(review): the condition guarding the assignment and the return
// statement are not visible in this excerpt; presumably an already-created
// instance is returned without calling PlatformInit() again -- confirm.
168 Symbolizer
*Symbolizer::GetOrInit() {
169 SpinMutexLock
l(&init_mu_
);
172 symbolizer_
= PlatformInit();
177 // For now we assume the following protocol:
178 // For each request of the form
179 // <module_name> <module_offset>
180 // passed to STDIN, external symbolizer prints to STDOUT response:
182 // <file_name>:<line_number>:<column_number>
184 // <file_name>:<line_number>:<column_number>
// LLVMSymbolizerProcess: SymbolizerProcess subclass holding the pieces that
// are specific to the llvm-symbolizer binary -- how the end of a reply is
// recognized and which command-line arguments are passed.
// NOTE(review): access specifiers, the #else/#endif of the architecture
// chain, the declaration of the argv index `i`, the remainder of the
// inline-flag expression and the closing braces are not visible in this
// excerpt.
187 class LLVMSymbolizerProcess
: public SymbolizerProcess
{
// Forwards the binary path to the base class.
189 explicit LLVMSymbolizerProcess(const char *path
) : SymbolizerProcess(path
) {}
// A reply is complete once the last two bytes received are both '\n'.
192 bool ReachedEndOfOutput(const char *buffer
, uptr length
) const override
{
193 // Empty line marks the end of llvm-symbolizer output.
194 return length
>= 2 && buffer
[length
- 1] == '\n' &&
195 buffer
[length
- 2] == '\n';
// Fills `argv` with the binary path, the inlining flag and a --default-arch
// option; the option string is chosen at compile time from the
// target-architecture macros tested below.
198 void GetArgV(const char *path_to_binary
,
199 const char *(&argv
)[kArgVMax
]) const override
{
200 #if defined(__x86_64h__)
201 const char* const kSymbolizerArch
= "--default-arch=x86_64h";
202 #elif defined(__x86_64__)
203 const char* const kSymbolizerArch
= "--default-arch=x86_64";
204 #elif defined(__i386__)
205 const char* const kSymbolizerArch
= "--default-arch=i386";
206 #elif defined(__aarch64__)
207 const char* const kSymbolizerArch
= "--default-arch=arm64";
208 #elif defined(__arm__)
209 const char* const kSymbolizerArch
= "--default-arch=arm";
210 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
211 const char* const kSymbolizerArch
= "--default-arch=powerpc64";
212 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
213 const char* const kSymbolizerArch
= "--default-arch=powerpc64le";
214 #elif defined(__s390x__)
215 const char* const kSymbolizerArch
= "--default-arch=s390x";
216 #elif defined(__s390__)
217 const char* const kSymbolizerArch
= "--default-arch=s390";
219 const char* const kSymbolizerArch
= "--default-arch=unknown";
// The inlining flag follows the symbolize_inline_frames common flag.
222 const char *const inline_flag
= common_flags()->symbolize_inline_frames
224 : "--inlining=false";
226 argv
[i
++] = path_to_binary
;
227 argv
[i
++] = inline_flag
;
228 argv
[i
++] = kSymbolizerArch
;
// LLVMSymbolizer constructor: creates the LLVMSymbolizerProcess for `path`
// with placement new on `*allocator` and stores it in `symbolizer_process_`.
233 LLVMSymbolizer::LLVMSymbolizer(const char *path
, LowLevelAllocator
*allocator
)
234 : symbolizer_process_(new(*allocator
) LLVMSymbolizerProcess(path
)) {}
// ParseFileLineInfo: consumes one '\n'-terminated line from `str` and splits
// it into info->file, info->line and, when present, info->column. The
// numeric fields are located by searching for ':' from the right with
// internal_strrchr. The temporary copy of the line is released with
// InternalFree before the function ends.
// NOTE(review): the check that `last_colon` is non-null, the statements that
// truncate the buffer at each colon, the `else` keyword between the two
// branches and the return statement are not visible in this excerpt.
236 // Parse a <file>:<line>[:<column>] buffer. The file path may contain colons on
237 // Windows, so extract tokens from the right hand side first. The column info is
239 static const char *ParseFileLineInfo(AddressInfo
*info
, const char *str
) {
240 char *file_line_info
= 0;
241 str
= ExtractToken(str
, "\n", &file_line_info
);
242 CHECK(file_line_info
);
243 // Parse the last :<int>, which must be there.
244 char *last_colon
= internal_strrchr(file_line_info
, ':');
// Whether this number is the line or the column is decided below.
246 int line_or_column
= internal_atoll(last_colon
+ 1);
247 // Truncate the string at the last colon and find the next-to-last colon.
249 last_colon
= internal_strrchr(file_line_info
, ':');
250 if (last_colon
&& IsDigit(last_colon
[1])) {
251 // If the second-to-last colon is followed by a digit, it must be the line
252 // number, and the previous parsed number was a column.
253 info
->line
= internal_atoll(last_colon
+ 1);
254 info
->column
= line_or_column
;
257 // Otherwise, we have line info but no column info.
258 info
->line
= line_or_column
;
// An empty delimiter set makes ExtractToken copy everything left in the
// buffer into info->file.
261 ExtractToken(file_line_info
, "", &info
->file
);
262 InternalFree(file_line_info
);
// ParseSymbolizePCOutput: reads frames from `str`, each made of a function
// name line followed by a file/line line handled by ParseFileLineInfo. An
// empty function-name line ends the input. Frames after the first are
// created with SymbolizedStack::New and inherit address, module and module
// offset from `res`. Function or file names equal to "??" are released with
// InternalFree.
// NOTE(review): the loop construct, the use of `top_frame` and `last` to
// select/link frames, and the assignments that follow each InternalFree of a
// "??" name (presumably resetting the field to 0, per the comment below) are
// not visible in this excerpt.
266 // Parses one or more two-line strings in the following format:
268 // <file_name>:<line_number>[:<column_number>]
269 // Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
270 // them use the same output format.
271 void ParseSymbolizePCOutput(const char *str
, SymbolizedStack
*res
) {
272 bool top_frame
= true;
273 SymbolizedStack
*last
= res
;
275 char *function_name
= 0;
276 str
= ExtractToken(str
, "\n", &function_name
);
277 CHECK(function_name
);
278 if (function_name
[0] == '\0') {
279 // There are no more frames.
280 InternalFree(function_name
);
283 SymbolizedStack
*cur
;
288 cur
= SymbolizedStack::New(res
->info
.address
);
289 cur
->info
.FillModuleInfo(res
->info
.module
, res
->info
.module_offset
);
294 AddressInfo
*info
= &cur
->info
;
// The frame stores the extracted name buffer itself; no copy is made here.
295 info
->function
= function_name
;
296 str
= ParseFileLineInfo(info
, str
);
298 // Functions and filenames can be "??", in which case we write 0
299 // to address info to mark that names are unknown.
300 if (0 == internal_strcmp(info
->function
, "??")) {
301 InternalFree(info
->function
);
304 if (0 == internal_strcmp(info
->file
, "??")) {
305 InternalFree(info
->file
);
// ParseSymbolizeDataOutput: reads three fields from `str` in order -- a
// '\n'-terminated name into info->name, a space-terminated number into
// info->start and a '\n'-terminated number into info->size.
// NOTE(review): the numbers go through ExtractUptr; if that converts with a
// base-10 routine, hexadecimal input would not parse -- confirm the expected
// number format against the symbolizer's output.
311 // Parses a two-line string in the following format:
313 // <start_address> <size>
314 // Used by LLVMSymbolizer and InternalSymbolizer.
315 void ParseSymbolizeDataOutput(const char *str
, DataInfo
*info
) {
316 str
= ExtractToken(str
, "\n", &info
->name
);
317 str
= ExtractUptr(str
, " ", &info
->start
);
318 str
= ExtractUptr(str
, "\n", &info
->size
);
// LLVMSymbolizer::SymbolizePC: sends a code (non-data) request built from the
// module name and offset already stored in stack->info, and, if a reply
// buffer comes back, parses it into `stack`. `addr` is not used by the
// visible code.
// NOTE(review): the return statements are not visible in this excerpt.
321 bool LLVMSymbolizer::SymbolizePC(uptr addr
, SymbolizedStack
*stack
) {
322 if (const char *buf
= SendCommand(/*is_data*/ false, stack
->info
.module
,
323 stack
->info
.module_offset
)) {
324 ParseSymbolizePCOutput(buf
, stack
);
// LLVMSymbolizer::SymbolizeData: sends a data request built from
// info->module and info->module_offset and, if a reply buffer comes back,
// parses it into `info`. The parsed start value is then shifted by
// (addr - info->module_offset).
// NOTE(review): the return statements are not visible in this excerpt.
330 bool LLVMSymbolizer::SymbolizeData(uptr addr
, DataInfo
*info
) {
331 if (const char *buf
=
332 SendCommand(/*is_data*/ true, info
->module
, info
->module_offset
)) {
333 ParseSymbolizeDataOutput(buf
, info
);
334 info
->start
+= (addr
- info
->module_offset
); // Add the base address.
// LLVMSymbolizer::SendCommand: formats one request line into `buffer_`
// (at most kBufferSize bytes) as: optional "DATA " prefix, the module name in
// double quotes, a space, the offset in hexadecimal with a 0x prefix, and a
// newline. The formatted line is handed to symbolizer_process_ and that
// call's result is returned.
// NOTE(review): a line between the signature and the snprintf call is not
// visible in this excerpt. The module name is inserted between quotes as-is;
// whether names containing '"' need escaping should be checked against the
// symbolizer's input syntax.
340 const char *LLVMSymbolizer::SendCommand(bool is_data
, const char *module_name
,
341 uptr module_offset
) {
343 internal_snprintf(buffer_
, kBufferSize
, "%s\"%s\" 0x%zx\n",
344 is_data
? "DATA " : "", module_name
, module_offset
);
345 return symbolizer_process_
->SendCommand(buffer_
);
// SymbolizerProcess constructor: starts with both descriptors set to
// kInvalidFd, the two status flags (failed_to_start_,
// reported_invalid_path_) cleared, and remembers `use_forkpty`. The body
// checks that the stored path is not an empty string.
// NOTE(review): the initializers for the path and for the restart counter,
// and any other check in the body, are not visible in this excerpt.
348 SymbolizerProcess::SymbolizerProcess(const char *path
, bool use_forkpty
)
350 input_fd_(kInvalidFd
),
351 output_fd_(kInvalidFd
),
353 failed_to_start_(false),
354 reported_invalid_path_(false),
355 use_forkpty_(use_forkpty
) {
357 CHECK_NE(path_
[0], '\0');
// SymbolizerProcess::SendCommand: tries SendCommandImpl(command) while
// `times_restarted_` is below kMaxTimesRestarted, incrementing the counter
// after each attempt that does not leave the loop. Because the counter is a
// member, the limit applies across calls, not per call.
// Once the loop is exhausted a warning is reported; `failed_to_start_`
// ensures it is printed only the first time.
// NOTE(review): the statement taken when SendCommandImpl succeeds
// (presumably returning `res`), the restart call inside the loop and the
// final return are not visible in this excerpt.
360 const char *SymbolizerProcess::SendCommand(const char *command
) {
361 for (; times_restarted_
< kMaxTimesRestarted
; times_restarted_
++) {
362 // Start or restart symbolizer if we failed to send command to it.
363 if (const char *res
= SendCommandImpl(command
))
367 if (!failed_to_start_
) {
368 Report("WARNING: Failed to use and restart external symbolizer!\n");
369 failed_to_start_
= true;
// SymbolizerProcess::SendCommandImpl: performs one request/response
// exchange. It checks that both descriptors are valid, writes the whole
// `command` string, then reads the reply into `buffer_` (capacity
// kBufferSize).
// NOTE(review): the statements following each of the three checks and the
// final return are not visible in this excerpt; presumably each failure
// yields a null result and success returns `buffer_` -- confirm.
374 const char *SymbolizerProcess::SendCommandImpl(const char *command
) {
375 if (input_fd_
== kInvalidFd
|| output_fd_
== kInvalidFd
)
377 if (!WriteToSymbolizer(command
, internal_strlen(command
)))
379 if (!ReadFromSymbolizer(buffer_
, kBufferSize
))
// SymbolizerProcess::Restart: closes whichever of the two descriptors is
// currently valid and then returns the result of
// StartSymbolizerSubprocess().
// NOTE(review): the descriptors are not reset to kInvalidFd here after being
// closed; whether StartSymbolizerSubprocess() always overwrites them (also
// on failure) should be confirmed, otherwise a later Restart could close a
// stale descriptor.
384 bool SymbolizerProcess::Restart() {
385 if (input_fd_
!= kInvalidFd
)
386 CloseFile(input_fd_
);
387 if (output_fd_
!= kInvalidFd
)
388 CloseFile(output_fd_
);
389 return StartSymbolizerSubprocess();
// SymbolizerProcess::ReadFromSymbolizer: appends data read from `input_fd_`
// to `buffer` at offset `read_len`, asking for at most
// max_length - read_len - 1 bytes so that one byte stays free for the
// terminating NUL written at the end. A failed read or a read of zero bytes
// is reported as a warning. After each successful read the subclass hook
// ReachedEndOfOutput(buffer, read_len) is consulted.
// NOTE(review): the declarations of `read_len` and `just_read`, the loop
// construct, the statements following the failure report and the
// end-of-output check, and the final return are not visible in this excerpt.
// With max_length == 0 the size expression above would underflow (uptr is
// presumably unsigned); whether that case is guarded is not visible either.
392 bool SymbolizerProcess::ReadFromSymbolizer(char *buffer
, uptr max_length
) {
398 bool success
= ReadFromFile(input_fd_
, buffer
+ read_len
,
399 max_length
- read_len
- 1, &just_read
);
400 // We can't read 0 bytes, as we don't expect external symbolizer to close
402 if (!success
|| just_read
== 0) {
403 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_
);
406 read_len
+= just_read
;
407 if (ReachedEndOfOutput(buffer
, read_len
))
410 buffer
[read_len
] = '\0';
// SymbolizerProcess::WriteToSymbolizer: writes `length` bytes of `buffer` to
// `output_fd_` with a single WriteToFile call. Both a failed call and a
// short write (write_len != length) are treated as failure and reported as a
// warning; no retry of the remaining bytes is attempted in the visible code.
// NOTE(review): the declaration of `write_len`, any early exit for an empty
// buffer and the return statements are not visible in this excerpt.
414 bool SymbolizerProcess::WriteToSymbolizer(const char *buffer
, uptr length
) {
418 bool success
= WriteToFile(output_fd_
, buffer
, length
, &write_len
);
419 if (!success
|| write_len
!= length
) {
420 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_
);
426 } // namespace __sanitizer