1 //===-- sanitizer_symbolizer_libcdep.cc -----------------------------------===//
3 // This file is distributed under the University of Illinois Open Source
4 // License. See LICENSE.TXT for details.
6 //===----------------------------------------------------------------------===//
8 // This file is shared between AddressSanitizer and ThreadSanitizer
10 //===----------------------------------------------------------------------===//
12 #include "sanitizer_allocator_internal.h"
13 #include "sanitizer_internal_defs.h"
14 #include "sanitizer_symbolizer_internal.h"
16 namespace __sanitizer
{
// Splits off the leading token of str.
//   str     - NUL-terminated input text.
//   delims  - set of delimiter characters handed to internal_strcspn.
//   result  - out-parameter; receives a buffer obtained from InternalAlloc
//             that holds a NUL-terminated copy of the token.
// After the copy, prefix_end points just past the token, and past one
// delimiter character as well unless the token ran to the end of str.
// NOTE(review): the return statement and closing brace are not visible in
// this listing; presumably prefix_end is returned -- confirm.
18 const char *ExtractToken(const char *str
, const char *delims
, char **result
) {
// Presumably strcspn semantics: length of the leading run of str that
// contains no character from delims.
19 uptr prefix_len
= internal_strcspn(str
, delims
);
// One extra byte is reserved for the terminating NUL written below.
20 *result
= (char*)InternalAlloc(prefix_len
+ 1);
21 internal_memcpy(*result
, str
, prefix_len
);
22 (*result
)[prefix_len
] = '\0';
23 const char *prefix_end
= str
+ prefix_len
;
// Step over the delimiter, but never walk past the end of the string.
24 if (*prefix_end
!= '\0') prefix_end
++;
// Reads the next token of str (delimited by any character in delims) through
// ExtractToken and stores its numeric value in *result.
// The value comes from internal_atoll and is narrowed to int by the cast.
// NOTE(review): the declaration of buff, any guard around the conversion, the
// release of buff and the return statement are not visible in this listing;
// presumably the position returned by ExtractToken (ret) is returned.
28 const char *ExtractInt(const char *str
, const char *delims
, int *result
) {
// ret is the continuation position reported by ExtractToken; buff receives
// the token text.
30 const char *ret
= ExtractToken(str
, delims
, &buff
);
32 *result
= (int)internal_atoll(buff
);
// Same shape as an integer token reader, but the value is stored as uptr:
// the next token of str (delimited by any character in delims) is obtained
// through ExtractToken, converted with internal_atoll and cast to uptr.
// NOTE(review): the declaration of buff, any guard around the conversion, the
// release of buff and the return statement are not visible in this listing;
// presumably the position returned by ExtractToken (ret) is returned.
38 const char *ExtractUptr(const char *str
, const char *delims
, uptr
*result
) {
// ret is the continuation position reported by ExtractToken; buff receives
// the token text.
40 const char *ret
= ExtractToken(str
, delims
, &buff
);
// The conversion goes through a signed long long parse before the cast.
42 *result
= (uptr
)internal_atoll(buff
);
// Splits off the part of str that precedes the first occurrence of the whole
// string delimiter (located with internal_strstr), or all of str when the
// delimiter does not occur. The token is copied into a NUL-terminated buffer
// obtained from InternalAlloc and stored through result.
// NOTE(review): the remaining parameter declaration (result), the declaration
// of prefix_len, the return statement and the closing brace are not visible
// in this listing; presumably prefix_end is returned -- confirm.
48 const char *ExtractTokenUpToDelimiter(const char *str
, const char *delimiter
,
50 const char *found_delimiter
= internal_strstr(str
, delimiter
);
// Token length: distance to the delimiter if found, else the full length.
52 found_delimiter
? found_delimiter
- str
: internal_strlen(str
);
// One extra byte is reserved for the terminating NUL written below.
53 *result
= (char *)InternalAlloc(prefix_len
+ 1);
54 internal_memcpy(*result
, str
, prefix_len
);
55 (*result
)[prefix_len
] = '\0';
56 const char *prefix_end
= str
+ prefix_len
;
// Skip the entire delimiter string, unless the token ended at the end of str.
57 if (*prefix_end
!= '\0') prefix_end
+= internal_strlen(delimiter
);
// Symbolizes the code address addr.
// Holds mu_ for the duration of the call, creates a SymbolizedStack for addr,
// looks up the containing module, records module name and offset in
// res->info, and then offers the request to each tool in tools_ in iteration
// order.
// NOTE(review): the declaration of module_offset, the statement taken when
// the module lookup fails, the body executed when a tool succeeds, and the
// final return are not visible in this listing.
61 SymbolizedStack
*Symbolizer::SymbolizePC(uptr addr
) {
62 BlockingMutexLock
l(&mu_
);
63 const char *module_name
;
65 SymbolizedStack
*res
= SymbolizedStack::New(addr
);
66 if (!FindModuleNameAndOffsetForAddress(addr
, &module_name
, &module_offset
))
68 // Always fill data about module name and offset.
69 res
->info
.FillModuleInfo(module_name
, module_offset
);
// Try the tools one by one; a SymbolizerScope is created for each attempt.
70 for (auto iter
= Iterator(&tools_
); iter
.hasNext();) {
71 auto *tool
= iter
.next();
72 SymbolizerScope
sym_scope(this);
73 if (tool
->SymbolizePC(addr
, res
)) {
// Symbolizes the data address addr into *info.
// Holds mu_ for the duration of the call, looks up the containing module,
// stores an internal_strdup copy of the module name and the module offset in
// info, and then offers the request to each tool in tools_ in iteration order.
// NOTE(review): the declaration of module_offset, the statement taken when
// the module lookup fails, the body executed when a tool succeeds, and the
// final return value are not visible in this listing.
80 bool Symbolizer::SymbolizeData(uptr addr
, DataInfo
*info
) {
81 BlockingMutexLock
l(&mu_
);
82 const char *module_name
;
84 if (!FindModuleNameAndOffsetForAddress(addr
, &module_name
, &module_offset
))
// info gets its own copy of the name rather than the pointer from the lookup.
87 info
->module
= internal_strdup(module_name
);
88 info
->module_offset
= module_offset
;
// Try the tools one by one; a SymbolizerScope is created for each attempt.
89 for (auto iter
= Iterator(&tools_
); iter
.hasNext();) {
90 auto *tool
= iter
.next();
91 SymbolizerScope
sym_scope(this);
92 if (tool
->SymbolizeData(addr
, info
)) {
// Looks up the module that contains pc while holding mu_.
// On the visible path *module_name is set to the pointer returned by
// module_names_.GetOwnedCopy for the name found by the lookup, not to the
// lookup result itself.
// NOTE(review): the rest of the FindModuleNameAndOffsetForAddress argument
// list (presumably module_address), the failure branch and the return
// statements are not visible in this listing.
99 bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc
, const char **module_name
,
100 uptr
*module_address
) {
101 BlockingMutexLock
l(&mu_
);
102 const char *internal_module_name
= nullptr;
103 if (!FindModuleNameAndOffsetForAddress(pc
, &internal_module_name
,
108 *module_name
= module_names_
.GetOwnedCopy(internal_module_name
);
// Visits every tool in tools_ while holding mu_, creating a SymbolizerScope
// for each visit.
// NOTE(review): the call made on each tool and the closing braces are not
// visible in this listing; presumably each tool is asked to flush -- confirm.
112 void Symbolizer::Flush() {
113 BlockingMutexLock
l(&mu_
);
114 for (auto iter
= Iterator(&tools_
); iter
.hasNext();) {
115 auto *tool
= iter
.next();
116 SymbolizerScope
sym_scope(this);
// Demangles the symbol name.
// While holding mu_, each tool in tools_ is asked in iteration order; a
// non-null answer from a tool is captured in demangled. The visible fallback
// is the result of PlatformDemangle(name).
// NOTE(review): the statement executed when a tool returns a non-null result
// is not visible in this listing; presumably demangled is returned.
121 const char *Symbolizer::Demangle(const char *name
) {
122 BlockingMutexLock
l(&mu_
);
123 for (auto iter
= Iterator(&tools_
); iter
.hasNext();) {
124 auto *tool
= iter
.next();
125 SymbolizerScope
sym_scope(this);
126 if (const char *demangled
= tool
->Demangle(name
))
// Fallback to the platform demangler.
129 return PlatformDemangle(name
);
// Takes mu_ and forwards to PlatformPrepareForSandboxing().
// No other work is visible in this function.
132 void Symbolizer::PrepareForSandboxing() {
133 BlockingMutexLock
l(&mu_
);
134 PlatformPrepareForSandboxing();
// Resolves address to a module through FindModuleForAddress and reports
//   *module_name   = module->full_name()
//   *module_offset = address - module->base_address()
// NOTE(review): no null check of module and no return statement are visible
// in this listing; presumably a failed lookup returns false before the
// dereferences below -- confirm against the full file.
137 bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address
,
138 const char **module_name
,
139 uptr
*module_offset
) {
140 LoadedModule
*module
= FindModuleForAddress(address
);
143 *module_name
= module
->full_name();
// Offset of the address relative to the start of its module.
144 *module_offset
= address
- module
->base_address();
// Finds the loaded module whose range contains address.
// If the cached list is stale (modules_fresh_ is false) it is rebuilt with
// GetListOfModules first. The first n_modules_ entries of modules_ are then
// scanned with containsAddress. When nothing matches and the list was not
// rebuilt during this call, the cache is marked stale and the lookup is
// retried once through a recursive call; the recursion stops because the
// retry sets modules_were_reloaded.
// NOTE(review): the body of the first for loop, the assignment that receives
// the GetListOfModules result (presumably n_modules_), the statement executed
// on a match, and the final not-found return are not visible in this listing.
148 LoadedModule
*Symbolizer::FindModuleForAddress(uptr address
) {
149 bool modules_were_reloaded
= false;
150 if (!modules_fresh_
) {
151 for (uptr i
= 0; i
< n_modules_
; i
++)
154 GetListOfModules(modules_
, kMaxNumberOfModules
, /* filter */ nullptr);
// The refreshed list must be non-empty and strictly below the array capacity.
155 CHECK_GT(n_modules_
, 0);
156 CHECK_LT(n_modules_
, kMaxNumberOfModules
);
157 modules_fresh_
= true;
158 modules_were_reloaded
= true;
// Linear scan over the cached modules.
160 for (uptr i
= 0; i
< n_modules_
; i
++) {
161 if (modules_
[i
].containsAddress(address
)) {
165 // Reload the modules and look up again, if we haven't tried it yet.
166 if (!modules_were_reloaded
) {
167 // FIXME: set modules_fresh_ from dlopen()/dlclose() interceptors.
168 // It's too aggressive to reload the list of modules each time we fail
169 // to find a module for a given address.
170 modules_fresh_
= false;
171 return FindModuleForAddress(address
);
// Accessor for the symbolizer_ instance, guarded by init_mu_.
// The visible code assigns symbolizer_ from PlatformInit().
// NOTE(review): the condition under which PlatformInit() is called and the
// return statements are not visible in this listing; presumably an already
// created symbolizer_ is returned without re-initialisation -- confirm.
176 Symbolizer
*Symbolizer::GetOrInit() {
177 SpinMutexLock
l(&init_mu_
);
180 symbolizer_
= PlatformInit();
// SymbolizerProcess specialisation for the external llvm-symbolizer binary.
// It supplies the end-of-output test and the argument vector for the child.
// NOTE(review): several lines of the protocol description below and of the
// class body (access specifiers, closing braces) are not visible in this
// listing.
185 // For now we assume the following protocol:
186 // For each request of the form
187 // <module_name> <module_offset>
188 // passed to STDIN, external symbolizer prints to STDOUT response:
190 // <file_name>:<line_number>:<column_number>
192 // <file_name>:<line_number>:<column_number>
195 class LLVMSymbolizerProcess
: public SymbolizerProcess
{
// path is forwarded unchanged to the SymbolizerProcess constructor.
197 explicit LLVMSymbolizerProcess(const char *path
) : SymbolizerProcess(path
) {}
// True once the first length bytes of buffer end with two consecutive
// newline characters.
200 bool ReachedEndOfOutput(const char *buffer
, uptr length
) const override
{
201 // Empty line marks the end of llvm-symbolizer output.
202 return length
>= 2 && buffer
[length
- 1] == '\n' &&
203 buffer
[length
- 2] == '\n';
// Fills argv, in order, with the binary path, the inlining flag and the
// default-arch flag selected at compile time from the target macros below.
// NOTE(review): the declaration of i, the flag chosen when
// symbolize_inline_frames is set, the else/endif of the preprocessor chain
// and any terminating argv entry are not visible in this listing.
206 void GetArgV(const char *path_to_binary
,
207 const char *(&argv
)[kArgVMax
]) const override
{
208 #if defined(__x86_64h__)
209 const char* const kSymbolizerArch
= "--default-arch=x86_64h";
210 #elif defined(__x86_64__)
211 const char* const kSymbolizerArch
= "--default-arch=x86_64";
212 #elif defined(__i386__)
213 const char* const kSymbolizerArch
= "--default-arch=i386";
214 #elif defined(__powerpc64__) && defined(__BIG_ENDIAN__)
215 const char* const kSymbolizerArch
= "--default-arch=powerpc64";
216 #elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__)
217 const char* const kSymbolizerArch
= "--default-arch=powerpc64le";
// Fallback for targets matched by none of the conditions above.
219 const char* const kSymbolizerArch
= "--default-arch=unknown";
// Inlining flag is driven by the symbolize_inline_frames common flag.
222 const char *const inline_flag
= common_flags()->symbolize_inline_frames
224 : "--inlining=false";
226 argv
[i
++] = path_to_binary
;
227 argv
[i
++] = inline_flag
;
228 argv
[i
++] = kSymbolizerArch
;
// Constructs the tool and its helper process object.
//   path      - forwarded to the LLVMSymbolizerProcess constructor.
//   allocator - the LLVMSymbolizerProcess is created with placement new on
//               *allocator and stored in symbolizer_process_.
// No matching release of symbolizer_process_ is visible in this listing.
233 LLVMSymbolizer::LLVMSymbolizer(const char *path
, LowLevelAllocator
*allocator
)
234 : symbolizer_process_(new(*allocator
) LLVMSymbolizerProcess(path
)) {}
236 // Parse a <file>:<line>[:<column>] buffer. The file path may contain colons on
237 // Windows, so extract tokens from the right hand side first. The column info is
// Consumes one line of str (up to the next newline) and fills info->line,
// info->column (when present) and info->file from it.
// The line is copied into a temporary buffer by ExtractToken and released
// with InternalFree at the end; info->file receives its own buffer from a
// second ExtractToken call.
// NOTE(review): the end of the header comment above, the check on last_colon,
// the statements that truncate the buffer at a colon, the else branch
// structure and the return statement are not visible in this listing;
// presumably the updated str is returned -- confirm.
239 static const char *ParseFileLineInfo(AddressInfo
*info
, const char *str
) {
240 char *file_line_info
= 0;
// Take everything up to the next newline as the working copy.
241 str
= ExtractToken(str
, "\n", &file_line_info
);
242 CHECK(file_line_info
);
243 // Parse the last :<int>, which must be there.
244 char *last_colon
= internal_strrchr(file_line_info
, ':');
// Not yet known whether this number is the line or the column.
246 int line_or_column
= internal_atoll(last_colon
+ 1);
247 // Truncate the string at the last colon and find the next-to-last colon.
249 last_colon
= internal_strrchr(file_line_info
, ':');
250 if (last_colon
&& IsDigit(last_colon
[1])) {
251 // If the second-to-last colon is followed by a digit, it must be the line
252 // number, and the previous parsed number was a column.
253 info
->line
= internal_atoll(last_colon
+ 1);
254 info
->column
= line_or_column
;
257 // Otherwise, we have line info but no column info.
258 info
->line
= line_or_column
;
// An empty delimiter set makes ExtractToken copy the whole remaining buffer,
// which is then stored as the file name.
261 ExtractToken(file_line_info
, "", &info
->file
);
262 InternalFree(file_line_info
);
266 // Parses one or more two-line strings in the following format:
268 // <file_name>:<line_number>[:<column_number>]
269 // Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
270 // them use the same output format.
// Walks the symbolizer output in str one frame at a time: a function-name
// line followed by a file/line line handed to ParseFileLineInfo. An empty
// function-name line ends the walk. Additional frames are created with
// SymbolizedStack::New and inherit address and module info from res.
// Ownership of the function_name buffer moves into info->function.
// NOTE(review): the loop construct, the use of top_frame and last to choose
// or link frames, the statements that follow each InternalFree in the two
// unknown-name branches, and the closing braces are not visible in this
// listing.
271 void ParseSymbolizePCOutput(const char *str
, SymbolizedStack
*res
) {
272 bool top_frame
= true;
273 SymbolizedStack
*last
= res
;
275 char *function_name
= 0;
276 str
= ExtractToken(str
, "\n", &function_name
);
277 CHECK(function_name
);
278 if (function_name
[0] == '\0') {
279 // There are no more frames.
280 InternalFree(function_name
);
283 SymbolizedStack
*cur
;
// New frame shares the address and module information of the first frame.
288 cur
= SymbolizedStack::New(res
->info
.address
);
289 cur
->info
.FillModuleInfo(res
->info
.module
, res
->info
.module_offset
);
294 AddressInfo
*info
= &cur
->info
;
295 info
->function
= function_name
;
296 str
= ParseFileLineInfo(info
, str
);
298 // Functions and filenames can be "??", in which case we write 0
299 // to address info to mark that names are unknown.
300 if (0 == internal_strcmp(info
->function
, "??")) {
301 InternalFree(info
->function
);
304 if (0 == internal_strcmp(info
->file
, "??")) {
305 InternalFree(info
->file
);
311 // Parses a two-line string in the following format:
313 // <start_address> <size>
314 // Used by LLVMSymbolizer and InternalSymbolizer.
// Fills three fields of info from str, in this order:
//   info->name  - text up to the first newline (buffer from ExtractToken),
//   info->start - number up to the next space,
//   info->size  - number up to the next newline.
// Each call continues from the position returned by the previous one.
315 void ParseSymbolizeDataOutput(const char *str
, DataInfo
*info
) {
316 str
= ExtractToken(str
, "\n", &info
->name
);
317 str
= ExtractUptr(str
, " ", &info
->start
);
318 str
= ExtractUptr(str
, "\n", &info
->size
);
// Asks the external symbolizer about the code location already recorded in
// stack->info (module and module_offset) and, when a response buffer comes
// back, parses it into stack with ParseSymbolizePCOutput.
// The addr parameter is not used by the visible code.
// NOTE(review): the return statements are not visible in this listing;
// presumably true on a non-null response and false otherwise.
321 bool LLVMSymbolizer::SymbolizePC(uptr addr
, SymbolizedStack
*stack
) {
322 if (const char *buf
= SendCommand(/*is_data*/ false, stack
->info
.module
,
323 stack
->info
.module_offset
)) {
324 ParseSymbolizePCOutput(buf
, stack
);
// Asks the external symbolizer about the data location recorded in info
// (module and module_offset) and, when a response buffer comes back, parses
// it into info with ParseSymbolizeDataOutput.
// NOTE(review): the return statements are not visible in this listing;
// presumably true on a non-null response and false otherwise.
330 bool LLVMSymbolizer::SymbolizeData(uptr addr
, DataInfo
*info
) {
331 if (const char *buf
=
332 SendCommand(/*is_data*/ true, info
->module
, info
->module_offset
)) {
333 ParseSymbolizeDataOutput(buf
, info
);
// addr - module_offset is added to the parsed start value, turning the
// module-relative start into an address comparable with addr.
334 info
->start
+= (addr
- info
->module_offset
); // Add the base address.
// Formats one request line into buffer_ and sends it through
// symbolizer_process_, returning whatever that call returns.
// Request layout: an optional DATA prefix (when is_data is true), the module
// name in double quotes, a space, the offset as 0x-prefixed hex, a newline.
// The write is bounded by kBufferSize; the result of internal_snprintf is
// not inspected in the visible code.
// NOTE(review): one line between the signature and the formatting call is
// not visible in this listing.
340 const char *LLVMSymbolizer::SendCommand(bool is_data
, const char *module_name
,
341 uptr module_offset
) {
343 internal_snprintf(buffer_
, kBufferSize
, "%s\"%s\" 0x%zx\n",
344 is_data
? "DATA " : "", module_name
, module_offset
);
345 return symbolizer_process_
->SendCommand(buffer_
);
// Sets up the bookkeeping for an external symbolizer process without starting
// it: both descriptors begin as kInvalidFd, the failure and invalid-path
// flags begin false, and use_forkpty_ records the constructor argument.
// The body requires path_ to be a non-empty string.
// NOTE(review): the initialisers for the remaining members (including path_)
// and one statement of the body are not visible in this listing.
348 SymbolizerProcess::SymbolizerProcess(const char *path
, bool use_forkpty
)
350 input_fd_(kInvalidFd
),
351 output_fd_(kInvalidFd
),
353 failed_to_start_(false),
354 reported_invalid_path_(false),
355 use_forkpty_(use_forkpty
) {
357 CHECK_NE(path_
[0], '\0');
// Sends command to the external symbolizer, retrying while times_restarted_
// is below kMaxTimesRestarted. times_restarted_ is a member, so the retry
// budget is shared across calls rather than reset per command.
// Once the attempts are exhausted a warning is reported, and only once,
// guarded by failed_to_start_.
// NOTE(review): the statement executed when SendCommandImpl succeeds, the
// restart call inside the loop and the final return are not visible in this
// listing; presumably the response is returned on success and a null pointer
// on failure -- confirm.
360 const char *SymbolizerProcess::SendCommand(const char *command
) {
361 for (; times_restarted_
< kMaxTimesRestarted
; times_restarted_
++) {
362 // Start or restart symbolizer if we failed to send command to it.
363 if (const char *res
= SendCommandImpl(command
))
367 if (!failed_to_start_
) {
368 Report("WARNING: Failed to use and restart external symbolizer!\n");
369 failed_to_start_
= true;
// Single attempt at a request/response exchange with the child process.
// Three checks run in order: both descriptors must be valid, the whole
// command must be written, and a response must be read into buffer_ (bounded
// by kBufferSize).
// NOTE(review): the statement following each check and the final return are
// not visible in this listing; presumably each failed check returns a null
// pointer and success returns buffer_ -- confirm.
374 const char *SymbolizerProcess::SendCommandImpl(const char *command
) {
375 if (input_fd_
== kInvalidFd
|| output_fd_
== kInvalidFd
)
377 if (!WriteToSymbolizer(command
, internal_strlen(command
)))
379 if (!ReadFromSymbolizer(buffer_
, kBufferSize
))
// Closes whichever of input_fd_ and output_fd_ is currently valid, then
// starts a new subprocess and returns the result of
// StartSymbolizerSubprocess().
// The descriptors are not reset to kInvalidFd here; presumably
// StartSymbolizerSubprocess assigns them -- confirm.
384 bool SymbolizerProcess::Restart() {
385 if (input_fd_
!= kInvalidFd
)
386 CloseFile(input_fd_
);
387 if (output_fd_
!= kInvalidFd
)
388 CloseFile(output_fd_
);
389 return StartSymbolizerSubprocess();
// Accumulates the response of the child process in buffer.
// Each read appends at buffer + read_len and is limited to
// max_length - read_len - 1 bytes, which keeps one byte free for the NUL
// terminator written at the end. A failed read or a read of zero bytes is
// reported as a warning. Reading is finished once ReachedEndOfOutput accepts
// the bytes gathered so far.
// NOTE(review): the declarations of read_len and just_read, the enclosing
// loop, any guard on max_length, and all return statements are not visible
// in this listing.
392 bool SymbolizerProcess::ReadFromSymbolizer(char *buffer
, uptr max_length
) {
398 bool success
= ReadFromFile(input_fd_
, buffer
+ read_len
,
399 max_length
- read_len
- 1, &just_read
);
400 // We can't read 0 bytes, as we don't expect external symbolizer to close
402 if (!success
|| just_read
== 0) {
403 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_
);
406 read_len
+= just_read
;
407 if (ReachedEndOfOutput(buffer
, read_len
))
// Terminate the collected text so callers can treat it as a C string.
410 buffer
[read_len
] = '\0';
// Writes length bytes of buffer to output_fd_ with a single WriteToFile call.
// A failed call and a short write (write_len differs from length) are both
// treated as errors and reported with a warning; no retry of a partial write
// is visible.
// NOTE(review): the declaration of write_len, any guard on length, and the
// return statements are not visible in this listing.
414 bool SymbolizerProcess::WriteToSymbolizer(const char *buffer
, uptr length
) {
418 bool success
= WriteToFile(output_fd_
, buffer
, length
, &write_len
);
419 if (!success
|| write_len
!= length
) {
420 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_
);
426 } // namespace __sanitizer