1 //===-- sanitizer_symbolizer.cc -------------------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file is shared between AddressSanitizer and ThreadSanitizer
11 // run-time libraries. See sanitizer_symbolizer.h for details.
12 //===----------------------------------------------------------------------===//
14 #include "sanitizer_common.h"
15 #include "sanitizer_placement_new.h"
16 #include "sanitizer_procmaps.h"
17 #include "sanitizer_symbolizer.h"
19 namespace __sanitizer
{
// Resets this AddressInfo: releases the function-name string with
// InternalFree and then zero-fills the whole object.
// NOTE(review): only `function` is freed in the lines visible here, while
// SymbolizeCode also stores an ExtractToken-allocated string in `file` --
// confirm every owned string is released before the memset.
21 void AddressInfo::Clear() {
23 InternalFree(function
);
// Zero-filling afterwards also clears the pointer that was just freed.
25 internal_memset(this, 0, sizeof(AddressInfo
));
// Builds a module descriptor from a module name and its base address.
// The name is duplicated with internal_strdup, so the caller's string is not
// retained by this object.
// NOTE(review): no initialization of n_ranges_ is visible in this block even
// though addAddressRange() and containsAddress() read it -- confirm it
// starts at 0.
28 LoadedModule::LoadedModule(const char *module_name
, uptr base_address
) {
29 full_name_
= internal_strdup(module_name
);
30 base_address_
= base_address
;
// Stores the range [beg, end) in slot n_ranges_ of ranges_ (containsAddress()
// treats `end` as exclusive).
// NOTE(review): the statement that advances n_ranges_ is not visible here;
// without it every call would overwrite the same slot -- confirm.
34 void LoadedModule::addAddressRange(uptr beg
, uptr end
) {
// Refuse to write past the fixed-size range table.
35 CHECK_LT(n_ranges_
, kMaxNumberOfAddressRanges
);
36 ranges_
[n_ranges_
].beg
= beg
;
37 ranges_
[n_ranges_
].end
= end
;
// Tests `address` against each of the n_ranges_ recorded ranges of this
// module. A range matches when beg <= address < end, i.e. `beg` is inclusive
// and `end` is exclusive.
41 bool LoadedModule::containsAddress(uptr address
) const {
42 for (uptr i
= 0; i
< n_ranges_
; i
++) {
43 if (ranges_
[i
].beg
<= address
&& address
< ranges_
[i
].end
)
// NOTE(review): the statement controlled by this `if` and the final return
// are not visible here; presumably true on a match and false otherwise --
// confirm.
49 // Extracts the prefix of "str" that consists of any characters not
50 // present in "delims" string, and copies this prefix to "result", allocating
// a NUL-terminated buffer of (prefix length + 1) bytes for it with
// InternalAlloc. Callers in this file release such buffers with InternalFree.
52 // Returns a pointer to "str" after skipping extracted prefix and first
// delimiter character, if one follows the prefix; the pointer is never moved
// past the terminating NUL of "str".
54 static const char *ExtractToken(const char *str
, const char *delims
,
// Length of the leading run of characters that are not in "delims".
56 uptr prefix_len
= internal_strcspn(str
, delims
);
57 *result
= (char*)InternalAlloc(prefix_len
+ 1);
58 internal_memcpy(*result
, str
, prefix_len
);
59 (*result
)[prefix_len
] = '\0';
60 const char *prefix_end
= str
+ prefix_len
;
// Step over exactly one delimiter; stay put at the end of the string.
61 if (*prefix_end
!= '\0') prefix_end
++;
65 // Same as ExtractToken, but converts extracted token to integer.
// The token text is parsed with internal_atoll and the result is cast to int,
// so values that do not fit in an int are silently narrowed.
// NOTE(review): the declarations of `result` and `buff` are on lines not
// visible here; `buff` is heap-allocated by ExtractToken -- confirm it is
// freed before this function returns.
66 static const char *ExtractInt(const char *str
, const char *delims
,
69 const char *ret
= ExtractToken(str
, delims
, &buff
);
71 *result
= (int)internal_atoll(buff
);
// Counterpart of ExtractInt for uptr values: pulls the next token out of
// "str" with ExtractToken, parses it with internal_atoll and stores the
// result, cast to uptr, in *result.
// NOTE(review): the declarations of `result` and `buff` are on lines not
// visible here; `buff` is heap-allocated by ExtractToken -- confirm it is
// freed before this function returns.
77 static const char *ExtractUptr(const char *str
, const char *delims
,
80 const char *ret
= ExtractToken(str
, delims
, &buff
);
82 *result
= (uptr
)internal_atoll(buff
);
88 // ExternalSymbolizer encapsulates communication between the tool and
89 // external symbolizer program, running in a different subprocess.
90 // For now we assume the following protocol:
91 // For each request of the form
92 // <module_name> <module_offset>
93 // passed to STDIN, external symbolizer prints to STDOUT response:
// <function_name>
95 // <file_name>:<line_number>:<column_number>
// <function_name>
97 // <file_name>:<line_number>:<column_number>
// ...
// <empty line>
// (SymbolizeCode parses one function-name line followed by one
// file:line:column line per frame, and readFromSymbolizer treats an empty
// line as the end of the reply. Data requests carry a "DATA " prefix.)
100 class ExternalSymbolizer
{
// Wraps a symbolizer subprocess reachable through the two given
// descriptors; both are checked against kInvalidFd. The restart counter
// starts at 0.
102 ExternalSymbolizer(const char *path
, int input_fd
, int output_fd
)
105 output_fd_(output_fd
),
106 times_restarted_(0) {
108 CHECK_NE(input_fd_
, kInvalidFd
);
109 CHECK_NE(output_fd_
, kInvalidFd
);
// Formats one request line into buffer_ -- an optional "DATA " prefix, the
// module name and the offset in hexadecimal -- writes it to the subprocess
// and then reads the reply back into the same buffer_.
// NOTE(review): the statements executed on write/read failure and the
// final return are not visible here.
112 char *SendCommand(bool is_data
, const char *module_name
, uptr module_offset
) {
114 internal_snprintf(buffer_
, kBufferSize
, "%s%s 0x%zx\n",
115 is_data
? "DATA " : "", module_name
, module_offset
);
116 if (!writeToSymbolizer(buffer_
, internal_strlen(buffer_
)))
118 if (!readFromSymbolizer(buffer_
, kBufferSize
))
// Restart logic: gives up once times_restarted_ has reached
// kMaxTimesRestarted; otherwise closes both descriptors and starts the
// subprocess again, passing the addresses of both descriptor members.
// NOTE(review): the function header and the statement that bumps
// times_restarted_ are not visible here -- confirm the counter advances.
124 if (times_restarted_
>= kMaxTimesRestarted
) return false;
126 internal_close(input_fd_
);
127 internal_close(output_fd_
);
128 return StartSymbolizerSubprocess(path_
, &input_fd_
, &output_fd_
);
// Reads from input_fd_ into `buffer`, appending at offset read_len, and
// recognizes the end of a reply by a trailing empty line ("\n\n").
// NOTE(review): the enclosing loop, the handling of a completely filled
// buffer (read_len == max_length) and any NUL termination are not visible
// here -- confirm.
132 bool readFromSymbolizer(char *buffer
, uptr max_length
) {
137 uptr just_read
= internal_read(input_fd_
, buffer
+ read_len
,
138 max_length
- read_len
);
139 // We can't read 0 bytes, as we don't expect external symbolizer to close
// A result of (uptr)-1 is treated as a failed read as well.
141 if (just_read
== 0 || just_read
== (uptr
)-1) {
142 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_
);
145 read_len
+= just_read
;
146 // Empty line marks the end of symbolizer output.
147 if (read_len
>= 2 && buffer
[read_len
- 1] == '\n' &&
148 buffer
[read_len
- 2] == '\n') {
// Writes `length` bytes of `buffer` to output_fd_ with one internal_write
// call; a result of 0 or (uptr)-1 is reported as a failure.
// NOTE(review): no retry for a short write (write_len < length) is visible
// here -- confirm whether partial writes need handling.
155 bool writeToSymbolizer(const char *buffer
, uptr length
) {
158 uptr write_len
= internal_write(output_fd_
, buffer
, length
);
159 if (write_len
== 0 || write_len
== (uptr
)-1) {
160 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_
);
// Capacity of buffer_, which holds both the outgoing request and the reply.
170 static const uptr kBufferSize
= 16 * 1024;
171 char buffer_
[kBufferSize
];
// Upper bound checked against times_restarted_ in the restart logic above.
173 static const uptr kMaxTimesRestarted
= 5;
174 uptr times_restarted_
;
// Allocator that provides the storage for the ExternalSymbolizer and
// InternalSymbolizer objects (constructed with placement new) and for the
// module table used by FindModuleForAddress.
177 static LowLevelAllocator symbolizer_allocator
; // Linker initialized.
179 #if SANITIZER_SUPPORTS_WEAK_HOOKS
// Optional in-process symbolization hooks, declared with the weak attribute.
// InternalSymbolizer::get() compares both symbols against 0 before using
// them. Each hook receives a module name, an offset into that module and an
// output buffer with its capacity; InternalSymbolizer::SendCommand tests the
// bool result.
// NOTE(review): the exact text format written to Buffer is not shown here;
// presumably it matches the external symbolizer protocol, since both replies
// are parsed by the same code -- confirm.
181 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
182 bool __sanitizer_symbolize_code(const char *ModuleName
, u64 ModuleOffset
,
183 char *Buffer
, int MaxLength
);
184 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
185 bool __sanitizer_symbolize_data(const char *ModuleName
, u64 ModuleOffset
,
186 char *Buffer
, int MaxLength
);
// In-process symbolizer built on the weak __sanitizer_symbolize_code and
// __sanitizer_symbolize_data hooks. This variant is compiled when
// SANITIZER_SUPPORTS_WEAK_HOOKS is set.
189 class InternalSymbolizer
{
// Signature shared by both hooks: (module name, offset, buffer, capacity).
191 typedef bool (*SanitizerSymbolizeFn
)(const char*, u64
, char*, int);
// Creates an instance in storage taken from symbolizer_allocator, but only
// when both hook symbols are non-null.
// NOTE(review): the return for the "hooks missing" case is not visible
// here; callers test the result against 0, so presumably it returns 0.
192 static InternalSymbolizer
*get() {
193 if (__sanitizer_symbolize_code
!= 0 &&
194 __sanitizer_symbolize_data
!= 0) {
195 void *mem
= symbolizer_allocator
.Allocate(sizeof(InternalSymbolizer
));
196 return new(mem
) InternalSymbolizer();
// Picks the data or code hook according to is_data and lets it fill
// buffer_ (capacity kBufferSize).
// NOTE(review): both return statements are not visible here; presumably
// buffer_ on success and 0 on failure -- confirm.
200 char *SendCommand(bool is_data
, const char *module_name
, uptr module_offset
) {
201 SanitizerSymbolizeFn symbolize_fn
= is_data
? __sanitizer_symbolize_data
202 : __sanitizer_symbolize_code
;
203 if (symbolize_fn(module_name
, module_offset
, buffer_
, kBufferSize
))
// Empty constructor; instances are created through get() above.
209 InternalSymbolizer() { }
// Capacity of buffer_. Declared as int, matching the hooks' MaxLength
// parameter.
211 static const int kBufferSize
= 16 * 1024;
212 char buffer_
[kBufferSize
];
214 #else // SANITIZER_SUPPORTS_WEAK_HOOKS
// Stand-in used when SANITIZER_SUPPORTS_WEAK_HOOKS is not set: get() always
// returns 0, so callers that test the result never obtain an instance.
216 class InternalSymbolizer
{
218 static InternalSymbolizer
*get() { return 0; }
// Present so that callers compile in both configurations.
// NOTE(review): the body is not visible here; since get() never hands out
// an instance, this is presumably never invoked -- confirm.
219 char *SendCommand(bool is_data
, const char *module_name
, uptr module_offset
) {
224 #endif // SANITIZER_SUPPORTS_WEAK_HOOKS
// Symbolizes the code address `addr` into the `frames` array, which has room
// for `max_frames` entries.
// The address is mapped to a module via FindModuleForAddress, converted to
// an offset from that module's base address and sent to a symbolizer with
// SendCommand. The reply is parsed as repeated pairs of lines: a function
// name, then "<file>:<line>:<column>". An empty function-name line ends the
// list.
// NOTE(review): the null checks on `module` and on the SendCommand result,
// the declaration of frame_id and every return statement are on lines not
// visible here; presumably the number of filled frames is returned --
// confirm.
228 uptr
SymbolizeCode(uptr addr
, AddressInfo
*frames
, uptr max_frames
) {
231 LoadedModule
*module
= FindModuleForAddress(addr
);
234 const char *module_name
= module
->full_name();
235 uptr module_offset
= addr
- module
->base_address();
236 const char *str
= SendCommand(false, module_name
, module_offset
);
238 // External symbolizer was not initialized or failed. Fill only data
239 // about module name and offset.
240 AddressInfo
*info
= &frames
[0];
242 info
->FillAddressAndModuleInfo(addr
, module_name
, module_offset
);
// Parse one frame per iteration, never writing past max_frames entries.
246 for (frame_id
= 0; frame_id
< max_frames
; frame_id
++) {
247 AddressInfo
*info
= &frames
[frame_id
];
248 char *function_name
= 0;
249 str
= ExtractToken(str
, "\n", &function_name
);
250 CHECK(function_name
);
251 if (function_name
[0] == '\0') {
252 // There are no more frames.
// NOTE(review): function_name was allocated by ExtractToken; its release
// on this path is not visible here -- confirm.
256 info
->FillAddressAndModuleInfo(addr
, module_name
, module_offset
);
// The frame takes ownership of the heap-allocated function name.
257 info
->function
= function_name
;
258 // Parse <file>:<line>:<column> buffer.
259 char *file_line_info
= 0;
260 str
= ExtractToken(str
, "\n", &file_line_info
);
261 CHECK(file_line_info
);
// Split at ':' -- the file name is allocated into info->file, then line and
// column are converted to integers. The empty delimiter set makes the last
// call consume the rest of the string.
262 const char *line_info
= ExtractToken(file_line_info
, ":", &info
->file
);
263 line_info
= ExtractInt(line_info
, ":", &info
->line
);
264 line_info
= ExtractInt(line_info
, "", &info
->column
);
265 InternalFree(file_line_info
);
267 // Functions and filenames can be "??", in which case we write 0
268 // to address info to mark that names are unknown.
269 if (0 == internal_strcmp(info
->function
, "??")) {
270 InternalFree(info
->function
);
273 if (0 == internal_strcmp(info
->file
, "??")) {
274 InternalFree(info
->file
);
279 // Make sure we return at least one frame.
280 AddressInfo
*info
= &frames
[0];
282 info
->FillAddressAndModuleInfo(addr
, module_name
, module_offset
);
// Symbolizes the data address `addr` into *info.
// *info is zero-filled first and then receives the address, a duplicated
// module name and the offset from the module's base address. A data request
// is sent with SendCommand, and the reply is parsed as a name line followed
// by "<start> <size>".
// NOTE(review): the null checks on `module` and on the SendCommand result
// and all return statements are on lines not visible here -- confirm.
288 bool SymbolizeData(uptr addr
, DataInfo
*info
) {
289 LoadedModule
*module
= FindModuleForAddress(addr
);
292 const char *module_name
= module
->full_name();
293 uptr module_offset
= addr
- module
->base_address();
294 internal_memset(info
, 0, sizeof(*info
));
295 info
->address
= addr
;
// The name is copied with internal_strdup, so *info holds its own string.
296 info
->module
= internal_strdup(module_name
);
297 info
->module_offset
= module_offset
;
298 const char *str
= SendCommand(true, module_name
, module_offset
);
301 str
= ExtractToken(str
, "\n", &info
->name
);
302 str
= ExtractUptr(str
, " ", &info
->start
);
303 str
= ExtractUptr(str
, "\n", &info
->size
);
// The parsed start value is rebased by adding the module's base address.
304 info
->start
+= module
->base_address();
// Starts the symbolizer program at `path_to_symbolizer` as a subprocess and,
// when that succeeds, constructs an ExternalSymbolizer around the resulting
// descriptors in storage taken from symbolizer_allocator.
// NOTE(review): both return statements are not visible here; presumably
// false when the subprocess cannot be started and true otherwise -- confirm.
308 bool InitializeExternalSymbolizer(const char *path_to_symbolizer
) {
// Passed to StartSymbolizerSubprocess by address.
309 int input_fd
, output_fd
;
310 if (!StartSymbolizerSubprocess(path_to_symbolizer
, &input_fd
, &output_fd
))
312 void *mem
= symbolizer_allocator
.Allocate(sizeof(ExternalSymbolizer
));
313 external_symbolizer_
= new(mem
) ExternalSymbolizer(path_to_symbolizer
,
314 input_fd
, output_fd
);
// Reports whether an internal or an external symbolizer is available.
// The internal symbolizer is looked up with InternalSymbolizer::get() on
// demand: while internal_symbolizer_ is still 0, each call tries again.
318 bool IsSymbolizerAvailable() {
319 if (internal_symbolizer_
== 0)
320 internal_symbolizer_
= InternalSymbolizer::get();
321 return internal_symbolizer_
|| external_symbolizer_
;
// Sends one symbolization request and returns the reply text.
// The internal symbolizer is preferred when one can be obtained; otherwise
// the request goes to the external symbolizer. If the external symbolizer
// cannot be restarted, its pointer is cleared so it is not used again.
// NOTE(review): several lines are not visible here -- the remaining call
// arguments, the test of `reply`, any retry after a successful restart and
// every return statement. Callers presumably receive 0 on failure --
// confirm.
325 char *SendCommand(bool is_data
, const char *module_name
, uptr module_offset
) {
326 // First, try to use internal symbolizer.
327 if (internal_symbolizer_
== 0) {
328 internal_symbolizer_
= InternalSymbolizer::get();
330 if (internal_symbolizer_
) {
331 return internal_symbolizer_
->SendCommand(is_data
, module_name
,
334 // Otherwise, fall back to external symbolizer.
335 if (external_symbolizer_
== 0) {
336 ReportExternalSymbolizerError(
337 "WARNING: Trying to symbolize code, but external "
338 "symbolizer is not initialized!\n");
342 char *reply
= external_symbolizer_
->SendCommand(is_data
, module_name
,
346 // Try to restart symbolizer subprocess. If we don't succeed, forget
347 // about it and don't try to use it later.
348 if (!external_symbolizer_
->Restart()) {
349 ReportExternalSymbolizerError(
350 "WARNING: Failed to use and restart external symbolizer!\n");
351 external_symbolizer_
= 0;
// Looks up the loaded module whose address ranges contain `address`.
// The module table is allocated from symbolizer_allocator with room for
// kMaxNumberOfModuleContexts entries and filled by GetListOfModules.
// NOTE(review): the condition guarding the allocation and the return
// statements are on lines not visible here; presumably the table is built
// once, and the matching module (or 0) is returned -- confirm.
357 LoadedModule
*FindModuleForAddress(uptr address
) {
359 modules_
= (LoadedModule
*)(symbolizer_allocator
.Allocate(
360 kMaxNumberOfModuleContexts
* sizeof(LoadedModule
)));
362 n_modules_
= GetListOfModules(modules_
, kMaxNumberOfModuleContexts
);
// At least one module is required, and the count must stay strictly below
// the table capacity, so a completely filled table also trips the check.
363 CHECK_GT(n_modules_
, 0);
364 CHECK_LT(n_modules_
, kMaxNumberOfModuleContexts
);
// Linear scan over the module table.
366 for (uptr i
= 0; i
< n_modules_
; i
++) {
367 if (modules_
[i
].containsAddress(address
)) {
// Helper called by SendCommand with a warning message about the external
// symbolizer.
// NOTE(review): the statements that use `msg` and `reported` are not visible
// here; the function-local static flag suggests the message is emitted only
// once -- confirm.
373 void ReportExternalSymbolizerError(const char *msg
) {
374 // Don't use atomics here for now, as SymbolizeCode can't be called
375 // from multiple threads anyway.
376 static bool reported
;
383 // 16K loaded modules should be enough for everyone.
// Capacity of the modules_ table allocated in FindModuleForAddress.
384 static const uptr kMaxNumberOfModuleContexts
= 1 << 14;
385 LoadedModule
*modules_
; // Array of module descriptions is leaked.
// Set by InitializeExternalSymbolizer; reset to 0 by SendCommand when a
// restart fails.
388 ExternalSymbolizer
*external_symbolizer_
; // Leaked.
// Obtained from InternalSymbolizer::get() on first use.
389 InternalSymbolizer
*internal_symbolizer_
; // Leaked.
// File-local Symbolizer instance that the namespace-level functions below
// forward to.
392 static Symbolizer symbolizer
; // Linker initialized.
// Namespace-level entry point: forwards unchanged to the file-local
// `symbolizer` instance and returns its result.
394 uptr
SymbolizeCode(uptr address
, AddressInfo
*frames
, uptr max_frames
) {
395 return symbolizer
.SymbolizeCode(address
, frames
, max_frames
);
// Namespace-level entry point: forwards unchanged to the file-local
// `symbolizer` instance and returns its result.
398 bool SymbolizeData(uptr address
, DataInfo
*info
) {
399 return symbolizer
.SymbolizeData(address
, info
);
// Namespace-level entry point: forwards the symbolizer path to the
// file-local `symbolizer` instance and returns its result.
402 bool InitializeExternalSymbolizer(const char *path_to_symbolizer
) {
403 return symbolizer
.InitializeExternalSymbolizer(path_to_symbolizer
);
// Namespace-level entry point: asks the file-local `symbolizer` instance
// whether an internal or external symbolizer can be used.
406 bool IsSymbolizerAvailable() {
407 return symbolizer
.IsSymbolizerAvailable();
410 } // namespace __sanitizer