[Sanitizer] extend internal libc with stat/fstat/lstat functions
[blocksruntime.git] / lib / sanitizer_common / sanitizer_symbolizer.cc
blob02dfbc4c45d2ebf4a437b35cec86fba9f87ad523
1 //===-- sanitizer_symbolizer.cc -------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file is shared between AddressSanitizer and ThreadSanitizer
11 // run-time libraries. See sanitizer_symbolizer.h for details.
12 //===----------------------------------------------------------------------===//
14 #include "sanitizer_common.h"
15 #include "sanitizer_placement_new.h"
16 #include "sanitizer_procmaps.h"
17 #include "sanitizer_symbolizer.h"
19 namespace __sanitizer {
21 void AddressInfo::Clear() {
22 InternalFree(module);
23 InternalFree(function);
24 InternalFree(file);
25 internal_memset(this, 0, sizeof(AddressInfo));
28 LoadedModule::LoadedModule(const char *module_name, uptr base_address) {
29 full_name_ = internal_strdup(module_name);
30 base_address_ = base_address;
31 n_ranges_ = 0;
34 void LoadedModule::addAddressRange(uptr beg, uptr end) {
35 CHECK_LT(n_ranges_, kMaxNumberOfAddressRanges);
36 ranges_[n_ranges_].beg = beg;
37 ranges_[n_ranges_].end = end;
38 n_ranges_++;
41 bool LoadedModule::containsAddress(uptr address) const {
42 for (uptr i = 0; i < n_ranges_; i++) {
43 if (ranges_[i].beg <= address && address < ranges_[i].end)
44 return true;
46 return false;
49 // Extracts the prefix of "str" that consists of any characters not
50 // present in "delims" string, and copies this prefix to "result", allocating
51 // space for it.
52 // Returns a pointer to "str" after skipping extracted prefix and first
53 // delimiter char.
54 static const char *ExtractToken(const char *str, const char *delims,
55 char **result) {
56 uptr prefix_len = internal_strcspn(str, delims);
57 *result = (char*)InternalAlloc(prefix_len + 1);
58 internal_memcpy(*result, str, prefix_len);
59 (*result)[prefix_len] = '\0';
60 const char *prefix_end = str + prefix_len;
61 if (*prefix_end != '\0') prefix_end++;
62 return prefix_end;
65 // Same as ExtractToken, but converts extracted token to integer.
66 static const char *ExtractInt(const char *str, const char *delims,
67 int *result) {
68 char *buff;
69 const char *ret = ExtractToken(str, delims, &buff);
70 if (buff != 0) {
71 *result = (int)internal_atoll(buff);
73 InternalFree(buff);
74 return ret;
77 static const char *ExtractUptr(const char *str, const char *delims,
78 uptr *result) {
79 char *buff;
80 const char *ret = ExtractToken(str, delims, &buff);
81 if (buff != 0) {
82 *result = (uptr)internal_atoll(buff);
84 InternalFree(buff);
85 return ret;
88 // ExternalSymbolizer encapsulates communication between the tool and
89 // external symbolizer program, running in a different subprocess,
90 // For now we assume the following protocol:
91 // For each request of the form
92 // <module_name> <module_offset>
93 // passed to STDIN, external symbolizer prints to STDOUT response:
94 // <function_name>
95 // <file_name>:<line_number>:<column_number>
96 // <function_name>
97 // <file_name>:<line_number>:<column_number>
98 // ...
99 // <empty line>
100 class ExternalSymbolizer {
101 public:
102 ExternalSymbolizer(const char *path, int input_fd, int output_fd)
103 : path_(path),
104 input_fd_(input_fd),
105 output_fd_(output_fd),
106 times_restarted_(0) {
107 CHECK(path_);
108 CHECK_NE(input_fd_, kInvalidFd);
109 CHECK_NE(output_fd_, kInvalidFd);
112 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
113 CHECK(module_name);
114 internal_snprintf(buffer_, kBufferSize, "%s%s 0x%zx\n",
115 is_data ? "DATA " : "", module_name, module_offset);
116 if (!writeToSymbolizer(buffer_, internal_strlen(buffer_)))
117 return 0;
118 if (!readFromSymbolizer(buffer_, kBufferSize))
119 return 0;
120 return buffer_;
123 bool Restart() {
124 if (times_restarted_ >= kMaxTimesRestarted) return false;
125 times_restarted_++;
126 internal_close(input_fd_);
127 internal_close(output_fd_);
128 return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_);
131 private:
132 bool readFromSymbolizer(char *buffer, uptr max_length) {
133 if (max_length == 0)
134 return true;
135 uptr read_len = 0;
136 while (true) {
137 uptr just_read = internal_read(input_fd_, buffer + read_len,
138 max_length - read_len);
139 // We can't read 0 bytes, as we don't expect external symbolizer to close
140 // its stdout.
141 if (just_read == 0 || just_read == (uptr)-1) {
142 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
143 return false;
145 read_len += just_read;
146 // Empty line marks the end of symbolizer output.
147 if (read_len >= 2 && buffer[read_len - 1] == '\n' &&
148 buffer[read_len - 2] == '\n') {
149 break;
152 return true;
155 bool writeToSymbolizer(const char *buffer, uptr length) {
156 if (length == 0)
157 return true;
158 uptr write_len = internal_write(output_fd_, buffer, length);
159 if (write_len == 0 || write_len == (uptr)-1) {
160 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
161 return false;
163 return true;
166 const char *path_;
167 int input_fd_;
168 int output_fd_;
170 static const uptr kBufferSize = 16 * 1024;
171 char buffer_[kBufferSize];
173 static const uptr kMaxTimesRestarted = 5;
174 uptr times_restarted_;
177 static LowLevelAllocator symbolizer_allocator; // Linker initialized.
179 #if SANITIZER_SUPPORTS_WEAK_HOOKS
180 extern "C" {
181 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
182 bool __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset,
183 char *Buffer, int MaxLength);
184 SANITIZER_WEAK_ATTRIBUTE SANITIZER_INTERFACE_ATTRIBUTE
185 bool __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset,
186 char *Buffer, int MaxLength);
187 } // extern "C"
189 class InternalSymbolizer {
190 public:
191 typedef bool (*SanitizerSymbolizeFn)(const char*, u64, char*, int);
192 static InternalSymbolizer *get() {
193 if (__sanitizer_symbolize_code != 0 &&
194 __sanitizer_symbolize_data != 0) {
195 void *mem = symbolizer_allocator.Allocate(sizeof(InternalSymbolizer));
196 return new(mem) InternalSymbolizer();
198 return 0;
200 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
201 SanitizerSymbolizeFn symbolize_fn = is_data ? __sanitizer_symbolize_data
202 : __sanitizer_symbolize_code;
203 if (symbolize_fn(module_name, module_offset, buffer_, kBufferSize))
204 return buffer_;
205 return 0;
208 private:
209 InternalSymbolizer() { }
211 static const int kBufferSize = 16 * 1024;
212 char buffer_[kBufferSize];
214 #else // SANITIZER_SUPPORTS_WEAK_HOOKS
216 class InternalSymbolizer {
217 public:
218 static InternalSymbolizer *get() { return 0; }
219 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
220 return 0;
224 #endif // SANITIZER_SUPPORTS_WEAK_HOOKS
226 class Symbolizer {
227 public:
228 uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) {
229 if (max_frames == 0)
230 return 0;
231 LoadedModule *module = FindModuleForAddress(addr);
232 if (module == 0)
233 return 0;
234 const char *module_name = module->full_name();
235 uptr module_offset = addr - module->base_address();
236 const char *str = SendCommand(false, module_name, module_offset);
237 if (str == 0) {
238 // External symbolizer was not initialized or failed. Fill only data
239 // about module name and offset.
240 AddressInfo *info = &frames[0];
241 info->Clear();
242 info->FillAddressAndModuleInfo(addr, module_name, module_offset);
243 return 1;
245 uptr frame_id = 0;
246 for (frame_id = 0; frame_id < max_frames; frame_id++) {
247 AddressInfo *info = &frames[frame_id];
248 char *function_name = 0;
249 str = ExtractToken(str, "\n", &function_name);
250 CHECK(function_name);
251 if (function_name[0] == '\0') {
252 // There are no more frames.
253 break;
255 info->Clear();
256 info->FillAddressAndModuleInfo(addr, module_name, module_offset);
257 info->function = function_name;
258 // Parse <file>:<line>:<column> buffer.
259 char *file_line_info = 0;
260 str = ExtractToken(str, "\n", &file_line_info);
261 CHECK(file_line_info);
262 const char *line_info = ExtractToken(file_line_info, ":", &info->file);
263 line_info = ExtractInt(line_info, ":", &info->line);
264 line_info = ExtractInt(line_info, "", &info->column);
265 InternalFree(file_line_info);
267 // Functions and filenames can be "??", in which case we write 0
268 // to address info to mark that names are unknown.
269 if (0 == internal_strcmp(info->function, "??")) {
270 InternalFree(info->function);
271 info->function = 0;
273 if (0 == internal_strcmp(info->file, "??")) {
274 InternalFree(info->file);
275 info->file = 0;
278 if (frame_id == 0) {
279 // Make sure we return at least one frame.
280 AddressInfo *info = &frames[0];
281 info->Clear();
282 info->FillAddressAndModuleInfo(addr, module_name, module_offset);
283 frame_id = 1;
285 return frame_id;
288 bool SymbolizeData(uptr addr, DataInfo *info) {
289 LoadedModule *module = FindModuleForAddress(addr);
290 if (module == 0)
291 return false;
292 const char *module_name = module->full_name();
293 uptr module_offset = addr - module->base_address();
294 internal_memset(info, 0, sizeof(*info));
295 info->address = addr;
296 info->module = internal_strdup(module_name);
297 info->module_offset = module_offset;
298 const char *str = SendCommand(true, module_name, module_offset);
299 if (str == 0)
300 return true;
301 str = ExtractToken(str, "\n", &info->name);
302 str = ExtractUptr(str, " ", &info->start);
303 str = ExtractUptr(str, "\n", &info->size);
304 info->start += module->base_address();
305 return true;
308 bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
309 int input_fd, output_fd;
310 if (!StartSymbolizerSubprocess(path_to_symbolizer, &input_fd, &output_fd))
311 return false;
312 void *mem = symbolizer_allocator.Allocate(sizeof(ExternalSymbolizer));
313 external_symbolizer_ = new(mem) ExternalSymbolizer(path_to_symbolizer,
314 input_fd, output_fd);
315 return true;
318 bool IsSymbolizerAvailable() {
319 if (internal_symbolizer_ == 0)
320 internal_symbolizer_ = InternalSymbolizer::get();
321 return internal_symbolizer_ || external_symbolizer_;
324 private:
325 char *SendCommand(bool is_data, const char *module_name, uptr module_offset) {
326 // First, try to use internal symbolizer.
327 if (internal_symbolizer_ == 0) {
328 internal_symbolizer_ = InternalSymbolizer::get();
330 if (internal_symbolizer_) {
331 return internal_symbolizer_->SendCommand(is_data, module_name,
332 module_offset);
334 // Otherwise, fall back to external symbolizer.
335 if (external_symbolizer_ == 0) {
336 ReportExternalSymbolizerError(
337 "WARNING: Trying to symbolize code, but external "
338 "symbolizer is not initialized!\n");
339 return 0;
341 for (;;) {
342 char *reply = external_symbolizer_->SendCommand(is_data, module_name,
343 module_offset);
344 if (reply)
345 return reply;
346 // Try to restart symbolizer subprocess. If we don't succeed, forget
347 // about it and don't try to use it later.
348 if (!external_symbolizer_->Restart()) {
349 ReportExternalSymbolizerError(
350 "WARNING: Failed to use and restart external symbolizer!\n");
351 external_symbolizer_ = 0;
352 return 0;
357 LoadedModule *FindModuleForAddress(uptr address) {
358 if (modules_ == 0) {
359 modules_ = (LoadedModule*)(symbolizer_allocator.Allocate(
360 kMaxNumberOfModuleContexts * sizeof(LoadedModule)));
361 CHECK(modules_);
362 n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts);
363 CHECK_GT(n_modules_, 0);
364 CHECK_LT(n_modules_, kMaxNumberOfModuleContexts);
366 for (uptr i = 0; i < n_modules_; i++) {
367 if (modules_[i].containsAddress(address)) {
368 return &modules_[i];
371 return 0;
373 void ReportExternalSymbolizerError(const char *msg) {
374 // Don't use atomics here for now, as SymbolizeCode can't be called
375 // from multiple threads anyway.
376 static bool reported;
377 if (!reported) {
378 Report(msg);
379 reported = true;
383 // 16K loaded modules should be enough for everyone.
384 static const uptr kMaxNumberOfModuleContexts = 1 << 14;
385 LoadedModule *modules_; // Array of module descriptions is leaked.
386 uptr n_modules_;
388 ExternalSymbolizer *external_symbolizer_; // Leaked.
389 InternalSymbolizer *internal_symbolizer_; // Leaked.
392 static Symbolizer symbolizer; // Linker initialized.
394 uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames) {
395 return symbolizer.SymbolizeCode(address, frames, max_frames);
398 bool SymbolizeData(uptr address, DataInfo *info) {
399 return symbolizer.SymbolizeData(address, info);
402 bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
403 return symbolizer.InitializeExternalSymbolizer(path_to_symbolizer);
406 bool IsSymbolizerAvailable() {
407 return symbolizer.IsSymbolizerAvailable();
410 } // namespace __sanitizer