* configure.ac: Change target-libasan to target-libsanitizer.
[official-gcc.git] / libsanitizer / sanitizer_common / sanitizer_symbolizer.cc
blob66ac3c8a24675d0154a2dfab8306cf411d882890
1 //===-- sanitizer_symbolizer.cc -------------------------------------------===//
2 //
3 // This file is distributed under the University of Illinois Open Source
4 // License. See LICENSE.TXT for details.
5 //
6 //===----------------------------------------------------------------------===//
7 //
8 // This file is shared between AddressSanitizer and ThreadSanitizer
9 // run-time libraries. See sanitizer_symbolizer.h for details.
10 //===----------------------------------------------------------------------===//
12 #include "sanitizer_common.h"
13 #include "sanitizer_placement_new.h"
14 #include "sanitizer_procmaps.h"
15 #include "sanitizer_symbolizer.h"
17 namespace __sanitizer {
19 void AddressInfo::Clear() {
20 InternalFree(module);
21 InternalFree(function);
22 InternalFree(file);
23 internal_memset(this, 0, sizeof(AddressInfo));
26 LoadedModule::LoadedModule(const char *module_name, uptr base_address) {
27 full_name_ = internal_strdup(module_name);
28 base_address_ = base_address;
29 n_ranges_ = 0;
32 void LoadedModule::addAddressRange(uptr beg, uptr end) {
33 CHECK_LT(n_ranges_, kMaxNumberOfAddressRanges);
34 ranges_[n_ranges_].beg = beg;
35 ranges_[n_ranges_].end = end;
36 n_ranges_++;
39 bool LoadedModule::containsAddress(uptr address) const {
40 for (uptr i = 0; i < n_ranges_; i++) {
41 if (ranges_[i].beg <= address && address < ranges_[i].end)
42 return true;
44 return false;
47 // Extracts the prefix of "str" that consists of any characters not
48 // present in "delims" string, and copies this prefix to "result", allocating
49 // space for it.
50 // Returns a pointer to "str" after skipping extracted prefix and first
51 // delimiter char.
52 static const char *ExtractToken(const char *str, const char *delims,
53 char **result) {
54 uptr prefix_len = internal_strcspn(str, delims);
55 *result = (char*)InternalAlloc(prefix_len + 1);
56 internal_memcpy(*result, str, prefix_len);
57 (*result)[prefix_len] = '\0';
58 const char *prefix_end = str + prefix_len;
59 if (*prefix_end != '\0') prefix_end++;
60 return prefix_end;
63 // Same as ExtractToken, but converts extracted token to integer.
64 static const char *ExtractInt(const char *str, const char *delims,
65 int *result) {
66 char *buff;
67 const char *ret = ExtractToken(str, delims, &buff);
68 if (buff != 0) {
69 *result = internal_atoll(buff);
71 InternalFree(buff);
72 return ret;
75 // ExternalSymbolizer encapsulates communication between the tool and
76 // external symbolizer program, running in a different subprocess,
77 // For now we assume the following protocol:
78 // For each request of the form
79 // <module_name> <module_offset>
80 // passed to STDIN, external symbolizer prints to STDOUT response:
81 // <function_name>
82 // <file_name>:<line_number>:<column_number>
83 // <function_name>
84 // <file_name>:<line_number>:<column_number>
85 // ...
86 // <empty line>
87 class ExternalSymbolizer {
88 public:
89 ExternalSymbolizer(const char *path, int input_fd, int output_fd)
90 : path_(path),
91 input_fd_(input_fd),
92 output_fd_(output_fd),
93 times_restarted_(0) {
94 CHECK(path_);
95 CHECK_NE(input_fd_, kInvalidFd);
96 CHECK_NE(output_fd_, kInvalidFd);
99 // Returns the number of frames for a given address, or zero if
100 // symbolization failed.
101 uptr SymbolizeCode(uptr addr, const char *module_name, uptr module_offset,
102 AddressInfo *frames, uptr max_frames) {
103 CHECK(module_name);
104 // FIXME: Make sure this buffer always has sufficient size to hold
105 // large debug info.
106 static const int kMaxBufferSize = 4096;
107 InternalScopedBuffer<char> buffer(kMaxBufferSize);
108 char *buffer_data = buffer.data();
109 internal_snprintf(buffer_data, kMaxBufferSize, "%s 0x%zx\n",
110 module_name, module_offset);
111 if (!writeToSymbolizer(buffer_data, internal_strlen(buffer_data)))
112 return 0;
114 if (!readFromSymbolizer(buffer_data, kMaxBufferSize))
115 return 0;
116 const char *str = buffer_data;
117 uptr frame_id;
118 CHECK_GT(max_frames, 0);
119 for (frame_id = 0; frame_id < max_frames; frame_id++) {
120 AddressInfo *info = &frames[frame_id];
121 char *function_name = 0;
122 str = ExtractToken(str, "\n", &function_name);
123 CHECK(function_name);
124 if (function_name[0] == '\0') {
125 // There are no more frames.
126 break;
128 info->Clear();
129 info->FillAddressAndModuleInfo(addr, module_name, module_offset);
130 info->function = function_name;
131 // Parse <file>:<line>:<column> buffer.
132 char *file_line_info = 0;
133 str = ExtractToken(str, "\n", &file_line_info);
134 CHECK(file_line_info);
135 const char *line_info = ExtractToken(file_line_info, ":", &info->file);
136 line_info = ExtractInt(line_info, ":", &info->line);
137 line_info = ExtractInt(line_info, "", &info->column);
138 InternalFree(file_line_info);
140 // Functions and filenames can be "??", in which case we write 0
141 // to address info to mark that names are unknown.
142 if (0 == internal_strcmp(info->function, "??")) {
143 InternalFree(info->function);
144 info->function = 0;
146 if (0 == internal_strcmp(info->file, "??")) {
147 InternalFree(info->file);
148 info->file = 0;
151 if (frame_id == 0) {
152 // Make sure we return at least one frame.
153 AddressInfo *info = &frames[0];
154 info->Clear();
155 info->FillAddressAndModuleInfo(addr, module_name, module_offset);
156 frame_id = 1;
158 return frame_id;
161 bool Restart() {
162 if (times_restarted_ >= kMaxTimesRestarted) return false;
163 times_restarted_++;
164 internal_close(input_fd_);
165 internal_close(output_fd_);
166 return StartSymbolizerSubprocess(path_, &input_fd_, &output_fd_);
169 private:
170 bool readFromSymbolizer(char *buffer, uptr max_length) {
171 if (max_length == 0)
172 return true;
173 uptr read_len = 0;
174 while (true) {
175 uptr just_read = internal_read(input_fd_, buffer + read_len,
176 max_length - read_len);
177 // We can't read 0 bytes, as we don't expect external symbolizer to close
178 // its stdout.
179 if (just_read == 0 || just_read == (uptr)-1) {
180 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
181 return false;
183 read_len += just_read;
184 // Empty line marks the end of symbolizer output.
185 if (read_len >= 2 && buffer[read_len - 1] == '\n' &&
186 buffer[read_len - 2] == '\n') {
187 break;
190 return true;
192 bool writeToSymbolizer(const char *buffer, uptr length) {
193 if (length == 0)
194 return true;
195 uptr write_len = internal_write(output_fd_, buffer, length);
196 if (write_len == 0 || write_len == (uptr)-1) {
197 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
198 return false;
200 return true;
203 const char *path_;
204 int input_fd_;
205 int output_fd_;
207 static const uptr kMaxTimesRestarted = 5;
208 uptr times_restarted_;
211 static LowLevelAllocator symbolizer_allocator; // Linker initialized.
213 class Symbolizer {
214 public:
215 uptr SymbolizeCode(uptr addr, AddressInfo *frames, uptr max_frames) {
216 if (max_frames == 0)
217 return 0;
218 LoadedModule *module = FindModuleForAddress(addr);
219 if (module == 0)
220 return 0;
221 const char *module_name = module->full_name();
222 uptr module_offset = addr - module->base_address();
223 uptr actual_frames = 0;
224 if (external_symbolizer_ == 0) {
225 ReportExternalSymbolizerError(
226 "WARNING: Trying to symbolize code, but external "
227 "symbolizer is not initialized!\n");
228 } else {
229 while (true) {
230 actual_frames = external_symbolizer_->SymbolizeCode(
231 addr, module_name, module_offset, frames, max_frames);
232 if (actual_frames > 0) {
233 // Symbolization was successful.
234 break;
236 // Try to restart symbolizer subprocess. If we don't succeed, forget
237 // about it and don't try to use it later.
238 if (!external_symbolizer_->Restart()) {
239 ReportExternalSymbolizerError(
240 "WARNING: Failed to use and restart external symbolizer!\n");
241 external_symbolizer_ = 0;
242 break;
246 if (external_symbolizer_ == 0) {
247 // External symbolizer was not initialized or failed. Fill only data
248 // about module name and offset.
249 AddressInfo *info = &frames[0];
250 info->Clear();
251 info->FillAddressAndModuleInfo(addr, module_name, module_offset);
252 return 1;
254 // Otherwise, the data was filled by external symbolizer.
255 return actual_frames;
257 bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
258 int input_fd, output_fd;
259 if (!StartSymbolizerSubprocess(path_to_symbolizer, &input_fd, &output_fd))
260 return false;
261 void *mem = symbolizer_allocator.Allocate(sizeof(ExternalSymbolizer));
262 external_symbolizer_ = new(mem) ExternalSymbolizer(path_to_symbolizer,
263 input_fd, output_fd);
264 return true;
267 private:
268 LoadedModule *FindModuleForAddress(uptr address) {
269 if (modules_ == 0) {
270 modules_ = (LoadedModule*)(symbolizer_allocator.Allocate(
271 kMaxNumberOfModuleContexts * sizeof(LoadedModule)));
272 CHECK(modules_);
273 n_modules_ = GetListOfModules(modules_, kMaxNumberOfModuleContexts);
274 CHECK_GT(n_modules_, 0);
275 CHECK_LT(n_modules_, kMaxNumberOfModuleContexts);
277 for (uptr i = 0; i < n_modules_; i++) {
278 if (modules_[i].containsAddress(address)) {
279 return &modules_[i];
282 return 0;
284 void ReportExternalSymbolizerError(const char *msg) {
285 // Don't use atomics here for now, as SymbolizeCode can't be called
286 // from multiple threads anyway.
287 static bool reported;
288 if (!reported) {
289 Report(msg);
290 reported = true;
294 static const uptr kMaxNumberOfModuleContexts = 4096;
295 LoadedModule *modules_; // Array of module descriptions is leaked.
296 uptr n_modules_;
298 ExternalSymbolizer *external_symbolizer_; // Leaked.
301 static Symbolizer symbolizer; // Linker initialized.
303 uptr SymbolizeCode(uptr address, AddressInfo *frames, uptr max_frames) {
304 return symbolizer.SymbolizeCode(address, frames, max_frames);
307 bool InitializeExternalSymbolizer(const char *path_to_symbolizer) {
308 return symbolizer.InitializeExternalSymbolizer(path_to_symbolizer);
311 } // namespace __sanitizer