d: Fix accesses of immutable arrays using constant index still bounds checked
[official-gcc.git] / libsanitizer / sanitizer_common / sanitizer_symbolizer_libcdep.cpp
bloba6f82ced20367310fc13d8fecb3e1c1e70efe62b
1 //===-- sanitizer_symbolizer_libcdep.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is shared between AddressSanitizer and ThreadSanitizer
10 // run-time libraries.
11 //===----------------------------------------------------------------------===//
13 #include "sanitizer_allocator_internal.h"
14 #include "sanitizer_internal_defs.h"
15 #include "sanitizer_platform.h"
16 #include "sanitizer_symbolizer_internal.h"
18 namespace __sanitizer {
20 Symbolizer *Symbolizer::GetOrInit() {
21 SpinMutexLock l(&init_mu_);
22 if (symbolizer_)
23 return symbolizer_;
24 symbolizer_ = PlatformInit();
25 CHECK(symbolizer_);
26 return symbolizer_;
29 // See sanitizer_symbolizer_markup.cpp.
30 #if !SANITIZER_SYMBOLIZER_MARKUP
32 const char *ExtractToken(const char *str, const char *delims, char **result) {
33 uptr prefix_len = internal_strcspn(str, delims);
34 *result = (char*)InternalAlloc(prefix_len + 1);
35 internal_memcpy(*result, str, prefix_len);
36 (*result)[prefix_len] = '\0';
37 const char *prefix_end = str + prefix_len;
38 if (*prefix_end != '\0') prefix_end++;
39 return prefix_end;
42 const char *ExtractInt(const char *str, const char *delims, int *result) {
43 char *buff = nullptr;
44 const char *ret = ExtractToken(str, delims, &buff);
45 if (buff) {
46 *result = (int)internal_atoll(buff);
48 InternalFree(buff);
49 return ret;
52 const char *ExtractUptr(const char *str, const char *delims, uptr *result) {
53 char *buff = nullptr;
54 const char *ret = ExtractToken(str, delims, &buff);
55 if (buff) {
56 *result = (uptr)internal_atoll(buff);
58 InternalFree(buff);
59 return ret;
62 const char *ExtractSptr(const char *str, const char *delims, sptr *result) {
63 char *buff = nullptr;
64 const char *ret = ExtractToken(str, delims, &buff);
65 if (buff) {
66 *result = (sptr)internal_atoll(buff);
68 InternalFree(buff);
69 return ret;
72 const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter,
73 char **result) {
74 const char *found_delimiter = internal_strstr(str, delimiter);
75 uptr prefix_len =
76 found_delimiter ? found_delimiter - str : internal_strlen(str);
77 *result = (char *)InternalAlloc(prefix_len + 1);
78 internal_memcpy(*result, str, prefix_len);
79 (*result)[prefix_len] = '\0';
80 const char *prefix_end = str + prefix_len;
81 if (*prefix_end != '\0') prefix_end += internal_strlen(delimiter);
82 return prefix_end;
85 SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) {
86 Lock l(&mu_);
87 SymbolizedStack *res = SymbolizedStack::New(addr);
88 auto *mod = FindModuleForAddress(addr);
89 if (!mod)
90 return res;
91 // Always fill data about module name and offset.
92 res->info.FillModuleInfo(*mod);
93 for (auto &tool : tools_) {
94 SymbolizerScope sym_scope(this);
95 if (tool.SymbolizePC(addr, res)) {
96 return res;
99 return res;
102 bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) {
103 Lock l(&mu_);
104 const char *module_name = nullptr;
105 uptr module_offset;
106 ModuleArch arch;
107 if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset,
108 &arch))
109 return false;
110 info->Clear();
111 info->module = internal_strdup(module_name);
112 info->module_offset = module_offset;
113 info->module_arch = arch;
114 for (auto &tool : tools_) {
115 SymbolizerScope sym_scope(this);
116 if (tool.SymbolizeData(addr, info)) {
117 return true;
120 return true;
123 bool Symbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) {
124 Lock l(&mu_);
125 const char *module_name = nullptr;
126 if (!FindModuleNameAndOffsetForAddress(
127 addr, &module_name, &info->module_offset, &info->module_arch))
128 return false;
129 info->module = internal_strdup(module_name);
130 for (auto &tool : tools_) {
131 SymbolizerScope sym_scope(this);
132 if (tool.SymbolizeFrame(addr, info)) {
133 return true;
136 return true;
139 bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
140 uptr *module_address) {
141 Lock l(&mu_);
142 const char *internal_module_name = nullptr;
143 ModuleArch arch;
144 if (!FindModuleNameAndOffsetForAddress(pc, &internal_module_name,
145 module_address, &arch))
146 return false;
148 if (module_name)
149 *module_name = module_names_.GetOwnedCopy(internal_module_name);
150 return true;
153 void Symbolizer::Flush() {
154 Lock l(&mu_);
155 for (auto &tool : tools_) {
156 SymbolizerScope sym_scope(this);
157 tool.Flush();
161 const char *Symbolizer::Demangle(const char *name) {
162 Lock l(&mu_);
163 for (auto &tool : tools_) {
164 SymbolizerScope sym_scope(this);
165 if (const char *demangled = tool.Demangle(name))
166 return demangled;
168 return PlatformDemangle(name);
171 bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address,
172 const char **module_name,
173 uptr *module_offset,
174 ModuleArch *module_arch) {
175 const LoadedModule *module = FindModuleForAddress(address);
176 if (!module)
177 return false;
178 *module_name = module->full_name();
179 *module_offset = address - module->base_address();
180 *module_arch = module->arch();
181 return true;
184 void Symbolizer::RefreshModules() {
185 modules_.init();
186 fallback_modules_.fallbackInit();
187 RAW_CHECK(modules_.size() > 0);
188 modules_fresh_ = true;
191 static const LoadedModule *SearchForModule(const ListOfModules &modules,
192 uptr address) {
193 for (uptr i = 0; i < modules.size(); i++) {
194 if (modules[i].containsAddress(address)) {
195 return &modules[i];
198 return nullptr;
201 const LoadedModule *Symbolizer::FindModuleForAddress(uptr address) {
202 bool modules_were_reloaded = false;
203 if (!modules_fresh_) {
204 RefreshModules();
205 modules_were_reloaded = true;
207 const LoadedModule *module = SearchForModule(modules_, address);
208 if (module) return module;
210 // dlopen/dlclose interceptors invalidate the module list, but when
211 // interception is disabled, we need to retry if the lookup fails in
212 // case the module list changed.
213 #if !SANITIZER_INTERCEPT_DLOPEN_DLCLOSE
214 if (!modules_were_reloaded) {
215 RefreshModules();
216 module = SearchForModule(modules_, address);
217 if (module) return module;
219 #endif
221 if (fallback_modules_.size()) {
222 module = SearchForModule(fallback_modules_, address);
224 return module;
227 // For now we assume the following protocol:
228 // For each request of the form
229 // <module_name> <module_offset>
230 // passed to STDIN, external symbolizer prints to STDOUT response:
231 // <function_name>
232 // <file_name>:<line_number>:<column_number>
233 // <function_name>
234 // <file_name>:<line_number>:<column_number>
235 // ...
236 // <empty line>
237 class LLVMSymbolizerProcess final : public SymbolizerProcess {
238 public:
239 explicit LLVMSymbolizerProcess(const char *path)
240 : SymbolizerProcess(path, /*use_posix_spawn=*/SANITIZER_APPLE) {}
242 private:
243 bool ReachedEndOfOutput(const char *buffer, uptr length) const override {
244 // Empty line marks the end of llvm-symbolizer output.
245 return length >= 2 && buffer[length - 1] == '\n' &&
246 buffer[length - 2] == '\n';
249 // When adding a new architecture, don't forget to also update
250 // script/asan_symbolize.py and sanitizer_common.h.
251 void GetArgV(const char *path_to_binary,
252 const char *(&argv)[kArgVMax]) const override {
253 #if defined(__x86_64h__)
254 const char* const kSymbolizerArch = "--default-arch=x86_64h";
255 #elif defined(__x86_64__)
256 const char* const kSymbolizerArch = "--default-arch=x86_64";
257 #elif defined(__i386__)
258 const char* const kSymbolizerArch = "--default-arch=i386";
259 #elif SANITIZER_LOONGARCH64
260 const char *const kSymbolizerArch = "--default-arch=loongarch64";
261 #elif SANITIZER_RISCV64
262 const char *const kSymbolizerArch = "--default-arch=riscv64";
263 #elif defined(__aarch64__)
264 const char* const kSymbolizerArch = "--default-arch=arm64";
265 #elif defined(__arm__)
266 const char* const kSymbolizerArch = "--default-arch=arm";
267 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
268 const char* const kSymbolizerArch = "--default-arch=powerpc64";
269 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
270 const char* const kSymbolizerArch = "--default-arch=powerpc64le";
271 #elif defined(__s390x__)
272 const char* const kSymbolizerArch = "--default-arch=s390x";
273 #elif defined(__s390__)
274 const char* const kSymbolizerArch = "--default-arch=s390";
275 #else
276 const char* const kSymbolizerArch = "--default-arch=unknown";
277 #endif
279 const char *const demangle_flag =
280 common_flags()->demangle ? "--demangle" : "--no-demangle";
281 const char *const inline_flag =
282 common_flags()->symbolize_inline_frames ? "--inlines" : "--no-inlines";
283 int i = 0;
284 argv[i++] = path_to_binary;
285 argv[i++] = demangle_flag;
286 argv[i++] = inline_flag;
287 argv[i++] = kSymbolizerArch;
288 argv[i++] = nullptr;
289 CHECK_LE(i, kArgVMax);
293 LLVMSymbolizer::LLVMSymbolizer(const char *path, LowLevelAllocator *allocator)
294 : symbolizer_process_(new(*allocator) LLVMSymbolizerProcess(path)) {}
296 // Parse a <file>:<line>[:<column>] buffer. The file path may contain colons on
297 // Windows, so extract tokens from the right hand side first. The column info is
298 // also optional.
299 static const char *ParseFileLineInfo(AddressInfo *info, const char *str) {
300 char *file_line_info = nullptr;
301 str = ExtractToken(str, "\n", &file_line_info);
302 CHECK(file_line_info);
304 if (uptr size = internal_strlen(file_line_info)) {
305 char *back = file_line_info + size - 1;
306 for (int i = 0; i < 2; ++i) {
307 while (back > file_line_info && IsDigit(*back)) --back;
308 if (*back != ':' || !IsDigit(back[1])) break;
309 info->column = info->line;
310 info->line = internal_atoll(back + 1);
311 // Truncate the string at the colon to keep only filename.
312 *back = '\0';
313 --back;
315 ExtractToken(file_line_info, "", &info->file);
318 InternalFree(file_line_info);
319 return str;
322 // Parses one or more two-line strings in the following format:
323 // <function_name>
324 // <file_name>:<line_number>[:<column_number>]
325 // Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
326 // them use the same output format.
327 void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) {
328 bool top_frame = true;
329 SymbolizedStack *last = res;
330 while (true) {
331 char *function_name = nullptr;
332 str = ExtractToken(str, "\n", &function_name);
333 CHECK(function_name);
334 if (function_name[0] == '\0') {
335 // There are no more frames.
336 InternalFree(function_name);
337 break;
339 SymbolizedStack *cur;
340 if (top_frame) {
341 cur = res;
342 top_frame = false;
343 } else {
344 cur = SymbolizedStack::New(res->info.address);
345 cur->info.FillModuleInfo(res->info.module, res->info.module_offset,
346 res->info.module_arch);
347 last->next = cur;
348 last = cur;
351 AddressInfo *info = &cur->info;
352 info->function = function_name;
353 str = ParseFileLineInfo(info, str);
355 // Functions and filenames can be "??", in which case we write 0
356 // to address info to mark that names are unknown.
357 if (0 == internal_strcmp(info->function, "??")) {
358 InternalFree(info->function);
359 info->function = 0;
361 if (info->file && 0 == internal_strcmp(info->file, "??")) {
362 InternalFree(info->file);
363 info->file = 0;
368 // Parses a two- or three-line string in the following format:
369 // <symbol_name>
370 // <start_address> <size>
371 // <filename>:<column>
372 // Used by LLVMSymbolizer and InternalSymbolizer. LLVMSymbolizer added support
373 // for symbolizing the third line in D123538, but we support the older two-line
374 // information as well.
375 void ParseSymbolizeDataOutput(const char *str, DataInfo *info) {
376 str = ExtractToken(str, "\n", &info->name);
377 str = ExtractUptr(str, " ", &info->start);
378 str = ExtractUptr(str, "\n", &info->size);
379 // Note: If the third line isn't present, these calls will set info.{file,
380 // line} to empty strings.
381 str = ExtractToken(str, ":", &info->file);
382 str = ExtractUptr(str, "\n", &info->line);
385 static void ParseSymbolizeFrameOutput(const char *str,
386 InternalMmapVector<LocalInfo> *locals) {
387 if (internal_strncmp(str, "??", 2) == 0)
388 return;
390 while (*str) {
391 LocalInfo local;
392 str = ExtractToken(str, "\n", &local.function_name);
393 str = ExtractToken(str, "\n", &local.name);
395 AddressInfo addr;
396 str = ParseFileLineInfo(&addr, str);
397 local.decl_file = addr.file;
398 local.decl_line = addr.line;
400 local.has_frame_offset = internal_strncmp(str, "??", 2) != 0;
401 str = ExtractSptr(str, " ", &local.frame_offset);
403 local.has_size = internal_strncmp(str, "??", 2) != 0;
404 str = ExtractUptr(str, " ", &local.size);
406 local.has_tag_offset = internal_strncmp(str, "??", 2) != 0;
407 str = ExtractUptr(str, "\n", &local.tag_offset);
409 locals->push_back(local);
413 bool LLVMSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
414 AddressInfo *info = &stack->info;
415 const char *buf = FormatAndSendCommand(
416 "CODE", info->module, info->module_offset, info->module_arch);
417 if (!buf)
418 return false;
419 ParseSymbolizePCOutput(buf, stack);
420 return true;
423 bool LLVMSymbolizer::SymbolizeData(uptr addr, DataInfo *info) {
424 const char *buf = FormatAndSendCommand(
425 "DATA", info->module, info->module_offset, info->module_arch);
426 if (!buf)
427 return false;
428 ParseSymbolizeDataOutput(buf, info);
429 info->start += (addr - info->module_offset); // Add the base address.
430 return true;
433 bool LLVMSymbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) {
434 const char *buf = FormatAndSendCommand(
435 "FRAME", info->module, info->module_offset, info->module_arch);
436 if (!buf)
437 return false;
438 ParseSymbolizeFrameOutput(buf, &info->locals);
439 return true;
442 const char *LLVMSymbolizer::FormatAndSendCommand(const char *command_prefix,
443 const char *module_name,
444 uptr module_offset,
445 ModuleArch arch) {
446 CHECK(module_name);
447 int size_needed = 0;
448 if (arch == kModuleArchUnknown)
449 size_needed = internal_snprintf(buffer_, kBufferSize, "%s \"%s\" 0x%zx\n",
450 command_prefix, module_name, module_offset);
451 else
452 size_needed = internal_snprintf(buffer_, kBufferSize,
453 "%s \"%s:%s\" 0x%zx\n", command_prefix,
454 module_name, ModuleArchToString(arch),
455 module_offset);
457 if (size_needed >= static_cast<int>(kBufferSize)) {
458 Report("WARNING: Command buffer too small");
459 return nullptr;
462 return symbolizer_process_->SendCommand(buffer_);
465 SymbolizerProcess::SymbolizerProcess(const char *path, bool use_posix_spawn)
466 : path_(path),
467 input_fd_(kInvalidFd),
468 output_fd_(kInvalidFd),
469 times_restarted_(0),
470 failed_to_start_(false),
471 reported_invalid_path_(false),
472 use_posix_spawn_(use_posix_spawn) {
473 CHECK(path_);
474 CHECK_NE(path_[0], '\0');
477 static bool IsSameModule(const char* path) {
478 if (const char* ProcessName = GetProcessName()) {
479 if (const char* SymbolizerName = StripModuleName(path)) {
480 return !internal_strcmp(ProcessName, SymbolizerName);
483 return false;
486 const char *SymbolizerProcess::SendCommand(const char *command) {
487 if (failed_to_start_)
488 return nullptr;
489 if (IsSameModule(path_)) {
490 Report("WARNING: Symbolizer was blocked from starting itself!\n");
491 failed_to_start_ = true;
492 return nullptr;
494 for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) {
495 // Start or restart symbolizer if we failed to send command to it.
496 if (const char *res = SendCommandImpl(command))
497 return res;
498 Restart();
500 if (!failed_to_start_) {
501 Report("WARNING: Failed to use and restart external symbolizer!\n");
502 failed_to_start_ = true;
504 return nullptr;
507 const char *SymbolizerProcess::SendCommandImpl(const char *command) {
508 if (input_fd_ == kInvalidFd || output_fd_ == kInvalidFd)
509 return nullptr;
510 if (!WriteToSymbolizer(command, internal_strlen(command)))
511 return nullptr;
512 if (!ReadFromSymbolizer())
513 return nullptr;
514 return buffer_.data();
517 bool SymbolizerProcess::Restart() {
518 if (input_fd_ != kInvalidFd)
519 CloseFile(input_fd_);
520 if (output_fd_ != kInvalidFd)
521 CloseFile(output_fd_);
522 return StartSymbolizerSubprocess();
525 bool SymbolizerProcess::ReadFromSymbolizer() {
526 buffer_.clear();
527 constexpr uptr max_length = 1024;
528 bool ret = true;
529 do {
530 uptr just_read = 0;
531 uptr size_before = buffer_.size();
532 buffer_.resize(size_before + max_length);
533 buffer_.resize(buffer_.capacity());
534 bool ret = ReadFromFile(input_fd_, &buffer_[size_before],
535 buffer_.size() - size_before, &just_read);
537 if (!ret)
538 just_read = 0;
540 buffer_.resize(size_before + just_read);
542 // We can't read 0 bytes, as we don't expect external symbolizer to close
543 // its stdout.
544 if (just_read == 0) {
545 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
546 ret = false;
547 break;
549 } while (!ReachedEndOfOutput(buffer_.data(), buffer_.size()));
550 buffer_.push_back('\0');
551 return ret;
554 bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) {
555 if (length == 0)
556 return true;
557 uptr write_len = 0;
558 bool success = WriteToFile(output_fd_, buffer, length, &write_len);
559 if (!success || write_len != length) {
560 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
561 return false;
563 return true;
566 #endif // !SANITIZER_SYMBOLIZER_MARKUP
568 } // namespace __sanitizer