Bumping manifests a=b2g-bump
[gecko.git] / tools / profiler / LulElf.cpp
blob1b01c393d1b05ee12d140c83daadbe09f388b89e
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
4 // Copyright (c) 2006, 2011, 2012 Google Inc.
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are
9 // met:
11 // * Redistributions of source code must retain the above copyright
12 // notice, this list of conditions and the following disclaimer.
13 // * Redistributions in binary form must reproduce the above
14 // copyright notice, this list of conditions and the following disclaimer
15 // in the documentation and/or other materials provided with the
16 // distribution.
17 // * Neither the name of Google Inc. nor the names of its
18 // contributors may be used to endorse or promote products derived from
19 // this software without specific prior written permission.
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 // Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
35 // (derived from)
36 // dump_symbols.cc: implement google_breakpad::WriteSymbolFile:
37 // Find all the debugging info in a file and dump it as a Breakpad symbol file.
39 // dump_symbols.h: Read debugging information from an ELF file, and write
40 // it out as a Breakpad symbol file.
42 // This file is derived from the following files in
43 // toolkit/crashreporter/google-breakpad:
44 // src/common/linux/dump_symbols.cc
45 // src/common/linux/elfutils.cc
46 // src/common/linux/file_id.cc
48 #include <errno.h>
49 #include <fcntl.h>
50 #include <stdio.h>
51 #include <string.h>
52 #include <sys/mman.h>
53 #include <sys/stat.h>
54 #include <unistd.h>
55 #include <arpa/inet.h>
57 #include <set>
58 #include <string>
59 #include <vector>
61 #include "mozilla/Assertions.h"
63 #include "LulPlatformMacros.h"
64 #include "LulCommonExt.h"
65 #include "LulDwarfExt.h"
66 #if defined(LUL_PLAT_arm_android)
67 # include "LulExidxExt.h"
68 #endif
69 #include "LulElfInt.h"
70 #include "LulMainInt.h"
73 #if defined(LUL_PLAT_arm_android) && !defined(SHT_ARM_EXIDX)
74 // bionic and older glibc don't define it
75 # define SHT_ARM_EXIDX (SHT_LOPROC + 1)
76 #endif
79 // This namespace contains helper functions.
80 namespace {
82 using lul::DwarfCFIToModule;
83 using lul::FindElfSectionByName;
84 using lul::GetOffset;
85 using lul::IsValidElf;
86 using lul::Module;
87 using lul::UniqueString;
88 using lul::scoped_ptr;
89 using lul::Summariser;
90 using std::string;
91 using std::vector;
92 using std::set;
//
// FDWrapper
//
// Owns a file descriptor and closes it when the wrapper is destroyed,
// unless ownership has been given up with release().  A stored value of
// -1 means "no descriptor owned".
//
class FDWrapper {
 public:
  explicit FDWrapper(int fd) : fd_(fd) {}

  ~FDWrapper() {
    if (fd_ != -1)
      close(fd_);
  }

  // Return the wrapped descriptor; ownership stays with the wrapper.
  int get() const {
    return fd_;
  }

  // Hand the descriptor back to the caller.  After this call the
  // destructor no longer closes it.
  int release() {
    int fd = fd_;
    fd_ = -1;
    return fd;
  }

 private:
  // Not copyable: two wrappers holding the same descriptor would both
  // close it.
  FDWrapper(const FDWrapper&) = delete;
  FDWrapper& operator=(const FDWrapper&) = delete;

  int fd_;
};
120 // MmapWrapper
122 // Wrapper class to make sure mapped regions are unmapped.
124 class MmapWrapper {
125 public:
126 MmapWrapper() : is_set_(false) {}
127 ~MmapWrapper() {
128 if (is_set_ && base_ != NULL) {
129 MOZ_ASSERT(size_ > 0);
130 munmap(base_, size_);
133 void set(void *mapped_address, size_t mapped_size) {
134 is_set_ = true;
135 base_ = mapped_address;
136 size_ = mapped_size;
138 void release() {
139 MOZ_ASSERT(is_set_);
140 is_set_ = false;
141 base_ = NULL;
142 size_ = 0;
145 private:
146 bool is_set_;
147 void *base_;
148 size_t size_;
152 // Set NUM_DW_REGNAMES to be the number of Dwarf register names
153 // appropriate to the machine architecture given in HEADER. Return
154 // true on success, or false if HEADER's machine architecture is not
155 // supported.
156 template<typename ElfClass>
157 bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header,
158 unsigned int* num_dw_regnames) {
159 switch (elf_header->e_machine) {
160 case EM_386:
161 *num_dw_regnames = DwarfCFIToModule::RegisterNames::I386();
162 return true;
163 case EM_ARM:
164 *num_dw_regnames = DwarfCFIToModule::RegisterNames::ARM();
165 return true;
166 case EM_X86_64:
167 *num_dw_regnames = DwarfCFIToModule::RegisterNames::X86_64();
168 return true;
169 default:
170 MOZ_ASSERT(0);
171 return false;
175 template<typename ElfClass>
176 bool LoadDwarfCFI(const string& dwarf_filename,
177 const typename ElfClass::Ehdr* elf_header,
178 const char* section_name,
179 const typename ElfClass::Shdr* section,
180 const bool eh_frame,
181 const typename ElfClass::Shdr* got_section,
182 const typename ElfClass::Shdr* text_section,
183 const bool big_endian,
184 SecMap* smap,
185 uintptr_t text_bias,
186 void (*log)(const char*)) {
187 // Find the appropriate set of register names for this file's
188 // architecture.
189 unsigned int num_dw_regs = 0;
190 if (!DwarfCFIRegisterNames<ElfClass>(elf_header, &num_dw_regs)) {
191 fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';"
192 " cannot convert DWARF call frame information\n",
193 dwarf_filename.c_str(), elf_header->e_machine);
194 return false;
197 const lul::Endianness endianness
198 = big_endian ? lul::ENDIANNESS_BIG : lul::ENDIANNESS_LITTLE;
200 // Find the call frame information and its size.
201 const char* cfi =
202 GetOffset<ElfClass, char>(elf_header, section->sh_offset);
203 size_t cfi_size = section->sh_size;
205 // Plug together the parser, handler, and their entourages.
207 // Here's a summariser, which will receive the output of the
208 // parser, create summaries, and add them to |smap|.
209 Summariser* summ = new Summariser(smap, text_bias, log);
211 DwarfCFIToModule::Reporter module_reporter(log, dwarf_filename, section_name);
212 DwarfCFIToModule handler(num_dw_regs, &module_reporter, summ);
213 lul::ByteReader byte_reader(endianness);
215 byte_reader.SetAddressSize(ElfClass::kAddrSize);
217 // Provide the base addresses for .eh_frame encoded pointers, if
218 // possible.
219 byte_reader.SetCFIDataBase(section->sh_addr, cfi);
220 if (got_section)
221 byte_reader.SetDataBase(got_section->sh_addr);
222 if (text_section)
223 byte_reader.SetTextBase(text_section->sh_addr);
225 lul::CallFrameInfo::Reporter dwarf_reporter(log, dwarf_filename,
226 section_name);
227 lul::CallFrameInfo parser(cfi, cfi_size,
228 &byte_reader, &handler, &dwarf_reporter,
229 eh_frame);
230 parser.Start();
232 delete summ;
233 return true;
#if defined(LUL_PLAT_arm_android)
// Read ARM exception-index unwind data (.ARM.exidx, plus .ARM.extab if
// present) and hand it to an ARMExToModule handler that fills |smap|.
//
// The data is read straight out of the mapped-in executable image, at
// the section addresses adjusted by TEXT_BIAS; unlike the CFI reader
// there is no separate temporary mapping.  For that reason the sections
// must lie wholly inside the area [rx_avma, rx_avma + rx_size), which
// the caller states is mapped.
//
// Returns false, without reading anything, if RX_SIZE is zero, if the
// .exidx section is empty, or if either section falls outside the rx
// area.  Otherwise runs the parser and returns true.
template<typename ElfClass>
bool LoadARMexidx(const typename ElfClass::Ehdr* elf_header,
                  const typename ElfClass::Shdr* exidx_section,
                  const typename ElfClass::Shdr* extab_section,
                  uintptr_t text_bias,
                  uintptr_t rx_avma, size_t rx_size,
                  SecMap* smap,
                  void (*log)(const char*)) {
  typedef typename ElfClass::Shdr Shdr;

  // To do this properly we need to know:
  //   * the bounds of the .ARM.exidx section in the process image
  //   * the bounds of the .ARM.extab section in the process image
  //   * the vma of the last byte in the text section associated with
  //     the .exidx
  // The first two are easy.  The third is a bit tricky; if it cannot be
  // worked out, zero is passed instead.

  // An .exidx section is always required, but .extab may be omitted
  // provided .exidx does not refer to it.  If an erroneous .exidx does
  // refer to a missing .extab, the range checks done by GET_EX_U32 in
  // ExceptionTableInfo::ExtabEntryExtract should prevent any invalid
  // memory accesses and cause the .extab to be rejected as invalid.

  const uintptr_t idx_svma = exidx_section->sh_addr;
  const uintptr_t idx_avma = idx_svma + text_bias;
  const size_t idx_size = exidx_section->sh_size;

  uintptr_t tab_avma = 0;
  size_t tab_size = 0;
  if (extab_section) {
    const uintptr_t tab_svma = extab_section->sh_addr;
    tab_avma = tab_svma + text_bias;
    tab_size = extab_section->sh_size;
  }

  // Because EXIDX is read directly out of the executing image, check
  // that the relevant sections really are covered by the rx mapped
  // area, so that reading them won't segfault.

  if (rx_size == 0) {
    // This seems sufficiently bogus that we shouldn't proceed further.
    return false;
  }

  if (idx_size == 0) {
    // There's no EXIDX data.  No point in continuing.
    return false;
  }

  const uintptr_t rx_end = rx_avma + rx_size;

  if (idx_avma < rx_avma || idx_avma + idx_size > rx_end) {
    // The mapped .exidx isn't entirely inside the rx area.
    return false;
  }

  if (extab_section &&
      (tab_avma < rx_avma || tab_avma + tab_size > rx_end)) {
    // There's an .extab section, but it isn't entirely inside the rx
    // area.
    return false;
  }

  // The sh_link field of the exidx section gives the section number
  // for the associated text section.
  uint32_t text_last_avma = 0;
  const int text_sno = exidx_section->sh_link;
  // |shdr_table| points to the section header table.
  const Shdr* shdr_table
    = GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
  const int shdr_count = elf_header->e_shnum;
  if (text_sno >= 0 && text_sno < shdr_count) {
    const Shdr& text_shdr = shdr_table[text_sno];
    if (text_shdr.sh_size > 0) {
      const uint32_t text_last_svma
        = text_shdr.sh_addr + text_shdr.sh_size - 1;
      text_last_avma = text_last_svma + text_bias;
    }
  }

  lul::ARMExToModule handler(smap, log);
  lul::ExceptionTableInfo
    parser(reinterpret_cast<const char*>(idx_avma), idx_size,
           reinterpret_cast<const char*>(tab_avma), tab_size,
           text_last_avma, &handler, log);
  parser.Start();
  return true;
}
#endif /* defined(LUL_PLAT_arm_android) */
327 bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper,
328 void** elf_header) {
329 int obj_fd = open(obj_file.c_str(), O_RDONLY);
330 if (obj_fd < 0) {
331 fprintf(stderr, "Failed to open ELF file '%s': %s\n",
332 obj_file.c_str(), strerror(errno));
333 return false;
335 FDWrapper obj_fd_wrapper(obj_fd);
336 struct stat st;
337 if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) {
338 fprintf(stderr, "Unable to fstat ELF file '%s': %s\n",
339 obj_file.c_str(), strerror(errno));
340 return false;
342 // Mapping it read-only is good enough. In any case, mapping it
343 // read-write confuses Valgrind's debuginfo acquire/discard
344 // heuristics, making it hard to profile the profiler.
345 void *obj_base = mmap(nullptr, st.st_size,
346 PROT_READ, MAP_PRIVATE, obj_fd, 0);
347 if (obj_base == MAP_FAILED) {
348 fprintf(stderr, "Failed to mmap ELF file '%s': %s\n",
349 obj_file.c_str(), strerror(errno));
350 return false;
352 map_wrapper->set(obj_base, st.st_size);
353 *elf_header = obj_base;
354 if (!IsValidElf(*elf_header)) {
355 fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
356 return false;
358 return true;
// Report the byte order recorded in ELF_HEADER's EI_DATA field.  Sets
// *BIG_ENDIAN and returns true for ELFDATA2LSB / ELFDATA2MSB.  For any
// other value prints a diagnostic to stderr, leaves *BIG_ENDIAN
// untouched and returns false.
template<typename ElfClass>
bool ElfEndianness(const typename ElfClass::Ehdr* elf_header,
                   bool* big_endian) {
  const int encoding = elf_header->e_ident[EI_DATA];

  switch (encoding) {
    case ELFDATA2LSB:
      *big_endian = false;
      return true;
    case ELFDATA2MSB:
      *big_endian = true;
      return true;
    default:
      break;
  }

  fprintf(stderr, "bad data encoding in ELF header: %d\n", encoding);
  return false;
}
//
// LoadSymbolsInfo
//
// State carried across the two LoadSymbols() calls that may be made
// for one object: one on the object itself and, if needed, one on a
// separate debug file reached through .gnu_debuglink.
//
template<typename ElfClass>
class LoadSymbolsInfo {
 public:
  typedef typename ElfClass::Addr Addr;

  // DBG_DIRS is held by reference, so it must outlive this object.
  explicit LoadSymbolsInfo(const std::vector<std::string>& dbg_dirs)
    : debug_dirs_(dbg_dirs),
      has_loading_addr_(false) {}

  // Record that SECTION has been loaded.  Asking for the same section
  // a second time (for instance once from each of two files) prints a
  // message to stderr.
  void LoadedSection(const std::string &section) {
    const bool newly_added = loaded_sections_.insert(section).second;
    if (!newly_added) {
      fprintf(stderr, "Section %s has already been loaded.\n",
              section.c_str());
    }
  }

  // Full path of the debug ELF file; empty if none has been recorded.
  std::string debuglink_file() const {
    return debuglink_file_;
  }

 private:
  // Directories in which to search for the debug ELF file.
  const std::vector<std::string>& debug_dirs_;

  // Full path to the debug ELF file.
  std::string debuglink_file_;

  // Indicates whether a loading address has been recorded.
  bool has_loading_addr_;

  // Names of the ELF sections loaded so far, across calls to
  // LoadSymbols().
  std::set<std::string> loaded_sections_;
};
// Return the preferred loading address of the binary: the p_vaddr of
// the first PT_LOAD entry among the NHEADER program headers starting at
// PROGRAM_HEADERS, or 0 if there is no PT_LOAD entry.
//
// For non-PIC executables (e_type == ET_EXEC) the load address is the
// start address of the first PT_LOAD segment.  (ELF requires the
// segments to be sorted by load address.)  For PIC executables and
// dynamic libraries (e_type == ET_DYN) this address will normally be
// zero.
template<typename ElfClass>
typename ElfClass::Addr GetLoadingAddress(
    const typename ElfClass::Phdr* program_headers,
    int nheader) {
  typedef typename ElfClass::Phdr Phdr;

  int remaining = nheader;
  for (const Phdr* ph = program_headers; remaining > 0; ++ph, --remaining) {
    if (ph->p_type != PT_LOAD)
      continue;
    return ph->p_vaddr;
  }
  return 0;
}
442 template<typename ElfClass>
443 bool LoadSymbols(const string& obj_file,
444 const bool big_endian,
445 const typename ElfClass::Ehdr* elf_header,
446 const bool read_gnu_debug_link,
447 LoadSymbolsInfo<ElfClass>* info,
448 SecMap* smap,
449 void* rx_avma, size_t rx_size,
450 void (*log)(const char*)) {
451 typedef typename ElfClass::Phdr Phdr;
452 typedef typename ElfClass::Shdr Shdr;
454 char buf[500];
455 snprintf(buf, sizeof(buf), "LoadSymbols: BEGIN %s\n", obj_file.c_str());
456 buf[sizeof(buf)-1] = 0;
457 log(buf);
459 // This is how the text bias is calculated.
460 // BEGIN CALCULATE BIAS
461 uintptr_t loading_addr = GetLoadingAddress<ElfClass>(
462 GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff),
463 elf_header->e_phnum);
464 uintptr_t text_bias = ((uintptr_t)rx_avma) - loading_addr;
465 snprintf(buf, sizeof(buf),
466 "LoadSymbols: rx_avma=%llx, text_bias=%llx",
467 (unsigned long long int)(uintptr_t)rx_avma,
468 (unsigned long long int)text_bias);
469 buf[sizeof(buf)-1] = 0;
470 log(buf);
471 // END CALCULATE BIAS
473 const Shdr* sections =
474 GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
475 const Shdr* section_names = sections + elf_header->e_shstrndx;
476 const char* names =
477 GetOffset<ElfClass, char>(elf_header, section_names->sh_offset);
478 const char *names_end = names + section_names->sh_size;
479 bool found_usable_info = false;
481 // Dwarf Call Frame Information (CFI) is actually independent from
482 // the other DWARF debugging information, and can be used alone.
483 const Shdr* dwarf_cfi_section =
484 FindElfSectionByName<ElfClass>(".debug_frame", SHT_PROGBITS,
485 sections, names, names_end,
486 elf_header->e_shnum);
487 if (dwarf_cfi_section) {
488 // Ignore the return value of this function; even without call frame
489 // information, the other debugging information could be perfectly
490 // useful.
491 info->LoadedSection(".debug_frame");
492 bool result =
493 LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame",
494 dwarf_cfi_section, false, 0, 0, big_endian,
495 smap, text_bias, log);
496 found_usable_info = found_usable_info || result;
497 if (result)
498 log("LoadSymbols: read CFI from .debug_frame");
501 // Linux C++ exception handling information can also provide
502 // unwinding data.
503 const Shdr* eh_frame_section =
504 FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS,
505 sections, names, names_end,
506 elf_header->e_shnum);
507 if (eh_frame_section) {
508 // Pointers in .eh_frame data may be relative to the base addresses of
509 // certain sections. Provide those sections if present.
510 const Shdr* got_section =
511 FindElfSectionByName<ElfClass>(".got", SHT_PROGBITS,
512 sections, names, names_end,
513 elf_header->e_shnum);
514 const Shdr* text_section =
515 FindElfSectionByName<ElfClass>(".text", SHT_PROGBITS,
516 sections, names, names_end,
517 elf_header->e_shnum);
518 info->LoadedSection(".eh_frame");
519 // As above, ignore the return value of this function.
520 bool result =
521 LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".eh_frame",
522 eh_frame_section, true,
523 got_section, text_section, big_endian,
524 smap, text_bias, log);
525 found_usable_info = found_usable_info || result;
526 if (result)
527 log("LoadSymbols: read CFI from .eh_frame");
530 # if defined(LUL_PLAT_arm_android)
531 // ARM has special unwind tables that can be used. .exidx is
532 // always required, and .extab is normally required, but may
533 // be omitted if it is empty. See comments on LoadARMexidx()
534 // for more details.
535 const Shdr* arm_exidx_section =
536 FindElfSectionByName<ElfClass>(".ARM.exidx", SHT_ARM_EXIDX,
537 sections, names, names_end,
538 elf_header->e_shnum);
539 const Shdr* arm_extab_section =
540 FindElfSectionByName<ElfClass>(".ARM.extab", SHT_PROGBITS,
541 sections, names, names_end,
542 elf_header->e_shnum);
543 const Shdr* debug_info_section =
544 FindElfSectionByName<ElfClass>(".debug_info", SHT_PROGBITS,
545 sections, names, names_end,
546 elf_header->e_shnum);
547 // Only load information from this section if there isn't a .debug_info
548 // section.
549 if (!debug_info_section && arm_exidx_section) {
550 info->LoadedSection(".ARM.exidx");
551 if (arm_extab_section)
552 info->LoadedSection(".ARM.extab");
553 bool result = LoadARMexidx<ElfClass>(elf_header,
554 arm_exidx_section, arm_extab_section,
555 text_bias,
556 reinterpret_cast<uintptr_t>(rx_avma),
557 rx_size, smap, log);
558 found_usable_info = found_usable_info || result;
559 if (result)
560 log("LoadSymbols: read EXIDX from .ARM.{exidx,extab}");
562 # endif /* defined(LUL_PLAT_arm_android) */
564 snprintf(buf, sizeof(buf), "LoadSymbols: END %s\n", obj_file.c_str());
565 buf[sizeof(buf)-1] = 0;
566 log(buf);
568 return found_usable_info;
// Return the breakpad symbol file identifier for the machine
// architecture recorded in ELF_HEADER, or NULL if the architecture is
// not one of those listed below.
template<typename ElfClass>
const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
  typedef typename ElfClass::Half Half;

  struct KnownArch {
    unsigned int machine;
    const char* name;
  };
  static const KnownArch kKnownArchs[] = {
    { EM_386,     "x86"     },
    { EM_ARM,     "arm"     },
    { EM_MIPS,    "mips"    },
    { EM_PPC64,   "ppc64"   },
    { EM_PPC,     "ppc"     },
    { EM_S390,    "s390"    },
    { EM_SPARC,   "sparc"   },
    { EM_SPARCV9, "sparcv9" },
    { EM_X86_64,  "x86_64"  },
  };

  const Half arch = elf_header->e_machine;
  const size_t count = sizeof(kKnownArchs) / sizeof(kKnownArchs[0]);
  for (size_t i = 0; i < count; ++i) {
    if (kKnownArchs[i].machine == arch)
      return kKnownArchs[i].name;
  }
  return NULL;
}
591 // Format the Elf file identifier in IDENTIFIER as a UUID with the
592 // dashes removed.
593 string FormatIdentifier(unsigned char identifier[16]) {
594 char identifier_str[40];
595 lul::FileID::ConvertIdentifierToString(
596 identifier,
597 identifier_str,
598 sizeof(identifier_str));
599 string id_no_dash;
600 for (int i = 0; identifier_str[i] != '\0'; ++i)
601 if (identifier_str[i] != '-')
602 id_no_dash += identifier_str[i];
603 // Add an extra "0" by the end. PDB files on Windows have an 'age'
604 // number appended to the end of the file identifier; this isn't
605 // really used or necessary on other platforms, but be consistent.
606 id_no_dash += '0';
607 return id_no_dash;
// Return the non-directory portion of FILENAME: the portion after the
// last slash, or the whole filename if there are no slashes.
//
// This is done directly on the string, so the result does not depend
// on which basename() flavour the platform provides, and no temporary
// C string is allocated.  A name ending in a slash gives an empty
// result.
std::string BaseFileName(const std::string &filename) {
  const std::string::size_type last_slash = filename.rfind('/');
  if (last_slash == std::string::npos)
    return filename;
  return filename.substr(last_slash + 1);
}
620 template<typename ElfClass>
621 bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header,
622 const string& obj_filename,
623 const vector<string>& debug_dirs,
624 SecMap* smap, void* rx_avma, size_t rx_size,
625 void (*log)(const char*)) {
626 typedef typename ElfClass::Ehdr Ehdr;
628 unsigned char identifier[16];
629 if (!lul
630 ::FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) {
631 fprintf(stderr, "%s: unable to generate file identifier\n",
632 obj_filename.c_str());
633 return false;
636 const char *architecture = ElfArchitecture<ElfClass>(elf_header);
637 if (!architecture) {
638 fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
639 obj_filename.c_str(), elf_header->e_machine);
640 return false;
643 // Figure out what endianness this file is.
644 bool big_endian;
645 if (!ElfEndianness<ElfClass>(elf_header, &big_endian))
646 return false;
648 string name = BaseFileName(obj_filename);
649 string os = "Linux";
650 string id = FormatIdentifier(identifier);
652 LoadSymbolsInfo<ElfClass> info(debug_dirs);
653 if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header,
654 !debug_dirs.empty(), &info,
655 smap, rx_avma, rx_size, log)) {
656 const string debuglink_file = info.debuglink_file();
657 if (debuglink_file.empty())
658 return false;
660 // Load debuglink ELF file.
661 fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str());
662 MmapWrapper debug_map_wrapper;
663 Ehdr* debug_elf_header = NULL;
664 if (!LoadELF(debuglink_file, &debug_map_wrapper,
665 reinterpret_cast<void**>(&debug_elf_header)))
666 return false;
667 // Sanity checks to make sure everything matches up.
668 const char *debug_architecture =
669 ElfArchitecture<ElfClass>(debug_elf_header);
670 if (!debug_architecture) {
671 fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
672 debuglink_file.c_str(), debug_elf_header->e_machine);
673 return false;
675 if (strcmp(architecture, debug_architecture)) {
676 fprintf(stderr, "%s with ELF machine architecture %s does not match "
677 "%s with ELF architecture %s\n",
678 debuglink_file.c_str(), debug_architecture,
679 obj_filename.c_str(), architecture);
680 return false;
683 bool debug_big_endian;
684 if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian))
685 return false;
686 if (debug_big_endian != big_endian) {
687 fprintf(stderr, "%s and %s does not match in endianness\n",
688 obj_filename.c_str(), debuglink_file.c_str());
689 return false;
692 if (!LoadSymbols<ElfClass>(debuglink_file, debug_big_endian,
693 debug_elf_header, false, &info,
694 smap, rx_avma, rx_size, log)) {
695 return false;
699 return true;
702 } // namespace (anon)
705 namespace lul {
707 bool ReadSymbolDataInternal(const uint8_t* obj_file,
708 const string& obj_filename,
709 const vector<string>& debug_dirs,
710 SecMap* smap, void* rx_avma, size_t rx_size,
711 void (*log)(const char*)) {
713 if (!IsValidElf(obj_file)) {
714 fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str());
715 return false;
718 int elfclass = ElfClass(obj_file);
719 if (elfclass == ELFCLASS32) {
720 return ReadSymbolDataElfClass<ElfClass32>(
721 reinterpret_cast<const Elf32_Ehdr*>(obj_file),
722 obj_filename, debug_dirs, smap, rx_avma, rx_size, log);
724 if (elfclass == ELFCLASS64) {
725 return ReadSymbolDataElfClass<ElfClass64>(
726 reinterpret_cast<const Elf64_Ehdr*>(obj_file),
727 obj_filename, debug_dirs, smap, rx_avma, rx_size, log);
730 return false;
733 bool ReadSymbolData(const string& obj_file,
734 const vector<string>& debug_dirs,
735 SecMap* smap, void* rx_avma, size_t rx_size,
736 void (*log)(const char*)) {
737 MmapWrapper map_wrapper;
738 void* elf_header = NULL;
739 if (!LoadELF(obj_file, &map_wrapper, &elf_header))
740 return false;
742 return ReadSymbolDataInternal(reinterpret_cast<uint8_t*>(elf_header),
743 obj_file, debug_dirs,
744 smap, rx_avma, rx_size, log);
748 namespace {
750 template<typename ElfClass>
751 void FindElfClassSection(const char *elf_base,
752 const char *section_name,
753 typename ElfClass::Word section_type,
754 const void **section_start,
755 int *section_size) {
756 typedef typename ElfClass::Ehdr Ehdr;
757 typedef typename ElfClass::Shdr Shdr;
759 MOZ_ASSERT(elf_base);
760 MOZ_ASSERT(section_start);
761 MOZ_ASSERT(section_size);
763 MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0);
765 const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base);
766 MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass);
768 const Shdr* sections =
769 GetOffset<ElfClass,Shdr>(elf_header, elf_header->e_shoff);
770 const Shdr* section_names = sections + elf_header->e_shstrndx;
771 const char* names =
772 GetOffset<ElfClass,char>(elf_header, section_names->sh_offset);
773 const char *names_end = names + section_names->sh_size;
775 const Shdr* section =
776 FindElfSectionByName<ElfClass>(section_name, section_type,
777 sections, names, names_end,
778 elf_header->e_shnum);
780 if (section != NULL && section->sh_size > 0) {
781 *section_start = elf_base + section->sh_offset;
782 *section_size = section->sh_size;
786 template<typename ElfClass>
787 void FindElfClassSegment(const char *elf_base,
788 typename ElfClass::Word segment_type,
789 const void **segment_start,
790 int *segment_size) {
791 typedef typename ElfClass::Ehdr Ehdr;
792 typedef typename ElfClass::Phdr Phdr;
794 MOZ_ASSERT(elf_base);
795 MOZ_ASSERT(segment_start);
796 MOZ_ASSERT(segment_size);
798 MOZ_ASSERT(strncmp(elf_base, ELFMAG, SELFMAG) == 0);
800 const Ehdr* elf_header = reinterpret_cast<const Ehdr*>(elf_base);
801 MOZ_ASSERT(elf_header->e_ident[EI_CLASS] == ElfClass::kClass);
803 const Phdr* phdrs =
804 GetOffset<ElfClass,Phdr>(elf_header, elf_header->e_phoff);
806 for (int i = 0; i < elf_header->e_phnum; ++i) {
807 if (phdrs[i].p_type == segment_type) {
808 *segment_start = elf_base + phdrs[i].p_offset;
809 *segment_size = phdrs[i].p_filesz;
810 return;
815 } // namespace (anon)
// Return true if the memory at ELF_BASE starts with the ELF magic
// bytes (ELFMAG).
bool IsValidElf(const void* elf_base) {
  const char* magic = reinterpret_cast<const char*>(elf_base);
  const int difference = strncmp(magic, ELFMAG, SELFMAG);
  return difference == 0;
}
// Return the EI_CLASS byte of the ELF header at ELF_BASE.
int ElfClass(const void* elf_base) {
  typedef ElfW(Ehdr) NativeEhdr;
  const NativeEhdr* ehdr = static_cast<const NativeEhdr*>(elf_base);
  const int cls = ehdr->e_ident[EI_CLASS];
  return cls;
}
829 bool FindElfSection(const void *elf_mapped_base,
830 const char *section_name,
831 uint32_t section_type,
832 const void **section_start,
833 int *section_size,
834 int *elfclass) {
835 MOZ_ASSERT(elf_mapped_base);
836 MOZ_ASSERT(section_start);
837 MOZ_ASSERT(section_size);
839 *section_start = NULL;
840 *section_size = 0;
842 if (!IsValidElf(elf_mapped_base))
843 return false;
845 int cls = ElfClass(elf_mapped_base);
846 if (elfclass) {
847 *elfclass = cls;
850 const char* elf_base =
851 static_cast<const char*>(elf_mapped_base);
853 if (cls == ELFCLASS32) {
854 FindElfClassSection<ElfClass32>(elf_base, section_name, section_type,
855 section_start, section_size);
856 return *section_start != NULL;
857 } else if (cls == ELFCLASS64) {
858 FindElfClassSection<ElfClass64>(elf_base, section_name, section_type,
859 section_start, section_size);
860 return *section_start != NULL;
863 return false;
866 bool FindElfSegment(const void *elf_mapped_base,
867 uint32_t segment_type,
868 const void **segment_start,
869 int *segment_size,
870 int *elfclass) {
871 MOZ_ASSERT(elf_mapped_base);
872 MOZ_ASSERT(segment_start);
873 MOZ_ASSERT(segment_size);
875 *segment_start = NULL;
876 *segment_size = 0;
878 if (!IsValidElf(elf_mapped_base))
879 return false;
881 int cls = ElfClass(elf_mapped_base);
882 if (elfclass) {
883 *elfclass = cls;
886 const char* elf_base =
887 static_cast<const char*>(elf_mapped_base);
889 if (cls == ELFCLASS32) {
890 FindElfClassSegment<ElfClass32>(elf_base, segment_type,
891 segment_start, segment_size);
892 return *segment_start != NULL;
893 } else if (cls == ELFCLASS64) {
894 FindElfClassSegment<ElfClass64>(elf_base, segment_type,
895 segment_start, segment_size);
896 return *segment_start != NULL;
899 return false;
903 // (derived from)
904 // file_id.cc: Return a unique identifier for a file
906 // See file_id.h for documentation
909 // ELF note name and desc are 32-bits word padded.
910 #define NOTE_PADDING(a) ((a + 3) & ~3)
912 // These functions are also used inside the crashed process, so be safe
913 // and use the syscall/libc wrappers instead of direct syscalls or libc.
915 template<typename ElfClass>
916 static bool ElfClassBuildIDNoteIdentifier(const void *section, int length,
917 uint8_t identifier[kMDGUIDSize]) {
918 typedef typename ElfClass::Nhdr Nhdr;
920 const void* section_end = reinterpret_cast<const char*>(section) + length;
921 const Nhdr* note_header = reinterpret_cast<const Nhdr*>(section);
922 while (reinterpret_cast<const void *>(note_header) < section_end) {
923 if (note_header->n_type == NT_GNU_BUILD_ID)
924 break;
925 note_header = reinterpret_cast<const Nhdr*>(
926 reinterpret_cast<const char*>(note_header) + sizeof(Nhdr) +
927 NOTE_PADDING(note_header->n_namesz) +
928 NOTE_PADDING(note_header->n_descsz));
930 if (reinterpret_cast<const void *>(note_header) >= section_end ||
931 note_header->n_descsz == 0) {
932 return false;
935 const char* build_id = reinterpret_cast<const char*>(note_header) +
936 sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz);
937 // Copy as many bits of the build ID as will fit
938 // into the GUID space.
939 memset(identifier, 0, kMDGUIDSize);
940 memcpy(identifier, build_id,
941 std::min(kMDGUIDSize, (size_t)note_header->n_descsz));
943 return true;
946 // Attempt to locate a .note.gnu.build-id section in an ELF binary
947 // and copy as many bytes of it as will fit into |identifier|.
948 static bool FindElfBuildIDNote(const void *elf_mapped_base,
949 uint8_t identifier[kMDGUIDSize]) {
950 void* note_section;
951 int note_size, elfclass;
952 if ((!FindElfSegment(elf_mapped_base, PT_NOTE,
953 (const void**)&note_section, &note_size, &elfclass) ||
954 note_size == 0) &&
955 (!FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE,
956 (const void**)&note_section, &note_size, &elfclass) ||
957 note_size == 0)) {
958 return false;
961 if (elfclass == ELFCLASS32) {
962 return ElfClassBuildIDNoteIdentifier<ElfClass32>(note_section, note_size,
963 identifier);
964 } else if (elfclass == ELFCLASS64) {
965 return ElfClassBuildIDNoteIdentifier<ElfClass64>(note_section, note_size,
966 identifier);
969 return false;
972 // Attempt to locate the .text section of an ELF binary and generate
973 // a simple hash by XORing the first page worth of bytes into |identifier|.
974 static bool HashElfTextSection(const void *elf_mapped_base,
975 uint8_t identifier[kMDGUIDSize]) {
976 void* text_section;
977 int text_size;
978 if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS,
979 (const void**)&text_section, &text_size, NULL) ||
980 text_size == 0) {
981 return false;
984 memset(identifier, 0, kMDGUIDSize);
985 const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section);
986 const uint8_t* ptr_end = ptr + std::min(text_size, 4096);
987 while (ptr < ptr_end) {
988 for (unsigned i = 0; i < kMDGUIDSize; i++)
989 identifier[i] ^= ptr[i];
990 ptr += kMDGUIDSize;
992 return true;
995 // static
996 bool FileID::ElfFileIdentifierFromMappedFile(const void* base,
997 uint8_t identifier[kMDGUIDSize]) {
998 // Look for a build id note first.
999 if (FindElfBuildIDNote(base, identifier))
1000 return true;
1002 // Fall back on hashing the first page of the text section.
1003 return HashElfTextSection(base, identifier);
1006 // static
1007 void FileID::ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize],
1008 char* buffer, int buffer_length) {
1009 uint8_t identifier_swapped[kMDGUIDSize];
1011 // Endian-ness swap to match dump processor expectation.
1012 memcpy(identifier_swapped, identifier, kMDGUIDSize);
1013 uint32_t* data1 = reinterpret_cast<uint32_t*>(identifier_swapped);
1014 *data1 = htonl(*data1);
1015 uint16_t* data2 = reinterpret_cast<uint16_t*>(identifier_swapped + 4);
1016 *data2 = htons(*data2);
1017 uint16_t* data3 = reinterpret_cast<uint16_t*>(identifier_swapped + 6);
1018 *data3 = htons(*data3);
1020 int buffer_idx = 0;
1021 for (unsigned int idx = 0;
1022 (buffer_idx < buffer_length) && (idx < kMDGUIDSize);
1023 ++idx) {
1024 int hi = (identifier_swapped[idx] >> 4) & 0x0F;
1025 int lo = (identifier_swapped[idx]) & 0x0F;
1027 if (idx == 4 || idx == 6 || idx == 8 || idx == 10)
1028 buffer[buffer_idx++] = '-';
1030 buffer[buffer_idx++] = (hi >= 10) ? 'A' + hi - 10 : '0' + hi;
1031 buffer[buffer_idx++] = (lo >= 10) ? 'A' + lo - 10 : '0' + lo;
1034 // NULL terminate
1035 buffer[(buffer_idx < buffer_length) ? buffer_idx : buffer_idx - 1] = 0;
1038 } // namespace lul