// tools/profiler/EHABIStackWalk.cpp (imported from gecko.git;
// scraper/revision metadata removed)
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 * This is an implementation of stack unwinding according to a subset
 * of the ARM Exception Handling ABI, as described in:
 *   http://infocenter.arm.com/help/topic/com.arm.doc.ihi0038a/IHI0038A_ehabi.pdf
 *
 * This handles only the ARM-defined "personality routines" (chapter
 * 9), and does not track the values of FP registers, because profiling
 * needs only the chain of PC/SP values.
 *
 * Because the exception handling info may not be accurate for all
 * possible places where an async signal could occur (e.g., in a
 * prologue or epilogue), this bounds-checks all stack accesses.
 *
 * This file uses "struct" for structures in the exception tables and
 * "class" otherwise.  We should avoid violating the C++11
 * standard-layout rules in the former.
 */
#include "EHABIStackWalk.h"

#include "shared-libraries.h"
#include "platform.h"

#include "mozilla/Atomics.h"
#include "mozilla/Attributes.h"
#include "mozilla/DebugOnly.h"
#include "mozilla/Endian.h"

#include <algorithm>
#include <elf.h>
#include <stdint.h>
#include <string.h>
#include <string>
#include <vector>
// The ARM exception-index program header type; older <elf.h> headers
// may not define it.
#ifndef PT_ARM_EXIDX
#define PT_ARM_EXIDX 0x70000001
#endif

// Bug 1082817: the linker shipped with ICS-era B2G does not always
// emit the EXIDX section sorted by address, even though the spec
// requires it.  On such systems we copy pointers into the index into
// a side array, sort that, and binary-search it; everywhere else we
// search the index in place (avoiding the time and space overhead of
// the indirection).
#if defined(ANDROID_VERSION) && ANDROID_VERSION < 16
#define HAVE_UNSORTED_EXIDX
#endif
54 namespace mozilla {
56 struct PRel31 {
57 uint32_t mBits;
58 bool topBit() const { return mBits & 0x80000000; }
59 uint32_t value() const { return mBits & 0x7fffffff; }
60 int32_t offset() const { return (static_cast<int32_t>(mBits) << 1) >> 1; }
61 const void *compute() const {
62 return reinterpret_cast<const char *>(this) + offset();
64 private:
65 PRel31(const PRel31 &copied) = delete;
66 PRel31() = delete;
69 struct EHEntry {
70 PRel31 startPC;
71 PRel31 exidx;
72 private:
73 EHEntry(const EHEntry &copied) = delete;
74 EHEntry() = delete;
77 class EHState {
78 // Note that any core register can be used as a "frame pointer" to
79 // influence the unwinding process, so this must track all of them.
80 uint32_t mRegs[16];
81 public:
82 bool unwind(const EHEntry *aEntry, const void *stackBase);
83 uint32_t &operator[](int i) { return mRegs[i]; }
84 const uint32_t &operator[](int i) const { return mRegs[i]; }
85 EHState(const mcontext_t &);
// Indices of the ARM registers that have dedicated roles.
enum {
  R_SP = 13,
  R_LR = 14,
  R_PC = 15
};
#ifdef HAVE_UNSORTED_EXIDX
// Sortable wrapper for a pointer into the EXIDX section; used to
// build the sorted side array described above.
class EHEntryHandle {
  const EHEntry *mValue;
public:
  EHEntryHandle(const EHEntry *aEntry) : mValue(aEntry) { }
  const EHEntry *value() const { return mValue; }
};

// Handles order by the start address of the function they describe.
bool operator<(const EHEntryHandle &lhs, const EHEntryHandle &rhs) {
  return lhs.value()->startPC.compute() < rhs.value()->startPC.compute();
}
#endif
107 class EHTable {
108 uint32_t mStartPC;
109 uint32_t mEndPC;
110 uint32_t mLoadOffset;
111 #ifdef HAVE_UNSORTED_EXIDX
112 // In principle we should be able to binary-search the index section in
113 // place, but the ICS toolchain's linker is noncompliant and produces
114 // indices that aren't entirely sorted (e.g., libc). So we have this:
115 std::vector<EHEntryHandle> mEntries;
116 typedef std::vector<EHEntryHandle>::const_iterator EntryIterator;
117 EntryIterator entriesBegin() const { return mEntries.begin(); }
118 EntryIterator entriesEnd() const { return mEntries.end(); }
119 static const EHEntry* entryGet(EntryIterator aEntry) {
120 return aEntry->value();
122 #else
123 typedef const EHEntry *EntryIterator;
124 EntryIterator mEntriesBegin, mEntriesEnd;
125 EntryIterator entriesBegin() const { return mEntriesBegin; }
126 EntryIterator entriesEnd() const { return mEntriesEnd; }
127 static const EHEntry* entryGet(EntryIterator aEntry) { return aEntry; }
128 #endif
129 std::string mName;
130 public:
131 EHTable(const void *aELF, size_t aSize, const std::string &aName);
132 const EHEntry *lookup(uint32_t aPC) const;
133 bool isValid() const { return entriesEnd() != entriesBegin(); }
134 const std::string &name() const { return mName; }
135 uint32_t startPC() const { return mStartPC; }
136 uint32_t endPC() const { return mEndPC; }
137 uint32_t loadOffset() const { return mLoadOffset; }
140 class EHAddrSpace {
141 std::vector<uint32_t> mStarts;
142 std::vector<EHTable> mTables;
143 static mozilla::Atomic<const EHAddrSpace*> sCurrent;
144 public:
145 explicit EHAddrSpace(const std::vector<EHTable>& aTables);
146 const EHTable *lookup(uint32_t aPC) const;
147 static void Update();
148 static const EHAddrSpace *Get();
152 void EHABIStackWalkInit()
154 EHAddrSpace::Update();
157 size_t EHABIStackWalk(const mcontext_t &aContext, void *stackBase,
158 void **aSPs, void **aPCs, const size_t aNumFrames)
160 const EHAddrSpace *space = EHAddrSpace::Get();
161 EHState state(aContext);
162 size_t count = 0;
164 while (count < aNumFrames) {
165 uint32_t pc = state[R_PC], sp = state[R_SP];
166 aPCs[count] = reinterpret_cast<void *>(pc);
167 aSPs[count] = reinterpret_cast<void *>(sp);
168 count++;
170 if (!space)
171 break;
172 // TODO: cache these lookups. Binary-searching libxul is
173 // expensive (possibly more expensive than doing the actual
174 // unwind), and even a small cache should help.
175 const EHTable *table = space->lookup(pc);
176 if (!table)
177 break;
178 const EHEntry *entry = table->lookup(pc);
179 if (!entry)
180 break;
181 if (!state.unwind(entry, stackBase))
182 break;
185 return count;
189 class EHInterp {
190 public:
191 // Note that stackLimit is exclusive and stackBase is inclusive
192 // (i.e, stackLimit < SP <= stackBase), following the convention
193 // set by the AAPCS spec.
194 EHInterp(EHState &aState, const EHEntry *aEntry,
195 uint32_t aStackLimit, uint32_t aStackBase)
196 : mState(aState),
197 mStackLimit(aStackLimit),
198 mStackBase(aStackBase),
199 mNextWord(0),
200 mWordsLeft(0),
201 mFailed(false)
203 const PRel31 &exidx = aEntry->exidx;
204 uint32_t firstWord;
206 if (exidx.mBits == 1) { // EXIDX_CANTUNWIND
207 mFailed = true;
208 return;
210 if (exidx.topBit()) {
211 firstWord = exidx.mBits;
212 } else {
213 mNextWord = reinterpret_cast<const uint32_t *>(exidx.compute());
214 firstWord = *mNextWord++;
217 switch (firstWord >> 24) {
218 case 0x80: // short
219 mWord = firstWord << 8;
220 mBytesLeft = 3;
221 break;
222 case 0x81: case 0x82: // long; catch descriptor size ignored
223 mWord = firstWord << 16;
224 mBytesLeft = 2;
225 mWordsLeft = (firstWord >> 16) & 0xff;
226 break;
227 default:
228 // unknown personality
229 mFailed = true;
233 bool unwind();
235 private:
236 // TODO: GCC has been observed not CSEing repeated reads of
237 // mState[R_SP] with writes to mFailed between them, suggesting that
238 // it hasn't determined that they can't alias and is thus missing
239 // optimization opportunities. So, we may want to flatten EHState
240 // into this class; this may also make the code simpler.
241 EHState &mState;
242 uint32_t mStackLimit;
243 uint32_t mStackBase;
244 const uint32_t *mNextWord;
245 uint32_t mWord;
246 uint8_t mWordsLeft;
247 uint8_t mBytesLeft;
248 bool mFailed;
250 enum {
251 I_ADDSP = 0x00, // 0sxxxxxx (subtract if s)
252 M_ADDSP = 0x80,
253 I_POPMASK = 0x80, // 1000iiii iiiiiiii (if any i set)
254 M_POPMASK = 0xf0,
255 I_MOVSP = 0x90, // 1001nnnn
256 M_MOVSP = 0xf0,
257 I_POPN = 0xa0, // 1010lnnn
258 M_POPN = 0xf0,
259 I_FINISH = 0xb0, // 10110000
260 I_POPLO = 0xb1, // 10110001 0000iiii (if any i set)
261 I_ADDSPBIG = 0xb2, // 10110010 uleb128
262 I_POPFDX = 0xb3, // 10110011 sssscccc
263 I_POPFDX8 = 0xb8, // 10111nnn
264 M_POPFDX8 = 0xf8,
265 // "Intel Wireless MMX" extensions omitted.
266 I_POPFDD = 0xc8, // 1100100h sssscccc
267 M_POPFDD = 0xfe,
268 I_POPFDD8 = 0xd0, // 11010nnn
269 M_POPFDD8 = 0xf8
272 uint8_t next() {
273 if (mBytesLeft == 0) {
274 if (mWordsLeft == 0) {
275 return I_FINISH;
277 mWordsLeft--;
278 mWord = *mNextWord++;
279 mBytesLeft = 4;
281 mBytesLeft--;
282 mWord = (mWord << 8) | (mWord >> 24); // rotate
283 return mWord;
286 uint32_t &vSP() { return mState[R_SP]; }
287 uint32_t *ptrSP() { return reinterpret_cast<uint32_t *>(vSP()); }
289 void checkStackBase() { if (vSP() > mStackBase) mFailed = true; }
290 void checkStackLimit() { if (vSP() <= mStackLimit) mFailed = true; }
291 void checkStackAlign() { if ((vSP() & 3) != 0) mFailed = true; }
292 void checkStack() {
293 checkStackBase();
294 checkStackLimit();
295 checkStackAlign();
298 void popRange(uint8_t first, uint8_t last, uint16_t mask) {
299 bool hasSP = false;
300 uint32_t tmpSP;
301 if (mask == 0)
302 mFailed = true;
303 for (uint8_t r = first; r <= last; ++r) {
304 if (mask & 1) {
305 if (r == R_SP) {
306 hasSP = true;
307 tmpSP = *ptrSP();
308 } else
309 mState[r] = *ptrSP();
310 vSP() += 4;
311 checkStackBase();
312 if (mFailed)
313 return;
315 mask >>= 1;
317 if (hasSP) {
318 vSP() = tmpSP;
319 checkStack();
325 bool EHState::unwind(const EHEntry *aEntry, const void *stackBasePtr) {
326 // The unwinding program cannot set SP to less than the initial value.
327 uint32_t stackLimit = mRegs[R_SP] - 4;
328 uint32_t stackBase = reinterpret_cast<uint32_t>(stackBasePtr);
329 EHInterp interp(*this, aEntry, stackLimit, stackBase);
330 return interp.unwind();
333 bool EHInterp::unwind() {
334 mState[R_PC] = 0;
335 checkStack();
336 while (!mFailed) {
337 uint8_t insn = next();
338 #if DEBUG_EHABI_UNWIND
339 LOGF("unwind insn = %02x", (unsigned)insn);
340 #endif
341 // Try to put the common cases first.
343 // 00xxxxxx: vsp = vsp + (xxxxxx << 2) + 4
344 // 01xxxxxx: vsp = vsp - (xxxxxx << 2) - 4
345 if ((insn & M_ADDSP) == I_ADDSP) {
346 uint32_t offset = ((insn & 0x3f) << 2) + 4;
347 if (insn & 0x40) {
348 vSP() -= offset;
349 checkStackLimit();
350 } else {
351 vSP() += offset;
352 checkStackBase();
354 continue;
357 // 10100nnn: Pop r4-r[4+nnn]
358 // 10101nnn: Pop r4-r[4+nnn], r14
359 if ((insn & M_POPN) == I_POPN) {
360 uint8_t n = (insn & 0x07) + 1;
361 bool lr = insn & 0x08;
362 uint32_t *ptr = ptrSP();
363 vSP() += (n + (lr ? 1 : 0)) * 4;
364 checkStackBase();
365 for (uint8_t r = 4; r < 4 + n; ++r)
366 mState[r] = *ptr++;
367 if (lr)
368 mState[R_LR] = *ptr++;
369 continue;
372 // 1011000: Finish
373 if (insn == I_FINISH) {
374 if (mState[R_PC] == 0) {
375 mState[R_PC] = mState[R_LR];
376 // Non-standard change (bug 916106): Prevent the caller from
377 // re-using LR. Since the caller is by definition not a leaf
378 // routine, it will have to restore LR from somewhere to
379 // return to its own caller, so we can safely zero it here.
380 // This makes a difference only if an error in unwinding
381 // (e.g., caused by starting from within a prologue/epilogue)
382 // causes us to load a pointer to a leaf routine as LR; if we
383 // don't do something, we'll go into an infinite loop of
384 // "returning" to that same function.
385 mState[R_LR] = 0;
387 return true;
390 // 1001nnnn: Set vsp = r[nnnn]
391 if ((insn & M_MOVSP) == I_MOVSP) {
392 vSP() = mState[insn & 0x0f];
393 checkStack();
394 continue;
397 // 11001000 sssscccc: Pop VFP regs D[16+ssss]-D[16+ssss+cccc] (as FLDMFDD)
398 // 11001001 sssscccc: Pop VFP regs D[ssss]-D[ssss+cccc] (as FLDMFDD)
399 if ((insn & M_POPFDD) == I_POPFDD) {
400 uint8_t n = (next() & 0x0f) + 1;
401 // Note: if the 16+ssss+cccc > 31, the encoding is reserved.
402 // As the space is currently unused, we don't try to check.
403 vSP() += 8 * n;
404 checkStackBase();
405 continue;
408 // 11010nnn: Pop VFP regs D[8]-D[8+nnn] (as FLDMFDD)
409 if ((insn & M_POPFDD8) == I_POPFDD8) {
410 uint8_t n = (insn & 0x07) + 1;
411 vSP() += 8 * n;
412 checkStackBase();
413 continue;
416 // 10110010 uleb128: vsp = vsp + 0x204 + (uleb128 << 2)
417 if (insn == I_ADDSPBIG) {
418 uint32_t acc = 0;
419 uint8_t shift = 0;
420 uint8_t byte;
421 do {
422 if (shift >= 32)
423 return false;
424 byte = next();
425 acc |= (byte & 0x7f) << shift;
426 shift += 7;
427 } while (byte & 0x80);
428 uint32_t offset = 0x204 + (acc << 2);
429 // The calculations above could have overflowed.
430 // But the one we care about is this:
431 if (vSP() + offset < vSP())
432 mFailed = true;
433 vSP() += offset;
434 // ...so that this is the only other check needed:
435 checkStackBase();
436 continue;
439 // 1000iiii iiiiiiii (i not all 0): Pop under masks {r15-r12}, {r11-r4}
440 if ((insn & M_POPMASK) == I_POPMASK) {
441 popRange(4, 15, ((insn & 0x0f) << 8) | next());
442 continue;
445 // 1011001 0000iiii (i not all 0): Pop under mask {r3-r0}
446 if (insn == I_POPLO) {
447 popRange(0, 3, next() & 0x0f);
448 continue;
451 // 10110011 sssscccc: Pop VFP regs D[ssss]-D[ssss+cccc] (as FLDMFDX)
452 if (insn == I_POPFDX) {
453 uint8_t n = (next() & 0x0f) + 1;
454 vSP() += 8 * n + 4;
455 checkStackBase();
456 continue;
459 // 10111nnn: Pop VFP regs D[8]-D[8+nnn] (as FLDMFDX)
460 if ((insn & M_POPFDX8) == I_POPFDX8) {
461 uint8_t n = (insn & 0x07) + 1;
462 vSP() += 8 * n + 4;
463 checkStackBase();
464 continue;
467 // unhandled instruction
468 #ifdef DEBUG_EHABI_UNWIND
469 LOGF("Unhandled EHABI instruction 0x%02x", insn);
470 #endif
471 mFailed = true;
473 return false;
477 bool operator<(const EHTable &lhs, const EHTable &rhs) {
478 return lhs.startPC() < rhs.endPC();
481 // Async signal unsafe.
482 EHAddrSpace::EHAddrSpace(const std::vector<EHTable>& aTables)
483 : mTables(aTables)
485 std::sort(mTables.begin(), mTables.end());
486 DebugOnly<uint32_t> lastEnd = 0;
487 for (std::vector<EHTable>::iterator i = mTables.begin();
488 i != mTables.end(); ++i) {
489 MOZ_ASSERT(i->startPC() >= lastEnd);
490 mStarts.push_back(i->startPC());
491 lastEnd = i->endPC();
495 const EHTable *EHAddrSpace::lookup(uint32_t aPC) const {
496 ptrdiff_t i = (std::upper_bound(mStarts.begin(), mStarts.end(), aPC)
497 - mStarts.begin()) - 1;
499 if (i < 0 || aPC >= mTables[i].endPC())
500 return 0;
501 return &mTables[i];
505 const EHEntry *EHTable::lookup(uint32_t aPC) const {
506 MOZ_ASSERT(aPC >= mStartPC);
507 if (aPC >= mEndPC)
508 return nullptr;
510 EntryIterator begin = entriesBegin();
511 EntryIterator end = entriesEnd();
512 MOZ_ASSERT(begin < end);
513 if (aPC < reinterpret_cast<uint32_t>(entryGet(begin)->startPC.compute()))
514 return nullptr;
516 while (end - begin > 1) {
517 #ifdef EHABI_UNWIND_MORE_ASSERTS
518 if (entryGet(end - 1)->startPC.compute()
519 < entryGet(begin)->startPC.compute()) {
520 MOZ_CRASH("unsorted exidx");
522 #endif
523 EntryIterator mid = begin + (end - begin) / 2;
524 if (aPC < reinterpret_cast<uint32_t>(entryGet(mid)->startPC.compute()))
525 end = mid;
526 else
527 begin = mid;
529 return entryGet(begin);
533 #if MOZ_LITTLE_ENDIAN
534 static const unsigned char hostEndian = ELFDATA2LSB;
535 #elif MOZ_BIG_ENDIAN
536 static const unsigned char hostEndian = ELFDATA2MSB;
537 #else
538 #error "No endian?"
539 #endif
541 // Async signal unsafe: std::vector::reserve, std::string copy ctor.
542 EHTable::EHTable(const void *aELF, size_t aSize, const std::string &aName)
543 : mStartPC(~0), // largest uint32_t
544 mEndPC(0),
545 #ifndef HAVE_UNSORTED_EXIDX
546 mEntriesBegin(nullptr),
547 mEntriesEnd(nullptr),
548 #endif
549 mName(aName)
551 const uint32_t base = reinterpret_cast<uint32_t>(aELF);
553 if (aSize < sizeof(Elf32_Ehdr))
554 return;
556 const Elf32_Ehdr &file = *(reinterpret_cast<Elf32_Ehdr *>(base));
557 if (memcmp(&file.e_ident[EI_MAG0], ELFMAG, SELFMAG) != 0 ||
558 file.e_ident[EI_CLASS] != ELFCLASS32 ||
559 file.e_ident[EI_DATA] != hostEndian ||
560 file.e_ident[EI_VERSION] != EV_CURRENT ||
561 file.e_ident[EI_OSABI] != ELFOSABI_SYSV ||
562 #ifdef EI_ABIVERSION
563 file.e_ident[EI_ABIVERSION] != 0 ||
564 #endif
565 file.e_machine != EM_ARM ||
566 file.e_version != EV_CURRENT)
567 // e_flags?
568 return;
570 MOZ_ASSERT(file.e_phoff + file.e_phnum * file.e_phentsize <= aSize);
571 const Elf32_Phdr *exidxHdr = 0, *zeroHdr = 0;
572 for (unsigned i = 0; i < file.e_phnum; ++i) {
573 const Elf32_Phdr &phdr =
574 *(reinterpret_cast<Elf32_Phdr *>(base + file.e_phoff
575 + i * file.e_phentsize));
576 if (phdr.p_type == PT_ARM_EXIDX) {
577 exidxHdr = &phdr;
578 } else if (phdr.p_type == PT_LOAD) {
579 if (phdr.p_offset == 0) {
580 zeroHdr = &phdr;
582 if (phdr.p_flags & PF_X) {
583 mStartPC = std::min(mStartPC, phdr.p_vaddr);
584 mEndPC = std::max(mEndPC, phdr.p_vaddr + phdr.p_memsz);
588 if (!exidxHdr)
589 return;
590 if (!zeroHdr)
591 return;
592 mLoadOffset = base - zeroHdr->p_vaddr;
593 mStartPC += mLoadOffset;
594 mEndPC += mLoadOffset;
596 // Create a sorted index of the index to work around linker bugs.
597 const EHEntry *startTable =
598 reinterpret_cast<const EHEntry *>(mLoadOffset + exidxHdr->p_vaddr);
599 const EHEntry *endTable =
600 reinterpret_cast<const EHEntry *>(mLoadOffset + exidxHdr->p_vaddr
601 + exidxHdr->p_memsz);
602 #ifdef HAVE_UNSORTED_EXIDX
603 mEntries.reserve(endTable - startTable);
604 for (const EHEntry *i = startTable; i < endTable; ++i)
605 mEntries.push_back(i);
606 std::sort(mEntries.begin(), mEntries.end());
607 #else
608 mEntriesBegin = startTable;
609 mEntriesEnd = endTable;
610 #endif
614 mozilla::Atomic<const EHAddrSpace*> EHAddrSpace::sCurrent(nullptr);
616 // Async signal safe; can fail if Update() hasn't returned yet.
617 const EHAddrSpace *EHAddrSpace::Get() {
618 return sCurrent;
621 // Collect unwinding information from loaded objects. Calls after the
622 // first have no effect. Async signal unsafe.
623 void EHAddrSpace::Update() {
624 const EHAddrSpace *space = sCurrent;
625 if (space)
626 return;
628 SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf();
629 std::vector<EHTable> tables;
631 for (size_t i = 0; i < info.GetSize(); ++i) {
632 const SharedLibrary &lib = info.GetEntry(i);
633 if (lib.GetOffset() != 0)
634 // TODO: if it has a name, and we haven't seen a mapping of
635 // offset 0 for that file, try opening it and reading the
636 // headers instead. The only thing I've seen so far that's
637 // linked so as to need that treatment is the dynamic linker
638 // itself.
639 continue;
640 EHTable tab(reinterpret_cast<const void *>(lib.GetStart()),
641 lib.GetEnd() - lib.GetStart(), lib.GetName());
642 if (tab.isValid())
643 tables.push_back(tab);
645 space = new EHAddrSpace(tables);
647 if (!sCurrent.compareExchange(nullptr, space)) {
648 delete space;
649 space = sCurrent;
654 EHState::EHState(const mcontext_t &context) {
655 #ifdef linux
656 mRegs[0] = context.arm_r0;
657 mRegs[1] = context.arm_r1;
658 mRegs[2] = context.arm_r2;
659 mRegs[3] = context.arm_r3;
660 mRegs[4] = context.arm_r4;
661 mRegs[5] = context.arm_r5;
662 mRegs[6] = context.arm_r6;
663 mRegs[7] = context.arm_r7;
664 mRegs[8] = context.arm_r8;
665 mRegs[9] = context.arm_r9;
666 mRegs[10] = context.arm_r10;
667 mRegs[11] = context.arm_fp;
668 mRegs[12] = context.arm_ip;
669 mRegs[13] = context.arm_sp;
670 mRegs[14] = context.arm_lr;
671 mRegs[15] = context.arm_pc;
672 #else
673 # error "Unhandled OS for ARM EHABI unwinding"
674 #endif
677 } // namespace mozilla