Bumping gaia.json for 1 gaia revision(s) a=gaia-bump
[gecko.git] / tools / profiler / LulMain.cpp
blob6e6c1d235f10bfabde0b889f73b9ee0a11b0b2a5
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "LulMain.h"
9 #include <string.h>
10 #include <stdlib.h>
11 #include <stdio.h>
13 #include <algorithm> // std::sort
14 #include <string>
16 #include "mozilla/Assertions.h"
17 #include "mozilla/ArrayUtils.h"
18 #include "mozilla/MemoryChecking.h"
20 #include "LulCommonExt.h"
21 #include "LulElfExt.h"
23 #include "LulMainInt.h"
25 // Set this to 1 for verbose logging
26 #define DEBUG_MAIN 0
29 namespace lul {
31 using std::string;
32 using std::vector;
35 ////////////////////////////////////////////////////////////////
36 // AutoLulRWLocker //
37 ////////////////////////////////////////////////////////////////
39 // This is a simple RAII class that manages acquisition and release of
40 // LulRWLock reader-writer locks.
42 class AutoLulRWLocker {
43 public:
44 enum AcqMode { FOR_READING, FOR_WRITING };
45 AutoLulRWLocker(LulRWLock* aRWLock, AcqMode mode)
46 : mRWLock(aRWLock)
48 if (mode == FOR_WRITING) {
49 aRWLock->WrLock();
50 } else {
51 aRWLock->RdLock();
54 ~AutoLulRWLocker()
56 mRWLock->Unlock();
59 private:
60 LulRWLock* mRWLock;
64 ////////////////////////////////////////////////////////////////
65 // RuleSet //
66 ////////////////////////////////////////////////////////////////
68 static const char*
69 NameOf_DW_REG(int16_t aReg)
71 switch (aReg) {
72 case DW_REG_CFA: return "cfa";
73 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
74 case DW_REG_INTEL_XBP: return "xbp";
75 case DW_REG_INTEL_XSP: return "xsp";
76 case DW_REG_INTEL_XIP: return "xip";
77 #elif defined(LUL_ARCH_arm)
78 case DW_REG_ARM_R7: return "r7";
79 case DW_REG_ARM_R11: return "r11";
80 case DW_REG_ARM_R12: return "r12";
81 case DW_REG_ARM_R13: return "r13";
82 case DW_REG_ARM_R14: return "r14";
83 case DW_REG_ARM_R15: return "r15";
84 #else
85 # error "Unsupported arch"
86 #endif
87 default: return "???";
91 static string
92 ShowRule(const char* aNewReg, LExpr aExpr)
94 char buf[64];
95 string res = string(aNewReg) + "=";
96 switch (aExpr.mHow) {
97 case LExpr::UNKNOWN:
98 res += "Unknown";
99 break;
100 case LExpr::NODEREF:
101 sprintf(buf, "%s+%d", NameOf_DW_REG(aExpr.mReg), (int)aExpr.mOffset);
102 res += buf;
103 break;
104 case LExpr::DEREF:
105 sprintf(buf, "*(%s+%d)", NameOf_DW_REG(aExpr.mReg), (int)aExpr.mOffset);
106 res += buf;
107 break;
108 default:
109 res += "???";
110 break;
112 return res;
115 void
116 RuleSet::Print(void(*aLog)(const char*))
118 char buf[96];
119 sprintf(buf, "[%llx .. %llx]: let ",
120 (unsigned long long int)mAddr,
121 (unsigned long long int)(mAddr + mLen - 1));
122 string res = string(buf);
123 res += ShowRule("cfa", mCfaExpr);
124 res += " in";
125 // For each reg we care about, print the recovery expression.
126 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
127 res += ShowRule(" RA", mXipExpr);
128 res += ShowRule(" SP", mXspExpr);
129 res += ShowRule(" BP", mXbpExpr);
130 #elif defined(LUL_ARCH_arm)
131 res += ShowRule(" R15", mR15expr);
132 res += ShowRule(" R7", mR7expr);
133 res += ShowRule(" R11", mR11expr);
134 res += ShowRule(" R12", mR12expr);
135 res += ShowRule(" R13", mR13expr);
136 res += ShowRule(" R14", mR14expr);
137 #else
138 # error "Unsupported arch"
139 #endif
140 aLog(res.c_str());
143 LExpr*
144 RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) {
145 switch (aRegno) {
146 case DW_REG_CFA: return &mCfaExpr;
147 # if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
148 case DW_REG_INTEL_XIP: return &mXipExpr;
149 case DW_REG_INTEL_XSP: return &mXspExpr;
150 case DW_REG_INTEL_XBP: return &mXbpExpr;
151 # elif defined(LUL_ARCH_arm)
152 case DW_REG_ARM_R15: return &mR15expr;
153 case DW_REG_ARM_R14: return &mR14expr;
154 case DW_REG_ARM_R13: return &mR13expr;
155 case DW_REG_ARM_R12: return &mR12expr;
156 case DW_REG_ARM_R11: return &mR11expr;
157 case DW_REG_ARM_R7: return &mR7expr;
158 # else
159 # error "Unknown arch"
160 # endif
161 default: return nullptr;
165 RuleSet::RuleSet()
167 mAddr = 0;
168 mLen = 0;
169 // The only other fields are of type LExpr and those are initialised
170 // by LExpr::LExpr().
174 ////////////////////////////////////////////////////////////////
175 // SecMap //
176 ////////////////////////////////////////////////////////////////
178 // See header file LulMainInt.h for comments about invariants.
180 SecMap::SecMap(void(*aLog)(const char*))
181 : mSummaryMinAddr(1)
182 , mSummaryMaxAddr(0)
183 , mUsable(true)
184 , mLog(aLog)
187 SecMap::~SecMap() {
188 mRuleSets.clear();
191 RuleSet*
192 SecMap::FindRuleSet(uintptr_t ia) {
193 // Binary search mRuleSets to find one that brackets |ia|.
194 // lo and hi need to be signed, else the loop termination tests
195 // don't work properly. Note that this works correctly even when
196 // mRuleSets.size() == 0.
198 // Can't do this until the array has been sorted and preened.
199 MOZ_ASSERT(mUsable);
201 long int lo = 0;
202 long int hi = (long int)mRuleSets.size() - 1;
203 while (true) {
204 // current unsearched space is from lo to hi, inclusive.
205 if (lo > hi) {
206 // not found
207 return nullptr;
209 long int mid = lo + ((hi - lo) / 2);
210 RuleSet* mid_ruleSet = &mRuleSets[mid];
211 uintptr_t mid_minAddr = mid_ruleSet->mAddr;
212 uintptr_t mid_maxAddr = mid_minAddr + mid_ruleSet->mLen - 1;
213 if (ia < mid_minAddr) { hi = mid-1; continue; }
214 if (ia > mid_maxAddr) { lo = mid+1; continue; }
215 MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
216 return mid_ruleSet;
218 // NOTREACHED
221 // Add a RuleSet to the collection. The rule is copied in. Calling
222 // this makes the map non-searchable.
223 void
224 SecMap::AddRuleSet(RuleSet* rs) {
225 mUsable = false;
226 mRuleSets.push_back(*rs);
230 static bool
231 CmpRuleSetsByAddrLE(const RuleSet& rs1, const RuleSet& rs2) {
232 return rs1.mAddr < rs2.mAddr;
235 // Prepare the map for searching. Completely remove any which don't
236 // fall inside the specified range [start, +len).
237 void
238 SecMap::PrepareRuleSets(uintptr_t aStart, size_t aLen)
240 if (mRuleSets.empty()) {
241 return;
244 MOZ_ASSERT(aLen > 0);
245 if (aLen == 0) {
246 // This should never happen.
247 mRuleSets.clear();
248 return;
251 // Sort by start addresses.
252 std::sort(mRuleSets.begin(), mRuleSets.end(), CmpRuleSetsByAddrLE);
254 // Detect any entry not completely contained within [start, +len).
255 // Set its length to zero, so that the next pass will remove it.
256 for (size_t i = 0; i < mRuleSets.size(); ++i) {
257 RuleSet* rs = &mRuleSets[i];
258 if (rs->mLen > 0 &&
259 (rs->mAddr < aStart || rs->mAddr + rs->mLen > aStart + aLen)) {
260 rs->mLen = 0;
264 // Iteratively truncate any overlaps and remove any zero length
265 // entries that might result, or that may have been present
266 // initially. Unless the input is seriously screwy, this is
267 // expected to iterate only once.
268 while (true) {
269 size_t i;
270 size_t n = mRuleSets.size();
271 size_t nZeroLen = 0;
273 if (n == 0) {
274 break;
277 for (i = 1; i < n; ++i) {
278 RuleSet* prev = &mRuleSets[i-1];
279 RuleSet* here = &mRuleSets[i];
280 MOZ_ASSERT(prev->mAddr <= here->mAddr);
281 if (prev->mAddr + prev->mLen > here->mAddr) {
282 prev->mLen = here->mAddr - prev->mAddr;
284 if (prev->mLen == 0)
285 nZeroLen++;
288 if (mRuleSets[n-1].mLen == 0) {
289 nZeroLen++;
292 // At this point, the entries are in-order and non-overlapping.
293 // If none of them are zero-length, we are done.
294 if (nZeroLen == 0) {
295 break;
298 // Slide back the entries to remove the zero length ones.
299 size_t j = 0; // The write-point.
300 for (i = 0; i < n; ++i) {
301 if (mRuleSets[i].mLen == 0) {
302 continue;
304 if (j != i) mRuleSets[j] = mRuleSets[i];
305 ++j;
307 MOZ_ASSERT(i == n);
308 MOZ_ASSERT(nZeroLen <= n);
309 MOZ_ASSERT(j == n - nZeroLen);
310 while (nZeroLen > 0) {
311 mRuleSets.pop_back();
312 nZeroLen--;
315 MOZ_ASSERT(mRuleSets.size() == j);
318 size_t n = mRuleSets.size();
320 #ifdef DEBUG
321 // Do a final check on the rules: their address ranges must be
322 // ascending, non overlapping, non zero sized.
323 if (n > 0) {
324 MOZ_ASSERT(mRuleSets[0].mLen > 0);
325 for (size_t i = 1; i < n; ++i) {
326 RuleSet* prev = &mRuleSets[i-1];
327 RuleSet* here = &mRuleSets[i];
328 MOZ_ASSERT(prev->mAddr < here->mAddr);
329 MOZ_ASSERT(here->mLen > 0);
330 MOZ_ASSERT(prev->mAddr + prev->mLen <= here->mAddr);
333 #endif
335 // Set the summary min and max address values.
336 if (n == 0) {
337 // Use the values defined in comments in the class declaration.
338 mSummaryMinAddr = 1;
339 mSummaryMaxAddr = 0;
340 } else {
341 mSummaryMinAddr = mRuleSets[0].mAddr;
342 mSummaryMaxAddr = mRuleSets[n-1].mAddr + mRuleSets[n-1].mLen - 1;
344 char buf[150];
345 snprintf(buf, sizeof(buf),
346 "PrepareRuleSets: %d entries, smin/smax 0x%llx, 0x%llx\n",
347 (int)n, (unsigned long long int)mSummaryMinAddr,
348 (unsigned long long int)mSummaryMaxAddr);
349 buf[sizeof(buf)-1] = 0;
350 mLog(buf);
352 // Is now usable for binary search.
353 mUsable = true;
355 if (0) {
356 mLog("\nRulesets after preening\n");
357 for (size_t i = 0; i < mRuleSets.size(); ++i) {
358 mRuleSets[i].Print(mLog);
359 mLog("\n");
361 mLog("\n");
365 bool SecMap::IsEmpty() {
366 return mRuleSets.empty();
370 ////////////////////////////////////////////////////////////////
371 // SegArray //
372 ////////////////////////////////////////////////////////////////
374 // A SegArray holds a set of address ranges that together exactly
375 // cover an address range, with no overlaps or holes. Each range has
376 // an associated value, which in this case has been specialised to be
377 // a simple boolean. The representation is kept to minimal canonical
378 // form in which adjacent ranges with the same associated value are
379 // merged together. Each range is represented by a |struct Seg|.
381 // SegArrays are used to keep track of which parts of the address
382 // space are known to contain instructions.
class SegArray {
 public:
  // Assign |val| to the whole range [lo, hi] (inclusive), splitting
  // and merging segments as needed so the canonical-form invariant
  // (adjacent segments differ in |val|) is maintained.  A reversed
  // range is ignored.
  void add(uintptr_t lo, uintptr_t hi, bool val) {
    if (lo > hi) {
      return;
    }
    split_at(lo);
    if (hi < UINTPTR_MAX) {
      split_at(hi+1);
    }
    std::vector<Seg>::size_type iLo, iHi, i;
    iLo = find(lo);
    iHi = find(hi);
    for (i = iLo; i <= iHi; ++i) {
      mSegs[i].val = val;
    }
    preen();
  }

  // If |addr| falls in a segment marked true (executable), write that
  // segment's bounds to |rx_min|/|rx_max| and return true.  Otherwise
  // return false and leave the outputs untouched.
  bool getBoundingCodeSegment(/*OUT*/uintptr_t* rx_min,
                              /*OUT*/uintptr_t* rx_max, uintptr_t addr) {
    std::vector<Seg>::size_type i = find(addr);
    if (!mSegs[i].val) {
      return false;
    }
    *rx_min = mSegs[i].lo;
    *rx_max = mSegs[i].hi;
    return true;
  }

  // Initially the entire address space is one segment marked false,
  // so that find() can never fail.
  SegArray() {
    Seg s(0, UINTPTR_MAX, false);
    mSegs.push_back(s);
  }

 private:
  // One address range [lo, hi] (inclusive) with its boolean value.
  struct Seg {
    Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {}
    uintptr_t lo;
    uintptr_t hi;
    bool val;
  };

  // Restore canonical form: merge runs of adjacent segments that
  // carry the same value.
  void preen() {
    for (std::vector<Seg>::iterator iter = mSegs.begin();
         iter < mSegs.end()-1;
         ++iter) {
      if (iter[0].val != iter[1].val) {
        continue;
      }
      iter[0].hi = iter[1].hi;
      mSegs.erase(iter+1);
      // Back up one, so as not to miss an opportunity to merge
      // with the entry after this one.
      --iter;
    }
  }

  // Binary search for the (unique) segment containing |a|.  Cannot
  // fail, since the segments exactly cover the address space.
  std::vector<Seg>::size_type find(uintptr_t a) {
    long int lo = 0;
    // Upper bound is size()-1: the search space is lo .. hi
    // *inclusive*.  (Previously this was size(), which breaks the
    // inclusive-bound invariant and could probe mSegs[size()] if the
    // full-coverage invariant were ever violated.)
    long int hi = (long int)mSegs.size() - 1;
    while (true) {
      // The unsearched space is lo .. hi inclusive.
      if (lo > hi) {
        // Not found.  This can't happen.
        return (std::vector<Seg>::size_type)(-1);
      }
      long int mid = lo + ((hi - lo) / 2);
      uintptr_t mid_lo = mSegs[mid].lo;
      uintptr_t mid_hi = mSegs[mid].hi;
      if (a < mid_lo) { hi = mid-1; continue; }
      if (a > mid_hi) { lo = mid+1; continue; }
      return (std::vector<Seg>::size_type)mid;
    }
  }

  // Ensure a segment boundary exists at |a|, by splitting the segment
  // containing it if necessary.
  void split_at(uintptr_t a) {
    std::vector<Seg>::size_type i = find(a);
    if (mSegs[i].lo == a) {
      return;
    }
    mSegs.insert( mSegs.begin()+i+1, mSegs[i] );
    mSegs[i].hi = a-1;
    mSegs[i+1].lo = a;
  }

  // Debug-only dump of the whole array to stdout.
  void show() {
    printf("<< %d entries:\n", (int)mSegs.size());
    for (std::vector<Seg>::iterator iter = mSegs.begin();
         iter < mSegs.end();
         ++iter) {
      printf("  %016llx  %016llx  %s\n",
             (unsigned long long int)(*iter).lo,
             (unsigned long long int)(*iter).hi,
             (*iter).val ? "true" : "false");
    }
    printf(">>\n");
  }

  // Invariants: segments exactly cover [0, UINTPTR_MAX], in order,
  // with no overlaps, holes, or same-valued neighbours.
  std::vector<Seg> mSegs;
};
487 ////////////////////////////////////////////////////////////////
488 // PriMap //
489 ////////////////////////////////////////////////////////////////
491 class PriMap {
492 public:
493 PriMap(void (*aLog)(const char*))
494 : mLog(aLog)
497 ~PriMap() {
498 for (std::vector<SecMap*>::iterator iter = mSecMaps.begin();
499 iter != mSecMaps.end();
500 ++iter) {
501 delete *iter;
503 mSecMaps.clear();
506 // This can happen with the global lock held for reading.
507 RuleSet* Lookup(uintptr_t ia) {
508 SecMap* sm = FindSecMap(ia);
509 return sm ? sm->FindRuleSet(ia) : nullptr;
512 // Add a secondary map. No overlaps allowed w.r.t. existing
513 // secondary maps. Global lock must be held for writing.
514 void AddSecMap(SecMap* aSecMap) {
515 // We can't add an empty SecMap to the PriMap. But that's OK
516 // since we'd never be able to find anything in it anyway.
517 if (aSecMap->IsEmpty()) {
518 return;
521 // Iterate through the SecMaps and find the right place for this
522 // one. At the same time, ensure that the in-order
523 // non-overlapping invariant is preserved (and, generally, holds).
524 // FIXME: this gives a cost that is O(N^2) in the total number of
525 // shared objects in the system. ToDo: better.
526 MOZ_ASSERT(aSecMap->mSummaryMinAddr <= aSecMap->mSummaryMaxAddr);
528 size_t num_secMaps = mSecMaps.size();
529 uintptr_t i;
530 for (i = 0; i < num_secMaps; ++i) {
531 SecMap* sm_i = mSecMaps[i];
532 MOZ_ASSERT(sm_i->mSummaryMinAddr <= sm_i->mSummaryMaxAddr);
533 if (aSecMap->mSummaryMinAddr < sm_i->mSummaryMaxAddr) {
534 // |aSecMap| needs to be inserted immediately before mSecMaps[i].
535 break;
538 MOZ_ASSERT(i <= num_secMaps);
539 if (i == num_secMaps) {
540 // It goes at the end.
541 mSecMaps.push_back(aSecMap);
542 } else {
543 std::vector<SecMap*>::iterator iter = mSecMaps.begin() + i;
544 mSecMaps.insert(iter, aSecMap);
546 char buf[100];
547 snprintf(buf, sizeof(buf), "AddSecMap: now have %d SecMaps\n",
548 (int)mSecMaps.size());
549 buf[sizeof(buf)-1] = 0;
550 mLog(buf);
553 // Remove and delete any SecMaps in the mapping, that intersect
554 // with the specified address range.
555 void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) {
556 MOZ_ASSERT(avma_min <= avma_max);
557 size_t num_secMaps = mSecMaps.size();
558 if (num_secMaps > 0) {
559 intptr_t i;
560 // Iterate from end to start over the vector, so as to ensure
561 // that the special case where |avma_min| and |avma_max| denote
562 // the entire address space, can be completed in time proportional
563 // to the number of elements in the map.
564 for (i = (intptr_t)num_secMaps-1; i >= 0; i--) {
565 SecMap* sm_i = mSecMaps[i];
566 if (sm_i->mSummaryMaxAddr < avma_min ||
567 avma_max < sm_i->mSummaryMinAddr) {
568 // There's no overlap. Move on.
569 continue;
571 // We need to remove mSecMaps[i] and slide all those above it
572 // downwards to cover the hole.
573 mSecMaps.erase(mSecMaps.begin() + i);
574 delete sm_i;
579 // Return the number of currently contained SecMaps.
580 size_t CountSecMaps() {
581 return mSecMaps.size();
584 // Assess heuristically whether the given address is an instruction
585 // immediately following a call instruction. The caller is required
586 // to hold the global lock for reading.
587 bool MaybeIsReturnPoint(TaggedUWord aInstrAddr, SegArray* aSegArray) {
588 if (!aInstrAddr.Valid()) {
589 return false;
592 uintptr_t ia = aInstrAddr.Value();
594 // Assume that nobody would be crazy enough to put code in the
595 // first or last page.
596 if (ia < 4096 || ((uintptr_t)(-ia)) < 4096) {
597 return false;
600 // See if it falls inside a known r-x mapped area. Poking around
601 // outside such places risks segfaulting.
602 uintptr_t insns_min, insns_max;
603 bool b = aSegArray->getBoundingCodeSegment(&insns_min, &insns_max, ia);
604 if (!b) {
605 // no code (that we know about) at this address
606 return false;
609 // |ia| falls within an r-x range. So we can
610 // safely poke around in [insns_min, insns_max].
612 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
613 // Is the previous instruction recognisably a CALL? This is
614 // common for the 32- and 64-bit versions, except for the
615 // simm32(%rip) case, which is 64-bit only.
617 // For all other cases, the 64 bit versions are either identical
618 // to the 32 bit versions, or have an optional extra leading REX.W
619 // byte (0x41). Since the extra 0x41 is optional we have to
620 // ignore it, with the convenient result that the same matching
621 // logic works for both 32- and 64-bit cases.
623 uint8_t* p = (uint8_t*)ia;
624 # if defined(LUL_ARCH_x64)
625 // CALL simm32(%rip) == FF15 simm32
626 if (ia - 6 >= insns_min && p[-6] == 0xFF && p[-5] == 0x15) {
627 return true;
629 # endif
630 // CALL rel32 == E8 rel32 (both 32- and 64-bit)
631 if (ia - 5 >= insns_min && p[-5] == 0xE8) {
632 return true;
634 // CALL *%eax .. CALL *%edi == FFD0 .. FFD7 (32-bit)
635 // CALL *%rax .. CALL *%rdi == FFD0 .. FFD7 (64-bit)
636 // CALL *%r8 .. CALL *%r15 == 41FFD0 .. 41FFD7 (64-bit)
637 if (ia - 2 >= insns_min &&
638 p[-2] == 0xFF && p[-1] >= 0xD0 && p[-1] <= 0xD7) {
639 return true;
641 // Almost all of the remaining cases that occur in practice are
642 // of the form CALL *simm8(reg) or CALL *simm32(reg).
644 // 64 bit cases:
646 // call *simm8(%rax) FF50 simm8
647 // call *simm8(%rcx) FF51 simm8
648 // call *simm8(%rdx) FF52 simm8
649 // call *simm8(%rbx) FF53 simm8
650 // call *simm8(%rsp) FF5424 simm8
651 // call *simm8(%rbp) FF55 simm8
652 // call *simm8(%rsi) FF56 simm8
653 // call *simm8(%rdi) FF57 simm8
655 // call *simm8(%r8) 41FF50 simm8
656 // call *simm8(%r9) 41FF51 simm8
657 // call *simm8(%r10) 41FF52 simm8
658 // call *simm8(%r11) 41FF53 simm8
659 // call *simm8(%r12) 41FF5424 simm8
660 // call *simm8(%r13) 41FF55 simm8
661 // call *simm8(%r14) 41FF56 simm8
662 // call *simm8(%r15) 41FF57 simm8
664 // call *simm32(%rax) FF90 simm32
665 // call *simm32(%rcx) FF91 simm32
666 // call *simm32(%rdx) FF92 simm32
667 // call *simm32(%rbx) FF93 simm32
668 // call *simm32(%rsp) FF9424 simm32
669 // call *simm32(%rbp) FF95 simm32
670 // call *simm32(%rsi) FF96 simm32
671 // call *simm32(%rdi) FF97 simm32
673 // call *simm32(%r8) 41FF90 simm32
674 // call *simm32(%r9) 41FF91 simm32
675 // call *simm32(%r10) 41FF92 simm32
676 // call *simm32(%r11) 41FF93 simm32
677 // call *simm32(%r12) 41FF9424 simm32
678 // call *simm32(%r13) 41FF95 simm32
679 // call *simm32(%r14) 41FF96 simm32
680 // call *simm32(%r15) 41FF97 simm32
682 // 32 bit cases:
684 // call *simm8(%eax) FF50 simm8
685 // call *simm8(%ecx) FF51 simm8
686 // call *simm8(%edx) FF52 simm8
687 // call *simm8(%ebx) FF53 simm8
688 // call *simm8(%esp) FF5424 simm8
689 // call *simm8(%ebp) FF55 simm8
690 // call *simm8(%esi) FF56 simm8
691 // call *simm8(%edi) FF57 simm8
693 // call *simm32(%eax) FF90 simm32
694 // call *simm32(%ecx) FF91 simm32
695 // call *simm32(%edx) FF92 simm32
696 // call *simm32(%ebx) FF93 simm32
697 // call *simm32(%esp) FF9424 simm32
698 // call *simm32(%ebp) FF95 simm32
699 // call *simm32(%esi) FF96 simm32
700 // call *simm32(%edi) FF97 simm32
701 if (ia - 3 >= insns_min &&
702 p[-3] == 0xFF &&
703 (p[-2] >= 0x50 && p[-2] <= 0x57 && p[-2] != 0x54)) {
704 // imm8 case, not including %esp/%rsp
705 return true;
707 if (ia - 4 >= insns_min &&
708 p[-4] == 0xFF && p[-3] == 0x54 && p[-2] == 0x24) {
709 // imm8 case for %esp/%rsp
710 return true;
712 if (ia - 6 >= insns_min &&
713 p[-6] == 0xFF &&
714 (p[-5] >= 0x90 && p[-5] <= 0x97 && p[-5] != 0x94)) {
715 // imm32 case, not including %esp/%rsp
716 return true;
718 if (ia - 7 >= insns_min &&
719 p[-7] == 0xFF && p[-6] == 0x94 && p[-5] == 0x24) {
720 // imm32 case for %esp/%rsp
721 return true;
724 #elif defined(LUL_ARCH_arm)
725 if (ia & 1) {
726 uint16_t w0 = 0, w1 = 0;
727 // The return address has its lowest bit set, indicating a return
728 // to Thumb code.
729 ia &= ~(uintptr_t)1;
730 if (ia - 2 >= insns_min && ia - 1 <= insns_max) {
731 w1 = *(uint16_t*)(ia - 2);
733 if (ia - 4 >= insns_min && ia - 1 <= insns_max) {
734 w0 = *(uint16_t*)(ia - 4);
736 // Is it a 32-bit Thumb call insn?
737 // BL simm26 (Encoding T1)
738 if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) {
739 return true;
741 // BLX simm26 (Encoding T2)
742 if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) {
743 return true;
745 // Other possible cases:
746 // (BLX Rm, Encoding T1).
747 // BLX Rm (encoding T1, 16 bit, inspect w1 and ignore w0.)
748 // 0100 0111 1 Rm 000
749 } else {
750 // Returning to ARM code.
751 uint32_t a0 = 0;
752 if ((ia & 3) == 0 && ia - 4 >= insns_min && ia - 1 <= insns_max) {
753 a0 = *(uint32_t*)(ia - 4);
755 // Leading E forces unconditional only -- fix. It could be
756 // anything except F, which is the deprecated NV code.
757 // BL simm26 (Encoding A1)
758 if ((a0 & 0xFF000000) == 0xEB000000) {
759 return true;
761 // Other possible cases:
762 // BLX simm26 (Encoding A2)
763 //if ((a0 & 0xFE000000) == 0xFA000000)
764 // return true;
765 // BLX (register) (A1): BLX <c> <Rm>
766 // cond 0001 0010 1111 1111 1111 0011 Rm
767 // again, cond can be anything except NV (0xF)
770 #else
771 # error "Unsupported arch"
772 #endif
774 // Not an insn we recognise.
775 return false;
778 private:
779 // FindSecMap's caller must hold the global lock for reading or writing.
780 SecMap* FindSecMap(uintptr_t ia) {
781 // Binary search mSecMaps to find one that brackets |ia|.
782 // lo and hi need to be signed, else the loop termination tests
783 // don't work properly.
784 long int lo = 0;
785 long int hi = (long int)mSecMaps.size() - 1;
786 while (true) {
787 // current unsearched space is from lo to hi, inclusive.
788 if (lo > hi) {
789 // not found
790 return nullptr;
792 long int mid = lo + ((hi - lo) / 2);
793 SecMap* mid_secMap = mSecMaps[mid];
794 uintptr_t mid_minAddr = mid_secMap->mSummaryMinAddr;
795 uintptr_t mid_maxAddr = mid_secMap->mSummaryMaxAddr;
796 if (ia < mid_minAddr) { hi = mid-1; continue; }
797 if (ia > mid_maxAddr) { lo = mid+1; continue; }
798 MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
799 return mid_secMap;
801 // NOTREACHED
804 private:
805 // sorted array of per-object ranges, non overlapping, non empty
806 std::vector<SecMap*> mSecMaps;
808 // a logging sink, for debugging.
809 void (*mLog)(const char*);
813 ////////////////////////////////////////////////////////////////
814 // CFICache //
815 ////////////////////////////////////////////////////////////////
817 // This is the thread-local cache. It maps individual insn AVMAs to
818 // the associated CFI record, which live in LUL::mPriMap.
820 // The cache is a direct map hash table indexed by address.
821 // It has to distinguish 3 cases:
823 // (1) .mRSet == (RuleSet*)0 ==> cache slot not in use
824 // (2) .mRSet == (RuleSet*)1 ==> slot in use, no RuleSet avail
825 // (3) .mRSet > (RuleSet*)1 ==> slot in use, RuleSet* available
827 // Distinguishing between (2) and (3) is important, because if we look
828 // up an address in LUL::mPriMap and find there is no RuleSet, then
829 // that fact needs to cached here, so as to avoid potentially
830 // expensive repeat lookups.
832 // A CFICacheEntry::mRSet value of zero indicates that the slot is not
833 // in use, and a value of one indicates that the slot is in use but
834 // there is no RuleSet available.
835 #define ENTRY_NOT_IN_USE ((RuleSet*)0)
836 #define NO_RULESET_AVAILABLE ((RuleSet*)1)
838 class CFICache {
839 public:
841 CFICache(PriMap* aPriMap) {
842 Invalidate();
843 mPriMap = aPriMap;
846 void Invalidate() {
847 for (int i = 0; i < N_ENTRIES; ++i) {
848 mCache[i].mAVMA = 0;
849 mCache[i].mRSet = ENTRY_NOT_IN_USE;
853 RuleSet* Lookup(uintptr_t ia) {
854 uintptr_t hash = ia % (uintptr_t)N_ENTRIES;
855 CFICacheEntry* ce = &mCache[hash];
856 if (ce->mAVMA == ia) {
857 // The cache has an entry for |ia|. Interpret it.
858 if (ce->mRSet > NO_RULESET_AVAILABLE) {
859 // There's a RuleSet. So return it.
860 return ce->mRSet;
862 if (ce->mRSet == NO_RULESET_AVAILABLE) {
863 // There's no RuleSet for this address. Don't update
864 // the cache, since we might get queried again.
865 return nullptr;
867 // Otherwise, the slot is not in use. Fall through to
868 // the 'miss' case.
871 // The cache entry is for some other address, or is not in use.
872 // Update it. If it can be found in the priMap then install it
873 // as-is. Else put NO_RULESET_AVAILABLE in, so as to indicate
874 // there's no info for this address.
875 RuleSet* fallback = mPriMap->Lookup(ia);
876 mCache[hash].mAVMA = ia;
877 mCache[hash].mRSet = fallback ? fallback : NO_RULESET_AVAILABLE;
878 return fallback;
881 private:
882 // This should be a prime number.
883 static const int N_ENTRIES = 509;
885 // See comment above for the meaning of these entries.
886 struct CFICacheEntry {
887 uintptr_t mAVMA; // AVMA of the associated instruction
888 RuleSet* mRSet; // RuleSet* for the instruction
890 CFICacheEntry mCache[N_ENTRIES];
892 // Need to have a pointer to the PriMap, so as to be able
893 // to service misses.
894 PriMap* mPriMap;
897 #undef ENTRY_NOT_IN_USE
898 #undef NO_RULESET_AVAILABLE
901 ////////////////////////////////////////////////////////////////
902 // LUL //
903 ////////////////////////////////////////////////////////////////
905 LUL::LUL(void (*aLog)(const char*))
907 mRWlock = new LulRWLock();
908 AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
909 mLog = aLog;
910 mPriMap = new PriMap(aLog);
911 mSegArray = new SegArray();
915 LUL::~LUL()
917 // The auto-locked section must have its own scope, so that the
918 // unlock is performed before the mRWLock is deleted.
920 AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
921 for (std::map<pthread_t,CFICache*>::iterator iter = mCaches.begin();
922 iter != mCaches.end();
923 ++iter) {
924 delete iter->second;
926 delete mPriMap;
927 delete mSegArray;
928 mLog = nullptr;
930 // Now we don't hold the lock. Hence it is safe to delete it.
931 delete mRWlock;
935 void
936 LUL::RegisterUnwinderThread()
938 AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
940 pthread_t me = pthread_self();
941 CFICache* cache = new CFICache(mPriMap);
943 std::pair<std::map<pthread_t,CFICache*>::iterator, bool> res
944 = mCaches.insert(std::pair<pthread_t,CFICache*>(me, cache));
945 // "this thread is not already registered"
946 MOZ_ASSERT(res.second); // "new element was inserted"
947 // Using mozilla::DebugOnly to declare |res| leads to compilation error
948 (void)res.second;
951 void
952 LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize,
953 const char* aFileName, const void* aMappedImage)
955 AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
957 mLog(":\n");
958 char buf[200];
959 snprintf(buf, sizeof(buf), "NotifyMap %llx %llu %s\n",
960 (unsigned long long int)aRXavma, (unsigned long long int)aSize,
961 aFileName);
962 buf[sizeof(buf)-1] = 0;
963 mLog(buf);
965 InvalidateCFICaches();
967 // Ignore obviously-stupid notifications.
968 if (aSize > 0) {
970 // Here's a new mapping, for this object.
971 SecMap* smap = new SecMap(mLog);
973 // Read CFI or EXIDX unwind data into |smap|.
974 if (!aMappedImage) {
975 (void)lul::ReadSymbolData(
976 string(aFileName), std::vector<string>(), smap,
977 (void*)aRXavma, mLog);
978 } else {
979 (void)lul::ReadSymbolDataInternal(
980 (const uint8_t*)aMappedImage,
981 string(aFileName), std::vector<string>(), smap,
982 (void*)aRXavma, mLog);
985 mLog("NotifyMap .. preparing entries\n");
987 smap->PrepareRuleSets(aRXavma, aSize);
989 snprintf(buf, sizeof(buf),
990 "NotifyMap got %lld entries\n", (long long int)smap->Size());
991 buf[sizeof(buf)-1] = 0;
992 mLog(buf);
994 // Add it to the primary map (the top level set of mapped objects).
995 mPriMap->AddSecMap(smap);
997 // Tell the segment array about the mapping, so that the stack
998 // scan and __kernel_syscall mechanisms know where valid code is.
999 mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
1004 void
1005 LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize)
1007 AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
1009 mLog(":\n");
1010 char buf[200];
1011 snprintf(buf, sizeof(buf), "NotifyExecutableArea %llx %llu\n",
1012 (unsigned long long int)aRXavma, (unsigned long long int)aSize);
1013 buf[sizeof(buf)-1] = 0;
1014 mLog(buf);
1016 InvalidateCFICaches();
1018 // Ignore obviously-stupid notifications.
1019 if (aSize > 0) {
1020 // Tell the segment array about the mapping, so that the stack
1021 // scan and __kernel_syscall mechanisms know where valid code is.
1022 mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
1027 void
1028 LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax)
1030 AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
1032 mLog(":\n");
1033 char buf[100];
1034 snprintf(buf, sizeof(buf), "NotifyUnmap %016llx-%016llx\n",
1035 (unsigned long long int)aRXavmaMin,
1036 (unsigned long long int)aRXavmaMax);
1037 buf[sizeof(buf)-1] = 0;
1038 mLog(buf);
1040 MOZ_ASSERT(aRXavmaMin <= aRXavmaMax);
1042 InvalidateCFICaches();
1044 // Remove from the primary map, any secondary maps that intersect
1045 // with the address range. Also delete the secondary maps.
1046 mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax);
1048 // Tell the segment array that the address range no longer
1049 // contains valid code.
1050 mSegArray->add(aRXavmaMin, aRXavmaMax, false);
1052 snprintf(buf, sizeof(buf), "NotifyUnmap: now have %d SecMaps\n",
1053 (int)mPriMap->CountSecMaps());
1054 buf[sizeof(buf)-1] = 0;
1055 mLog(buf);
1059 size_t
1060 LUL::CountMappings()
1062 AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
1063 return mPriMap->CountSecMaps();
1067 static
1068 TaggedUWord DerefTUW(TaggedUWord aAddr, StackImage* aStackImg)
1070 if (!aAddr.Valid()) {
1071 return TaggedUWord();
1073 if (aAddr.Value() < aStackImg->mStartAvma) {
1074 return TaggedUWord();
1076 if (aAddr.Value() + sizeof(uintptr_t) > aStackImg->mStartAvma
1077 + aStackImg->mLen) {
1078 return TaggedUWord();
1080 return TaggedUWord(*(uintptr_t*)(aStackImg->mContents + aAddr.Value()
1081 - aStackImg->mStartAvma));
1084 static
1085 TaggedUWord EvaluateReg(int16_t aReg, UnwindRegs* aOldRegs, TaggedUWord aCFA)
1087 switch (aReg) {
1088 case DW_REG_CFA: return aCFA;
1089 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1090 case DW_REG_INTEL_XBP: return aOldRegs->xbp;
1091 case DW_REG_INTEL_XSP: return aOldRegs->xsp;
1092 case DW_REG_INTEL_XIP: return aOldRegs->xip;
1093 #elif defined(LUL_ARCH_arm)
1094 case DW_REG_ARM_R7: return aOldRegs->r7;
1095 case DW_REG_ARM_R11: return aOldRegs->r11;
1096 case DW_REG_ARM_R12: return aOldRegs->r12;
1097 case DW_REG_ARM_R13: return aOldRegs->r13;
1098 case DW_REG_ARM_R14: return aOldRegs->r14;
1099 case DW_REG_ARM_R15: return aOldRegs->r15;
1100 #else
1101 # error "Unsupported arch"
1102 #endif
1103 default: MOZ_ASSERT(0); return TaggedUWord();
1107 static
1108 TaggedUWord EvaluateExpr(LExpr aExpr, UnwindRegs* aOldRegs,
1109 TaggedUWord aCFA, StackImage* aStackImg)
1111 switch (aExpr.mHow) {
1112 case LExpr::UNKNOWN:
1113 return TaggedUWord();
1114 case LExpr::NODEREF: {
1115 TaggedUWord tuw = EvaluateReg(aExpr.mReg, aOldRegs, aCFA);
1116 tuw.Add(TaggedUWord((intptr_t)aExpr.mOffset));
1117 return tuw;
1119 case LExpr::DEREF: {
1120 TaggedUWord tuw = EvaluateReg(aExpr.mReg, aOldRegs, aCFA);
1121 tuw.Add(TaggedUWord((intptr_t)aExpr.mOffset));
1122 return DerefTUW(tuw, aStackImg);
1124 default:
1125 MOZ_ASSERT(0);
1126 return TaggedUWord();
// Apply RuleSet |aRS| to the register state |aRegs|, in place: first
// compute the CFA for this frame, then use it to compute each tracked
// register's value in the calling frame.  Any register whose rule
// cannot be evaluated is left as an invalid TaggedUWord; the caller
// detects unwind failure by inspecting validity afterwards.
1130 static
1131 void UseRuleSet(/*MOD*/UnwindRegs* aRegs,
1132 StackImage* aStackImg, RuleSet* aRS)
1134 // Take a copy of regs, since we'll need to refer to the old values
1135 // whilst computing the new ones.
1136 UnwindRegs old_regs = *aRegs;
1138 // Mark all the current register values as invalid, so that the
1139 // caller can see, on our return, which ones have been computed
1140 // anew. If we don't even manage to compute a new PC value, then
1141 // the caller will have to abandon the unwind.
1142 // FIXME: Create and use instead: aRegs->SetAllInvalid();
1143 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1144 aRegs->xbp = TaggedUWord();
1145 aRegs->xsp = TaggedUWord();
1146 aRegs->xip = TaggedUWord();
1147 #elif defined(LUL_ARCH_arm)
1148 aRegs->r7 = TaggedUWord();
1149 aRegs->r11 = TaggedUWord();
1150 aRegs->r12 = TaggedUWord();
1151 aRegs->r13 = TaggedUWord();
1152 aRegs->r14 = TaggedUWord();
1153 aRegs->r15 = TaggedUWord();
1154 #else
1155 # error "Unsupported arch"
1156 #endif
1158 // This is generally useful.
1159 const TaggedUWord inval = TaggedUWord();
1161 // First, compute the CFA.
// Note the CFA expression is evaluated with an invalid "old CFA",
// since the CFA for this frame is exactly what is being computed.
1162 TaggedUWord cfa = EvaluateExpr(aRS->mCfaExpr, &old_regs,
1163 inval/*old cfa*/, aStackImg);
1165 // If we didn't manage to compute the CFA, well .. that's ungood,
1166 // but keep going anyway. It'll be OK provided none of the register
1167 // value rules mention the CFA. In any case, compute the new values
1168 // for each register that we're tracking.
1170 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1171 aRegs->xbp = EvaluateExpr(aRS->mXbpExpr, &old_regs, cfa, aStackImg);
1172 aRegs->xsp = EvaluateExpr(aRS->mXspExpr, &old_regs, cfa, aStackImg);
1173 aRegs->xip = EvaluateExpr(aRS->mXipExpr, &old_regs, cfa, aStackImg);
1174 #elif defined(LUL_ARCH_arm)
1175 aRegs->r7 = EvaluateExpr(aRS->mR7expr, &old_regs, cfa, aStackImg);
1176 aRegs->r11 = EvaluateExpr(aRS->mR11expr, &old_regs, cfa, aStackImg);
1177 aRegs->r12 = EvaluateExpr(aRS->mR12expr, &old_regs, cfa, aStackImg);
1178 aRegs->r13 = EvaluateExpr(aRS->mR13expr, &old_regs, cfa, aStackImg);
1179 aRegs->r14 = EvaluateExpr(aRS->mR14expr, &old_regs, cfa, aStackImg);
1180 aRegs->r15 = EvaluateExpr(aRS->mR15expr, &old_regs, cfa, aStackImg);
1181 #else
1182 # error "Unsupported arch"
1183 #endif
1185 // We're done. Any regs for which we didn't manage to compute a
1186 // new value will now be marked as invalid.
// Unwind the stack whose innermost-frame registers are |aStartRegs|
// and whose memory is captured in |aStackImg|.  Writes up to
// |aFramesAvail| recovered frame PCs and SPs into |aFramePCs| and
// |aFrameSPs|, setting *aFramesUsed to the number recovered.  Where
// no CFI rule is available, up to |aScannedFramesAllowed| extra
// frames may be recovered by Breakpad-style stack scanning;
// *aScannedFramesAcquired counts how many were.  The calling thread
// must previously have been registered, since its thread-local CFI
// cache is consulted (and possibly updated) here.
1189 void
1190 LUL::Unwind(/*OUT*/uintptr_t* aFramePCs,
1191 /*OUT*/uintptr_t* aFrameSPs,
1192 /*OUT*/size_t* aFramesUsed,
1193 /*OUT*/size_t* aScannedFramesAcquired,
1194 size_t aFramesAvail,
1195 size_t aScannedFramesAllowed,
1196 UnwindRegs* aStartRegs, StackImage* aStackImg)
1198 AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_READING);
1200 pthread_t me = pthread_self();
1201 std::map<pthread_t, CFICache*>::iterator iter = mCaches.find(me);
1203 if (iter == mCaches.end()) {
1204 // The calling thread is not registered for unwinding.
// NOTE: MOZ_CRASH does not return, so the |return| below is
// unreachable in practice; it exists only to placate control-flow
// analysis.
1205 MOZ_CRASH();
1206 return;
1209 CFICache* cache = iter->second;
1210 MOZ_ASSERT(cache);
1212 // Do unwindery, possibly modifying |cache|.
1214 /////////////////////////////////////////////////////////
1215 // BEGIN UNWIND
1217 *aFramesUsed = 0;
1219 UnwindRegs regs = *aStartRegs;
1220 TaggedUWord last_valid_sp = TaggedUWord();
1222 // Stack-scan control
1223 unsigned int n_scanned_frames = 0; // # s-s frames recovered so far
1224 static const int NUM_SCANNED_WORDS = 50; // max allowed scan length
1226 while (true) {
1228 if (DEBUG_MAIN) {
1229 char buf[300];
1230 mLog("\n");
1231 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1232 snprintf(buf, sizeof(buf),
1233 "LoopTop: rip %d/%llx rsp %d/%llx rbp %d/%llx\n",
1234 (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(),
1235 (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(),
1236 (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value());
1237 buf[sizeof(buf)-1] = 0;
1238 mLog(buf);
1239 #elif defined(LUL_ARCH_arm)
1240 snprintf(buf, sizeof(buf),
1241 "LoopTop: r15 %d/%llx r7 %d/%llx r11 %d/%llx"
1242 " r12 %d/%llx r13 %d/%llx r14 %d/%llx\n",
1243 (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(),
1244 (int)regs.r7.Valid(), (unsigned long long int)regs.r7.Value(),
1245 (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(),
1246 (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(),
1247 (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(),
1248 (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value());
1249 buf[sizeof(buf)-1] = 0;
1250 mLog(buf);
1251 #else
1252 # error "Unsupported arch"
1253 #endif
// |ia| is the "instruction address" for this frame: the PC for the
// innermost frame, otherwise (on arm) the return address in R14.
1256 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1257 TaggedUWord ia = regs.xip;
1258 TaggedUWord sp = regs.xsp;
1259 #elif defined(LUL_ARCH_arm)
1260 TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14);
1261 TaggedUWord sp = regs.r13;
1262 #else
1263 # error "Unsupported arch"
1264 #endif
1266 if (*aFramesUsed >= aFramesAvail) {
1267 break;
1270 // If we don't have a valid value for the PC, give up.
1271 if (!ia.Valid()) {
1272 break;
1275 // If this is the innermost frame, record the SP value, which
1276 // presumably is valid. If this isn't the innermost frame, and we
1277 // have a valid SP value, check that its SP value isn't less that
1278 // the one we've seen so far, so as to catch potential SP value
1279 // cycles.
1280 if (*aFramesUsed == 0) {
1281 last_valid_sp = sp;
1282 } else {
1283 MOZ_ASSERT(last_valid_sp.Valid());
1284 if (sp.Valid()) {
1285 if (sp.Value() < last_valid_sp.Value()) {
1286 // Hmm, SP going in the wrong direction. Let's stop.
1287 break;
1289 // Remember where we got to.
1290 last_valid_sp = sp;
1294 // For the innermost frame, the IA value is what we need. For all
1295 // other frames, it's actually the return address, so back up one
1296 // byte so as to get it into the calling instruction.
1297 aFramePCs[*aFramesUsed] = ia.Value() - (*aFramesUsed == 0 ? 0 : 1);
1298 aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0;
1299 (*aFramesUsed)++;
1301 // Find the RuleSet for the current IA, if any. This will also
1302 // query the backing (secondary) maps if it isn't found in the
1303 // thread-local cache.
1305 // If this isn't the innermost frame, back up into the calling insn.
1306 if (*aFramesUsed > 1) {
1307 ia.Add(TaggedUWord((uintptr_t)(-1)));
1310 RuleSet* ruleset = cache->Lookup(ia.Value());
1311 if (DEBUG_MAIN) {
1312 char buf[100];
1313 snprintf(buf, sizeof(buf), "ruleset for 0x%llx = %p\n",
1314 (unsigned long long int)ia.Value(), ruleset);
1315 buf[sizeof(buf)-1] = 0;
1316 mLog(buf);
1319 /////////////////////////////////////////////
1320 ////
1321 // On 32 bit x86-linux, syscalls are often done via the VDSO
1322 // function __kernel_vsyscall, which doesn't have a corresponding
1323 // object that we can read debuginfo from. That effectively kills
1324 // off all stack traces for threads blocked in syscalls. Hence
1325 // special-case by looking at the code surrounding the program
1326 // counter.
1328 // 0xf7757420 <__kernel_vsyscall+0>: push %ecx
1329 // 0xf7757421 <__kernel_vsyscall+1>: push %edx
1330 // 0xf7757422 <__kernel_vsyscall+2>: push %ebp
1331 // 0xf7757423 <__kernel_vsyscall+3>: mov %esp,%ebp
1332 // 0xf7757425 <__kernel_vsyscall+5>: sysenter
1333 // 0xf7757427 <__kernel_vsyscall+7>: nop
1334 // 0xf7757428 <__kernel_vsyscall+8>: nop
1335 // 0xf7757429 <__kernel_vsyscall+9>: nop
1336 // 0xf775742a <__kernel_vsyscall+10>: nop
1337 // 0xf775742b <__kernel_vsyscall+11>: nop
1338 // 0xf775742c <__kernel_vsyscall+12>: nop
1339 // 0xf775742d <__kernel_vsyscall+13>: nop
1340 // 0xf775742e <__kernel_vsyscall+14>: int $0x80
1341 // 0xf7757430 <__kernel_vsyscall+16>: pop %ebp
1342 // 0xf7757431 <__kernel_vsyscall+17>: pop %edx
1343 // 0xf7757432 <__kernel_vsyscall+18>: pop %ecx
1344 // 0xf7757433 <__kernel_vsyscall+19>: ret
1346 // In cases where the sampled thread is blocked in a syscall, its
1347 // program counter will point at "pop %ebp". Hence we look for
1348 // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and
1349 // the corresponding register-recovery actions are:
1350 // new_ebp = *(old_esp + 0)
1351 // new eip = *(old_esp + 12)
1352 // new_esp = old_esp + 16
1354 // It may also be the case that the program counter points two
1355 // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in
1356 // the case where the syscall has been restarted but the thread
1357 // hasn't been rescheduled. The code below doesn't handle that;
1358 // it could easily be made to.
1360 #if defined(LUL_PLAT_x86_android) || defined(LUL_PLAT_x86_linux)
1361 if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) {
1362 uintptr_t insns_min, insns_max;
1363 uintptr_t eip = ia.Value();
1364 bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip);
1365 if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) {
1366 uint8_t* eipC = (uint8_t*)eip;
1367 if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D &&
1368 eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) {
1369 TaggedUWord sp_plus_0 = sp;
1370 TaggedUWord sp_plus_12 = sp;
1371 TaggedUWord sp_plus_16 = sp;
1372 sp_plus_12.Add(TaggedUWord(12));
1373 sp_plus_16.Add(TaggedUWord(16));
1374 TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg);
1375 TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg);
1376 TaggedUWord new_esp = sp_plus_16;
1377 if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) {
1378 regs.xbp = new_ebp;
1379 regs.xip = new_eip;
1380 regs.xsp = new_esp;
1381 continue;
1386 #endif
1387 ////
1388 /////////////////////////////////////////////
1390 // So, do we have a ruleset for this address? If so, use it now.
1391 if (ruleset) {
1393 if (DEBUG_MAIN) {
1394 ruleset->Print(mLog); mLog("\n");
1396 // Use the RuleSet to compute the registers for the previous
1397 // frame. |regs| is modified in-place.
1398 UseRuleSet(&regs, aStackImg, ruleset);
1400 } else {
1402 // There's no RuleSet for the specified address, so see if
1403 // it's possible to get anywhere by stack-scanning.
1405 // Use stack scanning frugally.
1406 if (n_scanned_frames++ >= aScannedFramesAllowed) {
1407 break;
1410 // We can't scan the stack without a valid, aligned stack pointer.
1411 if (!sp.IsAligned()) {
1412 break;
1415 bool scan_succeeded = false;
1416 for (int i = 0; i < NUM_SCANNED_WORDS; ++i) {
1417 TaggedUWord aWord = DerefTUW(sp, aStackImg);
1418 // aWord is something we fished off the stack. It should be
1419 // valid, unless we overran the stack bounds.
1420 if (!aWord.Valid()) {
1421 break;
1424 // Now, does aWord point inside a text section and immediately
1425 // after something that looks like a call instruction?
1426 if (mPriMap->MaybeIsReturnPoint(aWord, mSegArray)) {
1427 // Yes it does. Update the unwound registers heuristically,
1428 // using the same schemes as Breakpad does.
1429 scan_succeeded = true;
1430 (*aScannedFramesAcquired)++;
1432 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1433 // The same logic applies for the 32- and 64-bit cases.
1434 // Register names of the form xsp etc refer to (eg) esp in
1435 // the 32-bit case and rsp in the 64-bit case.
1436 # if defined(LUL_ARCH_x64)
1437 const int wordSize = 8;
1438 # else
1439 const int wordSize = 4;
1440 # endif
1441 // The return address -- at XSP -- will have been pushed by
1442 // the CALL instruction. So the caller's XSP value
1443 // immediately before and after that CALL instruction is the
1444 // word above XSP.
1445 regs.xsp = sp;
1446 regs.xsp.Add(TaggedUWord(wordSize));
1448 // aWord points at the return point, so back up one byte
1449 // to put it in the calling instruction.
1450 regs.xip = aWord;
1451 regs.xip.Add(TaggedUWord((uintptr_t)(-1)));
1453 // Computing a new value from the frame pointer is more tricky.
1454 if (regs.xbp.Valid() &&
1455 sp.Valid() && regs.xbp.Value() == sp.Value() - wordSize) {
1456 // One possibility is that the callee begins with the standard
1457 // preamble "push %xbp; mov %xsp, %xbp". In which case, the
1458 // (1) caller's XBP value will be at the word below XSP, and
1459 // (2) the current (callee's) XBP will point at that word:
1460 regs.xbp = DerefTUW(regs.xbp, aStackImg);
1461 } else if (regs.xbp.Valid() &&
1462 sp.Valid() && regs.xbp.Value() >= sp.Value() + wordSize) {
1463 // If that didn't work out, maybe the callee didn't change
1464 // XBP, so it still holds the caller's value. For that to
1465 // be plausible, XBP will need to have a value at least
1466 // higher than XSP since that holds the purported return
1467 // address. In which case do nothing, since XBP already
1468 // holds the "right" value.
1469 } else {
1470 // Mark XBP as invalid, so that subsequent unwind iterations
1471 // don't assume it holds valid data.
1472 regs.xbp = TaggedUWord();
1475 // Move on to the next word up the stack
1476 sp.Add(TaggedUWord(wordSize));
1478 #elif defined(LUL_ARCH_arm)
1479 // Set all registers to be undefined, except for SP(R13) and
1480 // PC(R15).
1482 // aWord points either at the return point, if returning to
1483 // ARM code, or one insn past the return point if returning
1484 // to Thumb code. In both cases, aWord-2 is guaranteed to
1485 // fall within the calling instruction.
1486 regs.r15 = aWord;
1487 regs.r15.Add(TaggedUWord((uintptr_t)(-2)));
1489 // Make SP be the word above the location where the return
1490 // address was found.
1491 regs.r13 = sp;
1492 regs.r13.Add(TaggedUWord(4));
1494 // All other regs are undefined.
1495 regs.r7 = regs.r11 = regs.r12 = regs.r14 = TaggedUWord();
1497 // Move on to the next word up the stack
1498 sp.Add(TaggedUWord(4));
1500 #else
1501 # error "Unknown plat"
1502 #endif
1504 break;
1507 } // for (int i = 0; i < NUM_SCANNED_WORDS; i++)
1509 // We tried to make progress by scanning the stack, but failed.
1510 // So give up -- fall out of the top level unwind loop.
1511 if (!scan_succeeded) {
1512 break;
1516 } // top level unwind loop
1518 // END UNWIND
1519 /////////////////////////////////////////////////////////
1523 void
1524 LUL::InvalidateCFICaches()
1526 // NB: the caller must hold m_rwl for writing.
1528 // FIXME: this could get expensive. Use a bool to remember when the
1529 // caches have been invalidated and hence avoid duplicate invalidations.
1530 for (std::map<pthread_t,CFICache*>::iterator iter = mCaches.begin();
1531 iter != mCaches.end();
1532 ++iter) {
1533 iter->second->Invalidate();
1538 ////////////////////////////////////////////////////////////////
1539 // LUL Unit Testing //
1540 ////////////////////////////////////////////////////////////////
// Size, in bytes, of the stack snapshot that the unit tests hand to
// LUL, and hence an upper bound on the test functions' combined frame
// sizes.
1542 static const int LUL_UNIT_TEST_STACK_SIZE = 16384;
1544 // This function is innermost in the test call sequence. It uses LUL
1545 // to unwind, and compares the result with the sequence specified in
1546 // the director string. These need to agree in order for the test to
1547 // pass. In order not to screw up the results, this function needs
1548 // to have a not-very big stack frame, since we're only presenting
1549 // the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and
1550 // that chunk unavoidably includes the frame for this function.
1552 // This function must not be inlined into its callers. Doing so will
1553 // cause the expected-vs-actual backtrace consistency checking to
1554 // fail. Prints summary results to |aLUL|'s logging sink and also
1555 // returns a boolean indicating whether or not the test passed.
1556 static __attribute__((noinline))
1557 bool GetAndCheckStackTrace(LUL* aLUL, const char* dstring)
1559 // Get hold of the current unwind-start registers.
1560 UnwindRegs startRegs;
1561 memset(&startRegs, 0, sizeof(startRegs));
1562 #if defined(LUL_PLAT_x64_linux)
1563 volatile uintptr_t block[3];
1564 MOZ_ASSERT(sizeof(block) == 24);
1565 __asm__ __volatile__(
1566 "leaq 0(%%rip), %%r15" "\n\t"
1567 "movq %%r15, 0(%0)" "\n\t"
1568 "movq %%rsp, 8(%0)" "\n\t"
1569 "movq %%rbp, 16(%0)" "\n"
1570 : : "r"(&block[0]) : "memory", "r15"
1572 startRegs.xip = TaggedUWord(block[0]);
1573 startRegs.xsp = TaggedUWord(block[1]);
1574 startRegs.xbp = TaggedUWord(block[2]);
1575 const uintptr_t REDZONE_SIZE = 128;
1576 uintptr_t start = block[1] - REDZONE_SIZE;
1577 #elif defined(LUL_PLAT_x86_linux) || defined(LUL_PLAT_x86_android)
1578 volatile uintptr_t block[3];
1579 MOZ_ASSERT(sizeof(block) == 12);
1580 __asm__ __volatile__(
1581 ".byte 0xE8,0x00,0x00,0x00,0x00"/*call next insn*/ "\n\t"
1582 "popl %%edi" "\n\t"
1583 "movl %%edi, 0(%0)" "\n\t"
1584 "movl %%esp, 4(%0)" "\n\t"
1585 "movl %%ebp, 8(%0)" "\n"
1586 : : "r"(&block[0]) : "memory", "edi"
1588 startRegs.xip = TaggedUWord(block[0]);
1589 startRegs.xsp = TaggedUWord(block[1]);
1590 startRegs.xbp = TaggedUWord(block[2]);
1591 const uintptr_t REDZONE_SIZE = 0;
1592 uintptr_t start = block[1] - REDZONE_SIZE;
1593 #elif defined(LUL_PLAT_arm_android)
1594 volatile uintptr_t block[6];
1595 MOZ_ASSERT(sizeof(block) == 24);
1596 __asm__ __volatile__(
1597 "mov r0, r15" "\n\t"
1598 "str r0, [%0, #0]" "\n\t"
1599 "str r14, [%0, #4]" "\n\t"
1600 "str r13, [%0, #8]" "\n\t"
1601 "str r12, [%0, #12]" "\n\t"
1602 "str r11, [%0, #16]" "\n\t"
1603 "str r7, [%0, #20]" "\n"
1604 : : "r"(&block[0]) : "memory", "r0"
1606 startRegs.r15 = TaggedUWord(block[0]);
1607 startRegs.r14 = TaggedUWord(block[1]);
1608 startRegs.r13 = TaggedUWord(block[2]);
1609 startRegs.r12 = TaggedUWord(block[3]);
1610 startRegs.r11 = TaggedUWord(block[4]);
1611 startRegs.r7 = TaggedUWord(block[5]);
1612 const uintptr_t REDZONE_SIZE = 0;
// NOTE(review): on this (ARM) path, block[1] holds R14 (the link
// register), not the stack pointer -- R13 was stored in block[2].
// The x86/x64 paths use block[1] because there it holds the SP.
// This looks like it should be block[2]; confirm against a working
// ARM run before changing.
1613 uintptr_t start = block[1] - REDZONE_SIZE;
1614 #else
1615 # error "Unsupported platform"
1616 #endif
1618 // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the
1619 // stack.
1620 uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE;
1621 uintptr_t ws = sizeof(void*);
1622 start &= ~(ws-1);
1623 end &= ~(ws-1);
1624 uintptr_t nToCopy = end - start;
1625 if (nToCopy > lul::N_STACK_BYTES) {
1626 nToCopy = lul::N_STACK_BYTES;
1628 MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
1629 StackImage* stackImg = new StackImage();
1630 stackImg->mLen = nToCopy;
1631 stackImg->mStartAvma = start;
1632 if (nToCopy > 0) {
// MOZ_MAKE_MEM_DEFINED quietens memory checkers (e.g. Valgrind)
// about reading possibly-uninitialised stack bytes.
1633 MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy);
1634 memcpy(&stackImg->mContents[0], (void*)start, nToCopy);
1637 // Unwind it.
1638 const int MAX_TEST_FRAMES = 64;
1639 uintptr_t framePCs[MAX_TEST_FRAMES];
1640 uintptr_t frameSPs[MAX_TEST_FRAMES];
1641 size_t framesAvail = mozilla::ArrayLength(framePCs);
1642 size_t framesUsed = 0;
// Stack scanning is disabled here: the test checks pure CFI-driven
// unwinding.
1643 size_t scannedFramesAllowed = 0;
1644 size_t scannedFramesAcquired = 0;
1645 aLUL->Unwind( &framePCs[0], &frameSPs[0],
1646 &framesUsed, &scannedFramesAcquired,
1647 framesAvail, scannedFramesAllowed,
1648 &startRegs, stackImg );
1650 delete stackImg;
1652 //if (0) {
1653 // // Show what we have.
1654 // fprintf(stderr, "Got %d frames:\n", (int)framesUsed);
1655 // for (size_t i = 0; i < framesUsed; i++) {
1656 // fprintf(stderr, " [%2d] SP %p PC %p\n",
1657 // (int)i, (void*)frameSPs[i], (void*)framePCs[i]);
1658 // }
1659 // fprintf(stderr, "\n");
1662 // Check to see if there's a consistent binding between digits in
1663 // the director string ('1' .. '8') and the PC values acquired by
1664 // the unwind. If there isn't, the unwinding has failed somehow.
1665 uintptr_t binding[8]; // binding for '1' .. binding for '8'
1666 memset((void*)binding, 0, sizeof(binding));
1668 // The general plan is to work backwards along the director string
1669 // and forwards along the framePCs array. Doing so corresponds to
1670 // working outwards from the innermost frame of the recursive test set.
1671 const char* cursor = dstring;
1673 // Find the end. This leaves |cursor| two bytes past the first
1674 // character we want to look at -- see comment below.
1675 while (*cursor) cursor++;
1677 // Counts the number of consistent frames.
1678 size_t nConsistent = 0;
1680 // Iterate back to the start of the director string. The starting
1681 // points are a bit complex. We can't use framePCs[0] because that
1682 // contains the PC in this frame (above). We can't use framePCs[1]
1683 // because that will contain the PC at return point in the recursive
1684 // test group (TestFn[1-8]) for their call "out" to this function,
1685 // GetAndCheckStackTrace. Although LUL will compute a correct
1686 // return address, that will not be the same return address as for a
1687 // recursive call out of the the function to another function in the
1688 // group. Hence we can only start consistency checking at
1689 // framePCs[2].
1691 // To be consistent, then, we must ignore the last element in the
1692 // director string as that corresponds to framePCs[1]. Hence the
1693 // start points are: framePCs[2] and the director string 2 bytes
1694 // before the terminating zero.
1696 // Also as a result of this, the number of consistent frames counted
1697 // will always be one less than the length of the director string
1698 // (not including its terminating zero).
1699 size_t frameIx;
1700 for (cursor = cursor-2, frameIx = 2;
1701 cursor >= dstring && frameIx < framesUsed;
1702 cursor--, frameIx++) {
1703 char c = *cursor;
1704 uintptr_t pc = framePCs[frameIx];
1705 // If this doesn't hold, the director string is ill-formed.
1706 MOZ_ASSERT(c >= '1' && c <= '8');
1707 int n = ((int)c) - ((int)'1');
1708 if (binding[n] == 0) {
1709 // There's no binding for |c| yet, so install |pc| and carry on.
1710 binding[n] = pc;
1711 nConsistent++;
1712 continue;
1714 // There's a pre-existing binding for |c|. Check it's consistent.
1715 if (binding[n] != pc) {
1716 // Not consistent. Give up now.
1717 break;
1719 // Consistent. Keep going.
1720 nConsistent++;
1723 // So, did we succeed?
1724 bool passed = nConsistent+1 == strlen(dstring);
1726 // Show the results.
1727 char buf[200];
1728 snprintf(buf, sizeof(buf), "LULUnitTest: dstring = %s\n", dstring);
1729 buf[sizeof(buf)-1] = 0;
1730 aLUL->mLog(buf);
1731 snprintf(buf, sizeof(buf),
1732 "LULUnitTest: %d consistent, %d in dstring: %s\n",
1733 (int)nConsistent, (int)strlen(dstring),
1734 passed ? "PASS" : "FAIL");
1735 buf[sizeof(buf)-1] = 0;
1736 aLUL->mLog(buf);
1738 return passed;
1742 // Macro magic to create a set of 8 mutually recursive functions with
1743 // varying frame sizes. These will recurse amongst themselves as
1744 // specified by |strP|, the directory string, and call
1745 // GetAndCheckStackTrace when the string becomes empty, passing it the
1746 // original value of the string. This checks the result, printing
1747 // results on |aLUL|'s logging sink, and also returns a boolean
1748 // indicating whether or not the results are acceptable (correct).
// Declares the signature shared by all eight test functions.
1750 #define DECL_TEST_FN(NAME) \
1751 bool NAME(LUL* aLUL, const char* strPorig, const char* strP);
// Defines one test function.  |FRAMESIZE| pads the frame with a
// zeroed volatile array so the eight functions have distinct frame
// sizes.  Note that any director-string character outside '1'..'8'
// falls into the switch's default case and recurses via TestFn8.
1753 #define GEN_TEST_FN(NAME, FRAMESIZE) \
1754 bool NAME(LUL* aLUL, const char* strPorig, const char* strP) { \
1755 volatile char space[FRAMESIZE]; \
1756 memset((char*)&space[0], 0, sizeof(space)); \
1757 if (*strP == '\0') { \
1758 /* We've come to the end of the director string. */ \
1759 /* Take a stack snapshot. */ \
1760 return GetAndCheckStackTrace(aLUL, strPorig); \
1761 } else { \
1762 /* Recurse onwards. This is a bit subtle. The obvious */ \
1763 /* thing to do here is call onwards directly, from within the */ \
1764 /* arms of the case statement. That gives a problem in that */ \
1765 /* there will be multiple return points inside each function when */ \
1766 /* unwinding, so it will be difficult to check for consistency */ \
1767 /* against the director string. Instead, we make an indirect */ \
1768 /* call, so as to guarantee that there is only one call site */ \
1769 /* within each function. This does assume that the compiler */ \
1770 /* won't transform it back to the simple direct-call form. */ \
1771 /* To discourage it from doing so, the call is bracketed with */ \
1772 /* __asm__ __volatile__ sections so as to make it not-movable. */ \
1773 bool (*nextFn)(LUL*, const char*, const char*) = NULL; \
1774 switch (*strP) { \
1775 case '1': nextFn = TestFn1; break; \
1776 case '2': nextFn = TestFn2; break; \
1777 case '3': nextFn = TestFn3; break; \
1778 case '4': nextFn = TestFn4; break; \
1779 case '5': nextFn = TestFn5; break; \
1780 case '6': nextFn = TestFn6; break; \
1781 case '7': nextFn = TestFn7; break; \
1782 case '8': nextFn = TestFn8; break; \
1783 default: nextFn = TestFn8; break; \
1785 __asm__ __volatile__("":::"cc","memory"); \
1786 bool passed = nextFn(aLUL, strPorig, strP+1); \
1787 __asm__ __volatile__("":::"cc","memory"); \
1788 return passed; \
1792 // The test functions are mutually recursive, so it is necessary to
1793 // declare them before defining them.
1794 DECL_TEST_FN(TestFn1)
1795 DECL_TEST_FN(TestFn2)
1796 DECL_TEST_FN(TestFn3)
1797 DECL_TEST_FN(TestFn4)
1798 DECL_TEST_FN(TestFn5)
1799 DECL_TEST_FN(TestFn6)
1800 DECL_TEST_FN(TestFn7)
1801 DECL_TEST_FN(TestFn8)
// Frame sizes are deliberately varied so the test exercises a range
// of stack layouts.
1803 GEN_TEST_FN(TestFn1, 123)
1804 GEN_TEST_FN(TestFn2, 456)
1805 GEN_TEST_FN(TestFn3, 789)
1806 GEN_TEST_FN(TestFn4, 23)
1807 GEN_TEST_FN(TestFn5, 47)
1808 GEN_TEST_FN(TestFn6, 117)
1809 GEN_TEST_FN(TestFn7, 1)
1810 GEN_TEST_FN(TestFn8, 99)
1813 // This starts the test sequence going. Call here to generate a
1814 // sequence of calls as directed by the string |dstring|. The call
1815 // sequence will, from its innermost frame, finish by calling
1816 // GetAndCheckStackTrace() and passing it |dstring|.
1817 // GetAndCheckStackTrace() will unwind the stack, check consistency
1818 // of those results against |dstring|, and print a pass/fail message
1819 // to aLUL's logging sink. It also updates the counters in *aNTests
1820 // and aNTestsPassed.
// |aNTests| is incremented once per call; |aNTestsPassed| only when
// the backtrace-consistency check succeeds.
1821 __attribute__((noinline)) void
1822 TestUnw(/*OUT*/int* aNTests, /*OUT*/int*aNTestsPassed,
1823 LUL* aLUL, const char* dstring)
1825 // Ensure that the stack has at least this much space on it. This
1826 // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes
1827 // and hand it to LUL. Safe in the sense that no segfault can
1828 // happen because the stack is at least this big. This is all
1829 // somewhat dubious in the sense that a sufficiently clever compiler
1830 // (clang, for one) can figure out that space[] is unused and delete
1831 // it from the frame. Hence the somewhat elaborate hoop jumping to
1832 // fill it up before the call and to at least appear to use the
1833 // value afterwards.
1834 int i;
1835 volatile char space[LUL_UNIT_TEST_STACK_SIZE];
1836 for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
1837 space[i] = (char)(i & 0x7F);
1840 // Really run the test.
1841 bool passed = TestFn1(aLUL, dstring, dstring);
1843 // Appear to use space[], by visiting the value to compute some kind
1844 // of checksum, and then (apparently) using the checksum.
1845 int sum = 0;
1846 for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
1847 // If this doesn't fool LLVM, I don't know what will.
1848 sum += space[i] - 3*i;
// Pretend to consume |sum| so the checksum loop (and hence space[])
// cannot be optimised away.
1850 __asm__ __volatile__("" : : "r"(sum));
1852 // Update the counters.
1853 (*aNTests)++;
1854 if (passed) {
1855 (*aNTestsPassed)++;
1860 void
1861 RunLulUnitTests(/*OUT*/int* aNTests, /*OUT*/int*aNTestsPassed, LUL* aLUL)
1863 aLUL->mLog(":\n");
1864 aLUL->mLog("LULUnitTest: BEGIN\n");
1865 *aNTests = *aNTestsPassed = 0;
1866 TestUnw(aNTests, aNTestsPassed, aLUL, "11111111");
1867 TestUnw(aNTests, aNTestsPassed, aLUL, "11222211");
1868 TestUnw(aNTests, aNTestsPassed, aLUL, "111222333");
1869 TestUnw(aNTests, aNTestsPassed, aLUL, "1212121231212331212121212121212");
1870 TestUnw(aNTests, aNTestsPassed, aLUL, "31415827271828325332173258");
1871 TestUnw(aNTests, aNTestsPassed, aLUL,
1872 "123456781122334455667788777777777777777777777");
1873 aLUL->mLog("LULUnitTest: END\n");
1874 aLUL->mLog(":\n");
1878 } // namespace lul