1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
13 #include <algorithm> // std::sort
16 #include "mozilla/Assertions.h"
17 #include "mozilla/ArrayUtils.h"
18 #include "mozilla/MemoryChecking.h"
20 #include "LulCommonExt.h"
21 #include "LulElfExt.h"
23 #include "LulMainInt.h"
25 // Set this to 1 for verbose logging
35 ////////////////////////////////////////////////////////////////
37 ////////////////////////////////////////////////////////////////
39 // This is a simple RAII class that manages acquisition and release of
40 // LulRWLock reader-writer locks.
// RAII helper: acquires a LulRWLock reader-writer lock on construction,
// either for reading or for writing as selected by |mode|, and --
// per the comment above -- releases it again on destruction.
// NOTE(review): this extraction is incomplete; the constructor body and
// the destructor are not visible (original line numbers jump 45 -> 48,
// then to 64), so the release path cannot be confirmed here.
42 class AutoLulRWLocker
{
// Selects which side of the reader-writer lock to take.
44 enum AcqMode
{ FOR_READING
, FOR_WRITING
};
// Constructor: takes the lock on |aRWLock| in the requested mode.
45 AutoLulRWLocker(LulRWLock
* aRWLock
, AcqMode mode
)
48 if (mode
== FOR_WRITING
) {
64 ////////////////////////////////////////////////////////////////
66 ////////////////////////////////////////////////////////////////
// Map a DWARF register number to a short printable name, for the
// architecture this file is being compiled for.  Unrecognised values
// map to "???".  Used for debug printing only.
// NOTE(review): incomplete extraction -- the return type, the
// switch(aReg) header, and the closing braces are not visible here
// (original line numbers jump 69 -> 72 and 83 -> 85 -> 87).
69 NameOf_DW_REG(int16_t aReg
)
72 case DW_REG_CFA
: return "cfa";
73 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
74 case DW_REG_INTEL_XBP
: return "xbp";
75 case DW_REG_INTEL_XSP
: return "xsp";
76 case DW_REG_INTEL_XIP
: return "xip";
77 #elif defined(LUL_ARCH_arm)
78 case DW_REG_ARM_R7
: return "r7";
79 case DW_REG_ARM_R11
: return "r11";
80 case DW_REG_ARM_R12
: return "r12";
81 case DW_REG_ARM_R13
: return "r13";
82 case DW_REG_ARM_R14
: return "r14";
83 case DW_REG_ARM_R15
: return "r15";
85 # error "Unsupported arch"
87 default: return "???";
92 ShowRule(const char* aNewReg
, LExpr aExpr
)
95 string res
= string(aNewReg
) + "=";
101 sprintf(buf
, "%s+%d", NameOf_DW_REG(aExpr
.mReg
), (int)aExpr
.mOffset
);
105 sprintf(buf
, "*(%s+%d)", NameOf_DW_REG(aExpr
.mReg
), (int)aExpr
.mOffset
);
116 RuleSet::Print(void(*aLog
)(const char*))
119 sprintf(buf
, "[%llx .. %llx]: let ",
120 (unsigned long long int)mAddr
,
121 (unsigned long long int)(mAddr
+ mLen
- 1));
122 string res
= string(buf
);
123 res
+= ShowRule("cfa", mCfaExpr
);
125 // For each reg we care about, print the recovery expression.
126 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
127 res
+= ShowRule(" RA", mXipExpr
);
128 res
+= ShowRule(" SP", mXspExpr
);
129 res
+= ShowRule(" BP", mXbpExpr
);
130 #elif defined(LUL_ARCH_arm)
131 res
+= ShowRule(" R15", mR15expr
);
132 res
+= ShowRule(" R7", mR7expr
);
133 res
+= ShowRule(" R11", mR11expr
);
134 res
+= ShowRule(" R12", mR12expr
);
135 res
+= ShowRule(" R13", mR13expr
);
136 res
+= ShowRule(" R14", mR14expr
);
138 # error "Unsupported arch"
144 RuleSet::ExprForRegno(DW_REG_NUMBER aRegno
) {
146 case DW_REG_CFA
: return &mCfaExpr
;
147 # if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
148 case DW_REG_INTEL_XIP
: return &mXipExpr
;
149 case DW_REG_INTEL_XSP
: return &mXspExpr
;
150 case DW_REG_INTEL_XBP
: return &mXbpExpr
;
151 # elif defined(LUL_ARCH_arm)
152 case DW_REG_ARM_R15
: return &mR15expr
;
153 case DW_REG_ARM_R14
: return &mR14expr
;
154 case DW_REG_ARM_R13
: return &mR13expr
;
155 case DW_REG_ARM_R12
: return &mR12expr
;
156 case DW_REG_ARM_R11
: return &mR11expr
;
157 case DW_REG_ARM_R7
: return &mR7expr
;
159 # error "Unknown arch"
161 default: return nullptr;
169 // The only other fields are of type LExpr and those are initialised
170 // by LExpr::LExpr().
174 ////////////////////////////////////////////////////////////////
176 ////////////////////////////////////////////////////////////////
178 // See header file LulMainInt.h for comments about invariants.
180 SecMap::SecMap(void(*aLog
)(const char*))
192 SecMap::FindRuleSet(uintptr_t ia
) {
193 // Binary search mRuleSets to find one that brackets |ia|.
194 // lo and hi need to be signed, else the loop termination tests
195 // don't work properly. Note that this works correctly even when
196 // mRuleSets.size() == 0.
198 // Can't do this until the array has been sorted and preened.
202 long int hi
= (long int)mRuleSets
.size() - 1;
204 // current unsearched space is from lo to hi, inclusive.
209 long int mid
= lo
+ ((hi
- lo
) / 2);
210 RuleSet
* mid_ruleSet
= &mRuleSets
[mid
];
211 uintptr_t mid_minAddr
= mid_ruleSet
->mAddr
;
212 uintptr_t mid_maxAddr
= mid_minAddr
+ mid_ruleSet
->mLen
- 1;
213 if (ia
< mid_minAddr
) { hi
= mid
-1; continue; }
214 if (ia
> mid_maxAddr
) { lo
= mid
+1; continue; }
215 MOZ_ASSERT(mid_minAddr
<= ia
&& ia
<= mid_maxAddr
);
221 // Add a RuleSet to the collection. The rule is copied in. Calling
222 // this makes the map non-searchable.
224 SecMap::AddRuleSet(RuleSet
* rs
) {
226 mRuleSets
.push_back(*rs
);
// Strict-weak-ordering comparator for std::sort (see PrepareRuleSets):
// orders RuleSets by ascending start address (mAddr).
// NOTE(review): the return type line (presumably "static bool") and the
// closing brace are missing from this extraction.
231 CmpRuleSetsByAddrLE(const RuleSet
& rs1
, const RuleSet
& rs2
) {
232 return rs1
.mAddr
< rs2
.mAddr
;
235 // Prepare the map for searching. Completely remove any which don't
236 // fall inside the specified range [start, +len).
238 SecMap::PrepareRuleSets(uintptr_t aStart
, size_t aLen
)
240 if (mRuleSets
.empty()) {
244 MOZ_ASSERT(aLen
> 0);
246 // This should never happen.
251 // Sort by start addresses.
252 std::sort(mRuleSets
.begin(), mRuleSets
.end(), CmpRuleSetsByAddrLE
);
254 // Detect any entry not completely contained within [start, +len).
255 // Set its length to zero, so that the next pass will remove it.
256 for (size_t i
= 0; i
< mRuleSets
.size(); ++i
) {
257 RuleSet
* rs
= &mRuleSets
[i
];
259 (rs
->mAddr
< aStart
|| rs
->mAddr
+ rs
->mLen
> aStart
+ aLen
)) {
264 // Iteratively truncate any overlaps and remove any zero length
265 // entries that might result, or that may have been present
266 // initially. Unless the input is seriously screwy, this is
267 // expected to iterate only once.
270 size_t n
= mRuleSets
.size();
277 for (i
= 1; i
< n
; ++i
) {
278 RuleSet
* prev
= &mRuleSets
[i
-1];
279 RuleSet
* here
= &mRuleSets
[i
];
280 MOZ_ASSERT(prev
->mAddr
<= here
->mAddr
);
281 if (prev
->mAddr
+ prev
->mLen
> here
->mAddr
) {
282 prev
->mLen
= here
->mAddr
- prev
->mAddr
;
288 if (mRuleSets
[n
-1].mLen
== 0) {
292 // At this point, the entries are in-order and non-overlapping.
293 // If none of them are zero-length, we are done.
298 // Slide back the entries to remove the zero length ones.
299 size_t j
= 0; // The write-point.
300 for (i
= 0; i
< n
; ++i
) {
301 if (mRuleSets
[i
].mLen
== 0) {
304 if (j
!= i
) mRuleSets
[j
] = mRuleSets
[i
];
308 MOZ_ASSERT(nZeroLen
<= n
);
309 MOZ_ASSERT(j
== n
- nZeroLen
);
310 while (nZeroLen
> 0) {
311 mRuleSets
.pop_back();
315 MOZ_ASSERT(mRuleSets
.size() == j
);
318 size_t n
= mRuleSets
.size();
321 // Do a final check on the rules: their address ranges must be
322 // ascending, non overlapping, non zero sized.
324 MOZ_ASSERT(mRuleSets
[0].mLen
> 0);
325 for (size_t i
= 1; i
< n
; ++i
) {
326 RuleSet
* prev
= &mRuleSets
[i
-1];
327 RuleSet
* here
= &mRuleSets
[i
];
328 MOZ_ASSERT(prev
->mAddr
< here
->mAddr
);
329 MOZ_ASSERT(here
->mLen
> 0);
330 MOZ_ASSERT(prev
->mAddr
+ prev
->mLen
<= here
->mAddr
);
335 // Set the summary min and max address values.
337 // Use the values defined in comments in the class declaration.
341 mSummaryMinAddr
= mRuleSets
[0].mAddr
;
342 mSummaryMaxAddr
= mRuleSets
[n
-1].mAddr
+ mRuleSets
[n
-1].mLen
- 1;
345 snprintf(buf
, sizeof(buf
),
346 "PrepareRuleSets: %d entries, smin/smax 0x%llx, 0x%llx\n",
347 (int)n
, (unsigned long long int)mSummaryMinAddr
,
348 (unsigned long long int)mSummaryMaxAddr
);
349 buf
[sizeof(buf
)-1] = 0;
352 // Is now usable for binary search.
356 mLog("\nRulesets after preening\n");
357 for (size_t i
= 0; i
< mRuleSets
.size(); ++i
) {
358 mRuleSets
[i
].Print(mLog
);
// True iff this SecMap holds no RuleSets, i.e. there is nothing to
// search.  Used by PriMap::AddSecMap to reject useless maps.
// NOTE(review): the method's closing brace is not visible in this
// extraction.
365 bool SecMap::IsEmpty() {
366 return mRuleSets
.empty();
370 ////////////////////////////////////////////////////////////////
372 ////////////////////////////////////////////////////////////////
374 // A SegArray holds a set of address ranges that together exactly
375 // cover an address range, with no overlaps or holes. Each range has
376 // an associated value, which in this case has been specialised to be
377 // a simple boolean. The representation is kept to minimal canonical
378 // form in which adjacent ranges with the same associated value are
379 // merged together. Each range is represented by a |struct Seg|.
381 // SegArrays are used to keep track of which parts of the address
382 // space are known to contain instructions.
386 void add(uintptr_t lo
, uintptr_t hi
, bool val
) {
391 if (hi
< UINTPTR_MAX
) {
394 std::vector
<Seg
>::size_type iLo
, iHi
, i
;
397 for (i
= iLo
; i
<= iHi
; ++i
) {
403 bool getBoundingCodeSegment(/*OUT*/uintptr_t* rx_min
,
404 /*OUT*/uintptr_t* rx_max
, uintptr_t addr
) {
405 std::vector
<Seg
>::size_type i
= find(addr
);
409 *rx_min
= mSegs
[i
].lo
;
410 *rx_max
= mSegs
[i
].hi
;
415 Seg
s(0, UINTPTR_MAX
, false);
421 Seg(uintptr_t lo
, uintptr_t hi
, bool val
) : lo(lo
), hi(hi
), val(val
) {}
428 for (std::vector
<Seg
>::iterator iter
= mSegs
.begin();
429 iter
< mSegs
.end()-1;
431 if (iter
[0].val
!= iter
[1].val
) {
434 iter
[0].hi
= iter
[1].hi
;
436 // Back up one, so as not to miss an opportunity to merge
437 // with the entry after this one.
442 std::vector
<Seg
>::size_type
find(uintptr_t a
) {
444 long int hi
= (long int)mSegs
.size();
446 // The unsearched space is lo .. hi inclusive.
448 // Not found. This can't happen.
449 return (std::vector
<Seg
>::size_type
)(-1);
451 long int mid
= lo
+ ((hi
- lo
) / 2);
452 uintptr_t mid_lo
= mSegs
[mid
].lo
;
453 uintptr_t mid_hi
= mSegs
[mid
].hi
;
454 if (a
< mid_lo
) { hi
= mid
-1; continue; }
455 if (a
> mid_hi
) { lo
= mid
+1; continue; }
456 return (std::vector
<Seg
>::size_type
)mid
;
460 void split_at(uintptr_t a
) {
461 std::vector
<Seg
>::size_type i
= find(a
);
462 if (mSegs
[i
].lo
== a
) {
465 mSegs
.insert( mSegs
.begin()+i
+1, mSegs
[i
] );
471 printf("<< %d entries:\n", (int)mSegs
.size());
472 for (std::vector
<Seg
>::iterator iter
= mSegs
.begin();
475 printf(" %016llx %016llx %s\n",
476 (unsigned long long int)(*iter
).lo
,
477 (unsigned long long int)(*iter
).hi
,
478 (*iter
).val
? "true" : "false");
483 std::vector
<Seg
> mSegs
;
487 ////////////////////////////////////////////////////////////////
489 ////////////////////////////////////////////////////////////////
493 PriMap(void (*aLog
)(const char*))
498 for (std::vector
<SecMap
*>::iterator iter
= mSecMaps
.begin();
499 iter
!= mSecMaps
.end();
506 // This can happen with the global lock held for reading.
// Find the RuleSet covering instruction address |ia|: first locate the
// SecMap whose summary range brackets |ia|, then search inside it.
// Returns nullptr when no SecMap covers |ia|.
// NOTE(review): the closing brace is not visible in this extraction.
507 RuleSet
* Lookup(uintptr_t ia
) {
508 SecMap
* sm
= FindSecMap(ia
);
509 return sm
? sm
->FindRuleSet(ia
) : nullptr;
512 // Add a secondary map. No overlaps allowed w.r.t. existing
513 // secondary maps. Global lock must be held for writing.
514 void AddSecMap(SecMap
* aSecMap
) {
515 // We can't add an empty SecMap to the PriMap. But that's OK
516 // since we'd never be able to find anything in it anyway.
517 if (aSecMap
->IsEmpty()) {
521 // Iterate through the SecMaps and find the right place for this
522 // one. At the same time, ensure that the in-order
523 // non-overlapping invariant is preserved (and, generally, holds).
524 // FIXME: this gives a cost that is O(N^2) in the total number of
525 // shared objects in the system. ToDo: better.
526 MOZ_ASSERT(aSecMap
->mSummaryMinAddr
<= aSecMap
->mSummaryMaxAddr
);
528 size_t num_secMaps
= mSecMaps
.size();
530 for (i
= 0; i
< num_secMaps
; ++i
) {
531 SecMap
* sm_i
= mSecMaps
[i
];
532 MOZ_ASSERT(sm_i
->mSummaryMinAddr
<= sm_i
->mSummaryMaxAddr
);
533 if (aSecMap
->mSummaryMinAddr
< sm_i
->mSummaryMaxAddr
) {
534 // |aSecMap| needs to be inserted immediately before mSecMaps[i].
538 MOZ_ASSERT(i
<= num_secMaps
);
539 if (i
== num_secMaps
) {
540 // It goes at the end.
541 mSecMaps
.push_back(aSecMap
);
543 std::vector
<SecMap
*>::iterator iter
= mSecMaps
.begin() + i
;
544 mSecMaps
.insert(iter
, aSecMap
);
547 snprintf(buf
, sizeof(buf
), "AddSecMap: now have %d SecMaps\n",
548 (int)mSecMaps
.size());
549 buf
[sizeof(buf
)-1] = 0;
553 // Remove and delete any SecMaps in the mapping, that intersect
554 // with the specified address range.
555 void RemoveSecMapsInRange(uintptr_t avma_min
, uintptr_t avma_max
) {
556 MOZ_ASSERT(avma_min
<= avma_max
);
557 size_t num_secMaps
= mSecMaps
.size();
558 if (num_secMaps
> 0) {
560 // Iterate from end to start over the vector, so as to ensure
561 // that the special case where |avma_min| and |avma_max| denote
562 // the entire address space, can be completed in time proportional
563 // to the number of elements in the map.
564 for (i
= (intptr_t)num_secMaps
-1; i
>= 0; i
--) {
565 SecMap
* sm_i
= mSecMaps
[i
];
566 if (sm_i
->mSummaryMaxAddr
< avma_min
||
567 avma_max
< sm_i
->mSummaryMinAddr
) {
568 // There's no overlap. Move on.
571 // We need to remove mSecMaps[i] and slide all those above it
572 // downwards to cover the hole.
573 mSecMaps
.erase(mSecMaps
.begin() + i
);
579 // Return the number of currently contained SecMaps.
580 size_t CountSecMaps() {
581 return mSecMaps
.size();
// NOTE(review): the method's closing brace has been elided from this
// extraction.
584 // Assess heuristically whether the given address is an instruction
585 // immediately following a call instruction. The caller is required
586 // to hold the global lock for reading.
587 bool MaybeIsReturnPoint(TaggedUWord aInstrAddr
, SegArray
* aSegArray
) {
588 if (!aInstrAddr
.Valid()) {
592 uintptr_t ia
= aInstrAddr
.Value();
594 // Assume that nobody would be crazy enough to put code in the
595 // first or last page.
596 if (ia
< 4096 || ((uintptr_t)(-ia
)) < 4096) {
600 // See if it falls inside a known r-x mapped area. Poking around
601 // outside such places risks segfaulting.
602 uintptr_t insns_min
, insns_max
;
603 bool b
= aSegArray
->getBoundingCodeSegment(&insns_min
, &insns_max
, ia
);
605 // no code (that we know about) at this address
609 // |ia| falls within an r-x range. So we can
610 // safely poke around in [insns_min, insns_max].
612 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
613 // Is the previous instruction recognisably a CALL? This is
614 // common for the 32- and 64-bit versions, except for the
615 // simm32(%rip) case, which is 64-bit only.
617 // For all other cases, the 64 bit versions are either identical
618 // to the 32 bit versions, or have an optional extra leading REX.W
619 // byte (0x41). Since the extra 0x41 is optional we have to
620 // ignore it, with the convenient result that the same matching
621 // logic works for both 32- and 64-bit cases.
623 uint8_t* p
= (uint8_t*)ia
;
624 # if defined(LUL_ARCH_x64)
625 // CALL simm32(%rip) == FF15 simm32
626 if (ia
- 6 >= insns_min
&& p
[-6] == 0xFF && p
[-5] == 0x15) {
630 // CALL rel32 == E8 rel32 (both 32- and 64-bit)
631 if (ia
- 5 >= insns_min
&& p
[-5] == 0xE8) {
634 // CALL *%eax .. CALL *%edi == FFD0 .. FFD7 (32-bit)
635 // CALL *%rax .. CALL *%rdi == FFD0 .. FFD7 (64-bit)
636 // CALL *%r8 .. CALL *%r15 == 41FFD0 .. 41FFD7 (64-bit)
637 if (ia
- 2 >= insns_min
&&
638 p
[-2] == 0xFF && p
[-1] >= 0xD0 && p
[-1] <= 0xD7) {
641 // Almost all of the remaining cases that occur in practice are
642 // of the form CALL *simm8(reg) or CALL *simm32(reg).
646 // call *simm8(%rax) FF50 simm8
647 // call *simm8(%rcx) FF51 simm8
648 // call *simm8(%rdx) FF52 simm8
649 // call *simm8(%rbx) FF53 simm8
650 // call *simm8(%rsp) FF5424 simm8
651 // call *simm8(%rbp) FF55 simm8
652 // call *simm8(%rsi) FF56 simm8
653 // call *simm8(%rdi) FF57 simm8
655 // call *simm8(%r8) 41FF50 simm8
656 // call *simm8(%r9) 41FF51 simm8
657 // call *simm8(%r10) 41FF52 simm8
658 // call *simm8(%r11) 41FF53 simm8
659 // call *simm8(%r12) 41FF5424 simm8
660 // call *simm8(%r13) 41FF55 simm8
661 // call *simm8(%r14) 41FF56 simm8
662 // call *simm8(%r15) 41FF57 simm8
664 // call *simm32(%rax) FF90 simm32
665 // call *simm32(%rcx) FF91 simm32
666 // call *simm32(%rdx) FF92 simm32
667 // call *simm32(%rbx) FF93 simm32
668 // call *simm32(%rsp) FF9424 simm32
669 // call *simm32(%rbp) FF95 simm32
670 // call *simm32(%rsi) FF96 simm32
671 // call *simm32(%rdi) FF97 simm32
673 // call *simm32(%r8) 41FF90 simm32
674 // call *simm32(%r9) 41FF91 simm32
675 // call *simm32(%r10) 41FF92 simm32
676 // call *simm32(%r11) 41FF93 simm32
677 // call *simm32(%r12) 41FF9424 simm32
678 // call *simm32(%r13) 41FF95 simm32
679 // call *simm32(%r14) 41FF96 simm32
680 // call *simm32(%r15) 41FF97 simm32
684 // call *simm8(%eax) FF50 simm8
685 // call *simm8(%ecx) FF51 simm8
686 // call *simm8(%edx) FF52 simm8
687 // call *simm8(%ebx) FF53 simm8
688 // call *simm8(%esp) FF5424 simm8
689 // call *simm8(%ebp) FF55 simm8
690 // call *simm8(%esi) FF56 simm8
691 // call *simm8(%edi) FF57 simm8
693 // call *simm32(%eax) FF90 simm32
694 // call *simm32(%ecx) FF91 simm32
695 // call *simm32(%edx) FF92 simm32
696 // call *simm32(%ebx) FF93 simm32
697 // call *simm32(%esp) FF9424 simm32
698 // call *simm32(%ebp) FF95 simm32
699 // call *simm32(%esi) FF96 simm32
700 // call *simm32(%edi) FF97 simm32
701 if (ia
- 3 >= insns_min
&&
703 (p
[-2] >= 0x50 && p
[-2] <= 0x57 && p
[-2] != 0x54)) {
704 // imm8 case, not including %esp/%rsp
707 if (ia
- 4 >= insns_min
&&
708 p
[-4] == 0xFF && p
[-3] == 0x54 && p
[-2] == 0x24) {
709 // imm8 case for %esp/%rsp
712 if (ia
- 6 >= insns_min
&&
714 (p
[-5] >= 0x90 && p
[-5] <= 0x97 && p
[-5] != 0x94)) {
715 // imm32 case, not including %esp/%rsp
718 if (ia
- 7 >= insns_min
&&
719 p
[-7] == 0xFF && p
[-6] == 0x94 && p
[-5] == 0x24) {
720 // imm32 case for %esp/%rsp
724 #elif defined(LUL_ARCH_arm)
726 uint16_t w0
= 0, w1
= 0;
727 // The return address has its lowest bit set, indicating a return
730 if (ia
- 2 >= insns_min
&& ia
- 1 <= insns_max
) {
731 w1
= *(uint16_t*)(ia
- 2);
733 if (ia
- 4 >= insns_min
&& ia
- 1 <= insns_max
) {
734 w0
= *(uint16_t*)(ia
- 4);
736 // Is it a 32-bit Thumb call insn?
737 // BL simm26 (Encoding T1)
738 if ((w0
& 0xF800) == 0xF000 && (w1
& 0xC000) == 0xC000) {
741 // BLX simm26 (Encoding T2)
742 if ((w0
& 0xF800) == 0xF000 && (w1
& 0xC000) == 0xC000) {
745 // Other possible cases:
746 // (BLX Rm, Encoding T1).
747 // BLX Rm (encoding T1, 16 bit, inspect w1 and ignore w0.)
748 // 0100 0111 1 Rm 000
750 // Returning to ARM code.
752 if ((ia
& 3) == 0 && ia
- 4 >= insns_min
&& ia
- 1 <= insns_max
) {
753 a0
= *(uint32_t*)(ia
- 4);
755 // Leading E forces unconditional only -- fix. It could be
756 // anything except F, which is the deprecated NV code.
757 // BL simm26 (Encoding A1)
758 if ((a0
& 0xFF000000) == 0xEB000000) {
761 // Other possible cases:
762 // BLX simm26 (Encoding A2)
763 //if ((a0 & 0xFE000000) == 0xFA000000)
765 // BLX (register) (A1): BLX <c> <Rm>
766 // cond 0001 0010 1111 1111 1111 0011 Rm
767 // again, cond can be anything except NV (0xF)
771 # error "Unsupported arch"
774 // Not an insn we recognise.
779 // FindSecMap's caller must hold the global lock for reading or writing.
// Binary-search mSecMaps (sorted, non-overlapping -- see the member
// comment below) for the SecMap whose [mSummaryMinAddr, mSummaryMaxAddr]
// range brackets |ia|.
// NOTE(review): this extraction elides the initialisation of |lo|, the
// enclosing loop header, and both the found/not-found return statements
// (original line numbers jump 785 -> 787 -> 792 and stop at 798), so
// the exact return convention cannot be confirmed here.
780 SecMap
* FindSecMap(uintptr_t ia
) {
781 // Binary search mSecMaps to find one that brackets |ia|.
782 // lo and hi need to be signed, else the loop termination tests
783 // don't work properly.
785 long int hi
= (long int)mSecMaps
.size() - 1;
787 // current unsearched space is from lo to hi, inclusive.
792 long int mid
= lo
+ ((hi
- lo
) / 2);
793 SecMap
* mid_secMap
= mSecMaps
[mid
];
794 uintptr_t mid_minAddr
= mid_secMap
->mSummaryMinAddr
;
795 uintptr_t mid_maxAddr
= mid_secMap
->mSummaryMaxAddr
;
// Narrow the search window: |ia| below/above the probed range.
796 if (ia
< mid_minAddr
) { hi
= mid
-1; continue; }
797 if (ia
> mid_maxAddr
) { lo
= mid
+1; continue; }
798 MOZ_ASSERT(mid_minAddr
<= ia
&& ia
<= mid_maxAddr
);
805 // sorted array of per-object ranges, non overlapping, non empty
806 std::vector
<SecMap
*> mSecMaps
;
808 // a logging sink, for debugging.
809 void (*mLog
)(const char*);
813 ////////////////////////////////////////////////////////////////
815 ////////////////////////////////////////////////////////////////
817 // This is the thread-local cache. It maps individual insn AVMAs to
818 // the associated CFI record, which live in LUL::mPriMap.
820 // The cache is a direct map hash table indexed by address.
821 // It has to distinguish 3 cases:
823 // (1) .mRSet == (RuleSet*)0 ==> cache slot not in use
824 // (2) .mRSet == (RuleSet*)1 ==> slot in use, no RuleSet avail
825 // (3) .mRSet > (RuleSet*)1 ==> slot in use, RuleSet* available
827 // Distinguishing between (2) and (3) is important, because if we look
828 // up an address in LUL::mPriMap and find there is no RuleSet, then
829 // that fact needs to cached here, so as to avoid potentially
830 // expensive repeat lookups.
832 // A CFICacheEntry::mRSet value of zero indicates that the slot is not
833 // in use, and a value of one indicates that the slot is in use but
834 // there is no RuleSet available.
835 #define ENTRY_NOT_IN_USE ((RuleSet*)0)
836 #define NO_RULESET_AVAILABLE ((RuleSet*)1)
841 CFICache(PriMap
* aPriMap
) {
847 for (int i
= 0; i
< N_ENTRIES
; ++i
) {
849 mCache
[i
].mRSet
= ENTRY_NOT_IN_USE
;
853 RuleSet
* Lookup(uintptr_t ia
) {
854 uintptr_t hash
= ia
% (uintptr_t)N_ENTRIES
;
855 CFICacheEntry
* ce
= &mCache
[hash
];
856 if (ce
->mAVMA
== ia
) {
857 // The cache has an entry for |ia|. Interpret it.
858 if (ce
->mRSet
> NO_RULESET_AVAILABLE
) {
859 // There's a RuleSet. So return it.
862 if (ce
->mRSet
== NO_RULESET_AVAILABLE
) {
863 // There's no RuleSet for this address. Don't update
864 // the cache, since we might get queried again.
867 // Otherwise, the slot is not in use. Fall through to
871 // The cache entry is for some other address, or is not in use.
872 // Update it. If it can be found in the priMap then install it
873 // as-is. Else put NO_RULESET_AVAILABLE in, so as to indicate
874 // there's no info for this address.
875 RuleSet
* fallback
= mPriMap
->Lookup(ia
);
876 mCache
[hash
].mAVMA
= ia
;
877 mCache
[hash
].mRSet
= fallback
? fallback
: NO_RULESET_AVAILABLE
;
882 // This should be a prime number.
883 static const int N_ENTRIES
= 509;
885 // See comment above for the meaning of these entries.
886 struct CFICacheEntry
{
887 uintptr_t mAVMA
; // AVMA of the associated instruction
888 RuleSet
* mRSet
; // RuleSet* for the instruction
890 CFICacheEntry mCache
[N_ENTRIES
];
892 // Need to have a pointer to the PriMap, so as to be able
893 // to service misses.
897 #undef ENTRY_NOT_IN_USE
898 #undef NO_RULESET_AVAILABLE
901 ////////////////////////////////////////////////////////////////
903 ////////////////////////////////////////////////////////////////
905 LUL::LUL(void (*aLog
)(const char*))
907 mRWlock
= new LulRWLock();
908 AutoLulRWLocker
lock(mRWlock
, AutoLulRWLocker::FOR_WRITING
);
910 mPriMap
= new PriMap(aLog
);
911 mSegArray
= new SegArray();
917 // The auto-locked section must have its own scope, so that the
918 // unlock is performed before the mRWLock is deleted.
920 AutoLulRWLocker
lock(mRWlock
, AutoLulRWLocker::FOR_WRITING
);
921 for (std::map
<pthread_t
,CFICache
*>::iterator iter
= mCaches
.begin();
922 iter
!= mCaches
.end();
930 // Now we don't hold the lock. Hence it is safe to delete it.
936 LUL::RegisterUnwinderThread()
938 AutoLulRWLocker
lock(mRWlock
, AutoLulRWLocker::FOR_WRITING
);
940 pthread_t me
= pthread_self();
941 CFICache
* cache
= new CFICache(mPriMap
);
943 std::pair
<std::map
<pthread_t
,CFICache
*>::iterator
, bool> res
944 = mCaches
.insert(std::pair
<pthread_t
,CFICache
*>(me
, cache
));
945 // "this thread is not already registered"
946 MOZ_ASSERT(res
.second
); // "new element was inserted"
947 // Using mozilla::DebugOnly to declare |res| leads to compilation error
952 LUL::NotifyAfterMap(uintptr_t aRXavma
, size_t aSize
,
953 const char* aFileName
, const void* aMappedImage
)
955 AutoLulRWLocker
lock(mRWlock
, AutoLulRWLocker::FOR_WRITING
);
959 snprintf(buf
, sizeof(buf
), "NotifyMap %llx %llu %s\n",
960 (unsigned long long int)aRXavma
, (unsigned long long int)aSize
,
962 buf
[sizeof(buf
)-1] = 0;
965 InvalidateCFICaches();
967 // Ignore obviously-stupid notifications.
970 // Here's a new mapping, for this object.
971 SecMap
* smap
= new SecMap(mLog
);
973 // Read CFI or EXIDX unwind data into |smap|.
975 (void)lul::ReadSymbolData(
976 string(aFileName
), std::vector
<string
>(), smap
,
977 (void*)aRXavma
, mLog
);
979 (void)lul::ReadSymbolDataInternal(
980 (const uint8_t*)aMappedImage
,
981 string(aFileName
), std::vector
<string
>(), smap
,
982 (void*)aRXavma
, mLog
);
985 mLog("NotifyMap .. preparing entries\n");
987 smap
->PrepareRuleSets(aRXavma
, aSize
);
989 snprintf(buf
, sizeof(buf
),
990 "NotifyMap got %lld entries\n", (long long int)smap
->Size());
991 buf
[sizeof(buf
)-1] = 0;
994 // Add it to the primary map (the top level set of mapped objects).
995 mPriMap
->AddSecMap(smap
);
997 // Tell the segment array about the mapping, so that the stack
998 // scan and __kernel_syscall mechanisms know where valid code is.
999 mSegArray
->add(aRXavma
, aRXavma
+ aSize
- 1, true);
1005 LUL::NotifyExecutableArea(uintptr_t aRXavma
, size_t aSize
)
1007 AutoLulRWLocker
lock(mRWlock
, AutoLulRWLocker::FOR_WRITING
);
1011 snprintf(buf
, sizeof(buf
), "NotifyExecutableArea %llx %llu\n",
1012 (unsigned long long int)aRXavma
, (unsigned long long int)aSize
);
1013 buf
[sizeof(buf
)-1] = 0;
1016 InvalidateCFICaches();
1018 // Ignore obviously-stupid notifications.
1020 // Tell the segment array about the mapping, so that the stack
1021 // scan and __kernel_syscall mechanisms know where valid code is.
1022 mSegArray
->add(aRXavma
, aRXavma
+ aSize
- 1, true);
1028 LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin
, uintptr_t aRXavmaMax
)
1030 AutoLulRWLocker
lock(mRWlock
, AutoLulRWLocker::FOR_WRITING
);
1034 snprintf(buf
, sizeof(buf
), "NotifyUnmap %016llx-%016llx\n",
1035 (unsigned long long int)aRXavmaMin
,
1036 (unsigned long long int)aRXavmaMax
);
1037 buf
[sizeof(buf
)-1] = 0;
1040 MOZ_ASSERT(aRXavmaMin
<= aRXavmaMax
);
1042 InvalidateCFICaches();
1044 // Remove from the primary map, any secondary maps that intersect
1045 // with the address range. Also delete the secondary maps.
1046 mPriMap
->RemoveSecMapsInRange(aRXavmaMin
, aRXavmaMax
);
1048 // Tell the segment array that the address range no longer
1049 // contains valid code.
1050 mSegArray
->add(aRXavmaMin
, aRXavmaMax
, false);
1052 snprintf(buf
, sizeof(buf
), "NotifyUnmap: now have %d SecMaps\n",
1053 (int)mPriMap
->CountSecMaps());
1054 buf
[sizeof(buf
)-1] = 0;
1060 LUL::CountMappings()
1062 AutoLulRWLocker
lock(mRWlock
, AutoLulRWLocker::FOR_WRITING
);
1063 return mPriMap
->CountSecMaps();
// Fetch one machine word from the captured stack image at address
// |aAddr|, returning an invalid TaggedUWord if the address is itself
// invalid or if any byte of the word lies outside the image's range
// [mStartAvma, mStartAvma + mLen).
// NOTE(review): the closing braces of the guard blocks and of the
// function are not visible in this extraction.
1068 TaggedUWord
DerefTUW(TaggedUWord aAddr
, StackImage
* aStackImg
)
// Reject addresses whose validity tag is not set.
1070 if (!aAddr
.Valid()) {
1071 return TaggedUWord();
// Reject reads below the start of the captured image.
1073 if (aAddr
.Value() < aStackImg
->mStartAvma
) {
1074 return TaggedUWord();
// Reject reads where the word would run past the end of the image.
1076 if (aAddr
.Value() + sizeof(uintptr_t) > aStackImg
->mStartAvma
1077 + aStackImg
->mLen
) {
1078 return TaggedUWord();
// In range: read the word out of the image's copied contents, offset
// by the image's start address.
1080 return TaggedUWord(*(uintptr_t*)(aStackImg
->mContents
+ aAddr
.Value()
1081 - aStackImg
->mStartAvma
));
// Return the value of DWARF register |aReg| taken from the old register
// state |aOldRegs|, or |aCFA| when the pseudo-register DW_REG_CFA is
// requested.  Asserts and returns an invalid TaggedUWord for any
// unrecognised register number.
// NOTE(review): the switch(aReg) header and the closing braces are not
// visible in this extraction (original line numbers jump 1085 -> 1088,
// 1099 -> 1101 -> 1103).
1085 TaggedUWord
EvaluateReg(int16_t aReg
, UnwindRegs
* aOldRegs
, TaggedUWord aCFA
)
1088 case DW_REG_CFA
: return aCFA
;
1089 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1090 case DW_REG_INTEL_XBP
: return aOldRegs
->xbp
;
1091 case DW_REG_INTEL_XSP
: return aOldRegs
->xsp
;
1092 case DW_REG_INTEL_XIP
: return aOldRegs
->xip
;
1093 #elif defined(LUL_ARCH_arm)
1094 case DW_REG_ARM_R7
: return aOldRegs
->r7
;
1095 case DW_REG_ARM_R11
: return aOldRegs
->r11
;
1096 case DW_REG_ARM_R12
: return aOldRegs
->r12
;
1097 case DW_REG_ARM_R13
: return aOldRegs
->r13
;
1098 case DW_REG_ARM_R14
: return aOldRegs
->r14
;
1099 case DW_REG_ARM_R15
: return aOldRegs
->r15
;
1101 # error "Unsupported arch"
1103 default: MOZ_ASSERT(0); return TaggedUWord();
// Evaluate the location expression |aExpr| against the old register
// state |aOldRegs| and the already-computed CFA |aCFA|.  For NODEREF
// the result is reg + offset; for DEREF that address is additionally
// loaded from the captured stack image via DerefTUW; UNKNOWN yields an
// invalid TaggedUWord.
// NOTE(review): this extraction elides the NODEREF case's final return
// statement, the case-closing braces, and the default branch (original
// line numbers jump 1116 -> 1119 and 1122 -> 1126), so the trailing
// "return TaggedUWord();" below is presumably the fall-through default.
1108 TaggedUWord
EvaluateExpr(LExpr aExpr
, UnwindRegs
* aOldRegs
,
1109 TaggedUWord aCFA
, StackImage
* aStackImg
)
1111 switch (aExpr
.mHow
) {
1112 case LExpr::UNKNOWN
:
// No recovery rule known for this register at this address.
1113 return TaggedUWord();
1114 case LExpr::NODEREF
: {
// Value is register + signed offset.
1115 TaggedUWord tuw
= EvaluateReg(aExpr
.mReg
, aOldRegs
, aCFA
);
1116 tuw
.Add(TaggedUWord((intptr_t)aExpr
.mOffset
));
1119 case LExpr::DEREF
: {
// Value is the word loaded from address (register + signed offset).
1120 TaggedUWord tuw
= EvaluateReg(aExpr
.mReg
, aOldRegs
, aCFA
);
1121 tuw
.Add(TaggedUWord((intptr_t)aExpr
.mOffset
));
1122 return DerefTUW(tuw
, aStackImg
);
1126 return TaggedUWord();
1131 void UseRuleSet(/*MOD*/UnwindRegs
* aRegs
,
1132 StackImage
* aStackImg
, RuleSet
* aRS
)
1134 // Take a copy of regs, since we'll need to refer to the old values
1135 // whilst computing the new ones.
1136 UnwindRegs old_regs
= *aRegs
;
1138 // Mark all the current register values as invalid, so that the
1139 // caller can see, on our return, which ones have been computed
1140 // anew. If we don't even manage to compute a new PC value, then
1141 // the caller will have to abandon the unwind.
1142 // FIXME: Create and use instead: aRegs->SetAllInvalid();
1143 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1144 aRegs
->xbp
= TaggedUWord();
1145 aRegs
->xsp
= TaggedUWord();
1146 aRegs
->xip
= TaggedUWord();
1147 #elif defined(LUL_ARCH_arm)
1148 aRegs
->r7
= TaggedUWord();
1149 aRegs
->r11
= TaggedUWord();
1150 aRegs
->r12
= TaggedUWord();
1151 aRegs
->r13
= TaggedUWord();
1152 aRegs
->r14
= TaggedUWord();
1153 aRegs
->r15
= TaggedUWord();
1155 # error "Unsupported arch"
1158 // This is generally useful.
1159 const TaggedUWord inval
= TaggedUWord();
1161 // First, compute the CFA.
1162 TaggedUWord cfa
= EvaluateExpr(aRS
->mCfaExpr
, &old_regs
,
1163 inval
/*old cfa*/, aStackImg
);
1165 // If we didn't manage to compute the CFA, well .. that's ungood,
1166 // but keep going anyway. It'll be OK provided none of the register
1167 // value rules mention the CFA. In any case, compute the new values
1168 // for each register that we're tracking.
1170 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1171 aRegs
->xbp
= EvaluateExpr(aRS
->mXbpExpr
, &old_regs
, cfa
, aStackImg
);
1172 aRegs
->xsp
= EvaluateExpr(aRS
->mXspExpr
, &old_regs
, cfa
, aStackImg
);
1173 aRegs
->xip
= EvaluateExpr(aRS
->mXipExpr
, &old_regs
, cfa
, aStackImg
);
1174 #elif defined(LUL_ARCH_arm)
1175 aRegs
->r7
= EvaluateExpr(aRS
->mR7expr
, &old_regs
, cfa
, aStackImg
);
1176 aRegs
->r11
= EvaluateExpr(aRS
->mR11expr
, &old_regs
, cfa
, aStackImg
);
1177 aRegs
->r12
= EvaluateExpr(aRS
->mR12expr
, &old_regs
, cfa
, aStackImg
);
1178 aRegs
->r13
= EvaluateExpr(aRS
->mR13expr
, &old_regs
, cfa
, aStackImg
);
1179 aRegs
->r14
= EvaluateExpr(aRS
->mR14expr
, &old_regs
, cfa
, aStackImg
);
1180 aRegs
->r15
= EvaluateExpr(aRS
->mR15expr
, &old_regs
, cfa
, aStackImg
);
1182 # error "Unsupported arch"
1185 // We're done. Any regs for which we didn't manage to compute a
1186 // new value will now be marked as invalid.
1190 LUL::Unwind(/*OUT*/uintptr_t* aFramePCs
,
1191 /*OUT*/uintptr_t* aFrameSPs
,
1192 /*OUT*/size_t* aFramesUsed
,
1193 /*OUT*/size_t* aScannedFramesAcquired
,
1194 size_t aFramesAvail
,
1195 size_t aScannedFramesAllowed
,
1196 UnwindRegs
* aStartRegs
, StackImage
* aStackImg
)
1198 AutoLulRWLocker
lock(mRWlock
, AutoLulRWLocker::FOR_READING
);
1200 pthread_t me
= pthread_self();
1201 std::map
<pthread_t
, CFICache
*>::iterator iter
= mCaches
.find(me
);
1203 if (iter
== mCaches
.end()) {
1204 // The calling thread is not registered for unwinding.
1209 CFICache
* cache
= iter
->second
;
1212 // Do unwindery, possibly modifying |cache|.
1214 /////////////////////////////////////////////////////////
1219 UnwindRegs regs
= *aStartRegs
;
1220 TaggedUWord last_valid_sp
= TaggedUWord();
1222 // Stack-scan control
1223 unsigned int n_scanned_frames
= 0; // # s-s frames recovered so far
1224 static const int NUM_SCANNED_WORDS
= 50; // max allowed scan length
1231 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1232 snprintf(buf
, sizeof(buf
),
1233 "LoopTop: rip %d/%llx rsp %d/%llx rbp %d/%llx\n",
1234 (int)regs
.xip
.Valid(), (unsigned long long int)regs
.xip
.Value(),
1235 (int)regs
.xsp
.Valid(), (unsigned long long int)regs
.xsp
.Value(),
1236 (int)regs
.xbp
.Valid(), (unsigned long long int)regs
.xbp
.Value());
1237 buf
[sizeof(buf
)-1] = 0;
1239 #elif defined(LUL_ARCH_arm)
1240 snprintf(buf
, sizeof(buf
),
1241 "LoopTop: r15 %d/%llx r7 %d/%llx r11 %d/%llx"
1242 " r12 %d/%llx r13 %d/%llx r14 %d/%llx\n",
1243 (int)regs
.r15
.Valid(), (unsigned long long int)regs
.r15
.Value(),
1244 (int)regs
.r7
.Valid(), (unsigned long long int)regs
.r7
.Value(),
1245 (int)regs
.r11
.Valid(), (unsigned long long int)regs
.r11
.Value(),
1246 (int)regs
.r12
.Valid(), (unsigned long long int)regs
.r12
.Value(),
1247 (int)regs
.r13
.Valid(), (unsigned long long int)regs
.r13
.Value(),
1248 (int)regs
.r14
.Valid(), (unsigned long long int)regs
.r14
.Value());
1249 buf
[sizeof(buf
)-1] = 0;
1252 # error "Unsupported arch"
1256 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1257 TaggedUWord ia
= regs
.xip
;
1258 TaggedUWord sp
= regs
.xsp
;
1259 #elif defined(LUL_ARCH_arm)
1260 TaggedUWord ia
= (*aFramesUsed
== 0 ? regs
.r15
: regs
.r14
);
1261 TaggedUWord sp
= regs
.r13
;
1263 # error "Unsupported arch"
1266 if (*aFramesUsed
>= aFramesAvail
) {
1270 // If we don't have a valid value for the PC, give up.
1275 // If this is the innermost frame, record the SP value, which
1276 // presumably is valid. If this isn't the innermost frame, and we
1277 // have a valid SP value, check that its SP value isn't less that
1278 // the one we've seen so far, so as to catch potential SP value
1280 if (*aFramesUsed
== 0) {
1283 MOZ_ASSERT(last_valid_sp
.Valid());
1285 if (sp
.Value() < last_valid_sp
.Value()) {
1286 // Hmm, SP going in the wrong direction. Let's stop.
1289 // Remember where we got to.
1294 // For the innermost frame, the IA value is what we need. For all
1295 // other frames, it's actually the return address, so back up one
1296 // byte so as to get it into the calling instruction.
1297 aFramePCs
[*aFramesUsed
] = ia
.Value() - (*aFramesUsed
== 0 ? 0 : 1);
1298 aFrameSPs
[*aFramesUsed
] = sp
.Valid() ? sp
.Value() : 0;
1301 // Find the RuleSet for the current IA, if any. This will also
1302 // query the backing (secondary) maps if it isn't found in the
1303 // thread-local cache.
1305 // If this isn't the innermost frame, back up into the calling insn.
1306 if (*aFramesUsed
> 1) {
1307 ia
.Add(TaggedUWord((uintptr_t)(-1)));
1310 RuleSet
* ruleset
= cache
->Lookup(ia
.Value());
1313 snprintf(buf
, sizeof(buf
), "ruleset for 0x%llx = %p\n",
1314 (unsigned long long int)ia
.Value(), ruleset
);
1315 buf
[sizeof(buf
)-1] = 0;
1319 /////////////////////////////////////////////
1321 // On 32 bit x86-linux, syscalls are often done via the VDSO
1322 // function __kernel_vsyscall, which doesn't have a corresponding
1323 // object that we can read debuginfo from. That effectively kills
1324 // off all stack traces for threads blocked in syscalls. Hence
1325 // special-case by looking at the code surrounding the program
1328 // 0xf7757420 <__kernel_vsyscall+0>: push %ecx
1329 // 0xf7757421 <__kernel_vsyscall+1>: push %edx
1330 // 0xf7757422 <__kernel_vsyscall+2>: push %ebp
1331 // 0xf7757423 <__kernel_vsyscall+3>: mov %esp,%ebp
1332 // 0xf7757425 <__kernel_vsyscall+5>: sysenter
1333 // 0xf7757427 <__kernel_vsyscall+7>: nop
1334 // 0xf7757428 <__kernel_vsyscall+8>: nop
1335 // 0xf7757429 <__kernel_vsyscall+9>: nop
1336 // 0xf775742a <__kernel_vsyscall+10>: nop
1337 // 0xf775742b <__kernel_vsyscall+11>: nop
1338 // 0xf775742c <__kernel_vsyscall+12>: nop
1339 // 0xf775742d <__kernel_vsyscall+13>: nop
1340 // 0xf775742e <__kernel_vsyscall+14>: int $0x80
1341 // 0xf7757430 <__kernel_vsyscall+16>: pop %ebp
1342 // 0xf7757431 <__kernel_vsyscall+17>: pop %edx
1343 // 0xf7757432 <__kernel_vsyscall+18>: pop %ecx
1344 // 0xf7757433 <__kernel_vsyscall+19>: ret
1346 // In cases where the sampled thread is blocked in a syscall, its
1347 // program counter will point at "pop %ebp". Hence we look for
1348 // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and
1349 // the corresponding register-recovery actions are:
1350 // new_ebp = *(old_esp + 0)
1351 // new eip = *(old_esp + 12)
1352 // new_esp = old_esp + 16
1354 // It may also be the case that the program counter points two
1355 // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in
1356 // the case where the syscall has been restarted but the thread
1357 // hasn't been rescheduled. The code below doesn't handle that;
1358 // it could easily be made to.
1360 #if defined(LUL_PLAT_x86_android) || defined(LUL_PLAT_x86_linux)
1361 if (!ruleset
&& *aFramesUsed
== 1 && ia
.Valid() && sp
.Valid()) {
1362 uintptr_t insns_min
, insns_max
;
1363 uintptr_t eip
= ia
.Value();
1364 bool b
= mSegArray
->getBoundingCodeSegment(&insns_min
, &insns_max
, eip
);
1365 if (b
&& eip
- 2 >= insns_min
&& eip
+ 3 <= insns_max
) {
1366 uint8_t* eipC
= (uint8_t*)eip
;
1367 if (eipC
[-2] == 0xCD && eipC
[-1] == 0x80 && eipC
[0] == 0x5D &&
1368 eipC
[1] == 0x5A && eipC
[2] == 0x59 && eipC
[3] == 0xC3) {
1369 TaggedUWord sp_plus_0
= sp
;
1370 TaggedUWord sp_plus_12
= sp
;
1371 TaggedUWord sp_plus_16
= sp
;
1372 sp_plus_12
.Add(TaggedUWord(12));
1373 sp_plus_16
.Add(TaggedUWord(16));
1374 TaggedUWord new_ebp
= DerefTUW(sp_plus_0
, aStackImg
);
1375 TaggedUWord new_eip
= DerefTUW(sp_plus_12
, aStackImg
);
1376 TaggedUWord new_esp
= sp_plus_16
;
1377 if (new_ebp
.Valid() && new_eip
.Valid() && new_esp
.Valid()) {
1388 /////////////////////////////////////////////
1390 // So, do we have a ruleset for this address? If so, use it now.
1394 ruleset
->Print(mLog
); mLog("\n");
1396 // Use the RuleSet to compute the registers for the previous
1397 // frame. |regs| is modified in-place.
1398 UseRuleSet(®s
, aStackImg
, ruleset
);
1402 // There's no RuleSet for the specified address, so see if
1403 // it's possible to get anywhere by stack-scanning.
1405 // Use stack scanning frugally.
1406 if (n_scanned_frames
++ >= aScannedFramesAllowed
) {
1410 // We can't scan the stack without a valid, aligned stack pointer.
1411 if (!sp
.IsAligned()) {
1415 bool scan_succeeded
= false;
1416 for (int i
= 0; i
< NUM_SCANNED_WORDS
; ++i
) {
1417 TaggedUWord aWord
= DerefTUW(sp
, aStackImg
);
1418 // aWord is something we fished off the stack. It should be
1419 // valid, unless we overran the stack bounds.
1420 if (!aWord
.Valid()) {
1424 // Now, does aWord point inside a text section and immediately
1425 // after something that looks like a call instruction?
1426 if (mPriMap
->MaybeIsReturnPoint(aWord
, mSegArray
)) {
1427 // Yes it does. Update the unwound registers heuristically,
1428 // using the same schemes as Breakpad does.
1429 scan_succeeded
= true;
1430 (*aScannedFramesAcquired
)++;
1432 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1433 // The same logic applies for the 32- and 64-bit cases.
1434 // Register names of the form xsp etc refer to (eg) esp in
1435 // the 32-bit case and rsp in the 64-bit case.
1436 # if defined(LUL_ARCH_x64)
1437 const int wordSize
= 8;
1439 const int wordSize
= 4;
1441 // The return address -- at XSP -- will have been pushed by
1442 // the CALL instruction. So the caller's XSP value
1443 // immediately before and after that CALL instruction is the
1446 regs
.xsp
.Add(TaggedUWord(wordSize
));
1448 // aWord points at the return point, so back up one byte
1449 // to put it in the calling instruction.
1451 regs
.xip
.Add(TaggedUWord((uintptr_t)(-1)));
1453 // Computing a new value from the frame pointer is more tricky.
1454 if (regs
.xbp
.Valid() &&
1455 sp
.Valid() && regs
.xbp
.Value() == sp
.Value() - wordSize
) {
1456 // One possibility is that the callee begins with the standard
1457 // preamble "push %xbp; mov %xsp, %xbp". In which case, the
1458 // (1) caller's XBP value will be at the word below XSP, and
1459 // (2) the current (callee's) XBP will point at that word:
1460 regs
.xbp
= DerefTUW(regs
.xbp
, aStackImg
);
1461 } else if (regs
.xbp
.Valid() &&
1462 sp
.Valid() && regs
.xbp
.Value() >= sp
.Value() + wordSize
) {
1463 // If that didn't work out, maybe the callee didn't change
1464 // XBP, so it still holds the caller's value. For that to
1465 // be plausible, XBP will need to have a value at least
1466 // higher than XSP since that holds the purported return
1467 // address. In which case do nothing, since XBP already
1468 // holds the "right" value.
1470 // Mark XBP as invalid, so that subsequent unwind iterations
1471 // don't assume it holds valid data.
1472 regs
.xbp
= TaggedUWord();
1475 // Move on to the next word up the stack
1476 sp
.Add(TaggedUWord(wordSize
));
1478 #elif defined(LUL_ARCH_arm)
1479 // Set all registers to be undefined, except for SP(R13) and
1482 // aWord points either at the return point, if returning to
1483 // ARM code, or one insn past the return point if returning
1484 // to Thumb code. In both cases, aWord-2 is guaranteed to
1485 // fall within the calling instruction.
1487 regs
.r15
.Add(TaggedUWord((uintptr_t)(-2)));
1489 // Make SP be the word above the location where the return
1490 // address was found.
1492 regs
.r13
.Add(TaggedUWord(4));
1494 // All other regs are undefined.
1495 regs
.r7
= regs
.r11
= regs
.r12
= regs
.r14
= TaggedUWord();
1497 // Move on to the next word up the stack
1498 sp
.Add(TaggedUWord(4));
1501 # error "Unknown plat"
1507 } // for (int i = 0; i < NUM_SCANNED_WORDS; i++)
1509 // We tried to make progress by scanning the stack, but failed.
1510 // So give up -- fall out of the top level unwind loop.
1511 if (!scan_succeeded
) {
1516 } // top level unwind loop
1519 /////////////////////////////////////////////////////////
1524 LUL::InvalidateCFICaches()
1526 // NB: the caller must hold m_rwl for writing.
1528 // FIXME: this could get expensive. Use a bool to remember when the
1529 // caches have been invalidated and hence avoid duplicate invalidations.
1530 for (std::map
<pthread_t
,CFICache
*>::iterator iter
= mCaches
.begin();
1531 iter
!= mCaches
.end();
1533 iter
->second
->Invalidate();
1538 ////////////////////////////////////////////////////////////////
1539 // LUL Unit Testing //
1540 ////////////////////////////////////////////////////////////////
1542 static const int LUL_UNIT_TEST_STACK_SIZE
= 16384;
1544 // This function is innermost in the test call sequence. It uses LUL
1545 // to unwind, and compares the result with the sequence specified in
1546 // the director string. These need to agree in order for the test to
1547 // pass. In order not to screw up the results, this function needs
1548 // to have a not-very big stack frame, since we're only presenting
1549 // the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and
1550 // that chunk unavoidably includes the frame for this function.
1552 // This function must not be inlined into its callers. Doing so will
1553 // cause the expected-vs-actual backtrace consistency checking to
1554 // fail. Prints summary results to |aLUL|'s logging sink and also
1555 // returns a boolean indicating whether or not the test passed.
1556 static __attribute__((noinline
))
1557 bool GetAndCheckStackTrace(LUL
* aLUL
, const char* dstring
)
1559 // Get hold of the current unwind-start registers.
1560 UnwindRegs startRegs
;
1561 memset(&startRegs
, 0, sizeof(startRegs
));
1562 #if defined(LUL_PLAT_x64_linux)
1563 volatile uintptr_t block
[3];
1564 MOZ_ASSERT(sizeof(block
) == 24);
1565 __asm__
__volatile__(
1566 "leaq 0(%%rip), %%r15" "\n\t"
1567 "movq %%r15, 0(%0)" "\n\t"
1568 "movq %%rsp, 8(%0)" "\n\t"
1569 "movq %%rbp, 16(%0)" "\n"
1570 : : "r"(&block
[0]) : "memory", "r15"
1572 startRegs
.xip
= TaggedUWord(block
[0]);
1573 startRegs
.xsp
= TaggedUWord(block
[1]);
1574 startRegs
.xbp
= TaggedUWord(block
[2]);
1575 const uintptr_t REDZONE_SIZE
= 128;
1576 uintptr_t start
= block
[1] - REDZONE_SIZE
;
1577 #elif defined(LUL_PLAT_x86_linux) || defined(LUL_PLAT_x86_android)
1578 volatile uintptr_t block
[3];
1579 MOZ_ASSERT(sizeof(block
) == 12);
1580 __asm__
__volatile__(
1581 ".byte 0xE8,0x00,0x00,0x00,0x00"/*call next insn*/ "\n\t"
1583 "movl %%edi, 0(%0)" "\n\t"
1584 "movl %%esp, 4(%0)" "\n\t"
1585 "movl %%ebp, 8(%0)" "\n"
1586 : : "r"(&block
[0]) : "memory", "edi"
1588 startRegs
.xip
= TaggedUWord(block
[0]);
1589 startRegs
.xsp
= TaggedUWord(block
[1]);
1590 startRegs
.xbp
= TaggedUWord(block
[2]);
1591 const uintptr_t REDZONE_SIZE
= 0;
1592 uintptr_t start
= block
[1] - REDZONE_SIZE
;
1593 #elif defined(LUL_PLAT_arm_android)
1594 volatile uintptr_t block
[6];
1595 MOZ_ASSERT(sizeof(block
) == 24);
1596 __asm__
__volatile__(
1597 "mov r0, r15" "\n\t"
1598 "str r0, [%0, #0]" "\n\t"
1599 "str r14, [%0, #4]" "\n\t"
1600 "str r13, [%0, #8]" "\n\t"
1601 "str r12, [%0, #12]" "\n\t"
1602 "str r11, [%0, #16]" "\n\t"
1603 "str r7, [%0, #20]" "\n"
1604 : : "r"(&block
[0]) : "memory", "r0"
1606 startRegs
.r15
= TaggedUWord(block
[0]);
1607 startRegs
.r14
= TaggedUWord(block
[1]);
1608 startRegs
.r13
= TaggedUWord(block
[2]);
1609 startRegs
.r12
= TaggedUWord(block
[3]);
1610 startRegs
.r11
= TaggedUWord(block
[4]);
1611 startRegs
.r7
= TaggedUWord(block
[5]);
1612 const uintptr_t REDZONE_SIZE
= 0;
1613 uintptr_t start
= block
[1] - REDZONE_SIZE
;
1615 # error "Unsupported platform"
1618 // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the
1620 uintptr_t end
= start
+ LUL_UNIT_TEST_STACK_SIZE
;
1621 uintptr_t ws
= sizeof(void*);
1624 uintptr_t nToCopy
= end
- start
;
1625 if (nToCopy
> lul::N_STACK_BYTES
) {
1626 nToCopy
= lul::N_STACK_BYTES
;
1628 MOZ_ASSERT(nToCopy
<= lul::N_STACK_BYTES
);
1629 StackImage
* stackImg
= new StackImage();
1630 stackImg
->mLen
= nToCopy
;
1631 stackImg
->mStartAvma
= start
;
1633 MOZ_MAKE_MEM_DEFINED((void*)start
, nToCopy
);
1634 memcpy(&stackImg
->mContents
[0], (void*)start
, nToCopy
);
1638 const int MAX_TEST_FRAMES
= 64;
1639 uintptr_t framePCs
[MAX_TEST_FRAMES
];
1640 uintptr_t frameSPs
[MAX_TEST_FRAMES
];
1641 size_t framesAvail
= mozilla::ArrayLength(framePCs
);
1642 size_t framesUsed
= 0;
1643 size_t scannedFramesAllowed
= 0;
1644 size_t scannedFramesAcquired
= 0;
1645 aLUL
->Unwind( &framePCs
[0], &frameSPs
[0],
1646 &framesUsed
, &scannedFramesAcquired
,
1647 framesAvail
, scannedFramesAllowed
,
1648 &startRegs
, stackImg
);
1653 // // Show what we have.
1654 // fprintf(stderr, "Got %d frames:\n", (int)framesUsed);
1655 // for (size_t i = 0; i < framesUsed; i++) {
1656 // fprintf(stderr, " [%2d] SP %p PC %p\n",
1657 // (int)i, (void*)frameSPs[i], (void*)framePCs[i]);
1659 // fprintf(stderr, "\n");
1662 // Check to see if there's a consistent binding between digits in
1663 // the director string ('1' .. '8') and the PC values acquired by
1664 // the unwind. If there isn't, the unwinding has failed somehow.
1665 uintptr_t binding
[8]; // binding for '1' .. binding for '8'
1666 memset((void*)binding
, 0, sizeof(binding
));
1668 // The general plan is to work backwards along the director string
1669 // and forwards along the framePCs array. Doing so corresponds to
1670 // working outwards from the innermost frame of the recursive test set.
1671 const char* cursor
= dstring
;
1673 // Find the end. This leaves |cursor| two bytes past the first
1674 // character we want to look at -- see comment below.
1675 while (*cursor
) cursor
++;
1677 // Counts the number of consistent frames.
1678 size_t nConsistent
= 0;
1680 // Iterate back to the start of the director string. The starting
1681 // points are a bit complex. We can't use framePCs[0] because that
1682 // contains the PC in this frame (above). We can't use framePCs[1]
1683 // because that will contain the PC at return point in the recursive
1684 // test group (TestFn[1-8]) for their call "out" to this function,
1685 // GetAndCheckStackTrace. Although LUL will compute a correct
1686 // return address, that will not be the same return address as for a
1687 // recursive call out of the the function to another function in the
1688 // group. Hence we can only start consistency checking at
1691 // To be consistent, then, we must ignore the last element in the
1692 // director string as that corresponds to framePCs[1]. Hence the
1693 // start points are: framePCs[2] and the director string 2 bytes
1694 // before the terminating zero.
1696 // Also as a result of this, the number of consistent frames counted
1697 // will always be one less than the length of the director string
1698 // (not including its terminating zero).
1700 for (cursor
= cursor
-2, frameIx
= 2;
1701 cursor
>= dstring
&& frameIx
< framesUsed
;
1702 cursor
--, frameIx
++) {
1704 uintptr_t pc
= framePCs
[frameIx
];
1705 // If this doesn't hold, the director string is ill-formed.
1706 MOZ_ASSERT(c
>= '1' && c
<= '8');
1707 int n
= ((int)c
) - ((int)'1');
1708 if (binding
[n
] == 0) {
1709 // There's no binding for |c| yet, so install |pc| and carry on.
1714 // There's a pre-existing binding for |c|. Check it's consistent.
1715 if (binding
[n
] != pc
) {
1716 // Not consistent. Give up now.
1719 // Consistent. Keep going.
1723 // So, did we succeed?
1724 bool passed
= nConsistent
+1 == strlen(dstring
);
1726 // Show the results.
1728 snprintf(buf
, sizeof(buf
), "LULUnitTest: dstring = %s\n", dstring
);
1729 buf
[sizeof(buf
)-1] = 0;
1731 snprintf(buf
, sizeof(buf
),
1732 "LULUnitTest: %d consistent, %d in dstring: %s\n",
1733 (int)nConsistent
, (int)strlen(dstring
),
1734 passed
? "PASS" : "FAIL");
1735 buf
[sizeof(buf
)-1] = 0;
1742 // Macro magic to create a set of 8 mutually recursive functions with
1743 // varying frame sizes. These will recurse amongst themselves as
1744 // specified by |strP|, the directory string, and call
1745 // GetAndCheckStackTrace when the string becomes empty, passing it the
1746 // original value of the string. This checks the result, printing
1747 // results on |aLUL|'s logging sink, and also returns a boolean
1748 // indicating whether or not the results are acceptable (correct).
1750 #define DECL_TEST_FN(NAME) \
1751 bool NAME(LUL* aLUL, const char* strPorig, const char* strP);
1753 #define GEN_TEST_FN(NAME, FRAMESIZE) \
1754 bool NAME(LUL* aLUL, const char* strPorig, const char* strP) { \
1755 volatile char space[FRAMESIZE]; \
1756 memset((char*)&space[0], 0, sizeof(space)); \
1757 if (*strP == '\0') { \
1758 /* We've come to the end of the director string. */ \
1759 /* Take a stack snapshot. */ \
1760 return GetAndCheckStackTrace(aLUL, strPorig); \
1762 /* Recurse onwards. This is a bit subtle. The obvious */ \
1763 /* thing to do here is call onwards directly, from within the */ \
1764 /* arms of the case statement. That gives a problem in that */ \
1765 /* there will be multiple return points inside each function when */ \
1766 /* unwinding, so it will be difficult to check for consistency */ \
1767 /* against the director string. Instead, we make an indirect */ \
1768 /* call, so as to guarantee that there is only one call site */ \
1769 /* within each function. This does assume that the compiler */ \
1770 /* won't transform it back to the simple direct-call form. */ \
1771 /* To discourage it from doing so, the call is bracketed with */ \
1772 /* __asm__ __volatile__ sections so as to make it not-movable. */ \
1773 bool (*nextFn)(LUL*, const char*, const char*) = NULL; \
1775 case '1': nextFn = TestFn1; break; \
1776 case '2': nextFn = TestFn2; break; \
1777 case '3': nextFn = TestFn3; break; \
1778 case '4': nextFn = TestFn4; break; \
1779 case '5': nextFn = TestFn5; break; \
1780 case '6': nextFn = TestFn6; break; \
1781 case '7': nextFn = TestFn7; break; \
1782 case '8': nextFn = TestFn8; break; \
1783 default: nextFn = TestFn8; break; \
1785 __asm__ __volatile__("":::"cc","memory"); \
1786 bool passed = nextFn(aLUL, strPorig, strP+1); \
1787 __asm__ __volatile__("":::"cc","memory"); \
1792 // The test functions are mutually recursive, so it is necessary to
1793 // declare them before defining them.
1794 DECL_TEST_FN(TestFn1
)
1795 DECL_TEST_FN(TestFn2
)
1796 DECL_TEST_FN(TestFn3
)
1797 DECL_TEST_FN(TestFn4
)
1798 DECL_TEST_FN(TestFn5
)
1799 DECL_TEST_FN(TestFn6
)
1800 DECL_TEST_FN(TestFn7
)
1801 DECL_TEST_FN(TestFn8
)
1803 GEN_TEST_FN(TestFn1
, 123)
1804 GEN_TEST_FN(TestFn2
, 456)
1805 GEN_TEST_FN(TestFn3
, 789)
1806 GEN_TEST_FN(TestFn4
, 23)
1807 GEN_TEST_FN(TestFn5
, 47)
1808 GEN_TEST_FN(TestFn6
, 117)
1809 GEN_TEST_FN(TestFn7
, 1)
1810 GEN_TEST_FN(TestFn8
, 99)
1813 // This starts the test sequence going. Call here to generate a
1814 // sequence of calls as directed by the string |dstring|. The call
1815 // sequence will, from its innermost frame, finish by calling
1816 // GetAndCheckStackTrace() and passing it |dstring|.
1817 // GetAndCheckStackTrace() will unwind the stack, check consistency
1818 // of those results against |dstring|, and print a pass/fail message
1819 // to aLUL's logging sink. It also updates the counters in *aNTests
1820 // and aNTestsPassed.
1821 __attribute__((noinline
)) void
1822 TestUnw(/*OUT*/int* aNTests
, /*OUT*/int*aNTestsPassed
,
1823 LUL
* aLUL
, const char* dstring
)
1825 // Ensure that the stack has at least this much space on it. This
1826 // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes
1827 // and hand it to LUL. Safe in the sense that no segfault can
1828 // happen because the stack is at least this big. This is all
1829 // somewhat dubious in the sense that a sufficiently clever compiler
1830 // (clang, for one) can figure out that space[] is unused and delete
1831 // it from the frame. Hence the somewhat elaborate hoop jumping to
1832 // fill it up before the call and to at least appear to use the
1833 // value afterwards.
1835 volatile char space
[LUL_UNIT_TEST_STACK_SIZE
];
1836 for (i
= 0; i
< LUL_UNIT_TEST_STACK_SIZE
; i
++) {
1837 space
[i
] = (char)(i
& 0x7F);
1840 // Really run the test.
1841 bool passed
= TestFn1(aLUL
, dstring
, dstring
);
1843 // Appear to use space[], by visiting the value to compute some kind
1844 // of checksum, and then (apparently) using the checksum.
1846 for (i
= 0; i
< LUL_UNIT_TEST_STACK_SIZE
; i
++) {
1847 // If this doesn't fool LLVM, I don't know what will.
1848 sum
+= space
[i
] - 3*i
;
1850 __asm__
__volatile__("" : : "r"(sum
));
1852 // Update the counters.
1861 RunLulUnitTests(/*OUT*/int* aNTests
, /*OUT*/int*aNTestsPassed
, LUL
* aLUL
)
1864 aLUL
->mLog("LULUnitTest: BEGIN\n");
1865 *aNTests
= *aNTestsPassed
= 0;
1866 TestUnw(aNTests
, aNTestsPassed
, aLUL
, "11111111");
1867 TestUnw(aNTests
, aNTestsPassed
, aLUL
, "11222211");
1868 TestUnw(aNTests
, aNTestsPassed
, aLUL
, "111222333");
1869 TestUnw(aNTests
, aNTestsPassed
, aLUL
, "1212121231212331212121212121212");
1870 TestUnw(aNTests
, aNTestsPassed
, aLUL
, "31415827271828325332173258");
1871 TestUnw(aNTests
, aNTestsPassed
, aLUL
,
1872 "123456781122334455667788777777777777777777777");
1873 aLUL
->mLog("LULUnitTest: END\n");