Bug 1811847 [wpt PR 38108] - Update wpt metadata, a=testonly
[gecko.git] / tools / profiler / lul / LulMain.cpp
blob7cf5508234d90302ec6cb4d83db21b0e4c110861
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "LulMain.h"
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <unistd.h> // write(), only for testing LUL
14 #include <algorithm> // std::sort
15 #include <string>
16 #include <utility>
18 #include "GeckoProfiler.h" // for profiler_current_thread_id()
19 #include "LulCommonExt.h"
20 #include "LulElfExt.h"
21 #include "LulMainInt.h"
22 #include "mozilla/ArrayUtils.h"
23 #include "mozilla/Assertions.h"
24 #include "mozilla/CheckedInt.h"
25 #include "mozilla/DebugOnly.h"
26 #include "mozilla/MemoryChecking.h"
27 #include "mozilla/Sprintf.h"
28 #include "mozilla/UniquePtr.h"
29 #include "mozilla/Unused.h"
31 // Set this to 1 for verbose logging
32 #define DEBUG_MAIN 0
34 namespace lul {
36 using mozilla::CheckedInt;
37 using mozilla::DebugOnly;
38 using mozilla::MallocSizeOf;
39 using mozilla::Unused;
40 using std::pair;
41 using std::string;
42 using std::vector;
44 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
46 // Some functions in this file are marked RUNS IN NO-MALLOC CONTEXT.
47 // Any such function -- and, hence, the transitive closure of those
48 // reachable from it -- must not do any dynamic memory allocation.
49 // Doing so risks deadlock. There is exactly one root function for
50 // the transitive closure: Lul::Unwind.
52 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
54 ////////////////////////////////////////////////////////////////
55 // RuleSet //
56 ////////////////////////////////////////////////////////////////
58 static const char* NameOf_DW_REG(int16_t aReg) {
59 switch (aReg) {
60 case DW_REG_CFA:
61 return "cfa";
62 #if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
63 case DW_REG_INTEL_XBP:
64 return "xbp";
65 case DW_REG_INTEL_XSP:
66 return "xsp";
67 case DW_REG_INTEL_XIP:
68 return "xip";
69 #elif defined(GP_ARCH_arm)
70 case DW_REG_ARM_R7:
71 return "r7";
72 case DW_REG_ARM_R11:
73 return "r11";
74 case DW_REG_ARM_R12:
75 return "r12";
76 case DW_REG_ARM_R13:
77 return "r13";
78 case DW_REG_ARM_R14:
79 return "r14";
80 case DW_REG_ARM_R15:
81 return "r15";
82 #elif defined(GP_ARCH_arm64)
83 case DW_REG_AARCH64_X29:
84 return "x29";
85 case DW_REG_AARCH64_X30:
86 return "x30";
87 case DW_REG_AARCH64_SP:
88 return "sp";
89 #elif defined(GP_ARCH_mips64)
90 case DW_REG_MIPS_SP:
91 return "sp";
92 case DW_REG_MIPS_FP:
93 return "fp";
94 case DW_REG_MIPS_PC:
95 return "pc";
96 #else
97 # error "Unsupported arch"
98 #endif
99 default:
100 return "???";
104 string LExpr::ShowRule(const char* aNewReg) const {
105 char buf[64];
106 string res = string(aNewReg) + "=";
107 switch (mHow) {
108 case UNKNOWN:
109 res += "Unknown";
110 break;
111 case NODEREF:
112 SprintfLiteral(buf, "%s+%d", NameOf_DW_REG(mReg), (int)mOffset);
113 res += buf;
114 break;
115 case DEREF:
116 SprintfLiteral(buf, "*(%s+%d)", NameOf_DW_REG(mReg), (int)mOffset);
117 res += buf;
118 break;
119 case PFXEXPR:
120 SprintfLiteral(buf, "PfxExpr-at-%d", (int)mOffset);
121 res += buf;
122 break;
123 default:
124 res += "???";
125 break;
127 return res;
// Print this RuleSet, annotated with the (inclusive) address range
// [avma, avma+len-1] that it covers, via the caller-supplied logging
// sink |aLog|.  Debug printing only.
void RuleSet::Print(uintptr_t avma, uintptr_t len,
                    void (*aLog)(const char*)) const {
  char buf[96];
  SprintfLiteral(buf, "[%llx .. %llx]: let ", (unsigned long long int)avma,
                 (unsigned long long int)(avma + len - 1));
  string res = string(buf);
  res += mCfaExpr.ShowRule("cfa");
  res += " in";
  // For each reg we care about, print the recovery expression.
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
  res += mXipExpr.ShowRule(" RA");
  res += mXspExpr.ShowRule(" SP");
  res += mXbpExpr.ShowRule(" BP");
#elif defined(GP_ARCH_arm)
  res += mR15expr.ShowRule(" R15");
  res += mR7expr.ShowRule(" R7");
  res += mR11expr.ShowRule(" R11");
  res += mR12expr.ShowRule(" R12");
  res += mR13expr.ShowRule(" R13");
  res += mR14expr.ShowRule(" R14");
#elif defined(GP_ARCH_arm64)
  res += mX29expr.ShowRule(" X29");
  res += mX30expr.ShowRule(" X30");
  res += mSPexpr.ShowRule(" SP");
#elif defined(GP_ARCH_mips64)
  res += mPCexpr.ShowRule(" PC");
  res += mSPexpr.ShowRule(" SP");
  res += mFPexpr.ShowRule(" FP");
#else
#  error "Unsupported arch"
#endif
  aLog(res.c_str());
}
164 LExpr* RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) {
165 switch (aRegno) {
166 case DW_REG_CFA:
167 return &mCfaExpr;
168 #if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
169 case DW_REG_INTEL_XIP:
170 return &mXipExpr;
171 case DW_REG_INTEL_XSP:
172 return &mXspExpr;
173 case DW_REG_INTEL_XBP:
174 return &mXbpExpr;
175 #elif defined(GP_ARCH_arm)
176 case DW_REG_ARM_R15:
177 return &mR15expr;
178 case DW_REG_ARM_R14:
179 return &mR14expr;
180 case DW_REG_ARM_R13:
181 return &mR13expr;
182 case DW_REG_ARM_R12:
183 return &mR12expr;
184 case DW_REG_ARM_R11:
185 return &mR11expr;
186 case DW_REG_ARM_R7:
187 return &mR7expr;
188 #elif defined(GP_ARCH_arm64)
189 case DW_REG_AARCH64_X29:
190 return &mX29expr;
191 case DW_REG_AARCH64_X30:
192 return &mX30expr;
193 case DW_REG_AARCH64_SP:
194 return &mSPexpr;
195 #elif defined(GP_ARCH_mips64)
196 case DW_REG_MIPS_SP:
197 return &mSPexpr;
198 case DW_REG_MIPS_FP:
199 return &mFPexpr;
200 case DW_REG_MIPS_PC:
201 return &mPCexpr;
202 #else
203 # error "Unknown arch"
204 #endif
205 default:
206 return nullptr;
// Default-construct a RuleSet with every recovery expression in its
// default (Unknown) state.
RuleSet::RuleSet() {
  // All fields are of type LExpr and so are initialised by LExpr::LExpr().
}
214 ////////////////////////////////////////////////////////////////
215 // SecMap //
216 ////////////////////////////////////////////////////////////////
218 // See header file LulMainInt.h for comments about invariants.
220 SecMap::SecMap(uintptr_t mapStartAVMA, uint32_t mapLen,
221 void (*aLog)(const char*))
222 : mUsable(false),
223 mUniqifier(new mozilla::HashMap<RuleSet, uint32_t, RuleSet,
224 InfallibleAllocPolicy>),
225 mLog(aLog) {
226 if (mapLen == 0) {
227 // Degenerate case.
228 mMapMinAVMA = 1;
229 mMapMaxAVMA = 0;
230 } else {
231 mMapMinAVMA = mapStartAVMA;
232 mMapMaxAVMA = mapStartAVMA + (uintptr_t)mapLen - 1;
// Release all rule storage.  |mUniqifier| may already have been freed
// and nulled by PrepareRuleSets(), hence the null check.
SecMap::~SecMap() {
  mExtents.clear();
  mDictionary.clear();
  if (mUniqifier) {
    mUniqifier->clear();
    mUniqifier = nullptr;
  }
}
245 // RUNS IN NO-MALLOC CONTEXT
246 RuleSet* SecMap::FindRuleSet(uintptr_t ia) {
247 // Binary search mExtents to find one that brackets |ia|.
248 // lo and hi need to be signed, else the loop termination tests
249 // don't work properly. Note that this works correctly even when
250 // mExtents.size() == 0.
252 // Can't do this until the array has been sorted and preened.
253 MOZ_ASSERT(mUsable);
255 long int lo = 0;
256 long int hi = (long int)mExtents.size() - 1;
257 while (true) {
258 // current unsearched space is from lo to hi, inclusive.
259 if (lo > hi) {
260 // not found
261 return nullptr;
263 long int mid = lo + ((hi - lo) / 2);
264 Extent* mid_extent = &mExtents[mid];
265 uintptr_t mid_offset = mid_extent->offset();
266 uintptr_t mid_len = mid_extent->len();
267 uintptr_t mid_minAddr = mMapMinAVMA + mid_offset;
268 uintptr_t mid_maxAddr = mid_minAddr + mid_len - 1;
269 if (ia < mid_minAddr) {
270 hi = mid - 1;
271 continue;
273 if (ia > mid_maxAddr) {
274 lo = mid + 1;
275 continue;
277 MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
278 uint32_t mid_extent_dictIx = mid_extent->dictIx();
279 MOZ_RELEASE_ASSERT(mid_extent_dictIx < mExtents.size());
280 return &mDictionary[mid_extent_dictIx];
282 // NOTREACHED
// Add a RuleSet to the collection.  The rule is copied in.  Calling
// this makes the map non-searchable.
// |avma|/|len| give the code range covered by |rs|; ranges falling
// outside [mMapMinAVMA, mMapMaxAVMA], and zero-length ranges, are
// silently dropped.
void SecMap::AddRuleSet(const RuleSet* rs, uintptr_t avma, uintptr_t len) {
  // Any mutation invalidates the sorted/preened state.
  mUsable = false;

  // Zero length RuleSet? Meaningless, but ignore it anyway.
  if (len == 0) {
    return;
  }

  // Ignore attempts to add RuleSets whose address range doesn't fall within
  // the declared address range for the SecMap.  Maybe we should print some
  // kind of error message rather than silently ignoring them.
  if (!(avma >= mMapMinAVMA && avma + len - 1 <= mMapMaxAVMA)) {
    return;
  }

  // Because `mMapStartAVMA` .. `mMapEndAVMA` can specify at most a 2^32-1 byte
  // chunk of address space, the following must now hold.
  MOZ_RELEASE_ASSERT(len <= (uintptr_t)0xFFFFFFFF);

  // See if `mUniqifier` already has `rs`.  If so set `dictIx` to the assigned
  // dictionary index; if not, add `rs` to `mUniqifier` and assign a new
  // dictionary index.  This is the core of the RuleSet-de-duplication process.
  uint32_t dictIx = 0;
  mozilla::HashMap<RuleSet, uint32_t, RuleSet, InfallibleAllocPolicy>::AddPtr
      p = mUniqifier->lookupForAdd(*rs);
  if (!p) {
    dictIx = mUniqifier->count();
    // If this ever fails, Extents::dictIx will need to be changed to be a
    // type wider than the current uint16_t.
    MOZ_RELEASE_ASSERT(dictIx < (1 << 16));
    // This returns `false` on OOM.  We ignore the return value since we asked
    // for it to use the InfallibleAllocPolicy.
    DebugOnly<bool> addedOK = mUniqifier->add(p, *rs, dictIx);
    MOZ_ASSERT(addedOK);
  } else {
    dictIx = p->value();
  }

  // Record the range as one or more Extents, all sharing |dictIx|.
  uint32_t offset = (uint32_t)(avma - mMapMinAVMA);
  while (len > 0) {
    // Because Extents::len is a uint16_t, we have to add multiple `mExtents`
    // entries to cover the case where `len` is equal to or greater than 2^16.
    // This happens only exceedingly rarely.  In order to get more test
    // coverage on what would otherwise be a very low probability (less than
    // 0.0002%) corner case, we do this in steps of 4095.  On libxul.so as of
    // Sept 2020, this increases the number of `mExtents` entries by about
    // 0.05%, hence has no meaningful effect on space use, but increases the
    // use of this corner case, and hence its test coverage, by a factor of 250.
    uint32_t this_step_len = (len > 4095) ? 4095 : len;
    mExtents.emplace_back(offset, this_step_len, dictIx);
    offset += this_step_len;
    len -= this_step_len;
  }
}
342 // Add a PfxInstr to the vector of such instrs, and return the index
343 // in the vector. Calling this makes the map non-searchable.
344 uint32_t SecMap::AddPfxInstr(PfxInstr pfxi) {
345 mUsable = false;
346 mPfxInstrs.push_back(pfxi);
347 return mPfxInstrs.size() - 1;
// Prepare the map for searching, by sorting it, de-overlapping entries and
// removing any resulting zero-length entries.  At the start of this routine,
// all Extents should fall within [mMapMinAVMA, mMapMaxAVMA] and not have zero
// length, as a result of the checks in AddRuleSet().
void SecMap::PrepareRuleSets() {
  // At this point, the de-duped RuleSets are in `mUniqifier`, and
  // `mDictionary` is empty.  This method will, amongst other things, copy
  // them into `mDictionary` in order of their assigned dictionary-index
  // values, as established by `SecMap::AddRuleSet`, and free `mUniqifier`;
  // after this method, it has no further use.
  MOZ_RELEASE_ASSERT(mUniqifier);
  MOZ_RELEASE_ASSERT(mDictionary.empty());

  // No extents: nothing to prepare; drop the uniqifier and leave the
  // map unusable-but-empty.
  if (mExtents.empty()) {
    mUniqifier->clear();
    mUniqifier = nullptr;
    return;
  }

  // min == 1 && max == 0 is the canonical empty address range (see the
  // constructor).
  if (mMapMinAVMA == 1 && mMapMaxAVMA == 0) {
    // The map is empty.  This should never happen.
    mExtents.clear();
    mUniqifier->clear();
    mUniqifier = nullptr;
    return;
  }
  MOZ_RELEASE_ASSERT(mMapMinAVMA <= mMapMaxAVMA);

  // We must have at least one Extent, and as a consequence there must be at
  // least one entry in the uniqifier.
  MOZ_RELEASE_ASSERT(!mExtents.empty() && !mUniqifier->empty());

#ifdef DEBUG
  // Check invariants on incoming Extents.
  for (size_t i = 0; i < mExtents.size(); ++i) {
    Extent* ext = &mExtents[i];
    uint32_t len = ext->len();
    MOZ_ASSERT(len > 0);
    MOZ_ASSERT(len <= 4095 /* per '4095' in AddRuleSet() */);
    uint32_t offset = ext->offset();
    uintptr_t avma = mMapMinAVMA + (uintptr_t)offset;
    // Upper bounds test.  There's no lower bounds test because `offset` is a
    // positive displacement from `mMapMinAVMA`, so a small underrun will
    // manifest as `len` being close to 2^32.
    MOZ_ASSERT(avma + (uintptr_t)len - 1 <= mMapMaxAVMA);
  }
#endif

  // Sort by start addresses.
  std::sort(mExtents.begin(), mExtents.end(),
            [](const Extent& ext1, const Extent& ext2) {
              return ext1.offset() < ext2.offset();
            });

  // Iteratively truncate any overlaps and remove any zero length
  // entries that might result, or that may have been present
  // initially.  Unless the input is seriously screwy, this is
  // expected to iterate only once.
  while (true) {
    size_t i;
    size_t n = mExtents.size();
    size_t nZeroLen = 0;

    if (n == 0) {
      break;
    }

    // Truncate any entry that overlaps its successor.
    for (i = 1; i < n; ++i) {
      Extent* prev = &mExtents[i - 1];
      Extent* here = &mExtents[i];
      MOZ_ASSERT(prev->offset() <= here->offset());
      if (prev->offset() + prev->len() > here->offset()) {
        prev->setLen(here->offset() - prev->offset());
      }
      if (prev->len() == 0) {
        nZeroLen++;
      }
    }

    // The loop above never inspects the last entry's length.
    if (mExtents[n - 1].len() == 0) {
      nZeroLen++;
    }

    // At this point, the entries are in-order and non-overlapping.
    // If none of them are zero-length, we are done.
    if (nZeroLen == 0) {
      break;
    }

    // Slide back the entries to remove the zero length ones.
    size_t j = 0;  // The write-point.
    for (i = 0; i < n; ++i) {
      if (mExtents[i].len() == 0) {
        continue;
      }
      if (j != i) {
        mExtents[j] = mExtents[i];
      }
      ++j;
    }
    MOZ_ASSERT(i == n);
    MOZ_ASSERT(nZeroLen <= n);
    MOZ_ASSERT(j == n - nZeroLen);
    while (nZeroLen > 0) {
      mExtents.pop_back();
      nZeroLen--;
    }

    MOZ_ASSERT(mExtents.size() == j);
  }

  size_t nExtents = mExtents.size();

#ifdef DEBUG
  // Do a final check on the extents: their address ranges must be
  // ascending, non overlapping, non zero sized.
  if (nExtents > 0) {
    MOZ_ASSERT(mExtents[0].len() > 0);
    for (size_t i = 1; i < nExtents; ++i) {
      const Extent* prev = &mExtents[i - 1];
      const Extent* here = &mExtents[i];
      MOZ_ASSERT(prev->offset() < here->offset());
      MOZ_ASSERT(here->len() > 0);
      MOZ_ASSERT(prev->offset() + prev->len() <= here->offset());
    }
  }
#endif

  // Create the final dictionary by enumerating the uniqifier.
  size_t nUniques = mUniqifier->count();

  RuleSet dummy;
  mozilla::PodZero(&dummy);

  // Pre-size the dictionary, then place each RuleSet at its assigned
  // index (iteration order of the hash map is arbitrary).
  mDictionary.reserve(nUniques);
  for (size_t i = 0; i < nUniques; i++) {
    mDictionary.push_back(dummy);
  }

  for (auto iter = mUniqifier->iter(); !iter.done(); iter.next()) {
    MOZ_RELEASE_ASSERT(iter.get().value() < nUniques);
    mDictionary[iter.get().value()] = iter.get().key();
  }

  // The uniqifier has served its purpose; free it.
  mUniqifier = nullptr;

  char buf[150];
  SprintfLiteral(
      buf,
      "PrepareRuleSets: %lu extents, %lu rulesets, "
      "avma min/max 0x%llx, 0x%llx\n",
      (unsigned long int)nExtents, (unsigned long int)mDictionary.size(),
      (unsigned long long int)mMapMinAVMA, (unsigned long long int)mMapMaxAVMA);
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);

  // Is now usable for binary search.
  mUsable = true;

#if 0
  mLog("\nRulesets after preening\n");
  for (size_t i = 0; i < nExtents; ++i) {
    const Extent* extent = &mExtents[i];
    uintptr_t avma = mMapMinAVMA + (uintptr_t)extent->offset();
    mDictionary[extent->dictIx()].Print(avma, extent->len(), mLog);
    mLog("\n");
  }
  mLog("\n");
#endif
}
// True iff the map holds no code-address extents (and hence can never
// satisfy a lookup).
bool SecMap::IsEmpty() { return mExtents.empty(); }
// Measure the heap footprint of this SecMap, including the object
// itself, its three vectors' backing stores, and (if still live) the
// uniqifier hash map.
size_t SecMap::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
  size_t n = aMallocSizeOf(this);

  // It's conceivable that these calls would be unsafe with some
  // implementations of std::vector, but it seems to be working for now...
  n += aMallocSizeOf(mPfxInstrs.data());

  // |mUniqifier| is freed by PrepareRuleSets(), hence the null check.
  if (mUniqifier) {
    n += mUniqifier->shallowSizeOfIncludingThis(aMallocSizeOf);
  }
  n += aMallocSizeOf(mDictionary.data());
  n += aMallocSizeOf(mExtents.data());

  return n;
}
539 ////////////////////////////////////////////////////////////////
540 // SegArray //
541 ////////////////////////////////////////////////////////////////
// A SegArray holds a set of address ranges that together exactly
// cover an address range, with no overlaps or holes.  Each range has
// an associated value, which in this case has been specialised to be
// a simple boolean.  The representation is kept to minimal canonical
// form in which adjacent ranges with the same associated value are
// merged together.  Each range is represented by a |struct Seg|.
//
// SegArrays are used to keep track of which parts of the address
// space are known to contain instructions.
class SegArray {
 public:
  // Set the value for the inclusive range [lo, hi] to |val|.
  // Ranges with lo > hi are ignored.  Restores canonical (merged)
  // form afterwards.
  void add(uintptr_t lo, uintptr_t hi, bool val) {
    if (lo > hi) {
      return;
    }
    // Make |lo| and |hi+1| segment boundaries, so that [lo, hi] is
    // covered by a whole number of segments.
    split_at(lo);
    if (hi < UINTPTR_MAX) {
      split_at(hi + 1);
    }
    std::vector<Seg>::size_type iLo, iHi, i;
    iLo = find(lo);
    iHi = find(hi);
    for (i = iLo; i <= iHi; ++i) {
      mSegs[i].val = val;
    }
    preen();
  }

  // RUNS IN NO-MALLOC CONTEXT
  // If |addr| lies in a range whose value is |true|, write that
  // range's bounds to |*rx_min|/|*rx_max| and return true.  Otherwise
  // return false and leave the outputs untouched.
  bool getBoundingCodeSegment(/*OUT*/ uintptr_t* rx_min,
                              /*OUT*/ uintptr_t* rx_max, uintptr_t addr) {
    std::vector<Seg>::size_type i = find(addr);
    if (!mSegs[i].val) {
      return false;
    }
    *rx_min = mSegs[i].lo;
    *rx_max = mSegs[i].hi;
    return true;
  }

  // Initially the entire address space is one |false| range.
  SegArray() {
    Seg s(0, UINTPTR_MAX, false);
    mSegs.push_back(s);
  }

 private:
  // One range: [lo, hi] inclusive, with associated value |val|.
  struct Seg {
    Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {}
    uintptr_t lo;
    uintptr_t hi;
    bool val;
  };

  // Restore canonical form by merging adjacent ranges that carry the
  // same value.
  void preen() {
    for (std::vector<Seg>::iterator iter = mSegs.begin();
         iter < mSegs.end() - 1; ++iter) {
      if (iter[0].val != iter[1].val) {
        continue;
      }
      // Absorb the successor into this entry.  erase() keeps
      // iterators before the erase point valid.
      iter[0].hi = iter[1].hi;
      mSegs.erase(iter + 1);
      // Back up one, so as not to miss an opportunity to merge
      // with the entry after this one.
      --iter;
    }
  }

  // RUNS IN NO-MALLOC CONTEXT
  // Binary-search for the segment containing |a|.  Always succeeds,
  // since the segments exactly cover the address space.
  std::vector<Seg>::size_type find(uintptr_t a) {
    long int lo = 0;
    // BUGFIX: the unsearched space is [lo, hi] *inclusive*, so |hi|
    // must start at size()-1, matching the other binary searches in
    // this file.  Initialising it to size() permitted a (theoretical)
    // out-of-bounds read of mSegs[size()].
    long int hi = (long int)mSegs.size() - 1;
    while (true) {
      // The unsearched space is lo .. hi inclusive.
      if (lo > hi) {
        // Not found.  This can't happen.
        return (std::vector<Seg>::size_type)(-1);
      }
      long int mid = lo + ((hi - lo) / 2);
      uintptr_t mid_lo = mSegs[mid].lo;
      uintptr_t mid_hi = mSegs[mid].hi;
      if (a < mid_lo) {
        hi = mid - 1;
        continue;
      }
      if (a > mid_hi) {
        lo = mid + 1;
        continue;
      }
      return (std::vector<Seg>::size_type)mid;
    }
  }

  // Ensure |a| is the low bound of some segment, splitting the
  // segment containing it if necessary.
  void split_at(uintptr_t a) {
    std::vector<Seg>::size_type i = find(a);
    if (mSegs[i].lo == a) {
      return;
    }
    mSegs.insert(mSegs.begin() + i + 1, mSegs[i]);
    mSegs[i].hi = a - 1;
    mSegs[i + 1].lo = a;
  }

  // Debug-only dump of the current segment list.
  void show() {
    printf("<< %d entries:\n", (int)mSegs.size());
    for (std::vector<Seg>::iterator iter = mSegs.begin(); iter < mSegs.end();
         ++iter) {
      printf("  %016llx  %016llx  %s\n", (unsigned long long int)(*iter).lo,
             (unsigned long long int)(*iter).hi,
             (*iter).val ? "true" : "false");
    }
    printf(">>\n");
  }

  std::vector<Seg> mSegs;
};
659 ////////////////////////////////////////////////////////////////
660 // PriMap //
661 ////////////////////////////////////////////////////////////////
// The top-level (primary) mapping: an ordered collection of SecMaps,
// one per mapped code object.  The SecMaps are kept sorted by address
// range, non-overlapping and non-empty, so lookups can binary-search.
class PriMap {
 public:
  explicit PriMap(void (*aLog)(const char*)) : mLog(aLog) {}

  // RUNS IN NO-MALLOC CONTEXT
  // Find the RuleSet covering code address |ia|, together with the
  // PfxInstr vector of the SecMap that owns it.  Components are null
  // when |ia| is not covered.
  pair<const RuleSet*, const vector<PfxInstr>*> Lookup(uintptr_t ia) {
    SecMap* sm = FindSecMap(ia);
    return pair<const RuleSet*, const vector<PfxInstr>*>(
        sm ? sm->FindRuleSet(ia) : nullptr, sm ? sm->GetPfxInstrs() : nullptr);
  }

  // Add a secondary map.  No overlaps allowed w.r.t. existing
  // secondary maps.
  void AddSecMap(mozilla::UniquePtr<SecMap>&& aSecMap) {
    // We can't add an empty SecMap to the PriMap.  But that's OK
    // since we'd never be able to find anything in it anyway.
    if (aSecMap->IsEmpty()) {
      return;
    }

    // Iterate through the SecMaps and find the right place for this
    // one.  At the same time, ensure that the in-order
    // non-overlapping invariant is preserved (and, generally, holds).
    // FIXME: this gives a cost that is O(N^2) in the total number of
    // shared objects in the system.  ToDo: better.
    MOZ_ASSERT(aSecMap->mMapMinAVMA <= aSecMap->mMapMaxAVMA);

    size_t num_secMaps = mSecMaps.size();
    uintptr_t i;
    for (i = 0; i < num_secMaps; ++i) {
      mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i];
      MOZ_ASSERT(sm_i->mMapMinAVMA <= sm_i->mMapMaxAVMA);
      if (aSecMap->mMapMinAVMA < sm_i->mMapMaxAVMA) {
        // |aSecMap| needs to be inserted immediately before mSecMaps[i].
        break;
      }
    }
    MOZ_ASSERT(i <= num_secMaps);
    if (i == num_secMaps) {
      // It goes at the end.
      mSecMaps.push_back(std::move(aSecMap));
    } else {
      std::vector<mozilla::UniquePtr<SecMap>>::iterator iter =
          mSecMaps.begin() + i;
      mSecMaps.insert(iter, std::move(aSecMap));
    }
    char buf[100];
    SprintfLiteral(buf, "AddSecMap: now have %d SecMaps\n",
                   (int)mSecMaps.size());
    buf[sizeof(buf) - 1] = 0;
    mLog(buf);
  }

  // Remove and delete any SecMaps in the mapping, that intersect
  // with the specified address range.
  void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) {
    MOZ_ASSERT(avma_min <= avma_max);
    size_t num_secMaps = mSecMaps.size();
    if (num_secMaps > 0) {
      intptr_t i;
      // Iterate from end to start over the vector, so as to ensure
      // that the special case where |avma_min| and |avma_max| denote
      // the entire address space, can be completed in time proportional
      // to the number of elements in the map.
      for (i = (intptr_t)num_secMaps - 1; i >= 0; i--) {
        mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i];
        if (sm_i->mMapMaxAVMA < avma_min || avma_max < sm_i->mMapMinAVMA) {
          // There's no overlap.  Move on.
          continue;
        }
        // We need to remove mSecMaps[i] and slide all those above it
        // downwards to cover the hole.
        mSecMaps.erase(mSecMaps.begin() + i);
      }
    }
  }

  // Return the number of currently contained SecMaps.
  size_t CountSecMaps() { return mSecMaps.size(); }

  // Measure heap usage of this PriMap and all contained SecMaps.
  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
    size_t n = aMallocSizeOf(this);

    // It's conceivable that this call would be unsafe with some
    // implementations of std::vector, but it seems to be working for now...
    n += aMallocSizeOf(mSecMaps.data());

    for (size_t i = 0; i < mSecMaps.size(); i++) {
      n += mSecMaps[i]->SizeOfIncludingThis(aMallocSizeOf);
    }

    return n;
  }

 private:
  // RUNS IN NO-MALLOC CONTEXT
  // Binary-search mSecMaps for the one whose address range contains
  // |ia|; nullptr if none does.
  SecMap* FindSecMap(uintptr_t ia) {
    // Binary search mSecMaps to find one that brackets |ia|.
    // lo and hi need to be signed, else the loop termination tests
    // don't work properly.
    long int lo = 0;
    long int hi = (long int)mSecMaps.size() - 1;
    while (true) {
      // current unsearched space is from lo to hi, inclusive.
      if (lo > hi) {
        // not found
        return nullptr;
      }
      long int mid = lo + ((hi - lo) / 2);
      mozilla::UniquePtr<SecMap>& mid_secMap = mSecMaps[mid];
      uintptr_t mid_minAddr = mid_secMap->mMapMinAVMA;
      uintptr_t mid_maxAddr = mid_secMap->mMapMaxAVMA;
      if (ia < mid_minAddr) {
        hi = mid - 1;
        continue;
      }
      if (ia > mid_maxAddr) {
        lo = mid + 1;
        continue;
      }
      MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
      return mid_secMap.get();
    }
    // NOTREACHED
  }

 private:
  // sorted array of per-object ranges, non overlapping, non empty
  std::vector<mozilla::UniquePtr<SecMap>> mSecMaps;

  // a logging sink, for debugging.
  void (*mLog)(const char*);
};
797 ////////////////////////////////////////////////////////////////
798 // LUL //
799 ////////////////////////////////////////////////////////////////
801 #define LUL_LOG(_str) \
802 do { \
803 char buf[200]; \
804 SprintfLiteral(buf, "LUL: pid %" PRIu64 " tid %" PRIu64 " lul-obj %p: %s", \
805 uint64_t(profiler_current_process_id().ToNumber()), \
806 uint64_t(profiler_current_thread_id().ToNumber()), this, \
807 (_str)); \
808 buf[sizeof(buf) - 1] = 0; \
809 mLog(buf); \
810 } while (0)
// Construct a LUL unwinder in admin mode.  The constructing thread
// becomes the admin thread: only it may register/unregister mappings
// or call EnableUnwinding().
LUL::LUL(void (*aLog)(const char*))
    : mLog(aLog),
      mAdminMode(true),
      mAdminThreadId(profiler_current_thread_id()),
      mPriMap(new PriMap(aLog)),
      mSegArray(new SegArray()),
      mUSU(new UniqueStringUniverse()) {
  LUL_LOG("LUL::LUL: Created object");
}
// Destroy the unwinder and all owned maps.  Note |mLog| is nulled
// only after the final LUL_LOG call, which uses it.
LUL::~LUL() {
  LUL_LOG("LUL::~LUL: Destroyed object");
  delete mPriMap;
  delete mSegArray;
  mLog = nullptr;
  delete mUSU;
}
// Log cumulative unwind statistics, but only once at least 5000 new
// frames have been processed since the previous report; then snapshot
// the counters for the next interval.
void LUL::MaybeShowStats() {
  // This is racey in the sense that it can't guarantee that
  //    n_new == n_new_Context + n_new_CFI + n_new_Scanned
  // if it should happen that mStats is updated by some other thread
  // in between computation of n_new and n_new_{Context,CFI,FP}.
  // But it's just stats printing, so we don't really care.
  uint32_t n_new = mStats - mStatsPrevious;
  if (n_new >= 5000) {
    uint32_t n_new_Context = mStats.mContext - mStatsPrevious.mContext;
    uint32_t n_new_CFI = mStats.mCFI - mStatsPrevious.mCFI;
    uint32_t n_new_FP = mStats.mFP - mStatsPrevious.mFP;
    mStatsPrevious = mStats;
    char buf[200];
    SprintfLiteral(buf,
                   "LUL frame stats: TOTAL %5u"
                   "    CTX %4u    CFI %4u    FP %4u",
                   n_new, n_new_Context, n_new_CFI, n_new_FP);
    buf[sizeof(buf) - 1] = 0;
    mLog(buf);
  }
}
852 size_t LUL::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
853 size_t n = aMallocSizeOf(this);
854 n += mPriMap->SizeOfIncludingThis(aMallocSizeOf);
856 // Measurement of the following members may be added later if DMD finds it
857 // is worthwhile:
858 // - mSegArray
859 // - mUSU
861 return n;
// Switch from admin mode to unwinding mode.  After this, mappings may
// no longer be added or removed.  Must be called on the admin thread;
// repeated calls are tolerated.
void LUL::EnableUnwinding() {
  LUL_LOG("LUL::EnableUnwinding");
  // Don't assert for Admin mode here.  That is, tolerate a call here
  // if we are already in Unwinding mode.
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mAdminMode = false;
}
// Register a newly mapped code object: read its CFI/EXIDX unwind data
// into a fresh SecMap and add that to the primary map.  Admin mode
// (and the admin thread) only.
// |aRXavma|/|aSize| describe the executable mapping, |aFileName| the
// object on disk; if |aMappedImage| is non-null the unwind data is
// read from that in-memory image rather than from the file.
void LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName,
                         const void* aMappedImage) {
  MOZ_RELEASE_ASSERT(mAdminMode);
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mLog(":\n");
  char buf[200];
  SprintfLiteral(buf, "NotifyMap %llx %llu %s\n",
                 (unsigned long long int)aRXavma, (unsigned long long int)aSize,
                 aFileName);
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);

  // We can't have a SecMap covering more than 2^32-1 bytes of address space.
  // See the definition of SecMap for why.  Rather than crash the system, just
  // limit the SecMap's size accordingly.  This case is never actually
  // expected to happen.
  if (((unsigned long long int)aSize) > 0xFFFFFFFFULL) {
    aSize = (uintptr_t)0xFFFFFFFF;
  }
  MOZ_RELEASE_ASSERT(aSize <= 0xFFFFFFFF);

  // Ignore obviously-stupid notifications.
  if (aSize > 0) {
    // Here's a new mapping, for this object.
    mozilla::UniquePtr<SecMap> smap =
        mozilla::MakeUnique<SecMap>(aRXavma, (uint32_t)aSize, mLog);

    // Read CFI or EXIDX unwind data into |smap|.
    if (!aMappedImage) {
      (void)lul::ReadSymbolData(string(aFileName), std::vector<string>(),
                                smap.get(), (void*)aRXavma, aSize, mUSU, mLog);
    } else {
      (void)lul::ReadSymbolDataInternal(
          (const uint8_t*)aMappedImage, string(aFileName),
          std::vector<string>(), smap.get(), (void*)aRXavma, aSize, mUSU, mLog);
    }

    mLog("NotifyMap .. preparing entries\n");

    // Finalise (sort/de-overlap/build dictionary) so the map is
    // searchable.
    smap->PrepareRuleSets();

    SprintfLiteral(buf, "NotifyMap got %lld entries\n",
                   (long long int)smap->Size());
    buf[sizeof(buf) - 1] = 0;
    mLog(buf);

    // Add it to the primary map (the top level set of mapped objects).
    mPriMap->AddSecMap(std::move(smap));

    // Tell the segment array about the mapping, so that the stack
    // scan and __kernel_syscall mechanisms know where valid code is.
    mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
  }
}
// Note that [aRXavma, aRXavma+aSize-1] contains executable code,
// without reading any unwind data for it.  Only the segment array is
// updated, so stack scanning can recognise the area as code.  Admin
// mode only.
void LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize) {
  MOZ_RELEASE_ASSERT(mAdminMode);
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mLog(":\n");
  char buf[200];
  SprintfLiteral(buf, "NotifyExecutableArea %llx %llu\n",
                 (unsigned long long int)aRXavma,
                 (unsigned long long int)aSize);
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);

  // Ignore obviously-stupid notifications.
  if (aSize > 0) {
    // Tell the segment array about the mapping, so that the stack
    // scan and __kernel_syscall mechanisms know where valid code is.
    mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
  }
}
// Tell the unwinder that [aRXavmaMin, aRXavmaMax] is about to be
// unmapped: delete any intersecting SecMaps and mark the range as
// non-code in the segment array.  Admin mode only.
void LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax) {
  MOZ_RELEASE_ASSERT(mAdminMode);
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mLog(":\n");
  char buf[100];
  SprintfLiteral(buf, "NotifyUnmap %016llx-%016llx\n",
                 (unsigned long long int)aRXavmaMin,
                 (unsigned long long int)aRXavmaMax);
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);

  MOZ_ASSERT(aRXavmaMin <= aRXavmaMax);

  // Remove from the primary map, any secondary maps that intersect
  // with the address range.  Also delete the secondary maps.
  mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax);

  // Tell the segment array that the address range no longer
  // contains valid code.
  mSegArray->add(aRXavmaMin, aRXavmaMax, false);

  SprintfLiteral(buf, "NotifyUnmap: now have %d SecMaps\n",
                 (int)mPriMap->CountSecMaps());
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);
}
// Return the number of mapped code objects currently registered with
// the primary map.  Admin mode only.
size_t LUL::CountMappings() {
  MOZ_RELEASE_ASSERT(mAdminMode);
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  return mPriMap->CountSecMaps();
}
// RUNS IN NO-MALLOC CONTEXT
// Read the machine word at stack address |aAddr| out of the captured
// stack image |aStackImg|.  Returns an invalid TaggedUWord if |aAddr|
// is itself invalid, or if the requested word is not entirely within
// the image.
static TaggedUWord DerefTUW(TaggedUWord aAddr, const StackImage* aStackImg) {
  if (!aAddr.Valid()) {
    return TaggedUWord();
  }

  // Lower limit check.  |aAddr.Value()| is the lowest requested address
  // and |aStackImg->mStartAvma| is the lowest address we actually have,
  // so the comparison is straightforward.
  if (aAddr.Value() < aStackImg->mStartAvma) {
    return TaggedUWord();
  }

  // Upper limit check.  We must compute the highest requested address
  // and the highest address we actually have, but being careful to
  // avoid overflow.  In particular if |aAddr| is 0xFFF...FFF or the
  // 3/7 values below that, then we will get overflow.  See bug #1245477.
  typedef CheckedInt<uintptr_t> CheckedUWord;
  CheckedUWord highest_requested_plus_one =
      CheckedUWord(aAddr.Value()) + CheckedUWord(sizeof(uintptr_t));
  CheckedUWord highest_available_plus_one =
      CheckedUWord(aStackImg->mStartAvma) + CheckedUWord(aStackImg->mLen);
  if (!highest_requested_plus_one.isValid()     // overflow?
      || !highest_available_plus_one.isValid()  // overflow?
      || (highest_requested_plus_one.value() >
          highest_available_plus_one.value())) {  // in range?
    return TaggedUWord();
  }

  // Safe: the word lies wholly within the image's contents buffer.
  return TaggedUWord(
      *(uintptr_t*)(&aStackImg
                         ->mContents[aAddr.Value() - aStackImg->mStartAvma]));
}
// RUNS IN NO-MALLOC CONTEXT
// Produce the value of DWARF register |aReg| from the register
// snapshot |aOldRegs|, or |aCFA| when |aReg| is the pseudo-register
// DW_REG_CFA.  For registers not tracked on this architecture, assert
// (debug builds) and return an invalid word.
static TaggedUWord EvaluateReg(int16_t aReg, const UnwindRegs* aOldRegs,
                               TaggedUWord aCFA) {
  switch (aReg) {
    case DW_REG_CFA:
      return aCFA;
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    case DW_REG_INTEL_XBP:
      return aOldRegs->xbp;
    case DW_REG_INTEL_XSP:
      return aOldRegs->xsp;
    case DW_REG_INTEL_XIP:
      return aOldRegs->xip;
#elif defined(GP_ARCH_arm)
    case DW_REG_ARM_R7:
      return aOldRegs->r7;
    case DW_REG_ARM_R11:
      return aOldRegs->r11;
    case DW_REG_ARM_R12:
      return aOldRegs->r12;
    case DW_REG_ARM_R13:
      return aOldRegs->r13;
    case DW_REG_ARM_R14:
      return aOldRegs->r14;
    case DW_REG_ARM_R15:
      return aOldRegs->r15;
#elif defined(GP_ARCH_arm64)
    case DW_REG_AARCH64_X29:
      return aOldRegs->x29;
    case DW_REG_AARCH64_X30:
      return aOldRegs->x30;
    case DW_REG_AARCH64_SP:
      return aOldRegs->sp;
#elif defined(GP_ARCH_mips64)
    case DW_REG_MIPS_SP:
      return aOldRegs->sp;
    case DW_REG_MIPS_FP:
      return aOldRegs->fp;
    case DW_REG_MIPS_PC:
      return aOldRegs->pc;
#else
#  error "Unsupported arch"
#endif
    default:
      MOZ_ASSERT(0);
      return TaggedUWord();
  }
}
// RUNS IN NO-MALLOC CONTEXT
// See prototype for comment.
// Evaluates the prefix-instruction expression starting at index |start| in
// |aPfxInstrs|, using a small fixed-size operand stack.  Any malformed
// input -- bad start index, operand-stack over/underflow, unknown opcode,
// or running off the end of the instruction vector -- routes to |fail|
// and yields an invalid TaggedUWord.
TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs,
                            TaggedUWord aCFA, const StackImage* aStackImg,
                            const vector<PfxInstr>& aPfxInstrs) {
  // A small evaluation stack, and a stack pointer, which points to
  // the highest numbered in-use element.
  const int N_STACK = 10;
  TaggedUWord stack[N_STACK];
  int stackPointer = -1;
  for (int i = 0; i < N_STACK; i++) stack[i] = TaggedUWord();

// PUSH/POP bail out to |fail| rather than asserting, since the expression
// being evaluated derives from (possibly malformed) CFI data.
#define PUSH(_tuw)                                             \
  do {                                                         \
    if (stackPointer >= N_STACK - 1) goto fail; /* overflow */ \
    stack[++stackPointer] = (_tuw);                            \
  } while (0)

#define POP(_lval)                                   \
  do {                                               \
    if (stackPointer < 0) goto fail; /* underflow */ \
    _lval = stack[stackPointer--];                   \
  } while (0)

  // Cursor in the instruction sequence.
  size_t curr = start + 1;

  // Check the start point is sane.
  size_t nInstrs = aPfxInstrs.size();
  if (start < 0 || (size_t)start >= nInstrs) goto fail;

  // The instruction sequence must start with PX_Start.  If not,
  // something is seriously wrong.
  PfxInstr first = aPfxInstrs[start];
  if (first.mOpcode != PX_Start) goto fail;

  // Push the CFA on the stack to start with (or not), as required by
  // the original DW_OP_*expression* CFI.
  if (first.mOperand != 0) PUSH(aCFA);

  while (true) {
    if (curr >= nInstrs) goto fail;  // ran off the end of the sequence

    PfxInstr pfxi = aPfxInstrs[curr++];
    if (pfxi.mOpcode == PX_End) break;  // we're done

    switch (pfxi.mOpcode) {
      case PX_Start:
        // This should appear only at the start of the sequence.
        goto fail;
      case PX_End:
        // We just took care of that, so we shouldn't see it again.
        MOZ_ASSERT(0);
        goto fail;
      case PX_SImm32:
        // Push a sign-extended 32-bit immediate.
        PUSH(TaggedUWord((intptr_t)pfxi.mOperand));
        break;
      case PX_DwReg: {
        // Push the current value of a DWARF register.
        DW_REG_NUMBER reg = (DW_REG_NUMBER)pfxi.mOperand;
        MOZ_ASSERT(reg != DW_REG_CFA);
        PUSH(EvaluateReg(reg, aOldRegs, aCFA));
        break;
      case PX_Deref: {
        // Replace top-of-stack with the word it points at in the
        // captured stack image.
        TaggedUWord addr;
        POP(addr);
        PUSH(DerefTUW(addr, aStackImg));
        break;
      case PX_Add: {
        // Binary ops pop |x| (top of stack) then |y|, and push |y OP x|.
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y + x);
        break;
      case PX_Sub: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y - x);
        break;
      case PX_And: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y & x);
        break;
      case PX_Or: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y | x);
        break;
      case PX_CmpGES: {
        // Signed greater-or-equal comparison (see TaggedUWord::CmpGEs).
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y.CmpGEs(x));
        break;
      case PX_Shl: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y << x);
        break;
      default:
        MOZ_ASSERT(0);
        goto fail;
  }  // while (true)

  // Evaluation finished.  The top value on the stack is the result.
  if (stackPointer >= 0) {
    return stack[stackPointer];
  // Else fall through

fail:
  return TaggedUWord();

#undef PUSH
#undef POP
1199 // RUNS IN NO-MALLOC CONTEXT
1200 TaggedUWord LExpr::EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA,
1201 const StackImage* aStackImg,
1202 const vector<PfxInstr>* aPfxInstrs) const {
1203 switch (mHow) {
1204 case UNKNOWN:
1205 return TaggedUWord();
1206 case NODEREF: {
1207 TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA);
1208 tuw = tuw + TaggedUWord((intptr_t)mOffset);
1209 return tuw;
1211 case DEREF: {
1212 TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA);
1213 tuw = tuw + TaggedUWord((intptr_t)mOffset);
1214 return DerefTUW(tuw, aStackImg);
1216 case PFXEXPR: {
1217 MOZ_ASSERT(aPfxInstrs);
1218 if (!aPfxInstrs) {
1219 return TaggedUWord();
1221 return EvaluatePfxExpr(mOffset, aOldRegs, aCFA, aStackImg, *aPfxInstrs);
1223 default:
1224 MOZ_ASSERT(0);
1225 return TaggedUWord();
// RUNS IN NO-MALLOC CONTEXT
// Applies RuleSet |aRS| to |*aRegs| in place, computing the register state
// of the calling frame from the current frame's state.  On return, any
// tracked register whose rule could not be evaluated is left invalid.
static void UseRuleSet(/*MOD*/ UnwindRegs* aRegs, const StackImage* aStackImg,
                       const RuleSet* aRS, const vector<PfxInstr>* aPfxInstrs) {
  // Take a copy of regs, since we'll need to refer to the old values
  // whilst computing the new ones.
  UnwindRegs old_regs = *aRegs;

  // Mark all the current register values as invalid, so that the
  // caller can see, on our return, which ones have been computed
  // anew.  If we don't even manage to compute a new PC value, then
  // the caller will have to abandon the unwind.
  // FIXME: Create and use instead: aRegs->SetAllInvalid();
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
  aRegs->xbp = TaggedUWord();
  aRegs->xsp = TaggedUWord();
  aRegs->xip = TaggedUWord();
#elif defined(GP_ARCH_arm)
  aRegs->r7 = TaggedUWord();
  aRegs->r11 = TaggedUWord();
  aRegs->r12 = TaggedUWord();
  aRegs->r13 = TaggedUWord();
  aRegs->r14 = TaggedUWord();
  aRegs->r15 = TaggedUWord();
#elif defined(GP_ARCH_arm64)
  aRegs->x29 = TaggedUWord();
  aRegs->x30 = TaggedUWord();
  aRegs->sp = TaggedUWord();
  aRegs->pc = TaggedUWord();
#elif defined(GP_ARCH_mips64)
  aRegs->sp = TaggedUWord();
  aRegs->fp = TaggedUWord();
  aRegs->pc = TaggedUWord();
#else
#  error "Unsupported arch"
#endif

  // This is generally useful.
  const TaggedUWord inval = TaggedUWord();

  // First, compute the CFA.  The CFA rule gets an invalid "old CFA",
  // since there is no previous CFA for it to refer to.
  TaggedUWord cfa = aRS->mCfaExpr.EvaluateExpr(&old_regs, inval /*old cfa*/,
                                               aStackImg, aPfxInstrs);

  // If we didn't manage to compute the CFA, well .. that's ungood,
  // but keep going anyway.  It'll be OK provided none of the register
  // value rules mention the CFA.  In any case, compute the new values
  // for each register that we're tracking.

#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
  aRegs->xbp =
      aRS->mXbpExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->xsp =
      aRS->mXspExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->xip =
      aRS->mXipExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#elif defined(GP_ARCH_arm)
  aRegs->r7 = aRS->mR7expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r11 =
      aRS->mR11expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r12 =
      aRS->mR12expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r13 =
      aRS->mR13expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r14 =
      aRS->mR14expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r15 =
      aRS->mR15expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#elif defined(GP_ARCH_arm64)
  aRegs->x29 =
      aRS->mX29expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->x30 =
      aRS->mX30expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#elif defined(GP_ARCH_mips64)
  aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->fp = aRS->mFPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->pc = aRS->mPCexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#else
#  error "Unsupported arch"
#endif

  // We're done.  Any regs for which we didn't manage to compute a
  // new value will now be marked as invalid.
// RUNS IN NO-MALLOC CONTEXT
// Unwinds the stack captured in |aStackImg|, starting from the register
// state |*aStartRegs|.  Writes up to |aFramesAvail| PC/SP pairs into
// |aFramePCs|/|aFrameSPs|, stores the count in |*aFramesUsed|, and
// increments |*aFramePointerFramesAcquired| once per frame recovered by
// frame-pointer chasing rather than by CFI rules.
void LUL::Unwind(/*OUT*/ uintptr_t* aFramePCs,
                 /*OUT*/ uintptr_t* aFrameSPs,
                 /*OUT*/ size_t* aFramesUsed,
                 /*OUT*/ size_t* aFramePointerFramesAcquired,
                 size_t aFramesAvail, UnwindRegs* aStartRegs,
                 StackImage* aStackImg) {
  MOZ_RELEASE_ASSERT(!mAdminMode);

  /////////////////////////////////////////////////////////
  // BEGIN UNWIND

  *aFramesUsed = 0;

  UnwindRegs regs = *aStartRegs;
  TaggedUWord last_valid_sp = TaggedUWord();

  while (true) {
    if (DEBUG_MAIN) {
      char buf[300];
      mLog("\n");
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
      SprintfLiteral(
          buf, "LoopTop: rip %d/%llx  rsp %d/%llx  rbp %d/%llx\n",
          (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(),
          (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(),
          (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#elif defined(GP_ARCH_arm)
      SprintfLiteral(
          buf,
          "LoopTop: r15 %d/%llx  r7 %d/%llx  r11 %d/%llx"
          "  r12 %d/%llx  r13 %d/%llx  r14 %d/%llx\n",
          (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(),
          (int)regs.r7.Valid(), (unsigned long long int)regs.r7.Value(),
          (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(),
          (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(),
          (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(),
          (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#elif defined(GP_ARCH_arm64)
      SprintfLiteral(
          buf,
          "LoopTop: pc %d/%llx  x29 %d/%llx  x30 %d/%llx"
          "  sp %d/%llx\n",
          (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
          (int)regs.x29.Valid(), (unsigned long long int)regs.x29.Value(),
          (int)regs.x30.Valid(), (unsigned long long int)regs.x30.Value(),
          (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#elif defined(GP_ARCH_mips64)
      SprintfLiteral(
          buf, "LoopTop: pc %d/%llx  sp %d/%llx  fp %d/%llx\n",
          (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
          (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value(),
          (int)regs.fp.Valid(), (unsigned long long int)regs.fp.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#else
#  error "Unsupported arch"
#endif

    // The instruction address for this frame (innermost frame: the PC;
    // outer frames: the return-address register) and the stack pointer.
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    TaggedUWord ia = regs.xip;
    TaggedUWord sp = regs.xsp;
#elif defined(GP_ARCH_arm)
    TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14);
    TaggedUWord sp = regs.r13;
#elif defined(GP_ARCH_arm64)
    TaggedUWord ia = (*aFramesUsed == 0 ? regs.pc : regs.x30);
    TaggedUWord sp = regs.sp;
#elif defined(GP_ARCH_mips64)
    TaggedUWord ia = regs.pc;
    TaggedUWord sp = regs.sp;
#else
#  error "Unsupported arch"
#endif

    // Out of output space?
    if (*aFramesUsed >= aFramesAvail) {
      break;

    // If we don't have a valid value for the PC, give up.
    if (!ia.Valid()) {
      break;

    // If this is the innermost frame, record the SP value, which
    // presumably is valid.  If this isn't the innermost frame, and we
    // have a valid SP value, check that its SP value isn't less that
    // the one we've seen so far, so as to catch potential SP value
    // cycles.
    if (*aFramesUsed == 0) {
      last_valid_sp = sp;
    } else {
      MOZ_ASSERT(last_valid_sp.Valid());
      if (sp.Valid()) {
        if (sp.Value() < last_valid_sp.Value()) {
          // Hmm, SP going in the wrong direction.  Let's stop.
          break;

        // Remember where we got to.
        last_valid_sp = sp;

    // Record this frame; an invalid SP is reported as 0.
    aFramePCs[*aFramesUsed] = ia.Value();
    aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0;
    (*aFramesUsed)++;

    // Find the RuleSet for the current IA, if any.  This will also
    // query the backing (secondary) maps if it isn't found in the
    // thread-local cache.

    // If this isn't the innermost frame, back up into the calling insn,
    // so the lookup is done with an address inside the call instruction
    // rather than at the return point.
    if (*aFramesUsed > 1) {
      ia = ia + TaggedUWord((uintptr_t)(-1));

    pair<const RuleSet*, const vector<PfxInstr>*> ruleset_and_pfxinstrs =
        mPriMap->Lookup(ia.Value());
    const RuleSet* ruleset = ruleset_and_pfxinstrs.first;
    const vector<PfxInstr>* pfxinstrs = ruleset_and_pfxinstrs.second;

    if (DEBUG_MAIN) {
      char buf[100];
      SprintfLiteral(buf, "ruleset for 0x%llx = %p\n",
                     (unsigned long long int)ia.Value(), ruleset);
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);

#if defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)
    /////////////////////////////////////////////
    ////
    // On 32 bit x86-linux, syscalls are often done via the VDSO
    // function __kernel_vsyscall, which doesn't have a corresponding
    // object that we can read debuginfo from.  That effectively kills
    // off all stack traces for threads blocked in syscalls.  Hence
    // special-case by looking at the code surrounding the program
    // counter.
    //
    // 0xf7757420 <__kernel_vsyscall+0>:   push   %ecx
    // 0xf7757421 <__kernel_vsyscall+1>:   push   %edx
    // 0xf7757422 <__kernel_vsyscall+2>:   push   %ebp
    // 0xf7757423 <__kernel_vsyscall+3>:   mov    %esp,%ebp
    // 0xf7757425 <__kernel_vsyscall+5>:   sysenter
    // 0xf7757427 <__kernel_vsyscall+7>:   nop
    // 0xf7757428 <__kernel_vsyscall+8>:   nop
    // 0xf7757429 <__kernel_vsyscall+9>:   nop
    // 0xf775742a <__kernel_vsyscall+10>:  nop
    // 0xf775742b <__kernel_vsyscall+11>:  nop
    // 0xf775742c <__kernel_vsyscall+12>:  nop
    // 0xf775742d <__kernel_vsyscall+13>:  nop
    // 0xf775742e <__kernel_vsyscall+14>:  int    $0x80
    // 0xf7757430 <__kernel_vsyscall+16>:  pop    %ebp
    // 0xf7757431 <__kernel_vsyscall+17>:  pop    %edx
    // 0xf7757432 <__kernel_vsyscall+18>:  pop    %ecx
    // 0xf7757433 <__kernel_vsyscall+19>:  ret
    //
    // In cases where the sampled thread is blocked in a syscall, its
    // program counter will point at "pop %ebp".  Hence we look for
    // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and
    // the corresponding register-recovery actions are:
    //    new_ebp = *(old_esp + 0)
    //    new eip = *(old_esp + 12)
    //    new_esp = old_esp + 16
    //
    // It may also be the case that the program counter points two
    // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in
    // the case where the syscall has been restarted but the thread
    // hasn't been rescheduled.  The code below doesn't handle that;
    // it could easily be made to.
    //
    if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) {
      uintptr_t insns_min, insns_max;
      uintptr_t eip = ia.Value();
      bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip);
      if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) {
        uint8_t* eipC = (uint8_t*)eip;
        // Match the byte sequence int $0x80 (CD 80); pop %ebp (5D);
        // pop %edx (5A); pop %ecx (59); ret (C3) around |eip|.
        if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D &&
            eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) {
          TaggedUWord sp_plus_0 = sp;
          TaggedUWord sp_plus_12 = sp;
          TaggedUWord sp_plus_16 = sp;
          sp_plus_12 = sp_plus_12 + TaggedUWord(12);
          sp_plus_16 = sp_plus_16 + TaggedUWord(16);
          TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg);
          TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg);
          TaggedUWord new_esp = sp_plus_16;
          if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) {
            regs.xbp = new_ebp;
            regs.xip = new_eip;
            regs.xsp = new_esp;
            continue;
    ////
    /////////////////////////////////////////////
#endif  // defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)

    // So, do we have a ruleset for this address?  If so, use it now.
    if (ruleset) {
      if (DEBUG_MAIN) {
        ruleset->Print(ia.Value(), 1 /*bogus, but doesn't matter*/, mLog);
        mLog("\n");

      // Use the RuleSet to compute the registers for the previous
      // frame.  |regs| is modified in-place.
      UseRuleSet(&regs, aStackImg, ruleset, pfxinstrs);
      continue;

#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||     \
    defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \
    defined(GP_PLAT_amd64_freebsd)
    // There's no RuleSet for the specified address.  On amd64/x86_linux, see if
    // it's possible to recover the caller's frame by using the frame pointer.

    // We seek to compute (new_IP, new_SP, new_BP) from (old_BP, stack image),
    // and assume the following layout:
    //
    //                 <--- new_SP
    //   +----------+
    //   |  new_IP  |  (return address)
    //   +----------+
    //   |  new_BP  |  <--- old_BP
    //   +----------+
    //   |   ....   |
    //   |   ....   |
    //   |   ....   |
    //   +----------+  <---- old_SP (arbitrary, but must be <= old_BP)

    const size_t wordSzB = sizeof(uintptr_t);
    TaggedUWord old_xsp = regs.xsp;

    // points at new_BP ?
    TaggedUWord old_xbp = regs.xbp;
    // points at new_IP ?
    TaggedUWord old_xbp_plus1 = regs.xbp + TaggedUWord(1 * wordSzB);
    // is the new_SP ?
    TaggedUWord old_xbp_plus2 = regs.xbp + TaggedUWord(2 * wordSzB);

    if (old_xbp.Valid() && old_xbp.IsAligned() && old_xsp.Valid() &&
        old_xsp.IsAligned() && old_xsp.Value() <= old_xbp.Value()) {
      // We don't need to do any range, alignment or validity checks for
      // addresses passed to DerefTUW, since that performs them itself, and
      // returns an invalid value on failure.  Any such value will poison
      // subsequent uses, and we do a final check for validity before putting
      // the computed values into |regs|.
      TaggedUWord new_xbp = DerefTUW(old_xbp, aStackImg);
      if (new_xbp.Valid() && new_xbp.IsAligned() &&
          old_xbp.Value() < new_xbp.Value()) {
        TaggedUWord new_xip = DerefTUW(old_xbp_plus1, aStackImg);
        TaggedUWord new_xsp = old_xbp_plus2;
        // new_xbp is already known valid at this point; re-checking it
        // here is harmless.
        if (new_xbp.Valid() && new_xip.Valid() && new_xsp.Valid()) {
          regs.xbp = new_xbp;
          regs.xip = new_xip;
          regs.xsp = new_xsp;
          (*aFramePointerFramesAcquired)++;
          continue;
#elif defined(GP_ARCH_arm64)
    // Here is an example of generated code for prologue and epilogue..
    //
    //  stp     x29, x30, [sp, #-16]!
    //  mov     x29, sp
    //  ...
    //  ldp     x29, x30, [sp], #16
    //  ret
    //
    // Next is another example of generated code.
    //
    //  stp     x20, x19, [sp, #-32]!
    //  stp     x29, x30, [sp, #16]
    //  add     x29, sp, #0x10
    //  ...
    //  ldp     x29, x30, [sp, #16]
    //  ldp     x20, x19, [sp], #32
    //  ret
    //
    // Previous x29 and x30 register are stored in the address of x29 register.
    // But since sp register value depends on local variables, we cannot compute
    // previous sp register from current sp/fp/lr register and there is no
    // regular rule for sp register in prologue.  But since return address is lr
    // register, if x29 is valid, we will get return address without sp
    // register.
    //
    // So we assume the following layout that if no rule set.  x29 is frame
    // pointer, so we will be able to compute x29 and x30 .
    //
    //   +----------+  <--- new_sp (cannot compute)
    //   |   ....   |
    //   +----------+
    //   |  new_lr  |  (return address)
    //   +----------+
    //   |  new_fp  |  <--- old_fp
    //   +----------+
    //   |   ....   |
    //   |   ....   |
    //   +----------+  <---- old_sp (arbitrary, but unused)

    TaggedUWord old_fp = regs.x29;
    if (old_fp.Valid() && old_fp.IsAligned() && last_valid_sp.Valid() &&
        last_valid_sp.Value() <= old_fp.Value()) {
      TaggedUWord new_fp = DerefTUW(old_fp, aStackImg);
      if (new_fp.Valid() && new_fp.IsAligned() &&
          old_fp.Value() < new_fp.Value()) {
        TaggedUWord old_fp_plus1 = old_fp + TaggedUWord(8);
        TaggedUWord new_lr = DerefTUW(old_fp_plus1, aStackImg);
        if (new_lr.Valid()) {
          regs.x29 = new_fp;
          regs.x30 = new_lr;
          // When using frame pointer to walk stack, we cannot compute sp
          // register since we cannot compute sp register from fp/lr/sp
          // register, and there is no regular rule to compute previous sp
          // register.  So mark as invalid.
          regs.sp = TaggedUWord();
          (*aFramePointerFramesAcquired)++;
          continue;
#endif  // defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||
        // defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) ||
        // defined(GP_PLAT_amd64_freebsd)

    // We failed to recover a frame either using CFI or FP chasing, and we
    // have no other ways to recover the frame.  So we have to give up.
    break;

  }  // top level unwind loop

  // END UNWIND
  /////////////////////////////////////////////////////////
1659 ////////////////////////////////////////////////////////////////
1660 // LUL Unit Testing //
1661 ////////////////////////////////////////////////////////////////
1663 static const int LUL_UNIT_TEST_STACK_SIZE = 32768;
#if defined(GP_ARCH_mips64)
// Returns the address this function will return to, by copying out the
// MIPS return-address register $31 ($ra).  Must not be inlined, since
// inlining would change which return address $31 holds at the copy.
static __attribute__((noinline)) unsigned long __getpc(void) {
  unsigned long rtaddr;
  __asm__ volatile("move %0, $31" : "=r"(rtaddr));
  return rtaddr;
#endif
// This function is innermost in the test call sequence.  It uses LUL
// to unwind, and compares the result with the sequence specified in
// the director string.  These need to agree in order for the test to
// pass.  In order not to screw up the results, this function needs
// to have a not-very big stack frame, since we're only presenting
// the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and
// that chunk unavoidably includes the frame for this function.
//
// This function must not be inlined into its callers.  Doing so will
// cause the expected-vs-actual backtrace consistency checking to
// fail.  Prints summary results to |aLUL|'s logging sink and also
// returns a boolean indicating whether or not the test failed.
//
// Returns true on FAILURE (note the inversion at the end).
static __attribute__((noinline)) bool GetAndCheckStackTrace(
    LUL* aLUL, const char* dstring) {
  // Get hold of the current unwind-start registers (PC, SP, FP, ...)
  // via a per-architecture inline-asm block.
  UnwindRegs startRegs;
  memset(&startRegs, 0, sizeof(startRegs));
#if defined(GP_ARCH_amd64)
  volatile uintptr_t block[3];
  MOZ_ASSERT(sizeof(block) == 24);
  __asm__ __volatile__(
      "leaq 0(%%rip), %%r15"
      "\n\t"
      "movq %%r15, 0(%0)"
      "\n\t"
      "movq %%rsp, 8(%0)"
      "\n\t"
      "movq %%rbp, 16(%0)"
      "\n"
      : "r"(&block[0])
      : "memory", "r15");
  startRegs.xip = TaggedUWord(block[0]);
  startRegs.xsp = TaggedUWord(block[1]);
  startRegs.xbp = TaggedUWord(block[2]);
  // x86-64 leaf code may use 128 bytes below RSP (the ABI red zone), so
  // capture that region too.
  const uintptr_t REDZONE_SIZE = 128;
  uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
  volatile uintptr_t block[3];
  MOZ_ASSERT(sizeof(block) == 12);
  __asm__ __volatile__(
      ".byte 0xE8,0x00,0x00,0x00,0x00" /*call next insn*/
      "\n\t"
      "popl %%edi"
      "\n\t"
      "movl %%edi, 0(%0)"
      "\n\t"
      "movl %%esp, 4(%0)"
      "\n\t"
      "movl %%ebp, 8(%0)"
      "\n"
      : "r"(&block[0])
      : "memory", "edi");
  startRegs.xip = TaggedUWord(block[0]);
  startRegs.xsp = TaggedUWord(block[1]);
  startRegs.xbp = TaggedUWord(block[2]);
  const uintptr_t REDZONE_SIZE = 0;
  uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
  volatile uintptr_t block[6];
  MOZ_ASSERT(sizeof(block) == 24);
  __asm__ __volatile__(
      "mov r0, r15"
      "\n\t"
      "str r0, [%0, #0]"
      "\n\t"
      "str r14, [%0, #4]"
      "\n\t"
      "str r13, [%0, #8]"
      "\n\t"
      "str r12, [%0, #12]"
      "\n\t"
      "str r11, [%0, #16]"
      "\n\t"
      "str r7, [%0, #20]"
      "\n"
      : "r"(&block[0])
      : "memory", "r0");
  startRegs.r15 = TaggedUWord(block[0]);
  startRegs.r14 = TaggedUWord(block[1]);
  startRegs.r13 = TaggedUWord(block[2]);
  startRegs.r12 = TaggedUWord(block[3]);
  startRegs.r11 = TaggedUWord(block[4]);
  startRegs.r7 = TaggedUWord(block[5]);
  const uintptr_t REDZONE_SIZE = 0;
  // NOTE(review): |block[1]| here holds r14 (LR); the stack pointer r13
  // was captured in |block[2]|.  Basing the stack image on LR rather than
  // SP looks suspicious -- confirm against upstream before changing.
  uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(GP_ARCH_arm64)
  volatile uintptr_t block[4];
  MOZ_ASSERT(sizeof(block) == 32);
  __asm__ __volatile__(
      "adr x0, . \n\t"
      "str x0, [%0, #0] \n\t"
      "str x29, [%0, #8] \n\t"
      "str x30, [%0, #16] \n\t"
      "mov x0, sp \n\t"
      "str x0, [%0, #24] \n\t"
      : "r"(&block[0])
      : "memory", "x0");
  startRegs.pc = TaggedUWord(block[0]);
  startRegs.x29 = TaggedUWord(block[1]);
  startRegs.x30 = TaggedUWord(block[2]);
  startRegs.sp = TaggedUWord(block[3]);
  const uintptr_t REDZONE_SIZE = 0;
  // NOTE(review): |block[1]| here holds x29 (the frame pointer); the SP
  // was captured in |block[3]|.  Basing the stack image on x29 rather
  // than SP looks suspicious -- confirm against upstream before changing.
  uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(GP_ARCH_mips64)
  volatile uintptr_t block[3];
  MOZ_ASSERT(sizeof(block) == 24);
  __asm__ __volatile__(
      "sd $29, 8(%0) \n"
      "sd $30, 16(%0) \n"
      : "r"(block)
      : "memory");
  block[0] = __getpc();
  startRegs.pc = TaggedUWord(block[0]);
  startRegs.sp = TaggedUWord(block[1]);
  startRegs.fp = TaggedUWord(block[2]);
  const uintptr_t REDZONE_SIZE = 0;
  uintptr_t start = block[1] - REDZONE_SIZE;
#else
#  error "Unsupported platform"
#endif

  // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the
  // stack.  Round both ends down to a word boundary and clamp to LUL's
  // maximum capture size, N_STACK_BYTES.
  uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE;
  uintptr_t ws = sizeof(void*);
  start &= ~(ws - 1);
  end &= ~(ws - 1);
  uintptr_t nToCopy = end - start;
  if (nToCopy > lul::N_STACK_BYTES) {
    nToCopy = lul::N_STACK_BYTES;
  MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
  StackImage* stackImg = new StackImage();
  stackImg->mLen = nToCopy;
  stackImg->mStartAvma = start;
  if (nToCopy > 0) {
    // MOZ_MAKE_MEM_DEFINED tells memory checkers the region is readable.
    MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy);
    memcpy(&stackImg->mContents[0], (void*)start, nToCopy);

  // Unwind it.
  const int MAX_TEST_FRAMES = 64;
  uintptr_t framePCs[MAX_TEST_FRAMES];
  uintptr_t frameSPs[MAX_TEST_FRAMES];
  size_t framesAvail = mozilla::ArrayLength(framePCs);
  size_t framesUsed = 0;
  size_t framePointerFramesAcquired = 0;
  aLUL->Unwind(&framePCs[0], &frameSPs[0], &framesUsed,
               &framePointerFramesAcquired, framesAvail, &startRegs, stackImg);

  delete stackImg;

  // if (0) {
  //  // Show what we have.
  //  fprintf(stderr, "Got %d frames:\n", (int)framesUsed);
  //  for (size_t i = 0; i < framesUsed; i++) {
  //    fprintf(stderr, "  [%2d]   SP %p   PC %p\n",
  //            (int)i, (void*)frameSPs[i], (void*)framePCs[i]);
  //  }
  //  fprintf(stderr, "\n");

  // Check to see if there's a consistent binding between digits in
  // the director string ('1' .. '8') and the PC values acquired by
  // the unwind.  If there isn't, the unwinding has failed somehow.
  uintptr_t binding[8];  // binding for '1' .. binding for '8'
  memset((void*)binding, 0, sizeof(binding));

  // The general plan is to work backwards along the director string
  // and forwards along the framePCs array.  Doing so corresponds to
  // working outwards from the innermost frame of the recursive test set.
  const char* cursor = dstring;

  // Find the end.  This leaves |cursor| two bytes past the first
  // character we want to look at -- see comment below.
  while (*cursor) cursor++;

  // Counts the number of consistent frames.
  size_t nConsistent = 0;

  // Iterate back to the start of the director string.  The starting
  // points are a bit complex.  We can't use framePCs[0] because that
  // contains the PC in this frame (above).  We can't use framePCs[1]
  // because that will contain the PC at return point in the recursive
  // test group (TestFn[1-8]) for their call "out" to this function,
  // GetAndCheckStackTrace.  Although LUL will compute a correct
  // return address, that will not be the same return address as for a
  // recursive call out of the the function to another function in the
  // group.  Hence we can only start consistency checking at
  // framePCs[2].
  //
  // To be consistent, then, we must ignore the last element in the
  // director string as that corresponds to framePCs[1].  Hence the
  // start points are: framePCs[2] and the director string 2 bytes
  // before the terminating zero.
  //
  // Also as a result of this, the number of consistent frames counted
  // will always be one less than the length of the director string
  // (not including its terminating zero).
  size_t frameIx;
  for (cursor = cursor - 2, frameIx = 2;
       cursor >= dstring && frameIx < framesUsed; cursor--, frameIx++) {
    char c = *cursor;
    uintptr_t pc = framePCs[frameIx];
    // If this doesn't hold, the director string is ill-formed.
    MOZ_ASSERT(c >= '1' && c <= '8');
    int n = ((int)c) - ((int)'1');
    if (binding[n] == 0) {
      // There's no binding for |c| yet, so install |pc| and carry on.
      binding[n] = pc;
      nConsistent++;
      continue;
    // There's a pre-existing binding for |c|.  Check it's consistent.
    if (binding[n] != pc) {
      // Not consistent.  Give up now.
      break;
    // Consistent.  Keep going.
    nConsistent++;

  // So, did we succeed?
  bool passed = nConsistent + 1 == strlen(dstring);

  // Show the results.
  char buf[200];
  SprintfLiteral(buf, "LULUnitTest: dstring = %s\n", dstring);
  buf[sizeof(buf) - 1] = 0;
  aLUL->mLog(buf);
  SprintfLiteral(buf, "LULUnitTest: %d consistent, %d in dstring: %s\n",
                 (int)nConsistent, (int)strlen(dstring),
                 passed ? "PASS" : "FAIL");
  buf[sizeof(buf) - 1] = 0;
  aLUL->mLog(buf);

  // Inverted: true means the test FAILED.
  return !passed;
// Macro magic to create a set of 8 mutually recursive functions with
// varying frame sizes.  These will recurse amongst themselves as
// specified by |strP|, the directory string, and call
// GetAndCheckStackTrace when the string becomes empty, passing it the
// original value of the string.  This checks the result, printing
// results on |aLUL|'s logging sink, and also returns a boolean
// indicating whether or not the results are acceptable (correct).

// Forward-declares one test function; see GEN_TEST_FN for the definition.
#define DECL_TEST_FN(NAME) \
  bool NAME(LUL* aLUL, const char* strPorig, const char* strP);

// Defines one test function whose frame contains a local array of (at
// least) FRAMESIZE bytes, so the eight generated functions differ in
// frame size.  Each function dispatches on the next director-string
// character to one of TestFn1..TestFn8 via a function pointer.
#define GEN_TEST_FN(NAME, FRAMESIZE)                                          \
  bool NAME(LUL* aLUL, const char* strPorig, const char* strP) {              \
    /* Create a frame of size (at least) FRAMESIZE, so that the */            \
    /* 8 functions created by this macro offer some variation in frame */     \
    /* sizes.  This isn't as simple as it might seem, since a clever */       \
    /* optimizing compiler (eg, clang-5) detects that the array is unused */  \
    /* and removes it.  We try to defeat this by passing it to a function */  \
    /* in a different compilation unit, and hoping that clang does not */     \
    /* notice that the call is a no-op. */                                    \
    char space[FRAMESIZE];                                                    \
    Unused << write(1, space, 0); /* write zero bytes of |space| to stdout */ \
    if (*strP == '\0') {                                                      \
      /* We've come to the end of the director string. */                     \
      /* Take a stack snapshot. */                                            \
      /* We purposefully use a negation to avoid tail-call optimization */    \
      return !GetAndCheckStackTrace(aLUL, strPorig);                          \
    } else {                                                                  \
      /* Recurse onwards.  This is a bit subtle.  The obvious */              \
      /* thing to do here is call onwards directly, from within the */        \
      /* arms of the case statement.  That gives a problem in that */         \
      /* there will be multiple return points inside each function when */    \
      /* unwinding, so it will be difficult to check for consistency */       \
      /* against the director string.  Instead, we make an indirect */        \
      /* call, so as to guarantee that there is only one call site */         \
      /* within each function.  This does assume that the compiler */         \
      /* won't transform it back to the simple direct-call form. */           \
      /* To discourage it from doing so, the call is bracketed with */        \
      /* __asm__ __volatile__ sections so as to make it not-movable. */       \
      bool (*nextFn)(LUL*, const char*, const char*) = NULL;                  \
      switch (*strP) {                                                        \
        case '1':                                                             \
          nextFn = TestFn1;                                                   \
          break;                                                              \
        case '2':                                                             \
          nextFn = TestFn2;                                                   \
          break;                                                              \
        case '3':                                                             \
          nextFn = TestFn3;                                                   \
          break;                                                              \
        case '4':                                                             \
          nextFn = TestFn4;                                                   \
          break;                                                              \
        case '5':                                                             \
          nextFn = TestFn5;                                                   \
          break;                                                              \
        case '6':                                                             \
          nextFn = TestFn6;                                                   \
          break;                                                              \
        case '7':                                                             \
          nextFn = TestFn7;                                                   \
          break;                                                              \
        case '8':                                                             \
          nextFn = TestFn8;                                                   \
          break;                                                              \
        default:                                                              \
          nextFn = TestFn8;                                                   \
          break;                                                              \
      /* "use" |space| immediately after the recursive call, */               \
      /* so as to dissuade clang from deallocating the space while */         \
      /* the call is active, or otherwise messing with the stack frame. */    \
      __asm__ __volatile__("" ::: "cc", "memory");                            \
      bool passed = nextFn(aLUL, strPorig, strP + 1);                         \
      Unused << write(1, space, 0);                                           \
      __asm__ __volatile__("" ::: "cc", "memory");                            \
      return passed;                                                          \
// The test functions are mutually recursive (each can transfer to any
// of the others, as directed by the director string), so it is
// necessary to declare them all before defining them.
DECL_TEST_FN(TestFn1)
DECL_TEST_FN(TestFn2)
DECL_TEST_FN(TestFn3)
DECL_TEST_FN(TestFn4)
DECL_TEST_FN(TestFn5)
DECL_TEST_FN(TestFn6)
DECL_TEST_FN(TestFn7)
DECL_TEST_FN(TestFn8)
// Define the eight test functions.  The differing FRAMESIZE arguments
// give the generated functions different (minimum) frame sizes, so the
// unwinder is exercised against a variety of frame layouts.
GEN_TEST_FN(TestFn1, 123)
GEN_TEST_FN(TestFn2, 456)
GEN_TEST_FN(TestFn3, 789)
GEN_TEST_FN(TestFn4, 23)
GEN_TEST_FN(TestFn5, 47)
GEN_TEST_FN(TestFn6, 117)
GEN_TEST_FN(TestFn7, 1)
GEN_TEST_FN(TestFn8, 99)
// This starts the test sequence going.  Call here to generate a
// sequence of calls as directed by the string |dstring|.  The call
// sequence will, from its innermost frame, finish by calling
// GetAndCheckStackTrace() and passing it |dstring|.
// GetAndCheckStackTrace() will unwind the stack, check consistency
// of those results against |dstring|, and print a pass/fail message
// to aLUL's logging sink.  It also updates the counters in *aNTests
// and aNTestsPassed.
__attribute__((noinline)) void TestUnw(/*OUT*/ int* aNTests,
                                       /*OUT*/ int* aNTestsPassed, LUL* aLUL,
                                       const char* dstring) {
  // Ensure that the stack has at least this much space on it.  This
  // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes
  // and hand it to LUL.  Safe in the sense that no segfault can
  // happen because the stack is at least this big.  This is all
  // somewhat dubious in the sense that a sufficiently clever compiler
  // (clang, for one) can figure out that space[] is unused and delete
  // it from the frame.  Hence the somewhat elaborate hoop jumping to
  // fill it up before the call and to at least appear to use the
  // value afterwards.
  int i;
  volatile char space[LUL_UNIT_TEST_STACK_SIZE];
  // Touch every byte, so the whole buffer is demonstrably written
  // before the test runs (|volatile| stops the stores being elided).
  for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
    space[i] = (char)(i & 0x7F);
  // Really run the test.
  bool passed = TestFn1(aLUL, dstring, dstring);
  // Appear to use space[], by visiting the value to compute some kind
  // of checksum, and then (apparently) using the checksum.
  int sum = 0;
  for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
    // If this doesn't fool LLVM, I don't know what will.
    sum += space[i] - 3 * i;
  __asm__ __volatile__("" : : "r"(sum)); // pretend to consume |sum|
  // Update the counters.
  (*aNTests)++;
  if (passed) {
    (*aNTestsPassed)++;
// Top-level entry point for the LUL unit tests.  Runs a fixed set of
// director strings through TestUnw, logging a pass/fail line for each
// on |aLUL|'s logging sink.  On return, *aNTests holds the number of
// tests run and *aNTestsPassed the number that passed.
void RunLulUnitTests(/*OUT*/ int* aNTests, /*OUT*/ int* aNTestsPassed,
                     LUL* aLUL) {
  aLUL->mLog(":\n");
  aLUL->mLog("LULUnitTest: BEGIN\n");
  *aNTests = *aNTestsPassed = 0;
  // Director strings: each digit selects one of TestFn1..TestFn8, so
  // these cover shallow, deep, and mixed-frame-size call chains.
  TestUnw(aNTests, aNTestsPassed, aLUL, "11111111");
  TestUnw(aNTests, aNTestsPassed, aLUL, "11222211");
  TestUnw(aNTests, aNTestsPassed, aLUL, "111222333");
  TestUnw(aNTests, aNTestsPassed, aLUL, "1212121231212331212121212121212");
  TestUnw(aNTests, aNTestsPassed, aLUL, "31415827271828325332173258");
  TestUnw(aNTests, aNTestsPassed, aLUL,
          "123456781122334455667788777777777777777777777");
  aLUL->mLog("LULUnitTest: END\n");
  aLUL->mLog(":\n");
2079 } // namespace lul