Bumping gaia.json for 1 gaia revision(s) a=gaia-bump
[gecko.git] / tools / profiler / LulMain.cpp
blob6e6c1d235f10bfabde0b889f73b9ee0a11b0b2a5
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "LulMain.h"
9 #include <string.h>
10 #include <stdlib.h>
11 #include <stdio.h>
13 #include <algorithm> // std::sort
14 #include <string>
16 #include "mozilla/Assertions.h"
17 #include "mozilla/ArrayUtils.h"
18 #include "mozilla/MemoryChecking.h"
20 #include "LulCommonExt.h"
21 #include "LulElfExt.h"
23 #include "LulMainInt.h"
25 // Set this to 1 for verbose logging
26 #define DEBUG_MAIN 0
29 namespace lul {
31 using std::string;
32 using std::vector;
35 ////////////////////////////////////////////////////////////////
36 // AutoLulRWLocker //
37 ////////////////////////////////////////////////////////////////
39 // This is a simple RAII class that manages acquisition and release of
40 // LulRWLock reader-writer locks.
42 class AutoLulRWLocker {
43 public:
44 enum AcqMode { FOR_READING, FOR_WRITING };
45 AutoLulRWLocker(LulRWLock* aRWLock, AcqMode mode)
46 : mRWLock(aRWLock)
48 if (mode == FOR_WRITING) {
49 aRWLock->WrLock();
50 } else {
51 aRWLock->RdLock();
54 ~AutoLulRWLocker()
56 mRWLock->Unlock();
59 private:
60 LulRWLock* mRWLock;
64 ////////////////////////////////////////////////////////////////
65 // RuleSet //
66 ////////////////////////////////////////////////////////////////
68 static const char*
69 NameOf_DW_REG(int16_t aReg)
71 switch (aReg) {
72 case DW_REG_CFA: return "cfa";
73 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
74 case DW_REG_INTEL_XBP: return "xbp";
75 case DW_REG_INTEL_XSP: return "xsp";
76 case DW_REG_INTEL_XIP: return "xip";
77 #elif defined(LUL_ARCH_arm)
78 case DW_REG_ARM_R7: return "r7";
79 case DW_REG_ARM_R11: return "r11";
80 case DW_REG_ARM_R12: return "r12";
81 case DW_REG_ARM_R13: return "r13";
82 case DW_REG_ARM_R14: return "r14";
83 case DW_REG_ARM_R15: return "r15";
84 #else
85 # error "Unsupported arch"
86 #endif
87 default: return "???";
91 static string
92 ShowRule(const char* aNewReg, LExpr aExpr)
94 char buf[64];
95 string res = string(aNewReg) + "=";
96 switch (aExpr.mHow) {
97 case LExpr::UNKNOWN:
98 res += "Unknown";
99 break;
100 case LExpr::NODEREF:
101 sprintf(buf, "%s+%d", NameOf_DW_REG(aExpr.mReg), (int)aExpr.mOffset);
102 res += buf;
103 break;
104 case LExpr::DEREF:
105 sprintf(buf, "*(%s+%d)", NameOf_DW_REG(aExpr.mReg), (int)aExpr.mOffset);
106 res += buf;
107 break;
108 default:
109 res += "???";
110 break;
112 return res;
115 void
116 RuleSet::Print(void(*aLog)(const char*))
118 char buf[96];
119 sprintf(buf, "[%llx .. %llx]: let ",
120 (unsigned long long int)mAddr,
121 (unsigned long long int)(mAddr + mLen - 1));
122 string res = string(buf);
123 res += ShowRule("cfa", mCfaExpr);
124 res += " in";
125 // For each reg we care about, print the recovery expression.
126 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
127 res += ShowRule(" RA", mXipExpr);
128 res += ShowRule(" SP", mXspExpr);
129 res += ShowRule(" BP", mXbpExpr);
130 #elif defined(LUL_ARCH_arm)
131 res += ShowRule(" R15", mR15expr);
132 res += ShowRule(" R7", mR7expr);
133 res += ShowRule(" R11", mR11expr);
134 res += ShowRule(" R12", mR12expr);
135 res += ShowRule(" R13", mR13expr);
136 res += ShowRule(" R14", mR14expr);
137 #else
138 # error "Unsupported arch"
139 #endif
140 aLog(res.c_str());
143 LExpr*
144 RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) {
145 switch (aRegno) {
146 case DW_REG_CFA: return &mCfaExpr;
147 # if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
148 case DW_REG_INTEL_XIP: return &mXipExpr;
149 case DW_REG_INTEL_XSP: return &mXspExpr;
150 case DW_REG_INTEL_XBP: return &mXbpExpr;
151 # elif defined(LUL_ARCH_arm)
152 case DW_REG_ARM_R15: return &mR15expr;
153 case DW_REG_ARM_R14: return &mR14expr;
154 case DW_REG_ARM_R13: return &mR13expr;
155 case DW_REG_ARM_R12: return &mR12expr;
156 case DW_REG_ARM_R11: return &mR11expr;
157 case DW_REG_ARM_R7: return &mR7expr;
158 # else
159 # error "Unknown arch"
160 # endif
161 default: return nullptr;
165 RuleSet::RuleSet()
167 mAddr = 0;
168 mLen = 0;
169 // The only other fields are of type LExpr and those are initialised
170 // by LExpr::LExpr().
174 ////////////////////////////////////////////////////////////////
175 // SecMap //
176 ////////////////////////////////////////////////////////////////
178 // See header file LulMainInt.h for comments about invariants.
180 SecMap::SecMap(void(*aLog)(const char*))
181 : mSummaryMinAddr(1)
182 , mSummaryMaxAddr(0)
183 , mUsable(true)
184 , mLog(aLog)
187 SecMap::~SecMap() {
188 mRuleSets.clear();
191 RuleSet*
192 SecMap::FindRuleSet(uintptr_t ia) {
193 // Binary search mRuleSets to find one that brackets |ia|.
194 // lo and hi need to be signed, else the loop termination tests
195 // don't work properly. Note that this works correctly even when
196 // mRuleSets.size() == 0.
198 // Can't do this until the array has been sorted and preened.
199 MOZ_ASSERT(mUsable);
201 long int lo = 0;
202 long int hi = (long int)mRuleSets.size() - 1;
203 while (true) {
204 // current unsearched space is from lo to hi, inclusive.
205 if (lo > hi) {
206 // not found
207 return nullptr;
209 long int mid = lo + ((hi - lo) / 2);
210 RuleSet* mid_ruleSet = &mRuleSets[mid];
211 uintptr_t mid_minAddr = mid_ruleSet->mAddr;
212 uintptr_t mid_maxAddr = mid_minAddr + mid_ruleSet->mLen - 1;
213 if (ia < mid_minAddr) { hi = mid-1; continue; }
214 if (ia > mid_maxAddr) { lo = mid+1; continue; }
215 MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
216 return mid_ruleSet;
218 // NOTREACHED
221 // Add a RuleSet to the collection. The rule is copied in. Calling
222 // this makes the map non-searchable.
223 void
224 SecMap::AddRuleSet(RuleSet* rs) {
225 mUsable = false;
226 mRuleSets.push_back(*rs);
230 static bool
231 CmpRuleSetsByAddrLE(const RuleSet& rs1, const RuleSet& rs2) {
232 return rs1.mAddr < rs2.mAddr;
235 // Prepare the map for searching. Completely remove any which don't
236 // fall inside the specified range [start, +len).
237 void
238 SecMap::PrepareRuleSets(uintptr_t aStart, size_t aLen)
240 if (mRuleSets.empty()) {
241 return;
244 MOZ_ASSERT(aLen > 0);
245 if (aLen == 0) {
246 // This should never happen.
247 mRuleSets.clear();
248 return;
251 // Sort by start addresses.
252 std::sort(mRuleSets.begin(), mRuleSets.end(), CmpRuleSetsByAddrLE);
254 // Detect any entry not completely contained within [start, +len).
255 // Set its length to zero, so that the next pass will remove it.
256 for (size_t i = 0; i < mRuleSets.size(); ++i) {
257 RuleSet* rs = &mRuleSets[i];
258 if (rs->mLen > 0 &&
259 (rs->mAddr < aStart || rs->mAddr + rs->mLen > aStart + aLen)) {
260 rs->mLen = 0;
264 // Iteratively truncate any overlaps and remove any zero length
265 // entries that might result, or that may have been present
266 // initially. Unless the input is seriously screwy, this is
267 // expected to iterate only once.
268 while (true) {
269 size_t i;
270 size_t n = mRuleSets.size();
271 size_t nZeroLen = 0;
273 if (n == 0) {
274 break;
277 for (i = 1; i < n; ++i) {
278 RuleSet* prev = &mRuleSets[i-1];
279 RuleSet* here = &mRuleSets[i];
280 MOZ_ASSERT(prev->mAddr <= here->mAddr);
281 if (prev->mAddr + prev->mLen > here->mAddr) {
282 prev->mLen = here->mAddr - prev->mAddr;
284 if (prev->mLen == 0)
285 nZeroLen++;
288 if (mRuleSets[n-1].mLen == 0) {
289 nZeroLen++;
292 // At this point, the entries are in-order and non-overlapping.
293 // If none of them are zero-length, we are done.
294 if (nZeroLen == 0) {
295 break;
298 // Slide back the entries to remove the zero length ones.
299 size_t j = 0; // The write-point.
300 for (i = 0; i < n; ++i) {
301 if (mRuleSets[i].mLen == 0) {
302 continue;
304 if (j != i) mRuleSets[j] = mRuleSets[i];
305 ++j;
307 MOZ_ASSERT(i == n);
308 MOZ_ASSERT(nZeroLen <= n);
309 MOZ_ASSERT(j == n - nZeroLen);
310 while (nZeroLen > 0) {
311 mRuleSets.pop_back();
312 nZeroLen--;
315 MOZ_ASSERT(mRuleSets.size() == j);
318 size_t n = mRuleSets.size();
320 #ifdef DEBUG
321 // Do a final check on the rules: their address ranges must be
322 // ascending, non overlapping, non zero sized.
323 if (n > 0) {
324 MOZ_ASSERT(mRuleSets[0].mLen > 0);
325 for (size_t i = 1; i < n; ++i) {
326 RuleSet* prev = &mRuleSets[i-1];
327 RuleSet* here = &mRuleSets[i];
328 MOZ_ASSERT(prev->mAddr < here->mAddr);
329 MOZ_ASSERT(here->mLen > 0);
330 MOZ_ASSERT(prev->mAddr + prev->mLen <= here->mAddr);
333 #endif
335 // Set the summary min and max address values.
336 if (n == 0) {
337 // Use the values defined in comments in the class declaration.
338 mSummaryMinAddr = 1;
339 mSummaryMaxAddr = 0;
340 } else {
341 mSummaryMinAddr = mRuleSets[0].mAddr;
342 mSummaryMaxAddr = mRuleSets[n-1].mAddr + mRuleSets[n-1].mLen - 1;
344 char buf[150];
345 snprintf(buf, sizeof(buf),
346 "PrepareRuleSets: %d entries, smin/smax 0x%llx, 0x%llx\n",
347 (int)n, (unsigned long long int)mSummaryMinAddr,
348 (unsigned long long int)mSummaryMaxAddr);
349 buf[sizeof(buf)-1] = 0;
350 mLog(buf);
352 // Is now usable for binary search.
353 mUsable = true;
355 if (0) {
356 mLog("\nRulesets after preening\n");
357 for (size_t i = 0; i < mRuleSets.size(); ++i) {
358 mRuleSets[i].Print(mLog);
359 mLog("\n");
361 mLog("\n");
365 bool SecMap::IsEmpty() {
366 return mRuleSets.empty();
370 ////////////////////////////////////////////////////////////////
371 // SegArray //
372 ////////////////////////////////////////////////////////////////
374 // A SegArray holds a set of address ranges that together exactly
375 // cover an address range, with no overlaps or holes. Each range has
376 // an associated value, which in this case has been specialised to be
377 // a simple boolean. The representation is kept to minimal canonical
378 // form in which adjacent ranges with the same associated value are
379 // merged together. Each range is represented by a |struct Seg|.
381 // SegArrays are used to keep track of which parts of the address
382 // space are known to contain instructions.
class SegArray {
 public:
  // Assign |val| to the whole range [lo, hi] (inclusive), splitting
  // and merging segments as needed so the canonical-form invariant
  // (adjacent segments differ in |val|) is maintained.  A reversed
  // range is ignored.
  void add(uintptr_t lo, uintptr_t hi, bool val) {
    if (lo > hi) {
      return;
    }
    split_at(lo);
    if (hi < UINTPTR_MAX) {
      split_at(hi+1);
    }
    std::vector<Seg>::size_type iLo, iHi, i;
    iLo = find(lo);
    iHi = find(hi);
    for (i = iLo; i <= iHi; ++i) {
      mSegs[i].val = val;
    }
    preen();
  }

  // If |addr| falls in a segment marked true (executable), write that
  // segment's bounds to |rx_min|/|rx_max| and return true.  Otherwise
  // return false and leave the outputs untouched.
  bool getBoundingCodeSegment(/*OUT*/uintptr_t* rx_min,
                              /*OUT*/uintptr_t* rx_max, uintptr_t addr) {
    std::vector<Seg>::size_type i = find(addr);
    if (!mSegs[i].val) {
      return false;
    }
    *rx_min = mSegs[i].lo;
    *rx_max = mSegs[i].hi;
    return true;
  }

  // Initially the entire address space is one segment marked false,
  // so that find() can never fail.
  SegArray() {
    Seg s(0, UINTPTR_MAX, false);
    mSegs.push_back(s);
  }

 private:
  // One address range [lo, hi] (inclusive) with its boolean value.
  struct Seg {
    Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {}
    uintptr_t lo;
    uintptr_t hi;
    bool val;
  };

  // Restore canonical form: merge runs of adjacent segments that
  // carry the same value.
  void preen() {
    for (std::vector<Seg>::iterator iter = mSegs.begin();
         iter < mSegs.end()-1;
         ++iter) {
      if (iter[0].val != iter[1].val) {
        continue;
      }
      iter[0].hi = iter[1].hi;
      mSegs.erase(iter+1);
      // Back up one, so as not to miss an opportunity to merge
      // with the entry after this one.
      --iter;
    }
  }

  // Binary search for the (unique) segment containing |a|.  Cannot
  // fail, since the segments exactly cover the address space.
  std::vector<Seg>::size_type find(uintptr_t a) {
    long int lo = 0;
    // Upper bound is size()-1: the search space is lo .. hi
    // *inclusive*.  (Previously this was size(), which breaks the
    // inclusive-bound invariant and could probe mSegs[size()] if the
    // full-coverage invariant were ever violated.)
    long int hi = (long int)mSegs.size() - 1;
    while (true) {
      // The unsearched space is lo .. hi inclusive.
      if (lo > hi) {
        // Not found.  This can't happen.
        return (std::vector<Seg>::size_type)(-1);
      }
      long int mid = lo + ((hi - lo) / 2);
      uintptr_t mid_lo = mSegs[mid].lo;
      uintptr_t mid_hi = mSegs[mid].hi;
      if (a < mid_lo) { hi = mid-1; continue; }
      if (a > mid_hi) { lo = mid+1; continue; }
      return (std::vector<Seg>::size_type)mid;
    }
  }

  // Ensure a segment boundary exists at |a|, by splitting the segment
  // containing it if necessary.
  void split_at(uintptr_t a) {
    std::vector<Seg>::size_type i = find(a);
    if (mSegs[i].lo == a) {
      return;
    }
    mSegs.insert( mSegs.begin()+i+1, mSegs[i] );
    mSegs[i].hi = a-1;
    mSegs[i+1].lo = a;
  }

  // Debug-only dump of the whole array to stdout.
  void show() {
    printf("<< %d entries:\n", (int)mSegs.size());
    for (std::vector<Seg>::iterator iter = mSegs.begin();
         iter < mSegs.end();
         ++iter) {
      printf("  %016llx  %016llx  %s\n",
             (unsigned long long int)(*iter).lo,
             (unsigned long long int)(*iter).hi,
             (*iter).val ? "true" : "false");
    }
    printf(">>\n");
  }

  // Invariants: segments exactly cover [0, UINTPTR_MAX], in order,
  // with no overlaps, holes, or same-valued neighbours.
  std::vector<Seg> mSegs;
};
487 ////////////////////////////////////////////////////////////////
488 // PriMap //
489 ////////////////////////////////////////////////////////////////
491 class PriMap {
492 public:
493 PriMap(void (*aLog)(const char*))
494 : mLog(aLog)
497 ~PriMap() {
498 for (std::vector<SecMap*>::iterator iter = mSecMaps.begin();
499 iter != mSecMaps.end();
500 ++iter) {
501 delete *iter;
503 mSecMaps.clear();
506 // This can happen with the global lock held for reading.
507 RuleSet* Lookup(uintptr_t ia) {
508 SecMap* sm = FindSecMap(ia);
509 return sm ? sm->FindRuleSet(ia) : nullptr;
512 // Add a secondary map. No overlaps allowed w.r.t. existing
513 // secondary maps. Global lock must be held for writing.
514 void AddSecMap(SecMap* aSecMap) {
515 // We can't add an empty SecMap to the PriMap. But that's OK
516 // since we'd never be able to find anything in it anyway.
517 if (aSecMap->IsEmpty()) {
518 return;
521 // Iterate through the SecMaps and find the right place for this
522 // one. At the same time, ensure that the in-order
523 // non-overlapping invariant is preserved (and, generally, holds).
524 // FIXME: this gives a cost that is O(N^2) in the total number of
525 // shared objects in the system. ToDo: better.
526 MOZ_ASSERT(aSecMap->mSummaryMinAddr <= aSecMap->mSummaryMaxAddr);
528 size_t num_secMaps = mSecMaps.size();
529 uintptr_t i;
530 for (i = 0; i < num_secMaps; ++i) {
531 SecMap* sm_i = mSecMaps[i];
532 MOZ_ASSERT(sm_i->mSummaryMinAddr <= sm_i->mSummaryMaxAddr);
533 if (aSecMap->mSummaryMinAddr < sm_i->mSummaryMaxAddr) {
534 // |aSecMap| needs to be inserted immediately before mSecMaps[i].
535 break;
538 MOZ_ASSERT(i <= num_secMaps);
539 if (i == num_secMaps) {
540 // It goes at the end.
541 mSecMaps.push_back(aSecMap);
542 } else {
543 std::vector<SecMap*>::iterator iter = mSecMaps.begin() + i;
544 mSecMaps.insert(iter, aSecMap);
546 char buf[100];
547 snprintf(buf, sizeof(buf), "AddSecMap: now have %d SecMaps\n",
548 (int)mSecMaps.size());
549 buf[sizeof(buf)-1] = 0;
550 mLog(buf);
553 // Remove and delete any SecMaps in the mapping, that intersect
554 // with the specified address range.
555 void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) {
556 MOZ_ASSERT(avma_min <= avma_max);
557 size_t num_secMaps = mSecMaps.size();
558 if (num_secMaps > 0) {
559 intptr_t i;
560 // Iterate from end to start over the vector, so as to ensure
561 // that the special case where |avma_min| and |avma_max| denote
562 // the entire address space, can be completed in time proportional
563 // to the number of elements in the map.
564 for (i = (intptr_t)num_secMaps-1; i >= 0; i--) {
565 SecMap* sm_i = mSecMaps[i];
566 if (sm_i->mSummaryMaxAddr < avma_min ||
567 avma_max < sm_i->mSummaryMinAddr) {
568 // There's no overlap. Move on.
569 continue;
571 // We need to remove mSecMaps[i] and slide all those above it
572 // downwards to cover the hole.
573 mSecMaps.erase(mSecMaps.begin() + i);
574 delete sm_i;
579 // Return the number of currently contained SecMaps.
580 size_t CountSecMaps() {
581 return mSecMaps.size();
584 // Assess heuristically whether the given address is an instruction
585 // immediately following a call instruction. The caller is required
586 // to hold the global lock for reading.
587 bool MaybeIsReturnPoint(TaggedUWord aInstrAddr, SegArray* aSegArray) {
588 if (!aInstrAddr.Valid()) {
589 return false;
592 uintptr_t ia = aInstrAddr.Value();
594 // Assume that nobody would be crazy enough to put code in the
595 // first or last page.
596 if (ia < 4096 || ((uintptr_t)(-ia)) < 4096) {
597 return false;
600 // See if it falls inside a known r-x mapped area. Poking around
601 // outside such places risks segfaulting.
602 uintptr_t insns_min, insns_max;
603 bool b = aSegArray->getBoundingCodeSegment(&insns_min, &insns_max, ia);
604 if (!b) {
605 // no code (that we know about) at this address
606 return false;
609 // |ia| falls within an r-x range. So we can
610 // safely poke around in [insns_min, insns_max].
612 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
613 // Is the previous instruction recognisably a CALL? This is
614 // common for the 32- and 64-bit versions, except for the
615 // simm32(%rip) case, which is 64-bit only.
617 // For all other cases, the 64 bit versions are either identical
618 // to the 32 bit versions, or have an optional extra leading REX.W
619 // byte (0x41). Since the extra 0x41 is optional we have to
620 // ignore it, with the convenient result that the same matching
621 // logic works for both 32- and 64-bit cases.
623 uint8_t* p = (uint8_t*)ia;
624 # if defined(LUL_ARCH_x64)
625 // CALL simm32(%rip) == FF15 simm32
626 if (ia - 6 >= insns_min && p[-6] == 0xFF && p[-5] == 0x15) {
627 return true;
629 # endif
630 // CALL rel32 == E8 rel32 (both 32- and 64-bit)
631 if (ia - 5 >= insns_min && p[-5] == 0xE8) {
632 return true;
634 // CALL *%eax .. CALL *%edi == FFD0 .. FFD7 (32-bit)
635 // CALL *%rax .. CALL *%rdi == FFD0 .. FFD7 (64-bit)
636 // CALL *%r8 .. CALL *%r15 == 41FFD0 .. 41FFD7 (64-bit)
637 if (ia - 2 >= insns_min &&
638 p[-2] == 0xFF && p[-1] >= 0xD0 && p[-1] <= 0xD7) {
639 return true;
641 // Almost all of the remaining cases that occur in practice are
642 // of the form CALL *simm8(reg) or CALL *simm32(reg).
644 // 64 bit cases:
646 // call *simm8(%rax) FF50 simm8
647 // call *simm8(%rcx) FF51 simm8
648 // call *simm8(%rdx) FF52 simm8
649 // call *simm8(%rbx) FF53 simm8
650 // call *simm8(%rsp) FF5424 simm8
651 // call *simm8(%rbp) FF55 simm8
652 // call *simm8(%rsi) FF56 simm8
653 // call *simm8(%rdi) FF57 simm8
655 // call *simm8(%r8) 41FF50 simm8
656 // call *simm8(%r9) 41FF51 simm8
657 // call *simm8(%r10) 41FF52 simm8
658 // call *simm8(%r11) 41FF53 simm8
659 // call *simm8(%r12) 41FF5424 simm8
660 // call *simm8(%r13) 41FF55 simm8
661 // call *simm8(%r14) 41FF56 simm8
662 // call *simm8(%r15) 41FF57 simm8
664 // call *simm32(%rax) FF90 simm32
665 // call *simm32(%rcx) FF91 simm32
666 // call *simm32(%rdx) FF92 simm32
667 // call *simm32(%rbx) FF93 simm32
668 // call *simm32(%rsp) FF9424 simm32
669 // call *simm32(%rbp) FF95 simm32
670 // call *simm32(%rsi) FF96 simm32
671 // call *simm32(%rdi) FF97 simm32
673 // call *simm32(%r8) 41FF90 simm32
674 // call *simm32(%r9) 41FF91 simm32
675 // call *simm32(%r10) 41FF92 simm32
676 // call *simm32(%r11) 41FF93 simm32
677 // call *simm32(%r12) 41FF9424 simm32
678 // call *simm32(%r13) 41FF95 simm32
679 // call *simm32(%r14) 41FF96 simm32
680 // call *simm32(%r15) 41FF97 simm32
682 // 32 bit cases:
684 // call *simm8(%eax) FF50 simm8
685 // call *simm8(%ecx) FF51 simm8
686 // call *simm8(%edx) FF52 simm8
687 // call *simm8(%ebx) FF53 simm8
688 // call *simm8(%esp) FF5424 simm8
689 // call *simm8(%ebp) FF55 simm8
690 // call *simm8(%esi) FF56 simm8
691 // call *simm8(%edi) FF57 simm8
693 // call *simm32(%eax) FF90 simm32
694 // call *simm32(%ecx) FF91 simm32
695 // call *simm32(%edx) FF92 simm32
696 // call *simm32(%ebx) FF93 simm32
697 // call *simm32(%esp) FF9424 simm32
698 // call *simm32(%ebp) FF95 simm32
699 // call *simm32(%esi) FF96 simm32
700 // call *simm32(%edi) FF97 simm32
701 if (ia - 3 >= insns_min &&
702 p[-3] == 0xFF &&
703 (p[-2] >= 0x50 && p[-2] <= 0x57 && p[-2] != 0x54)) {
704 // imm8 case, not including %esp/%rsp
705 return true;
707 if (ia - 4 >= insns_min &&
708 p[-4] == 0xFF && p[-3] == 0x54 && p[-2] == 0x24) {
709 // imm8 case for %esp/%rsp
710 return true;
712 if (ia - 6 >= insns_min &&
713 p[-6] == 0xFF &&
714 (p[-5] >= 0x90 && p[-5] <= 0x97 && p[-5] != 0x94)) {
715 // imm32 case, not including %esp/%rsp
716 return true;
718 if (ia - 7 >= insns_min &&
719 p[-7] == 0xFF && p[-6] == 0x94 && p[-5] == 0x24) {
720 // imm32 case for %esp/%rsp
721 return true;
724 #elif defined(LUL_ARCH_arm)
725 if (ia & 1) {
726 uint16_t w0 = 0, w1 = 0;
727 // The return address has its lowest bit set, indicating a return
728 // to Thumb code.
729 ia &= ~(uintptr_t)1;
730 if (ia - 2 >= insns_min && ia - 1 <= insns_max) {
731 w1 = *(uint16_t*)(ia - 2);
733 if (ia - 4 >= insns_min && ia - 1 <= insns_max) {
734 w0 = *(uint16_t*)(ia - 4);
736 // Is it a 32-bit Thumb call insn?
737 // BL simm26 (Encoding T1)
738 if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) {
739 return true;
741 // BLX simm26 (Encoding T2)
742 if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) {
743 return true;
745 // Other possible cases:
746 // (BLX Rm, Encoding T1).
747 // BLX Rm (encoding T1, 16 bit, inspect w1 and ignore w0.)
748 // 0100 0111 1 Rm 000
749 } else {
750 // Returning to ARM code.
751 uint32_t a0 = 0;
752 if ((ia & 3) == 0 && ia - 4 >= insns_min && ia - 1 <= insns_max) {
753 a0 = *(uint32_t*)(ia - 4);
755 // Leading E forces unconditional only -- fix. It could be
756 // anything except F, which is the deprecated NV code.
757 // BL simm26 (Encoding A1)
758 if ((a0 & 0xFF000000) == 0xEB000000) {
759 return true;
761 // Other possible cases:
762 // BLX simm26 (Encoding A2)
763 //if ((a0 & 0xFE000000) == 0xFA000000)
764 // return true;
765 // BLX (register) (A1): BLX <c> <Rm>
766 // cond 0001 0010 1111 1111 1111 0011 Rm
767 // again, cond can be anything except NV (0xF)
770 #else
771 # error "Unsupported arch"
772 #endif
774 // Not an insn we recognise.
775 return false;
778 private:
779 // FindSecMap's caller must hold the global lock for reading or writing.
780 SecMap* FindSecMap(uintptr_t ia) {
781 // Binary search mSecMaps to find one that brackets |ia|.
782 // lo and hi need to be signed, else the loop termination tests
783 // don't work properly.
784 long int lo = 0;
785 long int hi = (long int)mSecMaps.size() - 1;
786 while (true) {
787 // current unsearched space is from lo to hi, inclusive.
788 if (lo > hi) {
789 // not found
790 return nullptr;
792 long int mid = lo + ((hi - lo) / 2);
793 SecMap* mid_secMap = mSecMaps[mid];
794 uintptr_t mid_minAddr = mid_secMap->mSummaryMinAddr;
795 uintptr_t mid_maxAddr = mid_secMap->mSummaryMaxAddr;
796 if (ia < mid_minAddr) { hi = mid-1; continue; }
797 if (ia > mid_maxAddr) { lo = mid+1; continue; }
798 MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
799 return mid_secMap;
801 // NOTREACHED
804 private:
805 // sorted array of per-object ranges, non overlapping, non empty
806 std::vector<SecMap*> mSecMaps;
808 // a logging sink, for debugging.
809 void (*mLog)(const char*);
813 ////////////////////////////////////////////////////////////////
814 // CFICache //
815 ////////////////////////////////////////////////////////////////
817 // This is the thread-local cache. It maps individual insn AVMAs to
818 // the associated CFI record, which live in LUL::mPriMap.
820 // The cache is a direct map hash table indexed by address.
821 // It has to distinguish 3 cases:
823 // (1) .mRSet == (RuleSet*)0 ==> cache slot not in use
824 // (2) .mRSet == (RuleSet*)1 ==> slot in use, no RuleSet avail
825 // (3) .mRSet > (RuleSet*)1 ==> slot in use, RuleSet* available
827 // Distinguishing between (2) and (3) is important, because if we look
828 // up an address in LUL::mPriMap and find there is no RuleSet, then
829 // that fact needs to cached here, so as to avoid potentially
830 // expensive repeat lookups.
832 // A CFICacheEntry::mRSet value of zero indicates that the slot is not
833 // in use, and a value of one indicates that the slot is in use but
834 // there is no RuleSet available.
835 #define ENTRY_NOT_IN_USE ((RuleSet*)0)
836 #define NO_RULESET_AVAILABLE ((RuleSet*)1)
838 class CFICache {
839 public:
841 CFICache(PriMap* aPriMap) {
842 Invalidate();
843 mPriMap = aPriMap;
846 void Invalidate() {
847 for (int i = 0; i < N_ENTRIES; ++i) {
848 mCache[i].mAVMA = 0;
849 mCache[i].mRSet = ENTRY_NOT_IN_USE;
853 RuleSet* Lookup(uintptr_t ia) {
854 uintptr_t hash = ia % (uintptr_t)N_ENTRIES;
855 CFICacheEntry* ce = &mCache[hash];
856 if (ce->mAVMA == ia) {
857 // The cache has an entry for |ia|. Interpret it.
858 if (ce->mRSet > NO_RULESET_AVAILABLE) {
859 // There's a RuleSet. So return it.
860 return ce->mRSet;
862 if (ce->mRSet == NO_RULESET_AVAILABLE) {
863 // There's no RuleSet for this address. Don't update
864 // the cache, since we might get queried again.
865 return nullptr;
867 // Otherwise, the slot is not in use. Fall through to
868 // the 'miss' case.
871 // The cache entry is for some other address, or is not in use.
872 // Update it. If it can be found in the priMap then install it
873 // as-is. Else put NO_RULESET_AVAILABLE in, so as to indicate
874 // there's no info for this address.
875 RuleSet* fallback = mPriMap->Lookup(ia);
876 mCache[hash].mAVMA = ia;
877 mCache[hash].mRSet = fallback ? fallback : NO_RULESET_AVAILABLE;
878 return fallback;
881 private:
882 // This should be a prime number.
883 static const int N_ENTRIES = 509;
885 // See comment above for the meaning of these entries.
886 struct CFICacheEntry {
887 uintptr_t mAVMA; // AVMA of the associated instruction
888 RuleSet* mRSet; // RuleSet* for the instruction
890 CFICacheEntry mCache[N_ENTRIES];
892 // Need to have a pointer to the PriMap, so as to be able
893 // to service misses.
894 PriMap* mPriMap;
897 #undef ENTRY_NOT_IN_USE
898 #undef NO_RULESET_AVAILABLE
901 ////////////////////////////////////////////////////////////////
902 // LUL //
903 ////////////////////////////////////////////////////////////////
905 LUL::LUL(void (*aLog)(const char*))
907 mRWlock = new LulRWLock();
908 AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
909 mLog = aLog;
910 mPriMap = new PriMap(aLog);
911 mSegArray = new SegArray();
915 LUL::~LUL()
917 // The auto-locked section must have its own scope, so that the
918 // unlock is performed before the mRWLock is deleted.
920 AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
921 for (std::map<pthread_t,CFICache*>::iterator iter = mCaches.begin();
922 iter != mCaches.end();
923 ++iter) {
924 delete iter->second;
926 delete mPriMap;
927 delete mSegArray;
928 mLog = nullptr;
930 // Now we don't hold the lock. Hence it is safe to delete it.
931 delete mRWlock;
935 void
936 LUL::RegisterUnwinderThread()
938 AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
940 pthread_t me = pthread_self();
941 CFICache* cache = new CFICache(mPriMap);
943 std::pair<std::map<pthread_t,CFICache*>::iterator, bool> res
944 = mCaches.insert(std::pair<pthread_t,CFICache*>(me, cache));
945 // "this thread is not already registered"
946 MOZ_ASSERT(res.second); // "new element was inserted"
947 // Using mozilla::DebugOnly to declare |res| leads to compilation error
948 (void)res.second;
951 void
952 LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize,
953 const char* aFileName, const void* aMappedImage)
955 AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
957 mLog(":\n");
958 char buf[200];
959 snprintf(buf, sizeof(buf), "NotifyMap %llx %llu %s\n",
960 (unsigned long long int)aRXavma, (unsigned long long int)aSize,
961 aFileName);
962 buf[sizeof(buf)-1] = 0;
963 mLog(buf);
965 InvalidateCFICaches();
967 // Ignore obviously-stupid notifications.
968 if (aSize > 0) {
970 // Here's a new mapping, for this object.
971 SecMap* smap = new SecMap(mLog);
973 // Read CFI or EXIDX unwind data into |smap|.
974 if (!aMappedImage) {
975 (void)lul::ReadSymbolData(
976 string(aFileName), std::vector<string>(), smap,
977 (void*)aRXavma, mLog);
978 } else {
979 (void)lul::ReadSymbolDataInternal(
980 (const uint8_t*)aMappedImage,
981 string(aFileName), std::vector<string>(), smap,
982 (void*)aRXavma, mLog);
985 mLog("NotifyMap .. preparing entries\n");
987 smap->PrepareRuleSets(aRXavma, aSize);
989 snprintf(buf, sizeof(buf),
990 "NotifyMap got %lld entries\n", (long long int)smap->Size());
991 buf[sizeof(buf)-1] = 0;
992 mLog(buf);
994 // Add it to the primary map (the top level set of mapped objects).
995 mPriMap->AddSecMap(smap);
997 // Tell the segment array about the mapping, so that the stack
998 // scan and __kernel_syscall mechanisms know where valid code is.
999 mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
1004 void
1005 LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize)
1007 AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
1009 mLog(":\n");
1010 char buf[200];
1011 snprintf(buf, sizeof(buf), "NotifyExecutableArea %llx %llu\n",
1012 (unsigned long long int)aRXavma, (unsigned long long int)aSize);
1013 buf[sizeof(buf)-1] = 0;
1014 mLog(buf);
1016 InvalidateCFICaches();
1018 // Ignore obviously-stupid notifications.
1019 if (aSize > 0) {
1020 // Tell the segment array about the mapping, so that the stack
1021 // scan and __kernel_syscall mechanisms know where valid code is.
1022 mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
1027 void
1028 LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax)
1030 AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
1032 mLog(":\n");
1033 char buf[100];
1034 snprintf(buf, sizeof(buf), "NotifyUnmap %016llx-%016llx\n",
1035 (unsigned long long int)aRXavmaMin,
1036 (unsigned long long int)aRXavmaMax);
1037 buf[sizeof(buf)-1] = 0;
1038 mLog(buf);
1040 MOZ_ASSERT(aRXavmaMin <= aRXavmaMax);
1042 InvalidateCFICaches();
1044 // Remove from the primary map, any secondary maps that intersect
1045 // with the address range. Also delete the secondary maps.
1046 mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax);
1048 // Tell the segment array that the address range no longer
1049 // contains valid code.
1050 mSegArray->add(aRXavmaMin, aRXavmaMax, false);
1052 snprintf(buf, sizeof(buf), "NotifyUnmap: now have %d SecMaps\n",
1053 (int)mPriMap->CountSecMaps());
1054 buf[sizeof(buf)-1] = 0;
1055 mLog(buf);
1059 size_t
1060 LUL::CountMappings()
1062 AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_WRITING);
1063 return mPriMap->CountSecMaps();
1067 static
1068 TaggedUWord DerefTUW(TaggedUWord aAddr, StackImage* aStackImg)
1070 if (!aAddr.Valid()) {
1071 return TaggedUWord();
1073 if (aAddr.Value() < aStackImg->mStartAvma) {
1074 return TaggedUWord();
1076 if (aAddr.Value() + sizeof(uintptr_t) > aStackImg->mStartAvma
1077 + aStackImg->mLen) {
1078 return TaggedUWord();
1080 return TaggedUWord(*(uintptr_t*)(aStackImg->mContents + aAddr.Value()
1081 - aStackImg->mStartAvma));
1084 static
1085 TaggedUWord EvaluateReg(int16_t aReg, UnwindRegs* aOldRegs, TaggedUWord aCFA)
1087 switch (aReg) {
1088 case DW_REG_CFA: return aCFA;
1089 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1090 case DW_REG_INTEL_XBP: return aOldRegs->xbp;
1091 case DW_REG_INTEL_XSP: return aOldRegs->xsp;
1092 case DW_REG_INTEL_XIP: return aOldRegs->xip;
1093 #elif defined(LUL_ARCH_arm)
1094 case DW_REG_ARM_R7: return aOldRegs->r7;
1095 case DW_REG_ARM_R11: return aOldRegs->r11;
1096 case DW_REG_ARM_R12: return aOldRegs->r12;
1097 case DW_REG_ARM_R13: return aOldRegs->r13;
1098 case DW_REG_ARM_R14: return aOldRegs->r14;
1099 case DW_REG_ARM_R15: return aOldRegs->r15;
1100 #else
1101 # error "Unsupported arch"
1102 #endif
1103 default: MOZ_ASSERT(0); return TaggedUWord();
1107 static
1108 TaggedUWord EvaluateExpr(LExpr aExpr, UnwindRegs* aOldRegs,
1109 TaggedUWord aCFA, StackImage* aStackImg)
1111 switch (aExpr.mHow) {
1112 case LExpr::UNKNOWN:
1113 return TaggedUWord();
1114 case LExpr::NODEREF: {
1115 TaggedUWord tuw = EvaluateReg(aExpr.mReg, aOldRegs, aCFA);
1116 tuw.Add(TaggedUWord((intptr_t)aExpr.mOffset));
1117 return tuw;
1119 case LExpr::DEREF: {
1120 TaggedUWord tuw = EvaluateReg(aExpr.mReg, aOldRegs, aCFA);
1121 tuw.Add(TaggedUWord((intptr_t)aExpr.mOffset));
1122 return DerefTUW(tuw, aStackImg);
1124 default:
1125 MOZ_ASSERT(0);
1126 return TaggedUWord();
// Apply RuleSet |aRS| to the register state |aRegs|, in place: first
// compute the CFA for this frame, then use it to compute each tracked
// register's value in the calling frame.  Any register whose rule
// cannot be evaluated is left as an invalid TaggedUWord; the caller
// detects unwind failure by inspecting validity afterwards.
1130 static
1131 void UseRuleSet(/*MOD*/UnwindRegs* aRegs,
1132 StackImage* aStackImg, RuleSet* aRS)
1134 // Take a copy of regs, since we'll need to refer to the old values
1135 // whilst computing the new ones.
1136 UnwindRegs old_regs = *aRegs;
1138 // Mark all the current register values as invalid, so that the
1139 // caller can see, on our return, which ones have been computed
1140 // anew. If we don't even manage to compute a new PC value, then
1141 // the caller will have to abandon the unwind.
1142 // FIXME: Create and use instead: aRegs->SetAllInvalid();
1143 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1144 aRegs->xbp = TaggedUWord();
1145 aRegs->xsp = TaggedUWord();
1146 aRegs->xip = TaggedUWord();
1147 #elif defined(LUL_ARCH_arm)
1148 aRegs->r7 = TaggedUWord();
1149 aRegs->r11 = TaggedUWord();
1150 aRegs->r12 = TaggedUWord();
1151 aRegs->r13 = TaggedUWord();
1152 aRegs->r14 = TaggedUWord();
1153 aRegs->r15 = TaggedUWord();
1154 #else
1155 # error "Unsupported arch"
1156 #endif
1158 // This is generally useful.
1159 const TaggedUWord inval = TaggedUWord();
1161 // First, compute the CFA.
// Note the CFA expression is evaluated with an invalid "old CFA",
// since the CFA for this frame is exactly what is being computed.
1162 TaggedUWord cfa = EvaluateExpr(aRS->mCfaExpr, &old_regs,
1163 inval/*old cfa*/, aStackImg);
1165 // If we didn't manage to compute the CFA, well .. that's ungood,
1166 // but keep going anyway. It'll be OK provided none of the register
1167 // value rules mention the CFA. In any case, compute the new values
1168 // for each register that we're tracking.
1170 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1171 aRegs->xbp = EvaluateExpr(aRS->mXbpExpr, &old_regs, cfa, aStackImg);
1172 aRegs->xsp = EvaluateExpr(aRS->mXspExpr, &old_regs, cfa, aStackImg);
1173 aRegs->xip = EvaluateExpr(aRS->mXipExpr, &old_regs, cfa, aStackImg);
1174 #elif defined(LUL_ARCH_arm)
1175 aRegs->r7 = EvaluateExpr(aRS->mR7expr, &old_regs, cfa, aStackImg);
1176 aRegs->r11 = EvaluateExpr(aRS->mR11expr, &old_regs, cfa, aStackImg);
1177 aRegs->r12 = EvaluateExpr(aRS->mR12expr, &old_regs, cfa, aStackImg);
1178 aRegs->r13 = EvaluateExpr(aRS->mR13expr, &old_regs, cfa, aStackImg);
1179 aRegs->r14 = EvaluateExpr(aRS->mR14expr, &old_regs, cfa, aStackImg);
1180 aRegs->r15 = EvaluateExpr(aRS->mR15expr, &old_regs, cfa, aStackImg);
1181 #else
1182 # error "Unsupported arch"
1183 #endif
1185 // We're done. Any regs for which we didn't manage to compute a
1186 // new value will now be marked as invalid.
// Unwind the stack whose innermost-frame registers are |aStartRegs|
// and whose memory is captured in |aStackImg|.  Writes up to
// |aFramesAvail| recovered frame PCs and SPs into |aFramePCs| and
// |aFrameSPs|, setting *aFramesUsed to the number recovered.  Where
// no CFI rule is available, up to |aScannedFramesAllowed| extra
// frames may be recovered by Breakpad-style stack scanning;
// *aScannedFramesAcquired counts how many were.  The calling thread
// must previously have been registered, since its thread-local CFI
// cache is consulted (and possibly updated) here.
1189 void
1190 LUL::Unwind(/*OUT*/uintptr_t* aFramePCs,
1191 /*OUT*/uintptr_t* aFrameSPs,
1192 /*OUT*/size_t* aFramesUsed,
1193 /*OUT*/size_t* aScannedFramesAcquired,
1194 size_t aFramesAvail,
1195 size_t aScannedFramesAllowed,
1196 UnwindRegs* aStartRegs, StackImage* aStackImg)
1198 AutoLulRWLocker lock(mRWlock, AutoLulRWLocker::FOR_READING);
1200 pthread_t me = pthread_self();
1201 std::map<pthread_t, CFICache*>::iterator iter = mCaches.find(me);
1203 if (iter == mCaches.end()) {
1204 // The calling thread is not registered for unwinding.
// NOTE: MOZ_CRASH does not return, so the |return| below is
// unreachable in practice; it exists only to placate control-flow
// analysis.
1205 MOZ_CRASH();
1206 return;
1209 CFICache* cache = iter->second;
1210 MOZ_ASSERT(cache);
1212 // Do unwindery, possibly modifying |cache|.
1214 /////////////////////////////////////////////////////////
1215 // BEGIN UNWIND
1217 *aFramesUsed = 0;
1219 UnwindRegs regs = *aStartRegs;
1220 TaggedUWord last_valid_sp = TaggedUWord();
1222 // Stack-scan control
1223 unsigned int n_scanned_frames = 0; // # s-s frames recovered so far
1224 static const int NUM_SCANNED_WORDS = 50; // max allowed scan length
1226 while (true) {
1228 if (DEBUG_MAIN) {
1229 char buf[300];
1230 mLog("\n");
1231 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1232 snprintf(buf, sizeof(buf),
1233 "LoopTop: rip %d/%llx rsp %d/%llx rbp %d/%llx\n",
1234 (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(),
1235 (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(),
1236 (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value());
1237 buf[sizeof(buf)-1] = 0;
1238 mLog(buf);
1239 #elif defined(LUL_ARCH_arm)
1240 snprintf(buf, sizeof(buf),
1241 "LoopTop: r15 %d/%llx r7 %d/%llx r11 %d/%llx"
1242 " r12 %d/%llx r13 %d/%llx r14 %d/%llx\n",
1243 (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(),
1244 (int)regs.r7.Valid(), (unsigned long long int)regs.r7.Value(),
1245 (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(),
1246 (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(),
1247 (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(),
1248 (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value());
1249 buf[sizeof(buf)-1] = 0;
1250 mLog(buf);
1251 #else
1252 # error "Unsupported arch"
1253 #endif
// |ia| is the "instruction address" for this frame: the PC for the
// innermost frame, otherwise (on arm) the return address in R14.
1256 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1257 TaggedUWord ia = regs.xip;
1258 TaggedUWord sp = regs.xsp;
1259 #elif defined(LUL_ARCH_arm)
1260 TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14);
1261 TaggedUWord sp = regs.r13;
1262 #else
1263 # error "Unsupported arch"
1264 #endif
1266 if (*aFramesUsed >= aFramesAvail) {
1267 break;
1270 // If we don't have a valid value for the PC, give up.
1271 if (!ia.Valid()) {
1272 break;
1275 // If this is the innermost frame, record the SP value, which
1276 // presumably is valid. If this isn't the innermost frame, and we
1277 // have a valid SP value, check that its SP value isn't less that
1278 // the one we've seen so far, so as to catch potential SP value
1279 // cycles.
1280 if (*aFramesUsed == 0) {
1281 last_valid_sp = sp;
1282 } else {
1283 MOZ_ASSERT(last_valid_sp.Valid());
1284 if (sp.Valid()) {
1285 if (sp.Value() < last_valid_sp.Value()) {
1286 // Hmm, SP going in the wrong direction. Let's stop.
1287 break;
1289 // Remember where we got to.
1290 last_valid_sp = sp;
1294 // For the innermost frame, the IA value is what we need. For all
1295 // other frames, it's actually the return address, so back up one
1296 // byte so as to get it into the calling instruction.
1297 aFramePCs[*aFramesUsed] = ia.Value() - (*aFramesUsed == 0 ? 0 : 1);
1298 aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0;
1299 (*aFramesUsed)++;
1301 // Find the RuleSet for the current IA, if any. This will also
1302 // query the backing (secondary) maps if it isn't found in the
1303 // thread-local cache.
1305 // If this isn't the innermost frame, back up into the calling insn.
1306 if (*aFramesUsed > 1) {
1307 ia.Add(TaggedUWord((uintptr_t)(-1)));
1310 RuleSet* ruleset = cache->Lookup(ia.Value());
1311 if (DEBUG_MAIN) {
1312 char buf[100];
1313 snprintf(buf, sizeof(buf), "ruleset for 0x%llx = %p\n",
1314 (unsigned long long int)ia.Value(), ruleset);
1315 buf[sizeof(buf)-1] = 0;
1316 mLog(buf);
1319 /////////////////////////////////////////////
1320 ////
1321 // On 32 bit x86-linux, syscalls are often done via the VDSO
1322 // function __kernel_vsyscall, which doesn't have a corresponding
1323 // object that we can read debuginfo from. That effectively kills
1324 // off all stack traces for threads blocked in syscalls. Hence
1325 // special-case by looking at the code surrounding the program
1326 // counter.
1328 // 0xf7757420 <__kernel_vsyscall+0>: push %ecx
1329 // 0xf7757421 <__kernel_vsyscall+1>: push %edx
1330 // 0xf7757422 <__kernel_vsyscall+2>: push %ebp
1331 // 0xf7757423 <__kernel_vsyscall+3>: mov %esp,%ebp
1332 // 0xf7757425 <__kernel_vsyscall+5>: sysenter
1333 // 0xf7757427 <__kernel_vsyscall+7>: nop
1334 // 0xf7757428 <__kernel_vsyscall+8>: nop
1335 // 0xf7757429 <__kernel_vsyscall+9>: nop
1336 // 0xf775742a <__kernel_vsyscall+10>: nop
1337 // 0xf775742b <__kernel_vsyscall+11>: nop
1338 // 0xf775742c <__kernel_vsyscall+12>: nop
1339 // 0xf775742d <__kernel_vsyscall+13>: nop
1340 // 0xf775742e <__kernel_vsyscall+14>: int $0x80
1341 // 0xf7757430 <__kernel_vsyscall+16>: pop %ebp
1342 // 0xf7757431 <__kernel_vsyscall+17>: pop %edx
1343 // 0xf7757432 <__kernel_vsyscall+18>: pop %ecx
1344 // 0xf7757433 <__kernel_vsyscall+19>: ret
1346 // In cases where the sampled thread is blocked in a syscall, its
1347 // program counter will point at "pop %ebp". Hence we look for
1348 // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and
1349 // the corresponding register-recovery actions are:
1350 // new_ebp = *(old_esp + 0)
1351 // new eip = *(old_esp + 12)
1352 // new_esp = old_esp + 16
1354 // It may also be the case that the program counter points two
1355 // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in
1356 // the case where the syscall has been restarted but the thread
1357 // hasn't been rescheduled. The code below doesn't handle that;
1358 // it could easily be made to.
1360 #if defined(LUL_PLAT_x86_android) || defined(LUL_PLAT_x86_linux)
1361 if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) {
1362 uintptr_t insns_min, insns_max;
1363 uintptr_t eip = ia.Value();
1364 bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip);
1365 if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) {
1366 uint8_t* eipC = (uint8_t*)eip;
1367 if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D &&
1368 eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) {
1369 TaggedUWord sp_plus_0 = sp;
1370 TaggedUWord sp_plus_12 = sp;
1371 TaggedUWord sp_plus_16 = sp;
1372 sp_plus_12.Add(TaggedUWord(12));
1373 sp_plus_16.Add(TaggedUWord(16));
1374 TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg);
1375 TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg);
1376 TaggedUWord new_esp = sp_plus_16;
1377 if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) {
1378 regs.xbp = new_ebp;
1379 regs.xip = new_eip;
1380 regs.xsp = new_esp;
1381 continue;
1386 #endif
1387 ////
1388 /////////////////////////////////////////////
1390 // So, do we have a ruleset for this address? If so, use it now.
1391 if (ruleset) {
1393 if (DEBUG_MAIN) {
1394 ruleset->Print(mLog); mLog("\n");
1396 // Use the RuleSet to compute the registers for the previous
1397 // frame. |regs| is modified in-place.
1398 UseRuleSet(&regs, aStackImg, ruleset);
1400 } else {
1402 // There's no RuleSet for the specified address, so see if
1403 // it's possible to get anywhere by stack-scanning.
1405 // Use stack scanning frugally.
1406 if (n_scanned_frames++ >= aScannedFramesAllowed) {
1407 break;
1410 // We can't scan the stack without a valid, aligned stack pointer.
1411 if (!sp.IsAligned()) {
1412 break;
1415 bool scan_succeeded = false;
1416 for (int i = 0; i < NUM_SCANNED_WORDS; ++i) {
1417 TaggedUWord aWord = DerefTUW(sp, aStackImg);
1418 // aWord is something we fished off the stack. It should be
1419 // valid, unless we overran the stack bounds.
1420 if (!aWord.Valid()) {
1421 break;
1424 // Now, does aWord point inside a text section and immediately
1425 // after something that looks like a call instruction?
1426 if (mPriMap->MaybeIsReturnPoint(aWord, mSegArray)) {
1427 // Yes it does. Update the unwound registers heuristically,
1428 // using the same schemes as Breakpad does.
1429 scan_succeeded = true;
1430 (*aScannedFramesAcquired)++;
1432 #if defined(LUL_ARCH_x64) || defined(LUL_ARCH_x86)
1433 // The same logic applies for the 32- and 64-bit cases.
1434 // Register names of the form xsp etc refer to (eg) esp in
1435 // the 32-bit case and rsp in the 64-bit case.
1436 # if defined(LUL_ARCH_x64)
1437 const int wordSize = 8;
1438 # else
1439 const int wordSize = 4;
1440 # endif
1441 // The return address -- at XSP -- will have been pushed by
1442 // the CALL instruction. So the caller's XSP value
1443 // immediately before and after that CALL instruction is the
1444 // word above XSP.
1445 regs.xsp = sp;
1446 regs.xsp.Add(TaggedUWord(wordSize));
1448 // aWord points at the return point, so back up one byte
1449 // to put it in the calling instruction.
1450 regs.xip = aWord;
1451 regs.xip.Add(TaggedUWord((uintptr_t)(-1)));
1453 // Computing a new value from the frame pointer is more tricky.
1454 if (regs.xbp.Valid() &&
1455 sp.Valid() && regs.xbp.Value() == sp.Value() - wordSize) {
1456 // One possibility is that the callee begins with the standard
1457 // preamble "push %xbp; mov %xsp, %xbp". In which case, the
1458 // (1) caller's XBP value will be at the word below XSP, and
1459 // (2) the current (callee's) XBP will point at that word:
1460 regs.xbp = DerefTUW(regs.xbp, aStackImg);
1461 } else if (regs.xbp.Valid() &&
1462 sp.Valid() && regs.xbp.Value() >= sp.Value() + wordSize) {
1463 // If that didn't work out, maybe the callee didn't change
1464 // XBP, so it still holds the caller's value. For that to
1465 // be plausible, XBP will need to have a value at least
1466 // higher than XSP since that holds the purported return
1467 // address. In which case do nothing, since XBP already
1468 // holds the "right" value.
1469 } else {
1470 // Mark XBP as invalid, so that subsequent unwind iterations
1471 // don't assume it holds valid data.
1472 regs.xbp = TaggedUWord();
1475 // Move on to the next word up the stack
1476 sp.Add(TaggedUWord(wordSize));
1478 #elif defined(LUL_ARCH_arm)
1479 // Set all registers to be undefined, except for SP(R13) and
1480 // PC(R15).
1482 // aWord points either at the return point, if returning to
1483 // ARM code, or one insn past the return point if returning
1484 // to Thumb code. In both cases, aWord-2 is guaranteed to
1485 // fall within the calling instruction.
1486 regs.r15 = aWord;
1487 regs.r15.Add(TaggedUWord((uintptr_t)(-2)));
1489 // Make SP be the word above the location where the return
1490 // address was found.
1491 regs.r13 = sp;
1492 regs.r13.Add(TaggedUWord(4));
1494 // All other regs are undefined.
1495 regs.r7 = regs.r11 = regs.r12 = regs.r14 = TaggedUWord();
1497 // Move on to the next word up the stack
1498 sp.Add(TaggedUWord(4));
1500 #else
1501 # error "Unknown plat"
1502 #endif
1504 break;
1507 } // for (int i = 0; i < NUM_SCANNED_WORDS; i++)
1509 // We tried to make progress by scanning the stack, but failed.
1510 // So give up -- fall out of the top level unwind loop.
1511 if (!scan_succeeded) {
1512 break;
1516 } // top level unwind loop
1518 // END UNWIND
1519 /////////////////////////////////////////////////////////
1523 void
1524 LUL::InvalidateCFICaches()
1526 // NB: the caller must hold m_rwl for writing.
1528 // FIXME: this could get expensive. Use a bool to remember when the
1529 // caches have been invalidated and hence avoid duplicate invalidations.
1530 for (std::map<pthread_t,CFICache*>::iterator iter = mCaches.begin();
1531 iter != mCaches.end();
1532 ++iter) {
1533 iter->second->Invalidate();
1538 ////////////////////////////////////////////////////////////////
1539 // LUL Unit Testing //
1540 ////////////////////////////////////////////////////////////////
// Size, in bytes, of the stack snapshot that the unit tests hand to
// LUL, and hence an upper bound on the test functions' combined frame
// sizes.
1542 static const int LUL_UNIT_TEST_STACK_SIZE = 16384;
1544 // This function is innermost in the test call sequence. It uses LUL
1545 // to unwind, and compares the result with the sequence specified in
1546 // the director string. These need to agree in order for the test to
1547 // pass. In order not to screw up the results, this function needs
1548 // to have a not-very big stack frame, since we're only presenting
1549 // the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and
1550 // that chunk unavoidably includes the frame for this function.
1552 // This function must not be inlined into its callers. Doing so will
1553 // cause the expected-vs-actual backtrace consistency checking to
1554 // fail. Prints summary results to |aLUL|'s logging sink and also
1555 // returns a boolean indicating whether or not the test passed.
1556 static __attribute__((noinline))
1557 bool GetAndCheckStackTrace(LUL* aLUL, const char* dstring)
1559 // Get hold of the current unwind-start registers.
1560 UnwindRegs startRegs;
1561 memset(&startRegs, 0, sizeof(startRegs));
1562 #if defined(LUL_PLAT_x64_linux)
1563 volatile uintptr_t block[3];
1564 MOZ_ASSERT(sizeof(block) == 24);
1565 __asm__ __volatile__(
1566 "leaq 0(%%rip), %%r15" "\n\t"
1567 "movq %%r15, 0(%0)" "\n\t"
1568 "movq %%rsp, 8(%0)" "\n\t"
1569 "movq %%rbp, 16(%0)" "\n"
1570 : : "r"(&block[0]) : "memory", "r15"
1572 startRegs.xip = TaggedUWord(block[0]);
1573 startRegs.xsp = TaggedUWord(block[1]);
1574 startRegs.xbp = TaggedUWord(block[2]);
1575 const uintptr_t REDZONE_SIZE = 128;
1576 uintptr_t start = block[1] - REDZONE_SIZE;
1577 #elif defined(LUL_PLAT_x86_linux) || defined(LUL_PLAT_x86_android)
1578 volatile uintptr_t block[3];
1579 MOZ_ASSERT(sizeof(block) == 12);
1580 __asm__ __volatile__(
1581 ".byte 0xE8,0x00,0x00,0x00,0x00"/*call next insn*/ "\n\t"
1582 "popl %%edi" "\n\t"
1583 "movl %%edi, 0(%0)" "\n\t"
1584 "movl %%esp, 4(%0)" "\n\t"
1585 "movl %%ebp, 8(%0)" "\n"
1586 : : "r"(&block[0]) : "memory", "edi"
1588 startRegs.xip = TaggedUWord(block[0]);
1589 startRegs.xsp = TaggedUWord(block[1]);
1590 startRegs.xbp = TaggedUWord(block[2]);
1591 const uintptr_t REDZONE_SIZE = 0;
1592 uintptr_t start = block[1] - REDZONE_SIZE;
1593 #elif defined(LUL_PLAT_arm_android)
1594 volatile uintptr_t block[6];
1595 MOZ_ASSERT(sizeof(block) == 24);
1596 __asm__ __volatile__(
1597 "mov r0, r15" "\n\t"
1598 "str r0, [%0, #0]" "\n\t"
1599 "str r14, [%0, #4]" "\n\t"
1600 "str r13, [%0, #8]" "\n\t"
1601 "str r12, [%0, #12]" "\n\t"
1602 "str r11, [%0, #16]" "\n\t"
1603 "str r7, [%0, #20]" "\n"
1604 : : "r"(&block[0]) : "memory", "r0"
1606 startRegs.r15 = TaggedUWord(block[0]);
1607 startRegs.r14 = TaggedUWord(block[1]);
1608 startRegs.r13 = TaggedUWord(block[2]);
1609 startRegs.r12 = TaggedUWord(block[3]);
1610 startRegs.r11 = TaggedUWord(block[4]);
1611 startRegs.r7 = TaggedUWord(block[5]);
1612 const uintptr_t REDZONE_SIZE = 0;
// NOTE(review): on this (ARM) path, block[1] holds R14 (the link
// register), not the stack pointer -- R13 was stored in block[2].
// The x86/x64 paths use block[1] because there it holds the SP.
// This looks like it should be block[2]; confirm against a working
// ARM run before changing.
1613 uintptr_t start = block[1] - REDZONE_SIZE;
1614 #else
1615 # error "Unsupported platform"
1616 #endif
1618 // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the
1619 // stack.
1620 uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE;
1621 uintptr_t ws = sizeof(void*);
1622 start &= ~(ws-1);
1623 end &= ~(ws-1);
1624 uintptr_t nToCopy = end - start;
1625 if (nToCopy > lul::N_STACK_BYTES) {
1626 nToCopy = lul::N_STACK_BYTES;
1628 MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
1629 StackImage* stackImg = new StackImage();
1630 stackImg->mLen = nToCopy;
1631 stackImg->mStartAvma = start;
1632 if (nToCopy > 0) {
// MOZ_MAKE_MEM_DEFINED quietens memory checkers (e.g. Valgrind)
// about reading possibly-uninitialised stack bytes.
1633 MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy);
1634 memcpy(&stackImg->mContents[0], (void*)start, nToCopy);
1637 // Unwind it.
1638 const int MAX_TEST_FRAMES = 64;
1639 uintptr_t framePCs[MAX_TEST_FRAMES];
1640 uintptr_t frameSPs[MAX_TEST_FRAMES];
1641 size_t framesAvail = mozilla::ArrayLength(framePCs);
1642 size_t framesUsed = 0;
// Stack scanning is disabled here: the test checks pure CFI-driven
// unwinding.
1643 size_t scannedFramesAllowed = 0;
1644 size_t scannedFramesAcquired = 0;
1645 aLUL->Unwind( &framePCs[0], &frameSPs[0],
1646 &framesUsed, &scannedFramesAcquired,
1647 framesAvail, scannedFramesAllowed,
1648 &startRegs, stackImg );
1650 delete stackImg;
1652 //if (0) {
1653 // // Show what we have.
1654 // fprintf(stderr, "Got %d frames:\n", (int)framesUsed);
1655 // for (size_t i = 0; i < framesUsed; i++) {
1656 // fprintf(stderr, " [%2d] SP %p PC %p\n",
1657 // (int)i, (void*)frameSPs[i], (void*)framePCs[i]);
1658 // }
1659 // fprintf(stderr, "\n");
1662 // Check to see if there's a consistent binding between digits in
1663 // the director string ('1' .. '8') and the PC values acquired by
1664 // the unwind. If there isn't, the unwinding has failed somehow.
1665 uintptr_t binding[8]; // binding for '1' .. binding for '8'
1666 memset((void*)binding, 0, sizeof(binding));
1668 // The general plan is to work backwards along the director string
1669 // and forwards along the framePCs array. Doing so corresponds to
1670 // working outwards from the innermost frame of the recursive test set.
1671 const char* cursor = dstring;
1673 // Find the end. This leaves |cursor| two bytes past the first
1674 // character we want to look at -- see comment below.
1675 while (*cursor) cursor++;
1677 // Counts the number of consistent frames.
1678 size_t nConsistent = 0;
1680 // Iterate back to the start of the director string. The starting
1681 // points are a bit complex. We can't use framePCs[0] because that
1682 // contains the PC in this frame (above). We can't use framePCs[1]
1683 // because that will contain the PC at return point in the recursive
1684 // test group (TestFn[1-8]) for their call "out" to this function,
1685 // GetAndCheckStackTrace. Although LUL will compute a correct
1686 // return address, that will not be the same return address as for a
1687 // recursive call out of the the function to another function in the
1688 // group. Hence we can only start consistency checking at
1689 // framePCs[2].
1691 // To be consistent, then, we must ignore the last element in the
1692 // director string as that corresponds to framePCs[1]. Hence the
1693 // start points are: framePCs[2] and the director string 2 bytes
1694 // before the terminating zero.
1696 // Also as a result of this, the number of consistent frames counted
1697 // will always be one less than the length of the director string
1698 // (not including its terminating zero).
1699 size_t frameIx;
1700 for (cursor = cursor-2, frameIx = 2;
1701 cursor >= dstring && frameIx < framesUsed;
1702 cursor--, frameIx++) {
1703 char c = *cursor;
1704 uintptr_t pc = framePCs[frameIx];
1705 // If this doesn't hold, the director string is ill-formed.
1706 MOZ_ASSERT(c >= '1' && c <= '8');
1707 int n = ((int)c) - ((int)'1');
1708 if (binding[n] == 0) {
1709 // There's no binding for |c| yet, so install |pc| and carry on.
1710 binding[n] = pc;
1711 nConsistent++;
1712 continue;
1714 // There's a pre-existing binding for |c|. Check it's consistent.
1715 if (binding[n] != pc) {
1716 // Not consistent. Give up now.
1717 break;
1719 // Consistent. Keep going.
1720 nConsistent++;
1723 // So, did we succeed?
1724 bool passed = nConsistent+1 == strlen(dstring);
1726 // Show the results.
1727 char buf[200];
1728 snprintf(buf, sizeof(buf), "LULUnitTest: dstring = %s\n", dstring);
1729 buf[sizeof(buf)-1] = 0;
1730 aLUL->mLog(buf);
1731 snprintf(buf, sizeof(buf),
1732 "LULUnitTest: %d consistent, %d in dstring: %s\n",
1733 (int)nConsistent, (int)strlen(dstring),
1734 passed ? "PASS" : "FAIL");
1735 buf[sizeof(buf)-1] = 0;
1736 aLUL->mLog(buf);
1738 return passed;
1742 // Macro magic to create a set of 8 mutually recursive functions with
1743 // varying frame sizes. These will recurse amongst themselves as
1744 // specified by |strP|, the directory string, and call
1745 // GetAndCheckStackTrace when the string becomes empty, passing it the
1746 // original value of the string. This checks the result, printing
1747 // results on |aLUL|'s logging sink, and also returns a boolean
1748 // indicating whether or not the results are acceptable (correct).
// Declares the signature shared by all eight test functions.
1750 #define DECL_TEST_FN(NAME) \
1751 bool NAME(LUL* aLUL, const char* strPorig, const char* strP);
// Defines one test function.  |FRAMESIZE| pads the frame with a
// zeroed volatile array so the eight functions have distinct frame
// sizes.  Note that any director-string character outside '1'..'8'
// falls into the switch's default case and recurses via TestFn8.
1753 #define GEN_TEST_FN(NAME, FRAMESIZE) \
1754 bool NAME(LUL* aLUL, const char* strPorig, const char* strP) { \
1755 volatile char space[FRAMESIZE]; \
1756 memset((char*)&space[0], 0, sizeof(space)); \
1757 if (*strP == '\0') { \
1758 /* We've come to the end of the director string. */ \
1759 /* Take a stack snapshot. */ \
1760 return GetAndCheckStackTrace(aLUL, strPorig); \
1761 } else { \
1762 /* Recurse onwards. This is a bit subtle. The obvious */ \
1763 /* thing to do here is call onwards directly, from within the */ \
1764 /* arms of the case statement. That gives a problem in that */ \
1765 /* there will be multiple return points inside each function when */ \
1766 /* unwinding, so it will be difficult to check for consistency */ \
1767 /* against the director string. Instead, we make an indirect */ \
1768 /* call, so as to guarantee that there is only one call site */ \
1769 /* within each function. This does assume that the compiler */ \
1770 /* won't transform it back to the simple direct-call form. */ \
1771 /* To discourage it from doing so, the call is bracketed with */ \
1772 /* __asm__ __volatile__ sections so as to make it not-movable. */ \
1773 bool (*nextFn)(LUL*, const char*, const char*) = NULL; \
1774 switch (*strP) { \
1775 case '1': nextFn = TestFn1; break; \
1776 case '2': nextFn = TestFn2; break; \
1777 case '3': nextFn = TestFn3; break; \
1778 case '4': nextFn = TestFn4; break; \
1779 case '5': nextFn = TestFn5; break; \
1780 case '6': nextFn = TestFn6; break; \
1781 case '7': nextFn = TestFn7; break; \
1782 case '8': nextFn = TestFn8; break; \
1783 default: nextFn = TestFn8; break; \
1785 __asm__ __volatile__("":::"cc","memory"); \
1786 bool passed = nextFn(aLUL, strPorig, strP+1); \
1787 __asm__ __volatile__("":::"cc","memory"); \
1788 return passed; \
1792 // The test functions are mutually recursive, so it is necessary to
1793 // declare them before defining them.
1794 DECL_TEST_FN(TestFn1)
1795 DECL_TEST_FN(TestFn2)
1796 DECL_TEST_FN(TestFn3)
1797 DECL_TEST_FN(TestFn4)
1798 DECL_TEST_FN(TestFn5)
1799 DECL_TEST_FN(TestFn6)
1800 DECL_TEST_FN(TestFn7)
1801 DECL_TEST_FN(TestFn8)
// Frame sizes are deliberately varied so the test exercises a range
// of stack layouts.
1803 GEN_TEST_FN(TestFn1, 123)
1804 GEN_TEST_FN(TestFn2, 456)
1805 GEN_TEST_FN(TestFn3, 789)
1806 GEN_TEST_FN(TestFn4, 23)
1807 GEN_TEST_FN(TestFn5, 47)
1808 GEN_TEST_FN(TestFn6, 117)
1809 GEN_TEST_FN(TestFn7, 1)
1810 GEN_TEST_FN(TestFn8, 99)
1813 // This starts the test sequence going. Call here to generate a
1814 // sequence of calls as directed by the string |dstring|. The call
1815 // sequence will, from its innermost frame, finish by calling
1816 // GetAndCheckStackTrace() and passing it |dstring|.
1817 // GetAndCheckStackTrace() will unwind the stack, check consistency
1818 // of those results against |dstring|, and print a pass/fail message
1819 // to aLUL's logging sink. It also updates the counters in *aNTests
1820 // and aNTestsPassed.
// |aNTests| is incremented once per call; |aNTestsPassed| only when
// the backtrace-consistency check succeeds.
1821 __attribute__((noinline)) void
1822 TestUnw(/*OUT*/int* aNTests, /*OUT*/int*aNTestsPassed,
1823 LUL* aLUL, const char* dstring)
1825 // Ensure that the stack has at least this much space on it. This
1826 // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes
1827 // and hand it to LUL. Safe in the sense that no segfault can
1828 // happen because the stack is at least this big. This is all
1829 // somewhat dubious in the sense that a sufficiently clever compiler
1830 // (clang, for one) can figure out that space[] is unused and delete
1831 // it from the frame. Hence the somewhat elaborate hoop jumping to
1832 // fill it up before the call and to at least appear to use the
1833 // value afterwards.
1834 int i;
1835 volatile char space[LUL_UNIT_TEST_STACK_SIZE];
1836 for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
1837 space[i] = (char)(i & 0x7F);
1840 // Really run the test.
1841 bool passed = TestFn1(aLUL, dstring, dstring);
1843 // Appear to use space[], by visiting the value to compute some kind
1844 // of checksum, and then (apparently) using the checksum.
1845 int sum = 0;
1846 for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
1847 // If this doesn't fool LLVM, I don't know what will.
1848 sum += space[i] - 3*i;
// Pretend to consume |sum| so the checksum loop (and hence space[])
// cannot be optimised away.
1850 __asm__ __volatile__("" : : "r"(sum));
1852 // Update the counters.
1853 (*aNTests)++;
1854 if (passed) {
1855 (*aNTestsPassed)++;
1860 void
1861 RunLulUnitTests(/*OUT*/int* aNTests, /*OUT*/int*aNTestsPassed, LUL* aLUL)
1863 aLUL->mLog(":\n");
1864 aLUL->mLog("LULUnitTest: BEGIN\n");
1865 *aNTests = *aNTestsPassed = 0;
1866 TestUnw(aNTests, aNTestsPassed, aLUL, "11111111");
1867 TestUnw(aNTests, aNTestsPassed, aLUL, "11222211");
1868 TestUnw(aNTests, aNTestsPassed, aLUL, "111222333");
1869 TestUnw(aNTests, aNTestsPassed, aLUL, "1212121231212331212121212121212");
1870 TestUnw(aNTests, aNTestsPassed, aLUL, "31415827271828325332173258");
1871 TestUnw(aNTests, aNTestsPassed, aLUL,
1872 "123456781122334455667788777777777777777777777");
1873 aLUL->mLog("LULUnitTest: END\n");
1874 aLUL->mLog(":\n");
1878 } // namespace lul