Bug 617935: Check string lengths using StringBuffer. (r=lw)
[mozilla-central.git] / js / src / nanojit / Assembler.cpp
blob d6d56fb6d6daac465883d787702338122184db7a
1 /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
2 /* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
3 /* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
16 * The Original Code is [Open Source Virtual Machine].
18 * The Initial Developer of the Original Code is
19 * Adobe System Incorporated.
20 * Portions created by the Initial Developer are Copyright (C) 2004-2007
21 * the Initial Developer. All Rights Reserved.
23 * Contributor(s):
24 * Adobe AS3 Team
26 * Alternatively, the contents of this file may be used under the terms of
27 * either the GNU General Public License Version 2 or later (the "GPL"), or
28 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
40 #include "nanojit.h"
42 #ifdef FEATURE_NANOJIT
44 #ifdef VMCFG_VTUNE
45 #include "../core/CodegenLIR.h"
46 #endif
48 #ifdef _MSC_VER
49 // disable some specific warnings which are normally useful, but pervasive in the code-gen macros
50 #pragma warning(disable:4310) // cast truncates constant value
51 #endif
53 #ifdef VMCFG_VTUNE
54 namespace vtune {
55 using namespace nanojit;
56 void vtuneStart(void*, NIns*);
57 void vtuneEnd(void*, NIns*);
58 void vtuneLine(void*, int, NIns*);
59 void vtuneFile(void*, void*);
61 using namespace vtune;
62 #endif // VMCFG_VTUNE
65 namespace nanojit
67 /**
68 * Need the following:
70 * - merging paths ( build a graph? ), possibly use external rep to drive codegen
72 Assembler::Assembler(CodeAlloc& codeAlloc, Allocator& dataAlloc, Allocator& alloc, AvmCore* core, LogControl* logc, const Config& config)
73 : alloc(alloc)
74 , _codeAlloc(codeAlloc)
75 , _dataAlloc(dataAlloc)
76 , _thisfrag(NULL)
77 , _branchStateMap(alloc)
78 , _patches(alloc)
79 , _labels(alloc)
80 , _noise(NULL)
81 #if NJ_USES_IMMD_POOL
82 , _immDPool(alloc)
83 #endif
84 , codeList(NULL)
85 , _epilogue(NULL)
86 , _err(None)
87 #if PEDANTIC
88 , pedanticTop(NULL)
89 #endif
90 #ifdef VMCFG_VTUNE
91 , vtuneHandle(NULL)
92 #endif
93 , _config(config)
95 nInit(core);
96 (void)logc;
97 verbose_only( _logc = logc; )
98 verbose_only( _outputCache = 0; )
99 verbose_only( outline[0] = '\0'; )
100 verbose_only( outlineEOL[0] = '\0'; )
102 reset();
105 // Per-opcode register hint table. Default to no hints for all
106 // instructions. It's not marked const because individual back-ends can
107 // install hint values for opcodes of interest in nInit().
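// For example (illustrative sketch, not taken from this file): an x86
// back end could make integer call results prefer the return register by
// doing something like
//     nHints[LIR_calli] = rmask(retRegs[0]);
// inside its nInit().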
108 RegisterMask Assembler::nHints[LIR_sentinel+1] = {
109 #define OP___(op, number, repKind, retType, isCse) \
111 #include "LIRopcode.tbl"
112 #undef OP___
116 #ifdef _DEBUG
118 /*static*/ LIns* const AR::BAD_ENTRY = (LIns*)0xdeadbeef;
120 void AR::validateQuick()
122 NanoAssert(_highWaterMark < NJ_MAX_STACK_ENTRY);
123 NanoAssert(_entries[0] == NULL);
124 // Only check a few entries around _highWaterMark.
125 uint32_t const RADIUS = 4;
126 uint32_t const lo = (_highWaterMark > 1 + RADIUS ? _highWaterMark - RADIUS : 1);
127 uint32_t const hi = (_highWaterMark + 1 + RADIUS < NJ_MAX_STACK_ENTRY ? _highWaterMark + 1 + RADIUS : NJ_MAX_STACK_ENTRY);
128 for (uint32_t i = lo; i <= _highWaterMark; ++i)
129 NanoAssert(_entries[i] != BAD_ENTRY);
130 for (uint32_t i = _highWaterMark+1; i < hi; ++i)
131 NanoAssert(_entries[i] == BAD_ENTRY);
134 void AR::validateFull()
136 NanoAssert(_highWaterMark < NJ_MAX_STACK_ENTRY);
137 NanoAssert(_entries[0] == NULL);
138 for (uint32_t i = 1; i <= _highWaterMark; ++i)
139 NanoAssert(_entries[i] != BAD_ENTRY);
140 for (uint32_t i = _highWaterMark+1; i < NJ_MAX_STACK_ENTRY; ++i)
141 NanoAssert(_entries[i] == BAD_ENTRY);
144 void AR::validate()
146 static uint32_t validateCounter = 0;
147 if (++validateCounter >= 100)
149 validateFull();
150 validateCounter = 0;
152 else
154 validateQuick();
158 #endif
160 inline void AR::clear()
162 _highWaterMark = 0;
163 NanoAssert(_entries[0] == NULL);
164 #ifdef _DEBUG
165 for (uint32_t i = 1; i < NJ_MAX_STACK_ENTRY; ++i)
166 _entries[i] = BAD_ENTRY;
167 #endif
170 bool AR::Iter::next(LIns*& ins, uint32_t& nStackSlots, int32_t& arIndex)
172 while (_i <= _ar._highWaterMark) {
173 ins = _ar._entries[_i];
174 if (ins) {
175 arIndex = _i;
176 nStackSlots = nStackSlotsFor(ins);
177 _i += nStackSlots;
178 return true;
180 _i++;
182 ins = NULL;
183 nStackSlots = 0;
184 arIndex = 0;
185 return false;
188 void Assembler::arReset()
190 _activation.clear();
191 _branchStateMap.clear();
192 _patches.clear();
193 _labels.clear();
194 #if NJ_USES_IMMD_POOL
195 _immDPool.clear();
196 #endif
199 void Assembler::registerResetAll()
201 nRegisterResetAll(_allocator);
202 _allocator.managed = _allocator.free;
204 // At start, should have some registers free and none active.
205 NanoAssert(0 != _allocator.free);
206 NanoAssert(0 == _allocator.activeMask());
207 #ifdef NANOJIT_IA32
208 debug_only(_fpuStkDepth = 0; )
209 #endif
212 // Legend for register sets: A = allowed, P = preferred, F = free, S = SavedReg.
214 // Finds a register in 'setA___' to store the result of 'ins' (one from
215 // 'set_P__' if possible), evicting one if necessary. Doesn't consider
216 // the prior state of 'ins'.
218 // Nb: 'setA___' comes from the instruction's use, 'set_P__' comes from its def.
219 // Eg. in 'add(call(...), ...)':
220 // - the call's use means setA___==GpRegs;
221 // - the call's def means set_P__==rmask(retRegs[0]).
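// Illustrative sketch (assumed usage, not taken from this file): to place
// the result of an integer instruction in a general-purpose register,
// preferring the first return register, a caller might write
//     Register r = registerAlloc(ins, GpRegs, rmask(retRegs[0]));
// where GpRegs plays the role of setA___ (from the use) and
// rmask(retRegs[0]) the role of set_P__ (from the def).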
223 Register Assembler::registerAlloc(LIns* ins, RegisterMask setA___, RegisterMask set_P__)
225 Register r;
226 RegisterMask set__F_ = _allocator.free;
227 RegisterMask setA_F_ = setA___ & set__F_;
229 if (setA_F_) {
230 RegisterMask set___S = SavedRegs;
231 RegisterMask setA_FS = setA_F_ & set___S;
232 RegisterMask setAPF_ = setA_F_ & set_P__;
233 RegisterMask setAPFS = setA_FS & set_P__;
234 RegisterMask set;
236 if (setAPFS) set = setAPFS;
237 else if (setAPF_) set = setAPF_;
238 else if (setA_FS) set = setA_FS;
239 else set = setA_F_;
241 r = nRegisterAllocFromSet(set);
242 _allocator.addActive(r, ins);
243 ins->setReg(r);
244 } else {
245 // Nothing free, steal one.
246 // LSRA says pick the one with the furthest use.
247 LIns* vic = findVictim(setA___);
248 NanoAssert(vic->isInReg());
249 r = vic->getReg();
251 evict(vic);
253 // r ends up staying active, but the LIns defining it changes.
254 _allocator.removeFree(r);
255 _allocator.addActive(r, ins);
256 ins->setReg(r);
259 return r;
262 // Finds a register in 'allow' to store a temporary value (one not
263 // associated with a particular LIns), evicting one if necessary. The
264 // returned register is marked as being free and so can only be safely
265 // used for code generation purposes until the regstate is next inspected
266 // or updated.
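// Usage sketch (assumption): a back end needing a scratch GP register
// while expanding a single LIR instruction could write
//     Register tmp = registerAllocTmp(GpRegs);
// and must be finished with 'tmp' before the regstate is next inspected,
// because 'tmp' is left marked as free.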
267 Register Assembler::registerAllocTmp(RegisterMask allow)
269 LIns dummyIns;
270 Register r = registerAlloc(&dummyIns, allow, /*prefer*/0);
272 // Mark r as free, ready for use as a temporary value.
273 _allocator.removeActive(r);
274 _allocator.addFree(r);
275 return r;
278 void Assembler::codeAlloc(NIns *&start, NIns *&end, NIns *&eip
279 verbose_only(, size_t &nBytes))
281 // save the block we just filled
282 if (start)
283 CodeAlloc::add(codeList, start, end);
285 // CodeAlloc contract: allocations never fail
286 _codeAlloc.alloc(start, end);
287 verbose_only( nBytes += (end - start) * sizeof(NIns); )
288 NanoAssert(uintptr_t(end) - uintptr_t(start) >= (size_t)LARGEST_UNDERRUN_PROT);
289 eip = end;
292 void Assembler::clearNInsPtrs()
294 _nIns = 0;
295 _nExitIns = 0;
296 codeStart = codeEnd = 0;
297 exitStart = exitEnd = 0;
298 codeList = 0;
301 void Assembler::reset()
303 clearNInsPtrs();
304 nativePageReset();
305 registerResetAll();
306 arReset();
309 #ifdef _DEBUG
310 void Assembler::pageValidate()
312 if (error()) return;
313 // This may be a normal code chunk or an exit code chunk.
314 NanoAssertMsg(codeStart <= _nIns && _nIns <= codeEnd,
315 "Native instruction pointer overstep paging bounds; check overrideProtect for last instruction");
317 #endif
319 #ifdef _DEBUG
321 bool AR::isValidEntry(uint32_t idx, LIns* ins) const
323 return idx > 0 && idx <= _highWaterMark && _entries[idx] == ins;
326 void AR::checkForResourceConsistency(const RegAlloc& regs)
328 validate();
329 for (uint32_t i = 1; i <= _highWaterMark; ++i)
331 LIns* ins = _entries[i];
332 if (!ins)
333 continue;
334 uint32_t arIndex = ins->getArIndex();
335 NanoAssert(arIndex != 0);
336 if (ins->isop(LIR_allocp)) {
337 int const n = i + (ins->size()>>2);
338 for (int j=i+1; j < n; j++) {
339 NanoAssert(_entries[j]==ins);
341 NanoAssert(arIndex == (uint32_t)n-1);
342 i = n-1;
344 else if (ins->isQorD()) {
345 NanoAssert(_entries[i + 1]==ins);
346 i += 1; // skip high word
348 else {
349 NanoAssertMsg(arIndex == i, "Stack record index mismatch");
351 NanoAssertMsg(!ins->isInReg() || regs.isConsistent(ins->getReg(), ins),
352 "Register record mismatch");
356 void Assembler::resourceConsistencyCheck()
358 NanoAssert(!error());
359 #ifdef NANOJIT_IA32
360 // Within the expansion of a single LIR instruction, we may use the x87
361 // stack for unmanaged temporaries. Otherwise, we do not use the x87 stack
362 // as such, but use the top element alone as a single allocatable FP register.
363 // Compensation code must be inserted to keep the stack balanced and avoid
364 // overflow, and the mechanisms for this are rather fragile and IA32-specific.
365 // The predicate below should hold between any pair of instructions within
366 // a basic block, at labels, and just after a conditional branch. Currently,
367 // we enforce this condition between all pairs of instructions, but this is
368 // overly restrictive, and would fail if we did not generate unreachable x87
369 // stack pops following unconditional branches.
370 NanoAssert((_allocator.active[REGNUM(FST0)] && _fpuStkDepth == -1) ||
371 (!_allocator.active[REGNUM(FST0)] && _fpuStkDepth == 0));
372 #endif
373 _activation.checkForResourceConsistency(_allocator);
374 registerConsistencyCheck();
377 void Assembler::registerConsistencyCheck()
379 RegisterMask managed = _allocator.managed;
380 for (Register r = lsReg(managed); managed; r = nextLsReg(managed, r)) {
381 // A register managed by register allocation must be either
382 // free or active, but not both.
383 if (_allocator.isFree(r)) {
384 NanoAssertMsgf(_allocator.getActive(r)==0,
385 "register %s is free but assigned to ins", gpn(r));
386 } else {
387 // An LIns defining a register must have that register in
388 // its reservation.
389 LIns* ins = _allocator.getActive(r);
390 NanoAssert(ins);
391 NanoAssertMsg(r == ins->getReg(), "Register record mismatch");
395 RegisterMask not_managed = ~_allocator.managed;
396 for (Register r = lsReg(not_managed); not_managed; r = nextLsReg(not_managed, r)) {
397 // A register not managed by register allocation must be
398 // neither free nor active.
399 if (REGNUM(r) <= LastRegNum) {
400 NanoAssert(!_allocator.isFree(r));
401 NanoAssert(!_allocator.getActive(r));
405 #endif /* _DEBUG */
407 void Assembler::findRegFor2(RegisterMask allowa, LIns* ia, Register& ra,
408 RegisterMask allowb, LIns* ib, Register& rb)
410 // There should be some overlap between 'allowa' and 'allowb', else
411 // there's no point calling this function.
412 NanoAssert(allowa & allowb);
414 if (ia == ib) {
415 ra = rb = findRegFor(ia, allowa & allowb); // use intersection(allowa, allowb)
417 } else if (ib->isInRegMask(allowb)) {
418 // 'ib' is already in an allowable reg -- don't let it get evicted
419 // when finding 'ra'.
420 rb = ib->getReg();
421 ra = findRegFor(ia, allowa & ~rmask(rb));
423 } else {
424 ra = findRegFor(ia, allowa);
425 rb = findRegFor(ib, allowb & ~rmask(ra));
429 Register Assembler::findSpecificRegFor(LIns* i, Register w)
431 return findRegFor(i, rmask(w));
434 // Like findRegFor(), but called when the LIns is used as a pointer. It
435 // doesn't have to be called, since findRegFor() can still be used, but it can
436 // optimize the LIR_allocp case by indexing off FP, thus saving the use of
437 // a GpReg.
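// Illustrative example (hypothetical values): if 'base' is a LIR_allocp
// whose frame displacement is k, this folds k into 'd' and returns FP,
// so the access becomes FP-relative and no GpReg is consumed for the base.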
439 Register Assembler::getBaseReg(LIns* base, int &d, RegisterMask allow)
441 #if !PEDANTIC
442 if (base->isop(LIR_allocp)) {
443 // The value of a LIR_allocp is a pointer to its stack memory,
444 // which is always relative to FP. So we can just return FP if we
445 // also adjust 'd' (and can do so in a valid manner). Or, in the
446 // PEDANTIC case, we can just assign a register as normal;
447 // findRegFor() will allocate the stack memory for LIR_allocp if
448 // necessary.
449 d += findMemFor(base);
450 return FP;
452 #else
453 (void) d;
454 #endif
455 return findRegFor(base, allow);
458 // Like findRegFor2(), but used for stores where the base value has the
459 // same type as the stored value, eg. in asm_store32() on 32-bit platforms
460 // and asm_store64() on 64-bit platforms. Similar to getBaseReg(),
461 // findRegFor2() can be called instead, but this function can optimize the
462 // case where the base value is a LIR_allocp.
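// Sketch: in an asm_store32(value, d(base)) where 'base' is a LIR_allocp,
// this picks rb=FP, folds the alloc's frame displacement into 'd', and
// only allocates a register for 'value'.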
463 void Assembler::getBaseReg2(RegisterMask allowValue, LIns* value, Register& rv,
464 RegisterMask allowBase, LIns* base, Register& rb, int &d)
466 #if !PEDANTIC
467 if (base->isop(LIR_allocp)) {
468 rb = FP;
469 d += findMemFor(base);
470 rv = findRegFor(value, allowValue);
471 return;
473 #else
474 (void) d;
475 #endif
476 findRegFor2(allowValue, value, rv, allowBase, base, rb);
479 RegisterMask Assembler::hint(LIns* ins)
481 RegisterMask prefer = nHints[ins->opcode()];
482 return (prefer == PREFER_SPECIAL) ? nHint(ins) : prefer;
485 // Finds a register in 'allow' to hold the result of 'ins'. Used when we
486 // encounter a use of 'ins'. The actions depend on the prior regstate of
487 // 'ins':
488 // - If the result of 'ins' is not in any register, we find an allowed
489 // one, evicting one if necessary.
490 // - If the result of 'ins' is already in an allowed register, we use that.
491 // - If the result of 'ins' is already in a not-allowed register, we find an
492 // allowed one and move it.
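// Usage sketch (assumption): back ends typically call
//     Register ra = findRegFor(ins->oprnd1(), GpRegs);
// at the point where the code being generated reads oprnd1(), i.e. at a
// use of that operand.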
494 Register Assembler::findRegFor(LIns* ins, RegisterMask allow)
496 if (ins->isop(LIR_allocp)) {
497 // Never allocate a reg for this without stack space too.
498 findMemFor(ins);
501 Register r;
503 if (!ins->isInReg()) {
504 // 'ins' isn't in a register (must be in a spill slot or nowhere).
505 r = registerAlloc(ins, allow, hint(ins));
507 } else if (rmask(r = ins->getReg()) & allow) {
508 // 'ins' is in an allowed register.
509 _allocator.useActive(r);
511 } else {
512 // 'ins' is in a register (r) that's not in 'allow'.
513 #ifdef NANOJIT_IA32
514 if (((rmask(r)&XmmRegs) && !(allow&XmmRegs)) ||
515 ((rmask(r)&x87Regs) && !(allow&x87Regs)))
517 // x87 <-> xmm copy required
518 //_nvprof("fpu-evict",1);
519 evict(ins);
520 r = registerAlloc(ins, allow, hint(ins));
521 } else
522 #elif defined(NANOJIT_PPC) || defined(NANOJIT_MIPS) || defined(NANOJIT_SPARC)
523 if (((rmask(r)&GpRegs) && !(allow&GpRegs)) ||
524 ((rmask(r)&FpRegs) && !(allow&FpRegs)))
526 evict(ins);
527 r = registerAlloc(ins, allow, hint(ins));
528 } else
529 #endif
531 // The post-state register holding 'ins' is 's', the pre-state
532 // register holding 'ins' is 'r'. For example, if s=eax and
533 // r=ecx:
535 // pre-state: ecx(ins)
536 // instruction: mov eax, ecx
537 // post-state: eax(ins)
539 Register s = r;
540 _allocator.retire(r);
541 r = registerAlloc(ins, allow, hint(ins));
543 // 'ins' is in 'allow', in register r (different to the old r);
544 // s is the old r.
545 if ((rmask(s) & GpRegs) && (rmask(r) & GpRegs)) {
546 MR(s, r); // move 'ins' from its pre-state reg (r) to its post-state reg (s)
547 } else {
548 asm_nongp_copy(s, r);
553 return r;
556 // Like findSpecificRegFor(), but only for when 'r' is known to be free
557 // and 'ins' is known to not already have a register allocated. Updates
558 // the regstate (maintaining the invariants) but does not generate any
559 // code. The return value is redundant, always being 'r', but it's
560 // sometimes useful to have it there for assignments.
561 Register Assembler::findSpecificRegForUnallocated(LIns* ins, Register r)
563 if (ins->isop(LIR_allocp)) {
564 // never allocate a reg for this w/out stack space too
565 findMemFor(ins);
568 NanoAssert(!ins->isInReg());
569 NanoAssert(_allocator.free & rmask(r));
571 ins->setReg(r);
572 _allocator.removeFree(r);
573 _allocator.addActive(r, ins);
575 return r;
578 #if NJ_USES_IMMD_POOL
579 const uint64_t* Assembler::findImmDFromPool(uint64_t q)
581 uint64_t* p = _immDPool.get(q);
582 if (!p)
584 p = new (_dataAlloc) uint64_t;
585 *p = q;
586 _immDPool.put(q, p);
588 return p;
590 #endif
592 int Assembler::findMemFor(LIns *ins)
594 #if NJ_USES_IMMD_POOL
595 NanoAssert(!ins->isImmD());
596 #endif
597 if (!ins->isInAr()) {
598 uint32_t const arIndex = arReserve(ins);
599 ins->setArIndex(arIndex);
600 NanoAssert(_activation.isValidEntry(ins->getArIndex(), ins) == (arIndex != 0));
602 return arDisp(ins);
605 // XXX: this function is dangerous and should be phased out;
606 // See bug 513615. Calls to it should be replaced with a
607 // prepareResultReg() / generate code / freeResourcesOf() sequence.
608 Register Assembler::deprecated_prepResultReg(LIns *ins, RegisterMask allow)
610 #ifdef NANOJIT_IA32
611 // We used to have to worry about possibly popping the x87 stack here.
612 // But this function is no longer used on i386, and this assertion
613 // ensures that.
614 NanoAssert(0);
615 #endif
616 Register r = findRegFor(ins, allow);
617 deprecated_freeRsrcOf(ins);
618 return r;
621 // Finds a register in 'allow' to hold the result of 'ins'. Also
622 // generates code to spill the result if necessary. Called just prior to
623 // generating the code for 'ins' (because we generate code backwards).
625 // An example where no spill is necessary. Lines marked '*' are those
626 // done by this function.
628 // regstate: R
629 // asm: define res into r
630 // * regstate: R + r(res)
631 // ...
632 // asm: use res in r
634 // An example where a spill is necessary.
636 // regstate: R
637 // asm: define res into r
638 // * regstate: R + r(res)
639 // * asm: spill res from r
640 // regstate: R
641 // ...
642 // asm: restore res into r2
643 // regstate: R + r2(res) + other changes from "..."
644 // asm: use res in r2
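// Sketch of a common back-end pattern (assumed, simplified):
//     Register rr = prepareResultReg(ins, GpRegs);
//     ... emit the defining instruction with rr as its destination ...
//     freeResourcesOf(ins);
// Because emission runs backwards, any spill generated here lands after
// the defining instruction in run-time order.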
646 Register Assembler::prepareResultReg(LIns *ins, RegisterMask allow)
648 // At this point, we know the result of 'ins' is used later in the
649 // code, unless it is a call to an impure function that must be
650 // included for effect even though its result is ignored. It may have
651 // had to be evicted, in which case the restore will have already been
652 // generated, so we now generate the spill. QUERY: Is there any attempt
653 // to elide the spill if we know that all restores can be rematerialized?
654 #ifdef NANOJIT_IA32
655 const bool notInFST0 = (!ins->isInReg() || ins->getReg() != FST0);
656 Register r = findRegFor(ins, allow);
657 // If the result register is FST0, but FST0 is not in the post-regstate,
658 // then we must pop the x87 stack. This may occur because the result is
659 // unused, or because it has been stored to a spill slot or an XMM register.
660 const bool needPop = notInFST0 && (r == FST0);
661 const bool didSpill = asm_maybe_spill(ins, needPop);
662 if (!didSpill && needPop) {
663 // If the instruction is spilled, then the pop will have already
664 // been performed by the store to the stack slot. Otherwise, we
665 // must pop now. This may occur when the result of a LIR_calld
666 // to an impure (side-effecting) function is not used.
667 FSTP(FST0);
669 #else
670 Register r = findRegFor(ins, allow);
671 asm_maybe_spill(ins, false);
672 #endif
673 return r;
676 bool Assembler::asm_maybe_spill(LIns* ins, bool pop)
678 if (ins->isInAr()) {
679 int d = arDisp(ins);
680 Register r = ins->getReg();
681 verbose_only( RefBuf b;
682 if (_logc->lcbits & LC_Native) {
683 setOutputForEOL(" <= spill %s",
684 _thisfrag->lirbuf->printer->formatRef(&b, ins)); } )
685 #ifdef NANOJIT_IA32
686 asm_spill(r, d, pop);
687 #else
688 (void)pop;
689 asm_spill(r, d, ins->isQorD());
690 #endif
691 return true;
693 return false;
696 // XXX: This function is error-prone and should be phased out; see bug 513615.
697 void Assembler::deprecated_freeRsrcOf(LIns *ins)
699 if (ins->isInReg()) {
700 asm_maybe_spill(ins, /*pop*/false);
701 _allocator.retire(ins->getReg()); // free any register associated with entry
702 ins->clearReg();
704 if (ins->isInAr()) {
705 arFree(ins); // free any AR space associated with entry
706 ins->clearArIndex();
710 // Frees all record of registers and spill slots used by 'ins'.
711 void Assembler::freeResourcesOf(LIns *ins)
713 if (ins->isInReg()) {
714 _allocator.retire(ins->getReg()); // free any register associated with entry
715 ins->clearReg();
717 if (ins->isInAr()) {
718 arFree(ins); // free any AR space associated with entry
719 ins->clearArIndex();
723 // Frees 'r' in the RegAlloc regstate, if it's not already free.
724 void Assembler::evictIfActive(Register r)
726 if (LIns* vic = _allocator.getActive(r)) {
727 NanoAssert(vic->getReg() == r);
728 evict(vic);
732 // Frees 'r' (which currently holds the result of 'vic') in the regstate.
733 // An example:
735 // pre-regstate: eax(ld1)
736 // instruction: mov ebx,-4(ebp) <= restore add1 # %ebx is dest
737 // post-regstate: eax(ld1) ebx(add1)
739 // At run-time we are *restoring* 'add1' into %ebx, hence the call to
740 // asm_restore(). But at regalloc-time we are moving backwards through
741 // the code, so in that sense we are *evicting* 'add1' from %ebx.
743 void Assembler::evict(LIns* vic)
745 // Not free, need to steal.
746 Register r = vic->getReg();
748 NanoAssert(!_allocator.isFree(r));
749 NanoAssert(vic == _allocator.getActive(r));
751 verbose_only( RefBuf b;
752 if (_logc->lcbits & LC_Native) {
753 setOutputForEOL(" <= restore %s",
754 _thisfrag->lirbuf->printer->formatRef(&b, vic)); } )
755 asm_restore(vic, r);
757 _allocator.retire(r);
758 vic->clearReg();
760 // At this point 'vic' is unused (if rematerializable), or in a spill
761 // slot (if not).
764 // If we have this:
766 // W = ld(addp(B, lshp(I, k)))[d] , where int(1) <= k <= int(3)
768 // then we set base=B, index=I, scale=k.
770 // Otherwise, we must have this:
772 // W = ld(addp(B, I))[d]
774 // and we set base=B, index=I, scale=0.
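// Illustrative (hypothetical LIR): given
//     p = addp(B, lshp(I, 2)) ;  W = ldi p[4]
// this yields base=B, index=I, scale=2, which an x86/x64 back end can
// fold into an addressing mode of the form [B + I*4 + 4].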
776 void Assembler::getBaseIndexScale(LIns* addp, LIns** base, LIns** index, int* scale)
778 NanoAssert(addp->isop(LIR_addp));
780 *base = addp->oprnd1();
781 LIns* rhs = addp->oprnd2();
782 int k;
784 if (rhs->opcode() == LIR_lshp && rhs->oprnd2()->isImmI() &&
785 (k = rhs->oprnd2()->immI(), (1 <= k && k <= 3)))
787 *index = rhs->oprnd1();
788 *scale = k;
789 } else {
790 *index = rhs;
791 *scale = 0;
794 void Assembler::patch(GuardRecord *lr)
796 if (!lr->jmp) // the guard might have been eliminated as redundant
797 return;
798 Fragment *frag = lr->exit->target;
799 NanoAssert(frag->fragEntry != 0);
800 nPatchBranch((NIns*)lr->jmp, frag->fragEntry);
801 CodeAlloc::flushICache(lr->jmp, LARGEST_BRANCH_PATCH);
802 verbose_only(verbose_outputf("patching jump at %p to target %p\n",
803 lr->jmp, frag->fragEntry);)
806 void Assembler::patch(SideExit *exit)
808 GuardRecord *rec = exit->guards;
809 NanoAssert(rec);
810 while (rec) {
811 patch(rec);
812 rec = rec->next;
816 #ifdef NANOJIT_IA32
817 void Assembler::patch(SideExit* exit, SwitchInfo* si)
819 for (GuardRecord* lr = exit->guards; lr; lr = lr->next) {
820 Fragment *frag = lr->exit->target;
821 NanoAssert(frag->fragEntry != 0);
822 si->table[si->index] = frag->fragEntry;
825 #endif
827 NIns* Assembler::asm_exit(LIns* guard)
829 SideExit *exit = guard->record()->exit;
830 NIns* at = 0;
831 if (!_branchStateMap.get(exit))
833 at = asm_leave_trace(guard);
835 else
837 RegAlloc* captured = _branchStateMap.get(exit);
838 intersectRegisterState(*captured);
839 at = exit->target->fragEntry;
840 NanoAssert(at != 0);
841 _branchStateMap.remove(exit);
843 return at;
846 NIns* Assembler::asm_leave_trace(LIns* guard)
848 verbose_only( verbose_outputf("----------------------------------- ## END exit block %p", guard);)
850 // This point is unreachable. So free all the registers. If an
851 // instruction has a stack entry we will leave it alone, otherwise we
852 // free it entirely. intersectRegisterState() will restore.
853 RegAlloc capture = _allocator;
854 releaseRegisters();
856 swapCodeChunks();
857 _inExit = true;
859 #ifdef NANOJIT_IA32
860 debug_only( _sv_fpuStkDepth = _fpuStkDepth; _fpuStkDepth = 0; )
861 #endif
863 nFragExit(guard);
865 // Restore the callee-saved registers and parameters.
866 assignSavedRegs();
867 assignParamRegs();
869 intersectRegisterState(capture);
871 // this can be useful for breaking whenever an exit is taken
872 //INT3();
873 //NOP();
875 // we are done producing the exit logic for the guard so demark where our exit block code begins
876 NIns* jmpTarget = _nIns; // target in exit path for our mainline conditional jump
878 // swap back pointers, effectively storing the last location used in the exit path
879 swapCodeChunks();
880 _inExit = false;
882 //verbose_only( verbose_outputf(" LIR_xt/xf swapCodeChunks, _nIns is now %08X(%08X), _nExitIns is now %08X(%08X)",_nIns, *_nIns,_nExitIns,*_nExitIns) );
883 verbose_only( verbose_outputf("%p:", jmpTarget);)
884 verbose_only( verbose_outputf("----------------------------------- ## BEGIN exit block (LIR_xt|LIR_xf)") );
886 #ifdef NANOJIT_IA32
887 NanoAssertMsgf(_fpuStkDepth == _sv_fpuStkDepth, "LIR_xtf, _fpuStkDepth=%d, expect %d",_fpuStkDepth, _sv_fpuStkDepth);
888 debug_only( _fpuStkDepth = _sv_fpuStkDepth; _sv_fpuStkDepth = 9999; )
889 #endif
891 return jmpTarget;
894 void Assembler::compile(Fragment* frag, Allocator& alloc, bool optimize verbose_only(, LInsPrinter* printer))
896 verbose_only(
897 bool anyVerb = (_logc->lcbits & 0xFFFF & ~LC_FragProfile) > 0;
898 bool liveVerb = (_logc->lcbits & 0xFFFF & LC_Liveness) > 0;
901 /* BEGIN decorative preamble */
902 verbose_only(
903 if (anyVerb) {
904 _logc->printf("========================================"
905 "========================================\n");
906 _logc->printf("=== BEGIN LIR::compile(%p, %p)\n",
907 (void*)this, (void*)frag);
908 _logc->printf("===\n");
910 /* END decorative preamble */
912 verbose_only( if (liveVerb) {
913 _logc->printf("\n");
914 _logc->printf("=== Results of liveness analysis:\n");
915 _logc->printf("===\n");
916 LirReader br(frag->lastIns);
917 LirFilter* lir = &br;
918 if (optimize) {
919 StackFilter* sf = new (alloc) StackFilter(lir, alloc, frag->lirbuf->sp);
920 lir = sf;
922 live(lir, alloc, frag, _logc);
925 /* Set up the generic text output cache for the assembler */
926 verbose_only( StringList asmOutput(alloc); )
927 verbose_only( _outputCache = &asmOutput; )
929 beginAssembly(frag);
930 if (error())
931 return;
933 //_logc->printf("recompile trigger %X kind %d\n", (int)frag, frag->kind);
935 verbose_only( if (anyVerb) {
936 _logc->printf("=== Translating LIR fragments into assembly:\n");
939 // now the main trunk
940 verbose_only( RefBuf b; )
941 verbose_only( if (anyVerb) {
942 _logc->printf("=== -- Compile trunk %s: begin\n", printer->formatAddr(&b, frag));
945 // Used for debug printing, if needed
946 debug_only(ValidateReader *validate = NULL;)
947 verbose_only(
948 ReverseLister *pp_init = NULL;
949 ReverseLister *pp_after_sf = NULL;
952 // The LIR passes through these filters as listed in this
953 // function, viz, top to bottom.
955 // set up backwards pipeline: assembler <- StackFilter <- LirReader
956 LirFilter* lir = new (alloc) LirReader(frag->lastIns);
958 #ifdef DEBUG
959 // VALIDATION
960 validate = new (alloc) ValidateReader(lir);
961 lir = validate;
962 #endif
964 // INITIAL PRINTING
965 verbose_only( if (_logc->lcbits & LC_ReadLIR) {
966 pp_init = new (alloc) ReverseLister(lir, alloc, frag->lirbuf->printer, _logc,
967 "Initial LIR");
968 lir = pp_init;
971 // STACKFILTER
972 if (optimize) {
973 StackFilter* stackfilter = new (alloc) StackFilter(lir, alloc, frag->lirbuf->sp);
974 lir = stackfilter;
977 verbose_only( if (_logc->lcbits & LC_AfterSF) {
978 pp_after_sf = new (alloc) ReverseLister(lir, alloc, frag->lirbuf->printer, _logc,
979 "After StackFilter");
980 lir = pp_after_sf;
983 assemble(frag, lir);
985 // If we were accumulating debug info in the various ReverseListers,
986 // call finish() to emit whatever contents they have accumulated.
987 verbose_only(
988 if (pp_init) pp_init->finish();
989 if (pp_after_sf) pp_after_sf->finish();
992 verbose_only( if (anyVerb) {
993 _logc->printf("=== -- Compile trunk %s: end\n", printer->formatAddr(&b, frag));
996 endAssembly(frag);
998 // Reverse output so that assembly is displayed low-to-high.
999 // Up to this point, _outputCache has been non-NULL, and so has been
1000 // accumulating output. Now we set it to NULL, traverse the entire
1001 // list of stored strings, and hand them a second time to output.
1002 // Since _outputCache is now NULL, outputf just hands these strings
1003 // directly onwards to _logc->printf.
1004 verbose_only( if (anyVerb) {
1005 _logc->printf("\n");
1006 _logc->printf("=== Aggregated assembly output: BEGIN\n");
1007 _logc->printf("===\n");
1008 _outputCache = 0;
1009 for (Seq<char*>* p = asmOutput.get(); p != NULL; p = p->tail) {
1010 char *str = p->head;
1011 outputf(" %s", str);
1013 _logc->printf("===\n");
1014 _logc->printf("=== Aggregated assembly output: END\n");
1017 if (error())
1018 frag->fragEntry = 0;
1020 verbose_only( frag->nCodeBytes += codeBytes; )
1021 verbose_only( frag->nExitBytes += exitBytes; )
1023 /* BEGIN decorative postamble */
1024 verbose_only( if (anyVerb) {
1025 _logc->printf("\n");
1026 _logc->printf("===\n");
1027 _logc->printf("=== END LIR::compile(%p, %p)\n",
1028 (void*)this, (void*)frag);
1029 _logc->printf("========================================"
1030 "========================================\n");
1031 _logc->printf("\n");
1033 /* END decorative postamble */
1036 void Assembler::beginAssembly(Fragment *frag)
1038 verbose_only( codeBytes = 0; )
1039 verbose_only( exitBytes = 0; )
1041 reset();
1043 NanoAssert(codeList == 0);
1044 NanoAssert(codeStart == 0);
1045 NanoAssert(codeEnd == 0);
1046 NanoAssert(exitStart == 0);
1047 NanoAssert(exitEnd == 0);
1048 NanoAssert(_nIns == 0);
1049 NanoAssert(_nExitIns == 0);
1051 _thisfrag = frag;
1052 _inExit = false;
1054 setError(None);
1056 // native code gen buffer setup
1057 nativePageSetup();
1059 // make sure we got at least one page of memory
1060 if (error()) return;
1062 _epilogue = NULL;
1064 nBeginAssembly();
1067 void Assembler::assemble(Fragment* frag, LirFilter* reader)
1069 if (error()) return;
1070 _thisfrag = frag;
1072 // check the fragment is starting out with a sane profiling state
1073 verbose_only( NanoAssert(frag->nStaticExits == 0); )
1074 verbose_only( NanoAssert(frag->nCodeBytes == 0); )
1075 verbose_only( NanoAssert(frag->nExitBytes == 0); )
1076 verbose_only( NanoAssert(frag->profCount == 0); )
1077 verbose_only( if (_logc->lcbits & LC_FragProfile)
1078 NanoAssert(frag->profFragID > 0);
1079 else
1080 NanoAssert(frag->profFragID == 0); )
1082 _inExit = false;
1084 gen(reader);
1086 if (!error()) {
1087 // patch all branches
1088 NInsMap::Iter iter(_patches);
1089 while (iter.next()) {
1090 NIns* where = iter.key();
1091 LIns* target = iter.value();
1092 if (target->isop(LIR_jtbl)) {
1093 // Need to patch up a whole jump table, 'where' is the table.
1094 LIns *jtbl = target;
1095 NIns** native_table = (NIns**) (void *) where;
1096 for (uint32_t i = 0, n = jtbl->getTableSize(); i < n; i++) {
1097 LabelState* lstate = _labels.get(jtbl->getTarget(i));
1098 NIns* ntarget = lstate->addr;
1099 if (ntarget) {
1100 native_table[i] = ntarget;
1101 } else {
1102 setError(UnknownBranch);
1103 break;
1106 } else {
1107 // target is a label for a single-target branch
1108 LabelState *lstate = _labels.get(target);
1109 NIns* ntarget = lstate->addr;
1110 if (ntarget) {
1111 nPatchBranch(where, ntarget);
1112 } else {
1113 setError(UnknownBranch);
1114 break;
1121 void Assembler::cleanupAfterError()
1123 _codeAlloc.freeAll(codeList);
1124 if (_nExitIns)
1125 _codeAlloc.free(exitStart, exitEnd);
1126 _codeAlloc.free(codeStart, codeEnd);
1127 codeList = NULL;
1128 _codeAlloc.markAllExec(); // expensive but safe, we mark all code pages R-X
1131 void Assembler::endAssembly(Fragment* frag)
1133 // don't try to patch code if we are in an error state since we might have partially
1134 // overwritten the code cache already
1135 if (error()) {
1136 // something went wrong, release all allocated code memory
1137 cleanupAfterError();
1138 return;
1141 NIns* fragEntry = genPrologue();
1142 verbose_only( asm_output("[prologue]"); )
1144 debug_only(_activation.checkForResourceLeaks());
1146 NanoAssert(!_inExit);
1147 // save used parts of current block on fragment's code list, free the rest
1148 #if defined(NANOJIT_ARM) || defined(NANOJIT_MIPS)
1149 // [codeStart, _nSlot) ... gap ... [_nIns, codeEnd)
1150 if (_nExitIns) {
1151 _codeAlloc.addRemainder(codeList, exitStart, exitEnd, _nExitSlot, _nExitIns);
1152 verbose_only( exitBytes -= (_nExitIns - _nExitSlot) * sizeof(NIns); )
1154 _codeAlloc.addRemainder(codeList, codeStart, codeEnd, _nSlot, _nIns);
1155 verbose_only( codeBytes -= (_nIns - _nSlot) * sizeof(NIns); )
1156 #else
1157 // [codeStart ... gap ... [_nIns, codeEnd))
1158 if (_nExitIns) {
1159 _codeAlloc.addRemainder(codeList, exitStart, exitEnd, exitStart, _nExitIns);
1160 verbose_only( exitBytes -= (_nExitIns - exitStart) * sizeof(NIns); )
1162 _codeAlloc.addRemainder(codeList, codeStart, codeEnd, codeStart, _nIns);
1163 verbose_only( codeBytes -= (_nIns - codeStart) * sizeof(NIns); )
1164 #endif
1166 // note: the code pages are no longer writable from this point onwards
1167 _codeAlloc.markExec(codeList);
1169 // at this point all our new code is in the d-cache and not the i-cache,
1170 // so flush the i-cache on CPUs that need it.
1171 CodeAlloc::flushICache(codeList);
1173 // save entry point pointers
1174 frag->fragEntry = fragEntry;
1175 frag->setCode(_nIns);
1177 #ifdef VMCFG_VTUNE
1178 if (vtuneHandle)
1180 vtuneEnd(vtuneHandle, codeEnd);
1181 vtuneStart(vtuneHandle, _nIns);
1183 #endif
1185 PERFM_NVPROF("code", CodeAlloc::size(codeList));
1187 #ifdef NANOJIT_IA32
1188 NanoAssertMsgf(_fpuStkDepth == 0,"_fpuStkDepth %d\n",_fpuStkDepth);
1189 #endif
1191 debug_only( pageValidate(); )
1192 NanoAssert(_branchStateMap.isEmpty());
1195 void Assembler::releaseRegisters()
1197 RegisterMask active = _allocator.activeMask();
1198 for (Register r = lsReg(active); active; r = nextLsReg(active, r))
1200 LIns *ins = _allocator.getActive(r);
1201 // Clear reg allocation, preserve stack allocation.
1202 _allocator.retire(r);
1203 NanoAssert(r == ins->getReg());
1204 ins->clearReg();
1208 #ifdef PERFM
1209 #define countlir_live() _nvprof("lir-live",1)
1210 #define countlir_ret() _nvprof("lir-ret",1)
1211 #define countlir_alloc() _nvprof("lir-alloc",1)
1212 #define countlir_var() _nvprof("lir-var",1)
1213 #define countlir_use() _nvprof("lir-use",1)
1214 #define countlir_def() _nvprof("lir-def",1)
1215 #define countlir_imm() _nvprof("lir-imm",1)
1216 #define countlir_param() _nvprof("lir-param",1)
1217 #define countlir_cmov() _nvprof("lir-cmov",1)
1218 #define countlir_ld() _nvprof("lir-ld",1)
1219 #define countlir_ldq() _nvprof("lir-ldq",1)
1220 #define countlir_alu() _nvprof("lir-alu",1)
1221 #define countlir_qjoin() _nvprof("lir-qjoin",1)
1222 #define countlir_qlo() _nvprof("lir-qlo",1)
1223 #define countlir_qhi() _nvprof("lir-qhi",1)
1224 #define countlir_fpu() _nvprof("lir-fpu",1)
1225 #define countlir_st() _nvprof("lir-st",1)
1226 #define countlir_stq() _nvprof("lir-stq",1)
1227 #define countlir_jmp() _nvprof("lir-jmp",1)
1228 #define countlir_jcc() _nvprof("lir-jcc",1)
1229 #define countlir_label() _nvprof("lir-label",1)
1230 #define countlir_xcc() _nvprof("lir-xcc",1)
1231 #define countlir_x() _nvprof("lir-x",1)
1232 #define countlir_call() _nvprof("lir-call",1)
1233 #define countlir_jtbl() _nvprof("lir-jtbl",1)
1234 #else
1235 #define countlir_live()
1236 #define countlir_ret()
1237 #define countlir_alloc()
1238 #define countlir_var()
1239 #define countlir_use()
1240 #define countlir_def()
1241 #define countlir_imm()
1242 #define countlir_param()
1243 #define countlir_cmov()
1244 #define countlir_ld()
1245 #define countlir_ldq()
1246 #define countlir_alu()
1247 #define countlir_qjoin()
1248 #define countlir_qlo()
1249 #define countlir_qhi()
1250 #define countlir_fpu()
1251 #define countlir_st()
1252 #define countlir_stq()
1253 #define countlir_jmp()
1254 #define countlir_jcc()
1255 #define countlir_label()
1256 #define countlir_xcc()
1257 #define countlir_x()
1258 #define countlir_call()
1259 #define countlir_jtbl()
1260 #endif
1262 void Assembler::asm_jmp(LIns* ins, InsList& pending_lives)
1264 NanoAssert((ins->isop(LIR_j) && !ins->oprnd1()) ||
1265 (ins->isop(LIR_jf) && ins->oprnd1()->isImmI(0)) ||
1266 (ins->isop(LIR_jt) && ins->oprnd1()->isImmI(1)));
1268 countlir_jmp();
1269 LIns* to = ins->getTarget();
1270 LabelState *label = _labels.get(to);
1271 // The jump is always taken so whatever register state we
1272 // have from downstream code is irrelevant to code before
1273 // this jump. So clear it out. We will pick up register
1274 // state from the jump target, if we have seen that label.
1275 releaseRegisters();
1276 #ifdef NANOJIT_IA32
1277 // Unreachable, so assume correct stack depth.
1278 debug_only( _fpuStkDepth = 0; )
1279 #endif
1280 if (label && label->addr) {
1281 // Forward jump - pick up register state from target.
1282 unionRegisterState(label->regs);
1283 #ifdef NANOJIT_IA32
1284 // Set stack depth according to the register state we just loaded,
1285 // negating the effect of any unreachable x87 stack pop that might
1286 // have been emitted by unionRegisterState().
1287 debug_only( _fpuStkDepth = (_allocator.getActive(FST0) ? -1 : 0); )
1288 #endif
1289 JMP(label->addr);
1291 else {
1292 // Backwards jump.
1293 handleLoopCarriedExprs(pending_lives);
1294 if (!label) {
1295 // save empty register state at loop header
1296 _labels.add(to, 0, _allocator);
1298 else {
1299 intersectRegisterState(label->regs);
1300 #ifdef NANOJIT_IA32
1301 debug_only( _fpuStkDepth = (_allocator.getActive(FST0) ? -1 : 0); )
1302 #endif
1304 JMP(0);
1305 _patches.put(_nIns, to);
1309 void Assembler::asm_jcc(LIns* ins, InsList& pending_lives)
1311 bool branchOnFalse = (ins->opcode() == LIR_jf);
1312 LIns* cond = ins->oprnd1();
1313 if (cond->isImmI()) {
1314 if ((!branchOnFalse && !cond->immI()) || (branchOnFalse && cond->immI())) {
1315 // jmp never taken, not needed
1316 } else {
1317 asm_jmp(ins, pending_lives); // jmp always taken
1319 return;
1322 // Changes to the logic below will likely need to be propagated to Assembler::asm_jov().
1324 countlir_jcc();
1325 LIns* to = ins->getTarget();
1326 LabelState *label = _labels.get(to);
1327 if (label && label->addr) {
1328 // Forward jump to known label. Need to merge with label's register state.
1329 unionRegisterState(label->regs);
1330 asm_branch(branchOnFalse, cond, label->addr);
1332 else {
1333 // Back edge.
1334 handleLoopCarriedExprs(pending_lives);
1335 if (!label) {
1336 // Evict all registers, most conservative approach.
1337 evictAllActiveRegs();
1338 _labels.add(to, 0, _allocator);
1340 else {
1341 // Evict all registers, most conservative approach.
1342 intersectRegisterState(label->regs);
1344 NIns *branch = asm_branch(branchOnFalse, cond, 0);
1345 _patches.put(branch,to);
1349 void Assembler::asm_jov(LIns* ins, InsList& pending_lives)
1351 // The caller is responsible for countlir_* profiling, unlike
1352 // asm_jcc above. The reason for this is that asm_jov may not be
1353 // called if the instruction is dead, and it is our convention
1354 // to count such instructions anyway.
1355 LOpcode op = ins->opcode();
1356 LIns* to = ins->getTarget();
1357 LabelState *label = _labels.get(to);
1358 if (label && label->addr) {
1359 // forward jump to known label. need to merge with label's register state.
1360 unionRegisterState(label->regs);
1361 asm_branch_ov(op, label->addr);
1363 else {
1364 // back edge.
1365 handleLoopCarriedExprs(pending_lives);
1366 if (!label) {
1367 // evict all registers, most conservative approach.
1368 evictAllActiveRegs();
1369 _labels.add(to, 0, _allocator);
1371 else {
1372 // evict all registers, most conservative approach.
1373 intersectRegisterState(label->regs);
1375 NIns *branch = asm_branch_ov(op, 0);
1376 _patches.put(branch,to);
1380 void Assembler::asm_x(LIns* ins)
1382 verbose_only( _thisfrag->nStaticExits++; )
1383 countlir_x();
1384 // Generate the side exit branch on the main trace.
1385 NIns *exit = asm_exit(ins);
1386 JMP(exit);
1389 void Assembler::asm_xcc(LIns* ins)
1391 LIns* cond = ins->oprnd1();
1392 if (cond->isImmI()) {
1393 if ((ins->isop(LIR_xt) && !cond->immI()) || (ins->isop(LIR_xf) && cond->immI())) {
1394 // guard never taken, not needed
1395 } else {
1396 asm_x(ins); // guard always taken
1398 return;
1401 verbose_only( _thisfrag->nStaticExits++; )
1402 countlir_xcc();
1403 // We only support cmp with guard right now; also assume it is 'close'
1404 // and only emit the branch.
1405 NIns* exit = asm_exit(ins); // does intersectRegisterState()
1406 asm_branch(ins->opcode() == LIR_xf, cond, exit);
1409 // helper function for the nop-insertion feature; it results in no more
1410 // than one no-op instruction being inserted every 128-1151 bytes
1411 static inline uint32_t noiseForNopInsertion(Noise* n) {
1412 return n->getValue(1023) + 128;
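// (Assuming Noise::getValue(1023) yields a value in [0, 1023], the trigger
//  is a pseudo-random byte count in the range [128, 1151], matching the
//  comment above.)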
1415 void Assembler::gen(LirFilter* reader)
1417 NanoAssert(_thisfrag->nStaticExits == 0);
1419 InsList pending_lives(alloc);
1421 NanoAssert(!error());
1423 // compiler hardening setup
1424 NIns* priorIns = _nIns;
1425 int32_t nopInsertTrigger = hardenNopInsertion(_config) ? noiseForNopInsertion(_noise): 0;
1427 // What's going on here: we're visiting all the LIR instructions in
1428 // the buffer, working strictly backwards in buffer-order, and
1429 // generating machine instructions for them as we go.
1431 // For each LIns, we first check if it's live. If so we mark its
1432 // operands as also live, and then generate code for it *if
1433 // necessary*. It may not be necessary if the instruction is an
1434 // expression and code has already been generated for all its uses in
1435 // combination with previously handled instructions (ins->isExtant()
1436 // will return false if this is so).
1438 // Note that the backwards code traversal can make register allocation
1439 // confusing. (For example, we restore a value before we spill it!)
1440 // In particular, words like "before" and "after" must be used very
1441 // carefully -- their meaning at regalloc-time is opposite to their
1442 // meaning at run-time. We use the term "pre-regstate" to refer to
1443 // the register allocation state that occurs prior to an instruction's
1444 // execution, and "post-regstate" to refer to the state that occurs
1445 // after an instruction's execution, e.g.:
1447 // pre-regstate: ebx(ins)
1448 // instruction: mov eax, ebx // mov dst, src
1449 // post-regstate: eax(ins)
1451 // At run-time, the instruction updates the pre-regstate into the
1452 // post-regstate (and these states are the real machine's regstates).
1453 // But when allocating registers, because we go backwards, the
1454 // pre-regstate is constructed from the post-regstate (and these
1455 // regstates are those stored in RegAlloc).
1457 // One consequence of generating code backwards is that we tend to
1458 // both spill and restore registers as early (at run-time) as
1459 // possible; this is good for tolerating memory latency. If we
1460 // generated code forwards, we would expect to both spill and restore
1461 // registers as late (at run-time) as possible; this might be better
1462 // for reducing register pressure.
1464 // The trace must end with one of these opcodes. Mark it as live.
1465 NanoAssert(reader->finalIns()->isop(LIR_x) ||
1466 reader->finalIns()->isop(LIR_xtbl) ||
1467 reader->finalIns()->isRet() ||
1468 isLiveOpcode(reader->finalIns()->opcode()));
1470 for (currIns = reader->read(); !currIns->isop(LIR_start); currIns = reader->read())
1472 LIns* ins = currIns; // give it a shorter name for local use
1474 if (!ins->isLive()) {
1475 NanoAssert(!ins->isExtant());
1476 continue;
1479 #ifdef NJ_VERBOSE
1480 // Output the post-regstate (registers and/or activation).
1481 // Because asm output comes in reverse order, doing it now means
1482 // it is printed after the LIR and native code, exactly when the
1483 // post-regstate should be shown.
1484 if ((_logc->lcbits & LC_Native) && (_logc->lcbits & LC_Activation))
1485 printActivationState();
1486 if ((_logc->lcbits & LC_Native) && (_logc->lcbits & LC_RegAlloc))
1487 printRegState();
1488 #endif
1490 // compiler hardening technique that inserts no-op instructions in the compiled method when nopInsertTrigger < 0
1491 if (hardenNopInsertion(_config))
1493 size_t delta = (uintptr_t)priorIns - (uintptr_t)_nIns; // # bytes that have been emitted since last go-around
1495 // if no codeList then we know priorIns and _nIns are on the same page, otherwise make sure priorIns was not in the previous code block
1496 if (!codeList || !codeList->isInBlock(priorIns)) {
1497 NanoAssert(delta < VMPI_getVMPageSize()); // sanity check
1498 nopInsertTrigger -= (int32_t) delta;
1499 if (nopInsertTrigger < 0)
1501 nopInsertTrigger = noiseForNopInsertion(_noise);
1502 asm_insert_random_nop();
1503 PERFM_NVPROF("hardening:nop-insert", 1);
1506 priorIns = _nIns;
1509 LOpcode op = ins->opcode();
1510 switch (op)
1512 default:
1513 NanoAssertMsgf(false, "unsupported LIR instruction: %d\n", op);
1514 break;
1516 case LIR_regfence:
1517 evictAllActiveRegs();
1518 break;
1520 case LIR_livei:
1521 CASE64(LIR_liveq:)
1522 case LIR_lived: {
1523 countlir_live();
1524 LIns* op1 = ins->oprnd1();
1525 op1->setResultLive();
1526 // LIR_allocp's are meant to live until the point of the
1527 // LIR_livep instruction; marking other expressions as
1528 // live ensures that they remain so at loop bottoms.
1529 // LIR_allocp areas require special treatment because they
1530 // are accessed indirectly and the indirect accesses are
1531 // invisible to the assembler, other than via LIR_livep.
1532 // Other expression results are only accessed directly in
1533 // ways that are visible to the assembler, so extending
1534 // those expressions' lifetimes past the last loop edge
1535 // isn't necessary.
1536 if (op1->isop(LIR_allocp)) {
1537 findMemFor(op1);
1538 } else {
1539 pending_lives.add(ins);
1541 break;
1544 case LIR_reti:
1545 CASE64(LIR_retq:)
1546 case LIR_retd:
1547 countlir_ret();
1548 ins->oprnd1()->setResultLive();
1549 asm_ret(ins);
1550 break;
1552 // Allocate some stack space. The value of this instruction
1553 // is the address of the stack space.
1554 case LIR_allocp:
1555 countlir_alloc();
1556 if (ins->isExtant()) {
1557 NanoAssert(ins->isInAr());
1558 if (ins->isInReg())
1559 evict(ins);
1560 freeResourcesOf(ins);
1562 break;
1564 case LIR_immi:
1565 countlir_imm();
1566 if (ins->isExtant()) {
1567 asm_immi(ins);
1569 break;
1571 #ifdef NANOJIT_64BIT
1572 case LIR_immq:
1573 countlir_imm();
1574 if (ins->isExtant()) {
1575 asm_immq(ins);
1577 break;
1578 #endif
1579 case LIR_immd:
1580 countlir_imm();
1581 if (ins->isExtant()) {
1582 asm_immd(ins);
1584 break;
1586 case LIR_paramp:
1587 countlir_param();
1588 if (ins->isExtant()) {
1589 asm_param(ins);
1591 break;
1593 #if NJ_SOFTFLOAT_SUPPORTED
1594 case LIR_hcalli: {
1595 LIns* op1 = ins->oprnd1();
1596 op1->setResultLive();
1597 if (ins->isExtant()) {
1598 // Return result of quad-call in register.
1599 deprecated_prepResultReg(ins, rmask(retRegs[1]));
1600 // If hi half was used, we must use the call to ensure it happens.
1601 findSpecificRegFor(op1, retRegs[0]);
1603 break;
1606 case LIR_dlo2i:
1607 countlir_qlo();
1608 ins->oprnd1()->setResultLive();
1609 if (ins->isExtant()) {
1610 asm_qlo(ins);
1612 break;
1614 case LIR_dhi2i:
1615 countlir_qhi();
1616 ins->oprnd1()->setResultLive();
1617 if (ins->isExtant()) {
1618 asm_qhi(ins);
1620 break;
1622 case LIR_ii2d:
1623 countlir_qjoin();
1624 ins->oprnd1()->setResultLive();
1625 ins->oprnd2()->setResultLive();
1626 if (ins->isExtant()) {
1627 asm_qjoin(ins);
1629 break;
1630 #endif
1631 case LIR_cmovi:
1632 CASE64(LIR_cmovq:)
1633 case LIR_cmovd:
1634 countlir_cmov();
1635 ins->oprnd1()->setResultLive();
1636 ins->oprnd2()->setResultLive();
1637 ins->oprnd3()->setResultLive();
1638 if (ins->isExtant()) {
1639 asm_cmov(ins);
1641 break;
1643 case LIR_lduc2ui:
1644 case LIR_ldus2ui:
1645 case LIR_ldc2i:
1646 case LIR_lds2i:
1647 case LIR_ldi:
1648 countlir_ld();
1649 ins->oprnd1()->setResultLive();
1650 if (ins->isExtant()) {
1651 asm_load32(ins);
1653 break;
1655 CASE64(LIR_ldq:)
1656 case LIR_ldd:
1657 case LIR_ldf2d:
1658 countlir_ldq();
1659 ins->oprnd1()->setResultLive();
1660 if (ins->isExtant()) {
1661 asm_load64(ins);
1663 break;
1665 case LIR_negi:
1666 case LIR_noti:
1667 countlir_alu();
1668 ins->oprnd1()->setResultLive();
1669 if (ins->isExtant()) {
1670 asm_neg_not(ins);
1672 break;
1674 #if defined NANOJIT_64BIT
1675 case LIR_addq:
1676 case LIR_subq:
1677 case LIR_andq:
1678 case LIR_lshq:
1679 case LIR_rshuq:
1680 case LIR_rshq:
1681 case LIR_orq:
1682 case LIR_xorq:
1683 countlir_alu();
1684 ins->oprnd1()->setResultLive();
1685 ins->oprnd2()->setResultLive();
1686 if (ins->isExtant()) {
1687 asm_qbinop(ins);
1689 break;
1690 #endif
1692 case LIR_addi:
1693 case LIR_subi:
1694 case LIR_muli:
1695 case LIR_andi:
1696 case LIR_ori:
1697 case LIR_xori:
1698 case LIR_lshi:
1699 case LIR_rshi:
1700 case LIR_rshui:
1701 CASE86(LIR_divi:)
1702 countlir_alu();
1703 ins->oprnd1()->setResultLive();
1704 ins->oprnd2()->setResultLive();
1705 if (ins->isExtant()) {
1706 asm_arith(ins);
1708 break;
1710 #if defined NANOJIT_IA32 || defined NANOJIT_X64
1711 CASE86(LIR_modi:)
1712 countlir_alu();
1713 ins->oprnd1()->setResultLive();
1714 if (ins->isExtant()) {
1715 asm_arith(ins);
1717 break;
1718 #endif
1720 case LIR_negd:
1721 countlir_fpu();
1722 ins->oprnd1()->setResultLive();
1723 if (ins->isExtant()) {
1724 asm_fneg(ins);
1726 break;
1728 case LIR_addd:
1729 case LIR_subd:
1730 case LIR_muld:
1731 case LIR_divd:
1732 countlir_fpu();
1733 ins->oprnd1()->setResultLive();
1734 ins->oprnd2()->setResultLive();
1735 if (ins->isExtant()) {
1736 asm_fop(ins);
1738 break;
1740 case LIR_i2d:
1741 countlir_fpu();
1742 ins->oprnd1()->setResultLive();
1743 if (ins->isExtant()) {
1744 asm_i2d(ins);
1746 break;
1748 case LIR_ui2d:
1749 countlir_fpu();
1750 ins->oprnd1()->setResultLive();
1751 if (ins->isExtant()) {
1752 asm_ui2d(ins);
1754 break;
1756 case LIR_d2i:
1757 countlir_fpu();
1758 ins->oprnd1()->setResultLive();
1759 if (ins->isExtant()) {
1760 asm_d2i(ins);
1762 break;
1764 #ifdef NANOJIT_64BIT
1765 case LIR_i2q:
1766 case LIR_ui2uq:
1767 countlir_alu();
1768 ins->oprnd1()->setResultLive();
1769 if (ins->isExtant()) {
1770 asm_ui2uq(ins);
1772 break;
1774 case LIR_q2i:
1775 countlir_alu();
1776 ins->oprnd1()->setResultLive();
1777 if (ins->isExtant()) {
1778 asm_q2i(ins);
1780 break;
1782 case LIR_dasq:
1783 countlir_alu();
1784 ins->oprnd1()->setResultLive();
1785 if (ins->isExtant()) {
1786 asm_dasq(ins);
1788 break;
1790 case LIR_qasd:
1791 countlir_alu();
1792 ins->oprnd1()->setResultLive();
1793 if (ins->isExtant()) {
1794 asm_qasd(ins);
1796 break;
1797 #endif
1798 case LIR_sti2c:
1799 case LIR_sti2s:
1800 case LIR_sti:
1801 countlir_st();
1802 ins->oprnd1()->setResultLive();
1803 ins->oprnd2()->setResultLive();
1804 asm_store32(op, ins->oprnd1(), ins->disp(), ins->oprnd2());
1805 break;
1807 CASE64(LIR_stq:)
1808 case LIR_std:
1809 case LIR_std2f: {
1810 countlir_stq();
1811 ins->oprnd1()->setResultLive();
1812 ins->oprnd2()->setResultLive();
1813 LIns* value = ins->oprnd1();
1814 LIns* base = ins->oprnd2();
1815 int dr = ins->disp();
1816 #if NJ_SOFTFLOAT_SUPPORTED
1817 if (value->isop(LIR_ii2d) && op == LIR_std)
1819 // This is correct for little-endian only.
1820 asm_store32(LIR_sti, value->oprnd1(), dr, base);
1821 asm_store32(LIR_sti, value->oprnd2(), dr+4, base);
1823 else
1824 #endif
1826 asm_store64(op, value, dr, base);
1828 break;
1831 case LIR_j:
1832 asm_jmp(ins, pending_lives);
1833 break;
1835 case LIR_jt:
1836 case LIR_jf:
1837 ins->oprnd1()->setResultLive();
1838 asm_jcc(ins, pending_lives);
1839 break;
1841 #if NJ_JTBL_SUPPORTED
1842 case LIR_jtbl: {
1843 countlir_jtbl();
1844 ins->oprnd1()->setResultLive();
1845 // Multiway jump can contain both forward and backward jumps.
1846 // Out of range indices aren't allowed or checked.
1847 // Code after this jtbl instruction is unreachable.
1848 releaseRegisters();
1849 NanoAssert(_allocator.activeMask() == 0);
1851 uint32_t count = ins->getTableSize();
1852 bool has_back_edges = false;
1854 // Merge the regstates of labels we have already seen.
1855 for (uint32_t i = count; i-- > 0;) {
1856 LIns* to = ins->getTarget(i);
1857 LabelState *lstate = _labels.get(to);
1858 if (lstate) {
1859 unionRegisterState(lstate->regs);
1860 verbose_only( RefBuf b; )
1861 asm_output(" %u: [&%s]", i, _thisfrag->lirbuf->printer->formatRef(&b, to));
1862 } else {
1863 has_back_edges = true;
1866 asm_output("forward edges");
1868 // In a multi-way jump, the register allocator has no ability to deal
1869 // with two existing edges that have conflicting register assignments, unlike
1870 // a conditional branch where code can be inserted on the fall-through path
1871 // to reconcile registers. So, frontends *must* insert LIR_regfence at labels of
1872 // forward jtbl jumps. Check here to make sure no registers were picked up from
1873 // any forward edges.
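// Frontend sketch (assumption): when building LIR, a frontend would emit a
// register fence at each such label, e.g.
//     lirout->ins0(LIR_regfence);
// adjacent to the corresponding lirout->ins0(LIR_label), so that no
// register assignments survive into the label.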
1874 NanoAssert(_allocator.activeMask() == 0);
1876 if (has_back_edges) {
1877 handleLoopCarriedExprs(pending_lives);
1878 // save merged (empty) register state at target labels we haven't seen yet
1879 for (uint32_t i = count; i-- > 0;) {
1880 LIns* to = ins->getTarget(i);
1881 LabelState *lstate = _labels.get(to);
1882 if (!lstate) {
1883 _labels.add(to, 0, _allocator);
1884 verbose_only( RefBuf b; )
1885 asm_output(" %u: [&%s]", i, _thisfrag->lirbuf->printer->formatRef(&b, to));
1888 asm_output("backward edges");
1891 // Emit the jump instruction, which allocates 1 register for the jump index.
1892 NIns** native_table = new (_dataAlloc) NIns*[count];
1893 asm_output("[%p]:", (void*)native_table);
1894 _patches.put((NIns*)native_table, ins);
1895 asm_jtbl(ins, native_table);
1896 break;
1898 #endif
1900 case LIR_label: {
1901 countlir_label();
1902 LabelState *label = _labels.get(ins);
1903 // add profiling inc, if necessary.
1904 verbose_only( if (_logc->lcbits & LC_FragProfile) {
1905 if (ins == _thisfrag->loopLabel)
1906 asm_inc_m32(& _thisfrag->profCount);
1908 if (!label) {
1909 // label seen first, normal target of forward jump, save addr & allocator
1910 _labels.add(ins, _nIns, _allocator);
1912 else {
1913 // we're at the top of a loop
1914 NanoAssert(label->addr == 0);
1915 //evictAllActiveRegs();
1916 intersectRegisterState(label->regs);
1917 label->addr = _nIns;
1919 verbose_only(
1920 RefBuf b;
1921 if (_logc->lcbits & LC_Native) {
1922 asm_output("[%s]", _thisfrag->lirbuf->printer->formatRef(&b, ins));
1924 break;
1927 case LIR_xbarrier:
1928 break;
1930 case LIR_xtbl: {
1931 ins->oprnd1()->setResultLive();
1932 #ifdef NANOJIT_IA32
1933 NIns* exit = asm_exit(ins); // does intersectRegisterState()
1934 asm_switch(ins, exit);
1935 #else
1936 NanoAssertMsg(0, "Not supported for this architecture");
1937 #endif
1938 break;
1941 case LIR_xt:
1942 case LIR_xf:
1943 ins->oprnd1()->setResultLive();
1944 asm_xcc(ins);
1945 break;
1947 case LIR_x:
1948 asm_x(ins);
1949 break;
1951 case LIR_addxovi:
1952 case LIR_subxovi:
1953 case LIR_mulxovi:
1954 verbose_only( _thisfrag->nStaticExits++; )
1955 countlir_xcc();
1956 countlir_alu();
1957 ins->oprnd1()->setResultLive();
1958 ins->oprnd2()->setResultLive();
1959 if (ins->isExtant()) {
1960 NIns* exit = asm_exit(ins); // does intersectRegisterState()
1961 asm_branch_ov(op, exit);
1962 asm_arith(ins);
1964 break;
1966 case LIR_addjovi:
1967 case LIR_subjovi:
1968 case LIR_muljovi:
1969 countlir_jcc();
1970 countlir_alu();
1971 ins->oprnd1()->setResultLive();
1972 ins->oprnd2()->setResultLive();
1973 if (ins->isExtant()) {
1974 asm_jov(ins, pending_lives);
1975 asm_arith(ins);
1977 break;
1979 #ifdef NANOJIT_64BIT
1980 case LIR_addjovq:
1981 case LIR_subjovq:
1982 countlir_jcc();
1983 countlir_alu();
1984 ins->oprnd1()->setResultLive();
1985 ins->oprnd2()->setResultLive();
1986 if (ins->isExtant()) {
1987 asm_jov(ins, pending_lives);
1988 asm_qbinop(ins);
1990 break;
1991 #endif
1993 case LIR_eqd:
1994 case LIR_led:
1995 case LIR_ltd:
1996 case LIR_gtd:
1997 case LIR_ged:
1998 countlir_fpu();
1999 ins->oprnd1()->setResultLive();
2000 ins->oprnd2()->setResultLive();
2001 if (ins->isExtant()) {
2002 asm_condd(ins);
2004 break;
2006 case LIR_eqi:
2007 case LIR_lei:
2008 case LIR_lti:
2009 case LIR_gti:
2010 case LIR_gei:
2011 case LIR_ltui:
2012 case LIR_leui:
2013 case LIR_gtui:
2014 case LIR_geui:
2015 CASE64(LIR_eqq:)
2016 CASE64(LIR_leq:)
2017 CASE64(LIR_ltq:)
2018 CASE64(LIR_gtq:)
2019 CASE64(LIR_geq:)
2020 CASE64(LIR_ltuq:)
2021 CASE64(LIR_leuq:)
2022 CASE64(LIR_gtuq:)
2023 CASE64(LIR_geuq:)
2024 countlir_alu();
2025 ins->oprnd1()->setResultLive();
2026 ins->oprnd2()->setResultLive();
2027 if (ins->isExtant()) {
2028 asm_cond(ins);
2030 break;
2032 case LIR_callv:
2033 case LIR_calli:
2034 CASE64(LIR_callq:)
2035 case LIR_calld:
2036 countlir_call();
2037 for (int i = 0, argc = ins->argc(); i < argc; i++)
2038 ins->arg(i)->setResultLive();
2039 // It must be impure or pure-and-extant -- it couldn't be
2040 // pure-and-not-extant, because there's no way the codegen
2041 // for a call can be folded into the codegen of another
2042 // LIR instruction.
2043 NanoAssert(!ins->callInfo()->_isPure || ins->isExtant());
2044 asm_call(ins);
2045 break;
2047 #ifdef VMCFG_VTUNE
2048 case LIR_file: {
2049                 // We traverse backwards, so we are now hitting the file
2050                 // that is associated with a bunch of LIR_lines we have already seen.
2051 if (vtuneHandle) {
2052 void * currentFile = (void *) ins->oprnd1()->immI();
2053 vtuneFile(vtuneHandle, currentFile);
2055 break;
2057 case LIR_line: {
2058                 // add a new table entry; we don't yet know which file it belongs
2059                 // to, so we need to add it to the update table too.
2060                 // note the alloc; the actual act is delayed; see above
2061 if (vtuneHandle) {
2062 uint32_t currentLine = (uint32_t) ins->oprnd1()->immI();
2063 vtuneLine(vtuneHandle, currentLine, _nIns);
2065 break;
2067 #endif // VMCFG_VTUNE
2069 case LIR_comment:
2070 // Do nothing.
2071 break;
2074 #ifdef NJ_VERBOSE
2075 // We do final LIR printing inside this loop to avoid printing
2076 // dead LIR instructions. We print the LIns after generating the
2077 // code. This ensures that the LIns will appear in debug output
2078 // *before* the native code, because Assembler::outputf()
2079 // prints everything in reverse.
2081 if (_logc->lcbits & LC_AfterDCE) {
2082 InsBuf b;
2083 LInsPrinter* printer = _thisfrag->lirbuf->printer;
2084 if (ins->isop(LIR_comment))
2085 outputf("%s", printer->formatIns(&b, ins));
2086 else
2087 outputf(" %s", printer->formatIns(&b, ins));
2089 #endif
2091 if (error())
2092 return;
2094             // check that all is well (don't check in exit paths since it's more complicated)
2095 debug_only( pageValidate(); )
2096 debug_only( resourceConsistencyCheck(); )
2101 * Write a jump table for the given SwitchInfo and store the table
2102 * address in the SwitchInfo. Every entry will initially point to
2103 * target.
2105 void Assembler::emitJumpTable(SwitchInfo* si, NIns* target)
2107 si->table = (NIns **) alloc.alloc(si->count * sizeof(NIns*));
2108 for (uint32_t i = 0; i < si->count; ++i)
2109 si->table[i] = target;
2112 void Assembler::assignSavedRegs()
2114             // Restore saved registers.
2115 LirBuffer *b = _thisfrag->lirbuf;
2116 for (int i=0, n = NumSavedRegs; i < n; i++) {
2117 LIns *p = b->savedRegs[i];
2118 if (p)
2119 findSpecificRegForUnallocated(p, savedRegs[p->paramArg()]);
2123 void Assembler::reserveSavedRegs()
2125 LirBuffer *b = _thisfrag->lirbuf;
2126 for (int i = 0, n = NumSavedRegs; i < n; i++) {
2127 LIns *ins = b->savedRegs[i];
2128 if (ins)
2129 findMemFor(ins);
2133 void Assembler::assignParamRegs()
2135 LIns* state = _thisfrag->lirbuf->state;
2136 if (state)
2137 findSpecificRegForUnallocated(state, argRegs[state->paramArg()]);
2138 LIns* param1 = _thisfrag->lirbuf->param1;
2139 if (param1)
2140 findSpecificRegForUnallocated(param1, argRegs[param1->paramArg()]);
2143 void Assembler::handleLoopCarriedExprs(InsList& pending_lives)
2145 // ensure that exprs spanning the loop are marked live at the end of the loop
2146 reserveSavedRegs();
2147 for (Seq<LIns*> *p = pending_lives.get(); p != NULL; p = p->tail) {
2148 LIns *ins = p->head;
2149 NanoAssert(isLiveOpcode(ins->opcode()));
2150 LIns *op1 = ins->oprnd1();
2151 // Must findMemFor even if we're going to findRegFor; loop-carried
2152 // operands may spill on another edge, and we need them to always
2153 // spill to the same place.
2154 #if NJ_USES_IMMD_POOL
2155 // Exception: if float constants are true constants, we should
2156 // never call findMemFor on those ops.
2157 if (!op1->isImmD())
2158 #endif
2160 findMemFor(op1);
2162 if (!op1->isImmAny())
2163 findRegFor(op1, ins->isop(LIR_lived) ? FpRegs : GpRegs);
2166             // Clear this list since we have now dealt with those lifetimes.  Extending
2167             // them again later (earlier in the code, since we assemble backwards) serves no purpose.
2168 pending_lives.clear();
2171 void AR::freeEntryAt(uint32_t idx)
2173 NanoAssert(idx > 0 && idx <= _highWaterMark);
2175         // NB: this loop relies on entry[0] being NULL,
2176         // so that we are guaranteed to terminate
2177         // without accessing negative entries.
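        // Multi-slot values store the same LIns in each of their entries (see
        // reserveEntry), so this loop clears every slot of the block at once.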
2178 LIns* i = _entries[idx];
2179 NanoAssert(i != NULL);
2180 do {
2181 _entries[idx] = NULL;
2182 idx--;
2183 } while (_entries[idx] == i);
2186 #ifdef NJ_VERBOSE
2187 void Assembler::printRegState()
2189 char* s = &outline[0];
2190 VMPI_memset(s, ' ', 26); s[26] = '\0';
2191 s += VMPI_strlen(s);
2192 VMPI_sprintf(s, "RR");
2193 s += VMPI_strlen(s);
2195 RegisterMask active = _allocator.activeMask();
2196 for (Register r = lsReg(active); active != 0; r = nextLsReg(active, r)) {
2197 LIns *ins = _allocator.getActive(r);
2198 NanoAssertMsg(!_allocator.isFree(r),
2199 "Coding error; register is both free and active! " );
2200 RefBuf b;
2201 const char* n = _thisfrag->lirbuf->printer->formatRef(&b, ins);
2203 if (ins->isop(LIR_paramp) && ins->paramKind()==1 &&
2204 r == Assembler::savedRegs[ins->paramArg()])
2206                     // don't print callee-saved regs that aren't used
2207 continue;
2210 VMPI_sprintf(s, " %s(%s)", gpn(r), n);
2211 s += VMPI_strlen(s);
2213 output();
2216 void Assembler::printActivationState()
2218 char* s = &outline[0];
2219 VMPI_memset(s, ' ', 26); s[26] = '\0';
2220 s += VMPI_strlen(s);
2221 VMPI_sprintf(s, "AR");
2222 s += VMPI_strlen(s);
2224 LIns* ins = 0;
2225 uint32_t nStackSlots = 0;
2226 int32_t arIndex = 0;
2227 for (AR::Iter iter(_activation); iter.next(ins, nStackSlots, arIndex); )
2229 RefBuf b;
2230 const char* n = _thisfrag->lirbuf->printer->formatRef(&b, ins);
2231 if (nStackSlots > 1) {
2232 VMPI_sprintf(s," %d-%d(%s)", 4*arIndex, 4*(arIndex+nStackSlots-1), n);
2234 else {
2235 VMPI_sprintf(s," %d(%s)", 4*arIndex, n);
2237 s += VMPI_strlen(s);
2239 output();
2241 #endif
2243 inline bool AR::isEmptyRange(uint32_t start, uint32_t nStackSlots) const
2245 for (uint32_t i=0; i < nStackSlots; i++)
2247 if (_entries[start-i] != NULL)
2248 return false;
2250 return true;
2253 uint32_t AR::reserveEntry(LIns* ins)
2255 uint32_t const nStackSlots = nStackSlotsFor(ins);
2257 if (nStackSlots == 1)
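            // Single-slot case: linear-scan for the first free entry; if none is
            // free, grow the frame by bumping the high-water mark.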
2259 for (uint32_t i = 1; i <= _highWaterMark; i++)
2261 if (_entries[i] == NULL)
2263 _entries[i] = ins;
2264 return i;
2267 if (_highWaterMark < NJ_MAX_STACK_ENTRY - 1)
2269 NanoAssert(_entries[_highWaterMark+1] == BAD_ENTRY);
2270 _highWaterMark++;
2271 _entries[_highWaterMark] = ins;
2272 return _highWaterMark;
2275 else
2277             // Alloc a larger block on an 8-byte boundary.
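            // The scan below visits even-numbered indices only; a block occupies the
            // nStackSlots entries ending at index i (filled downwards, every slot
            // tagged with 'ins'), and the highest index i is what gets returned.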
2278 uint32_t const start = nStackSlots + (nStackSlots & 1);
2279 for (uint32_t i = start; i <= _highWaterMark; i += 2)
2281 if (isEmptyRange(i, nStackSlots))
2283 // place the entry in the table and mark the instruction with it
2284 for (uint32_t j=0; j < nStackSlots; j++)
2286 NanoAssert(i-j <= _highWaterMark);
2287 NanoAssert(_entries[i-j] == NULL);
2288 _entries[i-j] = ins;
2290 return i;
2294 // Be sure to account for any 8-byte-round-up when calculating spaceNeeded.
2295 uint32_t const spaceLeft = NJ_MAX_STACK_ENTRY - _highWaterMark - 1;
2296 uint32_t const spaceNeeded = nStackSlots + (_highWaterMark & 1);
2297 if (spaceLeft >= spaceNeeded)
2299 if (_highWaterMark & 1)
2301 NanoAssert(_entries[_highWaterMark+1] == BAD_ENTRY);
2302 _entries[_highWaterMark+1] = NULL;
2304 _highWaterMark += spaceNeeded;
2305 for (uint32_t j = 0; j < nStackSlots; j++)
2307 NanoAssert(_highWaterMark-j < NJ_MAX_STACK_ENTRY);
2308 NanoAssert(_entries[_highWaterMark-j] == BAD_ENTRY);
2309 _entries[_highWaterMark-j] = ins;
2311 return _highWaterMark;
2314 // no space. oh well.
2315 return 0;
2318 #ifdef _DEBUG
2319 void AR::checkForResourceLeaks() const
2321 for (uint32_t i = 1; i <= _highWaterMark; i++) {
2322 NanoAssertMsgf(_entries[i] == NULL, "frame entry %d wasn't freed\n",4*i);
2325 #endif
2327 uint32_t Assembler::arReserve(LIns* ins)
2329 uint32_t i = _activation.reserveEntry(ins);
2330 if (!i)
2331 setError(StackFull);
2332 return i;
2335 void Assembler::arFree(LIns* ins)
2337 NanoAssert(ins->isInAr());
2338 uint32_t arIndex = ins->getArIndex();
2339 NanoAssert(arIndex);
2340 NanoAssert(_activation.isValidEntry(arIndex, ins));
2341             _activation.freeEntryAt(arIndex);        // free any stack space associated with the entry
2345      * Move regs around so that SavedRegs contains the highest-priority regs.
2347 void Assembler::evictScratchRegsExcept(RegisterMask ignore)
2349 // Find the top GpRegs that are candidates to put in SavedRegs.
2351 // 'tosave' is a binary heap stored in an array. The root is tosave[0],
2352 // left child is at i+1, right child is at i+2.
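        // The first loop below builds the heap by appending each candidate and bubbling
        // it up; the second repeatedly takes tosave[0], then moves the last element to
        // the root and sifts it down.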
2354 Register tosave[LastRegNum - FirstRegNum + 1];
2355 int len=0;
2356 RegAlloc *regs = &_allocator;
2357 RegisterMask evict_set = regs->activeMask() & GpRegs & ~ignore;
2358 for (Register r = lsReg(evict_set); evict_set; r = nextLsReg(evict_set, r)) {
2359 LIns *ins = regs->getActive(r);
2360 if (canRemat(ins)) {
2361 NanoAssert(ins->getReg() == r);
2362 evict(ins);
2364 else {
2365 int32_t pri = regs->getPriority(r);
2366 // add to heap by adding to end and bubbling up
2367 int j = len++;
2368 while (j > 0 && pri > regs->getPriority(tosave[j/2])) {
2369 tosave[j] = tosave[j/2];
2370 j /= 2;
2372 NanoAssert(size_t(j) < sizeof(tosave)/sizeof(tosave[0]));
2373 tosave[j] = r;
2377             // Now tosave[] has the live exprs in priority order.
2378 // Allocate each of the top priority exprs to a SavedReg.
2380 RegisterMask allow = SavedRegs;
2381 while (allow && len > 0) {
2382 // get the highest priority var
2383 Register hi = tosave[0];
2384 if (!(rmask(hi) & SavedRegs)) {
2385 LIns *ins = regs->getActive(hi);
2386 Register r = findRegFor(ins, allow);
2387 allow &= ~rmask(r);
2389 else {
2390 // hi is already in a saved reg, leave it alone.
2391 allow &= ~rmask(hi);
2394 // remove from heap by replacing root with end element and bubbling down.
2395 if (allow && --len > 0) {
2396 Register last = tosave[len];
2397 int j = 0;
2398 while (j+1 < len) {
2399 int child = j+1;
2400 if (j+2 < len && regs->getPriority(tosave[j+2]) > regs->getPriority(tosave[j+1]))
2401 child++;
2402 if (regs->getPriority(last) > regs->getPriority(tosave[child]))
2403 break;
2404 tosave[j] = tosave[child];
2405 j = child;
2407 tosave[j] = last;
2411 // now evict everything else.
2412 evictSomeActiveRegs(~(SavedRegs | ignore));
2415     // Generate code to restore any registers in 'regs' that are currently active.
2416 void Assembler::evictSomeActiveRegs(RegisterMask regs)
2418 RegisterMask evict_set = regs & _allocator.activeMask();
2419 for (Register r = lsReg(evict_set); evict_set; r = nextLsReg(evict_set, r))
2420 evict(_allocator.getActive(r));
2424 * Merge the current regstate with a previously stored version.
2426 * Situation Change to _allocator
2427 * --------- --------------------
2428 * !current & !saved
2429 * !current & saved add saved
2430 * current & !saved evict current (unionRegisterState does nothing)
2431 * current & saved & current==saved
2432 * current & saved & current!=saved evict current, add saved
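     * Registers present only in 'saved' are queued in regsTodo/insTodo during the
     * eviction pass and re-established afterwards via findSpecificRegFor.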
2434 void Assembler::intersectRegisterState(RegAlloc& saved)
2436 Register regsTodo[LastRegNum + 1];
2437 LIns* insTodo[LastRegNum + 1];
2438 int nTodo = 0;
2440 // Do evictions and pops first.
2441 verbose_only(bool shouldMention=false; )
2442 // The obvious thing to do here is to iterate from FirstRegNum to
2443 // LastRegNum. However, on ARM that causes lower-numbered integer
2444         // registers to be saved at higher addresses, which inhibits the
2445 // formation of load/store multiple instructions. Hence iterate the
2446 // loop the other way.
2447 RegisterMask reg_set = _allocator.activeMask() | saved.activeMask();
2448 for (Register r = msReg(reg_set); reg_set; r = nextMsReg(reg_set, r))
2450 LIns* curins = _allocator.getActive(r);
2451 LIns* savedins = saved.getActive(r);
2452 if (curins != savedins)
2454 if (savedins) {
2455 regsTodo[nTodo] = r;
2456 insTodo[nTodo] = savedins;
2457 nTodo++;
2459 if (curins) {
2460 //_nvprof("intersect-evict",1);
2461 verbose_only( shouldMention=true; )
2462 NanoAssert(curins->getReg() == r);
2463 evict(curins);
2466 #ifdef NANOJIT_IA32
2467 if (savedins && r == FST0) {
2468 verbose_only( shouldMention=true; )
2469 FSTP(FST0);
2471 #endif
2474 // Now reassign mainline registers.
2475 for (int i = 0; i < nTodo; i++) {
2476 findSpecificRegFor(insTodo[i], regsTodo[i]);
2478 verbose_only(
2479 if (shouldMention)
2480 verbose_outputf("## merging registers (intersect) with existing edge");
2485 * Merge the current state of the registers with a previously stored version.
2487 * Situation Change to _allocator
2488 * --------- --------------------
2489 * !current & !saved none
2490 * !current & saved add saved
2491 * current & !saved none (intersectRegisterState evicts current)
2492 * current & saved & current==saved none
2493 * current & saved & current!=saved evict current, add saved
2495 void Assembler::unionRegisterState(RegAlloc& saved)
2497 Register regsTodo[LastRegNum + 1];
2498 LIns* insTodo[LastRegNum + 1];
2499 int nTodo = 0;
2501 // Do evictions and pops first.
2502 verbose_only(bool shouldMention=false; )
2503 RegisterMask reg_set = _allocator.activeMask() | saved.activeMask();
2504 for (Register r = lsReg(reg_set); reg_set; r = nextLsReg(reg_set, r))
2506 LIns* curins = _allocator.getActive(r);
2507 LIns* savedins = saved.getActive(r);
2508 if (curins != savedins)
2510 if (savedins) {
2511 regsTodo[nTodo] = r;
2512 insTodo[nTodo] = savedins;
2513 nTodo++;
2515 if (curins && savedins) {
2516 //_nvprof("union-evict",1);
2517 verbose_only( shouldMention=true; )
2518 NanoAssert(curins->getReg() == r);
2519 evict(curins);
2522 #ifdef NANOJIT_IA32
2523 if (r == FST0) {
2524 if (savedins) {
2525 // Discard top of x87 stack.
2526 FSTP(FST0);
2528 else if (curins) {
2529 // Saved state did not have fpu reg allocated,
2530 // so we must evict here to keep x87 stack balanced.
2531 evict(curins);
2533 verbose_only( shouldMention=true; )
2535 #endif
2538 // Now reassign mainline registers.
2539 for (int i = 0; i < nTodo; i++) {
2540 findSpecificRegFor(insTodo[i], regsTodo[i]);
2542 verbose_only(
2543 if (shouldMention)
2544 verbose_outputf("## merging registers (union) with existing edge");
2548 // Scan table for instruction with the lowest priority, meaning it is used
2549 // furthest in the future.
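    // Rematerializable values (canRemat) are given priority 0 below, so they are
    // preferred victims: they can be regenerated rather than reloaded from a spill slot.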
2550 LIns* Assembler::findVictim(RegisterMask allow)
2552 NanoAssert(allow);
2553 LIns *ins, *vic = 0;
2554 int allow_pri = 0x7fffffff;
2555 RegisterMask vic_set = allow & _allocator.activeMask();
2556 for (Register r = lsReg(vic_set); vic_set; r = nextLsReg(vic_set, r))
2558 ins = _allocator.getActive(r);
2559 int pri = canRemat(ins) ? 0 : _allocator.getPriority(r);
2560 if (!vic || pri < allow_pri) {
2561 vic = ins;
2562 allow_pri = pri;
2565 NanoAssert(vic != 0);
2566 return vic;
2569 #ifdef NJ_VERBOSE
2570 char Assembler::outline[8192];
2571 char Assembler::outlineEOL[512];
2573 void Assembler::output()
2575 // The +1 is for the terminating NUL char.
2576             VMPI_strncat(outline, outlineEOL, sizeof(outline)-(VMPI_strlen(outline)+1));
2578 if (_outputCache) {
2579 char* str = new (alloc) char[VMPI_strlen(outline)+1];
2580 VMPI_strcpy(str, outline);
2581 _outputCache->insert(str);
2582 } else {
2583 _logc->printf("%s\n", outline);
2586 outline[0] = '\0';
2587 outlineEOL[0] = '\0';
2590 void Assembler::outputf(const char* format, ...)
2592 va_list args;
2593 va_start(args, format);
2595 outline[0] = '\0';
2596 vsprintf(outline, format, args);
2597             output();
                 va_end(args);
2600 void Assembler::setOutputForEOL(const char* format, ...)
2602 va_list args;
2603 va_start(args, format);
2605 outlineEOL[0] = '\0';
2606             vsprintf(outlineEOL, format, args);
                 va_end(args);
2608 #endif // NJ_VERBOSE
2610 void LabelStateMap::add(LIns *label, NIns *addr, RegAlloc &regs) {
2611 LabelState *st = new (alloc) LabelState(addr, regs);
2612 labels.put(label, st);
2615 LabelState* LabelStateMap::get(LIns *label) {
2616 return labels.get(label);
2619 #endif /* FEATURE_NANOJIT */