nanojit / Assembler.cpp
1 /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
2 /* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
3 /* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
16 * The Original Code is [Open Source Virtual Machine].
18 * The Initial Developer of the Original Code is
19 * Adobe System Incorporated.
20 * Portions created by the Initial Developer are Copyright (C) 2004-2007
21 * the Initial Developer. All Rights Reserved.
23 * Contributor(s):
24 * Adobe AS3 Team
26 * Alternatively, the contents of this file may be used under the terms of
27 * either the GNU General Public License Version 2 or later (the "GPL"), or
28 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
40 #include "nanojit.h"
42 #ifdef FEATURE_NANOJIT
44 #ifdef VMCFG_VTUNE
45 #include "../core/CodegenLIR.h"
46 #endif
48 #ifdef _MSC_VER
49 // disable some specific warnings which are normally useful, but pervasive in the code-gen macros
50 #pragma warning(disable:4310) // cast truncates constant value
51 #endif
53 #ifdef VMCFG_VTUNE
54 namespace vtune {
55 using namespace nanojit;
56 void vtuneStart(void*, NIns*);
57 void vtuneEnd(void*, NIns*);
58 void vtuneLine(void*, int, NIns*);
59 void vtuneFile(void*, void*);
61 using namespace vtune;
62 #endif // VMCFG_VTUNE
65 namespace nanojit
67 /**
68 * Need the following:
70 * - merging paths ( build a graph? ), possibly use external rep to drive codegen
72 Assembler::Assembler(CodeAlloc& codeAlloc, Allocator& dataAlloc, Allocator& alloc, AvmCore* core, LogControl* logc, const Config& config)
73 : codeList(NULL)
74 , alloc(alloc)
75 , _codeAlloc(codeAlloc)
76 , _dataAlloc(dataAlloc)
77 , _thisfrag(NULL)
78 , _branchStateMap(alloc)
79 , _patches(alloc)
80 , _labels(alloc)
81 #if NJ_USES_IMMD_POOL
82 , _immDPool(alloc)
83 #endif
84 , _epilogue(NULL)
85 , _err(None)
86 #if PEDANTIC
87 , pedanticTop(NULL)
88 #endif
89 #ifdef VMCFG_VTUNE
90 , vtuneHandle(NULL)
91 #endif
92 , _config(config)
94 nInit(core);
95 (void)logc;
96 verbose_only( _logc = logc; )
97 verbose_only( _outputCache = 0; )
98 verbose_only( outline[0] = '\0'; )
99 verbose_only( outlineEOL[0] = '\0'; )
101 reset();
104 // Per-opcode register hint table. Default to no hints for all
105 // instructions. It's not marked const because individual back-ends can
106 // install hint values for opcodes of interest in nInit().
107 RegisterMask Assembler::nHints[LIR_sentinel+1] = {
108 #define OP___(op, number, repKind, retType, isCse) \
110 #include "LIRopcode.tbl"
111 #undef OP___
115 #ifdef _DEBUG
117 /*static*/ LIns* const AR::BAD_ENTRY = (LIns*)0xdeadbeef;
119 void AR::validateQuick()
121 NanoAssert(_highWaterMark < NJ_MAX_STACK_ENTRY);
122 NanoAssert(_entries[0] == NULL);
123 // Only check a few entries around _highWaterMark.
124 uint32_t const RADIUS = 4;
125 uint32_t const lo = (_highWaterMark > 1 + RADIUS ? _highWaterMark - RADIUS : 1);
126 uint32_t const hi = (_highWaterMark + 1 + RADIUS < NJ_MAX_STACK_ENTRY ? _highWaterMark + 1 + RADIUS : NJ_MAX_STACK_ENTRY);
127 for (uint32_t i = lo; i <= _highWaterMark; ++i)
128 NanoAssert(_entries[i] != BAD_ENTRY);
129 for (uint32_t i = _highWaterMark+1; i < hi; ++i)
130 NanoAssert(_entries[i] == BAD_ENTRY);
133 void AR::validateFull()
135 NanoAssert(_highWaterMark < NJ_MAX_STACK_ENTRY);
136 NanoAssert(_entries[0] == NULL);
137 for (uint32_t i = 1; i <= _highWaterMark; ++i)
138 NanoAssert(_entries[i] != BAD_ENTRY);
139 for (uint32_t i = _highWaterMark+1; i < NJ_MAX_STACK_ENTRY; ++i)
140 NanoAssert(_entries[i] == BAD_ENTRY);
143 void AR::validate()
145 static uint32_t validateCounter = 0;
146 if (++validateCounter >= 100)
148 validateFull();
149 validateCounter = 0;
151 else
153 validateQuick();
157 #endif
159 inline void AR::clear()
161 _highWaterMark = 0;
162 NanoAssert(_entries[0] == NULL);
163 #ifdef _DEBUG
164 for (uint32_t i = 1; i < NJ_MAX_STACK_ENTRY; ++i)
165 _entries[i] = BAD_ENTRY;
166 #endif
169 bool AR::Iter::next(LIns*& ins, uint32_t& nStackSlots, int32_t& arIndex)
171 while (_i <= _ar._highWaterMark) {
172 ins = _ar._entries[_i];
173 if (ins) {
174 arIndex = _i;
175 nStackSlots = nStackSlotsFor(ins);
176 _i += nStackSlots;
177 return true;
179 _i++;
181 ins = NULL;
182 nStackSlots = 0;
183 arIndex = 0;
184 return false;
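// Usage sketch (illustrative; assumes an Iter is constructed over the AR
// being walked):
//     AR::Iter iter(_activation);
//     LIns* ins; uint32_t slots; int32_t idx;
//     while (iter.next(ins, slots, idx)) {
//         // 'ins' occupies 'slots' stack slots starting at index 'idx'
//     }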
187 void Assembler::arReset()
189 _activation.clear();
190 _branchStateMap.clear();
191 _patches.clear();
192 _labels.clear();
193 #if NJ_USES_IMMD_POOL
194 _immDPool.clear();
195 #endif
198 void Assembler::registerResetAll()
200 nRegisterResetAll(_allocator);
201 _allocator.managed = _allocator.free;
203 // At start, should have some registers free and none active.
204 NanoAssert(0 != _allocator.free);
205 NanoAssert(0 == _allocator.activeMask());
206 #ifdef NANOJIT_IA32
207 debug_only(_fpuStkDepth = 0; )
208 #endif
211 // Legend for register sets: A = allowed, P = preferred, F = free, S = SavedReg.
213 // Finds a register in 'setA___' to store the result of 'ins' (one from
214 // 'set_P__' if possible), evicting one if necessary. Doesn't consider
215 // the prior state of 'ins'.
217 // Nb: 'setA___' comes from the instruction's use, 'set_P__' comes from its def.
218 // Eg. in 'add(call(...), ...)':
219 // - the call's use means setA___==GpRegs;
220 // - the call's def means set_P__==rmask(retRegs[0]).
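// A worked example with hypothetical masks, for illustration only: if
// setA___ = 0b1111, set_P__ = 0b0010, free = 0b1100 and SavedRegs = 0b1000,
// then setA_F_ = 0b1100, setAPF_ = setAPFS = 0 (the preferred register is not
// free) and setA_FS = 0b1000, so the code below picks from setA_FS: a free,
// allowed, callee-saved register wins over a merely free one.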
222 Register Assembler::registerAlloc(LIns* ins, RegisterMask setA___, RegisterMask set_P__)
224 Register r;
225 RegisterMask set__F_ = _allocator.free;
226 RegisterMask setA_F_ = setA___ & set__F_;
228 if (setA_F_) {
229 RegisterMask set___S = SavedRegs;
230 RegisterMask setA_FS = setA_F_ & set___S;
231 RegisterMask setAPF_ = setA_F_ & set_P__;
232 RegisterMask setAPFS = setA_FS & set_P__;
233 RegisterMask set;
235 if (setAPFS) set = setAPFS;
236 else if (setAPF_) set = setAPF_;
237 else if (setA_FS) set = setA_FS;
238 else set = setA_F_;
240 r = nRegisterAllocFromSet(set);
241 _allocator.addActive(r, ins);
242 ins->setReg(r);
243 } else {
244 // Nothing free, steal one.
245 // LSRA says pick the one with the furthest use.
246 LIns* vic = findVictim(setA___);
247 NanoAssert(vic->isInReg());
248 r = vic->getReg();
250 evict(vic);
252 // r ends up staying active, but the LIns defining it changes.
253 _allocator.removeFree(r);
254 _allocator.addActive(r, ins);
255 ins->setReg(r);
258 return r;
261 // Finds a register in 'allow' to store a temporary value (one not
262 // associated with a particular LIns), evicting one if necessary. The
263 // returned register is marked as being free and so can only be safely
264 // used for code generation purposes until the regstate is next inspected
265 // or updated.
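// For example (illustrative): a back-end needing a scratch GpReg while
// expanding a single LIR instruction can call registerAllocTmp(GpRegs);
// because the returned register remains marked free, it must not be relied
// upon across any later call that inspects or updates the regstate.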
266 Register Assembler::registerAllocTmp(RegisterMask allow)
268 LIns dummyIns;
269 Register r = registerAlloc(&dummyIns, allow, /*prefer*/0);
271 // Mark r as free, ready for use as a temporary value.
272 _allocator.removeActive(r);
273 _allocator.addFree(r);
274 return r;
277 void Assembler::codeAlloc(NIns *&start, NIns *&end, NIns *&eip
278 verbose_only(, size_t &nBytes))
280 // save the block we just filled
281 if (start)
282 CodeAlloc::add(codeList, start, end);
284 // CodeAlloc contract: allocations never fail
285 _codeAlloc.alloc(start, end);
286 verbose_only( nBytes += (end - start) * sizeof(NIns); )
287 NanoAssert(uintptr_t(end) - uintptr_t(start) >= (size_t)LARGEST_UNDERRUN_PROT);
288 eip = end;
291 void Assembler::reset()
293 _nIns = 0;
294 _nExitIns = 0;
295 codeStart = codeEnd = 0;
296 exitStart = exitEnd = 0;
297 codeList = 0;
299 nativePageReset();
300 registerResetAll();
301 arReset();
304 #ifdef _DEBUG
305 void Assembler::pageValidate()
307 if (error()) return;
308 // This may be a normal code chunk or an exit code chunk.
309 NanoAssertMsg(codeStart <= _nIns && _nIns <= codeEnd,
310 "Native instruction pointer overstep paging bounds; check overrideProtect for last instruction");
312 #endif
314 #ifdef _DEBUG
316 bool AR::isValidEntry(uint32_t idx, LIns* ins) const
318 return idx > 0 && idx <= _highWaterMark && _entries[idx] == ins;
321 void AR::checkForResourceConsistency(const RegAlloc& regs)
323 validate();
324 for (uint32_t i = 1; i <= _highWaterMark; ++i)
326 LIns* ins = _entries[i];
327 if (!ins)
328 continue;
329 uint32_t arIndex = ins->getArIndex();
330 NanoAssert(arIndex != 0);
331 if (ins->isop(LIR_allocp)) {
332 int const n = i + (ins->size()>>2);
333 for (int j=i+1; j < n; j++) {
334 NanoAssert(_entries[j]==ins);
336 NanoAssert(arIndex == (uint32_t)n-1);
337 i = n-1;
339 else if (ins->isQorD()) {
340 NanoAssert(_entries[i + 1]==ins);
341 i += 1; // skip high word
343 else {
344 NanoAssertMsg(arIndex == i, "Stack record index mismatch");
346 NanoAssertMsg(!ins->isInReg() || regs.isConsistent(ins->getReg(), ins),
347 "Register record mismatch");
351 void Assembler::resourceConsistencyCheck()
353 NanoAssert(!error());
354 #ifdef NANOJIT_IA32
355 // Within the expansion of a single LIR instruction, we may use the x87
356 // stack for unmanaged temporaries. Otherwise, we do not use the x87 stack
357 // as such, but use the top element alone as a single allocatable FP register.
358 // Compensation code must be inserted to keep the stack balanced and avoid
359 // overflow, and the mechanisms for this are rather fragile and IA32-specific.
360 // The predicate below should hold between any pair of instructions within
361 // a basic block, at labels, and just after a conditional branch. Currently,
362 // we enforce this condition between all pairs of instructions, but this is
363 // overly restrictive, and would fail if we did not generate unreachable x87
364 // stack pops following unconditional branches.
365 NanoAssert((_allocator.active[FST0] && _fpuStkDepth == -1) ||
366 (!_allocator.active[FST0] && _fpuStkDepth == 0));
367 #endif
368 _activation.checkForResourceConsistency(_allocator);
369 registerConsistencyCheck();
372 void Assembler::registerConsistencyCheck()
374 RegisterMask managed = _allocator.managed;
375 for (Register r = lsReg(managed); managed; r = nextLsReg(managed, r)) {
376 // A register managed by register allocation must be either
377 // free or active, but not both.
378 if (_allocator.isFree(r)) {
379 NanoAssertMsgf(_allocator.getActive(r)==0,
380 "register %s is free but assigned to ins", gpn(r));
381 } else {
382 // An LIns defining a register must have that register in
383 // its reservation.
384 LIns* ins = _allocator.getActive(r);
385 NanoAssert(ins);
386 NanoAssertMsg(r == ins->getReg(), "Register record mismatch");
390 RegisterMask not_managed = ~_allocator.managed;
391 for (Register r = lsReg(not_managed); not_managed; r = nextLsReg(not_managed, r)) {
392 // A register not managed by register allocation must be
393 // neither free nor active.
394 if (r <= LastReg) {
395 NanoAssert(!_allocator.isFree(r));
396 NanoAssert(!_allocator.getActive(r));
400 #endif /* _DEBUG */
402 void Assembler::findRegFor2(RegisterMask allowa, LIns* ia, Register& ra,
403 RegisterMask allowb, LIns* ib, Register& rb)
405 // There should be some overlap between 'allowa' and 'allowb', else
406 // there's no point calling this function.
407 NanoAssert(allowa & allowb);
409 if (ia == ib) {
410 ra = rb = findRegFor(ia, allowa & allowb); // use intersection(allowa, allowb)
412 } else if (ib->isInRegMask(allowb)) {
413 // 'ib' is already in an allowable reg -- don't let it get evicted
414 // when finding 'ra'.
415 rb = ib->getReg();
416 ra = findRegFor(ia, allowa & ~rmask(rb));
418 } else {
419 ra = findRegFor(ia, allowa);
420 rb = findRegFor(ib, allowb & ~rmask(ra));
424 Register Assembler::findSpecificRegFor(LIns* i, Register w)
426 return findRegFor(i, rmask(w));
429 // Like findRegFor(), but called when the LIns is used as a pointer. It
430 // doesn't have to be called; findRegFor() can still be used, but it can
431 // optimize the LIR_allocp case by indexing off FP, thus saving the use of
432 // a GpReg.
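// For instance, a load from a LIR_allocp base with displacement 8 can be
// emitted as an FP-relative access at (the alloc's AR offset + 8), so no
// GpReg is consumed for the base address.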
434 Register Assembler::getBaseReg(LIns* base, int &d, RegisterMask allow)
436 #if !PEDANTIC
437 if (base->isop(LIR_allocp)) {
438 // The value of a LIR_allocp is a pointer to its stack memory,
439 // which is always relative to FP. So we can just return FP if we
440 // also adjust 'd' (and can do so in a valid manner). Or, in the
441 // PEDANTIC case, we can just assign a register as normal;
442 // findRegFor() will allocate the stack memory for LIR_allocp if
443 // necessary.
444 d += findMemFor(base);
445 return FP;
447 #else
448 (void) d;
449 #endif
450 return findRegFor(base, allow);
453 // Like findRegFor2(), but used for stores where the base value has the
454 // same type as the stored value, eg. in asm_store32() on 32-bit platforms
455 // and asm_store64() on 64-bit platforms. Similar to getBaseReg(),
456 // findRegFor2() can be called instead, but this function can optimize the
457 // case where the base value is a LIR_allocp.
458 void Assembler::getBaseReg2(RegisterMask allowValue, LIns* value, Register& rv,
459 RegisterMask allowBase, LIns* base, Register& rb, int &d)
461 #if !PEDANTIC
462 if (base->isop(LIR_allocp)) {
463 rb = FP;
464 d += findMemFor(base);
465 rv = findRegFor(value, allowValue);
466 return;
468 #else
469 (void) d;
470 #endif
471 findRegFor2(allowValue, value, rv, allowBase, base, rb);
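// Register hints: nHints[] (defined above) supplies a per-opcode preferred
// register mask, and PREFER_SPECIAL entries defer to the back-end's nHint()
// hook. For example (illustrative only), a back-end's nInit() might install
//     nHints[LIR_calli] = rmask(retRegs[0]);
// so that call results prefer the return-value register.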
474 RegisterMask Assembler::hint(LIns* ins)
476 RegisterMask prefer = nHints[ins->opcode()];
477 return (prefer == PREFER_SPECIAL) ? nHint(ins) : prefer;
480 // Finds a register in 'allow' to hold the result of 'ins'. Used when we
481 // encounter a use of 'ins'. The actions depend on the prior regstate of
482 // 'ins':
483 // - If the result of 'ins' is not in any register, we find an allowed
484 // one, evicting one if necessary.
485 // - If the result of 'ins' is already in an allowed register, we use that.
486 // - If the result of 'ins' is already in a not-allowed register, we find an
487 // allowed one and move it.
489 Register Assembler::findRegFor(LIns* ins, RegisterMask allow)
491 if (ins->isop(LIR_allocp)) {
492 // Never allocate a reg for this without stack space too.
493 findMemFor(ins);
496 Register r;
498 if (!ins->isInReg()) {
499 // 'ins' isn't in a register (must be in a spill slot or nowhere).
500 r = registerAlloc(ins, allow, hint(ins));
502 } else if (rmask(r = ins->getReg()) & allow) {
503 // 'ins' is in an allowed register.
504 _allocator.useActive(r);
506 } else {
507 // 'ins' is in a register (r) that's not in 'allow'.
508 #ifdef NANOJIT_IA32
509 if (((rmask(r)&XmmRegs) && !(allow&XmmRegs)) ||
510 ((rmask(r)&x87Regs) && !(allow&x87Regs)))
512 // x87 <-> xmm copy required
513 //_nvprof("fpu-evict",1);
514 evict(ins);
515 r = registerAlloc(ins, allow, hint(ins));
516 } else
517 #elif defined(NANOJIT_PPC) || defined(NANOJIT_MIPS)
518 if (((rmask(r)&GpRegs) && !(allow&GpRegs)) ||
519 ((rmask(r)&FpRegs) && !(allow&FpRegs)))
521 evict(ins);
522 r = registerAlloc(ins, allow, hint(ins));
523 } else
524 #endif
526 // The post-state register holding 'ins' is 's', the pre-state
527 // register holding 'ins' is 'r'. For example, if s=eax and
528 // r=ecx:
530 // pre-state: ecx(ins)
531 // instruction: mov eax, ecx
532 // post-state: eax(ins)
534 Register s = r;
535 _allocator.retire(r);
536 r = registerAlloc(ins, allow, hint(ins));
538 // 'ins' is in 'allow', in register r (different to the old r);
539 // s is the old r.
540 if ((rmask(s) & GpRegs) && (rmask(r) & GpRegs)) {
541 MR(s, r); // move 'ins' from its pre-state reg (r) to its post-state reg (s)
542 } else {
543 asm_nongp_copy(s, r);
548 return r;
551 // Like findSpecificRegFor(), but only for when 'r' is known to be free
552 // and 'ins' is known to not already have a register allocated. Updates
553 // the regstate (maintaining the invariants) but does not generate any
554 // code. The return value is redundant, always being 'r', but it's
555 // sometimes useful to have it there for assignments.
556 Register Assembler::findSpecificRegForUnallocated(LIns* ins, Register r)
558 if (ins->isop(LIR_allocp)) {
559 // never allocate a reg for this w/out stack space too
560 findMemFor(ins);
563 NanoAssert(!ins->isInReg());
564 NanoAssert(_allocator.free & rmask(r));
566 ins->setReg(r);
567 _allocator.removeFree(r);
568 _allocator.addActive(r, ins);
570 return r;
573 #if NJ_USES_IMMD_POOL
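// findImmDFromPool returns a pointer to a pooled copy of the 64-bit
// immediate 'q', allocating one from the data allocator on first use, so
// generated code can reference the constant through a stable address.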
574 const uint64_t* Assembler::findImmDFromPool(uint64_t q)
576 uint64_t* p = _immDPool.get(q);
577 if (!p)
579 p = new (_dataAlloc) uint64_t;
580 *p = q;
581 _immDPool.put(q, p);
583 return p;
585 #endif
587 int Assembler::findMemFor(LIns *ins)
589 #if NJ_USES_IMMD_POOL
590 NanoAssert(!ins->isImmD());
591 #endif
592 if (!ins->isInAr()) {
593 uint32_t const arIndex = arReserve(ins);
594 ins->setArIndex(arIndex);
595 NanoAssert(_activation.isValidEntry(ins->getArIndex(), ins) == (arIndex != 0));
597 return arDisp(ins);
600 // XXX: this function is dangerous and should be phased out;
601 // See bug 513615. Calls to it should be replaced with a
602 // prepareResultReg() / generate code / freeResourcesOf() sequence.
603 Register Assembler::deprecated_prepResultReg(LIns *ins, RegisterMask allow)
605 #ifdef NANOJIT_IA32
606 // We used to have to worry about possibly popping the x87 stack here.
607 // But this function is no longer used on i386, and this assertion
608 // ensures that.
609 NanoAssert(0);
610 #endif
611 Register r = findRegFor(ins, allow);
612 deprecated_freeRsrcOf(ins);
613 return r;
616 // Finds a register in 'allow' to hold the result of 'ins'. Also
617 // generates code to spill the result if necessary. Called just prior to
618 // generating the code for 'ins' (because we generate code backwards).
620 // An example where no spill is necessary. Lines marked '*' are those
621 // done by this function.
623 // regstate: R
624 // asm: define res into r
625 // * regstate: R + r(res)
626 // ...
627 // asm: use res in r
629 // An example where a spill is necessary.
631 // regstate: R
632 // asm: define res into r
633 // * regstate: R + r(res)
634 // * asm: spill res from r
635 // regstate: R
636 // ...
637 // asm: restore res into r2
638 // regstate: R + r2(res) + other changes from "..."
639 // asm: use res in r2
641 Register Assembler::prepareResultReg(LIns *ins, RegisterMask allow)
643 // At this point, we know the result of 'ins' is used later in the
644 // code, unless it is a call to an impure function that must be
645 // included for effect even though its result is ignored. It may have
646 // had to be evicted, in which case the restore will have already been
647 // generated, so we now generate the spill. QUERY: Is there any attempt
648 // to elide the spill if we know that all restores can be rematerialized?
649 #ifdef NANOJIT_IA32
650 const bool notInFST0 = (!ins->isInReg() || ins->getReg() != FST0);
651 Register r = findRegFor(ins, allow);
652 // If the result register is FST0, but FST0 is not in the post-regstate,
653 // then we must pop the x87 stack. This may occur because the result is
654 // unused, or because it has been stored to a spill slot or an XMM register.
655 const bool needPop = notInFST0 && (r == FST0);
656 const bool didSpill = asm_maybe_spill(ins, needPop);
657 if (!didSpill && needPop) {
658 // If the instruction is spilled, then the pop will have already
659 // been performed by the store to the stack slot. Otherwise, we
660 // must pop now. This may occur when the result of a LIR_calld
661 // to an impure (side-effecting) function is not used.
662 FSTP(FST0);
664 #else
665 Register r = findRegFor(ins, allow);
666 asm_maybe_spill(ins, false);
667 #endif
668 return r;
671 bool Assembler::asm_maybe_spill(LIns* ins, bool pop)
673 if (ins->isInAr()) {
674 int d = arDisp(ins);
675 Register r = ins->getReg();
676 verbose_only( RefBuf b;
677 if (_logc->lcbits & LC_Native) {
678 setOutputForEOL(" <= spill %s",
679 _thisfrag->lirbuf->printer->formatRef(&b, ins)); } )
680 #ifdef NANOJIT_IA32
681 asm_spill(r, d, pop);
682 #else
683 (void)pop;
684 asm_spill(r, d, ins->isQorD());
685 #endif
686 return true;
688 return false;
691 // XXX: This function is error-prone and should be phased out; see bug 513615.
692 void Assembler::deprecated_freeRsrcOf(LIns *ins)
694 if (ins->isInReg()) {
695 asm_maybe_spill(ins, /*pop*/false);
696 _allocator.retire(ins->getReg()); // free any register associated with entry
697 ins->clearReg();
699 if (ins->isInAr()) {
700 arFree(ins); // free any AR space associated with entry
701 ins->clearArIndex();
705 // Frees all record of registers and spill slots used by 'ins'.
706 void Assembler::freeResourcesOf(LIns *ins)
708 if (ins->isInReg()) {
709 _allocator.retire(ins->getReg()); // free any register associated with entry
710 ins->clearReg();
712 if (ins->isInAr()) {
713 arFree(ins); // free any AR space associated with entry
714 ins->clearArIndex();
718 // Frees 'r' in the RegAlloc regstate, if it's not already free.
719 void Assembler::evictIfActive(Register r)
721 if (LIns* vic = _allocator.getActive(r)) {
722 NanoAssert(vic->getReg() == r);
723 evict(vic);
727 // Frees 'r' (which currently holds the result of 'vic') in the regstate.
728 // An example:
730 // pre-regstate: eax(ld1)
731 // instruction: mov ebx,-4(ebp) <= restore add1 # %ebx is dest
732 // post-regstate: eax(ld1) ebx(add1)
734 // At run-time we are *restoring* 'add1' into %ebx, hence the call to
735 // asm_restore(). But at regalloc-time we are moving backwards through
736 // the code, so in that sense we are *evicting* 'add1' from %ebx.
738 void Assembler::evict(LIns* vic)
740 // Not free, need to steal.
741 Register r = vic->getReg();
743 NanoAssert(!_allocator.isFree(r));
744 NanoAssert(vic == _allocator.getActive(r));
746 verbose_only( RefBuf b;
747 if (_logc->lcbits & LC_Native) {
748 setOutputForEOL(" <= restore %s",
749 _thisfrag->lirbuf->printer->formatRef(&b, vic)); } )
750 asm_restore(vic, r);
752 _allocator.retire(r);
753 vic->clearReg();
755 // At this point 'vic' is unused (if rematerializable), or in a spill
756 // slot (if not).
759 void Assembler::patch(GuardRecord *lr)
761 if (!lr->jmp) // the guard might have been eliminated as redundant
762 return;
763 Fragment *frag = lr->exit->target;
764 NanoAssert(frag->fragEntry != 0);
765 nPatchBranch((NIns*)lr->jmp, frag->fragEntry);
766 CodeAlloc::flushICache(lr->jmp, LARGEST_BRANCH_PATCH);
767 verbose_only(verbose_outputf("patching jump at %p to target %p\n",
768 lr->jmp, frag->fragEntry);)
771 void Assembler::patch(SideExit *exit)
773 GuardRecord *rec = exit->guards;
774 NanoAssert(rec);
775 while (rec) {
776 patch(rec);
777 rec = rec->next;
781 #ifdef NANOJIT_IA32
782 void Assembler::patch(SideExit* exit, SwitchInfo* si)
784 for (GuardRecord* lr = exit->guards; lr; lr = lr->next) {
785 Fragment *frag = lr->exit->target;
786 NanoAssert(frag->fragEntry != 0);
787 si->table[si->index] = frag->fragEntry;
790 #endif
792 NIns* Assembler::asm_exit(LIns* guard)
794 SideExit *exit = guard->record()->exit;
795 NIns* at = 0;
796 if (!_branchStateMap.get(exit))
798 at = asm_leave_trace(guard);
800 else
802 RegAlloc* captured = _branchStateMap.get(exit);
803 intersectRegisterState(*captured);
804 at = exit->target->fragEntry;
805 NanoAssert(at != 0);
806 _branchStateMap.remove(exit);
808 return at;
811 NIns* Assembler::asm_leave_trace(LIns* guard)
813 verbose_only( verbose_outputf("----------------------------------- ## END exit block %p", guard);)
815 // This point is unreachable. So free all the registers. If an
816 // instruction has a stack entry we will leave it alone, otherwise we
817 // free it entirely. intersectRegisterState() will restore.
818 RegAlloc capture = _allocator;
819 releaseRegisters();
821 swapCodeChunks();
822 _inExit = true;
824 #ifdef NANOJIT_IA32
825 debug_only( _sv_fpuStkDepth = _fpuStkDepth; _fpuStkDepth = 0; )
826 #endif
828 nFragExit(guard);
830 // Restore the callee-saved registers and parameters.
831 assignSavedRegs();
832 assignParamRegs();
834 intersectRegisterState(capture);
836 // this can be useful for breaking whenever an exit is taken
837 //INT3();
838 //NOP();
840 // we are done producing the exit logic for the guard so mark where our exit block code begins
841 NIns* jmpTarget = _nIns; // target in exit path for our mainline conditional jump
843 // swap back pointers, effectively storing the last location used in the exit path
844 swapCodeChunks();
845 _inExit = false;
847 //verbose_only( verbose_outputf(" LIR_xt/xf swapCodeChunks, _nIns is now %08X(%08X), _nExitIns is now %08X(%08X)",_nIns, *_nIns,_nExitIns,*_nExitIns) );
848 verbose_only( verbose_outputf("%p:", jmpTarget);)
849 verbose_only( verbose_outputf("----------------------------------- ## BEGIN exit block (LIR_xt|LIR_xf)") );
851 #ifdef NANOJIT_IA32
852 NanoAssertMsgf(_fpuStkDepth == _sv_fpuStkDepth, "LIR_xtf, _fpuStkDepth=%d, expect %d",_fpuStkDepth, _sv_fpuStkDepth);
853 debug_only( _fpuStkDepth = _sv_fpuStkDepth; _sv_fpuStkDepth = 9999; )
854 #endif
856 return jmpTarget;
859 void Assembler::compile(Fragment* frag, Allocator& alloc, bool optimize verbose_only(, LInsPrinter* printer))
861 verbose_only(
862 bool anyVerb = (_logc->lcbits & 0xFFFF & ~LC_FragProfile) > 0;
863 bool liveVerb = (_logc->lcbits & 0xFFFF & LC_Liveness) > 0;
866 /* BEGIN decorative preamble */
867 verbose_only(
868 if (anyVerb) {
869 _logc->printf("========================================"
870 "========================================\n");
871 _logc->printf("=== BEGIN LIR::compile(%p, %p)\n",
872 (void*)this, (void*)frag);
873 _logc->printf("===\n");
875 /* END decorative preamble */
877 verbose_only( if (liveVerb) {
878 _logc->printf("\n");
879 _logc->printf("=== Results of liveness analysis:\n");
880 _logc->printf("===\n");
881 LirReader br(frag->lastIns);
882 LirFilter* lir = &br;
883 if (optimize) {
884 StackFilter* sf = new (alloc) StackFilter(lir, alloc, frag->lirbuf->sp);
885 lir = sf;
887 live(lir, alloc, frag, _logc);
890 /* Set up the generic text output cache for the assembler */
891 verbose_only( StringList asmOutput(alloc); )
892 verbose_only( _outputCache = &asmOutput; )
894 beginAssembly(frag);
895 if (error())
896 return;
898 //_logc->printf("recompile trigger %X kind %d\n", (int)frag, frag->kind);
900 verbose_only( if (anyVerb) {
901 _logc->printf("=== Translating LIR fragments into assembly:\n");
904 // now the main trunk
905 verbose_only( RefBuf b; )
906 verbose_only( if (anyVerb) {
907 _logc->printf("=== -- Compile trunk %s: begin\n", printer->formatAddr(&b, frag));
910 // Used for debug printing, if needed
911 debug_only(ValidateReader *validate = NULL;)
912 verbose_only(
913 ReverseLister *pp_init = NULL;
914 ReverseLister *pp_after_sf = NULL;
917 // The LIR passes through these filters as listed in this
918 // function, viz, top to bottom.
920 // set up backwards pipeline: assembler <- StackFilter <- LirReader
921 LirFilter* lir = new (alloc) LirReader(frag->lastIns);
923 #ifdef DEBUG
924 // VALIDATION
925 validate = new (alloc) ValidateReader(lir);
926 lir = validate;
927 #endif
929 // INITIAL PRINTING
930 verbose_only( if (_logc->lcbits & LC_ReadLIR) {
931 pp_init = new (alloc) ReverseLister(lir, alloc, frag->lirbuf->printer, _logc,
932 "Initial LIR");
933 lir = pp_init;
936 // STACKFILTER
937 if (optimize) {
938 StackFilter* stackfilter = new (alloc) StackFilter(lir, alloc, frag->lirbuf->sp);
939 lir = stackfilter;
942 verbose_only( if (_logc->lcbits & LC_AfterSF) {
943 pp_after_sf = new (alloc) ReverseLister(lir, alloc, frag->lirbuf->printer, _logc,
944 "After StackFilter");
945 lir = pp_after_sf;
948 assemble(frag, lir);
950 // If we were accumulating debug info in the various ReverseListers,
951 // call finish() to emit whatever contents they have accumulated.
952 verbose_only(
953 if (pp_init) pp_init->finish();
954 if (pp_after_sf) pp_after_sf->finish();
957 verbose_only( if (anyVerb) {
958 _logc->printf("=== -- Compile trunk %s: end\n", printer->formatAddr(&b, frag));
961 endAssembly(frag);
963 // Reverse output so that assembly is displayed low-to-high.
964 // Up to this point, _outputCache has been non-NULL, and so has been
965 // accumulating output. Now we set it to NULL, traverse the entire
966 // list of stored strings, and hand them a second time to output.
967 // Since _outputCache is now NULL, outputf just hands these strings
968 // directly onwards to _logc->printf.
969 verbose_only( if (anyVerb) {
970 _logc->printf("\n");
971 _logc->printf("=== Aggregated assembly output: BEGIN\n");
972 _logc->printf("===\n");
973 _outputCache = 0;
974 for (Seq<char*>* p = asmOutput.get(); p != NULL; p = p->tail) {
975 char *str = p->head;
976 outputf(" %s", str);
978 _logc->printf("===\n");
979 _logc->printf("=== Aggregated assembly output: END\n");
982 if (error())
983 frag->fragEntry = 0;
985 verbose_only( frag->nCodeBytes += codeBytes; )
986 verbose_only( frag->nExitBytes += exitBytes; )
988 /* BEGIN decorative postamble */
989 verbose_only( if (anyVerb) {
990 _logc->printf("\n");
991 _logc->printf("===\n");
992 _logc->printf("=== END LIR::compile(%p, %p)\n",
993 (void*)this, (void*)frag);
994 _logc->printf("========================================"
995 "========================================\n");
996 _logc->printf("\n");
998 /* END decorative postamble */
1001 void Assembler::beginAssembly(Fragment *frag)
1003 verbose_only( codeBytes = 0; )
1004 verbose_only( exitBytes = 0; )
1006 reset();
1008 NanoAssert(codeList == 0);
1009 NanoAssert(codeStart == 0);
1010 NanoAssert(codeEnd == 0);
1011 NanoAssert(exitStart == 0);
1012 NanoAssert(exitEnd == 0);
1013 NanoAssert(_nIns == 0);
1014 NanoAssert(_nExitIns == 0);
1016 _thisfrag = frag;
1017 _inExit = false;
1019 setError(None);
1021 // native code gen buffer setup
1022 nativePageSetup();
1024 // make sure we got at least one page of memory
1025 if (error()) return;
1027 _epilogue = NULL;
1029 nBeginAssembly();
1032 void Assembler::assemble(Fragment* frag, LirFilter* reader)
1034 if (error()) return;
1035 _thisfrag = frag;
1037 // check the fragment is starting out with a sane profiling state
1038 verbose_only( NanoAssert(frag->nStaticExits == 0); )
1039 verbose_only( NanoAssert(frag->nCodeBytes == 0); )
1040 verbose_only( NanoAssert(frag->nExitBytes == 0); )
1041 verbose_only( NanoAssert(frag->profCount == 0); )
1042 verbose_only( if (_logc->lcbits & LC_FragProfile)
1043 NanoAssert(frag->profFragID > 0);
1044 else
1045 NanoAssert(frag->profFragID == 0); )
1047 _inExit = false;
1049 gen(reader);
1051 if (!error()) {
1052 // patch all branches
1053 NInsMap::Iter iter(_patches);
1054 while (iter.next()) {
1055 NIns* where = iter.key();
1056 LIns* target = iter.value();
1057 if (target->isop(LIR_jtbl)) {
1058 // Need to patch up a whole jump table, 'where' is the table.
1059 LIns *jtbl = target;
1060 NIns** native_table = (NIns**) (void *) where;
1061 for (uint32_t i = 0, n = jtbl->getTableSize(); i < n; i++) {
1062 LabelState* lstate = _labels.get(jtbl->getTarget(i));
1063 NIns* ntarget = lstate->addr;
1064 if (ntarget) {
1065 native_table[i] = ntarget;
1066 } else {
1067 setError(UnknownBranch);
1068 break;
1071 } else {
1072 // target is a label for a single-target branch
1073 LabelState *lstate = _labels.get(target);
1074 NIns* ntarget = lstate->addr;
1075 if (ntarget) {
1076 nPatchBranch(where, ntarget);
1077 } else {
1078 setError(UnknownBranch);
1079 break;
1086 void Assembler::endAssembly(Fragment* frag)
1088 // don't try to patch code if we are in an error state since we might have partially
1089 // overwritten the code cache already
1090 if (error()) {
1091 // something went wrong, release all allocated code memory
1092 _codeAlloc.freeAll(codeList);
1093 if (_nExitIns)
1094 _codeAlloc.free(exitStart, exitEnd);
1095 _codeAlloc.free(codeStart, codeEnd);
1096 codeList = NULL;
1097 return;
1100 NIns* fragEntry = genPrologue();
1101 verbose_only( asm_output("[prologue]"); )
1103 debug_only(_activation.checkForResourceLeaks());
1105 NanoAssert(!_inExit);
1106 // save used parts of current block on fragment's code list, free the rest
1107 #if defined(NANOJIT_ARM) || defined(NANOJIT_MIPS)
1108 // [codeStart, _nSlot) ... gap ... [_nIns, codeEnd)
1109 if (_nExitIns) {
1110 _codeAlloc.addRemainder(codeList, exitStart, exitEnd, _nExitSlot, _nExitIns);
1111 verbose_only( exitBytes -= (_nExitIns - _nExitSlot) * sizeof(NIns); )
1113 _codeAlloc.addRemainder(codeList, codeStart, codeEnd, _nSlot, _nIns);
1114 verbose_only( codeBytes -= (_nIns - _nSlot) * sizeof(NIns); )
1115 #else
1116 // [codeStart ... gap ... [_nIns, codeEnd))
1117 if (_nExitIns) {
1118 _codeAlloc.addRemainder(codeList, exitStart, exitEnd, exitStart, _nExitIns);
1119 verbose_only( exitBytes -= (_nExitIns - exitStart) * sizeof(NIns); )
1121 _codeAlloc.addRemainder(codeList, codeStart, codeEnd, codeStart, _nIns);
1122 verbose_only( codeBytes -= (_nIns - codeStart) * sizeof(NIns); )
1123 #endif
1125 // at this point all our new code is in the d-cache and not the i-cache,
1126 // so flush the i-cache on CPUs that need it.
1127 CodeAlloc::flushICache(codeList);
1129 // save entry point pointers
1130 frag->fragEntry = fragEntry;
1131 frag->setCode(_nIns);
1133 #ifdef VMCFG_VTUNE
1134 if (vtuneHandle)
1136 vtuneEnd(vtuneHandle, codeEnd);
1137 vtuneStart(vtuneHandle, _nIns);
1139 #endif
1141 PERFM_NVPROF("code", CodeAlloc::size(codeList));
1143 #ifdef NANOJIT_IA32
1144 NanoAssertMsgf(_fpuStkDepth == 0,"_fpuStkDepth %d\n",_fpuStkDepth);
1145 #endif
1147 debug_only( pageValidate(); )
1148 NanoAssert(_branchStateMap.isEmpty());
1151 void Assembler::releaseRegisters()
1153 RegisterMask active = _allocator.activeMask();
1154 for (Register r = lsReg(active); active; r = nextLsReg(active, r))
1156 LIns *ins = _allocator.getActive(r);
1157 // Clear reg allocation, preserve stack allocation.
1158 _allocator.retire(r);
1159 NanoAssert(r == ins->getReg());
1160 ins->clearReg();
1164 #ifdef PERFM
1165 #define countlir_live() _nvprof("lir-live",1)
1166 #define countlir_ret() _nvprof("lir-ret",1)
1167 #define countlir_alloc() _nvprof("lir-alloc",1)
1168 #define countlir_var() _nvprof("lir-var",1)
1169 #define countlir_use() _nvprof("lir-use",1)
1170 #define countlir_def() _nvprof("lir-def",1)
1171 #define countlir_imm() _nvprof("lir-imm",1)
1172 #define countlir_param() _nvprof("lir-param",1)
1173 #define countlir_cmov() _nvprof("lir-cmov",1)
1174 #define countlir_ld() _nvprof("lir-ld",1)
1175 #define countlir_ldq() _nvprof("lir-ldq",1)
1176 #define countlir_alu() _nvprof("lir-alu",1)
1177 #define countlir_qjoin() _nvprof("lir-qjoin",1)
1178 #define countlir_qlo() _nvprof("lir-qlo",1)
1179 #define countlir_qhi() _nvprof("lir-qhi",1)
1180 #define countlir_fpu() _nvprof("lir-fpu",1)
1181 #define countlir_st() _nvprof("lir-st",1)
1182 #define countlir_stq() _nvprof("lir-stq",1)
1183 #define countlir_jmp() _nvprof("lir-jmp",1)
1184 #define countlir_jcc() _nvprof("lir-jcc",1)
1185 #define countlir_label() _nvprof("lir-label",1)
1186 #define countlir_xcc() _nvprof("lir-xcc",1)
1187 #define countlir_x() _nvprof("lir-x",1)
1188 #define countlir_call() _nvprof("lir-call",1)
1189 #define countlir_jtbl() _nvprof("lir-jtbl",1)
1190 #else
1191 #define countlir_live()
1192 #define countlir_ret()
1193 #define countlir_alloc()
1194 #define countlir_var()
1195 #define countlir_use()
1196 #define countlir_def()
1197 #define countlir_imm()
1198 #define countlir_param()
1199 #define countlir_cmov()
1200 #define countlir_ld()
1201 #define countlir_ldq()
1202 #define countlir_alu()
1203 #define countlir_qjoin()
1204 #define countlir_qlo()
1205 #define countlir_qhi()
1206 #define countlir_fpu()
1207 #define countlir_st()
1208 #define countlir_stq()
1209 #define countlir_jmp()
1210 #define countlir_jcc()
1211 #define countlir_label()
1212 #define countlir_xcc()
1213 #define countlir_x()
1214 #define countlir_call()
1215 #define countlir_jtbl()
1216 #endif
1218 void Assembler::asm_jmp(LIns* ins, InsList& pending_lives)
1220 NanoAssert((ins->isop(LIR_j) && !ins->oprnd1()) ||
1221 (ins->isop(LIR_jf) && ins->oprnd1()->isImmI(0)) ||
1222 (ins->isop(LIR_jt) && ins->oprnd1()->isImmI(1)));
1224 countlir_jmp();
1225 LIns* to = ins->getTarget();
1226 LabelState *label = _labels.get(to);
1227 // The jump is always taken so whatever register state we
1228 // have from downstream code is irrelevant to code before
1229 // this jump. So clear it out. We will pick up register
1230 // state from the jump target, if we have seen that label.
1231 releaseRegisters();
1232 #ifdef NANOJIT_IA32
1233 // Unreachable, so assume correct stack depth.
1234 debug_only( _fpuStkDepth = 0; )
1235 #endif
1236 if (label && label->addr) {
1237 // Forward jump - pick up register state from target.
1238 unionRegisterState(label->regs);
1239 #ifdef NANOJIT_IA32
1240 // Set stack depth according to the register state we just loaded,
1241 // negating the effect of any unreachable x87 stack pop that might
1242 // have been emitted by unionRegisterState().
1243 debug_only( _fpuStkDepth = (_allocator.getActive(FST0) ? -1 : 0); )
1244 #endif
1245 JMP(label->addr);
1247 else {
1248 // Backwards jump.
1249 handleLoopCarriedExprs(pending_lives);
1250 if (!label) {
1251 // save empty register state at loop header
1252 _labels.add(to, 0, _allocator);
1254 else {
1255 intersectRegisterState(label->regs);
1256 #ifdef NANOJIT_IA32
1257 debug_only( _fpuStkDepth = (_allocator.getActive(FST0) ? -1 : 0); )
1258 #endif
1260 JMP(0);
1261 _patches.put(_nIns, to);
1265 void Assembler::asm_jcc(LIns* ins, InsList& pending_lives)
1267 bool branchOnFalse = (ins->opcode() == LIR_jf);
1268 LIns* cond = ins->oprnd1();
1269 if (cond->isImmI()) {
1270 if ((!branchOnFalse && !cond->immI()) || (branchOnFalse && cond->immI())) {
1271 // jmp never taken, not needed
1272 } else {
1273 asm_jmp(ins, pending_lives); // jmp always taken
1275 return;
1278 // Changes to the logic below will likely need to be propagated to Assembler::asm_jov().
1280 countlir_jcc();
1281 LIns* to = ins->getTarget();
1282 LabelState *label = _labels.get(to);
1283 if (label && label->addr) {
1284 // Forward jump to known label. Need to merge with label's register state.
1285 unionRegisterState(label->regs);
1286 asm_branch(branchOnFalse, cond, label->addr);
1288 else {
1289 // Back edge.
1290 handleLoopCarriedExprs(pending_lives);
1291 if (!label) {
1292 // Evict all registers, most conservative approach.
1293 evictAllActiveRegs();
1294 _labels.add(to, 0, _allocator);
1296 else {
1297 // Evict all registers, most conservative approach.
1298 intersectRegisterState(label->regs);
1300 NIns *branch = asm_branch(branchOnFalse, cond, 0);
1301 _patches.put(branch,to);
1305 void Assembler::asm_jov(LIns* ins, InsList& pending_lives)
1307 // The caller is responsible for countlir_* profiling, unlike
1308 // asm_jcc above. The reason for this is that asm_jov may not be
1309 // called if the instruction is dead, and it is our convention
1310 // to count such instructions anyway.
1311 LOpcode op = ins->opcode();
1312 LIns* to = ins->getTarget();
1313 LabelState *label = _labels.get(to);
1314 if (label && label->addr) {
1315 // forward jump to known label. need to merge with label's register state.
1316 unionRegisterState(label->regs);
1317 asm_branch_ov(op, label->addr);
1319 else {
1320 // back edge.
1321 handleLoopCarriedExprs(pending_lives);
1322 if (!label) {
1323 // evict all registers, most conservative approach.
1324 evictAllActiveRegs();
1325 _labels.add(to, 0, _allocator);
1327 else {
1328 // evict all registers, most conservative approach.
1329 intersectRegisterState(label->regs);
1331 NIns *branch = asm_branch_ov(op, 0);
1332 _patches.put(branch,to);
1336 void Assembler::asm_x(LIns* ins)
1338 verbose_only( _thisfrag->nStaticExits++; )
1339 countlir_x();
1340 // Generate the side exit branch on the main trace.
1341 NIns *exit = asm_exit(ins);
1342 JMP(exit);
1345 void Assembler::asm_xcc(LIns* ins)
1347 LIns* cond = ins->oprnd1();
1348 if (cond->isImmI()) {
1349 if ((ins->isop(LIR_xt) && !cond->immI()) || (ins->isop(LIR_xf) && cond->immI())) {
1350 // guard never taken, not needed
1351 } else {
1352 asm_x(ins); // guard always taken
1354 return;
1357 verbose_only( _thisfrag->nStaticExits++; )
1358 countlir_xcc();
1359 // We only support cmp with guard right now, also assume it is 'close'
1360 // and only emit the branch.
1361 NIns* exit = asm_exit(ins); // does intersectRegisterState()
1362 asm_branch(ins->opcode() == LIR_xf, cond, exit);
1365 void Assembler::gen(LirFilter* reader)
1367 NanoAssert(_thisfrag->nStaticExits == 0);
1369 InsList pending_lives(alloc);
1371 NanoAssert(!error());
1373 // What's going on here: we're visiting all the LIR instructions in
1374 // the buffer, working strictly backwards in buffer-order, and
1375 // generating machine instructions for them as we go.
1377 // For each LIns, we first check if it's live. If so we mark its
1378 // operands as also live, and then generate code for it *if
1379 // necessary*. It may not be necessary if the instruction is an
1380 // expression and code has already been generated for all its uses in
1381 // combination with previously handled instructions (ins->isExtant()
1382 // will return false if this is so).
1384 // Note that the backwards code traversal can make register allocation
1385 // confusing. (For example, we restore a value before we spill it!)
1386 // In particular, words like "before" and "after" must be used very
1387 // carefully -- their meaning at regalloc-time is opposite to their
1388 // meaning at run-time. We use the term "pre-regstate" to refer to
1389 // the register allocation state that occurs prior to an instruction's
1390 // execution, and "post-regstate" to refer to the state that occurs
1391 // after an instruction's execution, e.g.:
1393 // pre-regstate: ebx(ins)
1394 // instruction: mov eax, ebx // mov dst, src
1395 // post-regstate: eax(ins)
1397 // At run-time, the instruction updates the pre-regstate into the
1398 // post-regstate (and these states are the real machine's regstates).
1399 // But when allocating registers, because we go backwards, the
1400 // pre-regstate is constructed from the post-regstate (and these
1401 // regstates are those stored in RegAlloc).
1403 // One consequence of generating code backwards is that we tend to
1404 // both spill and restore registers as early (at run-time) as
1405 // possible; this is good for tolerating memory latency. If we
1406 // generated code forwards, we would expect to both spill and restore
1407 // registers as late (at run-time) as possible; this might be better
1408 // for reducing register pressure.
1410 // The trace must end with one of these opcodes. Mark it as live.
1411 NanoAssert(reader->finalIns()->isop(LIR_x) ||
1412 reader->finalIns()->isop(LIR_xtbl) ||
1413 reader->finalIns()->isRet() ||
1414 isLiveOpcode(reader->finalIns()->opcode()));
1416 for (currIns = reader->read(); !currIns->isop(LIR_start); currIns = reader->read())
1418 LIns* ins = currIns; // give it a shorter name for local use
1420 if (!ins->isLive()) {
1421 NanoAssert(!ins->isExtant());
1422 continue;
1425 #ifdef NJ_VERBOSE
1426 // Output the post-regstate (registers and/or activation).
1427 // Because asm output comes in reverse order, doing it now means
1428 // it is printed after the LIR and native code, exactly when the
1429 // post-regstate should be shown.
1430 if ((_logc->lcbits & LC_Native) && (_logc->lcbits & LC_Activation))
1431 printActivationState();
1432 if ((_logc->lcbits & LC_Native) && (_logc->lcbits & LC_RegAlloc))
1433 printRegState();
1434 #endif
1436 LOpcode op = ins->opcode();
1437 switch (op)
1439 default:
1440 NanoAssertMsgf(false, "unsupported LIR instruction: %d\n", op);
1441 break;
1443 case LIR_regfence:
1444 evictAllActiveRegs();
1445 break;
1447 case LIR_livei:
1448 CASE64(LIR_liveq:)
1449 case LIR_lived: {
1450 countlir_live();
1451 LIns* op1 = ins->oprnd1();
1452 op1->setResultLive();
1453 // LIR_allocp's are meant to live until the point of the
1454 // LIR_livep instruction; marking other expressions as
1455 // live ensures that they remain so at loop bottoms.
1456 // LIR_allocp areas require special treatment because they
1457 // are accessed indirectly and the indirect accesses are
1458 // invisible to the assembler, other than via LIR_livep.
1459 // Other expression results are only accessed directly in
1460 // ways that are visible to the assembler, so extending
1461 // those expressions' lifetimes past the last loop edge
1462 // isn't necessary.
1463 if (op1->isop(LIR_allocp)) {
1464 findMemFor(op1);
1465 } else {
1466 pending_lives.add(ins);
1468 break;
1471 case LIR_reti:
1472 CASE64(LIR_retq:)
1473 case LIR_retd:
1474 countlir_ret();
1475 ins->oprnd1()->setResultLive();
1476 asm_ret(ins);
1477 break;
1479 // Allocate some stack space. The value of this instruction
1480 // is the address of the stack space.
1481 case LIR_allocp:
1482 countlir_alloc();
1483 if (ins->isExtant()) {
1484 NanoAssert(ins->isInAr());
1485 if (ins->isInReg())
1486 evict(ins);
1487 freeResourcesOf(ins);
1489 break;
1491 case LIR_immi:
1492 countlir_imm();
1493 if (ins->isExtant()) {
1494 asm_immi(ins);
1496 break;
1498 #ifdef NANOJIT_64BIT
1499 case LIR_immq:
1500 countlir_imm();
1501 if (ins->isExtant()) {
1502 asm_immq(ins);
1504 break;
1505 #endif
1506 case LIR_immd:
1507 countlir_imm();
1508 if (ins->isExtant()) {
1509 asm_immd(ins);
1511 break;
1513 case LIR_paramp:
1514 countlir_param();
1515 if (ins->isExtant()) {
1516 asm_param(ins);
1518 break;
1520 #if NJ_SOFTFLOAT_SUPPORTED
1521 case LIR_hcalli: {
1522 LIns* op1 = ins->oprnd1();
1523 op1->setResultLive();
1524 if (ins->isExtant()) {
1525 // Return result of quad-call in register.
1526 deprecated_prepResultReg(ins, rmask(retRegs[1]));
1527 // If hi half was used, we must use the call to ensure it happens.
1528 findSpecificRegFor(op1, retRegs[0]);
1530 break;
1533 case LIR_dlo2i:
1534 countlir_qlo();
1535 ins->oprnd1()->setResultLive();
1536 if (ins->isExtant()) {
1537 asm_qlo(ins);
1539 break;
1541 case LIR_dhi2i:
1542 countlir_qhi();
1543 ins->oprnd1()->setResultLive();
1544 if (ins->isExtant()) {
1545 asm_qhi(ins);
1547 break;
1549 case LIR_ii2d:
1550 countlir_qjoin();
1551 ins->oprnd1()->setResultLive();
1552 ins->oprnd2()->setResultLive();
1553 if (ins->isExtant()) {
1554 asm_qjoin(ins);
1556 break;
1557 #endif
1558 case LIR_cmovi:
1559 CASE64(LIR_cmovq:)
1560 case LIR_cmovd:
1561 countlir_cmov();
1562 ins->oprnd1()->setResultLive();
1563 ins->oprnd2()->setResultLive();
1564 ins->oprnd3()->setResultLive();
1565 if (ins->isExtant()) {
1566 asm_cmov(ins);
1568 break;
1570 case LIR_lduc2ui:
1571 case LIR_ldus2ui:
1572 case LIR_ldc2i:
1573 case LIR_lds2i:
1574 case LIR_ldi:
1575 countlir_ld();
1576 ins->oprnd1()->setResultLive();
1577 if (ins->isExtant()) {
1578 asm_load32(ins);
1580 break;
1582 CASE64(LIR_ldq:)
1583 case LIR_ldd:
1584 case LIR_ldf2d:
1585 countlir_ldq();
1586 ins->oprnd1()->setResultLive();
1587 if (ins->isExtant()) {
1588 asm_load64(ins);
1590 break;
1592 case LIR_negi:
1593 case LIR_noti:
1594 countlir_alu();
1595 ins->oprnd1()->setResultLive();
1596 if (ins->isExtant()) {
1597 asm_neg_not(ins);
1599 break;
1601 #if defined NANOJIT_64BIT
1602 case LIR_addq:
1603 case LIR_subq:
1604 case LIR_andq:
1605 case LIR_lshq:
1606 case LIR_rshuq:
1607 case LIR_rshq:
1608 case LIR_orq:
1609 case LIR_xorq:
1610 countlir_alu();
1611 ins->oprnd1()->setResultLive();
1612 ins->oprnd2()->setResultLive();
1613 if (ins->isExtant()) {
1614 asm_qbinop(ins);
1616 break;
1617 #endif
1619 case LIR_addi:
1620 case LIR_subi:
1621 case LIR_muli:
1622 case LIR_andi:
1623 case LIR_ori:
1624 case LIR_xori:
1625 case LIR_lshi:
1626 case LIR_rshi:
1627 case LIR_rshui:
1628 CASE86(LIR_divi:)
1629 countlir_alu();
1630 ins->oprnd1()->setResultLive();
1631 ins->oprnd2()->setResultLive();
1632 if (ins->isExtant()) {
1633 asm_arith(ins);
1635 break;
1637 #if defined NANOJIT_IA32 || defined NANOJIT_X64
1638 CASE86(LIR_modi:)
1639 countlir_alu();
1640 ins->oprnd1()->setResultLive();
1641 if (ins->isExtant()) {
1642 asm_arith(ins);
1644 break;
1645 #endif
1647 case LIR_negd:
1648 countlir_fpu();
1649 ins->oprnd1()->setResultLive();
1650 if (ins->isExtant()) {
1651 asm_fneg(ins);
1653 break;
1655 case LIR_addd:
1656 case LIR_subd:
1657 case LIR_muld:
1658 case LIR_divd:
1659 countlir_fpu();
1660 ins->oprnd1()->setResultLive();
1661 ins->oprnd2()->setResultLive();
1662 if (ins->isExtant()) {
1663 asm_fop(ins);
1665 break;
1667 case LIR_i2d:
1668 countlir_fpu();
1669 ins->oprnd1()->setResultLive();
1670 if (ins->isExtant()) {
1671 asm_i2d(ins);
1673 break;
1675 case LIR_ui2d:
1676 countlir_fpu();
1677 ins->oprnd1()->setResultLive();
1678 if (ins->isExtant()) {
1679 asm_ui2d(ins);
1681 break;
1683 case LIR_d2i:
1684 countlir_fpu();
1685 ins->oprnd1()->setResultLive();
1686 if (ins->isExtant()) {
1687 asm_d2i(ins);
1689 break;
1691 #ifdef NANOJIT_64BIT
1692 case LIR_i2q:
1693 case LIR_ui2uq:
1694 countlir_alu();
1695 ins->oprnd1()->setResultLive();
1696 if (ins->isExtant()) {
1697 asm_ui2uq(ins);
1699 break;
1701 case LIR_q2i:
1702 countlir_alu();
1703 ins->oprnd1()->setResultLive();
1704 if (ins->isExtant()) {
1705 asm_q2i(ins);
1707 break;
1709 case LIR_dasq:
1710 countlir_alu();
1711 ins->oprnd1()->setResultLive();
1712 if (ins->isExtant()) {
1713 asm_dasq(ins);
1715 break;
1717 case LIR_qasd:
1718 countlir_alu();
1719 ins->oprnd1()->setResultLive();
1720 if (ins->isExtant()) {
1721 asm_qasd(ins);
1723 break;
1724 #endif
1725 case LIR_sti2c:
1726 case LIR_sti2s:
1727 case LIR_sti:
1728 countlir_st();
1729 ins->oprnd1()->setResultLive();
1730 ins->oprnd2()->setResultLive();
1731 asm_store32(op, ins->oprnd1(), ins->disp(), ins->oprnd2());
1732 break;
1734 CASE64(LIR_stq:)
1735 case LIR_std:
1736 case LIR_std2f: {
1737 countlir_stq();
1738 ins->oprnd1()->setResultLive();
1739 ins->oprnd2()->setResultLive();
1740 LIns* value = ins->oprnd1();
1741 LIns* base = ins->oprnd2();
1742 int dr = ins->disp();
1743 #if NJ_SOFTFLOAT_SUPPORTED
1744 if (value->isop(LIR_ii2d) && op == LIR_std)
1746 // This is correct for little-endian only.
1747 asm_store32(LIR_sti, value->oprnd1(), dr, base);
1748 asm_store32(LIR_sti, value->oprnd2(), dr+4, base);
1750 else
1751 #endif
1753 asm_store64(op, value, dr, base);
1755 break;
1758 case LIR_j:
1759 asm_jmp(ins, pending_lives);
1760 break;
1762 case LIR_jt:
1763 case LIR_jf:
1764 ins->oprnd1()->setResultLive();
1765 asm_jcc(ins, pending_lives);
1766 break;
1768 #if NJ_JTBL_SUPPORTED
1769 case LIR_jtbl: {
1770 countlir_jtbl();
1771 ins->oprnd1()->setResultLive();
1772 // Multiway jump can contain both forward and backward jumps.
1773 // Out of range indices aren't allowed or checked.
1774 // Code after this jtbl instruction is unreachable.
1775 releaseRegisters();
1776 NanoAssert(_allocator.activeMask() == 0);
1778 uint32_t count = ins->getTableSize();
1779 bool has_back_edges = false;
1781 // Merge the regstates of labels we have already seen.
1782 for (uint32_t i = count; i-- > 0;) {
1783 LIns* to = ins->getTarget(i);
1784 LabelState *lstate = _labels.get(to);
1785 if (lstate) {
1786 unionRegisterState(lstate->regs);
1787 verbose_only( RefBuf b; )
1788 asm_output(" %u: [&%s]", i, _thisfrag->lirbuf->printer->formatRef(&b, to));
1789 } else {
1790 has_back_edges = true;
1793 asm_output("forward edges");
1795 // In a multi-way jump, the register allocator has no ability to deal
1796 // with two existing edges that have conflicting register assignments, unlike
1797 // a conditional branch where code can be inserted on the fall-through path
1798 // to reconcile registers. So, frontends *must* insert LIR_regfence at labels of
1799 // forward jtbl jumps. Check here to make sure no registers were picked up from
1800 // any forward edges.
1801 NanoAssert(_allocator.activeMask() == 0);
1803 if (has_back_edges) {
1804 handleLoopCarriedExprs(pending_lives);
1805 // save merged (empty) register state at target labels we haven't seen yet
1806 for (uint32_t i = count; i-- > 0;) {
1807 LIns* to = ins->getTarget(i);
1808 LabelState *lstate = _labels.get(to);
1809 if (!lstate) {
1810 _labels.add(to, 0, _allocator);
1811 verbose_only( RefBuf b; )
1812 asm_output(" %u: [&%s]", i, _thisfrag->lirbuf->printer->formatRef(&b, to));
1815 asm_output("backward edges");
1818 // Emit the jump instruction, which allocates 1 register for the jump index.
1819 NIns** native_table = new (_dataAlloc) NIns*[count];
1820 asm_output("[%p]:", (void*)native_table);
1821 _patches.put((NIns*)native_table, ins);
1822 asm_jtbl(ins, native_table);
1823 break;
1825 #endif
1827 case LIR_label: {
1828 countlir_label();
1829 LabelState *label = _labels.get(ins);
1830 // add profiling inc, if necessary.
1831 verbose_only( if (_logc->lcbits & LC_FragProfile) {
1832 if (ins == _thisfrag->loopLabel)
1833 asm_inc_m32(& _thisfrag->profCount);
1835 if (!label) {
1836 // label seen first, normal target of forward jump, save addr & allocator
1837 _labels.add(ins, _nIns, _allocator);
1839 else {
1840 // we're at the top of a loop
1841 NanoAssert(label->addr == 0);
1842 //evictAllActiveRegs();
1843 intersectRegisterState(label->regs);
1844 label->addr = _nIns;
1846 verbose_only(
1847 RefBuf b;
1848 if (_logc->lcbits & LC_Native) {
1849 asm_output("[%s]", _thisfrag->lirbuf->printer->formatRef(&b, ins));
1851 break;
1854 case LIR_xbarrier:
1855 break;
1857 case LIR_xtbl: {
1858 ins->oprnd1()->setResultLive();
1859 #ifdef NANOJIT_IA32
1860 NIns* exit = asm_exit(ins); // does intersectRegisterState()
1861 asm_switch(ins, exit);
1862 #else
1863 NanoAssertMsg(0, "Not supported for this architecture");
1864 #endif
1865 break;
1868 case LIR_xt:
1869 case LIR_xf:
1870 ins->oprnd1()->setResultLive();
1871 asm_xcc(ins);
1872 break;
1874 case LIR_x:
1875 asm_x(ins);
1876 break;
1878 case LIR_addxovi:
1879 case LIR_subxovi:
1880 case LIR_mulxovi:
1881 verbose_only( _thisfrag->nStaticExits++; )
1882 countlir_xcc();
1883 countlir_alu();
1884 ins->oprnd1()->setResultLive();
1885 ins->oprnd2()->setResultLive();
1886 if (ins->isExtant()) {
1887 NIns* exit = asm_exit(ins); // does intersectRegisterState()
1888 asm_branch_ov(op, exit);
1889 asm_arith(ins);
1891 break;
1893 case LIR_addjovi:
1894 case LIR_subjovi:
1895 case LIR_muljovi:
1896 countlir_jcc();
1897 countlir_alu();
1898 ins->oprnd1()->setResultLive();
1899 ins->oprnd2()->setResultLive();
1900 if (ins->isExtant()) {
1901 asm_jov(ins, pending_lives);
1902 asm_arith(ins);
1904 break;
1906 #ifdef NANOJIT_64BIT
1907 case LIR_addjovq:
1908 case LIR_subjovq:
1909 countlir_jcc();
1910 countlir_alu();
1911 ins->oprnd1()->setResultLive();
1912 ins->oprnd2()->setResultLive();
1913 if (ins->isExtant()) {
1914 asm_jov(ins, pending_lives);
1915 asm_qbinop(ins);
1917 break;
1918 #endif
1920 case LIR_eqd:
1921 case LIR_led:
1922 case LIR_ltd:
1923 case LIR_gtd:
1924 case LIR_ged:
1925 countlir_fpu();
1926 ins->oprnd1()->setResultLive();
1927 ins->oprnd2()->setResultLive();
1928 if (ins->isExtant()) {
1929 asm_condd(ins);
1931 break;
1933 case LIR_eqi:
1934 case LIR_lei:
1935 case LIR_lti:
1936 case LIR_gti:
1937 case LIR_gei:
1938 case LIR_ltui:
1939 case LIR_leui:
1940 case LIR_gtui:
1941 case LIR_geui:
1942 CASE64(LIR_eqq:)
1943 CASE64(LIR_leq:)
1944 CASE64(LIR_ltq:)
1945 CASE64(LIR_gtq:)
1946 CASE64(LIR_geq:)
1947 CASE64(LIR_ltuq:)
1948 CASE64(LIR_leuq:)
1949 CASE64(LIR_gtuq:)
1950 CASE64(LIR_geuq:)
1951 countlir_alu();
1952 ins->oprnd1()->setResultLive();
1953 ins->oprnd2()->setResultLive();
1954 if (ins->isExtant()) {
1955 asm_cond(ins);
1957 break;
1959 case LIR_calli:
1960 CASE64(LIR_callq:)
1961 case LIR_calld:
1962 countlir_call();
1963 for (int i = 0, argc = ins->argc(); i < argc; i++)
1964 ins->arg(i)->setResultLive();
1965 // It must be impure or pure-and-extant -- it couldn't be
1966 // pure-and-not-extant, because there's no way the codegen
1967 // for a call can be folded into the codegen of another
1968 // LIR instruction.
1969 NanoAssert(!ins->callInfo()->_isPure || ins->isExtant());
1970 asm_call(ins);
1971 break;
1973 #ifdef VMCFG_VTUNE
1974 case LIR_file: {
1975 // we traverse backwards so we are now hitting the file
1976 // that is associated with a bunch of LIR_lines we already have seen
1977 if (vtuneHandle) {
1978 void * currentFile = (void *) ins->oprnd1()->immI();
1979 vtuneFile(vtuneHandle, currentFile);
1981 break;
1983 case LIR_line: {
1984 // add a new table entry; we don't yet know which file it belongs
1985 // to, so we need to add it to the update table too
1986 // note the alloc, actual act is delayed; see above
1987 if (vtuneHandle) {
1988 uint32_t currentLine = (uint32_t) ins->oprnd1()->immI();
1989 vtuneLine(vtuneHandle, currentLine, _nIns);
1991 break;
1993 #endif // VMCFG_VTUNE
1997 #ifdef NJ_VERBOSE
1998 // We do final LIR printing inside this loop to avoid printing
1999 // dead LIR instructions. We print the LIns after generating the
2000 // code. This ensures that the LIns will appear in debug output
2001 // *before* the native code, because Assembler::outputf()
2002 // prints everything in reverse.
2004 if (_logc->lcbits & LC_AfterDCE) {
2005 InsBuf b;
2006 LInsPrinter* printer = _thisfrag->lirbuf->printer;
2007 outputf(" %s", printer->formatIns(&b, ins));
2009 #endif
2011 if (error())
2012 return;
2014 // check that all is well (don't check in exit paths since it's more complicated)
2015 debug_only( pageValidate(); )
2016 debug_only( resourceConsistencyCheck(); )
2021 * Write a jump table for the given SwitchInfo and store the table
2022 * address in the SwitchInfo. Every entry will initially point to
2023 * target.
2025 void Assembler::emitJumpTable(SwitchInfo* si, NIns* target)
2027 si->table = (NIns **) alloc.alloc(si->count * sizeof(NIns*));
2028 for (uint32_t i = 0; i < si->count; ++i)
2029 si->table[i] = target;
2032 void Assembler::assignSavedRegs()
2034 // Restore saved registers.
2035 LirBuffer *b = _thisfrag->lirbuf;
2036 for (int i=0, n = NumSavedRegs; i < n; i++) {
2037 LIns *p = b->savedRegs[i];
2038 if (p)
2039 findSpecificRegForUnallocated(p, savedRegs[p->paramArg()]);
2043 void Assembler::reserveSavedRegs()
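// Reserve a stack slot for each callee-saved register placeholder recorded
// in the LirBuffer, so those values have a home in the activation record.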
2045 LirBuffer *b = _thisfrag->lirbuf;
2046 for (int i = 0, n = NumSavedRegs; i < n; i++) {
2047 LIns *ins = b->savedRegs[i];
2048 if (ins)
2049 findMemFor(ins);
2053 void Assembler::assignParamRegs()
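// Pin the fragment's incoming 'state' and 'param1' instructions to the
// argument registers dictated by the calling convention (argRegs).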
2055 LIns* state = _thisfrag->lirbuf->state;
2056 if (state)
2057 findSpecificRegForUnallocated(state, argRegs[state->paramArg()]);
2058 LIns* param1 = _thisfrag->lirbuf->param1;
2059 if (param1)
2060 findSpecificRegForUnallocated(param1, argRegs[param1->paramArg()]);
2063 void Assembler::handleLoopCarriedExprs(InsList& pending_lives)
2065 // ensure that exprs spanning the loop are marked live at the end of the loop
2066 reserveSavedRegs();
2067 for (Seq<LIns*> *p = pending_lives.get(); p != NULL; p = p->tail) {
2068 LIns *ins = p->head;
2069 NanoAssert(isLiveOpcode(ins->opcode()));
2070 LIns *op1 = ins->oprnd1();
2071 // Must findMemFor even if we're going to findRegFor; loop-carried
2072 // operands may spill on another edge, and we need them to always
2073 // spill to the same place.
2074 #if NJ_USES_IMMD_POOL
2075 // Exception: if float constants are true constants, we should
2076 // never call findMemFor on those ops.
2077 if (!op1->isImmD())
2078 #endif
2080 findMemFor(op1);
2082 if (!op1->isImmAny())
2083 findRegFor(op1, ins->isop(LIR_lived) ? FpRegs : GpRegs);
2086 // Clear this list since we have now dealt with those lifetimes; extending
2087 // their lifetimes again later (earlier in the code) serves no purpose.
2088 pending_lives.clear();
2091 void AR::freeEntryAt(uint32_t idx)
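// All slots belonging to a multi-slot entry hold the same LIns pointer (see
// reserveEntry), so walking downwards while the pointer repeats releases the
// whole entry.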
2093 NanoAssert(idx > 0 && idx <= _highWaterMark);
2095 // NB: this loop relies on entry[0] being NULL,
2096 // so that we are guaranteed to terminate
2097 // without accessing negative entries.
2098 LIns* i = _entries[idx];
2099 NanoAssert(i != NULL);
2100 do {
2101 _entries[idx] = NULL;
2102 idx--;
2103 } while (_entries[idx] == i);
2106 #ifdef NJ_VERBOSE
2107 void Assembler::printRegState()
2109 char* s = &outline[0];
2110 VMPI_memset(s, ' ', 26); s[26] = '\0';
2111 s += VMPI_strlen(s);
2112 VMPI_sprintf(s, "RR");
2113 s += VMPI_strlen(s);
2115 RegisterMask active = _allocator.activeMask();
2116 for (Register r = lsReg(active); active != 0; r = nextLsReg(active, r)) {
2117 LIns *ins = _allocator.getActive(r);
2118 NanoAssertMsg(!_allocator.isFree(r),
2119 "Coding error; register is both free and active! " );
2120 RefBuf b;
2121 const char* n = _thisfrag->lirbuf->printer->formatRef(&b, ins);
2123 if (ins->isop(LIR_paramp) && ins->paramKind()==1 &&
2124 r == Assembler::savedRegs[ins->paramArg()])
2126 // don't print callee-saved regs that aren't used
2127 continue;
2130 VMPI_sprintf(s, " %s(%s)", gpn(r), n);
2131 s += VMPI_strlen(s);
2133 output();
2136 void Assembler::printActivationState()
2138 char* s = &outline[0];
2139 VMPI_memset(s, ' ', 26); s[26] = '\0';
2140 s += VMPI_strlen(s);
2141 VMPI_sprintf(s, "AR");
2142 s += VMPI_strlen(s);
2144 LIns* ins = 0;
2145 uint32_t nStackSlots = 0;
2146 int32_t arIndex = 0;
2147 for (AR::Iter iter(_activation); iter.next(ins, nStackSlots, arIndex); )
2149 RefBuf b;
2150 const char* n = _thisfrag->lirbuf->printer->formatRef(&b, ins);
2151 if (nStackSlots > 1) {
2152 VMPI_sprintf(s," %d-%d(%s)", 4*arIndex, 4*(arIndex+nStackSlots-1), n);
2154 else {
2155 VMPI_sprintf(s," %d(%s)", 4*arIndex, n);
2157 s += VMPI_strlen(s);
2159 output();
2161 #endif
2163 inline bool AR::isEmptyRange(uint32_t start, uint32_t nStackSlots) const
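// True if the nStackSlots entries ending at index 'start' (counting
// downwards) are all unused.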
2165 for (uint32_t i=0; i < nStackSlots; i++)
2167 if (_entries[start-i] != NULL)
2168 return false;
2170 return true;
2173 uint32_t AR::reserveEntry(LIns* ins)
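// Allocation strategy, roughly:
//  - single-slot entries: first-fit scan of the slots in use, otherwise grow
//    the high-water mark by one;
//  - multi-slot entries: scan even indices for a hole of nStackSlots free
//    slots (see the 8-byte boundary comment below), otherwise grow the
//    high-water mark, padding by one slot first if it is currently odd.
// Returns the highest index of the reserved block, or 0 if the frame is full.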
2175 uint32_t const nStackSlots = nStackSlotsFor(ins);
2177 if (nStackSlots == 1)
2179 for (uint32_t i = 1; i <= _highWaterMark; i++)
2181 if (_entries[i] == NULL)
2183 _entries[i] = ins;
2184 return i;
2187 if (_highWaterMark < NJ_MAX_STACK_ENTRY - 1)
2189 NanoAssert(_entries[_highWaterMark+1] == BAD_ENTRY);
2190 _highWaterMark++;
2191 _entries[_highWaterMark] = ins;
2192 return _highWaterMark;
2195 else
2197 // Alloc a larger block on an 8-byte boundary.
2198 uint32_t const start = nStackSlots + (nStackSlots & 1);
2199 for (uint32_t i = start; i <= _highWaterMark; i += 2)
2201 if (isEmptyRange(i, nStackSlots))
2203 // place the entry in the table and mark the instruction with it
2204 for (uint32_t j=0; j < nStackSlots; j++)
2206 NanoAssert(i-j <= _highWaterMark);
2207 NanoAssert(_entries[i-j] == NULL);
2208 _entries[i-j] = ins;
2210 return i;
2214 // Be sure to account for any 8-byte-round-up when calculating spaceNeeded.
2215 uint32_t const spaceLeft = NJ_MAX_STACK_ENTRY - _highWaterMark - 1;
2216 uint32_t const spaceNeeded = nStackSlots + (_highWaterMark & 1);
2217 if (spaceLeft >= spaceNeeded)
2219 if (_highWaterMark & 1)
2221 NanoAssert(_entries[_highWaterMark+1] == BAD_ENTRY);
2222 _entries[_highWaterMark+1] = NULL;
2224 _highWaterMark += spaceNeeded;
2225 for (uint32_t j = 0; j < nStackSlots; j++)
2227 NanoAssert(_highWaterMark-j < NJ_MAX_STACK_ENTRY);
2228 NanoAssert(_entries[_highWaterMark-j] == BAD_ENTRY);
2229 _entries[_highWaterMark-j] = ins;
2231 return _highWaterMark;
2234 // no space. oh well.
2235 return 0;
2238 #ifdef _DEBUG
2239 void AR::checkForResourceLeaks() const
2241 for (uint32_t i = 1; i <= _highWaterMark; i++) {
2242 NanoAssertMsgf(_entries[i] == NULL, "frame entry %d wasn't freed\n",4*i);
2245 #endif
2247 uint32_t Assembler::arReserve(LIns* ins)
2249 uint32_t i = _activation.reserveEntry(ins);
2250 if (!i)
2251 setError(StackFull);
2252 return i;
2255 void Assembler::arFree(LIns* ins)
2257 NanoAssert(ins->isInAr());
2258 uint32_t arIndex = ins->getArIndex();
2259 NanoAssert(arIndex);
2260 NanoAssert(_activation.isValidEntry(arIndex, ins));
2261 _activation.freeEntryAt(arIndex); // free any stack space associated with the entry
2265 * Move regs around so the SavedRegs contains the highest priority regs.
2267 void Assembler::evictScratchRegsExcept(RegisterMask ignore)
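// Roughly three phases: (1) walk the active GpRegs outside 'ignore',
// evicting anything that can be cheaply rematerialized and pushing the rest
// onto a priority heap; (2) give the highest-priority survivors a SavedReg
// via findRegFor; (3) evict whatever remains outside SavedRegs and 'ignore'.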
2269 // Find the top GpRegs that are candidates to put in SavedRegs.
2271 // 'tosave' is a binary heap stored in an array. The root is tosave[0],
2272 // left child is at i+1, right child is at i+2.
2274 Register tosave[LastReg-FirstReg+1];
2275 int len=0;
2276 RegAlloc *regs = &_allocator;
2277 RegisterMask evict_set = regs->activeMask() & GpRegs & ~ignore;
2278 for (Register r = lsReg(evict_set); evict_set; r = nextLsReg(evict_set, r)) {
2279 LIns *ins = regs->getActive(r);
2280 if (canRemat(ins)) {
2281 NanoAssert(ins->getReg() == r);
2282 evict(ins);
2284 else {
2285 int32_t pri = regs->getPriority(r);
2286 // add to heap by adding to end and bubbling up
2287 int j = len++;
2288 while (j > 0 && pri > regs->getPriority(tosave[j/2])) {
2289 tosave[j] = tosave[j/2];
2290 j /= 2;
2292 NanoAssert(size_t(j) < sizeof(tosave)/sizeof(tosave[0]));
2293 tosave[j] = r;
2297 // Now the 'tosave' heap has the live exprs in priority order.
2298 // Allocate each of the top priority exprs to a SavedReg.
2300 RegisterMask allow = SavedRegs;
2301 while (allow && len > 0) {
2302 // get the highest priority var
2303 Register hi = tosave[0];
2304 if (!(rmask(hi) & SavedRegs)) {
2305 LIns *ins = regs->getActive(hi);
2306 Register r = findRegFor(ins, allow);
2307 allow &= ~rmask(r);
2309 else {
2310 // hi is already in a saved reg, leave it alone.
2311 allow &= ~rmask(hi);
2314 // remove from heap by replacing root with end element and bubbling down.
2315 if (allow && --len > 0) {
2316 Register last = tosave[len];
2317 int j = 0;
2318 while (j+1 < len) {
2319 int child = j+1;
2320 if (j+2 < len && regs->getPriority(tosave[j+2]) > regs->getPriority(tosave[j+1]))
2321 child++;
2322 if (regs->getPriority(last) > regs->getPriority(tosave[child]))
2323 break;
2324 tosave[j] = tosave[child];
2325 j = child;
2327 tosave[j] = last;
2331 // now evict everything else.
2332 evictSomeActiveRegs(~(SavedRegs | ignore));
2335 // Generate code to restore any registers in 'regs' that are currently active, and evict them from the current register state.
2336 void Assembler::evictSomeActiveRegs(RegisterMask regs)
2338 RegisterMask evict_set = regs & _allocator.activeMask();
2339 for (Register r = lsReg(evict_set); evict_set; r = nextLsReg(evict_set, r))
2340 evict(_allocator.getActive(r));
2344 * Merge the current regstate with a previously stored version.
2346 * Situation Change to _allocator
2347 * --------- --------------------
2348 * !current & !saved none
2349 * !current & saved add saved
2350 * current & !saved evict current (unionRegisterState does nothing)
2351 * current & saved & current==saved none
2352 * current & saved & current!=saved evict current, add saved
2354 void Assembler::intersectRegisterState(RegAlloc& saved)
2356 Register regsTodo[LastReg + 1];
2357 LIns* insTodo[LastReg + 1];
2358 int nTodo = 0;
2360 // Do evictions and pops first.
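// (All conflicting registers must be freed before the reassignment pass
// below, since a saved instruction may need a register that is still held
// by a different instruction in the current state.)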
2361 verbose_only(bool shouldMention=false; )
2362 // The obvious thing to do here is to iterate from FirstReg to LastReg.
2363 // However, on ARM that causes lower-numbered integer registers
2364 // to be saved at higher addresses, which inhibits the formation
2365 // of load/store multiple instructions. Hence iterate the loop the
2366 // other way.
2367 RegisterMask reg_set = _allocator.activeMask() | saved.activeMask();
2368 for (Register r = msReg(reg_set); reg_set; r = nextMsReg(reg_set, r))
2370 LIns* curins = _allocator.getActive(r);
2371 LIns* savedins = saved.getActive(r);
2372 if (curins != savedins)
2374 if (savedins) {
2375 regsTodo[nTodo] = r;
2376 insTodo[nTodo] = savedins;
2377 nTodo++;
2379 if (curins) {
2380 //_nvprof("intersect-evict",1);
2381 verbose_only( shouldMention=true; )
2382 NanoAssert(curins->getReg() == r);
2383 evict(curins);
2386 #ifdef NANOJIT_IA32
2387 if (savedins && r == FST0) {
2388 verbose_only( shouldMention=true; )
2389 FSTP(FST0);
2391 #endif
2394 // Now reassign mainline registers.
2395 for (int i = 0; i < nTodo; i++) {
2396 findSpecificRegFor(insTodo[i], regsTodo[i]);
2398 verbose_only(
2399 if (shouldMention)
2400 verbose_outputf("## merging registers (intersect) with existing edge");
2405 * Merge the current state of the registers with a previously stored version.
2407 * Situation Change to _allocator
2408 * --------- --------------------
2409 * !current & !saved none
2410 * !current & saved add saved
2411 * current & !saved none (intersectRegisterState evicts current)
2412 * current & saved & current==saved none
2413 * current & saved & current!=saved evict current, add saved
2415 void Assembler::unionRegisterState(RegAlloc& saved)
2417 Register regsTodo[LastReg + 1];
2418 LIns* insTodo[LastReg + 1];
2419 int nTodo = 0;
2421 // Do evictions and pops first.
2422 verbose_only(bool shouldMention=false; )
2423 RegisterMask reg_set = _allocator.activeMask() | saved.activeMask();
2424 for (Register r = lsReg(reg_set); reg_set; r = nextLsReg(reg_set, r))
2426 LIns* curins = _allocator.getActive(r);
2427 LIns* savedins = saved.getActive(r);
2428 if (curins != savedins)
2430 if (savedins) {
2431 regsTodo[nTodo] = r;
2432 insTodo[nTodo] = savedins;
2433 nTodo++;
2435 if (curins && savedins) {
2436 //_nvprof("union-evict",1);
2437 verbose_only( shouldMention=true; )
2438 NanoAssert(curins->getReg() == r);
2439 evict(curins);
2442 #ifdef NANOJIT_IA32
2443 if (r == FST0) {
2444 if (savedins) {
2445 // Discard top of x87 stack.
2446 FSTP(FST0);
2448 else if (curins) {
2449 // Saved state did not have fpu reg allocated,
2450 // so we must evict here to keep x87 stack balanced.
2451 evict(curins);
2453 verbose_only( shouldMention=true; )
2455 #endif
2458 // Now reassign mainline registers.
2459 for (int i = 0; i < nTodo; i++) {
2460 findSpecificRegFor(insTodo[i], regsTodo[i]);
2462 verbose_only(
2463 if (shouldMention)
2464 verbose_outputf("## merging registers (union) with existing edge");
2468 // Scan table for instruction with the lowest priority, meaning it is used
2469 // furthest in the future.
2470 LIns* Assembler::findVictim(RegisterMask allow)
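// Rematerializable instructions are treated as priority 0, making them the
// preferred victims: they can be recomputed rather than reloaded from a
// stack slot.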
2472 NanoAssert(allow);
2473 LIns *ins, *vic = 0;
2474 int allow_pri = 0x7fffffff;
2475 RegisterMask vic_set = allow & _allocator.activeMask();
2476 for (Register r = lsReg(vic_set); vic_set; r = nextLsReg(vic_set, r))
2478 ins = _allocator.getActive(r);
2479 int pri = canRemat(ins) ? 0 : _allocator.getPriority(r);
2480 if (!vic || pri < allow_pri) {
2481 vic = ins;
2482 allow_pri = pri;
2485 NanoAssert(vic != 0);
2486 return vic;
2489 #ifdef NJ_VERBOSE
2490 char Assembler::outline[8192];
2491 char Assembler::outlineEOL[512];
2493 void Assembler::output()
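// Flush the current line: append any pending end-of-line annotation, then
// either stash the line in _outputCache (when output is being captured) or
// print it immediately, and finally reset both line buffers.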
2495 // The +1 is for the terminating NUL char.
2496 VMPI_strncat(outline, outlineEOL, sizeof(outline)-(strlen(outline)+1));
2498 if (_outputCache) {
2499 char* str = new (alloc) char[VMPI_strlen(outline)+1];
2500 VMPI_strcpy(str, outline);
2501 _outputCache->insert(str);
2502 } else {
2503 _logc->printf("%s\n", outline);
2506 outline[0] = '\0';
2507 outlineEOL[0] = '\0';
2510 void Assembler::outputf(const char* format, ...)
2512 va_list args;
2513 va_start(args, format);
2515 outline[0] = '\0';
2516 vsprintf(outline, format, args);
2517 output();
2520 void Assembler::setOutputForEOL(const char* format, ...)
2522 va_list args;
2523 va_start(args, format);
2525 outlineEOL[0] = '\0';
2526 vsprintf(outlineEOL, format, args);
2528 #endif // NJ_VERBOSE
2530 void LabelStateMap::add(LIns *label, NIns *addr, RegAlloc &regs) {
2531 LabelState *st = new (alloc) LabelState(addr, regs);
2532 labels.put(label, st);
2535 LabelState* LabelStateMap::get(LIns *label) {
2536 return labels.get(label);
2539 #endif /* FEATURE_NANOJIT */