/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is [Open Source Virtual Machine].
 *
 * The Initial Developer of the Original Code is
 * Adobe System Incorporated.
 * Portions created by the Initial Developer are Copyright (C) 2004-2007
 * the Initial Developer. All Rights Reserved.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
#ifdef FEATURE_NANOJIT

#include "../core/CodegenLIR.h"

// disable some specific warnings which are normally useful, but pervasive in the code-gen macros
#pragma warning(disable:4310) // cast truncates constant value

using namespace nanojit;

void vtuneStart(void*, NIns*);
void vtuneEnd(void*, NIns*);
void vtuneLine(void*, int, NIns*);
void vtuneFile(void*, void*);

using namespace vtune;

/*
 *    - merging paths ( build a graph? ), possibly use external rep to drive codegen
 */
Assembler::Assembler(CodeAlloc& codeAlloc, Allocator& dataAlloc, Allocator& alloc,
                     AvmCore* core, LogControl* logc, const Config& config)
    , _codeAlloc(codeAlloc)
    , _dataAlloc(dataAlloc)
    , _branchStateMap(alloc)

    verbose_only( _logc = logc; )
    verbose_only( _outputCache = 0; )
    verbose_only( outline[0] = '\0'; )
    verbose_only( outlineEOL[0] = '\0'; )
// Per-opcode register hint table. Default to no hints for all
// instructions. It's not marked const because individual back-ends can
// install hint values for opcodes of interest in nInit().
RegisterMask Assembler::nHints[LIR_sentinel+1] = {
#define OP___(op, number, repKind, retType, isCse) \
#include "LIRopcode.tbl"
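// Illustrative sketch (not part of this file, names assumed from the
// x86-style back-ends): a back-end's nInit() might seed the hint table
// like this, with PREFER_SPECIAL deferring to nHint(ins) at query time:
//
//     void Assembler::nInit() {
//         nHints[LIR_calli]  = rmask(retRegs[0]);  // call results land in a fixed reg
//         nHints[LIR_paramp] = PREFER_SPECIAL;     // computed per-instruction by nHint()
//     }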
/*static*/ LIns* const AR::BAD_ENTRY = (LIns*)0xdeadbeef;
void AR::validateQuick()
{
    NanoAssert(_highWaterMark < NJ_MAX_STACK_ENTRY);
    NanoAssert(_entries[0] == NULL);
    // Only check a few entries around _highWaterMark.
    uint32_t const RADIUS = 4;
    uint32_t const lo = (_highWaterMark > 1 + RADIUS ? _highWaterMark - RADIUS : 1);
    uint32_t const hi = (_highWaterMark + 1 + RADIUS < NJ_MAX_STACK_ENTRY
                         ? _highWaterMark + 1 + RADIUS
                         : NJ_MAX_STACK_ENTRY);
    for (uint32_t i = lo; i <= _highWaterMark; ++i)
        NanoAssert(_entries[i] != BAD_ENTRY);
    for (uint32_t i = _highWaterMark+1; i < hi; ++i)
        NanoAssert(_entries[i] == BAD_ENTRY);
}
void AR::validateFull()
{
    NanoAssert(_highWaterMark < NJ_MAX_STACK_ENTRY);
    NanoAssert(_entries[0] == NULL);
    for (uint32_t i = 1; i <= _highWaterMark; ++i)
        NanoAssert(_entries[i] != BAD_ENTRY);
    for (uint32_t i = _highWaterMark+1; i < NJ_MAX_STACK_ENTRY; ++i)
        NanoAssert(_entries[i] == BAD_ENTRY);
}

void AR::validate()
{
    static uint32_t validateCounter = 0;
    if (++validateCounter >= 100)
    {
        validateFull();
        validateCounter = 0;
    }
    else
        validateQuick();
}
inline void AR::clear()
{
    NanoAssert(_entries[0] == NULL);
    for (uint32_t i = 1; i < NJ_MAX_STACK_ENTRY; ++i)
        _entries[i] = BAD_ENTRY;
}
bool AR::Iter::next(LIns*& ins, uint32_t& nStackSlots, int32_t& arIndex)
{
    while (_i <= _ar._highWaterMark) {
        ins = _ar._entries[_i];
        nStackSlots = nStackSlotsFor(ins);
    }
}
void Assembler::arReset()
{
    _branchStateMap.clear();
#if NJ_USES_IMMD_POOL
}

void Assembler::registerResetAll()
{
    nRegisterResetAll(_allocator);
    _allocator.managed = _allocator.free;

    // At start, should have some registers free and none active.
    NanoAssert(0 != _allocator.free);
    NanoAssert(0 == _allocator.activeMask());

    debug_only( _fpuStkDepth = 0; )
}
// Legend for register sets: A = allowed, P = preferred, F = free, S = SavedReg.

// Finds a register in 'setA___' to store the result of 'ins' (one from
// 'set_P__' if possible), evicting one if necessary. Doesn't consider
// the prior state of 'ins'.
//
// Nb: 'setA___' comes from the instruction's use, 'set_P__' comes from its def.
// Eg. in 'add(call(...), ...)':
//   - the call's use means setA___==GpRegs;
//   - the call's def means set_P__==rmask(retRegs[0]).
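//
// Worked example (illustrative masks, not taken from this file): suppose
// setA___ = {eax,ecx,esi}, free = {ecx,esi}, SavedRegs = {esi} and
// set_P__ = {eax}. Then setA_F_ = {ecx,esi}, setA_FS = {esi}, and
// setAPF_ = setAPFS = {} -- so the fallback chain below settles on
// setA_FS and allocates esi.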
Register Assembler::registerAlloc(LIns* ins, RegisterMask setA___, RegisterMask set_P__)
{
    Register r;
    RegisterMask set__F_ = _allocator.free;
    RegisterMask setA_F_ = setA___ & set__F_;

    if (setA_F_) {
        RegisterMask set___S = SavedRegs;
        RegisterMask setA_FS = setA_F_ & set___S;
        RegisterMask setAPF_ = setA_F_ & set_P__;
        RegisterMask setAPFS = setA_FS & set_P__;
        RegisterMask set;

        if      (setAPFS) set = setAPFS;
        else if (setAPF_) set = setAPF_;
        else if (setA_FS) set = setA_FS;
        else              set = setA_F_;

        r = nRegisterAllocFromSet(set);
        _allocator.addActive(r, ins);
    } else {
        // Nothing free, steal one.
        // LSRA says pick the one with the furthest use.
        LIns* vic = findVictim(setA___);
        NanoAssert(vic->isInReg());
        r = vic->getReg();

        evict(vic);

        // r ends up staying active, but the LIns defining it changes.
        _allocator.removeFree(r);
        _allocator.addActive(r, ins);
    }
    return r;
}
// Finds a register in 'allow' to store a temporary value (one not
// associated with a particular LIns), evicting one if necessary. The
// returned register is marked as being free and so can only be safely
// used for code generation purposes until the regstate is next inspected
// or updated.
Register Assembler::registerAllocTmp(RegisterMask allow)
{
    LIns dummyIns;
    Register r = registerAlloc(&dummyIns, allow, /*prefer*/0);

    // Mark r as free, ready for use as a temporary value.
    _allocator.removeActive(r);
    _allocator.addFree(r);
    return r;
}
void Assembler::codeAlloc(NIns*& start, NIns*& end, NIns*& eip
                          verbose_only(, size_t& nBytes))
{
    // save the block we just filled
    if (start)
        CodeAlloc::add(codeList, start, end);

    // CodeAlloc contract: allocations never fail
    _codeAlloc.alloc(start, end);
    verbose_only( nBytes += (end - start) * sizeof(NIns); )
    NanoAssert(uintptr_t(end) - uintptr_t(start) >= (size_t)LARGEST_UNDERRUN_PROT);
    eip = end;
}
void Assembler::clearNInsPtrs()
{
    codeStart = codeEnd = 0;
    exitStart = exitEnd = 0;
}

void Assembler::reset()

void Assembler::pageValidate()
{
    // This may be a normal code chunk or an exit code chunk.
    NanoAssertMsg(codeStart <= _nIns && _nIns <= codeEnd,
                  "Native instruction pointer overstep paging bounds; check overrideProtect for last instruction");
}
bool AR::isValidEntry(uint32_t idx, LIns* ins) const
{
    return idx > 0 && idx <= _highWaterMark && _entries[idx] == ins;
}
void AR::checkForResourceConsistency(const RegAlloc& regs)
{
    for (uint32_t i = 1; i <= _highWaterMark; ++i)
    {
        LIns* ins = _entries[i];
        if (!ins)
            continue;
        uint32_t arIndex = ins->getArIndex();
        NanoAssert(arIndex != 0);
        if (ins->isop(LIR_allocp)) {
            int const n = i + (ins->size()>>2);
            for (int j=i+1; j < n; j++) {
                NanoAssert(_entries[j]==ins);
            }
            NanoAssert(arIndex == (uint32_t)n-1);
            i = n-1;
        }
        else if (ins->isQorD()) {
            NanoAssert(_entries[i + 1]==ins);
            i += 1; // skip high word
        }
        NanoAssertMsg(arIndex == i, "Stack record index mismatch");
        NanoAssertMsg(!ins->isInReg() || regs.isConsistent(ins->getReg(), ins),
                      "Register record mismatch");
    }
}
void Assembler::resourceConsistencyCheck()
{
    NanoAssert(!error());

    // Within the expansion of a single LIR instruction, we may use the x87
    // stack for unmanaged temporaries. Otherwise, we do not use the x87 stack
    // as such, but use the top element alone as a single allocatable FP register.
    // Compensation code must be inserted to keep the stack balanced and avoid
    // overflow, and the mechanisms for this are rather fragile and IA32-specific.
    // The predicate below should hold between any pair of instructions within
    // a basic block, at labels, and just after a conditional branch. Currently,
    // we enforce this condition between all pairs of instructions, but this is
    // overly restrictive, and would fail if we did not generate unreachable x87
    // stack pops following unconditional branches.
    NanoAssert((_allocator.active[REGNUM(FST0)] && _fpuStkDepth == -1) ||
               (!_allocator.active[REGNUM(FST0)] && _fpuStkDepth == 0));

    _activation.checkForResourceConsistency(_allocator);
    registerConsistencyCheck();
}
void Assembler::registerConsistencyCheck()
{
    RegisterMask managed = _allocator.managed;
    for (Register r = lsReg(managed); managed; r = nextLsReg(managed, r)) {
        // A register managed by register allocation must be either
        // free or active, but not both.
        if (_allocator.isFree(r)) {
            NanoAssertMsgf(_allocator.getActive(r)==0,
                           "register %s is free but assigned to ins", gpn(r));
        } else {
            // An LIns defining a register must have that register in
            // its reservation.
            LIns* ins = _allocator.getActive(r);
            NanoAssert(ins);
            NanoAssertMsg(r == ins->getReg(), "Register record mismatch");
        }
    }

    RegisterMask not_managed = ~_allocator.managed;
    for (Register r = lsReg(not_managed); not_managed; r = nextLsReg(not_managed, r)) {
        // A register not managed by register allocation must be
        // neither free nor active.
        if (REGNUM(r) <= LastRegNum) {
            NanoAssert(!_allocator.isFree(r));
            NanoAssert(!_allocator.getActive(r));
        }
    }
}
void Assembler::findRegFor2(RegisterMask allowa, LIns* ia, Register& ra,
                            RegisterMask allowb, LIns* ib, Register& rb)
{
    // There should be some overlap between 'allowa' and 'allowb', else
    // there's no point calling this function.
    NanoAssert(allowa & allowb);

    if (ia == ib) {
        ra = rb = findRegFor(ia, allowa & allowb);  // use intersection(allowa, allowb)
    } else if (ib->isInRegMask(allowb)) {
        // 'ib' is already in an allowable reg -- don't let it get evicted
        // when finding 'ra'.
        rb = ib->getReg();
        ra = findRegFor(ia, allowa & ~rmask(rb));
    } else {
        ra = findRegFor(ia, allowa);
        rb = findRegFor(ib, allowb & ~rmask(ra));
    }
}
Register Assembler::findSpecificRegFor(LIns* i, Register w)
{
    return findRegFor(i, rmask(w));
}
// Like findRegFor(), but called when the LIns is used as a pointer. It
// doesn't have to be called, findRegFor() can still be used, but it can
// optimize the LIR_allocp case by indexing off FP, thus saving the use of
// a GP register.
Register Assembler::getBaseReg(LIns* base, int& d, RegisterMask allow)
{
    if (base->isop(LIR_allocp)) {
        // The value of a LIR_allocp is a pointer to its stack memory,
        // which is always relative to FP. So we can just return FP if we
        // also adjust 'd' (and can do so in a valid manner). Or, in the
        // PEDANTIC case, we can just assign a register as normal;
        // findRegFor() will allocate the stack memory for LIR_allocp if
        // necessary.
        d += findMemFor(base);
        return FP;
    }
    return findRegFor(base, allow);
}
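// E.g. (sketch, made-up offsets): for a load 'ld alloc1[8]' where alloc1's
// stack area sits at displacement 16 from FP, getBaseReg() returns FP and
// bumps d from 8 to 24, so the load is encoded FP-relative and no GP
// register is consumed on the base address.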
// Like findRegFor2(), but used for stores where the base value has the
// same type as the stored value, eg. in asm_store32() on 32-bit platforms
// and asm_store64() on 64-bit platforms. Similar to getBaseReg(),
// findRegFor2() can be called instead, but this function can optimize the
// case where the base value is a LIR_allocp.
void Assembler::getBaseReg2(RegisterMask allowValue, LIns* value, Register& rv,
                            RegisterMask allowBase, LIns* base, Register& rb, int& d)
{
    if (base->isop(LIR_allocp)) {
        rb = FP;
        d += findMemFor(base);
        rv = findRegFor(value, allowValue);
        return;
    }
    findRegFor2(allowValue, value, rv, allowBase, base, rb);
}
RegisterMask Assembler::hint(LIns* ins)
{
    RegisterMask prefer = nHints[ins->opcode()];
    return (prefer == PREFER_SPECIAL) ? nHint(ins) : prefer;
}
// Finds a register in 'allow' to hold the result of 'ins'. Used when we
// encounter a use of 'ins'. The actions depend on the prior regstate of
// 'ins':
// - If the result of 'ins' is not in any register, we find an allowed
//   one, evicting one if necessary.
// - If the result of 'ins' is already in an allowed register, we use that.
// - If the result of 'ins' is already in a not-allowed register, we find an
//   allowed one and move it.
Register Assembler::findRegFor(LIns* ins, RegisterMask allow)
{
    if (ins->isop(LIR_allocp)) {
        // Never allocate a reg for this without stack space too.
        findMemFor(ins);
    }

    Register r;

    if (!ins->isInReg()) {
        // 'ins' isn't in a register (must be in a spill slot or nowhere).
        r = registerAlloc(ins, allow, hint(ins));
    } else if (rmask(r = ins->getReg()) & allow) {
        // 'ins' is in an allowed register.
        _allocator.useActive(r);
    } else {
        // 'ins' is in a register (r) that's not in 'allow'.
#ifdef NANOJIT_IA32
        if (((rmask(r)&XmmRegs) && !(allow&XmmRegs)) ||
            ((rmask(r)&x87Regs) && !(allow&x87Regs)))
        {
            // x87 <-> xmm copy required
            //_nvprof("fpu-evict",1);
            evict(ins);
            r = registerAlloc(ins, allow, hint(ins));
        } else
#elif defined(NANOJIT_PPC) || defined(NANOJIT_MIPS) || defined(NANOJIT_SPARC)
        if (((rmask(r)&GpRegs) && !(allow&GpRegs)) ||
            ((rmask(r)&FpRegs) && !(allow&FpRegs)))
        {
            evict(ins);
            r = registerAlloc(ins, allow, hint(ins));
        } else
#endif
        {
            // The post-state register holding 'ins' is 's', the pre-state
            // register holding 'ins' is 'r'. For example, if s=eax and r=ecx:
            //
            //   pre-state:   ecx(ins)
            //   instruction: mov eax, ecx
            //   post-state:  eax(ins)
            //
            Register s = r;
            _allocator.retire(r);
            r = registerAlloc(ins, allow, hint(ins));

            // 'ins' is in 'allow', in register r (different to the old r);
            // the old r is now free.
            if ((rmask(s) & GpRegs) && (rmask(r) & GpRegs)) {
                MR(s, r);   // move 'ins' from its pre-state reg (r) to its post-state reg (s)
            } else {
                asm_nongp_copy(s, r);
            }
        }
    }
    return r;
}
// Like findSpecificRegFor(), but only for when 'r' is known to be free
// and 'ins' is known to not already have a register allocated. Updates
// the regstate (maintaining the invariants) but does not generate any
// code. The return value is redundant, always being 'r', but it's
// sometimes useful to have it there for assignments.
Register Assembler::findSpecificRegForUnallocated(LIns* ins, Register r)
{
    if (ins->isop(LIR_allocp)) {
        // never allocate a reg for this without stack space too
        findMemFor(ins);
    }

    NanoAssert(!ins->isInReg());
    NanoAssert(_allocator.free & rmask(r));

    ins->setReg(r);
    _allocator.removeFree(r);
    _allocator.addActive(r, ins);

    return r;
}
#if NJ_USES_IMMD_POOL
const uint64_t* Assembler::findImmDFromPool(uint64_t q)
{
    uint64_t* p = _immDPool.get(q);
    if (!p)
    {
        p = new (_dataAlloc) uint64_t;
        *p = q;
        _immDPool.put(q, p);
    }
    return p;
}
#endif
int Assembler::findMemFor(LIns* ins)
{
#if NJ_USES_IMMD_POOL
    NanoAssert(!ins->isImmD());
#endif
    if (!ins->isInAr()) {
        uint32_t const arIndex = arReserve(ins);
        ins->setArIndex(arIndex);
        NanoAssert(_activation.isValidEntry(ins->getArIndex(), ins) == (arIndex != 0));
    }
    return disp(ins);
}
// XXX: this function is dangerous and should be phased out;
// see bug 513615. Calls to it should be replaced with a
// prepareResultReg() / generate code / freeResourcesOf() sequence.
Register Assembler::deprecated_prepResultReg(LIns* ins, RegisterMask allow)
{
    // We used to have to worry about possibly popping the x87 stack here.
    // But this function is no longer used on i386, and this assertion
    // ensures that is so.
    Register r = findRegFor(ins, allow);
    deprecated_freeRsrcOf(ins);
    return r;
}
// Finds a register in 'allow' to hold the result of 'ins'. Also
// generates code to spill the result if necessary. Called just prior to
// generating the code for 'ins' (because we generate code backwards).
//
// An example where no spill is necessary. Lines marked '*' are those
// done by this function.
//
//   regstate:  R
//   asm:       define res into r
// * regstate:  R + r(res)
//
// An example where a spill is necessary.
//
//   regstate:  R
//   asm:       define res into r
// * regstate:  R + r(res)
// * asm:       spill res from r
//              ...
//   asm:       restore res into r2
//   regstate:  R + r2(res) + other changes from "..."
//   asm:       use res in r2
//
Register Assembler::prepareResultReg(LIns* ins, RegisterMask allow)
{
    // At this point, we know the result of 'ins' is used later in the
    // code, unless it is a call to an impure function that must be
    // included for effect even though its result is ignored. It may have
    // had to be evicted, in which case the restore will have already been
    // generated, so we now generate the spill. QUERY: Is there any attempt
    // to elide the spill if we know that all restores can be rematerialized?
#ifdef NANOJIT_IA32
    const bool notInFST0 = (!ins->isInReg() || ins->getReg() != FST0);
    Register r = findRegFor(ins, allow);
    // If the result register is FST0, but FST0 is not in the post-regstate,
    // then we must pop the x87 stack. This may occur because the result is
    // unused, or because it has been stored to a spill slot or an XMM register.
    const bool needPop = notInFST0 && (r == FST0);
    const bool didSpill = asm_maybe_spill(ins, needPop);
    if (!didSpill && needPop) {
        // If the instruction is spilled, then the pop will have already
        // been performed by the store to the stack slot. Otherwise, we
        // must pop now. This may occur when the result of a LIR_calld
        // to an impure (side-effecting) function is not used.
        FSTP(FST0);
    }
#else
    Register r = findRegFor(ins, allow);
    asm_maybe_spill(ins, false);
#endif
    return r;
}
bool Assembler::asm_maybe_spill(LIns* ins, bool pop)
{
    if (ins->isInAr()) {
        int d = arDisp(ins);
        Register r = ins->getReg();
        verbose_only( RefBuf b;
                      if (_logc->lcbits & LC_Native) {
                          setOutputForEOL("  <= spill %s",
                          _thisfrag->lirbuf->printer->formatRef(&b, ins)); } )
#ifdef NANOJIT_IA32
        asm_spill(r, d, pop);
#else
        asm_spill(r, d, ins->isQorD());
#endif
        return true;
    }
    return false;
}
// XXX: This function is error-prone and should be phased out; see bug 513615.
void Assembler::deprecated_freeRsrcOf(LIns* ins)
{
    if (ins->isInReg()) {
        asm_maybe_spill(ins, /*pop*/false);
        _allocator.retire(ins->getReg()); // free any register associated with entry
    }
    arFree(ins);                          // free any AR space associated with entry
}
// Frees all record of registers and spill slots used by 'ins'.
void Assembler::freeResourcesOf(LIns* ins)
{
    if (ins->isInReg()) {
        _allocator.retire(ins->getReg()); // free any register associated with entry
    }
    arFree(ins);                          // free any AR space associated with entry
}
// Frees 'r' in the RegAlloc regstate, if it's not already free.
void Assembler::evictIfActive(Register r)
{
    if (LIns* vic = _allocator.getActive(r)) {
        NanoAssert(vic->getReg() == r);
        evict(vic);
    }
}
// Frees 'r' (which currently holds the result of 'vic') in the regstate.
//
//   pre-regstate:  eax(ld1)
//   instruction:   mov ebx,-4(ebp) <= restore add1   # %ebx is dest
//   post-regstate: eax(ld1) ebx(add1)
//
// At run-time we are *restoring* 'add1' into %ebx, hence the call to
// asm_restore(). But at regalloc-time we are moving backwards through
// the code, so in that sense we are *evicting* 'add1' from %ebx.
//
void Assembler::evict(LIns* vic)
{
    // Not free, need to steal.
    Register r = vic->getReg();

    NanoAssert(!_allocator.isFree(r));
    NanoAssert(vic == _allocator.getActive(r));

    verbose_only( RefBuf b;
                  if (_logc->lcbits & LC_Native) {
                      setOutputForEOL("  <= restore %s",
                      _thisfrag->lirbuf->printer->formatRef(&b, vic)); } )

    _allocator.retire(r);

    // At this point 'vic' is unused (if rematerializable), or in a spill
    // slot (if not).
}
// If we have this:
//
//   W = ld(addp(B, lshp(I, k)))[d] , where int(1) <= k <= int(3)
//
// then we set base=B, index=I, scale=k.
//
// Otherwise, we must have this:
//
//   W = ld(addp(B, I))[d]
//
// and we set base=B, index=I, scale=0.
void Assembler::getBaseIndexScale(LIns* addp, LIns** base, LIns** index, int* scale)
{
    NanoAssert(addp->isop(LIR_addp));

    *base = addp->oprnd1();
    LIns* rhs = addp->oprnd2();
    int k;
    if (rhs->opcode() == LIR_lshp && rhs->oprnd2()->isImmI() &&
        (k = rhs->oprnd2()->immI(), (1 <= k && k <= 3)))
    {
        *index = rhs->oprnd1();
        *scale = k;
    } else {
        *index = rhs;
        *scale = 0;
    }
}
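// E.g. (sketch): for 'W = ld(addp(B, lshp(I, 3)))[16]' this yields base=B,
// index=I, scale=3, which an x86-style back-end can fold into a single
// [B + I*8 + 16] addressing mode instead of materializing the shift.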
void Assembler::patch(GuardRecord* lr)
{
    if (!lr->jmp) // the guard might have been eliminated as redundant
        return;
    Fragment* frag = lr->exit->target;
    NanoAssert(frag->fragEntry != 0);
    nPatchBranch((NIns*)lr->jmp, frag->fragEntry);
    CodeAlloc::flushICache(lr->jmp, LARGEST_BRANCH_PATCH);
    verbose_only(verbose_outputf("patching jump at %p to target %p\n",
                                 lr->jmp, frag->fragEntry);)
}

void Assembler::patch(SideExit* exit)
{
    GuardRecord* rec = exit->guards;
    NanoAssert(rec);
    while (rec) {
        patch(rec);
        rec = rec->next;
    }
}

void Assembler::patch(SideExit* exit, SwitchInfo* si)
{
    for (GuardRecord* lr = exit->guards; lr; lr = lr->next) {
        Fragment* frag = lr->exit->target;
        NanoAssert(frag->fragEntry != 0);
        si->table[si->index] = frag->fragEntry;
    }
}
NIns* Assembler::asm_exit(LIns* guard)
{
    SideExit* exit = guard->record()->exit;
    NIns* at = 0;
    if (!_branchStateMap.get(exit))
    {
        at = asm_leave_trace(guard);
    }
    else
    {
        RegAlloc* captured = _branchStateMap.get(exit);
        intersectRegisterState(*captured);
        at = exit->target->fragEntry;
        _branchStateMap.remove(exit);
    }
    return at;
}
NIns* Assembler::asm_leave_trace(LIns* guard)
{
    verbose_only( verbose_outputf("----------------------------------- ## END exit block %p", guard);)

    // This point is unreachable. So free all the registers. If an
    // instruction has a stack entry we will leave it alone, otherwise we
    // free it entirely. intersectRegisterState() will restore.
    RegAlloc capture = _allocator;
    releaseRegisters();

    debug_only( _sv_fpuStkDepth = _fpuStkDepth; _fpuStkDepth = 0; )

    // Restore the callee-saved registers and parameters.
    assignSavedRegs();
    assignParamRegs();

    intersectRegisterState(capture);

    // this can be useful for breaking whenever an exit is taken

    // we are done producing the exit logic for the guard so demark where our exit block code begins
    NIns* jmpTarget = _nIns;    // target in exit path for our mainline conditional jump

    // swap back pointers, effectively storing the last location used in the exit path
    swapCodeChunks();

    //verbose_only( verbose_outputf("   LIR_xt/xf swapCodeChunks, _nIns is now %08X(%08X), _nExitIns is now %08X(%08X)",_nIns, *_nIns,_nExitIns,*_nExitIns) );
    verbose_only( verbose_outputf("%p:", jmpTarget);)
    verbose_only( verbose_outputf("----------------------------------- ## BEGIN exit block (LIR_xt|LIR_xf)") );

    NanoAssertMsgf(_fpuStkDepth == _sv_fpuStkDepth, "LIR_xtf, _fpuStkDepth=%d, expect %d",_fpuStkDepth, _sv_fpuStkDepth);
    debug_only( _fpuStkDepth = _sv_fpuStkDepth; _sv_fpuStkDepth = 9999; )

    return jmpTarget;
}
void Assembler::compile(Fragment* frag, Allocator& alloc, bool optimize
                        verbose_only(, LInsPrinter* printer))
{
    verbose_only(
    bool anyVerb  = (_logc->lcbits & 0xFFFF & ~LC_FragProfile) > 0;
    bool liveVerb = (_logc->lcbits & 0xFFFF &  LC_Liveness) > 0;
    )

    /* BEGIN decorative preamble */
    verbose_only( if (anyVerb) {
        _logc->printf("========================================"
                      "========================================\n");
        _logc->printf("=== BEGIN LIR::compile(%p, %p)\n",
                      (void*)this, (void*)frag);
        _logc->printf("===\n");
    })
    /* END decorative preamble */

    verbose_only( if (liveVerb) {
        _logc->printf("=== Results of liveness analysis:\n");
        _logc->printf("===\n");
        LirReader br(frag->lastIns);
        LirFilter* lir = &br;
        if (optimize) {
            StackFilter* sf = new (alloc) StackFilter(lir, alloc, frag->lirbuf->sp);
            lir = sf;
        }
        live(lir, alloc, frag, _logc);
    })

    /* Set up the generic text output cache for the assembler */
    verbose_only( StringList asmOutput(alloc); )
    verbose_only( _outputCache = &asmOutput; )

    beginAssembly(frag);
    if (error())
        return;

    //_logc->printf("recompile trigger %X kind %d\n", (int)frag, frag->kind);

    verbose_only( if (anyVerb) {
        _logc->printf("=== Translating LIR fragments into assembly:\n");
    })

    // now the main trunk
    verbose_only( RefBuf b; )
    verbose_only( if (anyVerb) {
        _logc->printf("=== -- Compile trunk %s: begin\n", printer->formatAddr(&b, frag));
    })

    // Used for debug printing, if needed
    debug_only(ValidateReader *validate = NULL;)
    verbose_only(
    ReverseLister *pp_init = NULL;
    ReverseLister *pp_after_sf = NULL;
    )

    // The LIR passes through these filters as listed in this
    // function, viz, top to bottom.

    // set up backwards pipeline: assembler <- StackFilter <- LirReader
    LirFilter* lir = new (alloc) LirReader(frag->lastIns);

    debug_only(
    validate = new (alloc) ValidateReader(lir);
    lir = validate;
    )

    verbose_only( if (_logc->lcbits & LC_ReadLIR) {
        pp_init = new (alloc) ReverseLister(lir, alloc, frag->lirbuf->printer, _logc,
                                            "Initial LIR");
        lir = pp_init;
    })

    if (optimize) {
        StackFilter* stackfilter = new (alloc) StackFilter(lir, alloc, frag->lirbuf->sp);
        lir = stackfilter;
    }

    verbose_only( if (_logc->lcbits & LC_AfterSF) {
        pp_after_sf = new (alloc) ReverseLister(lir, alloc, frag->lirbuf->printer, _logc,
                                                "After StackFilter");
        lir = pp_after_sf;
    })

    assemble(frag, lir);

    // If we were accumulating debug info in the various ReverseListers,
    // call finish() to emit whatever contents they have accumulated.
    verbose_only(
    if (pp_init)     pp_init->finish();
    if (pp_after_sf) pp_after_sf->finish();
    )

    verbose_only( if (anyVerb) {
        _logc->printf("=== -- Compile trunk %s: end\n", printer->formatAddr(&b, frag));
    })

    endAssembly(frag);

    // Reverse output so that assembly is displayed low-to-high.
    // Up to this point, _outputCache has been non-NULL, and so has been
    // accumulating output. Now we set it to NULL, traverse the entire
    // list of stored strings, and hand them a second time to output.
    // Since _outputCache is now NULL, outputf just hands these strings
    // directly onwards to _logc->printf.
    verbose_only( if (anyVerb) {
        _logc->printf("\n");
        _logc->printf("=== Aggregated assembly output: BEGIN\n");
        _logc->printf("===\n");
        _outputCache = 0;
        for (Seq<char*>* p = asmOutput.get(); p != NULL; p = p->tail) {
            char *str = p->head;
            outputf("  %s", str);
        }
        _logc->printf("===\n");
        _logc->printf("=== Aggregated assembly output: END\n");
    })

    if (error())
        frag->fragEntry = 0;

    verbose_only( frag->nCodeBytes += codeBytes; )
    verbose_only( frag->nExitBytes += exitBytes; )

    /* BEGIN decorative postamble */
    verbose_only( if (anyVerb) {
        _logc->printf("\n");
        _logc->printf("===\n");
        _logc->printf("=== END LIR::compile(%p, %p)\n",
                      (void*)this, (void*)frag);
        _logc->printf("========================================"
                      "========================================\n");
        _logc->printf("\n");
    })
    /* END decorative postamble */
}
void Assembler::beginAssembly(Fragment* frag)
{
    verbose_only( codeBytes = 0; )
    verbose_only( exitBytes = 0; )

    NanoAssert(codeList == 0);
    NanoAssert(codeStart == 0);
    NanoAssert(codeEnd == 0);
    NanoAssert(exitStart == 0);
    NanoAssert(exitEnd == 0);
    NanoAssert(_nIns == 0);
    NanoAssert(_nExitIns == 0);

    // native code gen buffer setup
    nativePageSetup();

    // make sure we got memory at least one page
    if (error()) return;
void Assembler::assemble(Fragment* frag, LirFilter* reader)
{
    if (error()) return;

    // check the fragment is starting out with a sane profiling state
    verbose_only( NanoAssert(frag->nStaticExits == 0); )
    verbose_only( NanoAssert(frag->nCodeBytes == 0); )
    verbose_only( NanoAssert(frag->nExitBytes == 0); )
    verbose_only( NanoAssert(frag->profCount == 0); )
    verbose_only( if (_logc->lcbits & LC_FragProfile)
                      NanoAssert(frag->profFragID > 0);
                  else
                      NanoAssert(frag->profFragID == 0); )

    gen(reader);

    // patch all branches
    NInsMap::Iter iter(_patches);
    while (iter.next()) {
        NIns* where = iter.key();
        LIns* target = iter.value();
        if (target->isop(LIR_jtbl)) {
            // Need to patch up a whole jump table, 'where' is the table.
            LIns* jtbl = target;
            NIns** native_table = (NIns**) (void *) where;
            for (uint32_t i = 0, n = jtbl->getTableSize(); i < n; i++) {
                LabelState* lstate = _labels.get(jtbl->getTarget(i));
                NIns* ntarget = lstate->addr;
                if (ntarget) {
                    native_table[i] = ntarget;
                } else {
                    setError(UnknownBranch);
                    break;
                }
            }
        } else {
            // target is a label for a single-target branch
            LabelState* lstate = _labels.get(target);
            NIns* ntarget = lstate->addr;
            if (ntarget) {
                nPatchBranch(where, ntarget);
            } else {
                setError(UnknownBranch);
                break;
            }
        }
    }
}
void Assembler::cleanupAfterError()
{
    _codeAlloc.freeAll(codeList);
    _codeAlloc.free(exitStart, exitEnd);
    _codeAlloc.free(codeStart, codeEnd);
    _codeAlloc.markAllExec(); // expensive but safe, we mark all code pages R-X
}
void Assembler::endAssembly(Fragment* frag)
{
    // don't try to patch code if we are in an error state since we might have partially
    // overwritten the code cache already
    if (error()) {
        // something went wrong, release all allocated code memory
        cleanupAfterError();
        return;
    }

    NIns* fragEntry = genPrologue();
    verbose_only( asm_output("[prologue]"); )

    debug_only(_activation.checkForResourceLeaks());

    NanoAssert(!_inExit);
    // save used parts of current block on fragment's code list, free the rest
#if defined(NANOJIT_ARM) || defined(NANOJIT_MIPS)
    // [codeStart, _nSlot) ... gap ... [_nIns, codeEnd)
    if (_nExitIns) {
        _codeAlloc.addRemainder(codeList, exitStart, exitEnd, _nExitSlot, _nExitIns);
        verbose_only( exitBytes -= (_nExitIns - _nExitSlot) * sizeof(NIns); )
    }
    _codeAlloc.addRemainder(codeList, codeStart, codeEnd, _nSlot, _nIns);
    verbose_only( codeBytes -= (_nIns - _nSlot) * sizeof(NIns); )
#else
    // [codeStart ... gap ... [_nIns, codeEnd))
    if (_nExitIns) {
        _codeAlloc.addRemainder(codeList, exitStart, exitEnd, exitStart, _nExitIns);
        verbose_only( exitBytes -= (_nExitIns - exitStart) * sizeof(NIns); )
    }
    _codeAlloc.addRemainder(codeList, codeStart, codeEnd, codeStart, _nIns);
    verbose_only( codeBytes -= (_nIns - codeStart) * sizeof(NIns); )
#endif

    // note: the code pages are no longer writable from this point onwards
    _codeAlloc.markExec(codeList);

    // at this point all our new code is in the d-cache and not the i-cache,
    // so flush the i-cache on CPUs that need it.
    CodeAlloc::flushICache(codeList);

    // save entry point pointers
    frag->fragEntry = fragEntry;
    frag->setCode(_nIns);

#ifdef VMCFG_VTUNE
    if (vtuneHandle)
    {
        vtuneEnd(vtuneHandle, codeEnd);
        vtuneStart(vtuneHandle, _nIns);
    }
#endif

    PERFM_NVPROF("code", CodeAlloc::size(codeList));

    NanoAssertMsgf(_fpuStkDepth == 0,"_fpuStkDepth %d\n",_fpuStkDepth);

    debug_only( pageValidate(); )
    NanoAssert(_branchStateMap.isEmpty());
}
void Assembler::releaseRegisters()
{
    RegisterMask active = _allocator.activeMask();
    for (Register r = lsReg(active); active; r = nextLsReg(active, r))
    {
        LIns* ins = _allocator.getActive(r);
        // Clear reg allocation, preserve stack allocation.
        _allocator.retire(r);
        NanoAssert(r == ins->getReg());
        ins->clearReg();
    }
}
#ifdef PERFM
#define countlir_live() _nvprof("lir-live",1)
#define countlir_ret() _nvprof("lir-ret",1)
#define countlir_alloc() _nvprof("lir-alloc",1)
#define countlir_var() _nvprof("lir-var",1)
#define countlir_use() _nvprof("lir-use",1)
#define countlir_def() _nvprof("lir-def",1)
#define countlir_imm() _nvprof("lir-imm",1)
#define countlir_param() _nvprof("lir-param",1)
#define countlir_cmov() _nvprof("lir-cmov",1)
#define countlir_ld() _nvprof("lir-ld",1)
#define countlir_ldq() _nvprof("lir-ldq",1)
#define countlir_alu() _nvprof("lir-alu",1)
#define countlir_qjoin() _nvprof("lir-qjoin",1)
#define countlir_qlo() _nvprof("lir-qlo",1)
#define countlir_qhi() _nvprof("lir-qhi",1)
#define countlir_fpu() _nvprof("lir-fpu",1)
#define countlir_st() _nvprof("lir-st",1)
#define countlir_stq() _nvprof("lir-stq",1)
#define countlir_jmp() _nvprof("lir-jmp",1)
#define countlir_jcc() _nvprof("lir-jcc",1)
#define countlir_label() _nvprof("lir-label",1)
#define countlir_xcc() _nvprof("lir-xcc",1)
#define countlir_x() _nvprof("lir-x",1)
#define countlir_call() _nvprof("lir-call",1)
#define countlir_jtbl() _nvprof("lir-jtbl",1)
#else
#define countlir_live()
#define countlir_ret()
#define countlir_alloc()
#define countlir_var()
#define countlir_use()
#define countlir_def()
#define countlir_imm()
#define countlir_param()
#define countlir_cmov()
#define countlir_ld()
#define countlir_ldq()
#define countlir_alu()
#define countlir_qjoin()
#define countlir_qlo()
#define countlir_qhi()
#define countlir_fpu()
#define countlir_st()
#define countlir_stq()
#define countlir_jmp()
#define countlir_jcc()
#define countlir_label()
#define countlir_xcc()
#define countlir_x()
#define countlir_call()
#define countlir_jtbl()
#endif
void Assembler::asm_jmp(LIns* ins, InsList& pending_lives)
{
    NanoAssert((ins->isop(LIR_j) && !ins->oprnd1()) ||
               (ins->isop(LIR_jf) && ins->oprnd1()->isImmI(0)) ||
               (ins->isop(LIR_jt) && ins->oprnd1()->isImmI(1)));

    LIns* to = ins->getTarget();
    LabelState* label = _labels.get(to);

    // The jump is always taken so whatever register state we
    // have from downstream code, is irrelevant to code before
    // this jump. So clear it out. We will pick up register
    // state from the jump target, if we have seen that label.
    releaseRegisters();

    // Unreachable, so assume correct stack depth.
    debug_only( _fpuStkDepth = 0; )

    if (label && label->addr) {
        // Forward jump - pick up register state from target.
        unionRegisterState(label->regs);
        // Set stack depth according to the register state we just loaded,
        // negating the effect of any unreachable x87 stack pop that might
        // have been emitted by unionRegisterState().
        debug_only( _fpuStkDepth = (_allocator.getActive(FST0) ? -1 : 0); )
        JMP(label->addr);
    }
    else {
        // Backwards jump.
        handleLoopCarriedExprs(pending_lives);
        if (!label) {
            // save empty register state at loop header
            _labels.add(to, 0, _allocator);
        }
        else {
            intersectRegisterState(label->regs);
            debug_only( _fpuStkDepth = (_allocator.getActive(FST0) ? -1 : 0); )
        }
        JMP(0);
        _patches.put(_nIns, to);
    }
}
void Assembler::asm_jcc(LIns* ins, InsList& pending_lives)
{
    bool branchOnFalse = (ins->opcode() == LIR_jf);
    LIns* cond = ins->oprnd1();
    if (cond->isImmI()) {
        if ((!branchOnFalse && !cond->immI()) || (branchOnFalse && cond->immI())) {
            // jmp never taken, not needed
        } else {
            asm_jmp(ins, pending_lives);    // jmp always taken
        }
        return;
    }

    // Changes to the logic below will likely need to be propagated to Assembler::asm_jov().

    LIns* to = ins->getTarget();
    LabelState* label = _labels.get(to);
    if (label && label->addr) {
        // Forward jump to known label. Need to merge with label's register state.
        unionRegisterState(label->regs);
        asm_branch(branchOnFalse, cond, label->addr);
    }
    else {
        handleLoopCarriedExprs(pending_lives);
        if (!label) {
            // Evict all registers, most conservative approach.
            evictAllActiveRegs();
            _labels.add(to, 0, _allocator);
        }
        else {
            // Evict all registers, most conservative approach.
            intersectRegisterState(label->regs);
        }
        NIns* branch = asm_branch(branchOnFalse, cond, 0);
        _patches.put(branch,to);
    }
}
void Assembler::asm_jov(LIns* ins, InsList& pending_lives)
{
    // The caller is responsible for countlir_* profiling, unlike
    // asm_jcc above. The reason for this is that asm_jov may not
    // be called if the instruction is dead, and it is our convention
    // to count such instructions anyway.
    LOpcode op = ins->opcode();
    LIns* to = ins->getTarget();
    LabelState* label = _labels.get(to);
    if (label && label->addr) {
        // forward jump to known label. need to merge with label's register state.
        unionRegisterState(label->regs);
        asm_branch_ov(op, label->addr);
    }
    else {
        handleLoopCarriedExprs(pending_lives);
        if (!label) {
            // evict all registers, most conservative approach.
            evictAllActiveRegs();
            _labels.add(to, 0, _allocator);
        }
        else {
            // evict all registers, most conservative approach.
            intersectRegisterState(label->regs);
        }
        NIns* branch = asm_branch_ov(op, 0);
        _patches.put(branch,to);
    }
}
void Assembler::asm_x(LIns* ins)
{
    verbose_only( _thisfrag->nStaticExits++; )
    // Generate the side exit branch on the main trace.
    NIns* exit = asm_exit(ins);
    JMP(exit);
}
void Assembler::asm_xcc(LIns* ins)
{
    LIns* cond = ins->oprnd1();
    if (cond->isImmI()) {
        if ((ins->isop(LIR_xt) && !cond->immI()) || (ins->isop(LIR_xf) && cond->immI())) {
            // guard never taken, not needed
        } else {
            asm_x(ins);     // guard always taken
        }
        return;
    }

    verbose_only( _thisfrag->nStaticExits++; )

    // We only support cmp with guard right now, also assume it is 'close'
    // and only emit the branch.
    NIns* exit = asm_exit(ins); // does intersectRegisterState()
    asm_branch(ins->opcode() == LIR_xf, cond, exit);
}
// helper function for nop insertion feature that results in no more
// than 1 no-op instruction insertion every 128-1151 bytes
static inline uint32_t noiseForNopInsertion(Noise* n) {
    return n->getValue(1023) + 128;
}
void Assembler::gen(LirFilter* reader)
{
    NanoAssert(_thisfrag->nStaticExits == 0);

    InsList pending_lives(alloc);

    NanoAssert(!error());

    // compiler hardening setup
    NIns* priorIns = _nIns;
    int32_t nopInsertTrigger = hardenNopInsertion(_config) ? noiseForNopInsertion(_noise): 0;

    // What's going on here: we're visiting all the LIR instructions in
    // the buffer, working strictly backwards in buffer-order, and
    // generating machine instructions for them as we go.
    //
    // For each LIns, we first check if it's live. If so we mark its
    // operands as also live, and then generate code for it *if
    // necessary*. It may not be necessary if the instruction is an
    // expression and code has already been generated for all its uses in
    // combination with previously handled instructions (ins->isExtant()
    // will return false if this is so).
    //
    // Note that the backwards code traversal can make register allocation
    // confusing. (For example, we restore a value before we spill it!)
    // In particular, words like "before" and "after" must be used very
    // carefully -- their meaning at regalloc-time is opposite to their
    // meaning at run-time. We use the term "pre-regstate" to refer to
    // the register allocation state that occurs prior to an instruction's
    // execution, and "post-regstate" to refer to the state that occurs
    // after an instruction's execution, e.g.:
    //
    //   pre-regstate:  ebx(ins)
    //   instruction:   mov eax, ebx     // mov dst, src
    //   post-regstate: eax(ins)
    //
    // At run-time, the instruction updates the pre-regstate into the
    // post-regstate (and these states are the real machine's regstates).
    // But when allocating registers, because we go backwards, the
    // pre-regstate is constructed from the post-regstate (and these
    // regstates are those stored in RegAlloc).
    //
    // One consequence of generating code backwards is that we tend to
    // both spill and restore registers as early (at run-time) as
    // possible; this is good for tolerating memory latency. If we
    // generated code forwards, we would expect to both spill and restore
    // registers as late (at run-time) as possible; this might be better
    // for reducing register pressure.

    // The trace must end with one of these opcodes. Mark it as live.
    NanoAssert(reader->finalIns()->isop(LIR_x)    ||
               reader->finalIns()->isop(LIR_xtbl) ||
               reader->finalIns()->isRet()        ||
               isLiveOpcode(reader->finalIns()->opcode()));
    for (currIns = reader->read(); !currIns->isop(LIR_start); currIns = reader->read())
    {
        LIns* ins = currIns;    // give it a shorter name for local use

        if (!ins->isLive()) {
            NanoAssert(!ins->isExtant());
            continue;
        }

        // Output the post-regstate (registers and/or activation).
        // Because asm output comes in reverse order, doing it now means
        // it is printed after the LIR and native code, exactly when the
        // post-regstate should be shown.
        if ((_logc->lcbits & LC_Native) && (_logc->lcbits & LC_Activation))
            printActivationState();
        if ((_logc->lcbits & LC_Native) && (_logc->lcbits & LC_RegAlloc))
            printRegState();

        // compiler hardening technique that inserts no-op instructions in the compiled method when nopInsertTrigger < 0
        if (hardenNopInsertion(_config))
        {
            size_t delta = (uintptr_t)priorIns - (uintptr_t)_nIns; // # bytes that have been emitted since last go-around

            // if no codeList then we know priorIns and _nIns are on same page, otherwise make sure priorIns was not in the previous code block
            if (!codeList || !codeList->isInBlock(priorIns)) {
                NanoAssert(delta < VMPI_getVMPageSize()); // sanity check
                nopInsertTrigger -= (int32_t) delta;
                if (nopInsertTrigger < 0)
                {
                    nopInsertTrigger = noiseForNopInsertion(_noise);
                    asm_insert_random_nop();
                    PERFM_NVPROF("hardening:nop-insert", 1);
                }
            }
            priorIns = _nIns;
        }

        LOpcode op = ins->opcode();
        switch (op)
        {
            default:
                NanoAssertMsgf(false, "unsupported LIR instruction: %d\n", op);
                break;

            case LIR_regfence:
                evictAllActiveRegs();
                break;
                LIns* op1 = ins->oprnd1();
                op1->setResultLive();
                // LIR_allocp's are meant to live until the point of the
                // LIR_livep instruction, marking other expressions as
                // live ensures that they remain so at loop bottoms.
                // LIR_allocp areas require special treatment because they
                // are accessed indirectly and the indirect accesses are
                // invisible to the assembler, other than via LIR_livep.
                // Other expression results are only accessed directly in
                // ways that are visible to the assembler, so extending
                // those expressions' lifetimes past the last loop edge
                // isn't necessary.
                if (op1->isop(LIR_allocp)) {
                    findMemFor(op1);
                } else {
                    pending_lives.add(ins);
                }
->oprnd1()->setResultLive();
1552 // Allocate some stack space. The value of this instruction
1553 // is the address of the stack space.
1556 if (ins
->isExtant()) {
1557 NanoAssert(ins
->isInAr());
1560 freeResourcesOf(ins
);
1566 if (ins
->isExtant()) {
1571 #ifdef NANOJIT_64BIT
1574 if (ins
->isExtant()) {
1581 if (ins
->isExtant()) {
1588 if (ins
->isExtant()) {
#if NJ_SOFTFLOAT_SUPPORTED
                LIns* op1 = ins->oprnd1();
                op1->setResultLive();
                if (ins->isExtant()) {
                    // Return result of quad-call in register.
                    deprecated_prepResultReg(ins, rmask(retRegs[1]));
                    // If hi half was used, we must use the call to ensure it happens.
                    findSpecificRegFor(op1, retRegs[0]);
                }
->oprnd1()->setResultLive();
1609 if (ins
->isExtant()) {
1616 ins
->oprnd1()->setResultLive();
1617 if (ins
->isExtant()) {
1624 ins
->oprnd1()->setResultLive();
1625 ins
->oprnd2()->setResultLive();
1626 if (ins
->isExtant()) {
1635 ins
->oprnd1()->setResultLive();
1636 ins
->oprnd2()->setResultLive();
1637 ins
->oprnd3()->setResultLive();
1638 if (ins
->isExtant()) {
1649 ins
->oprnd1()->setResultLive();
1650 if (ins
->isExtant()) {
1659 ins
->oprnd1()->setResultLive();
1660 if (ins
->isExtant()) {
1668 ins
->oprnd1()->setResultLive();
1669 if (ins
->isExtant()) {
1674 #if defined NANOJIT_64BIT
1684 ins
->oprnd1()->setResultLive();
1685 ins
->oprnd2()->setResultLive();
1686 if (ins
->isExtant()) {
1703 ins
->oprnd1()->setResultLive();
1704 ins
->oprnd2()->setResultLive();
1705 if (ins
->isExtant()) {
1710 #if defined NANOJIT_IA32 || defined NANOJIT_X64
1713 ins
->oprnd1()->setResultLive();
1714 if (ins
->isExtant()) {
1722 ins
->oprnd1()->setResultLive();
1723 if (ins
->isExtant()) {
1733 ins
->oprnd1()->setResultLive();
1734 ins
->oprnd2()->setResultLive();
1735 if (ins
->isExtant()) {
1742 ins
->oprnd1()->setResultLive();
1743 if (ins
->isExtant()) {
1750 ins
->oprnd1()->setResultLive();
1751 if (ins
->isExtant()) {
1758 ins
->oprnd1()->setResultLive();
1759 if (ins
->isExtant()) {
1764 #ifdef NANOJIT_64BIT
1768 ins
->oprnd1()->setResultLive();
1769 if (ins
->isExtant()) {
1776 ins
->oprnd1()->setResultLive();
1777 if (ins
->isExtant()) {
1784 ins
->oprnd1()->setResultLive();
1785 if (ins
->isExtant()) {
1792 ins
->oprnd1()->setResultLive();
1793 if (ins
->isExtant()) {
                ins->oprnd1()->setResultLive();
                ins->oprnd2()->setResultLive();
                asm_store32(op, ins->oprnd1(), ins->disp(), ins->oprnd2());
                ins->oprnd1()->setResultLive();
                ins->oprnd2()->setResultLive();
                LIns* value = ins->oprnd1();
                LIns* base = ins->oprnd2();
                int dr = ins->disp();
#if NJ_SOFTFLOAT_SUPPORTED
                if (value->isop(LIR_ii2d) && op == LIR_std)
                {
                    // This is correct for little-endian only.
                    asm_store32(LIR_sti, value->oprnd1(), dr, base);
                    asm_store32(LIR_sti, value->oprnd2(), dr+4, base);
                }
                else
#endif
                {
                    asm_store64(op, value, dr, base);
                }
                asm_jmp(ins, pending_lives);

                ins->oprnd1()->setResultLive();
                asm_jcc(ins, pending_lives);
#if NJ_JTBL_SUPPORTED
                ins->oprnd1()->setResultLive();
                // Multiway jump can contain both forward and backward jumps.
                // Out of range indices aren't allowed or checked.
                // Code after this jtbl instruction is unreachable.
                releaseRegisters();
                NanoAssert(_allocator.activeMask() == 0);

                uint32_t count = ins->getTableSize();
                bool has_back_edges = false;

                // Merge the regstates of labels we have already seen.
                for (uint32_t i = count; i-- > 0;) {
                    LIns* to = ins->getTarget(i);
                    LabelState* lstate = _labels.get(to);
                    if (lstate) {
                        unionRegisterState(lstate->regs);
                        verbose_only( RefBuf b; )
                        asm_output("   %u: [&%s]", i, _thisfrag->lirbuf->printer->formatRef(&b, to));
                    } else {
                        has_back_edges = true;
                    }
                }
                asm_output("forward edges");

                // In a multi-way jump, the register allocator has no ability to deal
                // with two existing edges that have conflicting register assignments, unlike
                // a conditional branch where code can be inserted on the fall-through path
                // to reconcile registers. So, frontends *must* insert LIR_regfence at labels of
                // forward jtbl jumps. Check here to make sure no registers were picked up from
                // any forward edges.
                NanoAssert(_allocator.activeMask() == 0);
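                // Front-end sketch (hypothetical LIR, not emitted here): every
                // label that a jtbl can reach by a forward edge starts with a
                // regfence, e.g.
                //     jtbl cond -> [L1, L2]
                //     ...
                //     L1: regfence ; ...
                //     L2: regfence ; ...
                // so no live register is assumed across the table jump.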
                if (has_back_edges) {
                    handleLoopCarriedExprs(pending_lives);
                    // save merged (empty) register state at target labels we haven't seen yet
                    for (uint32_t i = count; i-- > 0;) {
                        LIns* to = ins->getTarget(i);
                        LabelState* lstate = _labels.get(to);
                        if (!lstate) {
                            _labels.add(to, 0, _allocator);
                            verbose_only( RefBuf b; )
                            asm_output("   %u: [&%s]", i, _thisfrag->lirbuf->printer->formatRef(&b, to));
                        }
                    }
                    asm_output("backward edges");
                }

                // Emit the jump instruction, which allocates 1 register for the jump index.
                NIns** native_table = new (_dataAlloc) NIns*[count];
                asm_output("[%p]:", (void*)native_table);
                _patches.put((NIns*)native_table, ins);
                asm_jtbl(ins, native_table);
#endif
                LabelState* label = _labels.get(ins);
                // add profiling inc, if necessary.
                verbose_only( if (_logc->lcbits & LC_FragProfile) {
                    if (ins == _thisfrag->loopLabel)
                        asm_inc_m32(& _thisfrag->profCount);
                })
                if (!label) {
                    // label seen first, normal target of forward jump, save addr & allocator
                    _labels.add(ins, _nIns, _allocator);
                }
                else {
                    // we're at the top of a loop
                    NanoAssert(label->addr == 0);
                    //evictAllActiveRegs();
                    intersectRegisterState(label->regs);
                    label->addr = _nIns;
                }
                verbose_only(
                    RefBuf b;
                    if (_logc->lcbits & LC_Native) {
                        asm_output("[%s]", _thisfrag->lirbuf->printer->formatRef(&b, ins));
                })
                ins->oprnd1()->setResultLive();
#ifdef NANOJIT_IA32
                NIns* exit = asm_exit(ins); // does intersectRegisterState()
                asm_switch(ins, exit);
#else
                NanoAssertMsg(0, "Not supported for this architecture");
#endif
                ins->oprnd1()->setResultLive();

                verbose_only( _thisfrag->nStaticExits++; )

                ins->oprnd1()->setResultLive();
                ins->oprnd2()->setResultLive();
                if (ins->isExtant()) {
                    NIns* exit = asm_exit(ins); // does intersectRegisterState()
                    asm_branch_ov(op, exit);
                }
                ins->oprnd1()->setResultLive();
                ins->oprnd2()->setResultLive();
                if (ins->isExtant()) {
                    asm_jov(ins, pending_lives);
                }

#ifdef NANOJIT_64BIT
                ins->oprnd1()->setResultLive();
                ins->oprnd2()->setResultLive();
                if (ins->isExtant()) {
                    asm_jov(ins, pending_lives);
                }
                ins->oprnd1()->setResultLive();
                ins->oprnd2()->setResultLive();
                if (ins->isExtant()) {

                ins->oprnd1()->setResultLive();
                ins->oprnd2()->setResultLive();
                if (ins->isExtant()) {
                for (int i = 0, argc = ins->argc(); i < argc; i++)
                    ins->arg(i)->setResultLive();
                // It must be impure or pure-and-extant -- it couldn't be
                // pure-and-not-extant, because there's no way the codegen
                // for a call can be folded into the codegen of another
                // LIR instruction.
                NanoAssert(!ins->callInfo()->_isPure || ins->isExtant());
#ifdef VMCFG_VTUNE
                // we traverse backwards so we are now hitting the file
                // that is associated with a bunch of LIR_lines we already have seen
                if (vtuneHandle) {
                    void* currentFile = (void*) ins->oprnd1()->immI();
                    vtuneFile(vtuneHandle, currentFile);
                }

                // add a new table entry, we don't yet know which file it belongs
                // to so we need to add it to the update table too
                // note the alloc, actual act is delayed; see above
                if (vtuneHandle) {
                    uint32_t currentLine = (uint32_t) ins->oprnd1()->immI();
                    vtuneLine(vtuneHandle, currentLine, _nIns);
                }
#endif // VMCFG_VTUNE
            // We do final LIR printing inside this loop to avoid printing
            // dead LIR instructions. We print the LIns after generating the
            // code. This ensures that the LIns will appear in debug output
            // *before* the native code, because Assembler::outputf()
            // prints everything in reverse.
            if (_logc->lcbits & LC_AfterDCE) {
                InsBuf b;
                LInsPrinter* printer = _thisfrag->lirbuf->printer;
                if (ins->isop(LIR_comment))
                    outputf("%s", printer->formatIns(&b, ins));
                else
                    outputf("    %s", printer->formatIns(&b, ins));
            }

            // check that all is well (don't check in exit paths since it's more complicated)
            debug_only( pageValidate(); )
            debug_only( resourceConsistencyCheck(); )
        }
    }
/*
 * Write a jump table for the given SwitchInfo and store the table
 * address in the SwitchInfo. Every entry will initially point to
 * 'target'.
 */
void Assembler::emitJumpTable(SwitchInfo* si, NIns* target)
{
    si->table = (NIns**) alloc.alloc(si->count * sizeof(NIns*));
    for (uint32_t i = 0; i < si->count; ++i)
        si->table[i] = target;
}
void Assembler::assignSavedRegs()
{
    // Restore saved registers.
    LirBuffer* b = _thisfrag->lirbuf;
    for (int i=0, n = NumSavedRegs; i < n; i++) {
        LIns* p = b->savedRegs[i];
        if (p)
            findSpecificRegForUnallocated(p, savedRegs[p->paramArg()]);
    }
}
void Assembler::reserveSavedRegs()
{
    LirBuffer* b = _thisfrag->lirbuf;
    for (int i = 0, n = NumSavedRegs; i < n; i++) {
        LIns* ins = b->savedRegs[i];
        if (ins)
            findMemFor(ins);
    }
}
void Assembler::assignParamRegs()
{
    LIns* state = _thisfrag->lirbuf->state;
    if (state)
        findSpecificRegForUnallocated(state, argRegs[state->paramArg()]);
    LIns* param1 = _thisfrag->lirbuf->param1;
    if (param1)
        findSpecificRegForUnallocated(param1, argRegs[param1->paramArg()]);
}
void Assembler::handleLoopCarriedExprs(InsList& pending_lives)
{
    // ensure that exprs spanning the loop are marked live at the end of the loop
    for (Seq<LIns*>* p = pending_lives.get(); p != NULL; p = p->tail) {
        LIns* ins = p->head;
        NanoAssert(isLiveOpcode(ins->opcode()));
        LIns* op1 = ins->oprnd1();
        // Must findMemFor even if we're going to findRegFor; loop-carried
        // operands may spill on another edge, and we need them to always
        // spill to the same place.
#if NJ_USES_IMMD_POOL
        // Exception: if float constants are true constants, we should
        // never call findMemFor on those ops.
        if (!op1->isImmD())
            findMemFor(op1);
#else
        findMemFor(op1);
#endif
        if (!op1->isImmAny())
            findRegFor(op1, ins->isop(LIR_lived) ? FpRegs : GpRegs);
    }

    // clear this list since we have now dealt with those lifetimes. extending
    // their lifetimes again later (earlier in the code) serves no purpose.
    pending_lives.clear();
}
void AR::freeEntryAt(uint32_t idx)
{
    NanoAssert(idx > 0 && idx <= _highWaterMark);

    // NB: this loop relies on entry[0] being NULL,
    // so that we are guaranteed to terminate
    // without accessing negative entries.
    LIns* i = _entries[idx];
    NanoAssert(i != NULL);
    do {
        _entries[idx] = NULL;
        idx--;
    } while (_entries[idx] == i);
}
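// E.g. (sketch): a 2-slot (QorD) value whose arIndex is 5 occupies
// _entries[4] and _entries[5]; freeEntryAt(5) NULLs both and stops at
// _entries[3], which holds a different LIns (or at the NULL sentinel in
// _entries[0]).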
void Assembler::printRegState()
{
    char* s = &outline[0];
    VMPI_memset(s, ' ', 26);  s[26] = '\0';
    s += VMPI_strlen(s);
    VMPI_sprintf(s, "RR");
    s += VMPI_strlen(s);

    RegisterMask active = _allocator.activeMask();
    for (Register r = lsReg(active); active != 0; r = nextLsReg(active, r)) {
        LIns* ins = _allocator.getActive(r);
        NanoAssertMsg(!_allocator.isFree(r),
                      "Coding error; register is both free and active! " );
        RefBuf b;
        const char* n = _thisfrag->lirbuf->printer->formatRef(&b, ins);

        if (ins->isop(LIR_paramp) && ins->paramKind()==1 &&
            r == Assembler::savedRegs[ins->paramArg()])
        {
            // don't print callee-saved regs that aren't used
            continue;
        }

        VMPI_sprintf(s, " %s(%s)", gpn(r), n);
        s += VMPI_strlen(s);
    }
}
void Assembler::printActivationState()
{
    char* s = &outline[0];
    VMPI_memset(s, ' ', 26);  s[26] = '\0';
    s += VMPI_strlen(s);
    VMPI_sprintf(s, "AR");
    s += VMPI_strlen(s);

    LIns* ins = 0;
    uint32_t nStackSlots = 0;
    int32_t arIndex = 0;
    for (AR::Iter iter(_activation); iter.next(ins, nStackSlots, arIndex); )
    {
        RefBuf b;
        const char* n = _thisfrag->lirbuf->printer->formatRef(&b, ins);
        if (nStackSlots > 1) {
            VMPI_sprintf(s," %d-%d(%s)", 4*arIndex, 4*(arIndex+nStackSlots-1), n);
        }
        else {
            VMPI_sprintf(s," %d(%s)", 4*arIndex, n);
        }
        s += VMPI_strlen(s);
    }
}
inline bool AR::isEmptyRange(uint32_t start, uint32_t nStackSlots) const
{
    for (uint32_t i=0; i < nStackSlots; i++)
    {
        if (_entries[start-i] != NULL)
            return false;
    }
    return true;
}
uint32_t AR::reserveEntry(LIns* ins)
{
    uint32_t const nStackSlots = nStackSlotsFor(ins);

    if (nStackSlots == 1)
    {
        for (uint32_t i = 1; i <= _highWaterMark; i++)
        {
            if (_entries[i] == NULL)
            {
                _entries[i] = ins;
                return i;
            }
        }
        if (_highWaterMark < NJ_MAX_STACK_ENTRY - 1)
        {
            NanoAssert(_entries[_highWaterMark+1] == BAD_ENTRY);
            _highWaterMark++;
            _entries[_highWaterMark] = ins;
            return _highWaterMark;
        }
    }
    else
    {
        // alloc larger block on 8-byte boundary.
        uint32_t const start = nStackSlots + (nStackSlots & 1);
        for (uint32_t i = start; i <= _highWaterMark; i += 2)
        {
            if (isEmptyRange(i, nStackSlots))
            {
                // place the entry in the table and mark the instruction with it
                for (uint32_t j=0; j < nStackSlots; j++)
                {
                    NanoAssert(i-j <= _highWaterMark);
                    NanoAssert(_entries[i-j] == NULL);
                    _entries[i-j] = ins;
                }
                return i;
            }
        }

        // Be sure to account for any 8-byte-round-up when calculating spaceNeeded.
        uint32_t const spaceLeft = NJ_MAX_STACK_ENTRY - _highWaterMark - 1;
        uint32_t const spaceNeeded = nStackSlots + (_highWaterMark & 1);
        if (spaceLeft >= spaceNeeded)
        {
            if (_highWaterMark & 1)
            {
                NanoAssert(_entries[_highWaterMark+1] == BAD_ENTRY);
                _entries[_highWaterMark+1] = NULL;
            }
            _highWaterMark += spaceNeeded;
            for (uint32_t j = 0; j < nStackSlots; j++)
            {
                NanoAssert(_highWaterMark-j < NJ_MAX_STACK_ENTRY);
                NanoAssert(_entries[_highWaterMark-j] == BAD_ENTRY);
                _entries[_highWaterMark-j] = ins;
            }
            return _highWaterMark;
        }
    }
    // no space. oh well.
    return 0;
}
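// Worked example (sketch): with _highWaterMark == 7 (odd) and a quad
// needing nStackSlots == 2, spaceNeeded == 3: slot 8 is first NULLed as
// alignment padding, then slots 9 and 10 receive 'ins' and arIndex 10 is
// returned -- an even arIndex, so the two slots land on an 8-byte-aligned
// pair in the frame.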
void AR::checkForResourceLeaks() const
{
    for (uint32_t i = 1; i <= _highWaterMark; i++) {
        NanoAssertMsgf(_entries[i] == NULL, "frame entry %d wasn't freed\n",4*i);
    }
}
    uint32_t Assembler::arReserve(LIns* ins)
    {
        uint32_t i = _activation.reserveEntry(ins);
        if (!i)
            setError(StackFull);
        return i;
    }
    void Assembler::arFree(LIns* ins)
    {
        NanoAssert(ins->isInAr());
        uint32_t arIndex = ins->getArIndex();
        NanoAssert(arIndex);
        NanoAssert(_activation.isValidEntry(arIndex, ins));
        _activation.freeEntryAt(arIndex);       // free any stack space associated with the entry
    }
    /**
     * Move regs around so the SavedRegs contains the highest priority regs.
     */
    void Assembler::evictScratchRegsExcept(RegisterMask ignore)
    {
        // Find the top GpRegs that are candidates to put in SavedRegs.

        // 'tosave' is a binary heap stored in an array.  The root is tosave[0],
        // left child is at i+1, right child is at i+2.

        Register tosave[LastRegNum - FirstRegNum + 1];
        int len = 0;
        RegAlloc *regs = &_allocator;
        RegisterMask evict_set = regs->activeMask() & GpRegs & ~ignore;
        for (Register r = lsReg(evict_set); evict_set; r = nextLsReg(evict_set, r)) {
            LIns *ins = regs->getActive(r);
            if (canRemat(ins)) {
                NanoAssert(ins->getReg() == r);
                evict(ins);
            }
            else if (!ins->isop(LIR_allocp)) {
                int32_t pri = regs->getPriority(r);
                // add to heap by adding to end and bubbling up
                int j = len++;
                while (j > 0 && pri > regs->getPriority(tosave[j/2])) {
                    tosave[j] = tosave[j/2];
                    j /= 2;
                }
                NanoAssert(size_t(j) < sizeof(tosave)/sizeof(tosave[0]));
                tosave[j] = r;
            }
        }

        // Now 'tosave' has the live exprs in priority order.
        // Allocate each of the top priority exprs to a SavedReg.

        RegisterMask allow = SavedRegs;
        while (allow && len > 0) {
            // get the highest priority var
            Register hi = tosave[0];
            if (!(rmask(hi) & SavedRegs)) {
                LIns *ins = regs->getActive(hi);
                Register r = findRegFor(ins, allow);
                allow &= ~rmask(r);
            }
            else {
                // hi is already in a saved reg, leave it alone.
                allow &= ~rmask(hi);
            }

            // remove from heap by replacing root with end element and bubbling down.
            if (allow && --len > 0) {
                Register last = tosave[len];
                int j = 0;
                while (j+1 < len) {
                    int child = j+1;
                    if (j+2 < len && regs->getPriority(tosave[j+2]) > regs->getPriority(tosave[j+1]))
                        child = j+2;
                    if (regs->getPriority(last) > regs->getPriority(tosave[child]))
                        break;
                    tosave[j] = tosave[child];
                    j = child;
                }
                tosave[j] = last;
            }
        }

        // now evict everything else.
        evictSomeActiveRegs(~(SavedRegs | ignore));
    }
    // Generate code to restore any registers in 'regs' that are currently active.
    void Assembler::evictSomeActiveRegs(RegisterMask regs)
    {
        RegisterMask evict_set = regs & _allocator.activeMask();
        for (Register r = lsReg(evict_set); evict_set; r = nextLsReg(evict_set, r))
            evict(_allocator.getActive(r));
    }
    /**
     * Merge the current regstate with a previously stored version.
     *
     * Situation                            Change to _allocator
     * ---------                            --------------------
     * !current & !saved                    none
     * !current & saved                     add saved
     * current & !saved                     evict current (unionRegisterState does nothing)
     * current & saved & current==saved     none
     * current & saved & current!=saved     evict current, add saved
     */
    void Assembler::intersectRegisterState(RegAlloc& saved)
    {
        Register regsTodo[LastRegNum + 1];
        LIns* insTodo[LastRegNum + 1];
        int nTodo = 0;

        // Do evictions and pops first.
        verbose_only( bool shouldMention = false; )
        // The obvious thing to do here is to iterate from FirstRegNum to
        // LastRegNum.  However, on ARM that causes lower-numbered integer
        // registers to be saved at higher addresses, which inhibits the
        // formation of load/store multiple instructions.  Hence iterate the
        // loop the other way.
        RegisterMask reg_set = _allocator.activeMask() | saved.activeMask();
        for (Register r = msReg(reg_set); reg_set; r = nextMsReg(reg_set, r))
        {
            LIns* curins = _allocator.getActive(r);
            LIns* savedins = saved.getActive(r);
            if (curins != savedins)
            {
                if (savedins) {
                    regsTodo[nTodo] = r;
                    insTodo[nTodo] = savedins;
                    nTodo++;
                }
                if (curins) {
                    //_nvprof("intersect-evict",1);
                    verbose_only( shouldMention = true; )
                    NanoAssert(curins->getReg() == r);
                    evict(curins);
                }

                #ifdef NANOJIT_IA32
                if (savedins && r == FST0) {
                    verbose_only( shouldMention = true; )
                    FSTP(FST0);
                }
                #endif
            }
        }
        // Now reassign mainline registers.
        for (int i = 0; i < nTodo; i++) {
            findSpecificRegFor(insTodo[i], regsTodo[i]);
        }
        verbose_only(
            if (shouldMention)
                verbose_outputf("## merging registers (intersect) with existing edge");
        )
    }
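    // Worked example of the table above: with current state { R1:a, R2:b }
    // and saved state { R1:a, R3:c } at the target,
    //     R1: current == saved  -> no change
    //     R2: current & !saved  -> evict b
    //     R3: !current & saved  -> findSpecificRegFor(c, R3)
    // leaving exactly the saved state { R1:a, R3:c }.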
    /**
     * Merge the current state of the registers with a previously stored version.
     *
     * Situation                            Change to _allocator
     * ---------                            --------------------
     * !current & !saved                    none
     * !current & saved                     add saved
     * current & !saved                     none (intersectRegisterState evicts current)
     * current & saved & current==saved     none
     * current & saved & current!=saved     evict current, add saved
     */
    void Assembler::unionRegisterState(RegAlloc& saved)
    {
        Register regsTodo[LastRegNum + 1];
        LIns* insTodo[LastRegNum + 1];
        int nTodo = 0;

        // Do evictions and pops first.
        verbose_only( bool shouldMention = false; )
        RegisterMask reg_set = _allocator.activeMask() | saved.activeMask();
        for (Register r = lsReg(reg_set); reg_set; r = nextLsReg(reg_set, r))
        {
            LIns* curins = _allocator.getActive(r);
            LIns* savedins = saved.getActive(r);
            if (curins != savedins)
            {
                if (savedins) {
                    regsTodo[nTodo] = r;
                    insTodo[nTodo] = savedins;
                    nTodo++;
                }
                if (curins && savedins) {
                    //_nvprof("union-evict",1);
                    verbose_only( shouldMention = true; )
                    NanoAssert(curins->getReg() == r);
                    evict(curins);
                }

                #ifdef NANOJIT_IA32
                if (r == FST0) {
                    if (savedins) {
                        // Discard top of x87 stack.
                        FSTP(FST0);
                    }
                    else if (curins) {
                        // Saved state did not have fpu reg allocated,
                        // so we must evict here to keep x87 stack balanced.
                        evict(curins);
                    }
                    verbose_only( shouldMention = true; )
                }
                #endif
            }
        }

        // Now reassign mainline registers.
        for (int i = 0; i < nTodo; i++) {
            findSpecificRegFor(insTodo[i], regsTodo[i]);
        }
        verbose_only(
            if (shouldMention)
                verbose_outputf("## merging registers (union) with existing edge");
        )
    }
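    // Worked example of the table above: with the same states as before,
    // current { R1:a, R2:b } and saved { R1:a, R3:c }, union differs from
    // intersect only at R2 (current & !saved): b is left in R2, because the
    // edge that stored 'saved' is reconciled by intersectRegisterState
    // instead; c is still restored into R3.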
    // Scan table for instruction with the lowest priority, meaning it is used
    // furthest in the future.
    LIns* Assembler::findVictim(RegisterMask allow)
    {
        NanoAssert(allow);
        LIns *ins, *vic = 0;
        int allow_pri = 0x7fffffff;
        RegisterMask vic_set = allow & _allocator.activeMask();
        for (Register r = lsReg(vic_set); vic_set; r = nextLsReg(vic_set, r))
        {
            ins = _allocator.getActive(r);
            int pri = canRemat(ins) ? 0 : _allocator.getPriority(r);
            if (!vic || pri < allow_pri) {
                vic = ins;
                allow_pri = pri;
            }
        }
        NanoAssert(vic != 0);
        return vic;
    }
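    // Note: rematerializable values get priority 0 above, so they are always
    // the preferred victims -- regenerating a constant or parameter is cheaper
    // than spilling and reloading a computed value.  E.g. with active regs
    // { R1: immediate, pri 0 } and { R2: add result, pri 12 }, findVictim
    // picks the immediate in R1.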
#ifdef NJ_VERBOSE
    char Assembler::outline[8192];
    char Assembler::outlineEOL[512];

    void Assembler::output()
    {
        // The +1 is for the terminating NUL char.
        VMPI_strncat(outline, outlineEOL, sizeof(outline) - (VMPI_strlen(outline) + 1));

        if (_outputCache) {
            char* str = new (alloc) char[VMPI_strlen(outline) + 1];
            VMPI_strcpy(str, outline);
            _outputCache->insert(str);
        }
        else {
            _logc->printf("%s\n", outline);
        }

        outline[0] = '\0';
        outlineEOL[0] = '\0';
    }
    void Assembler::outputf(const char* format, ...)
    {
        va_list args;
        va_start(args, format);

        outline[0] = '\0';
        vsprintf(outline, format, args);
        va_end(args);

        output();
    }
    void Assembler::setOutputForEOL(const char* format, ...)
    {
        va_list args;
        va_start(args, format);

        outlineEOL[0] = '\0';
        vsprintf(outlineEOL, format, args);
        va_end(args);
    }
#endif // NJ_VERBOSE
    void LabelStateMap::add(LIns* label, NIns* addr, RegAlloc& regs) {
        LabelState* st = new (alloc) LabelState(addr, regs);
        labels.put(label, st);
    }

    LabelState* LabelStateMap::get(LIns* label) {
        return labels.get(label);
    }
}
#endif /* FEATURE_NANOJIT */