/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is [Open Source Virtual Machine].
 *
 * The Initial Developer of the Original Code is
 * Adobe System Incorporated.
 * Portions created by the Initial Developer are Copyright (C) 2004-2007
 * the Initial Developer. All Rights Reserved.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
#include "nanojit.h"

#ifdef FEATURE_NANOJIT

#ifdef VMCFG_VTUNE
#include "../core/CodegenLIR.h"
#endif

#ifdef _MSC_VER
// disable some specific warnings which are normally useful, but pervasive in the code-gen macros
#pragma warning(disable:4310) // cast truncates constant value
#endif

#ifdef VMCFG_VTUNE
namespace vtune {
    using namespace nanojit;
    void vtuneStart(void*, NIns*);
    void vtuneEnd(void*, NIns*);
    void vtuneLine(void*, int, NIns*);
    void vtuneFile(void*, void*);
}
using namespace vtune;
#endif // VMCFG_VTUNE

namespace nanojit
{
    /*
     * - merging paths ( build a graph? ), possibly use external rep to drive codegen
     */
    Assembler::Assembler(CodeAlloc& codeAlloc, Allocator& dataAlloc, Allocator& alloc,
                         AvmCore* core, LogControl* logc, const Config& config)
        : codeList(NULL)
        , alloc(alloc)
        , _codeAlloc(codeAlloc)
        , _dataAlloc(dataAlloc)
        , _branchStateMap(alloc)
        , _config(config)
    {
        nInit(core);
        (void)logc;
        verbose_only( _logc = logc; )
        verbose_only( _outputCache = 0; )
        verbose_only( outline[0] = '\0'; )
        verbose_only( outlineEOL[0] = '\0'; )

        reset();
    }
    // Per-opcode register hint table. Default to no hints for all
    // instructions. It's not marked const because individual back-ends can
    // install hint values for opcodes of interest in nInit().
    RegisterMask Assembler::nHints[LIR_sentinel+1] = {
#define OP___(op, number, repKind, retType, isCse) \
        0,
#include "LIRopcode.tbl"
#undef OP___
        0
    };
#ifdef _DEBUG
    /*static*/ LIns* const AR::BAD_ENTRY = (LIns*)0xdeadbeef;

    void AR::validateQuick()
    {
        NanoAssert(_highWaterMark < NJ_MAX_STACK_ENTRY);
        NanoAssert(_entries[0] == NULL);
        // Only check a few entries around _highWaterMark.
        uint32_t const RADIUS = 4;
        uint32_t const lo = (_highWaterMark > 1 + RADIUS ? _highWaterMark - RADIUS : 1);
        uint32_t const hi = (_highWaterMark + 1 + RADIUS < NJ_MAX_STACK_ENTRY ? _highWaterMark + 1 + RADIUS : NJ_MAX_STACK_ENTRY);
        for (uint32_t i = lo; i <= _highWaterMark; ++i)
            NanoAssert(_entries[i] != BAD_ENTRY);
        for (uint32_t i = _highWaterMark+1; i < hi; ++i)
            NanoAssert(_entries[i] == BAD_ENTRY);
    }
    void AR::validateFull()
    {
        NanoAssert(_highWaterMark < NJ_MAX_STACK_ENTRY);
        NanoAssert(_entries[0] == NULL);
        for (uint32_t i = 1; i <= _highWaterMark; ++i)
            NanoAssert(_entries[i] != BAD_ENTRY);
        for (uint32_t i = _highWaterMark+1; i < NJ_MAX_STACK_ENTRY; ++i)
            NanoAssert(_entries[i] == BAD_ENTRY);
    }

    void AR::validate()
    {
        static uint32_t validateCounter = 0;
        if (++validateCounter >= 100) {
            validateFull();
            validateCounter = 0;
        } else {
            validateQuick();
        }
    }
#endif
    inline void AR::clear()
    {
        _highWaterMark = 0;
        NanoAssert(_entries[0] == NULL);
#ifdef _DEBUG
        for (uint32_t i = 1; i < NJ_MAX_STACK_ENTRY; ++i)
            _entries[i] = BAD_ENTRY;
#endif
    }
    bool AR::Iter::next(LIns*& ins, uint32_t& nStackSlots, int32_t& arIndex)
    {
        while (_i <= _ar._highWaterMark) {
            ins = _ar._entries[_i];
            if (ins) {
                arIndex = _i;
                nStackSlots = nStackSlotsFor(ins);
                _i += nStackSlots;
                return true;
            }
            _i++;
        }
        ins = NULL;
        nStackSlots = 0;
        arIndex = 0;
        return false;
    }
    void Assembler::arReset()
    {
        _activation.clear();
        _branchStateMap.clear();
        _patches.clear();
        _labels.clear();
#if NJ_USES_IMMD_POOL
        _immDPool.clear();
#endif
    }

    void Assembler::registerResetAll()
    {
        nRegisterResetAll(_allocator);
        _allocator.managed = _allocator.free;

        // At start, should have some registers free and none active.
        NanoAssert(0 != _allocator.free);
        NanoAssert(0 == _allocator.activeMask());
#ifdef NANOJIT_IA32
        debug_only(_fpuStkDepth = 0; )
#endif
    }
    // Legend for register sets: A = allowed, P = preferred, F = free, S = SavedReg.
    //
    // Finds a register in 'setA___' to store the result of 'ins' (one from
    // 'set_P__' if possible), evicting one if necessary. Doesn't consider
    // the prior state of 'ins'.
    //
    // Nb: 'setA___' comes from the instruction's use, 'set_P__' comes from its def.
    // Eg. in 'add(call(...), ...)':
    //     - the call's use means setA___==GpRegs;
    //     - the call's def means set_P__==rmask(retRegs[0]).
    //
    Register Assembler::registerAlloc(LIns* ins, RegisterMask setA___, RegisterMask set_P__)
    {
        Register r;
        RegisterMask set__F_ = _allocator.free;
        RegisterMask setA_F_ = setA___ & set__F_;

        if (setA_F_) {
            RegisterMask set___S = SavedRegs;
            RegisterMask setA_FS = setA_F_ & set___S;
            RegisterMask setAPF_ = setA_F_ & set_P__;
            RegisterMask setAPFS = setA_FS & set_P__;
            RegisterMask set;

            if      (setAPFS) set = setAPFS;
            else if (setAPF_) set = setAPF_;
            else if (setA_FS) set = setA_FS;
            else              set = setA_F_;

            r = nRegisterAllocFromSet(set);
            _allocator.addActive(r, ins);
        } else {
            // Nothing free, steal one.
            // LSRA says pick the one with the furthest use.
            LIns* vic = findVictim(setA___);
            NanoAssert(vic->isInReg());
            r = vic->getReg();

            evict(vic);

            // r ends up staying active, but the LIns defining it changes.
            _allocator.removeFree(r);
            _allocator.addActive(r, ins);
        }
        return r;
    }
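    // Worked example of the set arithmetic above (illustrative values only,
    // not tied to any particular back-end): suppose setA___ = {eax,ecx,esi},
    // _allocator.free = {ecx,esi}, set_P__ = {ecx}, and SavedRegs = {esi,edi}.
    // Then:
    //     setA_F_ = {ecx,esi}   allowed and free
    //     setA_FS = {esi}       allowed, free, and callee-saved
    //     setAPF_ = {ecx}       allowed, free, and preferred
    //     setAPFS = {}          all four properties at once
    // setAPFS is empty, so the cascade falls through to setAPF_ and the
    // preferred free register ecx is chosen without an eviction.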
    // Finds a register in 'allow' to store a temporary value (one not
    // associated with a particular LIns), evicting one if necessary. The
    // returned register is marked as being free and so can only be safely
    // used for code generation purposes until the regstate is next inspected
    // or updated.
    Register Assembler::registerAllocTmp(RegisterMask allow)
    {
        LIns dummyIns;
        Register r = registerAlloc(&dummyIns, allow, /*prefer*/0);

        // Mark r as free, ready for use as a temporary value.
        _allocator.removeActive(r);
        _allocator.addFree(r);
        return r;
    }
    void Assembler::codeAlloc(NIns*& start, NIns*& end, NIns*& eip
                              verbose_only(, size_t& nBytes))
    {
        // save the block we just filled
        if (start)
            CodeAlloc::add(codeList, start, end);

        // CodeAlloc contract: allocations never fail
        _codeAlloc.alloc(start, end);
        verbose_only( nBytes += (end - start) * sizeof(NIns); )
        NanoAssert(uintptr_t(end) - uintptr_t(start) >= (size_t)LARGEST_UNDERRUN_PROT);
        eip = end;
    }
    void Assembler::reset()
    {
        _nIns = 0;
        _nExitIns = 0;
        codeStart = codeEnd = 0;
        exitStart = exitEnd = 0;
        codeList = 0;

        nativePageReset();
        registerResetAll();
        arReset();
    }
#ifdef _DEBUG
    void Assembler::pageValidate()
    {
        if (error()) return;
        // This may be a normal code chunk or an exit code chunk.
        NanoAssertMsg(codeStart <= _nIns && _nIns <= codeEnd,
                      "Native instruction pointer overstep paging bounds; check overrideProtect for last instruction");
    }
#endif
    bool AR::isValidEntry(uint32_t idx, LIns* ins) const
    {
        return idx > 0 && idx <= _highWaterMark && _entries[idx] == ins;
    }
    void AR::checkForResourceConsistency(const RegAlloc& regs)
    {
        validate();
        for (uint32_t i = 1; i <= _highWaterMark; ++i)
        {
            LIns* ins = _entries[i];
            if (!ins)
                continue;
            uint32_t arIndex = ins->getArIndex();
            NanoAssert(arIndex != 0);
            if (ins->isop(LIR_allocp)) {
                int const n = i + (ins->size()>>2);
                for (int j = i+1; j < n; j++) {
                    NanoAssert(_entries[j]==ins);
                }
                NanoAssert(arIndex == (uint32_t)n-1);
                i = n-1;
            }
            else if (ins->isQorD()) {
                NanoAssert(_entries[i + 1]==ins);
                i += 1; // skip high word
            }
            else {
                NanoAssertMsg(arIndex == i, "Stack record index mismatch");
            }
            NanoAssertMsg(!ins->isInReg() || regs.isConsistent(ins->getReg(), ins),
                          "Register record mismatch");
        }
    }
    void Assembler::resourceConsistencyCheck()
    {
        NanoAssert(!error());
#ifdef NANOJIT_IA32
        // Within the expansion of a single LIR instruction, we may use the x87
        // stack for unmanaged temporaries. Otherwise, we do not use the x87 stack
        // as such, but use the top element alone as a single allocatable FP register.
        // Compensation code must be inserted to keep the stack balanced and avoid
        // overflow, and the mechanisms for this are rather fragile and IA32-specific.
        // The predicate below should hold between any pair of instructions within
        // a basic block, at labels, and just after a conditional branch. Currently,
        // we enforce this condition between all pairs of instructions, but this is
        // overly restrictive, and would fail if we did not generate unreachable x87
        // stack pops following unconditional branches.
        NanoAssert((_allocator.active[FST0] && _fpuStkDepth == -1) ||
                   (!_allocator.active[FST0] && _fpuStkDepth == 0));
#endif
        _activation.checkForResourceConsistency(_allocator);
        registerConsistencyCheck();
    }
    void Assembler::registerConsistencyCheck()
    {
        RegisterMask managed = _allocator.managed;
        for (Register r = lsReg(managed); managed; r = nextLsReg(managed, r)) {
            // A register managed by register allocation must be either
            // free or active, but not both.
            if (_allocator.isFree(r)) {
                NanoAssertMsgf(_allocator.getActive(r)==0,
                               "register %s is free but assigned to ins", gpn(r));
            } else {
                // An LIns defining a register must have that register in
                // its reservation.
                LIns* ins = _allocator.getActive(r);
                NanoAssert(ins);
                NanoAssertMsg(r == ins->getReg(), "Register record mismatch");
            }
        }

        RegisterMask not_managed = ~_allocator.managed;
        for (Register r = lsReg(not_managed); not_managed; r = nextLsReg(not_managed, r)) {
            // A register not managed by register allocation must be
            // neither free nor active.
            NanoAssert(!_allocator.isFree(r));
            NanoAssert(!_allocator.getActive(r));
        }
    }
    void Assembler::findRegFor2(RegisterMask allowa, LIns* ia, Register& ra,
                                RegisterMask allowb, LIns* ib, Register& rb)
    {
        // There should be some overlap between 'allowa' and 'allowb', else
        // there's no point calling this function.
        NanoAssert(allowa & allowb);

        if (ia == ib) {
            ra = rb = findRegFor(ia, allowa & allowb);  // use intersection(allowa, allowb)

        } else if (ib->isInRegMask(allowb)) {
            // 'ib' is already in an allowable reg -- don't let it get evicted
            // when finding 'ra'.
            rb = ib->getReg();
            ra = findRegFor(ia, allowa & ~rmask(rb));

        } else {
            ra = findRegFor(ia, allowa);
            rb = findRegFor(ib, allowb & ~rmask(ra));
        }
    }
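    // Illustrative call site (hypothetical, for exposition only): a back-end
    // generating a two-operand integer instruction might write
    //
    //     Register ra, rb;
    //     findRegFor2(GpRegs, lhs, ra, GpRegs, rhs, rb);
    //
    // and rely on the eviction-avoidance above to obtain two distinct GpRegs
    // whenever 'lhs' and 'rhs' are different instructions.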
    Register Assembler::findSpecificRegFor(LIns* i, Register w)
    {
        return findRegFor(i, rmask(w));
    }
    // Like findRegFor(), but called when the LIns is used as a pointer. It
    // doesn't have to be called, findRegFor() can still be used, but it can
    // optimize the LIR_allocp case by indexing off FP, thus saving the use of
    // a GpReg.
    //
    Register Assembler::getBaseReg(LIns* base, int& d, RegisterMask allow)
    {
#if !PEDANTIC
        if (base->isop(LIR_allocp)) {
            // The value of a LIR_allocp is a pointer to its stack memory,
            // which is always relative to FP. So we can just return FP if we
            // also adjust 'd' (and can do so in a valid manner). Or, in the
            // PEDANTIC case, we can just assign a register as normal;
            // findRegFor() will allocate the stack memory for LIR_allocp if
            // necessary.
            d += findMemFor(base);
            return FP;
        }
#endif
        return findRegFor(base, allow);
    }
    // Like findRegFor2(), but used for stores where the base value has the
    // same type as the stored value, eg. in asm_store32() on 32-bit platforms
    // and asm_store64() on 64-bit platforms. Similar to getBaseReg(),
    // findRegFor2() can be called instead, but this function can optimize the
    // case where the base value is a LIR_allocp.
    void Assembler::getBaseReg2(RegisterMask allowValue, LIns* value, Register& rv,
                                RegisterMask allowBase, LIns* base, Register& rb, int& d)
    {
#if !PEDANTIC
        if (base->isop(LIR_allocp)) {
            rb = FP;
            d += findMemFor(base);
            rv = findRegFor(value, allowValue);
            return;
        }
#endif
        findRegFor2(allowValue, value, rv, allowBase, base, rb);
    }
    RegisterMask Assembler::hint(LIns* ins)
    {
        RegisterMask prefer = nHints[ins->opcode()];
        return (prefer == PREFER_SPECIAL) ? nHint(ins) : prefer;
    }
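    // Example of how the hint table might be populated (sketch only; the
    // exact opcodes and masks a back-end installs are platform-specific):
    // nInit() could execute
    //
    //     nHints[LIR_calli]  = rmask(retRegs[0]);  // calls define the return reg
    //     nHints[LIR_paramp] = PREFER_SPECIAL;     // computed per-ins by nHint()
    //
    // so that findRegFor() steers call results toward the ABI return register.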
    // Finds a register in 'allow' to hold the result of 'ins'. Used when we
    // encounter a use of 'ins'. The actions depend on the prior regstate of
    // 'ins':
    // - If the result of 'ins' is not in any register, we find an allowed
    //   one, evicting one if necessary.
    // - If the result of 'ins' is already in an allowed register, we use that.
    // - If the result of 'ins' is already in a not-allowed register, we find an
    //   allowed one and move it.
    //
    Register Assembler::findRegFor(LIns* ins, RegisterMask allow)
    {
        if (ins->isop(LIR_allocp)) {
            // Never allocate a reg for this without stack space too.
            findMemFor(ins);
        }

        Register r;

        if (!ins->isInReg()) {
            // 'ins' isn't in a register (must be in a spill slot or nowhere).
            r = registerAlloc(ins, allow, hint(ins));

        } else if (rmask(r = ins->getReg()) & allow) {
            // 'ins' is in an allowed register.
            _allocator.useActive(r);

        } else {
            // 'ins' is in a register (r) that's not in 'allow'.
#ifdef NANOJIT_IA32
            if (((rmask(r)&XmmRegs) && !(allow&XmmRegs)) ||
                ((rmask(r)&x87Regs) && !(allow&x87Regs)))
            {
                // x87 <-> xmm copy required
                //_nvprof("fpu-evict",1);
                evict(ins);
                r = registerAlloc(ins, allow, hint(ins));
            } else
#elif defined(NANOJIT_PPC) || defined(NANOJIT_MIPS)
            if (((rmask(r)&GpRegs) && !(allow&GpRegs)) ||
                ((rmask(r)&FpRegs) && !(allow&FpRegs)))
            {
                evict(ins);
                r = registerAlloc(ins, allow, hint(ins));
            } else
#endif
            {
                // The post-state register holding 'ins' is 's', the pre-state
                // register holding 'ins' is 'r'. For example, if s=eax and
                // r=ecx:
                //
                //   pre-state:   ecx(ins)
                //   instruction: mov eax, ecx
                //   post-state:  eax(ins)
                //
                Register s = r;
                _allocator.retire(r);
                r = registerAlloc(ins, allow, hint(ins));

                // 'ins' is in 'allow', in register r (different to the old r);
                // 's' is the old r.
                if ((rmask(s) & GpRegs) && (rmask(r) & GpRegs)) {
                    MR(s, r);   // move 'ins' from its pre-state reg (r) to its post-state reg (s)
                } else {
                    asm_nongp_copy(s, r);
                }
            }
        }
        return r;
    }
    // Like findSpecificRegFor(), but only for when 'r' is known to be free
    // and 'ins' is known to not already have a register allocated. Updates
    // the regstate (maintaining the invariants) but does not generate any
    // code. The return value is redundant, always being 'r', but it's
    // sometimes useful to have it there for assignments.
    Register Assembler::findSpecificRegForUnallocated(LIns* ins, Register r)
    {
        if (ins->isop(LIR_allocp)) {
            // never allocate a reg for this w/out stack space too
            findMemFor(ins);
        }

        NanoAssert(!ins->isInReg());
        NanoAssert(_allocator.free & rmask(r));

        ins->setReg(r);
        _allocator.removeFree(r);
        _allocator.addActive(r, ins);

        return r;
    }
#if NJ_USES_IMMD_POOL
    const uint64_t* Assembler::findImmDFromPool(uint64_t q)
    {
        uint64_t* p = _immDPool.get(q);
        if (!p)
        {
            p = new (_dataAlloc) uint64_t;
            *p = q;
            _immDPool.put(q, p);
        }
        return p;
    }
#endif
    int Assembler::findMemFor(LIns* ins)
    {
#if NJ_USES_IMMD_POOL
        NanoAssert(!ins->isImmD());
#endif
        if (!ins->isInAr()) {
            uint32_t const arIndex = arReserve(ins);
            ins->setArIndex(arIndex);
            NanoAssert(_activation.isValidEntry(ins->getArIndex(), ins) == (arIndex != 0));
        }
        return arDisp(ins);
    }
    // XXX: this function is dangerous and should be phased out;
    // see bug 513615. Calls to it should be replaced with a
    // prepareResultReg() / generate code / freeResourcesOf() sequence.
    Register Assembler::deprecated_prepResultReg(LIns* ins, RegisterMask allow)
    {
#ifdef NANOJIT_IA32
        // We used to have to worry about possibly popping the x87 stack here.
        // But this function is no longer used on i386, and this assertion
        // ensures that.
        NanoAssert(!(rmask(FST0) & allow));
#endif
        Register r = findRegFor(ins, allow);
        deprecated_freeRsrcOf(ins);
        return r;
    }
    // Finds a register in 'allow' to hold the result of 'ins'. Also
    // generates code to spill the result if necessary. Called just prior to
    // generating the code for 'ins' (because we generate code backwards).
    //
    // An example where no spill is necessary. Lines marked '*' are those
    // done by this function.
    //
    //   regstate:  R
    //   asm:       define res into r
    // * regstate:  R + r(res)
    //              ...
    //   asm:       use res in r
    //
    // An example where a spill is necessary.
    //
    //   regstate:  R
    //   asm:       define res into r
    // * regstate:  R + r(res)
    // * asm:       spill res from r
    //   regstate:  R
    //              ...
    //   asm:       restore res into r2
    //   regstate:  R + r2(res) + other changes from "..."
    //   asm:       use res in r2
    //
    Register Assembler::prepareResultReg(LIns* ins, RegisterMask allow)
    {
        // At this point, we know the result of 'ins' is used later in the
        // code, unless it is a call to an impure function that must be
        // included for effect even though its result is ignored. It may have
        // had to be evicted, in which case the restore will have already been
        // generated, so we now generate the spill. QUERY: Is there any attempt
        // to elide the spill if we know that all restores can be rematerialized?
#ifdef NANOJIT_IA32
        const bool notInFST0 = (!ins->isInReg() || ins->getReg() != FST0);
        Register r = findRegFor(ins, allow);
        // If the result register is FST0, but FST0 is not in the post-regstate,
        // then we must pop the x87 stack. This may occur because the result is
        // unused, or because it has been stored to a spill slot or an XMM register.
        const bool needPop = notInFST0 && (r == FST0);
        const bool didSpill = asm_maybe_spill(ins, needPop);
        if (!didSpill && needPop) {
            // If the instruction is spilled, then the pop will have already
            // been performed by the store to the stack slot. Otherwise, we
            // must pop now. This may occur when the result of a LIR_calld
            // to an impure (side-effecting) function is not used.
            FSTP(FST0);
        }
#else
        Register r = findRegFor(ins, allow);
        asm_maybe_spill(ins, false);
#endif
        return r;
    }
    bool Assembler::asm_maybe_spill(LIns* ins, bool pop)
    {
        if (ins->isInAr()) {
            int d = arDisp(ins);
            Register r = ins->getReg();
            verbose_only( RefBuf b;
                          if (_logc->lcbits & LC_Native) {
                             setOutputForEOL("  <= spill %s",
                             _thisfrag->lirbuf->printer->formatRef(&b, ins)); } )
#ifdef NANOJIT_IA32
            asm_spill(r, d, pop);
#else
            (void)pop;
            asm_spill(r, d, ins->isQorD());
#endif
            return true;
        }
        return false;
    }
    // XXX: This function is error-prone and should be phased out; see bug 513615.
    void Assembler::deprecated_freeRsrcOf(LIns* ins)
    {
        if (ins->isInReg()) {
            asm_maybe_spill(ins, /*pop*/false);
            _allocator.retire(ins->getReg());   // free any register associated with entry
            ins->clearReg();
        }
        if (ins->isInAr()) {
            arFree(ins);                        // free any AR space associated with entry
            ins->clearArIndex();
        }
    }
    // Frees all record of registers and spill slots used by 'ins'.
    void Assembler::freeResourcesOf(LIns* ins)
    {
        if (ins->isInReg()) {
            _allocator.retire(ins->getReg());   // free any register associated with entry
            ins->clearReg();
        }
        if (ins->isInAr()) {
            arFree(ins);                        // free any AR space associated with entry
            ins->clearArIndex();
        }
    }
    // Frees 'r' in the RegAlloc regstate, if it's not already free.
    void Assembler::evictIfActive(Register r)
    {
        if (LIns* vic = _allocator.getActive(r)) {
            NanoAssert(vic->getReg() == r);
            evict(vic);
        }
    }
    // Frees 'r' (which currently holds the result of 'vic') in the regstate.
    //
    // An example:
    //
    //   pre-regstate:  eax(ld1)
    //   instruction:   mov ebx,-4(ebp) <= restore add1   # %ebx is dest
    //   post-regstate: eax(ld1) ebx(add1)
    //
    // At run-time we are *restoring* 'add1' into %ebx, hence the call to
    // asm_restore(). But at regalloc-time we are moving backwards through
    // the code, so in that sense we are *evicting* 'add1' from %ebx.
    //
    void Assembler::evict(LIns* vic)
    {
        // Not free, need to steal.
        Register r = vic->getReg();

        NanoAssert(!_allocator.isFree(r));
        NanoAssert(vic == _allocator.getActive(r));

        verbose_only( RefBuf b;
                      if (_logc->lcbits & LC_Native) {
                         setOutputForEOL("  <= restore %s",
                         _thisfrag->lirbuf->printer->formatRef(&b, vic)); } )
        asm_restore(vic, r);

        _allocator.retire(r);
        vic->clearReg();

        // At this point 'vic' is unused (if rematerializable), or in a spill
        // slot (if not).
    }
    void Assembler::patch(GuardRecord* lr)
    {
        if (!lr->jmp) // the guard might have been eliminated as redundant
            return;
        Fragment* frag = lr->exit->target;
        NanoAssert(frag->fragEntry != 0);
        nPatchBranch((NIns*)lr->jmp, frag->fragEntry);
        CodeAlloc::flushICache(lr->jmp, LARGEST_BRANCH_PATCH);
        verbose_only(verbose_outputf("patching jump at %p to target %p\n",
                                     lr->jmp, frag->fragEntry);)
    }
    void Assembler::patch(SideExit* exit)
    {
        GuardRecord* rec = exit->guards;
        NanoAssert(rec);
        while (rec) {
            patch(rec);
            rec = rec->next;
        }
    }
    void Assembler::patch(SideExit* exit, SwitchInfo* si)
    {
        for (GuardRecord* lr = exit->guards; lr; lr = lr->next) {
            Fragment* frag = lr->exit->target;
            NanoAssert(frag->fragEntry != 0);
            si->table[si->index] = frag->fragEntry;
        }
    }
    NIns* Assembler::asm_exit(LIns* guard)
    {
        SideExit* exit = guard->record()->exit;
        NIns* at = 0;
        if (!_branchStateMap.get(exit))
        {
            at = asm_leave_trace(guard);
        }
        else
        {
            RegAlloc* captured = _branchStateMap.get(exit);
            intersectRegisterState(*captured);
            at = exit->target->fragEntry;
            NanoAssert(at != 0);
            _branchStateMap.remove(exit);
        }
        return at;
    }
    NIns* Assembler::asm_leave_trace(LIns* guard)
    {
        verbose_only( verbose_outputf("----------------------------------- ## END exit block %p", guard);)

        // This point is unreachable. So free all the registers. If an
        // instruction has a stack entry we will leave it alone, otherwise we
        // free it entirely. intersectRegisterState() will restore.
        RegAlloc capture = _allocator;
        releaseRegisters();

        swapCodeChunks();
        _inExit = true;

#ifdef NANOJIT_IA32
        debug_only( _sv_fpuStkDepth = _fpuStkDepth; _fpuStkDepth = 0; )
#endif

        nFragExit(guard);

        // Restore the callee-saved registers and parameters.
        assignSavedRegs();
        assignParamRegs();

        intersectRegisterState(capture);

        // this can be useful for breaking whenever an exit is taken
        //INT3();

        // we are done producing the exit logic for the guard so demark where our exit block code begins
        NIns* jmpTarget = _nIns;     // target in exit path for our mainline conditional jump

        // swap back pointers, effectively storing the last location used in the exit path
        swapCodeChunks();
        _inExit = false;

        //verbose_only( verbose_outputf("         LIR_xt/xf swapCodeChunks, _nIns is now %08X(%08X), _nExitIns is now %08X(%08X)",_nIns, *_nIns,_nExitIns,*_nExitIns) );
        verbose_only( verbose_outputf("%p:", jmpTarget);)
        verbose_only( verbose_outputf("----------------------------------- ## BEGIN exit block (LIR_xt|LIR_xf)") );

#ifdef NANOJIT_IA32
        NanoAssertMsgf(_fpuStkDepth == _sv_fpuStkDepth, "LIR_xtf, _fpuStkDepth=%d, expect %d", _fpuStkDepth, _sv_fpuStkDepth);
        debug_only( _fpuStkDepth = _sv_fpuStkDepth; _sv_fpuStkDepth = 9999; )
#endif

        return jmpTarget;
    }
    void Assembler::compile(Fragment* frag, Allocator& alloc, bool optimize
                            verbose_only(, LInsPrinter* printer))
    {
        verbose_only(
        bool anyVerb  = (_logc->lcbits & 0xFFFF & ~LC_FragProfile) > 0;
        bool liveVerb = (_logc->lcbits & 0xFFFF & LC_Liveness) > 0;
        )

        /* BEGIN decorative preamble */
        verbose_only( if (anyVerb) {
            _logc->printf("========================================"
                          "========================================\n");
            _logc->printf("=== BEGIN LIR::compile(%p, %p)\n",
                          (void*)this, (void*)frag);
            _logc->printf("===\n");
        })
        /* END decorative preamble */

        verbose_only( if (liveVerb) {
            _logc->printf("=== Results of liveness analysis:\n");
            _logc->printf("===\n");
            LirReader br(frag->lastIns);
            LirFilter* lir = &br;
            if (optimize) {
                StackFilter* sf = new (alloc) StackFilter(lir, alloc, frag->lirbuf->sp);
                lir = sf;
            }
            live(lir, alloc, frag, _logc);
        })

        /* Set up the generic text output cache for the assembler */
        verbose_only( StringList asmOutput(alloc); )
        verbose_only( _outputCache = &asmOutput; )

        beginAssembly(frag);
        if (error())
            return;

        //_logc->printf("recompile trigger %X kind %d\n", (int)frag, frag->kind);

        verbose_only( if (anyVerb) {
            _logc->printf("=== Translating LIR fragments into assembly:\n");
        })

        // now the main trunk
        verbose_only( RefBuf b; )
        verbose_only( if (anyVerb) {
            _logc->printf("=== -- Compile trunk %s: begin\n", printer->formatAddr(&b, frag));
        })

        // Used for debug printing, if needed
        debug_only(ValidateReader *validate = NULL;)
        verbose_only(
        ReverseLister *pp_init = NULL;
        ReverseLister *pp_after_sf = NULL;
        )

        // The LIR passes through these filters as listed in this
        // function, viz, top to bottom.

        // set up backwards pipeline: assembler <- StackFilter <- LirReader
        LirFilter* lir = new (alloc) LirReader(frag->lastIns);

        debug_only(
        lir = validate = new (alloc) ValidateReader(lir);
        )

        verbose_only( if (_logc->lcbits & LC_ReadLIR) {
        lir = pp_init = new (alloc) ReverseLister(lir, alloc, frag->lirbuf->printer, _logc,
                                                  "Initial LIR");
        })

        if (optimize) {
            StackFilter* stackfilter = new (alloc) StackFilter(lir, alloc, frag->lirbuf->sp);
            lir = stackfilter;
        }

        verbose_only( if (_logc->lcbits & LC_AfterSF) {
        lir = pp_after_sf = new (alloc) ReverseLister(lir, alloc, frag->lirbuf->printer, _logc,
                                                      "After StackFilter");
        })

        assemble(frag, lir);

        // If we were accumulating debug info in the various ReverseListers,
        // call finish() to emit whatever contents they have accumulated.
        verbose_only(
        if (pp_init)        pp_init->finish();
        if (pp_after_sf)    pp_after_sf->finish();
        )

        verbose_only( if (anyVerb) {
            _logc->printf("=== -- Compile trunk %s: end\n", printer->formatAddr(&b, frag));
        })

        endAssembly(frag);

        // Reverse output so that assembly is displayed low-to-high.
        // Up to this point, _outputCache has been non-NULL, and so has been
        // accumulating output. Now we set it to NULL, traverse the entire
        // list of stored strings, and hand them a second time to output.
        // Since _outputCache is now NULL, outputf just hands these strings
        // directly onwards to _logc->printf.
        verbose_only( if (anyVerb) {
            _logc->printf("=== Aggregated assembly output: BEGIN\n");
            _logc->printf("===\n");
            _outputCache = 0;
            for (Seq<char*>* p = asmOutput.get(); p != NULL; p = p->tail) {
                char* str = p->head;
                outputf("  %s", str);
            }
            _logc->printf("===\n");
            _logc->printf("=== Aggregated assembly output: END\n");
        })

        if (error())
            frag->fragEntry = 0;

        verbose_only( frag->nCodeBytes += codeBytes; )
        verbose_only( frag->nExitBytes += exitBytes; )

        /* BEGIN decorative postamble */
        verbose_only( if (anyVerb) {
            _logc->printf("===\n");
            _logc->printf("=== END LIR::compile(%p, %p)\n",
                          (void*)this, (void*)frag);
            _logc->printf("========================================"
                          "========================================\n");
        })
        /* END decorative postamble */
    }
    void Assembler::beginAssembly(Fragment* frag)
    {
        verbose_only( codeBytes = 0; )
        verbose_only( exitBytes = 0; )

        reset();

        NanoAssert(codeList == 0);
        NanoAssert(codeStart == 0);
        NanoAssert(codeEnd == 0);
        NanoAssert(exitStart == 0);
        NanoAssert(exitEnd == 0);
        NanoAssert(_nIns == 0);
        NanoAssert(_nExitIns == 0);

        _thisfrag = frag;
        _inExit = false;

        setError(None);

        // native code gen buffer setup
        nativePageSetup();

        // make sure we got memory at least one page
        if (error()) return;

        _epilogue = NULL;

        nBeginAssembly();
    }
    void Assembler::assemble(Fragment* frag, LirFilter* reader)
    {
        if (error()) return;
        _thisfrag = frag;

        // check the fragment is starting out with a sane profiling state
        verbose_only( NanoAssert(frag->nStaticExits == 0); )
        verbose_only( NanoAssert(frag->nCodeBytes == 0); )
        verbose_only( NanoAssert(frag->nExitBytes == 0); )
        verbose_only( NanoAssert(frag->profCount == 0); )
        verbose_only( if (_logc->lcbits & LC_FragProfile)
                          NanoAssert(frag->profFragID > 0);
                      else
                          NanoAssert(frag->profFragID == 0); )

        _inExit = false;

        gen(reader);

        if (!error()) {
            // patch all branches
            NInsMap::Iter iter(_patches);
            while (iter.next()) {
                NIns* where = iter.key();
                LIns* target = iter.value();
                if (target->isop(LIR_jtbl)) {
                    // Need to patch up a whole jump table, 'where' is the table.
                    LIns* jtbl = target;
                    NIns** native_table = (NIns**) (void *) where;
                    for (uint32_t i = 0, n = jtbl->getTableSize(); i < n; i++) {
                        LabelState* lstate = _labels.get(jtbl->getTarget(i));
                        NIns* ntarget = lstate->addr;
                        if (ntarget) {
                            native_table[i] = ntarget;
                        } else {
                            setError(UnknownBranch);
                            break;
                        }
                    }
                } else {
                    // target is a label for a single-target branch
                    LabelState* lstate = _labels.get(target);
                    NIns* ntarget = lstate->addr;
                    if (ntarget) {
                        nPatchBranch(where, ntarget);
                    } else {
                        setError(UnknownBranch);
                        break;
                    }
                }
            }
        }
    }
    void Assembler::endAssembly(Fragment* frag)
    {
        // don't try to patch code if we are in an error state since we might have partially
        // overwritten the code cache already
        if (error()) {
            // something went wrong, release all allocated code memory
            _codeAlloc.freeAll(codeList);
            if (_nExitIns)
                _codeAlloc.free(exitStart, exitEnd);
            _codeAlloc.free(codeStart, codeEnd);
            codeList = NULL;
            return;
        }

        NIns* fragEntry = genPrologue();
        verbose_only( asm_output("[prologue]"); )

        debug_only(_activation.checkForResourceLeaks());

        NanoAssert(!_inExit);
        // save used parts of current block on fragment's code list, free the rest
#if defined(NANOJIT_ARM) || defined(NANOJIT_MIPS)
        // [codeStart, _nSlot) ... gap ... [_nIns, codeEnd)
        if (_nExitIns) {
            _codeAlloc.addRemainder(codeList, exitStart, exitEnd, _nExitSlot, _nExitIns);
            verbose_only( exitBytes -= (_nExitIns - _nExitSlot) * sizeof(NIns); )
        }
        _codeAlloc.addRemainder(codeList, codeStart, codeEnd, _nSlot, _nIns);
        verbose_only( codeBytes -= (_nIns - _nSlot) * sizeof(NIns); )
#else
        // [codeStart ... gap ... [_nIns, codeEnd))
        if (_nExitIns) {
            _codeAlloc.addRemainder(codeList, exitStart, exitEnd, exitStart, _nExitIns);
            verbose_only( exitBytes -= (_nExitIns - exitStart) * sizeof(NIns); )
        }
        _codeAlloc.addRemainder(codeList, codeStart, codeEnd, codeStart, _nIns);
        verbose_only( codeBytes -= (_nIns - codeStart) * sizeof(NIns); )
#endif

        // at this point all our new code is in the d-cache and not the i-cache,
        // so flush the i-cache on cpu's that need it.
        CodeAlloc::flushICache(codeList);

        // save entry point pointers
        frag->fragEntry = fragEntry;
        frag->setCode(_nIns);

#ifdef VMCFG_VTUNE
        if (vtuneHandle)
        {
            vtuneEnd(vtuneHandle, codeEnd);
            vtuneStart(vtuneHandle, _nIns);
        }
#endif

        PERFM_NVPROF("code", CodeAlloc::size(codeList));

#ifdef NANOJIT_IA32
        NanoAssertMsgf(_fpuStkDepth == 0, "_fpuStkDepth %d\n", _fpuStkDepth);
#endif

        debug_only( pageValidate(); )
        NanoAssert(_branchStateMap.isEmpty());
    }
    void Assembler::releaseRegisters()
    {
        RegisterMask active = _allocator.activeMask();
        for (Register r = lsReg(active); active; r = nextLsReg(active, r))
        {
            LIns* ins = _allocator.getActive(r);
            // Clear reg allocation, preserve stack allocation.
            _allocator.retire(r);
            NanoAssert(r == ins->getReg());
            ins->clearReg();
        }
    }
#ifdef PERFM
#define countlir_live() _nvprof("lir-live",1)
#define countlir_ret() _nvprof("lir-ret",1)
#define countlir_alloc() _nvprof("lir-alloc",1)
#define countlir_var() _nvprof("lir-var",1)
#define countlir_use() _nvprof("lir-use",1)
#define countlir_def() _nvprof("lir-def",1)
#define countlir_imm() _nvprof("lir-imm",1)
#define countlir_param() _nvprof("lir-param",1)
#define countlir_cmov() _nvprof("lir-cmov",1)
#define countlir_ld() _nvprof("lir-ld",1)
#define countlir_ldq() _nvprof("lir-ldq",1)
#define countlir_alu() _nvprof("lir-alu",1)
#define countlir_qjoin() _nvprof("lir-qjoin",1)
#define countlir_qlo() _nvprof("lir-qlo",1)
#define countlir_qhi() _nvprof("lir-qhi",1)
#define countlir_fpu() _nvprof("lir-fpu",1)
#define countlir_st() _nvprof("lir-st",1)
#define countlir_stq() _nvprof("lir-stq",1)
#define countlir_jmp() _nvprof("lir-jmp",1)
#define countlir_jcc() _nvprof("lir-jcc",1)
#define countlir_label() _nvprof("lir-label",1)
#define countlir_xcc() _nvprof("lir-xcc",1)
#define countlir_x() _nvprof("lir-x",1)
#define countlir_call() _nvprof("lir-call",1)
#define countlir_jtbl() _nvprof("lir-jtbl",1)
#else
#define countlir_live()
#define countlir_ret()
#define countlir_alloc()
#define countlir_var()
#define countlir_use()
#define countlir_def()
#define countlir_imm()
#define countlir_param()
#define countlir_cmov()
#define countlir_ld()
#define countlir_ldq()
#define countlir_alu()
#define countlir_qjoin()
#define countlir_qlo()
#define countlir_qhi()
#define countlir_fpu()
#define countlir_st()
#define countlir_stq()
#define countlir_jmp()
#define countlir_jcc()
#define countlir_label()
#define countlir_xcc()
#define countlir_x()
#define countlir_call()
#define countlir_jtbl()
#endif
    void Assembler::asm_jmp(LIns* ins, InsList& pending_lives)
    {
        NanoAssert((ins->isop(LIR_j) && !ins->oprnd1()) ||
                   (ins->isop(LIR_jf) && ins->oprnd1()->isImmI(0)) ||
                   (ins->isop(LIR_jt) && ins->oprnd1()->isImmI(1)));

        LIns* to = ins->getTarget();
        LabelState* label = _labels.get(to);

        // The jump is always taken so whatever register state we
        // have from downstream code, is irrelevant to code before
        // this jump. So clear it out. We will pick up register
        // state from the jump target, if we have seen that label.
        releaseRegisters();

#ifdef NANOJIT_IA32
        // Unreachable, so assume correct stack depth.
        debug_only( _fpuStkDepth = 0; )
#endif

        if (label && label->addr) {
            // Forward jump - pick up register state from target.
            unionRegisterState(label->regs);
#ifdef NANOJIT_IA32
            // Set stack depth according to the register state we just loaded,
            // negating the effect of any unreachable x87 stack pop that might
            // have been emitted by unionRegisterState().
            debug_only( _fpuStkDepth = (_allocator.getActive(FST0) ? -1 : 0); )
#endif
            JMP(label->addr);
        }
        else {
            // Backwards jump.
            handleLoopCarriedExprs(pending_lives);
            if (!label) {
                // save empty register state at loop header
                _labels.add(to, 0, _allocator);
            }
            else {
                intersectRegisterState(label->regs);
#ifdef NANOJIT_IA32
                debug_only( _fpuStkDepth = (_allocator.getActive(FST0) ? -1 : 0); )
#endif
            }
            JMP(0);
            _patches.put(_nIns, to);
        }
    }
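    // Example (illustrative): for a loop back edge, codegen runs bottom-up,
    // so the LIR_label at the loop top has not been seen when this jump is
    // reached. The branch is emitted with a placeholder target and recorded
    // in _patches; assemble() patches it once the label's native address is
    // known.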
    void Assembler::asm_jcc(LIns* ins, InsList& pending_lives)
    {
        bool branchOnFalse = (ins->opcode() == LIR_jf);
        LIns* cond = ins->oprnd1();
        if (cond->isImmI()) {
            if ((!branchOnFalse && !cond->immI()) || (branchOnFalse && cond->immI())) {
                // jmp never taken, not needed
            } else {
                asm_jmp(ins, pending_lives);    // jmp always taken
            }
            return;
        }

        // Changes to the logic below will likely need to be propagated to Assembler::asm_jov().

        LIns* to = ins->getTarget();
        LabelState* label = _labels.get(to);
        if (label && label->addr) {
            // Forward jump to known label. Need to merge with label's register state.
            unionRegisterState(label->regs);
            asm_branch(branchOnFalse, cond, label->addr);
        }
        else {
            // Back edge.
            handleLoopCarriedExprs(pending_lives);
            if (!label) {
                // Evict all registers, most conservative approach.
                evictAllActiveRegs();
                _labels.add(to, 0, _allocator);
            }
            else {
                // Evict all registers, most conservative approach.
                intersectRegisterState(label->regs);
            }
            NIns* branch = asm_branch(branchOnFalse, cond, 0);
            _patches.put(branch, to);
        }
    }
* ins
, InsList
& pending_lives
)
1307 // The caller is responsible for countlir_* profiling, unlike
1308 // asm_jcc above. The reason for this is that asm_jov may not be
1309 // be called if the instruction is dead, and it is our convention
1310 // to count such instructions anyway.
1311 LOpcode op
= ins
->opcode();
1312 LIns
* to
= ins
->getTarget();
1313 LabelState
*label
= _labels
.get(to
);
1314 if (label
&& label
->addr
) {
1315 // forward jump to known label. need to merge with label's register state.
1316 unionRegisterState(label
->regs
);
1317 asm_branch_ov(op
, label
->addr
);
1321 handleLoopCarriedExprs(pending_lives
);
1323 // evict all registers, most conservative approach.
1324 evictAllActiveRegs();
1325 _labels
.add(to
, 0, _allocator
);
1328 // evict all registers, most conservative approach.
1329 intersectRegisterState(label
->regs
);
1331 NIns
*branch
= asm_branch_ov(op
, 0);
1332 _patches
.put(branch
,to
);
1336 void Assembler::asm_x(LIns
* ins
)
1338 verbose_only( _thisfrag
->nStaticExits
++; )
1340 // Generate the side exit branch on the main trace.
1341 NIns
*exit
= asm_exit(ins
);
1345 void Assembler::asm_xcc(LIns
* ins
)
1347 LIns
* cond
= ins
->oprnd1();
1348 if (cond
->isImmI()) {
1349 if ((ins
->isop(LIR_xt
) && !cond
->immI()) || (ins
->isop(LIR_xf
) && cond
->immI())) {
1350 // guard never taken, not needed
1352 asm_x(ins
); // guard always taken
1357 verbose_only( _thisfrag
->nStaticExits
++; )
1359 // We only support cmp with guard right now, also assume it is 'close'
1360 // and only emit the branch.
1361 NIns
* exit
= asm_exit(ins
); // does intersectRegisterState()
1362 asm_branch(ins
->opcode() == LIR_xf
, cond
, exit
);
    void Assembler::gen(LirFilter* reader)
    {
        NanoAssert(_thisfrag->nStaticExits == 0);

        InsList pending_lives(alloc);

        NanoAssert(!error());

        // What's going on here: we're visiting all the LIR instructions in
        // the buffer, working strictly backwards in buffer-order, and
        // generating machine instructions for them as we go.
        //
        // For each LIns, we first check if it's live. If so we mark its
        // operands as also live, and then generate code for it *if
        // necessary*. It may not be necessary if the instruction is an
        // expression and code has already been generated for all its uses in
        // combination with previously handled instructions (ins->isExtant()
        // will return false if this is so).
        //
        // Note that the backwards code traversal can make register allocation
        // confusing. (For example, we restore a value before we spill it!)
        // In particular, words like "before" and "after" must be used very
        // carefully -- their meaning at regalloc-time is opposite to their
        // meaning at run-time. We use the term "pre-regstate" to refer to
        // the register allocation state that occurs prior to an instruction's
        // execution, and "post-regstate" to refer to the state that occurs
        // after an instruction's execution, e.g.:
        //
        //   pre-regstate:  ebx(ins)
        //   instruction:   mov eax, ebx     // mov dst, src
        //   post-regstate: eax(ins)
        //
        // At run-time, the instruction updates the pre-regstate into the
        // post-regstate (and these states are the real machine's regstates).
        // But when allocating registers, because we go backwards, the
        // pre-regstate is constructed from the post-regstate (and these
        // regstates are those stored in RegAlloc).
        //
        // One consequence of generating code backwards is that we tend to
        // both spill and restore registers as early (at run-time) as
        // possible; this is good for tolerating memory latency. If we
        // generated code forwards, we would expect to both spill and restore
        // registers as late (at run-time) as possible; this might be better
        // for reducing register pressure.

        // The trace must end with one of these opcodes. Mark it as live.
        NanoAssert(reader->finalIns()->isop(LIR_x) ||
                   reader->finalIns()->isop(LIR_xtbl) ||
                   reader->finalIns()->isRet() ||
                   isLiveOpcode(reader->finalIns()->opcode()));
        for (currIns = reader->read(); !currIns->isop(LIR_start); currIns = reader->read())
        {
            LIns* ins = currIns;    // give it a shorter name for local use

            if (!ins->isLive()) {
                NanoAssert(!ins->isExtant());
                continue;
            }

#ifdef NJ_VERBOSE
            // Output the post-regstate (registers and/or activation).
            // Because asm output comes in reverse order, doing it now means
            // it is printed after the LIR and native code, exactly when the
            // post-regstate should be shown.
            if ((_logc->lcbits & LC_Native) && (_logc->lcbits & LC_Activation))
                printActivationState();
            if ((_logc->lcbits & LC_Native) && (_logc->lcbits & LC_RegAlloc))
                printRegState();
#endif

            LOpcode op = ins->opcode();
            switch (op)
            {
                default:
                    NanoAssertMsgf(false, "unsupported LIR instruction: %d\n", op);
                    break;
                case LIR_regfence:
                    evictAllActiveRegs();
                    break;

                case LIR_livei:
                CASE64(LIR_liveq:)
                case LIR_lived: {
                    countlir_live();
                    LIns* op1 = ins->oprnd1();
                    op1->setResultLive();
                    // LIR_allocp's are meant to live until the point of the
                    // LIR_livep instruction, marking other expressions as
                    // live ensures that they remain so at loop bottoms.
                    // LIR_allocp areas require special treatment because they
                    // are accessed indirectly and the indirect accesses are
                    // invisible to the assembler, other than via LIR_livep.
                    // Other expression results are only accessed directly in
                    // ways that are visible to the assembler, so extending
                    // those expression's lifetimes past the last loop edge
                    // isn't necessary.
                    if (op1->isop(LIR_allocp)) {
                        findMemFor(op1);
                    } else {
                        pending_lives.add(ins);
                    }
                    break;
                }
                    ins->oprnd1()->setResultLive();

                case LIR_allocp:
                    countlir_alloc();
                    // Allocate some stack space. The value of this instruction
                    // is the address of the stack space.
                    if (ins->isExtant()) {
                        NanoAssert(ins->isInAr());
                        freeResourcesOf(ins);
                    }
                    break;
                    if (ins->isExtant()) {

#ifdef NANOJIT_64BIT
                    if (ins->isExtant()) {

                    if (ins->isExtant()) {

                    if (ins->isExtant()) {
#if NJ_SOFTFLOAT_SUPPORTED
                case LIR_hcalli: {
                    LIns* op1 = ins->oprnd1();
                    op1->setResultLive();
                    if (ins->isExtant()) {
                        // Return result of quad-call in register.
                        deprecated_prepResultReg(ins, rmask(retRegs[1]));
                        // If hi half was used, we must use the call to ensure it happens.
                        findSpecificRegFor(op1, retRegs[0]);
                    }
                    break;
                }
                    ins->oprnd1()->setResultLive();
                    if (ins->isExtant()) {

                    ins->oprnd1()->setResultLive();
                    if (ins->isExtant()) {

                    ins->oprnd1()->setResultLive();
                    ins->oprnd2()->setResultLive();
                    if (ins->isExtant()) {

                    ins->oprnd1()->setResultLive();
                    ins->oprnd2()->setResultLive();
                    ins->oprnd3()->setResultLive();
                    if (ins->isExtant()) {

                    ins->oprnd1()->setResultLive();
                    if (ins->isExtant()) {

                    ins->oprnd1()->setResultLive();
                    if (ins->isExtant()) {

                    ins->oprnd1()->setResultLive();
                    if (ins->isExtant()) {
#if defined NANOJIT_64BIT
                    ins->oprnd1()->setResultLive();
                    ins->oprnd2()->setResultLive();
                    if (ins->isExtant()) {

                    ins->oprnd1()->setResultLive();
                    ins->oprnd2()->setResultLive();
                    if (ins->isExtant()) {

#if defined NANOJIT_IA32 || defined NANOJIT_X64
                    ins->oprnd1()->setResultLive();
                    if (ins->isExtant()) {

                    ins->oprnd1()->setResultLive();
                    if (ins->isExtant()) {

                    ins->oprnd1()->setResultLive();
                    ins->oprnd2()->setResultLive();
                    if (ins->isExtant()) {

                    ins->oprnd1()->setResultLive();
                    if (ins->isExtant()) {

                    ins->oprnd1()->setResultLive();
                    if (ins->isExtant()) {

                    ins->oprnd1()->setResultLive();
                    if (ins->isExtant()) {

#ifdef NANOJIT_64BIT
                    ins->oprnd1()->setResultLive();
                    if (ins->isExtant()) {

                    ins->oprnd1()->setResultLive();
                    if (ins->isExtant()) {

                    ins->oprnd1()->setResultLive();
                    if (ins->isExtant()) {

                    ins->oprnd1()->setResultLive();
                    if (ins->isExtant()) {
                case LIR_sti2c:
                case LIR_sti2s:
                case LIR_sti:
                    countlir_st();
                    ins->oprnd1()->setResultLive();
                    ins->oprnd2()->setResultLive();
                    asm_store32(op, ins->oprnd1(), ins->disp(), ins->oprnd2());
                    break;
                CASE64(LIR_stq:)
                case LIR_std:
                case LIR_std2f: {
                    countlir_stq();
                    ins->oprnd1()->setResultLive();
                    ins->oprnd2()->setResultLive();
                    LIns* value = ins->oprnd1();
                    LIns* base = ins->oprnd2();
                    int dr = ins->disp();
#if NJ_SOFTFLOAT_SUPPORTED
                    if (value->isop(LIR_ii2d) && op == LIR_std)
                    {
                        // This is correct for little-endian only.
                        asm_store32(LIR_sti, value->oprnd1(), dr, base);
                        asm_store32(LIR_sti, value->oprnd2(), dr+4, base);
                    }
                    else
#endif
                    {
                        asm_store64(op, value, dr, base);
                    }
                    break;
                }
                case LIR_j:
                    asm_jmp(ins, pending_lives);
                    break;

                case LIR_jt:
                case LIR_jf:
                    ins->oprnd1()->setResultLive();
                    asm_jcc(ins, pending_lives);
                    break;
#if NJ_JTBL_SUPPORTED
                case LIR_jtbl: {
                    countlir_jtbl();
                    ins->oprnd1()->setResultLive();
                    // Multiway jump can contain both forward and backward jumps.
                    // Out of range indices aren't allowed or checked.
                    // Code after this jtbl instruction is unreachable.
                    releaseRegisters();
                    NanoAssert(_allocator.activeMask() == 0);

                    uint32_t count = ins->getTableSize();
                    bool has_back_edges = false;

                    // Merge the regstates of labels we have already seen.
                    for (uint32_t i = count; i-- > 0;) {
                        LIns* to = ins->getTarget(i);
                        LabelState* lstate = _labels.get(to);
                        if (lstate) {
                            unionRegisterState(lstate->regs);
                            verbose_only( RefBuf b; )
                            asm_output("   %u: [&%s]", i, _thisfrag->lirbuf->printer->formatRef(&b, to));
                        } else {
                            has_back_edges = true;
                        }
                    }
                    asm_output("forward edges");

                    // In a multi-way jump, the register allocator has no ability to deal
                    // with two existing edges that have conflicting register assignments, unlike
                    // a conditional branch where code can be inserted on the fall-through path
                    // to reconcile registers. So, frontends *must* insert LIR_regfence at labels of
                    // forward jtbl jumps. Check here to make sure no registers were picked up from
                    // any forward edges.
                    NanoAssert(_allocator.activeMask() == 0);

                    if (has_back_edges) {
                        handleLoopCarriedExprs(pending_lives);
                        // save merged (empty) register state at target labels we haven't seen yet
                        for (uint32_t i = count; i-- > 0;) {
                            LIns* to = ins->getTarget(i);
                            LabelState* lstate = _labels.get(to);
                            if (!lstate) {
                                _labels.add(to, 0, _allocator);
                                verbose_only( RefBuf b; )
                                asm_output("   %u: [&%s]", i, _thisfrag->lirbuf->printer->formatRef(&b, to));
                            }
                        }
                        asm_output("backward edges");
                    }

                    // Emit the jump instruction, which allocates 1 register for the jump index.
                    NIns** native_table = new (_dataAlloc) NIns*[count];
                    asm_output("[%p]:", (void*)native_table);
                    _patches.put((NIns*)native_table, ins);
                    asm_jtbl(ins, native_table);
                    break;
                }
#endif
                case LIR_label: {
                    countlir_label();
                    LabelState* label = _labels.get(ins);
                    // add profiling inc, if necessary.
                    verbose_only( if (_logc->lcbits & LC_FragProfile) {
                        if (ins == _thisfrag->loopLabel)
                            asm_inc_m32(& _thisfrag->profCount);
                    })
                    if (!label) {
                        // label seen first, normal target of forward jump, save addr & allocator
                        _labels.add(ins, _nIns, _allocator);
                    }
                    else {
                        // we're at the top of a loop
                        NanoAssert(label->addr == 0);
                        //evictAllActiveRegs();
                        intersectRegisterState(label->regs);
                        label->addr = _nIns;
                    }
                    verbose_only(
                        RefBuf b;
                        if (_logc->lcbits & LC_Native) {
                            asm_output("[%s]", _thisfrag->lirbuf->printer->formatRef(&b, ins));
                        })
                    break;
                }
                case LIR_xtbl: {
                    ins->oprnd1()->setResultLive();
#ifdef NANOJIT_IA32
                    NIns* exit = asm_exit(ins); // does intersectRegisterState()
                    asm_switch(ins, exit);
#else
                    NanoAssertMsg(0, "Not supported for this architecture");
#endif
                    break;
                }

                    ins->oprnd1()->setResultLive();

                    verbose_only( _thisfrag->nStaticExits++; )
                    ins->oprnd1()->setResultLive();
                    ins->oprnd2()->setResultLive();
                    if (ins->isExtant()) {
                        NIns* exit = asm_exit(ins); // does intersectRegisterState()
                        asm_branch_ov(op, exit);
                    }

                    ins->oprnd1()->setResultLive();
                    ins->oprnd2()->setResultLive();
                    if (ins->isExtant()) {
                        asm_jov(ins, pending_lives);
                    }
#ifdef NANOJIT_64BIT
                    ins->oprnd1()->setResultLive();
                    ins->oprnd2()->setResultLive();
                    if (ins->isExtant()) {
                        asm_jov(ins, pending_lives);
                    }
#endif

                    ins->oprnd1()->setResultLive();
                    ins->oprnd2()->setResultLive();
                    if (ins->isExtant()) {

                    ins->oprnd1()->setResultLive();
                    ins->oprnd2()->setResultLive();
                    if (ins->isExtant()) {
                case LIR_callv:
                case LIR_calli:
                CASE64(LIR_callq:)
                case LIR_calld:
                    countlir_call();
                    for (int i = 0, argc = ins->argc(); i < argc; i++)
                        ins->arg(i)->setResultLive();
                    // It must be impure or pure-and-extant -- it couldn't be
                    // pure-and-not-extant, because there's no way the codegen
                    // for a call can be folded into the codegen of another
                    // LIR instruction.
                    NanoAssert(!ins->callInfo()->_isPure || ins->isExtant());
                    asm_call(ins);
                    break;
#ifdef VMCFG_VTUNE
                case LIR_file: {
                    // we traverse backwards so we are now hitting the file
                    // that is associated with a bunch of LIR_lines we already have seen
                    void* currentFile = (void*) ins->oprnd1()->immI();
                    vtuneFile(vtuneHandle, currentFile);
                    break;
                }

                case LIR_line: {
                    // add a new table entry; we don't yet know which file it belongs
                    // to, so we need to add it to the update table too
                    // note the alloc, actual act is delayed; see above
                    uint32_t currentLine = (uint32_t) ins->oprnd1()->immI();
                    vtuneLine(vtuneHandle, currentLine, _nIns);
                    break;
                }
#endif // VMCFG_VTUNE
            }

#ifdef NJ_VERBOSE
            // We do final LIR printing inside this loop to avoid printing
            // dead LIR instructions. We print the LIns after generating the
            // code. This ensures that the LIns will appear in debug output
            // *before* the native code, because Assembler::outputf()
            // prints everything in reverse.
            if (_logc->lcbits & LC_AfterDCE) {
                InsBuf b;
                LInsPrinter* printer = _thisfrag->lirbuf->printer;
                outputf("    %s", printer->formatIns(&b, ins));
            }
#endif

            if (error())
                return;

            // check that all is well (don't check in exit paths since it's more complicated)
            debug_only( pageValidate(); )
            debug_only( resourceConsistencyCheck(); )
        }
    }
    /*
     * Write a jump table for the given SwitchInfo and store the table
     * address in the SwitchInfo. Every entry will initially point to
     * 'target'.
     */
    void Assembler::emitJumpTable(SwitchInfo* si, NIns* target)
    {
        si->table = (NIns**) alloc.alloc(si->count * sizeof(NIns*));
        for (uint32_t i = 0; i < si->count; ++i)
            si->table[i] = target;
    }
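    // Illustrative flow (assumed typical usage, not spelled out in this file):
    // the back-end emits an indirect jump through si->table with every slot
    // pointing at 'target' (e.g. a default exit); patch(SideExit*, SwitchInfo*)
    // above later overwrites individual slots as the real case targets become
    // known.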
    void Assembler::assignSavedRegs()
    {
        // Restore saved registers.
        LirBuffer* b = _thisfrag->lirbuf;
        for (int i = 0, n = NumSavedRegs; i < n; i++) {
            LIns* p = b->savedRegs[i];
            if (p)
                findSpecificRegForUnallocated(p, savedRegs[p->paramArg()]);
        }
    }
    void Assembler::reserveSavedRegs()
    {
        LirBuffer* b = _thisfrag->lirbuf;
        for (int i = 0, n = NumSavedRegs; i < n; i++) {
            LIns* ins = b->savedRegs[i];
            if (ins)
                findMemFor(ins);
        }
    }
    void Assembler::assignParamRegs()
    {
        LIns* state = _thisfrag->lirbuf->state;
        if (state)
            findSpecificRegForUnallocated(state, argRegs[state->paramArg()]);
        LIns* param1 = _thisfrag->lirbuf->param1;
        if (param1)
            findSpecificRegForUnallocated(param1, argRegs[param1->paramArg()]);
    }
    void Assembler::handleLoopCarriedExprs(InsList& pending_lives)
    {
        // ensure that exprs spanning the loop are marked live at the end of the loop
        reserveSavedRegs();
        for (Seq<LIns*>* p = pending_lives.get(); p != NULL; p = p->tail) {
            LIns* ins = p->head;
            NanoAssert(isLiveOpcode(ins->opcode()));
            LIns* op1 = ins->oprnd1();
            // Must findMemFor even if we're going to findRegFor; loop-carried
            // operands may spill on another edge, and we need them to always
            // spill to the same place.
#if NJ_USES_IMMD_POOL
            // Exception: if float constants are true constants, we should
            // never call findMemFor on those ops.
            if (!op1->isImmD())
#endif
            {
                findMemFor(op1);
            }
            if (!op1->isImmAny())
                findRegFor(op1, ins->isop(LIR_lived) ? FpRegs : GpRegs);
        }

        // clear this list since we have now dealt with those lifetimes. extending
        // their lifetimes again later (earlier in the code) serves no purpose.
        pending_lives.clear();
    }
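    // Example (illustrative): an expression defined above a loop and used
    // only below it is kept alive across the back edge by a LIR_livep-style
    // instruction; the code above reserves its stack slot (and a register
    // for non-immediates) so that every loop edge spills it to the same
    // location.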
    void AR::freeEntryAt(uint32_t idx)
    {
        NanoAssert(idx > 0 && idx <= _highWaterMark);

        // NB: this loop relies on entry[0] being NULL,
        // so that we are guaranteed to terminate
        // without accessing negative entries.
        LIns* i = _entries[idx];
        NanoAssert(i != NULL);
        do {
            _entries[idx] = NULL;
            idx--;
        } while (_entries[idx] == i);
    }
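    // Worked example (illustrative): if a 2-slot entry occupies
    // _entries[3..4], freeEntryAt(4) clears index 4, then index 3, and stops
    // at index 2 because that slot holds NULL or a different ins; the NULL
    // sentinel in _entries[0] guarantees the walk terminates even for an
    // entry starting at index 1.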
#ifdef NJ_VERBOSE
    void Assembler::printRegState()
    {
        char* s = &outline[0];
        VMPI_memset(s, ' ', 26);  s[26] = '\0';
        s += VMPI_strlen(s);
        VMPI_sprintf(s, "RR");
        s += VMPI_strlen(s);

        RegisterMask active = _allocator.activeMask();
        for (Register r = lsReg(active); active != 0; r = nextLsReg(active, r)) {
            LIns* ins = _allocator.getActive(r);
            NanoAssertMsg(!_allocator.isFree(r),
                          "Coding error; register is both free and active! " );
            RefBuf b;
            const char* n = _thisfrag->lirbuf->printer->formatRef(&b, ins);

            if (ins->isop(LIR_paramp) && ins->paramKind()==1 &&
                r == Assembler::savedRegs[ins->paramArg()])
            {
                // don't print callee-saved regs that aren't used
                continue;
            }

            VMPI_sprintf(s, " %s(%s)", gpn(r), n);
            s += VMPI_strlen(s);
        }
        output();
    }
    void Assembler::printActivationState()
    {
        char* s = &outline[0];
        VMPI_memset(s, ' ', 26);  s[26] = '\0';
        s += VMPI_strlen(s);
        VMPI_sprintf(s, "AR");
        s += VMPI_strlen(s);

        LIns* ins = 0;
        uint32_t nStackSlots = 0;
        int32_t arIndex = 0;
        for (AR::Iter iter(_activation); iter.next(ins, nStackSlots, arIndex); )
        {
            RefBuf b;
            const char* n = _thisfrag->lirbuf->printer->formatRef(&b, ins);
            if (nStackSlots > 1) {
                VMPI_sprintf(s," %d-%d(%s)", 4*arIndex, 4*(arIndex+nStackSlots-1), n);
            }
            else {
                VMPI_sprintf(s," %d(%s)", 4*arIndex, n);
            }
            s += VMPI_strlen(s);
        }
        output();
    }
#endif
    inline bool AR::isEmptyRange(uint32_t start, uint32_t nStackSlots) const
    {
        for (uint32_t i = 0; i < nStackSlots; i++)
        {
            if (_entries[start-i] != NULL)
                return false;
        }
        return true;
    }
    uint32_t AR::reserveEntry(LIns* ins)
    {
        uint32_t const nStackSlots = nStackSlotsFor(ins);

        if (nStackSlots == 1)
        {
            for (uint32_t i = 1; i <= _highWaterMark; i++)
            {
                if (_entries[i] == NULL)
                {
                    _entries[i] = ins;
                    return i;
                }
            }
            if (_highWaterMark < NJ_MAX_STACK_ENTRY - 1)
            {
                NanoAssert(_entries[_highWaterMark+1] == BAD_ENTRY);
                _highWaterMark++;
                _entries[_highWaterMark] = ins;
                return _highWaterMark;
            }
        }
        else
        {
            // alloc larger block on 8byte boundary.
            uint32_t const start = nStackSlots + (nStackSlots & 1);
            for (uint32_t i = start; i <= _highWaterMark; i += 2)
            {
                if (isEmptyRange(i, nStackSlots))
                {
                    // place the entry in the table and mark the instruction with it
                    for (uint32_t j = 0; j < nStackSlots; j++)
                    {
                        NanoAssert(i-j <= _highWaterMark);
                        NanoAssert(_entries[i-j] == NULL);
                        _entries[i-j] = ins;
                    }
                    return i;
                }
            }

            // Be sure to account for any 8-byte-round-up when calculating spaceNeeded.
            uint32_t const spaceLeft = NJ_MAX_STACK_ENTRY - _highWaterMark - 1;
            uint32_t const spaceNeeded = nStackSlots + (_highWaterMark & 1);
            if (spaceLeft >= spaceNeeded)
            {
                if (_highWaterMark & 1)
                {
                    NanoAssert(_entries[_highWaterMark+1] == BAD_ENTRY);
                    _entries[_highWaterMark+1] = NULL;
                }
                _highWaterMark += spaceNeeded;
                for (uint32_t j = 0; j < nStackSlots; j++)
                {
                    NanoAssert(_highWaterMark-j < NJ_MAX_STACK_ENTRY);
                    NanoAssert(_entries[_highWaterMark-j] == BAD_ENTRY);
                    _entries[_highWaterMark-j] = ins;
                }
                return _highWaterMark;
            }
        }
        // no space. oh well.
        return 0;
    }
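    // Worked example (illustrative, starting from an empty AR): a 1-slot
    // entry lands in _entries[1]. A following 2-slot (8-byte) entry starts
    // its even-indexed scan at i = 2; since _entries[1] is occupied the
    // range [1..2] is rejected, so it settles in _entries[3..4] and index 4
    // is recorded in the ins. Each slot index corresponds to one 4-byte
    // stack cell.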
    void AR::checkForResourceLeaks() const
    {
        for (uint32_t i = 1; i <= _highWaterMark; i++) {
            NanoAssertMsgf(_entries[i] == NULL, "frame entry %d wasn't freed\n", 4*i);
        }
    }
    uint32_t Assembler::arReserve(LIns* ins)
    {
        uint32_t i = _activation.reserveEntry(ins);
        if (!i)
            setError(StackFull);
        return i;
    }
    void Assembler::arFree(LIns* ins)
    {
        NanoAssert(ins->isInAr());
        uint32_t arIndex = ins->getArIndex();
        NanoAssert(arIndex);
        NanoAssert(_activation.isValidEntry(arIndex, ins));
        _activation.freeEntryAt(arIndex);   // free any stack space associated with entry
    }
    /**
     * Move regs around so the SavedRegs contains the highest priority regs.
     */
    void Assembler::evictScratchRegsExcept(RegisterMask ignore)
    {
        // Find the top GpRegs that are candidates to put in SavedRegs.

        // 'tosave' is a binary heap stored in an array. The root is tosave[0],
        // left child is at i+1, right child is at i+2.
        Register tosave[LastReg-FirstReg+1];
        int len = 0;
        RegAlloc* regs = &_allocator;
        RegisterMask evict_set = regs->activeMask() & GpRegs & ~ignore;
        for (Register r = lsReg(evict_set); evict_set; r = nextLsReg(evict_set, r)) {
            LIns* ins = regs->getActive(r);
            if (canRemat(ins)) {
                NanoAssert(ins->getReg() == r);
                evict(ins);
            }
            else if (!ins->isop(LIR_allocp)) {
                int32_t pri = regs->getPriority(r);
                // add to heap by adding to end and bubbling up
                int j = len++;
                while (j > 0 && pri > regs->getPriority(tosave[j/2])) {
                    tosave[j] = tosave[j/2];
                    j /= 2;
                }
                NanoAssert(size_t(j) < sizeof(tosave)/sizeof(tosave[0]));
                tosave[j] = r;
            }
        }

        // Now primap has the live exprs in priority order.
        // Allocate each of the top priority exprs to a SavedReg.

        RegisterMask allow = SavedRegs;
        while (allow && len > 0) {
            // get the highest priority var
            Register hi = tosave[0];
            if (!(rmask(hi) & SavedRegs)) {
                LIns* ins = regs->getActive(hi);
                Register r = findRegFor(ins, allow);
                allow &= ~rmask(r);
            }
            else {
                // hi is already in a saved reg, leave it alone.
                allow &= ~rmask(hi);
            }

            // remove from heap by replacing root with end element and bubbling down.
            if (allow && --len > 0) {
                Register last = tosave[len];
                int j = 0;
                while (j+1 < len) {
                    int child = j+1;
                    if (j+2 < len && regs->getPriority(tosave[j+2]) > regs->getPriority(tosave[j+1]))
                        child = j+2;
                    if (regs->getPriority(last) > regs->getPriority(tosave[child]))
                        break;
                    tosave[j] = tosave[child];
                    j = child;
                }
                tosave[j] = last;
            }
        }

        // now evict everything else.
        evictSomeActiveRegs(~(SavedRegs | ignore));
    }
    // Generate code to restore any registers in 'regs' that are currently active.
    void Assembler::evictSomeActiveRegs(RegisterMask regs)
    {
        RegisterMask evict_set = regs & _allocator.activeMask();
        for (Register r = lsReg(evict_set); evict_set; r = nextLsReg(evict_set, r))
            evict(_allocator.getActive(r));
    }
    /**
     * Merge the current regstate with a previously stored version.
     *
     * Situation                            Change to _allocator
     * ---------                            --------------------
     * !current & !saved                    none
     * !current & saved                     add saved
     * current & !saved                     evict current (unionRegisterState does nothing)
     * current & saved & current==saved     none
     * current & saved & current!=saved     evict current, add saved
     */
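    // Example (illustrative): if the saved edge state has ins1 in eax while
    // the current state has ins2 in eax, the "evict current, add saved" row
    // applies: ins2 is evicted and ins1 is re-established in eax via
    // findSpecificRegFor(), so both paths arrive at the merge point with
    // identical register contents.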
    void Assembler::intersectRegisterState(RegAlloc& saved)
    {
        Register regsTodo[LastReg + 1];
        LIns* insTodo[LastReg + 1];
        int nTodo = 0;

        // Do evictions and pops first.
        verbose_only(bool shouldMention=false; )
        // The obvious thing to do here is to iterate from FirstReg to LastReg.
        // However, on ARM that causes lower-numbered integer registers
        // to be saved at higher addresses, which inhibits the formation
        // of load/store multiple instructions. Hence iterate the loop the
        // other way.
        RegisterMask reg_set = _allocator.activeMask() | saved.activeMask();
        for (Register r = msReg(reg_set); reg_set; r = nextMsReg(reg_set, r))
        {
            LIns* curins = _allocator.getActive(r);
            LIns* savedins = saved.getActive(r);
            if (curins != savedins)
            {
                if (savedins) {
                    regsTodo[nTodo] = r;
                    insTodo[nTodo] = savedins;
                    nTodo++;
                }
                if (curins) {
                    //_nvprof("intersect-evict",1);
                    verbose_only( shouldMention=true; )
                    NanoAssert(curins->getReg() == r);
                    evict(curins);
                }

#ifdef NANOJIT_IA32
                if (savedins && r == FST0) {
                    verbose_only( shouldMention=true; )
                    FSTP(FST0);
                }
#endif
            }
        }
        // Now reassign mainline registers.
        for (int i = 0; i < nTodo; i++) {
            findSpecificRegFor(insTodo[i], regsTodo[i]);
        }
        verbose_only(
            if (shouldMention)
                verbose_outputf("## merging registers (intersect) with existing edge");
        )
    }
    /**
     * Merge the current state of the registers with a previously stored version.
     *
     * Situation                            Change to _allocator
     * ---------                            --------------------
     * !current & !saved                    none
     * !current & saved                     add saved
     * current & !saved                     none (intersectRegisterState evicts current)
     * current & saved & current==saved     none
     * current & saved & current!=saved     evict current, add saved
     */
    void Assembler::unionRegisterState(RegAlloc& saved)
    {
        Register regsTodo[LastReg + 1];
        LIns* insTodo[LastReg + 1];
        int nTodo = 0;

        // Do evictions and pops first.
        verbose_only(bool shouldMention=false; )
        RegisterMask reg_set = _allocator.activeMask() | saved.activeMask();
        for (Register r = lsReg(reg_set); reg_set; r = nextLsReg(reg_set, r))
        {
            LIns* curins = _allocator.getActive(r);
            LIns* savedins = saved.getActive(r);
            if (curins != savedins)
            {
                if (savedins) {
                    regsTodo[nTodo] = r;
                    insTodo[nTodo] = savedins;
                    nTodo++;
                }
                if (curins && savedins) {
                    //_nvprof("union-evict",1);
                    verbose_only( shouldMention=true; )
                    NanoAssert(curins->getReg() == r);
                    evict(curins);
                }

#ifdef NANOJIT_IA32
                if (r == FST0) {
                    if (savedins) {
                        // Discard top of x87 stack.
                        FSTP(FST0);
                    }
                    else if (curins) {
                        // Saved state did not have fpu reg allocated,
                        // so we must evict here to keep x87 stack balanced.
                        evict(curins);
                    }
                    verbose_only( shouldMention=true; )
                }
#endif
            }
        }
        // Now reassign mainline registers.
        for (int i = 0; i < nTodo; i++) {
            findSpecificRegFor(insTodo[i], regsTodo[i]);
        }
        verbose_only(
            if (shouldMention)
                verbose_outputf("## merging registers (union) with existing edge");
        )
    }
    // Scan table for instruction with the lowest priority, meaning it is used
    // furthest in the future.
    LIns* Assembler::findVictim(RegisterMask allow)
    {
        NanoAssert(allow);
        LIns *ins, *vic = 0;
        int allow_pri = 0x7fffffff;
        RegisterMask vic_set = allow & _allocator.activeMask();
        for (Register r = lsReg(vic_set); vic_set; r = nextLsReg(vic_set, r))
        {
            ins = _allocator.getActive(r);
            int pri = canRemat(ins) ? 0 : _allocator.getPriority(r);
            if (!vic || pri < allow_pri) {
                vic = ins;
                allow_pri = pri;
            }
        }
        NanoAssert(vic != 0);
        return vic;
    }
#ifdef NJ_VERBOSE
    char Assembler::outline[8192];
    char Assembler::outlineEOL[512];

    void Assembler::output()
    {
        // The +1 is for the terminating NUL char.
        VMPI_strncat(outline, outlineEOL, sizeof(outline)-(strlen(outline)+1));

        if (_outputCache) {
            char* str = new (alloc) char[VMPI_strlen(outline)+1];
            VMPI_strcpy(str, outline);
            _outputCache->insert(str);
        } else {
            _logc->printf("%s\n", outline);
        }

        outline[0] = '\0';
        outlineEOL[0] = '\0';
    }
    void Assembler::outputf(const char* format, ...)
    {
        va_list args;
        va_start(args, format);

        outline[0] = '\0';
        vsprintf(outline, format, args);
        output();
    }
    void Assembler::setOutputForEOL(const char* format, ...)
    {
        va_list args;
        va_start(args, format);

        outlineEOL[0] = '\0';
        vsprintf(outlineEOL, format, args);
    }
#endif // NJ_VERBOSE
    void LabelStateMap::add(LIns* label, NIns* addr, RegAlloc& regs) {
        LabelState* st = new (alloc) LabelState(addr, regs);
        labels.put(label, st);
    }

    LabelState* LabelStateMap::get(LIns* label) {
        return labels.get(label);
    }
}
#endif /* FEATURE_NANOJIT */