/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is [Open Source Virtual Machine].
 *
 * The Initial Developer of the Original Code is
 * Adobe System Incorporated.
 * Portions created by the Initial Developer are Copyright (C) 2004-2007
 * the Initial Developer. All Rights Reserved.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
#include "nanojit.h"

#ifdef FEATURE_NANOJIT

#ifdef VTUNE
#include "../core/CodegenLIR.h"
#endif

#ifdef _MSC_VER
    // disable some specific warnings which are normally useful, but pervasive in the code-gen macros
    #pragma warning(disable:4310) // cast truncates constant value
#endif

namespace nanojit
{
    /**
     * Need the following:
     *
     *    - merging paths ( build a graph? ), possibly use external rep to drive codegen
     */
    Assembler::Assembler(CodeAlloc& codeAlloc, Allocator& dataAlloc, Allocator& alloc,
                         AvmCore* core, LogControl* logc, const Config& config)
        : _codeAlloc(codeAlloc)
        , _dataAlloc(dataAlloc)
        , _branchStateMap(alloc)
#if NJ_USES_QUAD_CONSTANTS
        , _quadConstants(alloc)
#endif
    {
        VMPI_memset(&_stats, 0, sizeof(_stats));
        VMPI_memset(lookahead, 0, N_LOOKAHEAD * sizeof(LInsp));

        verbose_only( _logc = logc; )
        verbose_only( _outputCache = 0; )
        verbose_only( outline[0] = '\0'; )
        verbose_only( outlineEOL[0] = '\0'; )
    }
    /*static*/ LIns* const AR::BAD_ENTRY = (LIns*)0xdeadbeef;
    void AR::validateQuick()
    {
        NanoAssert(_highWaterMark < NJ_MAX_STACK_ENTRY);
        NanoAssert(_entries[0] == NULL);
        // Only check a few entries around _highWaterMark.
        uint32_t const RADIUS = 4;
        uint32_t const lo = (_highWaterMark > 1 + RADIUS ? _highWaterMark - RADIUS : 1);
        uint32_t const hi = (_highWaterMark + 1 + RADIUS < NJ_MAX_STACK_ENTRY ? _highWaterMark + 1 + RADIUS : NJ_MAX_STACK_ENTRY);
        for (uint32_t i = lo; i <= _highWaterMark; ++i)
            NanoAssert(_entries[i] != BAD_ENTRY);
        for (uint32_t i = _highWaterMark+1; i < hi; ++i)
            NanoAssert(_entries[i] == BAD_ENTRY);
    }
    void AR::validateFull()
    {
        NanoAssert(_highWaterMark < NJ_MAX_STACK_ENTRY);
        NanoAssert(_entries[0] == NULL);
        for (uint32_t i = 1; i <= _highWaterMark; ++i)
            NanoAssert(_entries[i] != BAD_ENTRY);
        for (uint32_t i = _highWaterMark+1; i < NJ_MAX_STACK_ENTRY; ++i)
            NanoAssert(_entries[i] == BAD_ENTRY);
    }
    void AR::validate()
    {
        static uint32_t validateCounter = 0;
        if (++validateCounter >= 100)
        {
            validateFull();
            validateCounter = 0;
        }
        else
        {
            validateQuick();
        }
    }
    inline void AR::clear()
    {
        _highWaterMark = 0;
        NanoAssert(_entries[0] == NULL);
        for (uint32_t i = 1; i < NJ_MAX_STACK_ENTRY; ++i)
            _entries[i] = BAD_ENTRY;
    }
    bool AR::Iter::next(LIns*& ins, uint32_t& nStackSlots, int32_t& arIndex)
    {
        while (_i <= _ar._highWaterMark) {
            ins = _ar._entries[_i];
            if (ins) {
                arIndex = _i;
                nStackSlots = nStackSlotsFor(ins);
                _i += nStackSlots;
                return true;
            }
            _i++;
        }
        ins = NULL;
        nStackSlots = 0;
        arIndex = 0;
        return false;
    }
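    // Illustrative sketch (not part of the build): how a client walks the
    // activation record with AR::Iter, much as printActivationState() does
    // later in this file. '_activation' is assumed to be the Assembler's AR
    // member.
#if 0
    LIns* ins;
    uint32_t nStackSlots;
    int32_t arIndex;
    for (AR::Iter iter(_activation); iter.next(ins, nStackSlots, arIndex); ) {
        // 'ins' occupies 'nStackSlots' 4-byte slots starting at slot 'arIndex'.
    }
#endif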
    void Assembler::arReset()
    {
        _branchStateMap.clear();
#if NJ_USES_QUAD_CONSTANTS
        _quadConstants.clear();
#endif
    }

    void Assembler::registerResetAll()
    {
        nRegisterResetAll(_allocator);

        // At start, should have some registers free and none active.
        NanoAssert(0 != _allocator.free);
        NanoAssert(0 == _allocator.countActive());
#ifdef NANOJIT_IA32
        debug_only(_fpuStkDepth = 0; )
#endif
    }
    // Legend for register sets: A = allowed, P = preferred, F = free, S = SavedReg.
    //
    // Finds a register in 'setA___' to store the result of 'ins' (one from
    // 'set_P__' if possible), evicting one if necessary. Doesn't consider
    // the prior state of 'ins'.
    //
    // Nb: 'setA___' comes from the instruction's use, 'set_P__' comes from its def.
    // Eg. in 'add(call(...), ...)':
    //   - the call's use means setA___==GpRegs;
    //   - the call's def means set_P__==rmask(retRegs[0]).
    //
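    // A minimal standalone sketch (not part of the build) of the preference
    // order computed in registerAlloc() below, using plain uint32_t masks.
    // Everything here is illustrative; only the set algebra matches the code.
#if 0
    static uint32_t pickAllocSet(uint32_t setA___, uint32_t set_P__,
                                 uint32_t free, uint32_t saved)
    {
        uint32_t setA_F_ = setA___ & free;     // allowed and free
        uint32_t setA_FS = setA_F_ & saved;    // ... and callee-saved
        uint32_t setAPF_ = setA_F_ & set_P__;  // ... and preferred
        uint32_t setAPFS = setA_FS & set_P__;  // allowed, preferred, free, saved
        if (setAPFS) return setAPFS;           // best choice
        if (setAPF_) return setAPF_;
        if (setA_FS) return setA_FS;
        return setA_F_;                        // any allowed free register
    }
#endif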
    Register Assembler::registerAlloc(LIns* ins, RegisterMask setA___, RegisterMask set_P__)
    {
        Register r;
        RegisterMask set__F_ = _allocator.free;
        RegisterMask setA_F_ = setA___ & set__F_;

        if (setA_F_) {
            RegisterMask set___S = SavedRegs;
            RegisterMask setA_FS = setA_F_ & set___S;
            RegisterMask setAPF_ = setA_F_ & set_P__;
            RegisterMask setAPFS = setA_FS & set_P__;
            RegisterMask set;

            if      (setAPFS) set = setAPFS;
            else if (setAPF_) set = setAPF_;
            else if (setA_FS) set = setA_FS;
            else              set = setA_F_;

            r = nRegisterAllocFromSet(set);
            _allocator.addActive(r, ins);
        } else {
            counter_increment(steals);

            // Nothing free, steal one.
            // LSRA says pick the one with the furthest use.
            LIns* vic = findVictim(setA___);
            NanoAssert(vic->isInReg());
            r = vic->getReg();

            evict(vic);

            // r ends up staying active, but the LIns defining it changes.
            _allocator.removeFree(r);
            _allocator.addActive(r, ins);
        }
        return r;
    }
    // Finds a register in 'allow' to store a temporary value (one not
    // associated with a particular LIns), evicting one if necessary. The
    // returned register is marked as being free and so can only be safely
    // used for code generation purposes until the regstate is next inspected
    // or modified.
    Register Assembler::registerAllocTmp(RegisterMask allow)
    {
        LIns dummyIns;
        Register r = registerAlloc(&dummyIns, allow, /*prefer*/0);

        // Mark r as free, ready for use as a temporary value.
        _allocator.removeActive(r);
        _allocator.addFree(r);
        return r;
    }
    /*
     * these instructions don't have to be saved & reloaded to spill,
     * they can just be recalculated w/out any inputs.
     */
    bool Assembler::canRemat(LIns *i) {
        return i->isImmAny() || i->isop(LIR_alloc);
    }
    void Assembler::codeAlloc(NIns *&start, NIns *&end, NIns *&eip
                              verbose_only(, size_t &nBytes))
    {
        // save the block we just filled
        if (start)
            CodeAlloc::add(codeList, start, end);

        // CodeAlloc contract: allocations never fail
        _codeAlloc.alloc(start, end);
        verbose_only( nBytes += (end - start) * sizeof(NIns); )
        NanoAssert(uintptr_t(end) - uintptr_t(start) >= (size_t)LARGEST_UNDERRUN_PROT);
        eip = end;

#ifdef VTUNE
        if (_nIns && _nExitIns) {
            //cgen->jitAddRecord((uintptr_t)list->code, 0, 0, true); // add placeholder record for top of page
            cgen->jitCodePosUpdate((uintptr_t)list->code);
            cgen->jitPushInfo(); // new page requires new entry
        }
#endif
    }
    void Assembler::reset()
    {
        _nIns = 0;
        _nExitIns = 0;
        codeStart = codeEnd = 0;
        exitStart = exitEnd = 0;
        codeList = 0;

        nativePageReset();
        registerResetAll();
        arReset();
    }
    void Assembler::pageValidate()
    {
        if (error()) return;
        // This may be a normal code chunk or an exit code chunk.
        NanoAssertMsg(codeStart <= _nIns && _nIns <= codeEnd,
                      "Native instruction pointer overstep paging bounds; check overrideProtect for last instruction");
    }
    bool AR::isValidEntry(uint32_t idx, LIns* ins) const
    {
        return idx > 0 && idx <= _highWaterMark && _entries[idx] == ins;
    }
    void AR::checkForResourceConsistency(const RegAlloc& regs)
    {
        validate();
        for (uint32_t i = 1; i <= _highWaterMark; ++i)
        {
            LIns* ins = _entries[i];
            if (!ins)
                continue;
            uint32_t arIndex = ins->getArIndex();
            NanoAssert(arIndex != 0);
            if (ins->isop(LIR_alloc)) {
                int const n = i + (ins->size()>>2);
                for (int j=i+1; j < n; j++) {
                    NanoAssert(_entries[j]==ins);
                }
                NanoAssert(arIndex == (uint32_t)n-1);
                i = n-1;
            }
            else if (ins->isN64()) {
                NanoAssert(_entries[i + 1]==ins);
                i += 1; // skip high word
            }
            else {
                NanoAssertMsg(arIndex == i, "Stack record index mismatch");
            }
            NanoAssertMsg(!ins->isInReg() || regs.isConsistent(ins->getReg(), ins),
                          "Register record mismatch");
        }
    }
    void Assembler::resourceConsistencyCheck()
    {
        NanoAssert(!error());

#ifdef NANOJIT_IA32
        NanoAssert((_allocator.active[FST0] && _fpuStkDepth == -1) ||
                   (!_allocator.active[FST0] && _fpuStkDepth == 0));
#endif

        _activation.checkForResourceConsistency(_allocator);

        registerConsistencyCheck();
    }
    void Assembler::registerConsistencyCheck()
    {
        RegisterMask managed = _allocator.managed;
        for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
            if (rmask(r) & managed) {
                // A register managed by register allocation must be either
                // free or active, but not both.
                if (_allocator.isFree(r)) {
                    NanoAssertMsgf(_allocator.getActive(r)==0,
                        "register %s is free but assigned to ins", gpn(r));
                } else {
                    // An LIns defining a register must have that register in
                    // its reservation.
                    LIns* ins = _allocator.getActive(r);
                    NanoAssert(ins);
                    NanoAssertMsg(r == ins->getReg(), "Register record mismatch");
                }
            } else {
                // A register not managed by register allocation must be
                // neither free nor active.
                NanoAssert(!_allocator.isFree(r));
                NanoAssert(!_allocator.getActive(r));
            }
        }
    }
    void Assembler::findRegFor2(RegisterMask allowa, LIns* ia, Register& ra,
                                RegisterMask allowb, LIns* ib, Register& rb)
    {
        // There should be some overlap between 'allowa' and 'allowb', else
        // there's no point calling this function.
        NanoAssert(allowa & allowb);

        if (ia == ib) {
            ra = rb = findRegFor(ia, allowa & allowb);  // use intersection(allowa, allowb)

        } else if (ib->isInRegMask(allowb)) {
            // 'ib' is already in an allowable reg -- don't let it get evicted
            // when finding 'ra'.
            rb = ib->getReg();
            ra = findRegFor(ia, allowa & ~rmask(rb));

        } else {
            ra = findRegFor(ia, allowa);
            rb = findRegFor(ib, allowb & ~rmask(ra));
        }
    }
    Register Assembler::findSpecificRegFor(LIns* i, Register w)
    {
        return findRegFor(i, rmask(w));
    }
    // Like findRegFor(), but called when the LIns is used as a pointer. It
    // doesn't have to be called, findRegFor() can still be used, but it can
    // optimize the LIR_alloc case by indexing off FP, thus saving the use of
    // a GpReg.
    //
    Register Assembler::getBaseReg(LInsp base, int &d, RegisterMask allow)
    {
        if (base->isop(LIR_alloc)) {
            // The value of a LIR_alloc is a pointer to its stack memory,
            // which is always relative to FP. So we can just return FP if we
            // also adjust 'd' (and can do so in a valid manner). Or, in the
            // PEDANTIC case, we can just assign a register as normal;
            // findRegFor() will allocate the stack memory for LIR_alloc if
            // necessary.
            d += findMemFor(base);
            return FP;
        }
        return findRegFor(base, allow);
    }
    // Like findRegFor2(), but used for stores where the base value has the
    // same type as the stored value, eg. in asm_store32() on 32-bit platforms
    // and asm_store64() on 64-bit platforms. Similar to getBaseReg(),
    // findRegFor2() can be called instead, but this function can optimize the
    // case where the base value is a LIR_alloc.
    void Assembler::getBaseReg2(RegisterMask allowValue, LIns* value, Register& rv,
                                RegisterMask allowBase, LIns* base, Register& rb, int &d)
    {
        if (base->isop(LIR_alloc)) {
            rb = FP;
            d += findMemFor(base);
            rv = findRegFor(value, allowValue);
            return;
        }
        findRegFor2(allowValue, value, rv, allowBase, base, rb);
    }
    // Finds a register in 'allow' to hold the result of 'ins'. Used when we
    // encounter a use of 'ins'. The actions depend on the prior regstate of
    // 'ins':
    // - If the result of 'ins' is not in any register, we find an allowed
    //   one, evicting one if necessary.
    // - If the result of 'ins' is already in an allowed register, we use that.
    // - If the result of 'ins' is already in a not-allowed register, we find an
    //   allowed one and move it.
    //
    Register Assembler::findRegFor(LIns* ins, RegisterMask allow)
    {
        if (ins->isop(LIR_alloc)) {
            // Never allocate a reg for this without stack space too.
            findMemFor(ins);
        }

        Register r;

        if (!ins->isInReg()) {
            // 'ins' isn't in a register (must be in a spill slot or nowhere).
            r = registerAlloc(ins, allow, hint(ins));

        } else if (rmask(r = ins->getReg()) & allow) {
            // 'ins' is in an allowed register.
            _allocator.useActive(r);

        } else {
            // 'ins' is in a register (r) that's not in 'allow'.
#ifdef NANOJIT_IA32
            if (((rmask(r)&XmmRegs) && !(allow&XmmRegs)) ||
                ((rmask(r)&x87Regs) && !(allow&x87Regs)))
            {
                // x87 <-> xmm copy required
                //_nvprof("fpu-evict",1);
                evict(ins);
                r = registerAlloc(ins, allow, hint(ins));
            } else
#elif defined(NANOJIT_PPC) || defined(NANOJIT_MIPS)
            if (((rmask(r)&GpRegs) && !(allow&GpRegs)) ||
                ((rmask(r)&FpRegs) && !(allow&FpRegs)))
            {
                evict(ins);
                r = registerAlloc(ins, allow, hint(ins));
            } else
#endif
            {
                // The post-state register holding 'ins' is 's', the pre-state
                // register holding 'ins' is 'r'. For example, if s=eax and
                // r=ecx:
                //
                //   pre-state:   ecx(ins)
                //   instruction: mov eax, ecx
                //   post-state:  eax(ins)
                //
                Register s = r;
                _allocator.retire(r);
                r = registerAlloc(ins, allow, hint(ins));

                // 'ins' is in 'allow', in register r (different to the old r);
                // the old r is now free.
                if ((rmask(s) & GpRegs) && (rmask(r) & GpRegs)) {
                    MR(s, r);   // move 'ins' from its pre-state reg (r) to its post-state reg (s)
                } else {
                    asm_nongp_copy(s, r);
                }
            }
        }
        return r;
    }
    // Like findSpecificRegFor(), but only for when 'r' is known to be free
    // and 'ins' is known to not already have a register allocated. Updates
    // the regstate (maintaining the invariants) but does not generate any
    // code. The return value is redundant, always being 'r', but it's
    // sometimes useful to have it there for assignments.
    Register Assembler::findSpecificRegForUnallocated(LIns* ins, Register r)
    {
        if (ins->isop(LIR_alloc)) {
            // never allocate a reg for this w/out stack space too
            findMemFor(ins);
        }

        NanoAssert(!ins->isInReg());
        NanoAssert(_allocator.free & rmask(r));

        ins->setReg(r);
        _allocator.removeFree(r);
        _allocator.addActive(r, ins);

        return r;
    }
#if NJ_USES_QUAD_CONSTANTS
    const uint64_t* Assembler::findQuadConstant(uint64_t q)
    {
        uint64_t* p = _quadConstants.get(q);
        if (!p)
        {
            p = new (_dataAlloc) uint64_t;
            *p = q;
            _quadConstants.put(q, p);
        }
        return p;
    }
#endif
    int Assembler::findMemFor(LIns *ins)
    {
#if NJ_USES_QUAD_CONSTANTS
        NanoAssert(!ins->isconstf());
#endif
        if (!ins->isInAr()) {
            uint32_t const arIndex = arReserve(ins);
            ins->setArIndex(arIndex);
            NanoAssert(_activation.isValidEntry(ins->getArIndex(), ins) == (arIndex != 0));
        }
        return arDisp(ins);
    }
    // XXX: this function is dangerous and should be phased out;
    // see bug 513615. Calls to it should be replaced with a
    // prepareResultReg() / generate code / freeResourcesOf() sequence.
    Register Assembler::deprecated_prepResultReg(LIns *ins, RegisterMask allow)
    {
#ifdef NANOJIT_IA32
        // We used to have to worry about possibly popping the x87 stack here.
        // But this function is no longer used on i386, and this assertion
        // ensures that.
        NanoAssert(!(allow & rmask(FST0)));
#endif
        Register r = findRegFor(ins, allow);
        deprecated_freeRsrcOf(ins);
        return r;
    }
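    // A hedged sketch (not part of the build) of the replacement sequence the
    // comment above asks for; 'asm_foo' stands in for whatever instruction
    // the caller actually emits.
#if 0
    Register r = prepareResultReg(ins, allow);  // pick r, emit spill if needed
    asm_foo(ins, r);                            // generate the code defining r
    freeResourcesOf(ins);                       // drop the reg/AR records
#endif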
    // Finds a register in 'allow' to hold the result of 'ins'. Also
    // generates code to spill the result if necessary. Called just prior to
    // generating the code for 'ins' (because we generate code backwards).
    //
    // An example where no spill is necessary. Lines marked '*' are those
    // done by this function.
    //
    //   regstate:  R
    //   asm:       define res into r
    // * regstate:  R + r(res)
    //              ...
    //   asm:       use res in r
    //
    // An example where a spill is necessary.
    //
    //   regstate:  R
    //   asm:       define res into r
    // * regstate:  R + r(res)
    // * asm:       spill res from r
    //   regstate:  R
    //              ...
    //   asm:       restore res into r2
    //   regstate:  R + r2(res) + other changes from "..."
    //   asm:       use res in r2
    //
    Register Assembler::prepareResultReg(LIns *ins, RegisterMask allow)
    {
        // At this point, we know the result of 'ins' has a use later
        // in the code. (Exception: if 'ins' is a call to an impure function
        // the return value may not be used, but 'ins' will still be present
        // because it has side-effects.) It may have had to be evicted, in
        // which case the restore will have already been generated, so we now
        // generate the spill (unless the restore was actually a
        // rematerialize, in which case it's not necessary).
#ifdef NANOJIT_IA32
        // If 'allow' includes FST0 we have to pop if 'ins' isn't in FST0 in
        // the post-regstate. This could be because 'ins' is unused, 'ins' is
        // in a spill slot, or 'ins' is in an XMM register.
        const bool pop = (allow & rmask(FST0)) &&
                         (!ins->isInReg() || ins->getReg() != FST0);
#else
        const bool pop = false;
#endif
        Register r = findRegFor(ins, allow);
        asm_maybe_spill(ins, pop);
#ifdef NANOJIT_IA32
        if (!ins->isInAr() && pop && r == FST0) {
            // This can only happen with a LIR_fcall to an impure function
            // whose return value was ignored (ie. if ins->isInReg() was false
            // prior to the findRegFor() call).
            FSTP(FST0);     // pop the fpu result since it isn't used
        }
#endif
        return r;
    }
    void Assembler::asm_maybe_spill(LInsp ins, bool pop)
    {
        int d = ins->isInAr() ? arDisp(ins) : 0;
        Register r = ins->getReg();
        if (ins->isInAr()) {
            verbose_only( RefBuf b;
                          if (_logc->lcbits & LC_Assembly) {
                              setOutputForEOL("  <= spill %s",
                              _thisfrag->lirbuf->printer->formatRef(&b, ins)); } )
            asm_spill(r, d, pop, ins->isN64());
        }
    }
    // XXX: This function is error-prone and should be phased out; see bug 513615.
    void Assembler::deprecated_freeRsrcOf(LIns *ins)
    {
        if (ins->isInReg()) {
            asm_maybe_spill(ins, /*pop*/false);
            _allocator.retire(ins->getReg());   // free any register associated with entry
        }
        if (ins->isInAr())
            arFree(ins);                        // free any AR space associated with entry
    }
    // Frees all record of registers and spill slots used by 'ins'.
    void Assembler::freeResourcesOf(LIns *ins)
    {
        if (ins->isInReg()) {
            _allocator.retire(ins->getReg());   // free any register associated with entry
        }
        if (ins->isInAr())
            arFree(ins);                        // free any AR space associated with entry
    }
    // Frees 'r' in the RegAlloc regstate, if it's not already free.
    void Assembler::evictIfActive(Register r)
    {
        if (LIns* vic = _allocator.getActive(r)) {
            NanoAssert(vic->getReg() == r);
            evict(vic);
        }
    }
    // Frees 'r' (which currently holds the result of 'vic') in the regstate.
    // An example:
    //
    //   pre-regstate:  eax(ld1)
    //   instruction:   mov ebx,-4(ebp) <= restore add1   # %ebx is dest
    //   post-regstate: eax(ld1) ebx(add1)
    //
    // At run-time we are *restoring* 'add1' into %ebx, hence the call to
    // asm_restore(). But at regalloc-time we are moving backwards through
    // the code, so in that sense we are *evicting* 'add1' from %ebx.
    //
    void Assembler::evict(LIns* vic)
    {
        // Not free, need to steal.
        counter_increment(steals);

        Register r = vic->getReg();

        NanoAssert(!_allocator.isFree(r));
        NanoAssert(vic == _allocator.getActive(r));

        verbose_only( RefBuf b;
                      if (_logc->lcbits & LC_Assembly) {
                          setOutputForEOL("  <= restore %s",
                          _thisfrag->lirbuf->printer->formatRef(&b, vic)); } )
        asm_restore(vic, r);

        _allocator.retire(r);

        // At this point 'vic' is unused (if rematerializable), or in a spill
        // slot (if not).
    }
    void Assembler::patch(GuardRecord *lr)
    {
        if (!lr->jmp) // the guard might have been eliminated as redundant
            return;
        Fragment *frag = lr->exit->target;
        NanoAssert(frag->fragEntry != 0);
        nPatchBranch((NIns*)lr->jmp, frag->fragEntry);
        CodeAlloc::flushICache(lr->jmp, LARGEST_BRANCH_PATCH);
        verbose_only(verbose_outputf("patching jump at %p to target %p\n",
            lr->jmp, frag->fragEntry);)
    }
    void Assembler::patch(SideExit *exit)
    {
        GuardRecord *rec = exit->guards;
        NanoAssert(rec);
        while (rec) {
            patch(rec);
            rec = rec->next;
        }
    }
    void Assembler::patch(SideExit* exit, SwitchInfo* si)
    {
        for (GuardRecord* lr = exit->guards; lr; lr = lr->next) {
            Fragment *frag = lr->exit->target;
            NanoAssert(frag->fragEntry != 0);
            si->table[si->index] = frag->fragEntry;
        }
    }
    NIns* Assembler::asm_exit(LInsp guard)
    {
        SideExit *exit = guard->record()->exit;
        NIns* at = 0;
        if (!_branchStateMap.get(exit))
        {
            at = asm_leave_trace(guard);
        }
        else
        {
            RegAlloc* captured = _branchStateMap.get(exit);
            intersectRegisterState(*captured);
            at = exit->target->fragEntry;
            NanoAssert(at != 0);
            _branchStateMap.remove(exit);
        }
        return at;
    }
    NIns* Assembler::asm_leave_trace(LInsp guard)
    {
        verbose_only( int32_t nativeSave = _stats.native );
        verbose_only( verbose_outputf("----------------------------------- ## END exit block %p", guard);)

        // This point is unreachable. So free all the registers. If an
        // instruction has a stack entry we will leave it alone, otherwise we
        // free it entirely. intersectRegisterState() will restore.
        RegAlloc capture = _allocator;
        releaseRegisters();

        swapCodeChunks();
        _inExit = true;

#ifdef NANOJIT_IA32
        debug_only( _sv_fpuStkDepth = _fpuStkDepth; _fpuStkDepth = 0; )
#endif

        nFragExit(guard);

        // Restore the callee-saved registers and parameters.
        assignSavedRegs();
        assignParamRegs();

        intersectRegisterState(capture);

        // this can be useful for breaking whenever an exit is taken
        //INT3();

        // we are done producing the exit logic for the guard so demark where our exit block code begins
        NIns* jmpTarget = _nIns;     // target in exit path for our mainline conditional jump

        // swap back pointers, effectively storing the last location used in the exit path
        swapCodeChunks();
        _inExit = false;

        //verbose_only( verbose_outputf("         LIR_xt/xf swapCodeChunks, _nIns is now %08X(%08X), _nExitIns is now %08X(%08X)",_nIns, *_nIns,_nExitIns,*_nExitIns) );
        verbose_only( verbose_outputf("%010lx:", (unsigned long)jmpTarget);)
        verbose_only( verbose_outputf("----------------------------------- ## BEGIN exit block (LIR_xt|LIR_xf)") );

#ifdef NANOJIT_IA32
        NanoAssertMsgf(_fpuStkDepth == _sv_fpuStkDepth, "LIR_xtf, _fpuStkDepth=%d, expect %d",_fpuStkDepth, _sv_fpuStkDepth);
        debug_only( _fpuStkDepth = _sv_fpuStkDepth; _sv_fpuStkDepth = 9999; )
#endif

        verbose_only(_stats.exitnative += (_stats.native-nativeSave));

        return jmpTarget;
    }
    void Assembler::compile(Fragment* frag, Allocator& alloc, bool optimize
                            verbose_only(, LInsPrinter* printer))
    {
        verbose_only(
        bool anyVerb  = (_logc->lcbits & 0xFFFF & ~LC_FragProfile) > 0;
        bool asmVerb  = (_logc->lcbits & 0xFFFF & LC_Assembly) > 0;
        bool liveVerb = (_logc->lcbits & 0xFFFF & LC_Liveness) > 0;
        )

        /* BEGIN decorative preamble */
        verbose_only( if (anyVerb) {
            _logc->printf("========================================"
                          "========================================\n");
            _logc->printf("=== BEGIN LIR::compile(%p, %p)\n",
                          (void*)this, (void*)frag);
            _logc->printf("===\n");
        })
        /* END decorative preamble */

        verbose_only( if (liveVerb) {
            _logc->printf("=== Results of liveness analysis:\n");
            _logc->printf("===\n");
            LirReader br(frag->lastIns);
            LirFilter* lir = &br;
            if (optimize) {
                StackFilter* sf = new (alloc) StackFilter(lir, alloc, frag->lirbuf->sp);
                lir = sf;
            }
            live(lir, alloc, frag, _logc);
        })

        /* Set up the generic text output cache for the assembler */
        verbose_only( StringList asmOutput(alloc); )
        verbose_only( _outputCache = &asmOutput; )

        beginAssembly(frag);
        if (error())
            return;

        //_logc->printf("recompile trigger %X kind %d\n", (int)frag, frag->kind);

        verbose_only( if (anyVerb) {
            _logc->printf("=== Translating LIR fragments into assembly:\n");
        })

        // now the main trunk
        verbose_only( RefBuf b; )
        verbose_only( if (anyVerb) {
            _logc->printf("=== -- Compile trunk %s: begin\n", printer->formatAddr(&b, frag));
        })

        // Used for debug printing, if needed
        debug_only(ValidateReader *validate = NULL;)
        verbose_only(
        ReverseLister *pp_init = NULL;
        ReverseLister *pp_after_sf = NULL;
        )

        // The LIR passes through these filters as listed in this
        // function, viz, top to bottom.

        // set up backwards pipeline: assembler <- StackFilter <- LirReader
        LirFilter* lir = new (alloc) LirReader(frag->lastIns);

#ifdef DEBUG
        // VALIDATION
        validate = new (alloc) ValidateReader(lir);
        lir = validate;
#endif

        // INITIAL PRINTING
        verbose_only( if (_logc->lcbits & LC_ReadLIR) {
            pp_init = new (alloc) ReverseLister(lir, alloc, frag->lirbuf->printer, _logc,
                                                "Initial LIR");
            lir = pp_init;
        })

        // STACKFILTER
        if (optimize) {
            StackFilter* stackfilter = new (alloc) StackFilter(lir, alloc, frag->lirbuf->sp);
            lir = stackfilter;
        }

        // AFTER STACKFILTER
        verbose_only( if (_logc->lcbits & LC_AfterSF) {
            pp_after_sf = new (alloc) ReverseLister(lir, alloc, frag->lirbuf->printer, _logc,
                                                    "After StackFilter");
            lir = pp_after_sf;
        })

        assemble(frag, lir);

        // If we were accumulating debug info in the various ReverseListers,
        // call finish() to emit whatever contents they have accumulated.
        verbose_only(
        if (pp_init)        pp_init->finish();
        if (pp_after_sf)    pp_after_sf->finish();
        )

        verbose_only( if (anyVerb) {
            _logc->printf("=== -- Compile trunk %s: end\n", printer->formatAddr(&b, frag));
        })

        verbose_only( if (asmVerb)
            outputf("## compiling trunk %s", printer->formatAddr(&b, frag));
        )

        endAssembly(frag);

        // Reverse output so that assembly is displayed low-to-high.
        // Up to this point, _outputCache has been non-NULL, and so has been
        // accumulating output. Now we set it to NULL, traverse the entire
        // list of stored strings, and hand them a second time to output.
        // Since _outputCache is now NULL, outputf just hands these strings
        // directly onwards to _logc->printf.
        verbose_only( if (anyVerb) {
            _logc->printf("===\n");
            _logc->printf("=== Aggregated assembly output: BEGIN\n");
            _logc->printf("===\n");
            _outputCache = 0;
            for (Seq<char*>* p = asmOutput.get(); p != NULL; p = p->tail) {
                char *str = p->head;
                outputf("  %s", str);
            }
            _logc->printf("===\n");
            _logc->printf("=== Aggregated assembly output: END\n");
        })

        if (error())
            frag->fragEntry = 0;

        verbose_only( frag->nCodeBytes += codeBytes; )
        verbose_only( frag->nExitBytes += exitBytes; )

        /* BEGIN decorative postamble */
        verbose_only( if (anyVerb) {
            _logc->printf("===\n");
            _logc->printf("=== END LIR::compile(%p, %p)\n",
                          (void*)this, (void*)frag);
            _logc->printf("========================================"
                          "========================================\n");
        })
        /* END decorative postamble */
    }
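    // A condensed sketch (not part of the build) of the reader pipeline that
    // compile() assembles above. Each stage pulls LIR, backwards, from the
    // stage below it; the verbose and debug stages are omitted here.
#if 0
    LirFilter* lir = new (alloc) LirReader(frag->lastIns);           // raw buffer
    if (optimize)
        lir = new (alloc) StackFilter(lir, alloc, frag->lirbuf->sp); // drop dead stack stores
    assemble(frag, lir);                                             // assembler consumes the chain
#endif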
    void Assembler::beginAssembly(Fragment *frag)
    {
        verbose_only( codeBytes = 0; )
        verbose_only( exitBytes = 0; )

        reset();

        NanoAssert(codeList == 0);
        NanoAssert(codeStart == 0);
        NanoAssert(codeEnd == 0);
        NanoAssert(exitStart == 0);
        NanoAssert(exitEnd == 0);
        NanoAssert(_nIns == 0);
        NanoAssert(_nExitIns == 0);

        counter_reset(native);
        counter_reset(exitnative);
        counter_reset(steals);
        counter_reset(spills);
        counter_reset(remats);

        // native code gen buffer setup
        nativePageSetup();

        // make sure we got memory at least one page
        if (error()) return;

        _stats.codeStart = _nIns-1;
        _stats.codeExitStart = _nExitIns-1;
    }
    void Assembler::assemble(Fragment* frag, LirFilter* reader)
    {
        if (error()) return;
        _thisfrag = frag;

        // check the fragment is starting out with a sane profiling state
        verbose_only( NanoAssert(frag->nStaticExits == 0); )
        verbose_only( NanoAssert(frag->nCodeBytes == 0); )
        verbose_only( NanoAssert(frag->nExitBytes == 0); )
        verbose_only( NanoAssert(frag->profCount == 0); )
        verbose_only( if (_logc->lcbits & LC_FragProfile)
                          NanoAssert(frag->profFragID > 0);
                      else
                          NanoAssert(frag->profFragID == 0); )

        gen(reader);

        if (!error()) {
            // patch all branches
            NInsMap::Iter iter(_patches);
            while (iter.next()) {
                NIns* where = iter.key();
                LIns* target = iter.value();
                if (target->isop(LIR_jtbl)) {
                    // Need to patch up a whole jump table, 'where' is the table.
                    LIns *jtbl = target;
                    NIns** native_table = (NIns**) where;
                    for (uint32_t i = 0, n = jtbl->getTableSize(); i < n; i++) {
                        LabelState* lstate = _labels.get(jtbl->getTarget(i));
                        NIns* ntarget = lstate->addr;
                        if (ntarget) {
                            native_table[i] = ntarget;
                        } else {
                            setError(UnknownBranch);
                            break;
                        }
                    }
                } else {
                    // target is a label for a single-target branch
                    LabelState *lstate = _labels.get(target);
                    NIns* ntarget = lstate->addr;
                    if (ntarget) {
                        nPatchBranch(where, ntarget);
                    } else {
                        setError(UnknownBranch);
                        break;
                    }
                }
            }
        }
    }
    void Assembler::endAssembly(Fragment* frag)
    {
        // don't try to patch code if we are in an error state since we might have partially
        // overwritten the code cache already
        if (error()) {
            // something went wrong, release all allocated code memory
            _codeAlloc.freeAll(codeList);
            if (_nExitIns)
                _codeAlloc.free(exitStart, exitEnd);
            _codeAlloc.free(codeStart, codeEnd);
            codeList = NULL;
            return;
        }

        NIns* fragEntry = genPrologue();
        verbose_only( asm_output("[prologue]"); )

        debug_only(_activation.checkForResourceLeaks());

        NanoAssert(!_inExit);
        // save used parts of current block on fragment's code list, free the rest
#if defined(NANOJIT_ARM) || defined(NANOJIT_MIPS)
        // [codeStart, _nSlot) ... gap ... [_nIns, codeEnd)
        if (_nExitIns) {
            _codeAlloc.addRemainder(codeList, exitStart, exitEnd, _nExitSlot, _nExitIns);
            verbose_only( exitBytes -= (_nExitIns - _nExitSlot) * sizeof(NIns); )
        }
        _codeAlloc.addRemainder(codeList, codeStart, codeEnd, _nSlot, _nIns);
        verbose_only( codeBytes -= (_nIns - _nSlot) * sizeof(NIns); )
#else
        // [codeStart ... gap ... [_nIns, codeEnd))
        if (_nExitIns) {
            _codeAlloc.addRemainder(codeList, exitStart, exitEnd, exitStart, _nExitIns);
            verbose_only( exitBytes -= (_nExitIns - exitStart) * sizeof(NIns); )
        }
        _codeAlloc.addRemainder(codeList, codeStart, codeEnd, codeStart, _nIns);
        verbose_only( codeBytes -= (_nIns - codeStart) * sizeof(NIns); )
#endif

        // at this point all our new code is in the d-cache and not the i-cache,
        // so flush the i-cache on cpu's that need it.
        CodeAlloc::flushICache(codeList);

        // save entry point pointers
        frag->fragEntry = fragEntry;
        frag->setCode(_nIns);
        PERFM_NVPROF("code", CodeAlloc::size(codeList));

#ifdef NANOJIT_IA32
        NanoAssertMsgf(_fpuStkDepth == 0,"_fpuStkDepth %d\n",_fpuStkDepth);
#endif

        debug_only( pageValidate(); )
        NanoAssert(_branchStateMap.isEmpty());
    }
    void Assembler::releaseRegisters()
    {
        for (Register r = FirstReg; r <= LastReg; r = nextreg(r))
        {
            LIns *ins = _allocator.getActive(r);
            if (ins) {
                // Clear reg allocation, preserve stack allocation.
                _allocator.retire(r);
                NanoAssert(r == ins->getReg());
            }
        }
    }
#ifdef PERFM
#define countlir_live() _nvprof("lir-live",1)
#define countlir_ret() _nvprof("lir-ret",1)
#define countlir_alloc() _nvprof("lir-alloc",1)
#define countlir_var() _nvprof("lir-var",1)
#define countlir_use() _nvprof("lir-use",1)
#define countlir_def() _nvprof("lir-def",1)
#define countlir_imm() _nvprof("lir-imm",1)
#define countlir_param() _nvprof("lir-param",1)
#define countlir_cmov() _nvprof("lir-cmov",1)
#define countlir_ld() _nvprof("lir-ld",1)
#define countlir_ldq() _nvprof("lir-ldq",1)
#define countlir_alu() _nvprof("lir-alu",1)
#define countlir_qjoin() _nvprof("lir-qjoin",1)
#define countlir_qlo() _nvprof("lir-qlo",1)
#define countlir_qhi() _nvprof("lir-qhi",1)
#define countlir_fpu() _nvprof("lir-fpu",1)
#define countlir_st() _nvprof("lir-st",1)
#define countlir_stq() _nvprof("lir-stq",1)
#define countlir_jmp() _nvprof("lir-jmp",1)
#define countlir_jcc() _nvprof("lir-jcc",1)
#define countlir_label() _nvprof("lir-label",1)
#define countlir_xcc() _nvprof("lir-xcc",1)
#define countlir_x() _nvprof("lir-x",1)
#define countlir_call() _nvprof("lir-call",1)
#define countlir_jtbl() _nvprof("lir-jtbl",1)
#else
#define countlir_live()
#define countlir_ret()
#define countlir_alloc()
#define countlir_var()
#define countlir_use()
#define countlir_def()
#define countlir_imm()
#define countlir_param()
#define countlir_cmov()
#define countlir_ld()
#define countlir_ldq()
#define countlir_alu()
#define countlir_qjoin()
#define countlir_qlo()
#define countlir_qhi()
#define countlir_fpu()
#define countlir_st()
#define countlir_stq()
#define countlir_jmp()
#define countlir_jcc()
#define countlir_label()
#define countlir_xcc()
#define countlir_x()
#define countlir_call()
#define countlir_jtbl()
#endif
    void Assembler::gen(LirFilter* reader)
    {
        NanoAssert(_thisfrag->nStaticExits == 0);

        // The trace must end with one of these opcodes.
        NanoAssert(reader->finalIns()->isop(LIR_x) ||
                   reader->finalIns()->isop(LIR_xtbl) ||
                   reader->finalIns()->isRet() ||
                   reader->finalIns()->isLive());

        InsList pending_lives(alloc);

        NanoAssert(!error());

        // What's going on here: we're visiting all the LIR instructions in
        // the buffer, working strictly backwards in buffer-order, and
        // generating machine instructions for them as we go.
        //
        // For each LIns, we first determine whether it's actually necessary,
        // and if not skip it. Otherwise we generate code for it. There are
        // two kinds of "necessary" instructions:
        //
        // - "Statement" instructions, which have side effects. Anything that
        //   could change control flow or the state of memory.
        //
        // - "Value" or "expression" instructions, which compute a value based
        //   only on the operands to the instruction (and, in the case of
        //   loads, the state of memory). Because we visit instructions in
        //   reverse order, if some previously visited instruction uses the
        //   value computed by this instruction, then this instruction will
        //   already have a register assigned to hold that value. Hence we
        //   can consult the instruction to detect whether its value is in
        //   fact used (i.e. not dead).
        //
        // Note that the backwards code traversal can make register allocation
        // confusing. (For example, we restore a value before we spill it!)
        // In particular, words like "before" and "after" must be used very
        // carefully -- their meaning at regalloc-time is opposite to their
        // meaning at run-time. We use the term "pre-regstate" to refer to
        // the register allocation state that occurs prior to an instruction's
        // execution, and "post-regstate" to refer to the state that occurs
        // after an instruction's execution, e.g.:
        //
        //   pre-regstate:  ebx(ins)
        //   instruction:   mov eax, ebx     // mov dst, src
        //   post-regstate: eax(ins)
        //
        // At run-time, the instruction updates the pre-regstate into the
        // post-regstate (and these states are the real machine's regstates).
        // But when allocating registers, because we go backwards, the
        // pre-regstate is constructed from the post-regstate (and these
        // regstates are those stored in RegAlloc).
        //
        // One consequence of generating code backwards is that we tend to
        // both spill and restore registers as early (at run-time) as
        // possible; this is good for tolerating memory latency. If we
        // generated code forwards, we would expect to both spill and restore
        // registers as late (at run-time) as possible; this might be better
        // for reducing register pressure.
        //
        // Another thing to note: we provide N_LOOKAHEAD instruction's worth
        // of lookahead because it's useful for backends. This is nice and
        // easy because once read() gets to the LIR_start at the beginning of
        // the buffer it'll just keep regetting it.
= 0; i
< N_LOOKAHEAD
; i
++)
1273 lookahead
[i
] = reader
->read();
1275 while (!lookahead
[0]->isop(LIR_start
))
1277 LInsp ins
= lookahead
[0]; // give it a shorter name for local use
1278 LOpcode op
= ins
->opcode();
1280 bool required
= ins
->isStmt() || ins
->isUsed();
1285 // Output the post-regstate (registers and/or activation).
1286 // Because asm output comes in reverse order, doing it now means
1287 // it is printed after the LIR and asm, exactly when the
1288 // post-regstate should be shown.
1289 if ((_logc
->lcbits
& LC_Assembly
) && (_logc
->lcbits
& LC_Activation
))
1290 printActivationState();
1291 if ((_logc
->lcbits
& LC_Assembly
) && (_logc
->lcbits
& LC_RegAlloc
))
1298 NanoAssertMsgf(false, "unsupported LIR instruction: %d\n", op
);
1302 evictAllActiveRegs();
                case LIR_flive:
                CASE64(LIR_qlive:)
                case LIR_live: {
                    countlir_live();
                    LInsp op1 = ins->oprnd1();
                    // alloca's are meant to live until the point of the LIR_live instruction, marking
                    // other expressions as live ensures that they remain so at loop bottoms.
                    // alloca areas require special treatment because they are accessed indirectly and
                    // the indirect accesses are invisible to the assembler, other than via LIR_live.
                    // other expression results are only accessed directly in ways that are visible to
                    // the assembler, so extending those expressions' lifetimes past the last loop edge
                    // isn't necessary.
                    if (op1->isop(LIR_alloc)) {
                        findMemFor(op1);
                    } else {
                        pending_lives.add(ins);
                    }
                    break;
                }
                case LIR_alloc: {
                    countlir_alloc();
                    // Allocate some stack space. The value of this instruction
                    // is the address of the stack space.
                    NanoAssert(ins->isInAr());
                    if (ins->isInReg()) {
                        Register r = ins->getReg();
                        asm_restore(ins, r);
                        _allocator.retire(r);
                    }
                    freeResourcesOf(ins);
                    break;
                }
#if NJ_SOFTFLOAT_SUPPORTED
                case LIR_callh:
                {
                    // return result of quad-call in register
                    deprecated_prepResultReg(ins, rmask(retRegs[1]));
                    // if hi half was used, we must use the call to ensure it happens
                    findSpecificRegFor(ins->oprnd1(), retRegs[0]);
                    break;
                }
#endif

                case LIR_sti:
                {
                    countlir_st();
                    asm_store32(op, ins->oprnd1(), ins->disp(), ins->oprnd2());
                    break;
                }
                case LIR_stfi:
                CASE64(LIR_stqi:)
                {
                    countlir_stq();
                    LIns* value = ins->oprnd1();
                    LIns* base = ins->oprnd2();
                    int dr = ins->disp();
#if NJ_SOFTFLOAT_SUPPORTED
                    if (value->isop(LIR_qjoin) && op == LIR_stfi)
                    {
                        // This is correct for little-endian only.
                        asm_store32(LIR_sti, value->oprnd1(), dr, base);
                        asm_store32(LIR_sti, value->oprnd2(), dr+4, base);
                    }
                    else
#endif
                    {
                        asm_store64(op, value, dr, base);
                    }
                    break;
                }
                case LIR_j:
                {
                    countlir_jmp();
                    LInsp to = ins->getTarget();
                    LabelState *label = _labels.get(to);
                    // the jump is always taken so whatever register state we
                    // have from downstream code, is irrelevant to code before
                    // this jump. so clear it out. we will pick up register
                    // state from the jump target, if we have seen that label.
                    releaseRegisters();
                    if (label && label->addr) {
                        // forward jump - pick up register state from target.
                        unionRegisterState(label->regs);
                        JMP(label->addr);
                    }
                    else {
                        // backwards jump
                        handleLoopCarriedExprs(pending_lives);
                        if (!label) {
                            // save empty register state at loop header
                            _labels.add(to, 0, _allocator);
                        }
                        else {
                            intersectRegisterState(label->regs);
                        }
                        JMP(0);
                        _patches.put(_nIns, to);
                    }
                    break;
                }
                case LIR_jt:
                case LIR_jf:
                {
                    countlir_jcc();
                    LInsp to = ins->getTarget();
                    LIns* cond = ins->oprnd1();
                    LabelState *label = _labels.get(to);
                    if (label && label->addr) {
                        // forward jump to known label. need to merge with label's register state.
                        unionRegisterState(label->regs);
                        asm_branch(op == LIR_jf, cond, label->addr);
                    }
                    else {
                        // back edge.
                        handleLoopCarriedExprs(pending_lives);
                        if (!label) {
                            // evict all registers, most conservative approach.
                            evictAllActiveRegs();
                            _labels.add(to, 0, _allocator);
                        }
                        else {
                            // evict all registers, most conservative approach.
                            intersectRegisterState(label->regs);
                        }
                        NIns *branch = asm_branch(op == LIR_jf, cond, 0);
                        _patches.put(branch,to);
                    }
                    break;
                }
#if NJ_JTBL_SUPPORTED
                case LIR_jtbl:
                {
                    countlir_jtbl();
                    // Multiway jump can contain both forward and backward jumps.
                    // Out of range indices aren't allowed or checked.
                    // Code after this jtbl instruction is unreachable.
                    releaseRegisters();
                    NanoAssert(_allocator.countActive() == 0);

                    uint32_t count = ins->getTableSize();
                    bool has_back_edges = false;

                    // Merge the regstates of labels we have already seen.
                    for (uint32_t i = count; i-- > 0;) {
                        LIns* to = ins->getTarget(i);
                        LabelState *lstate = _labels.get(to);
                        if (lstate) {
                            unionRegisterState(lstate->regs);
                            verbose_only( RefBuf b; )
                            asm_output("   %u: [&%s]", i, _thisfrag->lirbuf->printer->formatRef(&b, to));
                        } else {
                            has_back_edges = true;
                        }
                    }
                    asm_output("forward edges");

                    // In a multi-way jump, the register allocator has no ability to deal
                    // with two existing edges that have conflicting register assignments, unlike
                    // a conditional branch where code can be inserted on the fall-through path
                    // to reconcile registers. So, frontends *must* insert LIR_regfence at labels of
                    // forward jtbl jumps. Check here to make sure no registers were picked up from
                    // any forward edges.
                    NanoAssert(_allocator.countActive() == 0);

                    if (has_back_edges) {
                        handleLoopCarriedExprs(pending_lives);
                        // save merged (empty) register state at target labels we haven't seen yet
                        for (uint32_t i = count; i-- > 0;) {
                            LIns* to = ins->getTarget(i);
                            LabelState *lstate = _labels.get(to);
                            if (!lstate) {
                                _labels.add(to, 0, _allocator);
                                verbose_only( RefBuf b; )
                                asm_output("   %u: [&%s]", i, _thisfrag->lirbuf->printer->formatRef(&b, to));
                            }
                        }
                        asm_output("backward edges");
                    }

                    // Emit the jump instruction, which allocates 1 register for the jump index.
                    NIns** native_table = new (_dataAlloc) NIns*[count];
                    asm_output("[%p]:", (void*)native_table);
                    _patches.put((NIns*)native_table, ins);
                    asm_jtbl(ins, native_table);
                    break;
                }
#endif
                case LIR_label:
                {
                    countlir_label();
                    LabelState *label = _labels.get(ins);
                    // add profiling inc, if necessary.
                    verbose_only( if (_logc->lcbits & LC_FragProfile) {
                        if (ins == _thisfrag->loopLabel)
                            asm_inc_m32(& _thisfrag->profCount);
                    })
                    if (!label) {
                        // label seen first, normal target of forward jump, save addr & allocator
                        _labels.add(ins, _nIns, _allocator);
                    }
                    else {
                        // we're at the top of a loop
                        NanoAssert(label->addr == 0);
                        //evictAllActiveRegs();
                        intersectRegisterState(label->regs);
                        label->addr = _nIns;
                    }
                    verbose_only(
                        RefBuf b;
                        if (_logc->lcbits & LC_Assembly) {
                            asm_output("[%s]", _thisfrag->lirbuf->printer->formatRef(&b, ins));
                        })
                    break;
                }
                case LIR_xbarrier: {
                    break;
                }

#ifdef NANOJIT_IA32
                case LIR_xtbl: {
                    NIns* exit = asm_exit(ins); // does intersectRegisterState()
                    asm_switch(ins, exit);
                    break;
                }
#else
                case LIR_xtbl:
                    NanoAssertMsg(0, "Not supported for this architecture");
                    break;
#endif
                case LIR_xt:
                case LIR_xf:
                {
                    verbose_only( _thisfrag->nStaticExits++; )
                    countlir_xcc();
                    // we only support cmp with guard right now, also assume it is 'close' and only emit the branch
                    NIns* exit = asm_exit(ins); // does intersectRegisterState()
                    LIns* cond = ins->oprnd1();
                    asm_branch(op == LIR_xf, cond, exit);
                    break;
                }

                case LIR_x:
                {
                    verbose_only( _thisfrag->nStaticExits++; )
                    countlir_x();
                    // generate the side exit branch on the main trace.
                    NIns *exit = asm_exit(ins);
                    JMP(exit);
                    break;
                }

                case LIR_addxov:
                case LIR_subxov:
                case LIR_mulxov:
                {
                    verbose_only( _thisfrag->nStaticExits++; )
                    countlir_xcc();
                    NIns* exit = asm_exit(ins); // does intersectRegisterState()
                    asm_branch_xov(op, exit);
                    break;
                }
#ifdef VTUNE
                case LIR_file:
                {
                    // we traverse backwards so we are now hitting the file
                    // that is associated with a bunch of LIR_lines we already have seen
                    uintptr_t currentFile = ins->oprnd1()->imm32();
                    cgen->jitFilenameUpdate(currentFile);
                    break;
                }

                case LIR_line:
                {
                    // add a new table entry, we don't yet know which file it belongs
                    // to so we need to add it to the update table too
                    // note the alloc, actual act is delayed; see above
                    uint32_t currentLine = (uint32_t) ins->oprnd1()->imm32();
                    cgen->jitLineNumUpdate(currentLine);
                    cgen->jitAddRecord((uintptr_t)_nIns, 0, currentLine, true);
                    break;
                }
#endif // VTUNE
            }

#ifdef NJ_VERBOSE
            // We have to do final LIR printing inside this loop. If we do it
            // before this loop, we end up printing a lot of dead LIR
            // instructions.
            //
            // We print the LIns after generating the code. This ensures that
            // the LIns will appear in debug output *before* the generated
            // code, because Assembler::outputf() prints everything in reverse.
            //
            // Note that some live LIR instructions won't be printed. Eg. an
            // immediate won't be printed unless it is explicitly loaded into
            // a register (as opposed to being incorporated into an immediate
            // field in another machine instruction).
            //
            if (_logc->lcbits & LC_Assembly) {
                InsBuf b;
                LInsPrinter* printer = _thisfrag->lirbuf->printer;
                outputf("    %s", printer->formatIns(&b, ins));
                if (ins->isGuard() && ins->oprnd1() && ins->oprnd1()->isCmp()) {
                    // Special case: code is generated for guard conditions at
                    // the same time that code is generated for the guard
                    // itself. If the condition is only used by the guard, we
                    // must print it now otherwise it won't get printed. So
                    // we do print it now, with an explanatory comment. If
                    // the condition *is* used again we'll end up printing it
                    // twice, but that's ok.
                    outputf("    %s    # codegen'd with the %s",
                            printer->formatIns(&b, ins->oprnd1()), lirNames[op]);

                } else if (ins->isCmov()) {
                    // Likewise for cmov conditions.
                    outputf("    %s    # codegen'd with the %s",
                            printer->formatIns(&b, ins->oprnd1()), lirNames[op]);
                }
#if defined NANOJIT_IA32 || defined NANOJIT_X64
                else if (ins->isop(LIR_mod)) {
                    // There's a similar case when a div feeds into a mod.
                    outputf("    %s    # codegen'd with the mod",
                            printer->formatIns(&b, ins->oprnd1()));
                }
#endif
            }
#endif

#ifdef VTUNE
            cgen->jitCodePosUpdate((uintptr_t)_nIns);
#endif

            // check that all is well (don't check in exit paths since it's more complicated)
            debug_only( pageValidate(); )
            debug_only( resourceConsistencyCheck(); )

          end_of_loop:
            // Advance the lookahead window.
            for (int32_t i = 1; i < N_LOOKAHEAD; i++)
                lookahead[i-1] = lookahead[i];
            lookahead[N_LOOKAHEAD-1] = reader->read();
        }
    }
    /*
     * Write a jump table for the given SwitchInfo and store the table
     * address in the SwitchInfo. Every entry will initially point to
     * 'target'.
     */
    void Assembler::emitJumpTable(SwitchInfo* si, NIns* target)
    {
        si->table = (NIns**) alloc.alloc(si->count * sizeof(NIns*));
        for (uint32_t i = 0; i < si->count; ++i)
            si->table[i] = target;
    }
    void Assembler::assignSavedRegs()
    {
        // Restore saved registers.
        LirBuffer *b = _thisfrag->lirbuf;
        for (int i=0, n = NumSavedRegs; i < n; i++) {
            LIns *p = b->savedRegs[i];
            if (p)
                findSpecificRegForUnallocated(p, savedRegs[p->paramArg()]);
        }
    }
    void Assembler::reserveSavedRegs()
    {
        LirBuffer *b = _thisfrag->lirbuf;
        for (int i = 0, n = NumSavedRegs; i < n; i++) {
            LIns *ins = b->savedRegs[i];
            if (ins)
                findMemFor(ins);
        }
    }
    void Assembler::assignParamRegs()
    {
        LInsp state = _thisfrag->lirbuf->state;
        if (state)
            findSpecificRegForUnallocated(state, argRegs[state->paramArg()]);
        LInsp param1 = _thisfrag->lirbuf->param1;
        if (param1)
            findSpecificRegForUnallocated(param1, argRegs[param1->paramArg()]);
    }
    void Assembler::handleLoopCarriedExprs(InsList& pending_lives)
    {
        // ensure that exprs spanning the loop are marked live at the end of the loop
        reserveSavedRegs();

        for (Seq<LIns*> *p = pending_lives.get(); p != NULL; p = p->tail) {
            LIns *ins = p->head;
            NanoAssert(ins->isLive());
            LIns *op1 = ins->oprnd1();
            // Must findMemFor even if we're going to findRegFor; loop-carried
            // operands may spill on another edge, and we need them to always
            // spill to the same place.
#if NJ_USES_QUAD_CONSTANTS
            // Exception: if float constants are true constants, we should
            // never call findMemFor on those ops.
            if (!op1->isconstf())
#endif
            {
                findMemFor(op1);
            }
            if (!op1->isImmAny())
                findRegFor(op1, ins->isop(LIR_flive) ? FpRegs : GpRegs);
        }

        // clear this list since we have now dealt with those lifetimes. extending
        // their lifetimes again later (earlier in the code) serves no purpose.
        pending_lives.clear();
    }
    void AR::freeEntryAt(uint32_t idx)
    {
        NanoAssert(idx > 0 && idx <= _highWaterMark);

        // NB: this loop relies on entry[0] being NULL,
        // so that we are guaranteed to terminate
        // without accessing negative entries.
        LIns* i = _entries[idx];
        NanoAssert(i != NULL);
        do {
            _entries[idx] = NULL;
            idx--;
        } while (_entries[idx] == i);
    }
#ifdef NJ_VERBOSE
    void Assembler::printRegState()
    {
        char* s = &outline[0];
        VMPI_memset(s, ' ', 26);  s[26] = '\0';
        s += VMPI_strlen(s);
        VMPI_sprintf(s, "RR");
        s += VMPI_strlen(s);

        for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
            LIns *ins = _allocator.getActive(r);
            if (ins) {
                NanoAssertMsg(!_allocator.isFree(r),
                              "Coding error; register is both free and active! " );
                RefBuf b;
                const char* n = _thisfrag->lirbuf->printer->formatRef(&b, ins);

                if (ins->isop(LIR_param) && ins->paramKind()==1 &&
                    r == Assembler::savedRegs[ins->paramArg()])
                {
                    // don't print callee-saved regs that aren't used
                    continue;
                }

                VMPI_sprintf(s, " %s(%s)", gpn(r), n);
                s += VMPI_strlen(s);
            }
        }
        output();
    }
    void Assembler::printActivationState()
    {
        char* s = &outline[0];
        VMPI_memset(s, ' ', 26);  s[26] = '\0';
        s += VMPI_strlen(s);
        VMPI_sprintf(s, "AR");
        s += VMPI_strlen(s);

        LIns* ins = 0;
        uint32_t nStackSlots = 0;
        int32_t arIndex = 0;
        for (AR::Iter iter(_activation); iter.next(ins, nStackSlots, arIndex); )
        {
            RefBuf b;
            const char* n = _thisfrag->lirbuf->printer->formatRef(&b, ins);
            if (nStackSlots > 1) {
                VMPI_sprintf(s," %d-%d(%s)", 4*arIndex, 4*(arIndex+nStackSlots-1), n);
            }
            else {
                VMPI_sprintf(s," %d(%s)", 4*arIndex, n);
            }
            s += VMPI_strlen(s);
        }
        output();
    }
#endif
    inline bool AR::isEmptyRange(uint32_t start, uint32_t nStackSlots) const
    {
        for (uint32_t i=0; i < nStackSlots; i++)
        {
            if (_entries[start-i] != NULL)
                return false;
        }
        return true;
    }
    uint32_t AR::reserveEntry(LIns* ins)
    {
        uint32_t const nStackSlots = nStackSlotsFor(ins);

        if (nStackSlots == 1)
        {
            for (uint32_t i = 1; i <= _highWaterMark; i++)
            {
                if (_entries[i] == NULL)
                {
                    _entries[i] = ins;
                    return i;
                }
            }
            if (_highWaterMark < NJ_MAX_STACK_ENTRY - 1)
            {
                NanoAssert(_entries[_highWaterMark+1] == BAD_ENTRY);
                _highWaterMark++;
                _entries[_highWaterMark] = ins;
                return _highWaterMark;
            }
        }
        else
        {
            // alloc larger block on 8byte boundary.
            uint32_t const start = nStackSlots + (nStackSlots & 1);
            for (uint32_t i = start; i <= _highWaterMark; i += 2)
            {
                if (isEmptyRange(i, nStackSlots))
                {
                    // place the entry in the table and mark the instruction with it
                    for (uint32_t j=0; j < nStackSlots; j++)
                    {
                        NanoAssert(i-j <= _highWaterMark);
                        NanoAssert(_entries[i-j] == NULL);
                        _entries[i-j] = ins;
                    }
                    return i;
                }
            }

            // Be sure to account for any 8-byte-round-up when calculating spaceNeeded.
            uint32_t const spaceLeft = NJ_MAX_STACK_ENTRY - _highWaterMark - 1;
            uint32_t const spaceNeeded = nStackSlots + (_highWaterMark & 1);
            if (spaceLeft >= spaceNeeded)
            {
                if (_highWaterMark & 1)
                {
                    NanoAssert(_entries[_highWaterMark+1] == BAD_ENTRY);
                    _entries[_highWaterMark+1] = NULL;
                }
                _highWaterMark += spaceNeeded;
                for (uint32_t j = 0; j < nStackSlots; j++)
                {
                    NanoAssert(_highWaterMark-j < NJ_MAX_STACK_ENTRY);
                    NanoAssert(_entries[_highWaterMark-j] == BAD_ENTRY);
                    _entries[_highWaterMark-j] = ins;
                }
                return _highWaterMark;
            }
        }
        // no space. oh well.
        return 0;
    }
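    // Standalone sketch (not part of the build) of the 8-byte rounding used
    // above: an odd slot count is rounded up to the next even number so that
    // multi-slot entries stay 8-byte aligned.
#if 0
    static uint32_t roundUpToEvenSlots(uint32_t nStackSlots)
    {
        return nStackSlots + (nStackSlots & 1);   // 3 -> 4, 4 -> 4, 5 -> 6
    }
#endif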
    void AR::checkForResourceLeaks() const
    {
        for (uint32_t i = 1; i <= _highWaterMark; i++) {
            NanoAssertMsgf(_entries[i] == NULL, "frame entry %d wasn't freed\n",4*i);
        }
    }
    uint32_t Assembler::arReserve(LIns* ins)
    {
        uint32_t i = _activation.reserveEntry(ins);
        if (!i)
            setError(StackFull);
        return i;
    }
    void Assembler::arFree(LIns* ins)
    {
        NanoAssert(ins->isInAr());
        uint32_t arIndex = ins->getArIndex();
        NanoAssert(arIndex);
        NanoAssert(_activation.isValidEntry(arIndex, ins));
        _activation.freeEntryAt(arIndex);        // free any stack space associated with entry
    }
    /**
     * Move regs around so the SavedRegs contains the highest priority regs.
     */
    void Assembler::evictScratchRegsExcept(RegisterMask ignore)
    {
        // Find the top GpRegs that are candidates to put in SavedRegs.

        // 'tosave' is a binary heap stored in an array. The root is tosave[0],
        // left child is at i+1, right child is at i+2.

        Register tosave[LastReg-FirstReg+1];
        int len = 0;
        RegAlloc *regs = &_allocator;
        for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
            if (rmask(r) & GpRegs & ~ignore) {
                LIns *ins = regs->getActive(r);
                if (ins) {
                    if (canRemat(ins)) {
                        NanoAssert(ins->getReg() == r);
                        evict(ins);
                    }
                    else {
                        int32_t pri = regs->getPriority(r);
                        // add to heap by adding to end and bubbling up
                        int j = len++;
                        while (j > 0 && pri > regs->getPriority(tosave[j/2])) {
                            tosave[j] = tosave[j/2];
                            j /= 2;
                        }
                        NanoAssert(size_t(j) < sizeof(tosave)/sizeof(tosave[0]));
                        tosave[j] = r;
                    }
                }
            }
        }

        // Now primap has the live exprs in priority order.
        // Allocate each of the top priority exprs to a SavedReg.

        RegisterMask allow = SavedRegs;
        while (allow && len > 0) {
            // get the highest priority var
            Register hi = tosave[0];
            if (!(rmask(hi) & SavedRegs)) {
                LIns *ins = regs->getActive(hi);
                Register r = findRegFor(ins, allow);
                allow &= ~rmask(r);
            }
            else {
                // hi is already in a saved reg, leave it alone.
                allow &= ~rmask(hi);
            }

            // remove from heap by replacing root with end element and bubbling down.
            if (allow && --len > 0) {
                Register last = tosave[len];
                int j = 0;
                while (j+1 < len) {
                    int child = j+1;
                    if (j+2 < len && regs->getPriority(tosave[j+2]) > regs->getPriority(tosave[j+1]))
                        child = j+2;
                    if (regs->getPriority(last) > regs->getPriority(tosave[child]))
                        break;
                    tosave[j] = tosave[child];
                    j = child;
                }
                tosave[j] = last;
            }
        }

        // now evict everything else.
        evictSomeActiveRegs(~(SavedRegs | ignore));
    }
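    // Standalone sketch (not part of the build) of the bubble-up used above,
    // with the same parent(j) == j/2 convention; the int priorities stand in
    // for RegAlloc::getPriority() results.
#if 0
    static void heapInsert(int32_t heap[], int& len, int32_t pri)
    {
        int j = len++;
        while (j > 0 && pri > heap[j/2]) {
            heap[j] = heap[j/2];    // shift the lower-priority parent down
            j /= 2;
        }
        heap[j] = pri;              // highest priority bubbles toward heap[0]
    }
#endif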
    void Assembler::evictAllActiveRegs()
    {
        // generate code to restore callee saved registers
        // @todo speed this up
        for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
            evictIfActive(r);
        }
    }
    void Assembler::evictSomeActiveRegs(RegisterMask regs)
    {
        // generate code to restore callee saved registers
        // @todo speed this up
        for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
            if ((rmask(r) & regs)) {
                evictIfActive(r);
            }
        }
    }
    /**
     * Merge the current regstate with a previously stored version.
     *
     * Situation                            Change to _allocator
     * ---------                            --------------------
     * !current & !saved                    none
     * !current & saved                     add saved
     * current & !saved                     evict current (unionRegisterState does nothing)
     * current & saved & current==saved     none
     * current & saved & current!=saved     evict current, add saved
     */
    void Assembler::intersectRegisterState(RegAlloc& saved)
    {
        Register regsTodo[LastReg + 1];
        LIns* insTodo[LastReg + 1];
        int nTodo = 0;

        // Do evictions and pops first.
        verbose_only(bool shouldMention=false; )
        // The obvious thing to do here is to iterate from FirstReg to LastReg.
        // viz: for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) ...
        // However, on ARM that causes lower-numbered integer registers
        // to be saved at higher addresses, which inhibits the formation
        // of load/store multiple instructions. Hence iterate the loop the
        // other way. The "r <= LastReg" guards against wraparound in
        // the case where Register is treated as unsigned and FirstReg is zero.
        //
        // Note, the loop var is deliberately typed as int (*not* Register)
        // to outsmart compilers that will otherwise report
        // "error: comparison is always true due to limited range of data type".
        for (int ri = LastReg; ri >= FirstReg && ri <= LastReg; ri = int(prevreg(Register(ri))))
        {
            Register const r = Register(ri);
            LIns* curins = _allocator.getActive(r);
            LIns* savedins = saved.getActive(r);
            if (curins != savedins)
            {
                if (savedins) {
                    regsTodo[nTodo] = r;
                    insTodo[nTodo] = savedins;
                    nTodo++;
                }
                if (curins) {
                    //_nvprof("intersect-evict",1);
                    verbose_only( shouldMention=true; )
                    NanoAssert(curins->getReg() == r);
                    evict(curins);
                }

                #ifdef NANOJIT_IA32
                if (savedins && (rmask(r) & x87Regs)) {
                    verbose_only( shouldMention=true; )
                    FSTP(r);
                }
                #endif
            }
        }

        // Now reassign mainline registers.
        for (int i = 0; i < nTodo; i++) {
            findSpecificRegFor(insTodo[i], regsTodo[i]);
        }
        verbose_only(
            if (shouldMention)
                verbose_outputf("## merging registers (intersect) with existing edge");
        )
    }
    /**
     * Merge the current state of the registers with a previously stored version.
     *
     * Situation                            Change to _allocator
     * ---------                            --------------------
     * !current & !saved                    none
     * !current & saved                     add saved
     * current & !saved                     none (intersectRegisterState evicts current)
     * current & saved & current==saved     none
     * current & saved & current!=saved     evict current, add saved
     */
    void Assembler::unionRegisterState(RegAlloc& saved)
    {
        Register regsTodo[LastReg + 1];
        LIns* insTodo[LastReg + 1];
        int nTodo = 0;

        // Do evictions and pops first.
        verbose_only(bool shouldMention=false; )
        for (Register r = FirstReg; r <= LastReg; r = nextreg(r))
        {
            LIns* curins = _allocator.getActive(r);
            LIns* savedins = saved.getActive(r);
            if (curins != savedins)
            {
                if (savedins) {
                    regsTodo[nTodo] = r;
                    insTodo[nTodo] = savedins;
                    nTodo++;
                }
                if (curins && savedins) {
                    //_nvprof("union-evict",1);
                    verbose_only( shouldMention=true; )
                    NanoAssert(curins->getReg() == r);
                    evict(curins);
                }

                #ifdef NANOJIT_IA32
                if (rmask(r) & x87Regs) {
                    if (savedins) {
                        FSTP(r);
                    }
                    else if (curins) {
                        // saved state did not have fpu reg allocated,
                        // so we must evict here to keep x87 stack balanced.
                        evict(curins);
                    }
                    verbose_only( shouldMention=true; )
                }
                #endif
            }
        }

        // Now reassign mainline registers.
        for (int i = 0; i < nTodo; i++) {
            findSpecificRegFor(insTodo[i], regsTodo[i]);
        }
        verbose_only(
            if (shouldMention)
                verbose_outputf("## merging registers (union) with existing edge");
        )
    }
    // Scan table for instruction with the lowest priority, meaning it is used
    // furthest in the future.
    LIns* Assembler::findVictim(RegisterMask allow)
    {
        NanoAssert(allow);
        LIns *ins, *vic = 0;
        int allow_pri = 0x7fffffff;
        for (Register r = FirstReg; r <= LastReg; r = nextreg(r))
        {
            if ((allow & rmask(r)) && (ins = _allocator.getActive(r)) != 0)
            {
                int pri = canRemat(ins) ? 0 : _allocator.getPriority(r);
                if (!vic || pri < allow_pri) {
                    vic = ins;
                    allow_pri = pri;
                }
            }
        }
        NanoAssert(vic != 0);
        return vic;
    }
#ifdef NJ_VERBOSE
    char Assembler::outline[8192];
    char Assembler::outlineEOL[512];

    void Assembler::output()
    {
        // The +1 is for the terminating NUL char.
        VMPI_strncat(outline, outlineEOL, sizeof(outline)-(strlen(outline)+1));

        if (_outputCache) {
            char* str = new (alloc) char[VMPI_strlen(outline)+1];
            VMPI_strcpy(str, outline);
            _outputCache->insert(str);
        } else {
            _logc->printf("%s\n", outline);
        }

        outline[0] = '\0';
        outlineEOL[0] = '\0';
    }

    void Assembler::outputf(const char* format, ...)
    {
        va_list args;
        va_start(args, format);

        outline[0] = '\0';
        vsprintf(outline, format, args);
        output();
    }

    void Assembler::setOutputForEOL(const char* format, ...)
    {
        va_list args;
        va_start(args, format);

        outlineEOL[0] = '\0';
        vsprintf(outlineEOL, format, args);
    }
#endif // NJ_VERBOSE
    void LabelStateMap::add(LIns *label, NIns *addr, RegAlloc &regs) {
        LabelState *st = new (alloc) LabelState(addr, regs);
        labels.put(label, st);
    }

    LabelState* LabelStateMap::get(LIns *label) {
        return labels.get(label);
    }
}
#endif /* FEATURE_NANOJIT */