nanojit/Assembler.cpp
1 /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
2 /* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
3 /* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
16 * The Original Code is [Open Source Virtual Machine].
18 * The Initial Developer of the Original Code is
19 * Adobe System Incorporated.
20 * Portions created by the Initial Developer are Copyright (C) 2004-2007
21 * the Initial Developer. All Rights Reserved.
23 * Contributor(s):
24 * Adobe AS3 Team
26 * Alternatively, the contents of this file may be used under the terms of
27 * either the GNU General Public License Version 2 or later (the "GPL"), or
28 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
40 #include "nanojit.h"
42 #ifdef FEATURE_NANOJIT
44 #ifdef VTUNE
45 #include "../core/CodegenLIR.h"
46 #endif
48 #ifdef _MSC_VER
49 // disable some specific warnings which are normally useful, but pervasive in the code-gen macros
50 #pragma warning(disable:4310) // cast truncates constant value
51 #endif
53 namespace nanojit
55 /**
56 * Need the following:
58 * - merging paths ( build a graph? ), possibly use external rep to drive codegen
60 Assembler::Assembler(CodeAlloc& codeAlloc, Allocator& dataAlloc, Allocator& alloc, AvmCore* core, LogControl* logc, const Config& config)
61 : codeList(NULL)
62 , alloc(alloc)
63 , _codeAlloc(codeAlloc)
64 , _dataAlloc(dataAlloc)
65 , _thisfrag(NULL)
66 , _branchStateMap(alloc)
67 , _patches(alloc)
68 , _labels(alloc)
69 #if NJ_USES_QUAD_CONSTANTS
70 , _quadConstants(alloc)
71 #endif
72 , _epilogue(NULL)
73 , _err(None)
74 #if PEDANTIC
75 , pedanticTop(NULL)
76 #endif
77 #ifdef VTUNE
78 , cgen(NULL)
79 #endif
80 , _config(config)
82 VMPI_memset(&_stats, 0, sizeof(_stats));
83 VMPI_memset(lookahead, 0, N_LOOKAHEAD * sizeof(LInsp));
84 nInit(core);
85 (void)logc;
86 verbose_only( _logc = logc; )
87 verbose_only( _outputCache = 0; )
88 verbose_only( outline[0] = '\0'; )
89 verbose_only( outlineEOL[0] = '\0'; )
91 reset();
94 #ifdef _DEBUG
96 /*static*/ LIns* const AR::BAD_ENTRY = (LIns*)0xdeadbeef;
98 void AR::validateQuick()
100 NanoAssert(_highWaterMark < NJ_MAX_STACK_ENTRY);
101 NanoAssert(_entries[0] == NULL);
102 // Only check a few entries around _highWaterMark.
103 uint32_t const RADIUS = 4;
104 uint32_t const lo = (_highWaterMark > 1 + RADIUS ? _highWaterMark - RADIUS : 1);
105 uint32_t const hi = (_highWaterMark + 1 + RADIUS < NJ_MAX_STACK_ENTRY ? _highWaterMark + 1 + RADIUS : NJ_MAX_STACK_ENTRY);
106 for (uint32_t i = lo; i <= _highWaterMark; ++i)
107 NanoAssert(_entries[i] != BAD_ENTRY);
108 for (uint32_t i = _highWaterMark+1; i < hi; ++i)
109 NanoAssert(_entries[i] == BAD_ENTRY);
112 void AR::validateFull()
114 NanoAssert(_highWaterMark < NJ_MAX_STACK_ENTRY);
115 NanoAssert(_entries[0] == NULL);
116 for (uint32_t i = 1; i <= _highWaterMark; ++i)
117 NanoAssert(_entries[i] != BAD_ENTRY);
118 for (uint32_t i = _highWaterMark+1; i < NJ_MAX_STACK_ENTRY; ++i)
119 NanoAssert(_entries[i] == BAD_ENTRY);
122 void AR::validate()
124 static uint32_t validateCounter = 0;
125 if (++validateCounter >= 100)
127 validateFull();
128 validateCounter = 0;
130 else
132 validateQuick();
136 #endif
138 inline void AR::clear()
140 _highWaterMark = 0;
141 NanoAssert(_entries[0] == NULL);
142 #ifdef _DEBUG
143 for (uint32_t i = 1; i < NJ_MAX_STACK_ENTRY; ++i)
144 _entries[i] = BAD_ENTRY;
145 #endif
148 bool AR::Iter::next(LIns*& ins, uint32_t& nStackSlots, int32_t& arIndex)
150 while (_i <= _ar._highWaterMark) {
151 ins = _ar._entries[_i];
152 if (ins) {
153 arIndex = _i;
154 nStackSlots = nStackSlotsFor(ins);
155 _i += nStackSlots;
156 return true;
158 _i++;
160 ins = NULL;
161 nStackSlots = 0;
162 arIndex = 0;
163 return false;
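    // Added annotation (not part of the original source): a typical walk over the
    // activation record with this iterator, as printActivationState() later in this
    // file does. Each call yields the owning LIns, the number of 4-byte slots it
    // occupies, and the index of its first slot:
    //
    //     LIns* ins; uint32_t nSlots; int32_t arIndex;
    //     for (AR::Iter iter(_activation); iter.next(ins, nSlots, arIndex); ) {
    //         // entry occupies slots [arIndex, arIndex + nSlots - 1]
    //     }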
166 void Assembler::arReset()
168 _activation.clear();
169 _branchStateMap.clear();
170 _patches.clear();
171 _labels.clear();
172 #if NJ_USES_QUAD_CONSTANTS
173 _quadConstants.clear();
174 #endif
177 void Assembler::registerResetAll()
179 nRegisterResetAll(_allocator);
181 // At start, should have some registers free and none active.
182 NanoAssert(0 != _allocator.free);
183 NanoAssert(0 == _allocator.countActive());
184 #ifdef NANOJIT_IA32
185 debug_only(_fpuStkDepth = 0; )
186 #endif
189 // Legend for register sets: A = allowed, P = preferred, F = free, S = SavedReg.
191 // Finds a register in 'setA___' to store the result of 'ins' (one from
192 // 'set_P__' if possible), evicting one if necessary. Doesn't consider
193 // the prior state of 'ins'.
195 // Nb: 'setA___' comes from the instruction's use, 'set_P__' comes from its def.
196 // Eg. in 'add(call(...), ...)':
197 // - the call's use means setA___==GpRegs;
198 // - the call's def means set_P__==rmask(retRegs[0]).
200 Register Assembler::registerAlloc(LIns* ins, RegisterMask setA___, RegisterMask set_P__)
202 Register r;
203 RegisterMask set__F_ = _allocator.free;
204 RegisterMask setA_F_ = setA___ & set__F_;
206 if (setA_F_) {
207 RegisterMask set___S = SavedRegs;
208 RegisterMask setA_FS = setA_F_ & set___S;
209 RegisterMask setAPF_ = setA_F_ & set_P__;
210 RegisterMask setAPFS = setA_FS & set_P__;
211 RegisterMask set;
213 if (setAPFS) set = setAPFS;
214 else if (setAPF_) set = setAPF_;
215 else if (setA_FS) set = setA_FS;
216 else set = setA_F_;
218 r = nRegisterAllocFromSet(set);
219 _allocator.addActive(r, ins);
220 ins->setReg(r);
221 } else {
222 counter_increment(steals);
224 // Nothing free, steal one.
225 // LSRA says pick the one with the furthest use.
226 LIns* vic = findVictim(setA___);
227 NanoAssert(vic->isInReg());
228 r = vic->getReg();
230 evict(vic);
232 // r ends up staying active, but the LIns defining it changes.
233 _allocator.removeFree(r);
234 _allocator.addActive(r, ins);
235 ins->setReg(r);
238 return r;
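    // Added annotation (illustrative, assuming IA32 register names): with
    // setA___ == GpRegs, set_P__ == rmask(EAX) and free == {EAX,EBX,ESI}, the
    // saved-and-preferred set setAPFS is empty (EAX is not callee-saved), so the
    // preferred-and-free set setAPF_ == {EAX} wins and EAX is returned. With no
    // preference (set_P__ == 0) the allowed-free-saved set {EBX,ESI} would be
    // used instead, and if nothing allowed is free a victim is evicted via
    // findVictim().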
241 // Finds a register in 'allow' to store a temporary value (one not
242 // associated with a particular LIns), evicting one if necessary. The
243 // returned register is marked as being free and so can only be safely
244 // used for code generation purposes until the regstate is next inspected
245 // or updated.
246 Register Assembler::registerAllocTmp(RegisterMask allow)
248 LIns dummyIns;
249 Register r = registerAlloc(&dummyIns, allow, /*prefer*/0);
251 // Mark r as free, ready for use as a temporary value.
252 _allocator.removeActive(r);
253 _allocator.addFree(r);
254 return r;
258 * these instructions don't have to be saved & reloaded to spill,
259 * they can just be recalculated w/out any inputs.
261 bool Assembler::canRemat(LIns *i) {
262 return i->isImmAny() || i->isop(LIR_alloc);
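    // Added annotation (not part of the original source): e.g. an LIR_int can be
    // rematerialized with an immediate move and a LIR_alloc result can be
    // recomputed as FP plus a fixed displacement, so neither needs its value
    // written to a spill slot when its register is stolen; the backends'
    // asm_restore() implementations handle the actual rematerialization.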
265 void Assembler::codeAlloc(NIns *&start, NIns *&end, NIns *&eip
266 verbose_only(, size_t &nBytes))
268 // save the block we just filled
269 if (start)
270 CodeAlloc::add(codeList, start, end);
272 // CodeAlloc contract: allocations never fail
273 _codeAlloc.alloc(start, end);
274 verbose_only( nBytes += (end - start) * sizeof(NIns); )
275 NanoAssert(uintptr_t(end) - uintptr_t(start) >= (size_t)LARGEST_UNDERRUN_PROT);
276 eip = end;
278 #ifdef VTUNE
279 if (_nIns && _nExitIns) {
280 //cgen->jitAddRecord((uintptr_t)list->code, 0, 0, true); // add placeholder record for top of page
281 cgen->jitCodePosUpdate((uintptr_t)list->code);
282 cgen->jitPushInfo(); // new page requires new entry
284 #endif
287 void Assembler::reset()
289 _nIns = 0;
290 _nExitIns = 0;
291 codeStart = codeEnd = 0;
292 exitStart = exitEnd = 0;
293 _stats.pages = 0;
294 codeList = 0;
296 nativePageReset();
297 registerResetAll();
298 arReset();
301 #ifdef _DEBUG
302 void Assembler::pageValidate()
304 if (error()) return;
305 // This may be a normal code chunk or an exit code chunk.
306 NanoAssertMsg(codeStart <= _nIns && _nIns <= codeEnd,
307 "Native instruction pointer oversteps paging bounds; check overrideProtect for last instruction");
309 #endif
311 #ifdef _DEBUG
313 bool AR::isValidEntry(uint32_t idx, LIns* ins) const
315 return idx > 0 && idx <= _highWaterMark && _entries[idx] == ins;
318 void AR::checkForResourceConsistency(const RegAlloc& regs)
320 validate();
321 for (uint32_t i = 1; i <= _highWaterMark; ++i)
323 LIns* ins = _entries[i];
324 if (!ins)
325 continue;
326 uint32_t arIndex = ins->getArIndex();
327 NanoAssert(arIndex != 0);
328 if (ins->isop(LIR_alloc)) {
329 int const n = i + (ins->size()>>2);
330 for (int j=i+1; j < n; j++) {
331 NanoAssert(_entries[j]==ins);
333 NanoAssert(arIndex == (uint32_t)n-1);
334 i = n-1;
336 else if (ins->isN64()) {
337 NanoAssert(_entries[i + 1]==ins);
338 i += 1; // skip high word
340 else {
341 NanoAssertMsg(arIndex == i, "Stack record index mismatch");
343 NanoAssertMsg(!ins->isInReg() || regs.isConsistent(ins->getReg(), ins),
344 "Register record mismatch");
348 void Assembler::resourceConsistencyCheck()
350 NanoAssert(!error());
352 #ifdef NANOJIT_IA32
353 NanoAssert((_allocator.active[FST0] && _fpuStkDepth == -1) ||
354 (!_allocator.active[FST0] && _fpuStkDepth == 0));
355 #endif
357 _activation.checkForResourceConsistency(_allocator);
359 registerConsistencyCheck();
362 void Assembler::registerConsistencyCheck()
364 RegisterMask managed = _allocator.managed;
365 for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
366 if (rmask(r) & managed) {
367 // A register managed by register allocation must be either
368 // free or active, but not both.
369 if (_allocator.isFree(r)) {
370 NanoAssertMsgf(_allocator.getActive(r)==0,
371 "register %s is free but assigned to ins", gpn(r));
372 } else {
373 // An LIns defining a register must have that register in
374 // its reservation.
375 LIns* ins = _allocator.getActive(r);
376 NanoAssert(ins);
377 NanoAssertMsg(r == ins->getReg(), "Register record mismatch");
379 } else {
380 // A register not managed by register allocation must be
381 // neither free nor active.
382 NanoAssert(!_allocator.isFree(r));
383 NanoAssert(!_allocator.getActive(r));
387 #endif /* _DEBUG */
389 void Assembler::findRegFor2(RegisterMask allowa, LIns* ia, Register& ra,
390 RegisterMask allowb, LIns* ib, Register& rb)
392 // There should be some overlap between 'allowa' and 'allowb', else
393 // there's no point calling this function.
394 NanoAssert(allowa & allowb);
396 if (ia == ib) {
397 ra = rb = findRegFor(ia, allowa & allowb); // use intersection(allowa, allowb)
399 } else if (ib->isInRegMask(allowb)) {
400 // 'ib' is already in an allowable reg -- don't let it get evicted
401 // when finding 'ra'.
402 rb = ib->getReg();
403 ra = findRegFor(ia, allowa & ~rmask(rb));
405 } else {
406 ra = findRegFor(ia, allowa);
407 rb = findRegFor(ib, allowb & ~rmask(ra));
411 Register Assembler::findSpecificRegFor(LIns* i, Register w)
413 return findRegFor(i, rmask(w));
416 // Like findRegFor(), but called when the LIns is used as a pointer. It
417 // doesn't have to be called; findRegFor() can still be used, but it can
418 // optimize the LIR_alloc case by indexing off FP, thus saving the use of
419 // a GpReg.
421 Register Assembler::getBaseReg(LInsp base, int &d, RegisterMask allow)
423 #if !PEDANTIC
424 if (base->isop(LIR_alloc)) {
425 // The value of a LIR_alloc is a pointer to its stack memory,
426 // which is always relative to FP. So we can just return FP if we
427 // also adjust 'd' (and can do so in a valid manner). Or, in the
428 // PEDANTIC case, we can just assign a register as normal;
429 // findRegFor() will allocate the stack memory for LIR_alloc if
430 // necessary.
431 d += findMemFor(base);
432 return FP;
434 #else
435 (void) d;
436 #endif
437 return findRegFor(base, allow);
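    // Added annotation (illustrative): if 'base' is a LIR_alloc whose storage
    // lives at some FP-relative offset, that offset is simply folded into 'd'
    // and FP is returned, so the caller addresses the stack area directly and no
    // general-purpose register is spent holding the alloc's address.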
440 // Like findRegFor2(), but used for stores where the base value has the
441 // same type as the stored value, eg. in asm_store32() on 32-bit platforms
442 // and asm_store64() on 64-bit platforms. Similar to getBaseReg(),
443 // findRegFor2() can be called instead, but this function can optimize the
444 // case where the base value is a LIR_alloc.
445 void Assembler::getBaseReg2(RegisterMask allowValue, LIns* value, Register& rv,
446 RegisterMask allowBase, LIns* base, Register& rb, int &d)
448 #if !PEDANTIC
449 if (base->isop(LIR_alloc)) {
450 rb = FP;
451 d += findMemFor(base);
452 rv = findRegFor(value, allowValue);
453 return;
455 #else
456 (void) d;
457 #endif
458 findRegFor2(allowValue, value, rv, allowBase, base, rb);
461 // Finds a register in 'allow' to hold the result of 'ins'. Used when we
462 // encounter a use of 'ins'. The actions depend on the prior regstate of
463 // 'ins':
464 // - If the result of 'ins' is not in any register, we find an allowed
465 // one, evicting one if necessary.
466 // - If the result of 'ins' is already in an allowed register, we use that.
467 // - If the result of 'ins' is already in a not-allowed register, we find an
468 // allowed one and move it.
470 Register Assembler::findRegFor(LIns* ins, RegisterMask allow)
472 if (ins->isop(LIR_alloc)) {
473 // Never allocate a reg for this without stack space too.
474 findMemFor(ins);
477 Register r;
479 if (!ins->isInReg()) {
480 // 'ins' isn't in a register (must be in a spill slot or nowhere).
481 r = registerAlloc(ins, allow, hint(ins));
483 } else if (rmask(r = ins->getReg()) & allow) {
484 // 'ins' is in an allowed register.
485 _allocator.useActive(r);
487 } else {
488 // 'ins' is in a register (r) that's not in 'allow'.
489 #ifdef NANOJIT_IA32
490 if (((rmask(r)&XmmRegs) && !(allow&XmmRegs)) ||
491 ((rmask(r)&x87Regs) && !(allow&x87Regs)))
493 // x87 <-> xmm copy required
494 //_nvprof("fpu-evict",1);
495 evict(ins);
496 r = registerAlloc(ins, allow, hint(ins));
497 } else
498 #elif defined(NANOJIT_PPC) || defined(NANOJIT_MIPS)
499 if (((rmask(r)&GpRegs) && !(allow&GpRegs)) ||
500 ((rmask(r)&FpRegs) && !(allow&FpRegs)))
502 evict(ins);
503 r = registerAlloc(ins, allow, hint(ins));
504 } else
505 #endif
507 // The post-state register holding 'ins' is 's', the pre-state
508 // register holding 'ins' is 'r'. For example, if s=eax and
509 // r=ecx:
511 // pre-state: ecx(ins)
512 // instruction: mov eax, ecx
513 // post-state: eax(ins)
515 Register s = r;
516 _allocator.retire(r);
517 r = registerAlloc(ins, allow, hint(ins));
519 // 'ins' is in 'allow', in register r (different to the old r);
520 // s is the old r.
521 if ((rmask(s) & GpRegs) && (rmask(r) & GpRegs)) {
522 MR(s, r); // move 'ins' from its pre-state reg (r) to its post-state reg (s)
523 } else {
524 asm_nongp_copy(s, r);
529 return r;
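    // Added annotation (illustrative, IA32 register names): if 'ins' currently
    // lives in FST0 but 'allow' only contains XmmRegs, the eviction branch above
    // frees it and re-allocates from 'allow'; if it lives in ECX and ECX is in
    // 'allow', ECX is simply reused; if it is in no register at all,
    // registerAlloc() picks one, stealing a victim when necessary.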
532 // Like findSpecificRegFor(), but only for when 'r' is known to be free
533 // and 'ins' is known to not already have a register allocated. Updates
534 // the regstate (maintaining the invariants) but does not generate any
535 // code. The return value is redundant, always being 'r', but it's
536 // sometimes useful to have it there for assignments.
537 Register Assembler::findSpecificRegForUnallocated(LIns* ins, Register r)
539 if (ins->isop(LIR_alloc)) {
540 // never allocate a reg for this w/out stack space too
541 findMemFor(ins);
544 NanoAssert(!ins->isInReg());
545 NanoAssert(_allocator.free & rmask(r));
547 ins->setReg(r);
548 _allocator.removeFree(r);
549 _allocator.addActive(r, ins);
551 return r;
554 #if NJ_USES_QUAD_CONSTANTS
555 const uint64_t* Assembler::findQuadConstant(uint64_t q)
557 uint64_t* p = _quadConstants.get(q);
558 if (!p)
560 p = new (_dataAlloc) uint64_t;
561 *p = q;
562 _quadConstants.put(q, p);
564 return p;
566 #endif
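    // Added annotation (not part of the original source): the table de-duplicates
    // 64-bit constants in the data allocator, so repeated uses of the same bit
    // pattern (e.g. the same double immediate) share a single 8-byte slot that
    // generated load instructions can reference.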
568 int Assembler::findMemFor(LIns *ins)
570 #if NJ_USES_QUAD_CONSTANTS
571 NanoAssert(!ins->isconstf());
572 #endif
573 if (!ins->isInAr()) {
574 uint32_t const arIndex = arReserve(ins);
575 ins->setArIndex(arIndex);
576 NanoAssert(_activation.isValidEntry(ins->getArIndex(), ins) == (arIndex != 0));
578 return arDisp(ins);
581 // XXX: this function is dangerous and should be phased out;
582 // See bug 513615. Calls to it should be replaced with a
583 // prepareResultReg() / generate code / freeResourcesOf() sequence.
584 Register Assembler::deprecated_prepResultReg(LIns *ins, RegisterMask allow)
586 #ifdef NANOJIT_IA32
587 // We used to have to worry about possibly popping the x87 stack here.
588 // But this function is no longer used on i386, and this assertion
589 // ensures that.
590 NanoAssert(0);
591 #endif
592 Register r = findRegFor(ins, allow);
593 deprecated_freeRsrcOf(ins);
594 return r;
597 // Finds a register in 'allow' to hold the result of 'ins'. Also
598 // generates code to spill the result if necessary. Called just prior to
599 // generating the code for 'ins' (because we generate code backwards).
601 // An example where no spill is necessary. Lines marked '*' are those
602 // done by this function.
604 // regstate: R
605 // asm: define res into r
606 // * regstate: R + r(res)
607 // ...
608 // asm: use res in r
610 // An example where a spill is necessary.
612 // regstate: R
613 // asm: define res into r
614 // * regstate: R + r(res)
615 // * asm: spill res from r
616 // regstate: R
617 // ...
618 // asm: restore res into r2
619 // regstate: R + r2(res) + other changes from "..."
620 // asm: use res in r2
622 Register Assembler::prepareResultReg(LIns *ins, RegisterMask allow)
624 // At this point, we know the result of 'ins' has a use later
625 // in the code. (Exception: if 'ins' is a call to an impure function
626 // the return value may not be used, but 'ins' will still be present
627 // because it has side-effects.) It may have had to be evicted, in
628 // which case the restore will have already been generated, so we now
629 // generate the spill (unless the restore was actually a
630 // rematerialize, in which case it's not necessary).
631 #ifdef NANOJIT_IA32
632 // If 'allow' includes FST0 we have to pop if 'ins' isn't in FST0 in
633 // the post-regstate. This could be because 'ins' is unused, 'ins' is
634 // in a spill slot, or 'ins' is in an XMM register.
635 const bool pop = (allow & rmask(FST0)) &&
636 (!ins->isInReg() || ins->getReg() != FST0);
637 #else
638 const bool pop = false;
639 #endif
640 Register r = findRegFor(ins, allow);
641 asm_maybe_spill(ins, pop);
642 #ifdef NANOJIT_IA32
643 if (!ins->isInAr() && pop && r == FST0) {
644 // This can only happen with a LIR_fcall to an impure function
645 // whose return value was ignored (ie. if ins->isInReg() was false
646 // prior to the findRegFor() call).
647 FSTP(FST0); // pop the fpu result since it isn't used
649 #endif
650 return r;
653 void Assembler::asm_maybe_spill(LInsp ins, bool pop)
655 int d = ins->isInAr() ? arDisp(ins) : 0;
656 Register r = ins->getReg();
657 if (ins->isInAr()) {
658 verbose_only( RefBuf b;
659 if (_logc->lcbits & LC_Assembly) {
660 setOutputForEOL(" <= spill %s",
661 _thisfrag->lirbuf->printer->formatRef(&b, ins)); } )
662 asm_spill(r, d, pop, ins->isN64());
666 // XXX: This function is error-prone and should be phased out; see bug 513615.
667 void Assembler::deprecated_freeRsrcOf(LIns *ins)
669 if (ins->isInReg()) {
670 asm_maybe_spill(ins, /*pop*/false);
671 _allocator.retire(ins->getReg()); // free any register associated with entry
672 ins->clearReg();
674 if (ins->isInAr()) {
675 arFree(ins); // free any AR space associated with entry
676 ins->clearArIndex();
680 // Frees all record of registers and spill slots used by 'ins'.
681 void Assembler::freeResourcesOf(LIns *ins)
683 if (ins->isInReg()) {
684 _allocator.retire(ins->getReg()); // free any register associated with entry
685 ins->clearReg();
687 if (ins->isInAr()) {
688 arFree(ins); // free any AR space associated with entry
689 ins->clearArIndex();
693 // Frees 'r' in the RegAlloc regstate, if it's not already free.
694 void Assembler::evictIfActive(Register r)
696 if (LIns* vic = _allocator.getActive(r)) {
697 NanoAssert(vic->getReg() == r);
698 evict(vic);
702 // Frees 'r' (which currently holds the result of 'vic') in the regstate.
703 // An example:
705 // pre-regstate: eax(ld1)
706 // instruction: mov ebx,-4(ebp) <= restore add1 # %ebx is dest
707 // post-regstate: eax(ld1) ebx(add1)
709 // At run-time we are *restoring* 'add1' into %ebx, hence the call to
710 // asm_restore(). But at regalloc-time we are moving backwards through
711 // the code, so in that sense we are *evicting* 'add1' from %ebx.
713 void Assembler::evict(LIns* vic)
715 // Not free, need to steal.
716 counter_increment(steals);
718 Register r = vic->getReg();
720 NanoAssert(!_allocator.isFree(r));
721 NanoAssert(vic == _allocator.getActive(r));
723 verbose_only( RefBuf b;
724 if (_logc->lcbits & LC_Assembly) {
725 setOutputForEOL(" <= restore %s",
726 _thisfrag->lirbuf->printer->formatRef(&b, vic)); } )
727 asm_restore(vic, r);
729 _allocator.retire(r);
730 vic->clearReg();
732 // At this point 'vic' is unused (if rematerializable), or in a spill
733 // slot (if not).
736 void Assembler::patch(GuardRecord *lr)
738 if (!lr->jmp) // the guard might have been eliminated as redundant
739 return;
740 Fragment *frag = lr->exit->target;
741 NanoAssert(frag->fragEntry != 0);
742 nPatchBranch((NIns*)lr->jmp, frag->fragEntry);
743 CodeAlloc::flushICache(lr->jmp, LARGEST_BRANCH_PATCH);
744 verbose_only(verbose_outputf("patching jump at %p to target %p\n",
745 lr->jmp, frag->fragEntry);)
748 void Assembler::patch(SideExit *exit)
750 GuardRecord *rec = exit->guards;
751 NanoAssert(rec);
752 while (rec) {
753 patch(rec);
754 rec = rec->next;
758 #ifdef NANOJIT_IA32
759 void Assembler::patch(SideExit* exit, SwitchInfo* si)
761 for (GuardRecord* lr = exit->guards; lr; lr = lr->next) {
762 Fragment *frag = lr->exit->target;
763 NanoAssert(frag->fragEntry != 0);
764 si->table[si->index] = frag->fragEntry;
767 #endif
769 NIns* Assembler::asm_exit(LInsp guard)
771 SideExit *exit = guard->record()->exit;
772 NIns* at = 0;
773 if (!_branchStateMap.get(exit))
775 at = asm_leave_trace(guard);
777 else
779 RegAlloc* captured = _branchStateMap.get(exit);
780 intersectRegisterState(*captured);
781 at = exit->target->fragEntry;
782 NanoAssert(at != 0);
783 _branchStateMap.remove(exit);
785 return at;
788 NIns* Assembler::asm_leave_trace(LInsp guard)
790 verbose_only( int32_t nativeSave = _stats.native );
791 verbose_only( verbose_outputf("----------------------------------- ## END exit block %p", guard);)
793 // This point is unreachable. So free all the registers. If an
794 // instruction has a stack entry we will leave it alone, otherwise we
795 // free it entirely. intersectRegisterState() will restore.
796 RegAlloc capture = _allocator;
797 releaseRegisters();
799 swapCodeChunks();
800 _inExit = true;
802 #ifdef NANOJIT_IA32
803 debug_only( _sv_fpuStkDepth = _fpuStkDepth; _fpuStkDepth = 0; )
804 #endif
806 nFragExit(guard);
808 // Restore the callee-saved register and parameters.
809 assignSavedRegs();
810 assignParamRegs();
812 intersectRegisterState(capture);
814 // this can be useful for breaking whenever an exit is taken
815 //INT3();
816 //NOP();
818 // we are done producing the exit logic for the guard so demarcate where our exit block code begins
819 NIns* jmpTarget = _nIns; // target in exit path for our mainline conditional jump
821 // swap back pointers, effectively storing the last location used in the exit path
822 swapCodeChunks();
823 _inExit = false;
825 //verbose_only( verbose_outputf(" LIR_xt/xf swapCodeChunks, _nIns is now %08X(%08X), _nExitIns is now %08X(%08X)",_nIns, *_nIns,_nExitIns,*_nExitIns) );
826 verbose_only( verbose_outputf("%010lx:", (unsigned long)jmpTarget);)
827 verbose_only( verbose_outputf("----------------------------------- ## BEGIN exit block (LIR_xt|LIR_xf)") );
829 #ifdef NANOJIT_IA32
830 NanoAssertMsgf(_fpuStkDepth == _sv_fpuStkDepth, "LIR_xtf, _fpuStkDepth=%d, expect %d",_fpuStkDepth, _sv_fpuStkDepth);
831 debug_only( _fpuStkDepth = _sv_fpuStkDepth; _sv_fpuStkDepth = 9999; )
832 #endif
834 verbose_only(_stats.exitnative += (_stats.native-nativeSave));
836 return jmpTarget;
839 void Assembler::compile(Fragment* frag, Allocator& alloc, bool optimize verbose_only(, LInsPrinter* printer))
841 verbose_only(
842 bool anyVerb = (_logc->lcbits & 0xFFFF & ~LC_FragProfile) > 0;
843 bool asmVerb = (_logc->lcbits & 0xFFFF & LC_Assembly) > 0;
844 bool liveVerb = (_logc->lcbits & 0xFFFF & LC_Liveness) > 0;
847 /* BEGIN decorative preamble */
848 verbose_only(
849 if (anyVerb) {
850 _logc->printf("========================================"
851 "========================================\n");
852 _logc->printf("=== BEGIN LIR::compile(%p, %p)\n",
853 (void*)this, (void*)frag);
854 _logc->printf("===\n");
856 /* END decorative preamble */
858 verbose_only( if (liveVerb) {
859 _logc->printf("\n");
860 _logc->printf("=== Results of liveness analysis:\n");
861 _logc->printf("===\n");
862 LirReader br(frag->lastIns);
863 LirFilter* lir = &br;
864 if (optimize) {
865 StackFilter* sf = new (alloc) StackFilter(lir, alloc, frag->lirbuf->sp);
866 lir = sf;
868 live(lir, alloc, frag, _logc);
871 /* Set up the generic text output cache for the assembler */
872 verbose_only( StringList asmOutput(alloc); )
873 verbose_only( _outputCache = &asmOutput; )
875 beginAssembly(frag);
876 if (error())
877 return;
879 //_logc->printf("recompile trigger %X kind %d\n", (int)frag, frag->kind);
881 verbose_only( if (anyVerb) {
882 _logc->printf("=== Translating LIR fragments into assembly:\n");
885 // now for the main trunk
886 verbose_only( RefBuf b; )
887 verbose_only( if (anyVerb) {
888 _logc->printf("=== -- Compile trunk %s: begin\n", printer->formatAddr(&b, frag));
891 // Used for debug printing, if needed
892 debug_only(ValidateReader *validate = NULL;)
893 verbose_only(
894 ReverseLister *pp_init = NULL;
895 ReverseLister *pp_after_sf = NULL;
898 // The LIR passes through these filters as listed in this
899 // function, viz, top to bottom.
901 // set up backwards pipeline: assembler <- StackFilter <- LirReader
902 LirFilter* lir = new (alloc) LirReader(frag->lastIns);
904 #ifdef DEBUG
905 // VALIDATION
906 validate = new (alloc) ValidateReader(lir);
907 lir = validate;
908 #endif
910 // INITIAL PRINTING
911 verbose_only( if (_logc->lcbits & LC_ReadLIR) {
912 pp_init = new (alloc) ReverseLister(lir, alloc, frag->lirbuf->printer, _logc,
913 "Initial LIR");
914 lir = pp_init;
917 // STACKFILTER
918 if (optimize) {
919 StackFilter* stackfilter = new (alloc) StackFilter(lir, alloc, frag->lirbuf->sp);
920 lir = stackfilter;
923 verbose_only( if (_logc->lcbits & LC_AfterSF) {
924 pp_after_sf = new (alloc) ReverseLister(lir, alloc, frag->lirbuf->printer, _logc,
925 "After StackFilter");
926 lir = pp_after_sf;
929 assemble(frag, lir);
931 // If we were accumulating debug info in the various ReverseListers,
932 // call finish() to emit whatever contents they have accumulated.
933 verbose_only(
934 if (pp_init) pp_init->finish();
935 if (pp_after_sf) pp_after_sf->finish();
938 verbose_only( if (anyVerb) {
939 _logc->printf("=== -- Compile trunk %s: end\n", printer->formatAddr(&b, frag));
942 verbose_only(
943 if (asmVerb)
944 outputf("## compiling trunk %s", printer->formatAddr(&b, frag));
946 endAssembly(frag);
948 // Reverse output so that assembly is displayed low-to-high.
949 // Up to this point, _outputCache has been non-NULL, and so has been
950 // accumulating output. Now we set it to NULL, traverse the entire
951 // list of stored strings, and hand them a second time to output.
952 // Since _outputCache is now NULL, outputf just hands these strings
953 // directly onwards to _logc->printf.
954 verbose_only( if (anyVerb) {
955 _logc->printf("\n");
956 _logc->printf("=== Aggregated assembly output: BEGIN\n");
957 _logc->printf("===\n");
958 _outputCache = 0;
959 for (Seq<char*>* p = asmOutput.get(); p != NULL; p = p->tail) {
960 char *str = p->head;
961 outputf(" %s", str);
963 _logc->printf("===\n");
964 _logc->printf("=== Aggregated assembly output: END\n");
967 if (error())
968 frag->fragEntry = 0;
970 verbose_only( frag->nCodeBytes += codeBytes; )
971 verbose_only( frag->nExitBytes += exitBytes; )
973 /* BEGIN decorative postamble */
974 verbose_only( if (anyVerb) {
975 _logc->printf("\n");
976 _logc->printf("===\n");
977 _logc->printf("=== END LIR::compile(%p, %p)\n",
978 (void*)this, (void*)frag);
979 _logc->printf("========================================"
980 "========================================\n");
981 _logc->printf("\n");
983 /* END decorative postamble */
986 void Assembler::beginAssembly(Fragment *frag)
988 verbose_only( codeBytes = 0; )
989 verbose_only( exitBytes = 0; )
991 reset();
993 NanoAssert(codeList == 0);
994 NanoAssert(codeStart == 0);
995 NanoAssert(codeEnd == 0);
996 NanoAssert(exitStart == 0);
997 NanoAssert(exitEnd == 0);
998 NanoAssert(_nIns == 0);
999 NanoAssert(_nExitIns == 0);
1001 _thisfrag = frag;
1002 _inExit = false;
1004 counter_reset(native);
1005 counter_reset(exitnative);
1006 counter_reset(steals);
1007 counter_reset(spills);
1008 counter_reset(remats);
1010 setError(None);
1012 // native code gen buffer setup
1013 nativePageSetup();
1015 // make sure we got at least one page of memory
1016 if (error()) return;
1018 #ifdef PERFM
1019 _stats.pages = 0;
1020 _stats.codeStart = _nIns-1;
1021 _stats.codeExitStart = _nExitIns-1;
1022 #endif /* PERFM */
1024 _epilogue = NULL;
1026 nBeginAssembly();
1029 void Assembler::assemble(Fragment* frag, LirFilter* reader)
1031 if (error()) return;
1032 _thisfrag = frag;
1034 // check the fragment is starting out with a sane profiling state
1035 verbose_only( NanoAssert(frag->nStaticExits == 0); )
1036 verbose_only( NanoAssert(frag->nCodeBytes == 0); )
1037 verbose_only( NanoAssert(frag->nExitBytes == 0); )
1038 verbose_only( NanoAssert(frag->profCount == 0); )
1039 verbose_only( if (_logc->lcbits & LC_FragProfile)
1040 NanoAssert(frag->profFragID > 0);
1041 else
1042 NanoAssert(frag->profFragID == 0); )
1044 _inExit = false;
1046 gen(reader);
1048 if (!error()) {
1049 // patch all branches
1050 NInsMap::Iter iter(_patches);
1051 while (iter.next()) {
1052 NIns* where = iter.key();
1053 LIns* target = iter.value();
1054 if (target->isop(LIR_jtbl)) {
1055 // Need to patch up a whole jump table, 'where' is the table.
1056 LIns *jtbl = target;
1057 NIns** native_table = (NIns**) where;
1058 for (uint32_t i = 0, n = jtbl->getTableSize(); i < n; i++) {
1059 LabelState* lstate = _labels.get(jtbl->getTarget(i));
1060 NIns* ntarget = lstate->addr;
1061 if (ntarget) {
1062 native_table[i] = ntarget;
1063 } else {
1064 setError(UnknownBranch);
1065 break;
1068 } else {
1069 // target is a label for a single-target branch
1070 LabelState *lstate = _labels.get(target);
1071 NIns* ntarget = lstate->addr;
1072 if (ntarget) {
1073 nPatchBranch(where, ntarget);
1074 } else {
1075 setError(UnknownBranch);
1076 break;
1083 void Assembler::endAssembly(Fragment* frag)
1085 // don't try to patch code if we are in an error state since we might have partially
1086 // overwritten the code cache already
1087 if (error()) {
1088 // something went wrong, release all allocated code memory
1089 _codeAlloc.freeAll(codeList);
1090 if (_nExitIns)
1091 _codeAlloc.free(exitStart, exitEnd);
1092 _codeAlloc.free(codeStart, codeEnd);
1093 codeList = NULL;
1094 return;
1097 NIns* fragEntry = genPrologue();
1098 verbose_only( asm_output("[prologue]"); )
1100 debug_only(_activation.checkForResourceLeaks());
1102 NanoAssert(!_inExit);
1103 // save used parts of current block on fragment's code list, free the rest
1104 #if defined(NANOJIT_ARM) || defined(NANOJIT_MIPS)
1105 // [codeStart, _nSlot) ... gap ... [_nIns, codeEnd)
1106 if (_nExitIns) {
1107 _codeAlloc.addRemainder(codeList, exitStart, exitEnd, _nExitSlot, _nExitIns);
1108 verbose_only( exitBytes -= (_nExitIns - _nExitSlot) * sizeof(NIns); )
1110 _codeAlloc.addRemainder(codeList, codeStart, codeEnd, _nSlot, _nIns);
1111 verbose_only( codeBytes -= (_nIns - _nSlot) * sizeof(NIns); )
1112 #else
1113 // [codeStart ... gap ... [_nIns, codeEnd))
1114 if (_nExitIns) {
1115 _codeAlloc.addRemainder(codeList, exitStart, exitEnd, exitStart, _nExitIns);
1116 verbose_only( exitBytes -= (_nExitIns - exitStart) * sizeof(NIns); )
1118 _codeAlloc.addRemainder(codeList, codeStart, codeEnd, codeStart, _nIns);
1119 verbose_only( codeBytes -= (_nIns - codeStart) * sizeof(NIns); )
1120 #endif
1122 // at this point all our new code is in the d-cache and not the i-cache,
1123 // so flush the i-cache on CPUs that need it.
1124 CodeAlloc::flushICache(codeList);
1126 // save entry point pointers
1127 frag->fragEntry = fragEntry;
1128 frag->setCode(_nIns);
1129 PERFM_NVPROF("code", CodeAlloc::size(codeList));
1131 #ifdef NANOJIT_IA32
1132 NanoAssertMsgf(_fpuStkDepth == 0,"_fpuStkDepth %d\n",_fpuStkDepth);
1133 #endif
1135 debug_only( pageValidate(); )
1136 NanoAssert(_branchStateMap.isEmpty());
1139 void Assembler::releaseRegisters()
1141 for (Register r = FirstReg; r <= LastReg; r = nextreg(r))
1143 LIns *ins = _allocator.getActive(r);
1144 if (ins) {
1145 // Clear reg allocation, preserve stack allocation.
1146 _allocator.retire(r);
1147 NanoAssert(r == ins->getReg());
1148 ins->clearReg();
1153 #ifdef PERFM
1154 #define countlir_live() _nvprof("lir-live",1)
1155 #define countlir_ret() _nvprof("lir-ret",1)
1156 #define countlir_alloc() _nvprof("lir-alloc",1)
1157 #define countlir_var() _nvprof("lir-var",1)
1158 #define countlir_use() _nvprof("lir-use",1)
1159 #define countlir_def() _nvprof("lir-def",1)
1160 #define countlir_imm() _nvprof("lir-imm",1)
1161 #define countlir_param() _nvprof("lir-param",1)
1162 #define countlir_cmov() _nvprof("lir-cmov",1)
1163 #define countlir_ld() _nvprof("lir-ld",1)
1164 #define countlir_ldq() _nvprof("lir-ldq",1)
1165 #define countlir_alu() _nvprof("lir-alu",1)
1166 #define countlir_qjoin() _nvprof("lir-qjoin",1)
1167 #define countlir_qlo() _nvprof("lir-qlo",1)
1168 #define countlir_qhi() _nvprof("lir-qhi",1)
1169 #define countlir_fpu() _nvprof("lir-fpu",1)
1170 #define countlir_st() _nvprof("lir-st",1)
1171 #define countlir_stq() _nvprof("lir-stq",1)
1172 #define countlir_jmp() _nvprof("lir-jmp",1)
1173 #define countlir_jcc() _nvprof("lir-jcc",1)
1174 #define countlir_label() _nvprof("lir-label",1)
1175 #define countlir_xcc() _nvprof("lir-xcc",1)
1176 #define countlir_x() _nvprof("lir-x",1)
1177 #define countlir_call() _nvprof("lir-call",1)
1178 #define countlir_jtbl() _nvprof("lir-jtbl",1)
1179 #else
1180 #define countlir_live()
1181 #define countlir_ret()
1182 #define countlir_alloc()
1183 #define countlir_var()
1184 #define countlir_use()
1185 #define countlir_def()
1186 #define countlir_imm()
1187 #define countlir_param()
1188 #define countlir_cmov()
1189 #define countlir_ld()
1190 #define countlir_ldq()
1191 #define countlir_alu()
1192 #define countlir_qjoin()
1193 #define countlir_qlo()
1194 #define countlir_qhi()
1195 #define countlir_fpu()
1196 #define countlir_st()
1197 #define countlir_stq()
1198 #define countlir_jmp()
1199 #define countlir_jcc()
1200 #define countlir_label()
1201 #define countlir_xcc()
1202 #define countlir_x()
1203 #define countlir_call()
1204 #define countlir_jtbl()
1205 #endif
1207 void Assembler::gen(LirFilter* reader)
1209 NanoAssert(_thisfrag->nStaticExits == 0);
1211 // The trace must end with one of these opcodes.
1212 NanoAssert(reader->finalIns()->isop(LIR_x) ||
1213 reader->finalIns()->isop(LIR_xtbl) ||
1214 reader->finalIns()->isRet() ||
1215 reader->finalIns()->isLive());
1217 InsList pending_lives(alloc);
1219 NanoAssert(!error());
1221 // What's going on here: we're visiting all the LIR instructions in
1222 // the buffer, working strictly backwards in buffer-order, and
1223 // generating machine instructions for them as we go.
1225 // For each LIns, we first determine whether it's actually necessary,
1226 // and if not skip it. Otherwise we generate code for it. There are
1227 // two kinds of "necessary" instructions:
1229 // - "Statement" instructions, which have side effects. Anything that
1230 // could change control flow or the state of memory.
1232 // - "Value" or "expression" instructions, which compute a value based
1233 // only on the operands to the instruction (and, in the case of
1234 // loads, the state of memory). Because we visit instructions in
1235 // reverse order, if some previously visited instruction uses the
1236 // value computed by this instruction, then this instruction will
1237 // already have a register assigned to hold that value. Hence we
1238 // can consult the instruction to detect whether its value is in
1239 // fact used (i.e. not dead).
1241 // Note that the backwards code traversal can make register allocation
1242 // confusing. (For example, we restore a value before we spill it!)
1243 // In particular, words like "before" and "after" must be used very
1244 // carefully -- their meaning at regalloc-time is opposite to their
1245 // meaning at run-time. We use the term "pre-regstate" to refer to
1246 // the register allocation state that occurs prior to an instruction's
1247 // execution, and "post-regstate" to refer to the state that occurs
1248 // after an instruction's execution, e.g.:
1250 // pre-regstate: ebx(ins)
1251 // instruction: mov eax, ebx // mov dst, src
1252 // post-regstate: eax(ins)
1254 // At run-time, the instruction updates the pre-regstate into the
1255 // post-regstate (and these states are the real machine's regstates).
1256 // But when allocating registers, because we go backwards, the
1257 // pre-regstate is constructed from the post-regstate (and these
1258 // regstates are those stored in RegAlloc).
1260 // One consequence of generating code backwards is that we tend to
1261 // both spill and restore registers as early (at run-time) as
1262 // possible; this is good for tolerating memory latency. If we
1263 // generated code forwards, we would expect to both spill and restore
1264 // registers as late (at run-time) as possible; this might be better
1265 // for reducing register pressure.
1267 // Another thing to note: we provide N_LOOKAHEAD instruction's worth
1268 // of lookahead because it's useful for backends. This is nice and
1269 // easy because once read() gets to the LIR_start at the beginning of
1270 // the buffer it'll just keep regetting it.
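    // Added annotation (illustrative sketch): because we visit the LIR backwards,
    // lookahead[0] is the instruction being assembled and lookahead[1],
    // lookahead[2], ... are the instructions that precede it in buffer order and
    // will be assembled next. A backend can peek at them without consuming
    // anything, e.g.
    //
    //     if (lookahead[1]->isop(LIR_int)) { /* maybe pick a cheaper encoding */ }
    //
    // since the loop below refills the window on every iteration.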
1272 for (int32_t i = 0; i < N_LOOKAHEAD; i++)
1273 lookahead[i] = reader->read();
1275 while (!lookahead[0]->isop(LIR_start))
1277 LInsp ins = lookahead[0]; // give it a shorter name for local use
1278 LOpcode op = ins->opcode();
1280 bool required = ins->isStmt() || ins->isUsed();
1281 if (!required)
1282 goto end_of_loop;
1284 #ifdef NJ_VERBOSE
1285 // Output the post-regstate (registers and/or activation).
1286 // Because asm output comes in reverse order, doing it now means
1287 // it is printed after the LIR and asm, exactly when the
1288 // post-regstate should be shown.
1289 if ((_logc->lcbits & LC_Assembly) && (_logc->lcbits & LC_Activation))
1290 printActivationState();
1291 if ((_logc->lcbits & LC_Assembly) && (_logc->lcbits & LC_RegAlloc))
1292 printRegState();
1293 #endif
1295 switch (op)
1297 default:
1298 NanoAssertMsgf(false, "unsupported LIR instruction: %d\n", op);
1299 break;
1301 case LIR_regfence:
1302 evictAllActiveRegs();
1303 break;
1305 case LIR_live:
1306 case LIR_flive:
1307 CASE64(LIR_qlive:) {
1308 countlir_live();
1309 LInsp op1 = ins->oprnd1();
1310 // alloca's are meant to live until the point of the LIR_live instruction; marking
1311 // other expressions as live ensures that they remain so at loop bottoms.
1312 // alloca areas require special treatment because they are accessed indirectly and
1313 // the indirect accesses are invisible to the assembler, other than via LIR_live.
1314 // other expression results are only accessed directly in ways that are visible to
1315 // the assembler, so extending those expression's lifetimes past the last loop edge
1316 // isn't necessary.
1317 if (op1->isop(LIR_alloc)) {
1318 findMemFor(op1);
1319 } else {
1320 pending_lives.add(ins);
1322 break;
1325 case LIR_ret:
1326 case LIR_fret:
1327 CASE64(LIR_qret:) {
1328 countlir_ret();
1329 asm_ret(ins);
1330 break;
1333 // Allocate some stack space. The value of this instruction
1334 // is the address of the stack space.
1335 case LIR_alloc: {
1336 countlir_alloc();
1337 NanoAssert(ins->isInAr());
1338 if (ins->isInReg()) {
1339 Register r = ins->getReg();
1340 asm_restore(ins, r);
1341 _allocator.retire(r);
1342 ins->clearReg();
1344 freeResourcesOf(ins);
1345 break;
1347 case LIR_int:
1349 countlir_imm();
1350 asm_immi(ins);
1351 break;
1353 #ifdef NANOJIT_64BIT
1354 case LIR_quad:
1356 countlir_imm();
1357 asm_immq(ins);
1358 break;
1360 #endif
1361 case LIR_float:
1363 countlir_imm();
1364 asm_immf(ins);
1365 break;
1367 case LIR_param:
1369 countlir_param();
1370 asm_param(ins);
1371 break;
1373 #if NJ_SOFTFLOAT_SUPPORTED
1374 case LIR_callh:
1376 // return result of quad-call in register
1377 deprecated_prepResultReg(ins, rmask(retRegs[1]));
1378 // if hi half was used, we must use the call to ensure it happens
1379 findSpecificRegFor(ins->oprnd1(), retRegs[0]);
1380 break;
1382 case LIR_qlo:
1384 countlir_qlo();
1385 asm_qlo(ins);
1386 break;
1388 case LIR_qhi:
1390 countlir_qhi();
1391 asm_qhi(ins);
1392 break;
1394 case LIR_qjoin:
1396 countlir_qjoin();
1397 asm_qjoin(ins);
1398 break;
1400 #endif
1401 CASE64(LIR_qcmov:)
1402 case LIR_cmov:
1404 countlir_cmov();
1405 asm_cmov(ins);
1406 break;
1408 case LIR_ldzb:
1409 case LIR_ldzs:
1410 case LIR_ldsb:
1411 case LIR_ldss:
1412 case LIR_ld:
1414 countlir_ld();
1415 asm_load32(ins);
1416 break;
1419 case LIR_ld32f:
1420 case LIR_ldf:
1421 CASE64(LIR_ldq:)
1423 countlir_ldq();
1424 asm_load64(ins);
1425 break;
1427 case LIR_neg:
1428 case LIR_not:
1430 countlir_alu();
1431 asm_neg_not(ins);
1432 break;
1435 #if defined NANOJIT_64BIT
1436 case LIR_qiadd:
1437 case LIR_qiand:
1438 case LIR_qilsh:
1439 case LIR_qursh:
1440 case LIR_qirsh:
1441 case LIR_qior:
1442 case LIR_qxor:
1444 asm_qbinop(ins);
1445 break;
1447 #endif
1449 case LIR_add:
1450 case LIR_sub:
1451 case LIR_mul:
1452 case LIR_and:
1453 case LIR_or:
1454 case LIR_xor:
1455 case LIR_lsh:
1456 case LIR_rsh:
1457 case LIR_ush:
1458 CASE86(LIR_div:)
1459 CASE86(LIR_mod:)
1461 countlir_alu();
1462 asm_arith(ins);
1463 break;
1465 case LIR_fneg:
1467 countlir_fpu();
1468 asm_fneg(ins);
1469 break;
1471 case LIR_fadd:
1472 case LIR_fsub:
1473 case LIR_fmul:
1474 case LIR_fdiv:
1476 countlir_fpu();
1477 asm_fop(ins);
1478 break;
1480 case LIR_i2f:
1482 countlir_fpu();
1483 asm_i2f(ins);
1484 break;
1486 case LIR_u2f:
1488 countlir_fpu();
1489 asm_u2f(ins);
1490 break;
1492 case LIR_f2i:
1494 countlir_fpu();
1495 asm_f2i(ins);
1496 break;
1498 #ifdef NANOJIT_64BIT
1499 case LIR_i2q:
1500 case LIR_u2q:
1502 countlir_alu();
1503 asm_promote(ins);
1504 break;
1506 case LIR_q2i:
1508 countlir_alu();
1509 asm_q2i(ins);
1510 break;
1512 #endif
1513 case LIR_stb:
1514 case LIR_sts:
1515 case LIR_sti:
1517 countlir_st();
1518 asm_store32(op, ins->oprnd1(), ins->disp(), ins->oprnd2());
1519 break;
1521 case LIR_st32f:
1522 case LIR_stfi:
1523 CASE64(LIR_stqi:)
1525 countlir_stq();
1526 LIns* value = ins->oprnd1();
1527 LIns* base = ins->oprnd2();
1528 int dr = ins->disp();
1529 #if NJ_SOFTFLOAT_SUPPORTED
1530 if (value->isop(LIR_qjoin) && op == LIR_stfi)
1532 // This is correct for little-endian only.
1533 asm_store32(LIR_sti, value->oprnd1(), dr, base);
1534 asm_store32(LIR_sti, value->oprnd2(), dr+4, base);
1536 else
1537 #endif
1539 asm_store64(op, value, dr, base);
1541 break;
1544 case LIR_j:
1546 countlir_jmp();
1547 LInsp to = ins->getTarget();
1548 LabelState *label = _labels.get(to);
1549 // the jump is always taken so whatever register state we
1550 // have from downstream code, is irrelevant to code before
1551 // this jump. so clear it out. we will pick up register
1552 // state from the jump target, if we have seen that label.
1553 releaseRegisters();
1554 if (label && label->addr) {
1555 // forward jump - pick up register state from target.
1556 unionRegisterState(label->regs);
1557 JMP(label->addr);
1559 else {
1560 // backwards jump
1561 handleLoopCarriedExprs(pending_lives);
1562 if (!label) {
1563 // save empty register state at loop header
1564 _labels.add(to, 0, _allocator);
1566 else {
1567 intersectRegisterState(label->regs);
1569 JMP(0);
1570 _patches.put(_nIns, to);
1572 break;
1575 case LIR_jt:
1576 case LIR_jf:
1578 countlir_jcc();
1579 LInsp to = ins->getTarget();
1580 LIns* cond = ins->oprnd1();
1581 LabelState *label = _labels.get(to);
1582 if (label && label->addr) {
1583 // forward jump to known label. need to merge with label's register state.
1584 unionRegisterState(label->regs);
1585 asm_branch(op == LIR_jf, cond, label->addr);
1587 else {
1588 // back edge.
1589 handleLoopCarriedExprs(pending_lives);
1590 if (!label) {
1591 // evict all registers, most conservative approach.
1592 evictAllActiveRegs();
1593 _labels.add(to, 0, _allocator);
1595 else {
1596 // evict all registers, most conservative approach.
1597 intersectRegisterState(label->regs);
1599 NIns *branch = asm_branch(op == LIR_jf, cond, 0);
1600 _patches.put(branch,to);
1602 break;
1605 #if NJ_JTBL_SUPPORTED
1606 case LIR_jtbl:
1608 countlir_jtbl();
1609 // Multiway jump can contain both forward and backward jumps.
1610 // Out of range indices aren't allowed or checked.
1611 // Code after this jtbl instruction is unreachable.
1612 releaseRegisters();
1613 NanoAssert(_allocator.countActive() == 0);
1615 uint32_t count = ins->getTableSize();
1616 bool has_back_edges = false;
1618 // Merge the regstates of labels we have already seen.
1619 for (uint32_t i = count; i-- > 0;) {
1620 LIns* to = ins->getTarget(i);
1621 LabelState *lstate = _labels.get(to);
1622 if (lstate) {
1623 unionRegisterState(lstate->regs);
1624 verbose_only( RefBuf b; )
1625 asm_output(" %u: [&%s]", i, _thisfrag->lirbuf->printer->formatRef(&b, to));
1626 } else {
1627 has_back_edges = true;
1630 asm_output("forward edges");
1632 // In a multi-way jump, the register allocator has no ability to deal
1633 // with two existing edges that have conflicting register assignments, unlike
1634 // a conditional branch where code can be inserted on the fall-through path
1635 // to reconcile registers. So, frontends *must* insert LIR_regfence at labels of
1636 // forward jtbl jumps. Check here to make sure no registers were picked up from
1637 // any forward edges.
1638 NanoAssert(_allocator.countActive() == 0);
1640 if (has_back_edges) {
1641 handleLoopCarriedExprs(pending_lives);
1642 // save merged (empty) register state at target labels we haven't seen yet
1643 for (uint32_t i = count; i-- > 0;) {
1644 LIns* to = ins->getTarget(i);
1645 LabelState *lstate = _labels.get(to);
1646 if (!lstate) {
1647 _labels.add(to, 0, _allocator);
1648 verbose_only( RefBuf b; )
1649 asm_output(" %u: [&%s]", i, _thisfrag->lirbuf->printer->formatRef(&b, to));
1652 asm_output("backward edges");
1655 // Emit the jump instruction, which allocates 1 register for the jump index.
1656 NIns** native_table = new (_dataAlloc) NIns*[count];
1657 asm_output("[%p]:", (void*)native_table);
1658 _patches.put((NIns*)native_table, ins);
1659 asm_jtbl(ins, native_table);
1660 break;
1662 #endif
1664 case LIR_label:
1666 countlir_label();
1667 LabelState *label = _labels.get(ins);
1668 // add profiling inc, if necessary.
1669 verbose_only( if (_logc->lcbits & LC_FragProfile) {
1670 if (ins == _thisfrag->loopLabel)
1671 asm_inc_m32(& _thisfrag->profCount);
1673 if (!label) {
1674 // label seen first, normal target of forward jump, save addr & allocator
1675 _labels.add(ins, _nIns, _allocator);
1677 else {
1678 // we're at the top of a loop
1679 NanoAssert(label->addr == 0);
1680 //evictAllActiveRegs();
1681 intersectRegisterState(label->regs);
1682 label->addr = _nIns;
1684 verbose_only(
1685 RefBuf b;
1686 if (_logc->lcbits & LC_Assembly) {
1687 asm_output("[%s]", _thisfrag->lirbuf->printer->formatRef(&b, ins));
1689 break;
1691 case LIR_xbarrier: {
1692 break;
1694 #ifdef NANOJIT_IA32
1695 case LIR_xtbl: {
1696 NIns* exit = asm_exit(ins); // does intersectRegisterState()
1697 asm_switch(ins, exit);
1698 break;
1700 #else
1701 case LIR_xtbl:
1702 NanoAssertMsg(0, "Not supported for this architecture");
1703 break;
1704 #endif
1705 case LIR_xt:
1706 case LIR_xf:
1708 verbose_only( _thisfrag->nStaticExits++; )
1709 countlir_xcc();
1710 // we only support cmp with guard right now; we also assume it is 'close' and only emit the branch
1711 NIns* exit = asm_exit(ins); // does intersectRegisterState()
1712 LIns* cond = ins->oprnd1();
1713 asm_branch(op == LIR_xf, cond, exit);
1714 break;
1716 case LIR_x:
1718 verbose_only( _thisfrag->nStaticExits++; )
1719 countlir_x();
1720 // generate the side exit branch on the main trace.
1721 NIns *exit = asm_exit(ins);
1722 JMP( exit );
1723 break;
1725 case LIR_addxov:
1726 case LIR_subxov:
1727 case LIR_mulxov:
1729 verbose_only( _thisfrag->nStaticExits++; )
1730 countlir_xcc();
1731 countlir_alu();
1732 NIns* exit = asm_exit(ins); // does intersectRegisterState()
1733 asm_branch_xov(op, exit);
1734 asm_arith(ins);
1735 break;
1738 case LIR_feq:
1739 case LIR_fle:
1740 case LIR_flt:
1741 case LIR_fgt:
1742 case LIR_fge:
1744 countlir_fpu();
1745 asm_fcond(ins);
1746 break;
1748 case LIR_eq:
1749 case LIR_le:
1750 case LIR_lt:
1751 case LIR_gt:
1752 case LIR_ge:
1753 case LIR_ult:
1754 case LIR_ule:
1755 case LIR_ugt:
1756 case LIR_uge:
1757 #ifdef NANOJIT_64BIT
1758 case LIR_qeq:
1759 case LIR_qle:
1760 case LIR_qlt:
1761 case LIR_qgt:
1762 case LIR_qge:
1763 case LIR_qult:
1764 case LIR_qule:
1765 case LIR_qugt:
1766 case LIR_quge:
1767 #endif
1769 countlir_alu();
1770 asm_cond(ins);
1771 break;
1774 case LIR_fcall:
1775 #ifdef NANOJIT_64BIT
1776 case LIR_qcall:
1777 #endif
1778 case LIR_icall:
1780 countlir_call();
1781 asm_call(ins);
1782 break;
1785 #ifdef VTUNE
1786 case LIR_file:
1788 // we traverse backwards so we are now hitting the file
1789 // that is associated with a bunch of LIR_lines we already have seen
1790 uintptr_t currentFile = ins->oprnd1()->imm32();
1791 cgen->jitFilenameUpdate(currentFile);
1792 break;
1794 case LIR_line:
1796 // add a new table entry; we don't yet know which file it belongs
1797 // to so we need to add it to the update table too
1798 // note the alloc, actual act is delayed; see above
1799 uint32_t currentLine = (uint32_t) ins->oprnd1()->imm32();
1800 cgen->jitLineNumUpdate(currentLine);
1801 cgen->jitAddRecord((uintptr_t)_nIns, 0, currentLine, true);
1802 break;
1804 #endif // VTUNE
1807 #ifdef NJ_VERBOSE
1808 // We have to do final LIR printing inside this loop. If we do it
1809 // before this loop, we end up printing a lot of dead LIR
1810 // instructions.
1812 // We print the LIns after generating the code. This ensures that
1813 // the LIns will appear in debug output *before* the generated
1814 // code, because Assembler::outputf() prints everything in reverse.
1816 // Note that some live LIR instructions won't be printed. Eg. an
1817 // immediate won't be printed unless it is explicitly loaded into
1818 // a register (as opposed to being incorporated into an immediate
1819 // field in another machine instruction).
1821 if (_logc->lcbits & LC_Assembly) {
1822 InsBuf b;
1823 LInsPrinter* printer = _thisfrag->lirbuf->printer;
1824 outputf(" %s", printer->formatIns(&b, ins));
1825 if (ins->isGuard() && ins->oprnd1() && ins->oprnd1()->isCmp()) {
1826 // Special case: code is generated for guard conditions at
1827 // the same time that code is generated for the guard
1828 // itself. If the condition is only used by the guard, we
1829 // must print it now otherwise it won't get printed. So
1830 // we do print it now, with an explanatory comment. If
1831 // the condition *is* used again we'll end up printing it
1832 // twice, but that's ok.
1833 outputf(" %s # codegen'd with the %s",
1834 printer->formatIns(&b, ins->oprnd1()), lirNames[op]);
1836 } else if (ins->isCmov()) {
1837 // Likewise for cmov conditions.
1838 outputf(" %s # codegen'd with the %s",
1839 printer->formatIns(&b, ins->oprnd1()), lirNames[op]);
1842 #if defined NANOJIT_IA32 || defined NANOJIT_X64
1843 else if (ins->isop(LIR_mod)) {
1844 // There's a similar case when a div feeds into a mod.
1845 outputf(" %s # codegen'd with the mod",
1846 printer->formatIns(&b, ins->oprnd1()));
1848 #endif
1850 #endif
1852 if (error())
1853 return;
1855 #ifdef VTUNE
1856 cgen->jitCodePosUpdate((uintptr_t)_nIns);
1857 #endif
1859 // check that all is well (don't check in exit paths since it's more complicated)
1860 debug_only( pageValidate(); )
1861 debug_only( resourceConsistencyCheck(); )
1863 end_of_loop:
1864 for (int32_t i = 1; i < N_LOOKAHEAD; i++)
1865 lookahead[i-1] = lookahead[i];
1866 lookahead[N_LOOKAHEAD-1] = reader->read();
1871 * Write a jump table for the given SwitchInfo and store the table
1872 * address in the SwitchInfo. Every entry will initially point to
1873 * target.
1875 void Assembler::emitJumpTable(SwitchInfo* si, NIns* target)
1877 si->table = (NIns **) alloc.alloc(si->count * sizeof(NIns*));
1878 for (uint32_t i = 0; i < si->count; ++i)
1879 si->table[i] = target;
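    // Added annotation (illustrative usage; 'exitStub' is just a placeholder
    // name): a caller setting up a multi-way branch might do
    //
    //     emitJumpTable(si, exitStub);   // every case initially exits
    //
    // and Assembler::patch(SideExit*, SwitchInfo*), earlier in this file, later
    // overwrites si->table[si->index] with a compiled fragment's entry point.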
1882 void Assembler::assignSavedRegs()
1884 // Restore saved registers.
1885 LirBuffer *b = _thisfrag->lirbuf;
1886 for (int i=0, n = NumSavedRegs; i < n; i++) {
1887 LIns *p = b->savedRegs[i];
1888 if (p)
1889 findSpecificRegForUnallocated(p, savedRegs[p->paramArg()]);
1893 void Assembler::reserveSavedRegs()
1895 LirBuffer *b = _thisfrag->lirbuf;
1896 for (int i = 0, n = NumSavedRegs; i < n; i++) {
1897 LIns *ins = b->savedRegs[i];
1898 if (ins)
1899 findMemFor(ins);
1903 void Assembler::assignParamRegs()
1905 LInsp state = _thisfrag->lirbuf->state;
1906 if (state)
1907 findSpecificRegForUnallocated(state, argRegs[state->paramArg()]);
1908 LInsp param1 = _thisfrag->lirbuf->param1;
1909 if (param1)
1910 findSpecificRegForUnallocated(param1, argRegs[param1->paramArg()]);
1913 void Assembler::handleLoopCarriedExprs(InsList& pending_lives)
1915 // ensure that exprs spanning the loop are marked live at the end of the loop
1916 reserveSavedRegs();
1917 for (Seq<LIns*> *p = pending_lives.get(); p != NULL; p = p->tail) {
1918 LIns *ins = p->head;
1919 NanoAssert(ins->isLive());
1920 LIns *op1 = ins->oprnd1();
1921 // Must findMemFor even if we're going to findRegFor; loop-carried
1922 // operands may spill on another edge, and we need them to always
1923 // spill to the same place.
1924 #if NJ_USES_QUAD_CONSTANTS
1925 // Exception: if float constants are true constants, we should
1926 // never call findMemFor on those ops.
1927 if (!op1->isconstf())
1928 #endif
1930 findMemFor(op1);
1932 if (!op1->isImmAny())
1933 findRegFor(op1, ins->isop(LIR_flive) ? FpRegs : GpRegs);
1936 // clear this list since we have now dealt with those lifetimes. extending
1937 // their lifetimes again later (earlier in the code) serves no purpose.
1938 pending_lives.clear();
1941 void AR::freeEntryAt(uint32_t idx)
1943 NanoAssert(idx > 0 && idx <= _highWaterMark);
1945 // NB: this loop relies on _entries[0] being NULL,
1946 // so that we are guaranteed to terminate
1947 // without accessing negative entries.
1948 LIns* i = _entries[idx];
1949 NanoAssert(i != NULL);
1950 do {
1951 _entries[idx] = NULL;
1952 idx--;
1953 } while (_entries[idx] == i);
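    // Added annotation (illustrative): for a two-slot (64-bit) entry recorded at
    // indices idx-1 and idx, the loop NULLs both slots and then stops because
    // _entries[idx-2] holds a different LIns (or the NULL sentinel at
    // _entries[0]), so only this entry's slots are released.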
1956 #ifdef NJ_VERBOSE
1957 void Assembler::printRegState()
1959 char* s = &outline[0];
1960 VMPI_memset(s, ' ', 26); s[26] = '\0';
1961 s += VMPI_strlen(s);
1962 VMPI_sprintf(s, "RR");
1963 s += VMPI_strlen(s);
1965 for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
1966 LIns *ins = _allocator.getActive(r);
1967 if (ins) {
1968 NanoAssertMsg(!_allocator.isFree(r),
1969 "Coding error; register is both free and active! " );
1970 RefBuf b;
1971 const char* n = _thisfrag->lirbuf->printer->formatRef(&b, ins);
1973 if (ins->isop(LIR_param) && ins->paramKind()==1 &&
1974 r == Assembler::savedRegs[ins->paramArg()])
1976                         // don't print callee-saved regs that aren't used
1977 continue;
1980 VMPI_sprintf(s, " %s(%s)", gpn(r), n);
1981 s += VMPI_strlen(s);
1984 output();
1987 void Assembler::printActivationState()
1989 char* s = &outline[0];
1990 VMPI_memset(s, ' ', 26); s[26] = '\0';
1991 s += VMPI_strlen(s);
1992 VMPI_sprintf(s, "AR");
1993 s += VMPI_strlen(s);
1995 LIns* ins = 0;
1996 uint32_t nStackSlots = 0;
1997 int32_t arIndex = 0;
1998 for (AR::Iter iter(_activation); iter.next(ins, nStackSlots, arIndex); )
2000 RefBuf b;
2001 const char* n = _thisfrag->lirbuf->printer->formatRef(&b, ins);
2002 if (nStackSlots > 1) {
2003 VMPI_sprintf(s," %d-%d(%s)", 4*arIndex, 4*(arIndex+nStackSlots-1), n);
2005 else {
2006 VMPI_sprintf(s," %d(%s)", 4*arIndex, n);
2008 s += VMPI_strlen(s);
2010 output();
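        // Sample line (illustrative): "AR  4(s0) 8-12(d1) 16(i2)" -- offsets are bytes
        // (4 bytes per stack slot) and multi-slot entries print as a byte range.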
2012 #endif
2014 inline bool AR::isEmptyRange(uint32_t start, uint32_t nStackSlots) const
2016 for (uint32_t i=0; i < nStackSlots; i++)
2018 if (_entries[start-i] != NULL)
2019 return false;
2021 return true;
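    // Note that the range is scanned downwards from 'start' (slots start, start-1, ...,
    // start-nStackSlots+1), matching how reserveEntry() below fills multi-slot entries.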
2024 uint32_t AR::reserveEntry(LIns* ins)
2026 uint32_t const nStackSlots = nStackSlotsFor(ins);
2028 if (nStackSlots == 1)
2030 for (uint32_t i = 1; i <= _highWaterMark; i++)
2032 if (_entries[i] == NULL)
2034 _entries[i] = ins;
2035 return i;
2038 if (_highWaterMark < NJ_MAX_STACK_ENTRY - 1)
2040 NanoAssert(_entries[_highWaterMark+1] == BAD_ENTRY);
2041 _highWaterMark++;
2042 _entries[_highWaterMark] = ins;
2043 return _highWaterMark;
2046 else
2048                 // Allocate a larger block on an 8-byte boundary.
2049 uint32_t const start = nStackSlots + (nStackSlots & 1);
2050 for (uint32_t i = start; i <= _highWaterMark; i += 2)
2052 if (isEmptyRange(i, nStackSlots))
2054 // place the entry in the table and mark the instruction with it
2055 for (uint32_t j=0; j < nStackSlots; j++)
2057 NanoAssert(i-j <= _highWaterMark);
2058 NanoAssert(_entries[i-j] == NULL);
2059 _entries[i-j] = ins;
2061 return i;
2065 // Be sure to account for any 8-byte-round-up when calculating spaceNeeded.
2066 uint32_t const spaceLeft = NJ_MAX_STACK_ENTRY - _highWaterMark - 1;
2067 uint32_t const spaceNeeded = nStackSlots + (_highWaterMark & 1);
2068 if (spaceLeft >= spaceNeeded)
2070 if (_highWaterMark & 1)
2072 NanoAssert(_entries[_highWaterMark+1] == BAD_ENTRY);
2073 _entries[_highWaterMark+1] = NULL;
2075 _highWaterMark += spaceNeeded;
2076 for (uint32_t j = 0; j < nStackSlots; j++)
2078 NanoAssert(_highWaterMark-j < NJ_MAX_STACK_ENTRY);
2079 NanoAssert(_entries[_highWaterMark-j] == BAD_ENTRY);
2080 _entries[_highWaterMark-j] = ins;
2082 return _highWaterMark;
2085 // no space. oh well.
2086 return 0;
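        // Illustrative walk-through: stack slots are 4 bytes, so a quad needs
        // nStackSlots == 2; the search starts at index 2 and steps by 2, keeping each
        // (i, i-1) pair 8-byte aligned. If neither reuse nor growing the high-water
        // mark succeeds, 0 is returned and arReserve() turns that into StackFull.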
2089 #ifdef _DEBUG
2090 void AR::checkForResourceLeaks() const
2092 for (uint32_t i = 1; i <= _highWaterMark; i++) {
2093                 NanoAssertMsgf(_entries[i] == NULL, "frame entry %d wasn't freed\n", 4*i);
2096 #endif
2098 uint32_t Assembler::arReserve(LIns* ins)
2100 uint32_t i = _activation.reserveEntry(ins);
2101 if (!i)
2102 setError(StackFull);
2103 return i;
2106 void Assembler::arFree(LIns* ins)
2108 NanoAssert(ins->isInAr());
2109 uint32_t arIndex = ins->getArIndex();
2110 NanoAssert(arIndex);
2111 NanoAssert(_activation.isValidEntry(arIndex, ins));
2112             _activation.freeEntryAt(arIndex);        // free any stack space associated with the entry
2116      * Move regs around so that SavedRegs contains the highest-priority regs.
2118 void Assembler::evictScratchRegsExcept(RegisterMask ignore)
2120 // Find the top GpRegs that are candidates to put in SavedRegs.
2122 // 'tosave' is a binary heap stored in an array. The root is tosave[0],
2123 // left child is at i+1, right child is at i+2.
2125 Register tosave[LastReg-FirstReg+1];
2126 int len=0;
2127 RegAlloc *regs = &_allocator;
2128 for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
2129 if (rmask(r) & GpRegs & ~ignore) {
2130 LIns *ins = regs->getActive(r);
2131 if (ins) {
2132 if (canRemat(ins)) {
2133 NanoAssert(ins->getReg() == r);
2134 evict(ins);
2136 else {
2137 int32_t pri = regs->getPriority(r);
2138 // add to heap by adding to end and bubbling up
2139 int j = len++;
2140 while (j > 0 && pri > regs->getPriority(tosave[j/2])) {
2141 tosave[j] = tosave[j/2];
2142 j /= 2;
2144 NanoAssert(size_t(j) < sizeof(tosave)/sizeof(tosave[0]));
2145 tosave[j] = r;
2151             // Now tosave holds the live exprs in priority order.
2152 // Allocate each of the top priority exprs to a SavedReg.
2154 RegisterMask allow = SavedRegs;
2155 while (allow && len > 0) {
2156 // get the highest priority var
2157 Register hi = tosave[0];
2158 if (!(rmask(hi) & SavedRegs)) {
2159 LIns *ins = regs->getActive(hi);
2160 Register r = findRegFor(ins, allow);
2161 allow &= ~rmask(r);
2163 else {
2164 // hi is already in a saved reg, leave it alone.
2165 allow &= ~rmask(hi);
2168 // remove from heap by replacing root with end element and bubbling down.
2169 if (allow && --len > 0) {
2170 Register last = tosave[len];
2171 int j = 0;
2172 while (j+1 < len) {
2173 int child = j+1;
2174 if (j+2 < len && regs->getPriority(tosave[j+2]) > regs->getPriority(tosave[j+1]))
2175 child++;
2176 if (regs->getPriority(last) > regs->getPriority(tosave[child]))
2177 break;
2178 tosave[j] = tosave[child];
2179 j = child;
2181 tosave[j] = last;
2185 // now evict everything else.
2186 evictSomeActiveRegs(~(SavedRegs | ignore));
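        // Net effect: the highest-priority live GP values end up in (or remain in)
        // callee-saved registers, cheaply rematerializable values are evicted outright,
        // and the final call evicts everything else outside SavedRegs|ignore.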
2189 void Assembler::evictAllActiveRegs()
2191 // generate code to restore callee saved registers
2192 // @todo speed this up
2193 for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
2194 evictIfActive(r);
2198 void Assembler::evictSomeActiveRegs(RegisterMask regs)
2200 // generate code to restore callee saved registers
2201 // @todo speed this up
2202 for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
2203 if ((rmask(r) & regs)) {
2204 evictIfActive(r);
2210 * Merge the current regstate with a previously stored version.
2212      * Situation                          Change to _allocator
2213      * ---------                          --------------------
2214      * !current & !saved                  none
2215      * !current & saved                   add saved
2216      * current & !saved                   evict current (unionRegisterState does nothing)
2217      * current & saved & current==saved   none
2218      * current & saved & current!=saved   evict current, add saved
2220 void Assembler::intersectRegisterState(RegAlloc& saved)
2222 Register regsTodo[LastReg + 1];
2223 LIns* insTodo[LastReg + 1];
2224 int nTodo = 0;
2226 // Do evictions and pops first.
2227 verbose_only(bool shouldMention=false; )
2228 // The obvious thing to do here is to iterate from FirstReg to LastReg.
2229 // viz: for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) ...
2230 // However, on ARM that causes lower-numbered integer registers
2231         // to be saved at higher addresses, which inhibits the formation
2232 // of load/store multiple instructions. Hence iterate the loop the
2233 // other way. The "r <= LastReg" guards against wraparound in
2234 // the case where Register is treated as unsigned and FirstReg is zero.
2236 // Note, the loop var is deliberately typed as int (*not* Register)
2237 // to outsmart compilers that will otherwise report
2238 // "error: comparison is always true due to limited range of data type".
2239 for (int ri = LastReg; ri >= FirstReg && ri <= LastReg; ri = int(prevreg(Register(ri))))
2241 Register const r = Register(ri);
2242 LIns* curins = _allocator.getActive(r);
2243 LIns* savedins = saved.getActive(r);
2244 if (curins != savedins)
2246 if (savedins) {
2247 regsTodo[nTodo] = r;
2248 insTodo[nTodo] = savedins;
2249 nTodo++;
2251 if (curins) {
2252 //_nvprof("intersect-evict",1);
2253 verbose_only( shouldMention=true; )
2254 NanoAssert(curins->getReg() == r);
2255 evict(curins);
2258 #ifdef NANOJIT_IA32
2259 if (savedins && (rmask(r) & x87Regs)) {
2260 verbose_only( shouldMention=true; )
2261 FSTP(r);
2263 #endif
2266 // Now reassign mainline registers.
2267 for (int i = 0; i < nTodo; i++) {
2268 findSpecificRegFor(insTodo[i], regsTodo[i]);
2270 verbose_only(
2271 if (shouldMention)
2272 verbose_outputf("## merging registers (intersect) with existing edge");
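        // Example (illustrative): if r3 currently holds insA but held insB at the saved
        // edge, insA is evicted by the first loop and insB is put back into r3 by the
        // second; evicting first guarantees the target registers are free before
        // findSpecificRegFor() re-establishes the saved assignments.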
2277 * Merge the current state of the registers with a previously stored version.
2279      * Situation                          Change to _allocator
2280      * ---------                          --------------------
2281      * !current & !saved                  none
2282      * !current & saved                   add saved
2283      * current & !saved                   none  (intersectRegisterState evicts current)
2284      * current & saved & current==saved   none
2285      * current & saved & current!=saved   evict current, add saved
2287 void Assembler::unionRegisterState(RegAlloc& saved)
2289 Register regsTodo[LastReg + 1];
2290 LIns* insTodo[LastReg + 1];
2291 int nTodo = 0;
2293 // Do evictions and pops first.
2294 verbose_only(bool shouldMention=false; )
2295 for (Register r = FirstReg; r <= LastReg; r = nextreg(r))
2297 LIns* curins = _allocator.getActive(r);
2298 LIns* savedins = saved.getActive(r);
2299 if (curins != savedins)
2301 if (savedins) {
2302 regsTodo[nTodo] = r;
2303 insTodo[nTodo] = savedins;
2304 nTodo++;
2306 if (curins && savedins) {
2307 //_nvprof("union-evict",1);
2308 verbose_only( shouldMention=true; )
2309 NanoAssert(curins->getReg() == r);
2310 evict(curins);
2313 #ifdef NANOJIT_IA32
2314 if (rmask(r) & x87Regs) {
2315 if (savedins) {
2316 FSTP(r);
2318 else if (curins) {
2319 // saved state did not have fpu reg allocated,
2320 // so we must evict here to keep x87 stack balanced.
2321 evict(curins);
2323 verbose_only( shouldMention=true; )
2325 #endif
2328 // Now reassign mainline registers.
2329 for (int i = 0; i < nTodo; i++) {
2330 findSpecificRegFor(insTodo[i], regsTodo[i]);
2332 verbose_only(
2333 if (shouldMention)
2334 verbose_outputf("## merging registers (union) with existing edge");
2338 // Scan table for instruction with the lowest priority, meaning it is used
2339 // furthest in the future.
2340 LIns* Assembler::findVictim(RegisterMask allow)
2342 NanoAssert(allow);
2343 LIns *ins, *vic = 0;
2344 int allow_pri = 0x7fffffff;
2345 for (Register r = FirstReg; r <= LastReg; r = nextreg(r))
2347 if ((allow & rmask(r)) && (ins = _allocator.getActive(r)) != 0)
2349 int pri = canRemat(ins) ? 0 : _allocator.getPriority(r);
2350 if (!vic || pri < allow_pri) {
2351 vic = ins;
2352 allow_pri = pri;
2356 NanoAssert(vic != 0);
2357 return vic;
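        // Values that canRemat() can recompute cheaply get priority 0, so they are
        // always chosen as victims ahead of values that would need a real spill.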
2360 #ifdef NJ_VERBOSE
2361 char Assembler::outline[8192];
2362 char Assembler::outlineEOL[512];
2364 void Assembler::output()
2366 // The +1 is for the terminating NUL char.
2367 VMPI_strncat(outline, outlineEOL, sizeof(outline)-(strlen(outline)+1));
2369 if (_outputCache) {
2370 char* str = new (alloc) char[VMPI_strlen(outline)+1];
2371 VMPI_strcpy(str, outline);
2372 _outputCache->insert(str);
2373 } else {
2374 _logc->printf("%s\n", outline);
2377 outline[0] = '\0';
2378 outlineEOL[0] = '\0';
2381 void Assembler::outputf(const char* format, ...)
2383 va_list args;
2384 va_start(args, format);
2386 outline[0] = '\0';
2387 vsprintf(outline, format, args);
2388 output();
2391 void Assembler::setOutputForEOL(const char* format, ...)
2393 va_list args;
2394 va_start(args, format);
2396 outlineEOL[0] = '\0';
2397 vsprintf(outlineEOL, format, args);
2399 #endif // NJ_VERBOSE
2401 void LabelStateMap::add(LIns *label, NIns *addr, RegAlloc &regs) {
2402 LabelState *st = new (alloc) LabelState(addr, regs);
2403 labels.put(label, st);
2406 LabelState* LabelStateMap::get(LIns *label) {
2407 return labels.get(label);
2410 #endif /* FEATURE_NANOJIT */