Bug 617935: Check string lengths using StringBuffer. (r=lw)
[mozilla-central.git] / js / src / nanojit / Assembler.cpp
blob d6d56fb6d6daac465883d787702338122184db7a
1 /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
2 /* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
3 /* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
16 * The Original Code is [Open Source Virtual Machine].
18 * The Initial Developer of the Original Code is
19 * Adobe System Incorporated.
20 * Portions created by the Initial Developer are Copyright (C) 2004-2007
21 * the Initial Developer. All Rights Reserved.
23 * Contributor(s):
24 * Adobe AS3 Team
26 * Alternatively, the contents of this file may be used under the terms of
27 * either the GNU General Public License Version 2 or later (the "GPL"), or
28 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
40 #include "nanojit.h"
42 #ifdef FEATURE_NANOJIT
44 #ifdef VMCFG_VTUNE
45 #include "../core/CodegenLIR.h"
46 #endif
48 #ifdef _MSC_VER
49 // disable some specific warnings which are normally useful, but pervasive in the code-gen macros
50 #pragma warning(disable:4310) // cast truncates constant value
51 #endif
53 #ifdef VMCFG_VTUNE
54 namespace vtune {
55 using namespace nanojit;
56 void vtuneStart(void*, NIns*);
57 void vtuneEnd(void*, NIns*);
58 void vtuneLine(void*, int, NIns*);
59 void vtuneFile(void*, void*);
61 using namespace vtune;
62 #endif // VMCFG_VTUNE
65 namespace nanojit
67 /**
68 * Need the following:
70 * - merging paths ( build a graph? ), possibly use external rep to drive codegen
72 Assembler::Assembler(CodeAlloc& codeAlloc, Allocator& dataAlloc, Allocator& alloc, AvmCore* core, LogControl* logc, const Config& config)
73 : alloc(alloc)
74 , _codeAlloc(codeAlloc)
75 , _dataAlloc(dataAlloc)
76 , _thisfrag(NULL)
77 , _branchStateMap(alloc)
78 , _patches(alloc)
79 , _labels(alloc)
80 , _noise(NULL)
81 #if NJ_USES_IMMD_POOL
82 , _immDPool(alloc)
83 #endif
84 , codeList(NULL)
85 , _epilogue(NULL)
86 , _err(None)
87 #if PEDANTIC
88 , pedanticTop(NULL)
89 #endif
90 #ifdef VMCFG_VTUNE
91 , vtuneHandle(NULL)
92 #endif
93 , _config(config)
95 nInit(core);
96 (void)logc;
97 verbose_only( _logc = logc; )
98 verbose_only( _outputCache = 0; )
99 verbose_only( outline[0] = '\0'; )
100 verbose_only( outlineEOL[0] = '\0'; )
102 reset();
105 // Per-opcode register hint table. Default to no hints for all
106 // instructions. It's not marked const because individual back-ends can
107 // install hint values for opcodes of interest in nInit().
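// For example (illustrative sketch, not taken from this file): an x86
// back end could make integer call results prefer the return register by
// doing something like
//     nHints[LIR_calli] = rmask(retRegs[0]);
// inside its nInit().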
108 RegisterMask Assembler::nHints[LIR_sentinel+1] = {
109 #define OP___(op, number, repKind, retType, isCse) \
111 #include "LIRopcode.tbl"
112 #undef OP___
116 #ifdef _DEBUG
118 /*static*/ LIns* const AR::BAD_ENTRY = (LIns*)0xdeadbeef;
120 void AR::validateQuick()
122 NanoAssert(_highWaterMark < NJ_MAX_STACK_ENTRY);
123 NanoAssert(_entries[0] == NULL);
124 // Only check a few entries around _highWaterMark.
125 uint32_t const RADIUS = 4;
126 uint32_t const lo = (_highWaterMark > 1 + RADIUS ? _highWaterMark - RADIUS : 1);
127 uint32_t const hi = (_highWaterMark + 1 + RADIUS < NJ_MAX_STACK_ENTRY ? _highWaterMark + 1 + RADIUS : NJ_MAX_STACK_ENTRY);
128 for (uint32_t i = lo; i <= _highWaterMark; ++i)
129 NanoAssert(_entries[i] != BAD_ENTRY);
130 for (uint32_t i = _highWaterMark+1; i < hi; ++i)
131 NanoAssert(_entries[i] == BAD_ENTRY);
134 void AR::validateFull()
136 NanoAssert(_highWaterMark < NJ_MAX_STACK_ENTRY);
137 NanoAssert(_entries[0] == NULL);
138 for (uint32_t i = 1; i <= _highWaterMark; ++i)
139 NanoAssert(_entries[i] != BAD_ENTRY);
140 for (uint32_t i = _highWaterMark+1; i < NJ_MAX_STACK_ENTRY; ++i)
141 NanoAssert(_entries[i] == BAD_ENTRY);
144 void AR::validate()
146 static uint32_t validateCounter = 0;
147 if (++validateCounter >= 100)
149 validateFull();
150 validateCounter = 0;
152 else
154 validateQuick();
158 #endif
160 inline void AR::clear()
162 _highWaterMark = 0;
163 NanoAssert(_entries[0] == NULL);
164 #ifdef _DEBUG
165 for (uint32_t i = 1; i < NJ_MAX_STACK_ENTRY; ++i)
166 _entries[i] = BAD_ENTRY;
167 #endif
170 bool AR::Iter::next(LIns*& ins, uint32_t& nStackSlots, int32_t& arIndex)
172 while (_i <= _ar._highWaterMark) {
173 ins = _ar._entries[_i];
174 if (ins) {
175 arIndex = _i;
176 nStackSlots = nStackSlotsFor(ins);
177 _i += nStackSlots;
178 return true;
180 _i++;
182 ins = NULL;
183 nStackSlots = 0;
184 arIndex = 0;
185 return false;
188 void Assembler::arReset()
190 _activation.clear();
191 _branchStateMap.clear();
192 _patches.clear();
193 _labels.clear();
194 #if NJ_USES_IMMD_POOL
195 _immDPool.clear();
196 #endif
199 void Assembler::registerResetAll()
201 nRegisterResetAll(_allocator);
202 _allocator.managed = _allocator.free;
204 // At start, should have some registers free and none active.
205 NanoAssert(0 != _allocator.free);
206 NanoAssert(0 == _allocator.activeMask());
207 #ifdef NANOJIT_IA32
208 debug_only(_fpuStkDepth = 0; )
209 #endif
212 // Legend for register sets: A = allowed, P = preferred, F = free, S = SavedReg.
214 // Finds a register in 'setA___' to store the result of 'ins' (one from
215 // 'set_P__' if possible), evicting one if necessary. Doesn't consider
216 // the prior state of 'ins'.
218 // Nb: 'setA___' comes from the instruction's use, 'set_P__' comes from its def.
219 // Eg. in 'add(call(...), ...)':
220 // - the call's use means setA___==GpRegs;
221 // - the call's def means set_P__==rmask(retRegs[0]).
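// Illustrative sketch (assumed usage, not taken from this file): to place
// the result of an integer instruction in a general-purpose register,
// preferring the first return register, a caller might write
//     Register r = registerAlloc(ins, GpRegs, rmask(retRegs[0]));
// where GpRegs plays the role of setA___ (from the use) and
// rmask(retRegs[0]) the role of set_P__ (from the def).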
223 Register Assembler::registerAlloc(LIns* ins, RegisterMask setA___, RegisterMask set_P__)
225 Register r;
226 RegisterMask set__F_ = _allocator.free;
227 RegisterMask setA_F_ = setA___ & set__F_;
229 if (setA_F_) {
230 RegisterMask set___S = SavedRegs;
231 RegisterMask setA_FS = setA_F_ & set___S;
232 RegisterMask setAPF_ = setA_F_ & set_P__;
233 RegisterMask setAPFS = setA_FS & set_P__;
234 RegisterMask set;
236 if (setAPFS) set = setAPFS;
237 else if (setAPF_) set = setAPF_;
238 else if (setA_FS) set = setA_FS;
239 else set = setA_F_;
241 r = nRegisterAllocFromSet(set);
242 _allocator.addActive(r, ins);
243 ins->setReg(r);
244 } else {
245 // Nothing free, steal one.
246 // LSRA says pick the one with the furthest use.
247 LIns* vic = findVictim(setA___);
248 NanoAssert(vic->isInReg());
249 r = vic->getReg();
251 evict(vic);
253 // r ends up staying active, but the LIns defining it changes.
254 _allocator.removeFree(r);
255 _allocator.addActive(r, ins);
256 ins->setReg(r);
259 return r;
262 // Finds a register in 'allow' to store a temporary value (one not
263 // associated with a particular LIns), evicting one if necessary. The
264 // returned register is marked as being free and so can only be safely
265 // used for code generation purposes until the regstate is next inspected
266 // or updated.
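// Usage sketch (assumption): a back end needing a scratch GP register
// while expanding a single LIR instruction could write
//     Register tmp = registerAllocTmp(GpRegs);
// and must be finished with 'tmp' before the regstate is next inspected,
// because 'tmp' is left marked as free.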
267 Register Assembler::registerAllocTmp(RegisterMask allow)
269 LIns dummyIns;
270 Register r = registerAlloc(&dummyIns, allow, /*prefer*/0);
272 // Mark r as free, ready for use as a temporary value.
273 _allocator.removeActive(r);
274 _allocator.addFree(r);
275 return r;
278 void Assembler::codeAlloc(NIns *&start, NIns *&end, NIns *&eip
279 verbose_only(, size_t &nBytes))
281 // save the block we just filled
282 if (start)
283 CodeAlloc::add(codeList, start, end);
285 // CodeAlloc contract: allocations never fail
286 _codeAlloc.alloc(start, end);
287 verbose_only( nBytes += (end - start) * sizeof(NIns); )
288 NanoAssert(uintptr_t(end) - uintptr_t(start) >= (size_t)LARGEST_UNDERRUN_PROT);
289 eip = end;
292 void Assembler::clearNInsPtrs()
294 _nIns = 0;
295 _nExitIns = 0;
296 codeStart = codeEnd = 0;
297 exitStart = exitEnd = 0;
298 codeList = 0;
301 void Assembler::reset()
303 clearNInsPtrs();
304 nativePageReset();
305 registerResetAll();
306 arReset();
309 #ifdef _DEBUG
310 void Assembler::pageValidate()
312 if (error()) return;
313 // This may be a normal code chunk or an exit code chunk.
314 NanoAssertMsg(codeStart <= _nIns && _nIns <= codeEnd,
315 "Native instruction pointer overstep paging bounds; check overrideProtect for last instruction");
317 #endif
319 #ifdef _DEBUG
321 bool AR::isValidEntry(uint32_t idx, LIns* ins) const
323 return idx > 0 && idx <= _highWaterMark && _entries[idx] == ins;
326 void AR::checkForResourceConsistency(const RegAlloc& regs)
328 validate();
329 for (uint32_t i = 1; i <= _highWaterMark; ++i)
331 LIns* ins = _entries[i];
332 if (!ins)
333 continue;
334 uint32_t arIndex = ins->getArIndex();
335 NanoAssert(arIndex != 0);
336 if (ins->isop(LIR_allocp)) {
337 int const n = i + (ins->size()>>2);
338 for (int j=i+1; j < n; j++) {
339 NanoAssert(_entries[j]==ins);
341 NanoAssert(arIndex == (uint32_t)n-1);
342 i = n-1;
344 else if (ins->isQorD()) {
345 NanoAssert(_entries[i + 1]==ins);
346 i += 1; // skip high word
348 else {
349 NanoAssertMsg(arIndex == i, "Stack record index mismatch");
351 NanoAssertMsg(!ins->isInReg() || regs.isConsistent(ins->getReg(), ins),
352 "Register record mismatch");
356 void Assembler::resourceConsistencyCheck()
358 NanoAssert(!error());
359 #ifdef NANOJIT_IA32
360 // Within the expansion of a single LIR instruction, we may use the x87
361 // stack for unmanaged temporaries. Otherwise, we do not use the x87 stack
362 // as such, but use the top element alone as a single allocatable FP register.
363 // Compensation code must be inserted to keep the stack balanced and avoid
364 // overflow, and the mechanisms for this are rather fragile and IA32-specific.
365 // The predicate below should hold between any pair of instructions within
366 // a basic block, at labels, and just after a conditional branch. Currently,
367 // we enforce this condition between all pairs of instructions, but this is
368 // overly restrictive, and would fail if we did not generate unreachable x87
369 // stack pops following unconditional branches.
370 NanoAssert((_allocator.active[REGNUM(FST0)] && _fpuStkDepth == -1) ||
371 (!_allocator.active[REGNUM(FST0)] && _fpuStkDepth == 0));
372 #endif
373 _activation.checkForResourceConsistency(_allocator);
374 registerConsistencyCheck();
377 void Assembler::registerConsistencyCheck()
379 RegisterMask managed = _allocator.managed;
380 for (Register r = lsReg(managed); managed; r = nextLsReg(managed, r)) {
381 // A register managed by register allocation must be either
382 // free or active, but not both.
383 if (_allocator.isFree(r)) {
384 NanoAssertMsgf(_allocator.getActive(r)==0,
385 "register %s is free but assigned to ins", gpn(r));
386 } else {
387 // An LIns defining a register must have that register in
388 // its reservation.
389 LIns* ins = _allocator.getActive(r);
390 NanoAssert(ins);
391 NanoAssertMsg(r == ins->getReg(), "Register record mismatch");
395 RegisterMask not_managed = ~_allocator.managed;
396 for (Register r = lsReg(not_managed); not_managed; r = nextLsReg(not_managed, r)) {
397 // A register not managed by register allocation must be
398 // neither free nor active.
399 if (REGNUM(r) <= LastRegNum) {
400 NanoAssert(!_allocator.isFree(r));
401 NanoAssert(!_allocator.getActive(r));
405 #endif /* _DEBUG */
407 void Assembler::findRegFor2(RegisterMask allowa, LIns* ia, Register& ra,
408 RegisterMask allowb, LIns* ib, Register& rb)
410 // There should be some overlap between 'allowa' and 'allowb', else
411 // there's no point calling this function.
412 NanoAssert(allowa & allowb);
414 if (ia == ib) {
415 ra = rb = findRegFor(ia, allowa & allowb); // use intersection(allowa, allowb)
417 } else if (ib->isInRegMask(allowb)) {
418 // 'ib' is already in an allowable reg -- don't let it get evicted
419 // when finding 'ra'.
420 rb = ib->getReg();
421 ra = findRegFor(ia, allowa & ~rmask(rb));
423 } else {
424 ra = findRegFor(ia, allowa);
425 rb = findRegFor(ib, allowb & ~rmask(ra));
429 Register Assembler::findSpecificRegFor(LIns* i, Register w)
431 return findRegFor(i, rmask(w));
434 // Like findRegFor(), but called when the LIns is used as a pointer. It
435 // doesn't have to be called, since findRegFor() can still be used, but it can
436 // optimize the LIR_allocp case by indexing off FP, thus saving the use of
437 // a GpReg.
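// Illustrative example (hypothetical values): if 'base' is a LIR_allocp
// whose frame displacement is k, this folds k into 'd' and returns FP,
// so the access becomes FP-relative and no GpReg is consumed for the base.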
439 Register Assembler::getBaseReg(LIns* base, int &d, RegisterMask allow)
441 #if !PEDANTIC
442 if (base->isop(LIR_allocp)) {
443 // The value of a LIR_allocp is a pointer to its stack memory,
444 // which is always relative to FP. So we can just return FP if we
445 // also adjust 'd' (and can do so in a valid manner). Or, in the
446 // PEDANTIC case, we can just assign a register as normal;
447 // findRegFor() will allocate the stack memory for LIR_allocp if
448 // necessary.
449 d += findMemFor(base);
450 return FP;
452 #else
453 (void) d;
454 #endif
455 return findRegFor(base, allow);
458 // Like findRegFor2(), but used for stores where the base value has the
459 // same type as the stored value, eg. in asm_store32() on 32-bit platforms
460 // and asm_store64() on 64-bit platforms. Similar to getBaseReg(),
461 // findRegFor2() can be called instead, but this function can optimize the
462 // case where the base value is a LIR_allocp.
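// Sketch: in an asm_store32(value, d(base)) where 'base' is a LIR_allocp,
// this picks rb=FP, folds the alloc's frame displacement into 'd', and
// only allocates a register for 'value'.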
463 void Assembler::getBaseReg2(RegisterMask allowValue, LIns* value, Register& rv,
464 RegisterMask allowBase, LIns* base, Register& rb, int &d)
466 #if !PEDANTIC
467 if (base->isop(LIR_allocp)) {
468 rb = FP;
469 d += findMemFor(base);
470 rv = findRegFor(value, allowValue);
471 return;
473 #else
474 (void) d;
475 #endif
476 findRegFor2(allowValue, value, rv, allowBase, base, rb);
479 RegisterMask Assembler::hint(LIns* ins)
481 RegisterMask prefer = nHints[ins->opcode()];
482 return (prefer == PREFER_SPECIAL) ? nHint(ins) : prefer;
485 // Finds a register in 'allow' to hold the result of 'ins'. Used when we
486 // encounter a use of 'ins'. The actions depend on the prior regstate of
487 // 'ins':
488 // - If the result of 'ins' is not in any register, we find an allowed
489 // one, evicting one if necessary.
490 // - If the result of 'ins' is already in an allowed register, we use that.
491 // - If the result of 'ins' is already in a not-allowed register, we find an
492 // allowed one and move it.
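// Usage sketch (assumption): back ends typically call
//     Register ra = findRegFor(ins->oprnd1(), GpRegs);
// at the point where the code being generated reads oprnd1(), i.e. at a
// use of that operand.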
494 Register Assembler::findRegFor(LIns* ins, RegisterMask allow)
496 if (ins->isop(LIR_allocp)) {
497 // Never allocate a reg for this without stack space too.
498 findMemFor(ins);
501 Register r;
503 if (!ins->isInReg()) {
504 // 'ins' isn't in a register (must be in a spill slot or nowhere).
505 r = registerAlloc(ins, allow, hint(ins));
507 } else if (rmask(r = ins->getReg()) & allow) {
508 // 'ins' is in an allowed register.
509 _allocator.useActive(r);
511 } else {
512 // 'ins' is in a register (r) that's not in 'allow'.
513 #ifdef NANOJIT_IA32
514 if (((rmask(r)&XmmRegs) && !(allow&XmmRegs)) ||
515 ((rmask(r)&x87Regs) && !(allow&x87Regs)))
517 // x87 <-> xmm copy required
518 //_nvprof("fpu-evict",1);
519 evict(ins);
520 r = registerAlloc(ins, allow, hint(ins));
521 } else
522 #elif defined(NANOJIT_PPC) || defined(NANOJIT_MIPS) || defined(NANOJIT_SPARC)
523 if (((rmask(r)&GpRegs) && !(allow&GpRegs)) ||
524 ((rmask(r)&FpRegs) && !(allow&FpRegs)))
526 evict(ins);
527 r = registerAlloc(ins, allow, hint(ins));
528 } else
529 #endif
531 // The post-state register holding 'ins' is 's', the pre-state
532 // register holding 'ins' is 'r'. For example, if s=eax and
533 // r=ecx:
535 // pre-state: ecx(ins)
536 // instruction: mov eax, ecx
537 // post-state: eax(ins)
539 Register s = r;
540 _allocator.retire(r);
541 r = registerAlloc(ins, allow, hint(ins));
543 // 'ins' is in 'allow', in register r (different to the old r);
544 // s is the old r.
545 if ((rmask(s) & GpRegs) && (rmask(r) & GpRegs)) {
546 MR(s, r); // move 'ins' from its pre-state reg (r) to its post-state reg (s)
547 } else {
548 asm_nongp_copy(s, r);
553 return r;
556 // Like findSpecificRegFor(), but only for when 'r' is known to be free
557 // and 'ins' is known to not already have a register allocated. Updates
558 // the regstate (maintaining the invariants) but does not generate any
559 // code. The return value is redundant, always being 'r', but it's
560 // sometimes useful to have it there for assignments.
561 Register Assembler::findSpecificRegForUnallocated(LIns* ins, Register r)
563 if (ins->isop(LIR_allocp)) {
564 // never allocate a reg for this w/out stack space too
565 findMemFor(ins);
568 NanoAssert(!ins->isInReg());
569 NanoAssert(_allocator.free & rmask(r));
571 ins->setReg(r);
572 _allocator.removeFree(r);
573 _allocator.addActive(r, ins);
575 return r;
578 #if NJ_USES_IMMD_POOL
579 const uint64_t* Assembler::findImmDFromPool(uint64_t q)
581 uint64_t* p = _immDPool.get(q);
582 if (!p)
584 p = new (_dataAlloc) uint64_t;
585 *p = q;
586 _immDPool.put(q, p);
588 return p;
590 #endif
592 int Assembler::findMemFor(LIns *ins)
594 #if NJ_USES_IMMD_POOL
595 NanoAssert(!ins->isImmD());
596 #endif
597 if (!ins->isInAr()) {
598 uint32_t const arIndex = arReserve(ins);
599 ins->setArIndex(arIndex);
600 NanoAssert(_activation.isValidEntry(ins->getArIndex(), ins) == (arIndex != 0));
602 return arDisp(ins);
605 // XXX: this function is dangerous and should be phased out;
606 // See bug 513615. Calls to it should be replaced with a
607 // prepareResultReg() / generate code / freeResourcesOf() sequence.
608 Register Assembler::deprecated_prepResultReg(LIns *ins, RegisterMask allow)
610 #ifdef NANOJIT_IA32
611 // We used to have to worry about possibly popping the x87 stack here.
612 // But this function is no longer used on i386, and this assertion
613 // ensures that.
614 NanoAssert(0);
615 #endif
616 Register r = findRegFor(ins, allow);
617 deprecated_freeRsrcOf(ins);
618 return r;
621 // Finds a register in 'allow' to hold the result of 'ins'. Also
622 // generates code to spill the result if necessary. Called just prior to
623 // generating the code for 'ins' (because we generate code backwards).
625 // An example where no spill is necessary. Lines marked '*' are those
626 // done by this function.
628 // regstate: R
629 // asm: define res into r
630 // * regstate: R + r(res)
631 // ...
632 // asm: use res in r
634 // An example where a spill is necessary.
636 // regstate: R
637 // asm: define res into r
638 // * regstate: R + r(res)
639 // * asm: spill res from r
640 // regstate: R
641 // ...
642 // asm: restore res into r2
643 // regstate: R + r2(res) + other changes from "..."
644 // asm: use res in r2
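// Sketch of a common back-end pattern (assumed, simplified):
//     Register rr = prepareResultReg(ins, GpRegs);
//     ... emit the defining instruction with rr as its destination ...
//     freeResourcesOf(ins);
// Because emission runs backwards, any spill generated here lands after
// the defining instruction in run-time order.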
646 Register Assembler::prepareResultReg(LIns *ins, RegisterMask allow)
648 // At this point, we know the result of 'ins' is used later in the
649 // code, unless it is a call to an impure function that must be
650 // included for effect even though its result is ignored. It may have
651 // had to be evicted, in which case the restore will have already been
652 // generated, so we now generate the spill. QUERY: Is there any attempt
653 // to elide the spill if we know that all restores can be rematerialized?
654 #ifdef NANOJIT_IA32
655 const bool notInFST0 = (!ins->isInReg() || ins->getReg() != FST0);
656 Register r = findRegFor(ins, allow);
657 // If the result register is FST0, but FST0 is not in the post-regstate,
658 // then we must pop the x87 stack. This may occur because the result is
659 // unused, or because it has been stored to a spill slot or an XMM register.
660 const bool needPop = notInFST0 && (r == FST0);
661 const bool didSpill = asm_maybe_spill(ins, needPop);
662 if (!didSpill && needPop) {
663 // If the instruction is spilled, then the pop will have already
664 // been performed by the store to the stack slot. Otherwise, we
665 // must pop now. This may occur when the result of a LIR_calld
666 // to an impure (side-effecting) function is not used.
667 FSTP(FST0);
669 #else
670 Register r = findRegFor(ins, allow);
671 asm_maybe_spill(ins, false);
672 #endif
673 return r;
676 bool Assembler::asm_maybe_spill(LIns* ins, bool pop)
678 if (ins->isInAr()) {
679 int d = arDisp(ins);
680 Register r = ins->getReg();
681 verbose_only( RefBuf b;
682 if (_logc->lcbits & LC_Native) {
683 setOutputForEOL(" <= spill %s",
684 _thisfrag->lirbuf->printer->formatRef(&b, ins)); } )
685 #ifdef NANOJIT_IA32
686 asm_spill(r, d, pop);
687 #else
688 (void)pop;
689 asm_spill(r, d, ins->isQorD());
690 #endif
691 return true;
693 return false;
696 // XXX: This function is error-prone and should be phased out; see bug 513615.
697 void Assembler::deprecated_freeRsrcOf(LIns *ins)
699 if (ins->isInReg()) {
700 asm_maybe_spill(ins, /*pop*/false);
701 _allocator.retire(ins->getReg()); // free any register associated with entry
702 ins->clearReg();
704 if (ins->isInAr()) {
705 arFree(ins); // free any AR space associated with entry
706 ins->clearArIndex();
710 // Frees all record of registers and spill slots used by 'ins'.
711 void Assembler::freeResourcesOf(LIns *ins)
713 if (ins->isInReg()) {
714 _allocator.retire(ins->getReg()); // free any register associated with entry
715 ins->clearReg();
717 if (ins->isInAr()) {
718 arFree(ins); // free any AR space associated with entry
719 ins->clearArIndex();
723 // Frees 'r' in the RegAlloc regstate, if it's not already free.
724 void Assembler::evictIfActive(Register r)
726 if (LIns* vic = _allocator.getActive(r)) {
727 NanoAssert(vic->getReg() == r);
728 evict(vic);
732 // Frees 'r' (which currently holds the result of 'vic') in the regstate.
733 // An example:
735 // pre-regstate: eax(ld1)
736 // instruction: mov ebx,-4(ebp) <= restore add1 # %ebx is dest
737 // post-regstate: eax(ld1) ebx(add1)
739 // At run-time we are *restoring* 'add1' into %ebx, hence the call to
740 // asm_restore(). But at regalloc-time we are moving backwards through
741 // the code, so in that sense we are *evicting* 'add1' from %ebx.
743 void Assembler::evict(LIns* vic)
745 // Not free, need to steal.
746 Register r = vic->getReg();
748 NanoAssert(!_allocator.isFree(r));
749 NanoAssert(vic == _allocator.getActive(r));
751 verbose_only( RefBuf b;
752 if (_logc->lcbits & LC_Native) {
753 setOutputForEOL(" <= restore %s",
754 _thisfrag->lirbuf->printer->formatRef(&b, vic)); } )
755 asm_restore(vic, r);
757 _allocator.retire(r);
758 vic->clearReg();
760 // At this point 'vic' is unused (if rematerializable), or in a spill
761 // slot (if not).
764 // If we have this:
766 // W = ld(addp(B, lshp(I, k)))[d] , where int(1) <= k <= int(3)
768 // then we set base=B, index=I, scale=k.
770 // Otherwise, we must have this:
772 // W = ld(addp(B, I))[d]
774 // and we set base=B, index=I, scale=0.
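// Illustrative (hypothetical LIR): given
//     p = addp(B, lshp(I, 2)) ;  W = ldi p[4]
// this yields base=B, index=I, scale=2, which an x86/x64 back end can
// fold into an addressing mode of the form [B + I*4 + 4].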
776 void Assembler::getBaseIndexScale(LIns* addp, LIns** base, LIns** index, int* scale)
778 NanoAssert(addp->isop(LIR_addp));
780 *base = addp->oprnd1();
781 LIns* rhs = addp->oprnd2();
782 int k;
784 if (rhs->opcode() == LIR_lshp && rhs->oprnd2()->isImmI() &&
785 (k = rhs->oprnd2()->immI(), (1 <= k && k <= 3)))
787 *index = rhs->oprnd1();
788 *scale = k;
789 } else {
790 *index = rhs;
791 *scale = 0;
794 void Assembler::patch(GuardRecord *lr)
796 if (!lr->jmp) // the guard might have been eliminated as redundant
797 return;
798 Fragment *frag = lr->exit->target;
799 NanoAssert(frag->fragEntry != 0);
800 nPatchBranch((NIns*)lr->jmp, frag->fragEntry);
801 CodeAlloc::flushICache(lr->jmp, LARGEST_BRANCH_PATCH);
802 verbose_only(verbose_outputf("patching jump at %p to target %p\n",
803 lr->jmp, frag->fragEntry);)
806 void Assembler::patch(SideExit *exit)
808 GuardRecord *rec = exit->guards;
809 NanoAssert(rec);
810 while (rec) {
811 patch(rec);
812 rec = rec->next;
816 #ifdef NANOJIT_IA32
817 void Assembler::patch(SideExit* exit, SwitchInfo* si)
819 for (GuardRecord* lr = exit->guards; lr; lr = lr->next) {
820 Fragment *frag = lr->exit->target;
821 NanoAssert(frag->fragEntry != 0);
822 si->table[si->index] = frag->fragEntry;
825 #endif
827 NIns* Assembler::asm_exit(LIns* guard)
829 SideExit *exit = guard->record()->exit;
830 NIns* at = 0;
831 if (!_branchStateMap.get(exit))
833 at = asm_leave_trace(guard);
835 else
837 RegAlloc* captured = _branchStateMap.get(exit);
838 intersectRegisterState(*captured);
839 at = exit->target->fragEntry;
840 NanoAssert(at != 0);
841 _branchStateMap.remove(exit);
843 return at;
846 NIns* Assembler::asm_leave_trace(LIns* guard)
848 verbose_only( verbose_outputf("----------------------------------- ## END exit block %p", guard);)
850 // This point is unreachable. So free all the registers. If an
851 // instruction has a stack entry we will leave it alone, otherwise we
852 // free it entirely. intersectRegisterState() will restore.
853 RegAlloc capture = _allocator;
854 releaseRegisters();
856 swapCodeChunks();
857 _inExit = true;
859 #ifdef NANOJIT_IA32
860 debug_only( _sv_fpuStkDepth = _fpuStkDepth; _fpuStkDepth = 0; )
861 #endif
863 nFragExit(guard);
865 // Restore the callee-saved registers and parameters.
866 assignSavedRegs();
867 assignParamRegs();
869 intersectRegisterState(capture);
871 // this can be useful for breaking whenever an exit is taken
872 //INT3();
873 //NOP();
875 // we are done producing the exit logic for the guard so demark where our exit block code begins
876 NIns* jmpTarget = _nIns; // target in exit path for our mainline conditional jump
878 // swap back pointers, effectively storing the last location used in the exit path
879 swapCodeChunks();
880 _inExit = false;
882 //verbose_only( verbose_outputf(" LIR_xt/xf swapCodeChunks, _nIns is now %08X(%08X), _nExitIns is now %08X(%08X)",_nIns, *_nIns,_nExitIns,*_nExitIns) );
883 verbose_only( verbose_outputf("%p:", jmpTarget);)
884 verbose_only( verbose_outputf("----------------------------------- ## BEGIN exit block (LIR_xt|LIR_xf)") );
886 #ifdef NANOJIT_IA32
887 NanoAssertMsgf(_fpuStkDepth == _sv_fpuStkDepth, "LIR_xtf, _fpuStkDepth=%d, expect %d",_fpuStkDepth, _sv_fpuStkDepth);
888 debug_only( _fpuStkDepth = _sv_fpuStkDepth; _sv_fpuStkDepth = 9999; )
889 #endif
891 return jmpTarget;
894 void Assembler::compile(Fragment* frag, Allocator& alloc, bool optimize verbose_only(, LInsPrinter* printer))
896 verbose_only(
897 bool anyVerb = (_logc->lcbits & 0xFFFF & ~LC_FragProfile) > 0;
898 bool liveVerb = (_logc->lcbits & 0xFFFF & LC_Liveness) > 0;
901 /* BEGIN decorative preamble */
902 verbose_only(
903 if (anyVerb) {
904 _logc->printf("========================================"
905 "========================================\n");
906 _logc->printf("=== BEGIN LIR::compile(%p, %p)\n",
907 (void*)this, (void*)frag);
908 _logc->printf("===\n");
910 /* END decorative preamble */
912 verbose_only( if (liveVerb) {
913 _logc->printf("\n");
914 _logc->printf("=== Results of liveness analysis:\n");
915 _logc->printf("===\n");
916 LirReader br(frag->lastIns);
917 LirFilter* lir = &br;
918 if (optimize) {
919 StackFilter* sf = new (alloc) StackFilter(lir, alloc, frag->lirbuf->sp);
920 lir = sf;
922 live(lir, alloc, frag, _logc);
925 /* Set up the generic text output cache for the assembler */
926 verbose_only( StringList asmOutput(alloc); )
927 verbose_only( _outputCache = &asmOutput; )
929 beginAssembly(frag);
930 if (error())
931 return;
933 //_logc->printf("recompile trigger %X kind %d\n", (int)frag, frag->kind);
935 verbose_only( if (anyVerb) {
936 _logc->printf("=== Translating LIR fragments into assembly:\n");
939 // now the main trunk
940 verbose_only( RefBuf b; )
941 verbose_only( if (anyVerb) {
942 _logc->printf("=== -- Compile trunk %s: begin\n", printer->formatAddr(&b, frag));
945 // Used for debug printing, if needed
946 debug_only(ValidateReader *validate = NULL;)
947 verbose_only(
948 ReverseLister *pp_init = NULL;
949 ReverseLister *pp_after_sf = NULL;
952 // The LIR passes through these filters as listed in this
953 // function, viz, top to bottom.
955 // set up backwards pipeline: assembler <- StackFilter <- LirReader
956 LirFilter* lir = new (alloc) LirReader(frag->lastIns);
958 #ifdef DEBUG
959 // VALIDATION
960 validate = new (alloc) ValidateReader(lir);
961 lir = validate;
962 #endif
964 // INITIAL PRINTING
965 verbose_only( if (_logc->lcbits & LC_ReadLIR) {
966 pp_init = new (alloc) ReverseLister(lir, alloc, frag->lirbuf->printer, _logc,
967 "Initial LIR");
968 lir = pp_init;
971 // STACKFILTER
972 if (optimize) {
973 StackFilter* stackfilter = new (alloc) StackFilter(lir, alloc, frag->lirbuf->sp);
974 lir = stackfilter;
977 verbose_only( if (_logc->lcbits & LC_AfterSF) {
978 pp_after_sf = new (alloc) ReverseLister(lir, alloc, frag->lirbuf->printer, _logc,
979 "After StackFilter");
980 lir = pp_after_sf;
983 assemble(frag, lir);
985 // If we were accumulating debug info in the various ReverseListers,
986 // call finish() to emit whatever contents they have accumulated.
987 verbose_only(
988 if (pp_init) pp_init->finish();
989 if (pp_after_sf) pp_after_sf->finish();
992 verbose_only( if (anyVerb) {
993 _logc->printf("=== -- Compile trunk %s: end\n", printer->formatAddr(&b, frag));
996 endAssembly(frag);
998 // Reverse output so that assembly is displayed low-to-high.
999 // Up to this point, _outputCache has been non-NULL, and so has been
1000 // accumulating output. Now we set it to NULL, traverse the entire
1001 // list of stored strings, and hand them a second time to output.
1002 // Since _outputCache is now NULL, outputf just hands these strings
1003 // directly onwards to _logc->printf.
1004 verbose_only( if (anyVerb) {
1005 _logc->printf("\n");
1006 _logc->printf("=== Aggregated assembly output: BEGIN\n");
1007 _logc->printf("===\n");
1008 _outputCache = 0;
1009 for (Seq<char*>* p = asmOutput.get(); p != NULL; p = p->tail) {
1010 char *str = p->head;
1011 outputf(" %s", str);
1013 _logc->printf("===\n");
1014 _logc->printf("=== Aggregated assembly output: END\n");
1017 if (error())
1018 frag->fragEntry = 0;
1020 verbose_only( frag->nCodeBytes += codeBytes; )
1021 verbose_only( frag->nExitBytes += exitBytes; )
1023 /* BEGIN decorative postamble */
1024 verbose_only( if (anyVerb) {
1025 _logc->printf("\n");
1026 _logc->printf("===\n");
1027 _logc->printf("=== END LIR::compile(%p, %p)\n",
1028 (void*)this, (void*)frag);
1029 _logc->printf("========================================"
1030 "========================================\n");
1031 _logc->printf("\n");
1033 /* END decorative postamble */
1036 void Assembler::beginAssembly(Fragment *frag)
1038 verbose_only( codeBytes = 0; )
1039 verbose_only( exitBytes = 0; )
1041 reset();
1043 NanoAssert(codeList == 0);
1044 NanoAssert(codeStart == 0);
1045 NanoAssert(codeEnd == 0);
1046 NanoAssert(exitStart == 0);
1047 NanoAssert(exitEnd == 0);
1048 NanoAssert(_nIns == 0);
1049 NanoAssert(_nExitIns == 0);
1051 _thisfrag = frag;
1052 _inExit = false;
1054 setError(None);
1056 // native code gen buffer setup
1057 nativePageSetup();
1059 // make sure we got at least one page of memory
1060 if (error()) return;
1062 _epilogue = NULL;
1064 nBeginAssembly();
1067 void Assembler::assemble(Fragment* frag, LirFilter* reader)
1069 if (error()) return;
1070 _thisfrag = frag;
1072 // check the fragment is starting out with a sane profiling state
1073 verbose_only( NanoAssert(frag->nStaticExits == 0); )
1074 verbose_only( NanoAssert(frag->nCodeBytes == 0); )
1075 verbose_only( NanoAssert(frag->nExitBytes == 0); )
1076 verbose_only( NanoAssert(frag->profCount == 0); )
1077 verbose_only( if (_logc->lcbits & LC_FragProfile)
1078 NanoAssert(frag->profFragID > 0);
1079 else
1080 NanoAssert(frag->profFragID == 0); )
1082 _inExit = false;
1084 gen(reader);
1086 if (!error()) {
1087 // patch all branches
1088 NInsMap::Iter iter(_patches);
1089 while (iter.next()) {
1090 NIns* where = iter.key();
1091 LIns* target = iter.value();
1092 if (target->isop(LIR_jtbl)) {
1093 // Need to patch up a whole jump table, 'where' is the table.
1094 LIns *jtbl = target;
1095 NIns** native_table = (NIns**) (void *) where;
1096 for (uint32_t i = 0, n = jtbl->getTableSize(); i < n; i++) {
1097 LabelState* lstate = _labels.get(jtbl->getTarget(i));
1098 NIns* ntarget = lstate->addr;
1099 if (ntarget) {
1100 native_table[i] = ntarget;
1101 } else {
1102 setError(UnknownBranch);
1103 break;
1106 } else {
1107 // target is a label for a single-target branch
1108 LabelState *lstate = _labels.get(target);
1109 NIns* ntarget = lstate->addr;
1110 if (ntarget) {
1111 nPatchBranch(where, ntarget);
1112 } else {
1113 setError(UnknownBranch);
1114 break;
1121 void Assembler::cleanupAfterError()
1123 _codeAlloc.freeAll(codeList);
1124 if (_nExitIns)
1125 _codeAlloc.free(exitStart, exitEnd);
1126 _codeAlloc.free(codeStart, codeEnd);
1127 codeList = NULL;
1128 _codeAlloc.markAllExec(); // expensive but safe, we mark all code pages R-X
1131 void Assembler::endAssembly(Fragment* frag)
1133 // don't try to patch code if we are in an error state since we might have partially
1134 // overwritten the code cache already
1135 if (error()) {
1136 // something went wrong, release all allocated code memory
1137 cleanupAfterError();
1138 return;
1141 NIns* fragEntry = genPrologue();
1142 verbose_only( asm_output("[prologue]"); )
1144 debug_only(_activation.checkForResourceLeaks());
1146 NanoAssert(!_inExit);
1147 // save used parts of current block on fragment's code list, free the rest
1148 #if defined(NANOJIT_ARM) || defined(NANOJIT_MIPS)
1149 // [codeStart, _nSlot) ... gap ... [_nIns, codeEnd)
1150 if (_nExitIns) {
1151 _codeAlloc.addRemainder(codeList, exitStart, exitEnd, _nExitSlot, _nExitIns);
1152 verbose_only( exitBytes -= (_nExitIns - _nExitSlot) * sizeof(NIns); )
1154 _codeAlloc.addRemainder(codeList, codeStart, codeEnd, _nSlot, _nIns);
1155 verbose_only( codeBytes -= (_nIns - _nSlot) * sizeof(NIns); )
1156 #else
1157 // [codeStart ... gap ... [_nIns, codeEnd))
1158 if (_nExitIns) {
1159 _codeAlloc.addRemainder(codeList, exitStart, exitEnd, exitStart, _nExitIns);
1160 verbose_only( exitBytes -= (_nExitIns - exitStart) * sizeof(NIns); )
1162 _codeAlloc.addRemainder(codeList, codeStart, codeEnd, codeStart, _nIns);
1163 verbose_only( codeBytes -= (_nIns - codeStart) * sizeof(NIns); )
1164 #endif
1166 // note: the code pages are no longer writable from this point onwards
1167 _codeAlloc.markExec(codeList);
1169 // at this point all our new code is in the d-cache and not the i-cache,
1170 // so flush the i-cache on CPUs that need it.
1171 CodeAlloc::flushICache(codeList);
1173 // save entry point pointers
1174 frag->fragEntry = fragEntry;
1175 frag->setCode(_nIns);
1177 #ifdef VMCFG_VTUNE
1178 if (vtuneHandle)
1180 vtuneEnd(vtuneHandle, codeEnd);
1181 vtuneStart(vtuneHandle, _nIns);
1183 #endif
1185 PERFM_NVPROF("code", CodeAlloc::size(codeList));
1187 #ifdef NANOJIT_IA32
1188 NanoAssertMsgf(_fpuStkDepth == 0,"_fpuStkDepth %d\n",_fpuStkDepth);
1189 #endif
1191 debug_only( pageValidate(); )
1192 NanoAssert(_branchStateMap.isEmpty());
1195 void Assembler::releaseRegisters()
1197 RegisterMask active = _allocator.activeMask();
1198 for (Register r = lsReg(active); active; r = nextLsReg(active, r))
1200 LIns *ins = _allocator.getActive(r);
1201 // Clear reg allocation, preserve stack allocation.
1202 _allocator.retire(r);
1203 NanoAssert(r == ins->getReg());
1204 ins->clearReg();
1208 #ifdef PERFM
1209 #define countlir_live() _nvprof("lir-live",1)
1210 #define countlir_ret() _nvprof("lir-ret",1)
1211 #define countlir_alloc() _nvprof("lir-alloc",1)
1212 #define countlir_var() _nvprof("lir-var",1)
1213 #define countlir_use() _nvprof("lir-use",1)
1214 #define countlir_def() _nvprof("lir-def",1)
1215 #define countlir_imm() _nvprof("lir-imm",1)
1216 #define countlir_param() _nvprof("lir-param",1)
1217 #define countlir_cmov() _nvprof("lir-cmov",1)
1218 #define countlir_ld() _nvprof("lir-ld",1)
1219 #define countlir_ldq() _nvprof("lir-ldq",1)
1220 #define countlir_alu() _nvprof("lir-alu",1)
1221 #define countlir_qjoin() _nvprof("lir-qjoin",1)
1222 #define countlir_qlo() _nvprof("lir-qlo",1)
1223 #define countlir_qhi() _nvprof("lir-qhi",1)
1224 #define countlir_fpu() _nvprof("lir-fpu",1)
1225 #define countlir_st() _nvprof("lir-st",1)
1226 #define countlir_stq() _nvprof("lir-stq",1)
1227 #define countlir_jmp() _nvprof("lir-jmp",1)
1228 #define countlir_jcc() _nvprof("lir-jcc",1)
1229 #define countlir_label() _nvprof("lir-label",1)
1230 #define countlir_xcc() _nvprof("lir-xcc",1)
1231 #define countlir_x() _nvprof("lir-x",1)
1232 #define countlir_call() _nvprof("lir-call",1)
1233 #define countlir_jtbl() _nvprof("lir-jtbl",1)
1234 #else
1235 #define countlir_live()
1236 #define countlir_ret()
1237 #define countlir_alloc()
1238 #define countlir_var()
1239 #define countlir_use()
1240 #define countlir_def()
1241 #define countlir_imm()
1242 #define countlir_param()
1243 #define countlir_cmov()
1244 #define countlir_ld()
1245 #define countlir_ldq()
1246 #define countlir_alu()
1247 #define countlir_qjoin()
1248 #define countlir_qlo()
1249 #define countlir_qhi()
1250 #define countlir_fpu()
1251 #define countlir_st()
1252 #define countlir_stq()
1253 #define countlir_jmp()
1254 #define countlir_jcc()
1255 #define countlir_label()
1256 #define countlir_xcc()
1257 #define countlir_x()
1258 #define countlir_call()
1259 #define countlir_jtbl()
1260 #endif
1262 void Assembler::asm_jmp(LIns* ins, InsList& pending_lives)
1264 NanoAssert((ins->isop(LIR_j) && !ins->oprnd1()) ||
1265 (ins->isop(LIR_jf) && ins->oprnd1()->isImmI(0)) ||
1266 (ins->isop(LIR_jt) && ins->oprnd1()->isImmI(1)));
1268 countlir_jmp();
1269 LIns* to = ins->getTarget();
1270 LabelState *label = _labels.get(to);
1271 // The jump is always taken so whatever register state we
1272 // have from downstream code is irrelevant to code before
1273 // this jump. So clear it out. We will pick up register
1274 // state from the jump target, if we have seen that label.
1275 releaseRegisters();
1276 #ifdef NANOJIT_IA32
1277 // Unreachable, so assume correct stack depth.
1278 debug_only( _fpuStkDepth = 0; )
1279 #endif
1280 if (label && label->addr) {
1281 // Forward jump - pick up register state from target.
1282 unionRegisterState(label->regs);
1283 #ifdef NANOJIT_IA32
1284 // Set stack depth according to the register state we just loaded,
1285 // negating the effect of any unreachable x87 stack pop that might
1286 // have been emitted by unionRegisterState().
1287 debug_only( _fpuStkDepth = (_allocator.getActive(FST0) ? -1 : 0); )
1288 #endif
1289 JMP(label->addr);
1291 else {
1292 // Backwards jump.
1293 handleLoopCarriedExprs(pending_lives);
1294 if (!label) {
1295 // save empty register state at loop header
1296 _labels.add(to, 0, _allocator);
1298 else {
1299 intersectRegisterState(label->regs);
1300 #ifdef NANOJIT_IA32
1301 debug_only( _fpuStkDepth = (_allocator.getActive(FST0) ? -1 : 0); )
1302 #endif
1304 JMP(0);
1305 _patches.put(_nIns, to);
1309 void Assembler::asm_jcc(LIns* ins, InsList& pending_lives)
1311 bool branchOnFalse = (ins->opcode() == LIR_jf);
1312 LIns* cond = ins->oprnd1();
1313 if (cond->isImmI()) {
1314 if ((!branchOnFalse && !cond->immI()) || (branchOnFalse && cond->immI())) {
1315 // jmp never taken, not needed
1316 } else {
1317 asm_jmp(ins, pending_lives); // jmp always taken
1319 return;
1322 // Changes to the logic below will likely need to be propagated to Assembler::asm_jov().
1324 countlir_jcc();
1325 LIns* to = ins->getTarget();
1326 LabelState *label = _labels.get(to);
1327 if (label && label->addr) {
1328 // Forward jump to known label. Need to merge with label's register state.
1329 unionRegisterState(label->regs);
1330 asm_branch(branchOnFalse, cond, label->addr);
1332 else {
1333 // Back edge.
1334 handleLoopCarriedExprs(pending_lives);
1335 if (!label) {
1336 // Evict all registers, most conservative approach.
1337 evictAllActiveRegs();
1338 _labels.add(to, 0, _allocator);
1340 else {
1341 // Evict all registers, most conservative approach.
1342 intersectRegisterState(label->regs);
1344 NIns *branch = asm_branch(branchOnFalse, cond, 0);
1345 _patches.put(branch,to);
1349 void Assembler::asm_jov(LIns* ins, InsList& pending_lives)
1351 // The caller is responsible for countlir_* profiling, unlike
1352 // asm_jcc above. The reason for this is that asm_jov may not be
1353 // called if the instruction is dead, and it is our convention
1354 // to count such instructions anyway.
1355 LOpcode op = ins->opcode();
1356 LIns* to = ins->getTarget();
1357 LabelState *label = _labels.get(to);
1358 if (label && label->addr) {
1359 // forward jump to known label. need to merge with label's register state.
1360 unionRegisterState(label->regs);
1361 asm_branch_ov(op, label->addr);
1363 else {
1364 // back edge.
1365 handleLoopCarriedExprs(pending_lives);
1366 if (!label) {
1367 // evict all registers, most conservative approach.
1368 evictAllActiveRegs();
1369 _labels.add(to, 0, _allocator);
1371 else {
1372 // evict all registers, most conservative approach.
1373 intersectRegisterState(label->regs);
1375 NIns *branch = asm_branch_ov(op, 0);
1376 _patches.put(branch,to);
1380 void Assembler::asm_x(LIns* ins)
1382 verbose_only( _thisfrag->nStaticExits++; )
1383 countlir_x();
1384 // Generate the side exit branch on the main trace.
1385 NIns *exit = asm_exit(ins);
1386 JMP(exit);
1389 void Assembler::asm_xcc(LIns* ins)
1391 LIns* cond = ins->oprnd1();
1392 if (cond->isImmI()) {
1393 if ((ins->isop(LIR_xt) && !cond->immI()) || (ins->isop(LIR_xf) && cond->immI())) {
1394 // guard never taken, not needed
1395 } else {
1396 asm_x(ins); // guard always taken
1398 return;
1401 verbose_only( _thisfrag->nStaticExits++; )
1402 countlir_xcc();
1403 // We only support cmp with guard right now; also assume it is 'close'
1404 // and only emit the branch.
1405 NIns* exit = asm_exit(ins); // does intersectRegisterState()
1406 asm_branch(ins->opcode() == LIR_xf, cond, exit);
1409 // helper function for the nop-insertion feature; it results in no more
1410 // than one no-op instruction being inserted every 128-1151 bytes
1411 static inline uint32_t noiseForNopInsertion(Noise* n) {
1412 return n->getValue(1023) + 128;
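// (Assuming Noise::getValue(1023) yields a value in [0, 1023], the trigger
//  is a pseudo-random byte count in the range [128, 1151], matching the
//  comment above.)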
1415 void Assembler::gen(LirFilter* reader)
1417 NanoAssert(_thisfrag->nStaticExits == 0);
1419 InsList pending_lives(alloc);
1421 NanoAssert(!error());
1423 // compiler hardening setup
1424 NIns* priorIns = _nIns;
1425 int32_t nopInsertTrigger = hardenNopInsertion(_config) ? noiseForNopInsertion(_noise): 0;
1427 // What's going on here: we're visiting all the LIR instructions in
1428 // the buffer, working strictly backwards in buffer-order, and
1429 // generating machine instructions for them as we go.
1431 // For each LIns, we first check if it's live. If so we mark its
1432 // operands as also live, and then generate code for it *if
1433 // necessary*. It may not be necessary if the instruction is an
1434 // expression and code has already been generated for all its uses in
1435 // combination with previously handled instructions (ins->isExtant()
1436 // will return false if this is so).
1438 // Note that the backwards code traversal can make register allocation
1439 // confusing. (For example, we restore a value before we spill it!)
1440 // In particular, words like "before" and "after" must be used very
1441 // carefully -- their meaning at regalloc-time is opposite to their
1442 // meaning at run-time. We use the term "pre-regstate" to refer to
1443 // the register allocation state that occurs prior to an instruction's
1444 // execution, and "post-regstate" to refer to the state that occurs
1445 // after an instruction's execution, e.g.:
1447 // pre-regstate: ebx(ins)
1448 // instruction: mov eax, ebx // mov dst, src
1449 // post-regstate: eax(ins)
1451 // At run-time, the instruction updates the pre-regstate into the
1452 // post-regstate (and these states are the real machine's regstates).
1453 // But when allocating registers, because we go backwards, the
1454 // pre-regstate is constructed from the post-regstate (and these
1455 // regstates are those stored in RegAlloc).
1457 // One consequence of generating code backwards is that we tend to
1458 // both spill and restore registers as early (at run-time) as
1459 // possible; this is good for tolerating memory latency. If we
1460 // generated code forwards, we would expect to both spill and restore
1461 // registers as late (at run-time) as possible; this might be better
1462 // for reducing register pressure.
1464 // The trace must end with one of these opcodes. Mark it as live.
1465 NanoAssert(reader->finalIns()->isop(LIR_x) ||
1466 reader->finalIns()->isop(LIR_xtbl) ||
1467 reader->finalIns()->isRet() ||
1468 isLiveOpcode(reader->finalIns()->opcode()));
1470 for (currIns = reader->read(); !currIns->isop(LIR_start); currIns = reader->read())
1472 LIns* ins = currIns; // give it a shorter name for local use
1474 if (!ins->isLive()) {
1475 NanoAssert(!ins->isExtant());
1476 continue;
1479 #ifdef NJ_VERBOSE
1480 // Output the post-regstate (registers and/or activation).
1481 // Because asm output comes in reverse order, doing it now means
1482 // it is printed after the LIR and native code, exactly when the
1483 // post-regstate should be shown.
1484 if ((_logc->lcbits & LC_Native) && (_logc->lcbits & LC_Activation))
1485 printActivationState();
1486 if ((_logc->lcbits & LC_Native) && (_logc->lcbits & LC_RegAlloc))
1487 printRegState();
1488 #endif
1490 // compiler hardening technique that inserts no-op instructions in the compiled method when nopInsertTrigger < 0
1491 if (hardenNopInsertion(_config))
1493 size_t delta = (uintptr_t)priorIns - (uintptr_t)_nIns; // # bytes that have been emitted since last go-around
1495 // if no codeList then we know priorIns and _nIns are on the same page, otherwise make sure priorIns was not in the previous code block
1496 if (!codeList || !codeList->isInBlock(priorIns)) {
1497 NanoAssert(delta < VMPI_getVMPageSize()); // sanity check
1498 nopInsertTrigger -= (int32_t) delta;
1499 if (nopInsertTrigger < 0)
1501 nopInsertTrigger = noiseForNopInsertion(_noise);
1502 asm_insert_random_nop();
1503 PERFM_NVPROF("hardening:nop-insert", 1);
1506 priorIns = _nIns;
1509 LOpcode op = ins->opcode();
1510 switch (op)
1512 default:
1513 NanoAssertMsgf(false, "unsupported LIR instruction: %d\n", op);
1514 break;
1516 case LIR_regfence:
1517 evictAllActiveRegs();
1518 break;
1520 case LIR_livei:
1521 CASE64(LIR_liveq:)
1522 case LIR_lived: {
1523 countlir_live();
1524 LIns* op1 = ins->oprnd1();
1525 op1->setResultLive();
1526 // LIR_allocp's are meant to live until the point of the
1527 // LIR_livep instruction; marking other expressions as
1528 // live ensures that they remain so at loop bottoms.
1529 // LIR_allocp areas require special treatment because they
1530 // are accessed indirectly and the indirect accesses are
1531 // invisible to the assembler, other than via LIR_livep.
1532 // Other expression results are only accessed directly in
1533 // ways that are visible to the assembler, so extending
1534 // those expressions' lifetimes past the last loop edge
1535 // isn't necessary.
1536 if (op1->isop(LIR_allocp)) {
1537 findMemFor(op1);
1538 } else {
1539 pending_lives.add(ins);
1541 break;
1544 case LIR_reti:
1545 CASE64(LIR_retq:)
1546 case LIR_retd:
1547 countlir_ret();
1548 ins->oprnd1()->setResultLive();
1549 asm_ret(ins);
1550 break;
1552 // Allocate some stack space. The value of this instruction
1553 // is the address of the stack space.
1554 case LIR_allocp:
1555 countlir_alloc();
1556 if (ins->isExtant()) {
1557 NanoAssert(ins->isInAr());
1558 if (ins->isInReg())
1559 evict(ins);
1560 freeResourcesOf(ins);
1562 break;
1564 case LIR_immi:
1565 countlir_imm();
1566 if (ins->isExtant()) {
1567 asm_immi(ins);
1569 break;
1571 #ifdef NANOJIT_64BIT
1572 case LIR_immq:
1573 countlir_imm();
1574 if (ins->isExtant()) {
1575 asm_immq(ins);
1577 break;
1578 #endif
1579 case LIR_immd:
1580 countlir_imm();
1581 if (ins->isExtant()) {
1582 asm_immd(ins);
1584 break;
1586 case LIR_paramp:
1587 countlir_param();
1588 if (ins->isExtant()) {
1589 asm_param(ins);
1591 break;
1593 #if NJ_SOFTFLOAT_SUPPORTED
1594 case LIR_hcalli: {
1595 LIns* op1 = ins->oprnd1();
1596 op1->setResultLive();
1597 if (ins->isExtant()) {
1598 // Return result of quad-call in register.
1599 deprecated_prepResultReg(ins, rmask(retRegs[1]));
1600 // If hi half was used, we must use the call to ensure it happens.
1601 findSpecificRegFor(op1, retRegs[0]);
1603 break;
1606 case LIR_dlo2i:
1607 countlir_qlo();
1608 ins->oprnd1()->setResultLive();
1609 if (ins->isExtant()) {
1610 asm_qlo(ins);
1612 break;
1614 case LIR_dhi2i:
1615 countlir_qhi();
1616 ins->oprnd1()->setResultLive();
1617 if (ins->isExtant()) {
1618 asm_qhi(ins);
1620 break;
1622 case LIR_ii2d:
1623 countlir_qjoin();
1624 ins->oprnd1()->setResultLive();
1625 ins->oprnd2()->setResultLive();
1626 if (ins->isExtant()) {
1627 asm_qjoin(ins);
1629 break;
1630 #endif
1631 case LIR_cmovi:
1632 CASE64(LIR_cmovq:)
1633 case LIR_cmovd:
1634 countlir_cmov();
1635 ins->oprnd1()->setResultLive();
1636 ins->oprnd2()->setResultLive();
1637 ins->oprnd3()->setResultLive();
1638 if (ins->isExtant()) {
1639 asm_cmov(ins);
1641 break;
1643 case LIR_lduc2ui:
1644 case LIR_ldus2ui:
1645 case LIR_ldc2i:
1646 case LIR_lds2i:
1647 case LIR_ldi:
1648 countlir_ld();
1649 ins->oprnd1()->setResultLive();
1650 if (ins->isExtant()) {
1651 asm_load32(ins);
1653 break;
1655 CASE64(LIR_ldq:)
1656 case LIR_ldd:
1657 case LIR_ldf2d:
1658 countlir_ldq();
1659 ins->oprnd1()->setResultLive();
1660 if (ins->isExtant()) {
1661 asm_load64(ins);
1663 break;
1665 case LIR_negi:
1666 case LIR_noti:
1667 countlir_alu();
1668 ins->oprnd1()->setResultLive();
1669 if (ins->isExtant()) {
1670 asm_neg_not(ins);
1672 break;
1674 #if defined NANOJIT_64BIT
1675 case LIR_addq:
1676 case LIR_subq:
1677 case LIR_andq:
1678 case LIR_lshq:
1679 case LIR_rshuq:
1680 case LIR_rshq:
1681 case LIR_orq:
1682 case LIR_xorq:
1683 countlir_alu();
1684 ins->oprnd1()->setResultLive();
1685 ins->oprnd2()->setResultLive();
1686 if (ins->isExtant()) {
1687 asm_qbinop(ins);
1689 break;
1690 #endif
1692 case LIR_addi:
1693 case LIR_subi:
1694 case LIR_muli:
1695 case LIR_andi:
1696 case LIR_ori:
1697 case LIR_xori:
1698 case LIR_lshi:
1699 case LIR_rshi:
1700 case LIR_rshui:
1701 CASE86(LIR_divi:)
1702 countlir_alu();
1703 ins->oprnd1()->setResultLive();
1704 ins->oprnd2()->setResultLive();
1705 if (ins->isExtant()) {
1706 asm_arith(ins);
1708 break;
1710 #if defined NANOJIT_IA32 || defined NANOJIT_X64
1711 CASE86(LIR_modi:)
1712 countlir_alu();
1713 ins->oprnd1()->setResultLive();
1714 if (ins->isExtant()) {
1715 asm_arith(ins);
1717 break;
1718 #endif
1720 case LIR_negd:
1721 countlir_fpu();
1722 ins->oprnd1()->setResultLive();
1723 if (ins->isExtant()) {
1724 asm_fneg(ins);
1726 break;
1728 case LIR_addd:
1729 case LIR_subd:
1730 case LIR_muld:
1731 case LIR_divd:
1732 countlir_fpu();
1733 ins->oprnd1()->setResultLive();
1734 ins->oprnd2()->setResultLive();
1735 if (ins->isExtant()) {
1736 asm_fop(ins);
1738 break;
1740 case LIR_i2d:
1741 countlir_fpu();
1742 ins->oprnd1()->setResultLive();
1743 if (ins->isExtant()) {
1744 asm_i2d(ins);
1746 break;
1748 case LIR_ui2d:
1749 countlir_fpu();
1750 ins->oprnd1()->setResultLive();
1751 if (ins->isExtant()) {
1752 asm_ui2d(ins);
1754 break;
1756 case LIR_d2i:
1757 countlir_fpu();
1758 ins->oprnd1()->setResultLive();
1759 if (ins->isExtant()) {
1760 asm_d2i(ins);
1762 break;
1764 #ifdef NANOJIT_64BIT
1765 case LIR_i2q:
1766 case LIR_ui2uq:
1767 countlir_alu();
1768 ins->oprnd1()->setResultLive();
1769 if (ins->isExtant()) {
1770 asm_ui2uq(ins);
1772 break;
1774 case LIR_q2i:
1775 countlir_alu();
1776 ins->oprnd1()->setResultLive();
1777 if (ins->isExtant()) {
1778 asm_q2i(ins);
1780 break;
1782 case LIR_dasq:
1783 countlir_alu();
1784 ins->oprnd1()->setResultLive();
1785 if (ins->isExtant()) {
1786 asm_dasq(ins);
1788 break;
1790 case LIR_qasd:
1791 countlir_alu();
1792 ins->oprnd1()->setResultLive();
1793 if (ins->isExtant()) {
1794 asm_qasd(ins);
1796 break;
1797 #endif
1798 case LIR_sti2c:
1799 case LIR_sti2s:
1800 case LIR_sti:
1801 countlir_st();
1802 ins->oprnd1()->setResultLive();
1803 ins->oprnd2()->setResultLive();
1804 asm_store32(op, ins->oprnd1(), ins->disp(), ins->oprnd2());
1805 break;
1807 CASE64(LIR_stq:)
1808 case LIR_std:
1809 case LIR_std2f: {
1810 countlir_stq();
1811 ins->oprnd1()->setResultLive();
1812 ins->oprnd2()->setResultLive();
1813 LIns* value = ins->oprnd1();
1814 LIns* base = ins->oprnd2();
1815 int dr = ins->disp();
1816 #if NJ_SOFTFLOAT_SUPPORTED
1817 if (value->isop(LIR_ii2d) && op == LIR_std)
1819 // This is correct for little-endian only.
1820 asm_store32(LIR_sti, value->oprnd1(), dr, base);
1821 asm_store32(LIR_sti, value->oprnd2(), dr+4, base);
1823 else
1824 #endif
1826 asm_store64(op, value, dr, base);
1828 break;
1831 case LIR_j:
1832 asm_jmp(ins, pending_lives);
1833 break;
1835 case LIR_jt:
1836 case LIR_jf:
1837 ins->oprnd1()->setResultLive();
1838 asm_jcc(ins, pending_lives);
1839 break;
1841 #if NJ_JTBL_SUPPORTED
1842 case LIR_jtbl: {
1843 countlir_jtbl();
1844 ins->oprnd1()->setResultLive();
1845 // Multiway jump can contain both forward and backward jumps.
1846 // Out of range indices aren't allowed or checked.
1847 // Code after this jtbl instruction is unreachable.
1848 releaseRegisters();
1849 NanoAssert(_allocator.activeMask() == 0);
1851 uint32_t count = ins->getTableSize();
1852 bool has_back_edges = false;
1854 // Merge the regstates of labels we have already seen.
1855 for (uint32_t i = count; i-- > 0;) {
1856 LIns* to = ins->getTarget(i);
1857 LabelState *lstate = _labels.get(to);
1858 if (lstate) {
1859 unionRegisterState(lstate->regs);
1860 verbose_only( RefBuf b; )
1861 asm_output(" %u: [&%s]", i, _thisfrag->lirbuf->printer->formatRef(&b, to));
1862 } else {
1863 has_back_edges = true;
1866 asm_output("forward edges");
1868 // In a multi-way jump, the register allocator has no ability to deal
1869 // with two existing edges that have conflicting register assignments, unlike
1870 // a conditional branch where code can be inserted on the fall-through path
1871 // to reconcile registers. So, frontends *must* insert LIR_regfence at labels of
1872 // forward jtbl jumps. Check here to make sure no registers were picked up from
1873 // any forward edges.
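// Frontend sketch (assumption): when building LIR, a frontend would emit a
// register fence at each such label, e.g.
//     lirout->ins0(LIR_regfence);
// adjacent to the corresponding lirout->ins0(LIR_label), so that no
// register assignments survive into the label.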
1874 NanoAssert(_allocator.activeMask() == 0);
1876 if (has_back_edges) {
1877 handleLoopCarriedExprs(pending_lives);
1878 // save merged (empty) register state at target labels we haven't seen yet
1879 for (uint32_t i = count; i-- > 0;) {
1880 LIns* to = ins->getTarget(i);
1881 LabelState *lstate = _labels.get(to);
1882 if (!lstate) {
1883 _labels.add(to, 0, _allocator);
1884 verbose_only( RefBuf b; )
1885 asm_output(" %u: [&%s]", i, _thisfrag->lirbuf->printer->formatRef(&b, to));
1888 asm_output("backward edges");
1891 // Emit the jump instruction, which allocates 1 register for the jump index.
1892 NIns** native_table = new (_dataAlloc) NIns*[count];
1893 asm_output("[%p]:", (void*)native_table);
1894 _patches.put((NIns*)native_table, ins);
1895 asm_jtbl(ins, native_table);
1896 break;
1898 #endif
1900 case LIR_label: {
1901 countlir_label();
1902 LabelState *label = _labels.get(ins);
1903 // add profiling inc, if necessary.
1904 verbose_only( if (_logc->lcbits & LC_FragProfile) {
1905 if (ins == _thisfrag->loopLabel)
1906 asm_inc_m32(& _thisfrag->profCount);
1908 if (!label) {
1909 // label seen first, normal target of forward jump, save addr & allocator
1910 _labels.add(ins, _nIns, _allocator);
1912 else {
1913 // we're at the top of a loop
1914 NanoAssert(label->addr == 0);
1915 //evictAllActiveRegs();
1916 intersectRegisterState(label->regs);
1917 label->addr = _nIns;
1919 verbose_only(
1920 RefBuf b;
1921 if (_logc->lcbits & LC_Native) {
1922 asm_output("[%s]", _thisfrag->lirbuf->printer->formatRef(&b, ins));
1924 break;
1927 case LIR_xbarrier:
1928 break;
1930 case LIR_xtbl: {
1931 ins->oprnd1()->setResultLive();
1932 #ifdef NANOJIT_IA32
1933 NIns* exit = asm_exit(ins); // does intersectRegisterState()
1934 asm_switch(ins, exit);
1935 #else
1936 NanoAssertMsg(0, "Not supported for this architecture");
1937 #endif
1938 break;
1941 case LIR_xt:
1942 case LIR_xf:
1943 ins->oprnd1()->setResultLive();
1944 asm_xcc(ins);
1945 break;
1947 case LIR_x:
1948 asm_x(ins);
1949 break;
1951 case LIR_addxovi:
1952 case LIR_subxovi:
1953 case LIR_mulxovi:
1954 verbose_only( _thisfrag->nStaticExits++; )
1955 countlir_xcc();
1956 countlir_alu();
1957 ins->oprnd1()->setResultLive();
1958 ins->oprnd2()->setResultLive();
1959 if (ins->isExtant()) {
1960 NIns* exit = asm_exit(ins); // does intersectRegisterState()
1961 asm_branch_ov(op, exit);
1962 asm_arith(ins);
1964 break;
1966 case LIR_addjovi:
1967 case LIR_subjovi:
1968 case LIR_muljovi:
1969 countlir_jcc();
1970 countlir_alu();
1971 ins->oprnd1()->setResultLive();
1972 ins->oprnd2()->setResultLive();
1973 if (ins->isExtant()) {
1974 asm_jov(ins, pending_lives);
1975 asm_arith(ins);
1977 break;
1979 #ifdef NANOJIT_64BIT
1980 case LIR_addjovq:
1981 case LIR_subjovq:
1982 countlir_jcc();
1983 countlir_alu();
1984 ins->oprnd1()->setResultLive();
1985 ins->oprnd2()->setResultLive();
1986 if (ins->isExtant()) {
1987 asm_jov(ins, pending_lives);
1988 asm_qbinop(ins);
1990 break;
1991 #endif
1993 case LIR_eqd:
1994 case LIR_led:
1995 case LIR_ltd:
1996 case LIR_gtd:
1997 case LIR_ged:
1998 countlir_fpu();
1999 ins->oprnd1()->setResultLive();
2000 ins->oprnd2()->setResultLive();
2001 if (ins->isExtant()) {
2002 asm_condd(ins);
2004 break;
2006 case LIR_eqi:
2007 case LIR_lei:
2008 case LIR_lti:
2009 case LIR_gti:
2010 case LIR_gei:
2011 case LIR_ltui:
2012 case LIR_leui:
2013 case LIR_gtui:
2014 case LIR_geui:
2015 CASE64(LIR_eqq:)
2016 CASE64(LIR_leq:)
2017 CASE64(LIR_ltq:)
2018 CASE64(LIR_gtq:)
2019 CASE64(LIR_geq:)
2020 CASE64(LIR_ltuq:)
2021 CASE64(LIR_leuq:)
2022 CASE64(LIR_gtuq:)
2023 CASE64(LIR_geuq:)
2024 countlir_alu();
2025 ins->oprnd1()->setResultLive();
2026 ins->oprnd2()->setResultLive();
2027 if (ins->isExtant()) {
2028 asm_cond(ins);
2030 break;
2032 case LIR_callv:
2033 case LIR_calli:
2034 CASE64(LIR_callq:)
2035 case LIR_calld:
2036 countlir_call();
2037 for (int i = 0, argc = ins->argc(); i < argc; i++)
2038 ins->arg(i)->setResultLive();
2039 // It must be impure or pure-and-extant -- it couldn't be
2040 // pure-and-not-extant, because there's no way the codegen
2041 // for a call can be folded into the codegen of another
2042 // LIR instruction.
2043 NanoAssert(!ins->callInfo()->_isPure || ins->isExtant());
2044 asm_call(ins);
2045 break;
2047 #ifdef VMCFG_VTUNE
2048 case LIR_file: {
2049                 // We traverse backwards, so we are now hitting the file
2050                 // that is associated with a bunch of LIR_lines we have already seen.
2051 if (vtuneHandle) {
2052 void * currentFile = (void *) ins->oprnd1()->immI();
2053 vtuneFile(vtuneHandle, currentFile);
2055 break;
2057 case LIR_line: {
2058                 // add a new table entry; we don't yet know which file it belongs
2059                 // to, so we need to add it to the update table too.
2060                 // note the alloc; the actual act is delayed; see above
2061 if (vtuneHandle) {
2062 uint32_t currentLine = (uint32_t) ins->oprnd1()->immI();
2063 vtuneLine(vtuneHandle, currentLine, _nIns);
2065 break;
2067 #endif // VMCFG_VTUNE
2069 case LIR_comment:
2070 // Do nothing.
2071 break;
2074 #ifdef NJ_VERBOSE
2075 // We do final LIR printing inside this loop to avoid printing
2076 // dead LIR instructions. We print the LIns after generating the
2077 // code. This ensures that the LIns will appear in debug output
2078 // *before* the native code, because Assembler::outputf()
2079 // prints everything in reverse.
2081 if (_logc->lcbits & LC_AfterDCE) {
2082 InsBuf b;
2083 LInsPrinter* printer = _thisfrag->lirbuf->printer;
2084 if (ins->isop(LIR_comment))
2085 outputf("%s", printer->formatIns(&b, ins));
2086 else
2087 outputf(" %s", printer->formatIns(&b, ins));
2089 #endif
2091 if (error())
2092 return;
2094             // check that all is well (don't check in exit paths since it's more complicated)
2095 debug_only( pageValidate(); )
2096 debug_only( resourceConsistencyCheck(); )
2101 * Write a jump table for the given SwitchInfo and store the table
2102 * address in the SwitchInfo. Every entry will initially point to
2103 * target.
2105 void Assembler::emitJumpTable(SwitchInfo* si, NIns* target)
2107 si->table = (NIns **) alloc.alloc(si->count * sizeof(NIns*));
2108 for (uint32_t i = 0; i < si->count; ++i)
2109 si->table[i] = target;
2112 void Assembler::assignSavedRegs()
2114             // Restore saved registers.
2115 LirBuffer *b = _thisfrag->lirbuf;
2116 for (int i=0, n = NumSavedRegs; i < n; i++) {
2117 LIns *p = b->savedRegs[i];
2118 if (p)
2119 findSpecificRegForUnallocated(p, savedRegs[p->paramArg()]);
2123 void Assembler::reserveSavedRegs()
2125 LirBuffer *b = _thisfrag->lirbuf;
2126 for (int i = 0, n = NumSavedRegs; i < n; i++) {
2127 LIns *ins = b->savedRegs[i];
2128 if (ins)
2129 findMemFor(ins);
2133 void Assembler::assignParamRegs()
2135 LIns* state = _thisfrag->lirbuf->state;
2136 if (state)
2137 findSpecificRegForUnallocated(state, argRegs[state->paramArg()]);
2138 LIns* param1 = _thisfrag->lirbuf->param1;
2139 if (param1)
2140 findSpecificRegForUnallocated(param1, argRegs[param1->paramArg()]);
2143 void Assembler::handleLoopCarriedExprs(InsList& pending_lives)
2145 // ensure that exprs spanning the loop are marked live at the end of the loop
2146 reserveSavedRegs();
2147 for (Seq<LIns*> *p = pending_lives.get(); p != NULL; p = p->tail) {
2148 LIns *ins = p->head;
2149 NanoAssert(isLiveOpcode(ins->opcode()));
2150 LIns *op1 = ins->oprnd1();
2151 // Must findMemFor even if we're going to findRegFor; loop-carried
2152 // operands may spill on another edge, and we need them to always
2153 // spill to the same place.
2154 #if NJ_USES_IMMD_POOL
2155 // Exception: if float constants are true constants, we should
2156 // never call findMemFor on those ops.
2157 if (!op1->isImmD())
2158 #endif
2160 findMemFor(op1);
2162 if (!op1->isImmAny())
2163 findRegFor(op1, ins->isop(LIR_lived) ? FpRegs : GpRegs);
2166             // Clear this list since we have now dealt with those lifetimes.  Extending
2167             // them again later (earlier in the code, since we assemble backwards) serves no purpose.
2168 pending_lives.clear();
2171 void AR::freeEntryAt(uint32_t idx)
2173 NanoAssert(idx > 0 && idx <= _highWaterMark);
2175         // NB: this loop relies on entry[0] being NULL,
2176         // so that we are guaranteed to terminate
2177         // without accessing negative entries.
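        // Multi-slot values store the same LIns in each of their entries (see
        // reserveEntry), so this loop clears every slot of the block at once.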
2178 LIns* i = _entries[idx];
2179 NanoAssert(i != NULL);
2180 do {
2181 _entries[idx] = NULL;
2182 idx--;
2183 } while (_entries[idx] == i);
2186 #ifdef NJ_VERBOSE
2187 void Assembler::printRegState()
2189 char* s = &outline[0];
2190 VMPI_memset(s, ' ', 26); s[26] = '\0';
2191 s += VMPI_strlen(s);
2192 VMPI_sprintf(s, "RR");
2193 s += VMPI_strlen(s);
2195 RegisterMask active = _allocator.activeMask();
2196 for (Register r = lsReg(active); active != 0; r = nextLsReg(active, r)) {
2197 LIns *ins = _allocator.getActive(r);
2198 NanoAssertMsg(!_allocator.isFree(r),
2199 "Coding error; register is both free and active! " );
2200 RefBuf b;
2201 const char* n = _thisfrag->lirbuf->printer->formatRef(&b, ins);
2203 if (ins->isop(LIR_paramp) && ins->paramKind()==1 &&
2204 r == Assembler::savedRegs[ins->paramArg()])
2206                     // don't print callee-saved regs that aren't used
2207 continue;
2210 VMPI_sprintf(s, " %s(%s)", gpn(r), n);
2211 s += VMPI_strlen(s);
2213 output();
2216 void Assembler::printActivationState()
2218 char* s = &outline[0];
2219 VMPI_memset(s, ' ', 26); s[26] = '\0';
2220 s += VMPI_strlen(s);
2221 VMPI_sprintf(s, "AR");
2222 s += VMPI_strlen(s);
2224 LIns* ins = 0;
2225 uint32_t nStackSlots = 0;
2226 int32_t arIndex = 0;
2227 for (AR::Iter iter(_activation); iter.next(ins, nStackSlots, arIndex); )
2229 RefBuf b;
2230 const char* n = _thisfrag->lirbuf->printer->formatRef(&b, ins);
2231 if (nStackSlots > 1) {
2232 VMPI_sprintf(s," %d-%d(%s)", 4*arIndex, 4*(arIndex+nStackSlots-1), n);
2234 else {
2235 VMPI_sprintf(s," %d(%s)", 4*arIndex, n);
2237 s += VMPI_strlen(s);
2239 output();
2241 #endif
2243 inline bool AR::isEmptyRange(uint32_t start, uint32_t nStackSlots) const
2245 for (uint32_t i=0; i < nStackSlots; i++)
2247 if (_entries[start-i] != NULL)
2248 return false;
2250 return true;
2253 uint32_t AR::reserveEntry(LIns* ins)
2255 uint32_t const nStackSlots = nStackSlotsFor(ins);
2257 if (nStackSlots == 1)
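            // Single-slot case: linear-scan for the first free entry; if none is
            // free, grow the frame by bumping the high-water mark.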
2259 for (uint32_t i = 1; i <= _highWaterMark; i++)
2261 if (_entries[i] == NULL)
2263 _entries[i] = ins;
2264 return i;
2267 if (_highWaterMark < NJ_MAX_STACK_ENTRY - 1)
2269 NanoAssert(_entries[_highWaterMark+1] == BAD_ENTRY);
2270 _highWaterMark++;
2271 _entries[_highWaterMark] = ins;
2272 return _highWaterMark;
2275 else
2277             // Alloc a larger block on an 8-byte boundary.
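            // The scan below visits even-numbered indices only; a block occupies the
            // nStackSlots entries ending at index i (filled downwards, every slot
            // tagged with 'ins'), and the highest index i is what gets returned.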
2278 uint32_t const start = nStackSlots + (nStackSlots & 1);
2279 for (uint32_t i = start; i <= _highWaterMark; i += 2)
2281 if (isEmptyRange(i, nStackSlots))
2283 // place the entry in the table and mark the instruction with it
2284 for (uint32_t j=0; j < nStackSlots; j++)
2286 NanoAssert(i-j <= _highWaterMark);
2287 NanoAssert(_entries[i-j] == NULL);
2288 _entries[i-j] = ins;
2290 return i;
2294 // Be sure to account for any 8-byte-round-up when calculating spaceNeeded.
2295 uint32_t const spaceLeft = NJ_MAX_STACK_ENTRY - _highWaterMark - 1;
2296 uint32_t const spaceNeeded = nStackSlots + (_highWaterMark & 1);
2297 if (spaceLeft >= spaceNeeded)
2299 if (_highWaterMark & 1)
2301 NanoAssert(_entries[_highWaterMark+1] == BAD_ENTRY);
2302 _entries[_highWaterMark+1] = NULL;
2304 _highWaterMark += spaceNeeded;
2305 for (uint32_t j = 0; j < nStackSlots; j++)
2307 NanoAssert(_highWaterMark-j < NJ_MAX_STACK_ENTRY);
2308 NanoAssert(_entries[_highWaterMark-j] == BAD_ENTRY);
2309 _entries[_highWaterMark-j] = ins;
2311 return _highWaterMark;
2314 // no space. oh well.
2315 return 0;
2318 #ifdef _DEBUG
2319 void AR::checkForResourceLeaks() const
2321 for (uint32_t i = 1; i <= _highWaterMark; i++) {
2322 NanoAssertMsgf(_entries[i] == NULL, "frame entry %d wasn't freed\n",4*i);
2325 #endif
2327 uint32_t Assembler::arReserve(LIns* ins)
2329 uint32_t i = _activation.reserveEntry(ins);
2330 if (!i)
2331 setError(StackFull);
2332 return i;
2335 void Assembler::arFree(LIns* ins)
2337 NanoAssert(ins->isInAr());
2338 uint32_t arIndex = ins->getArIndex();
2339 NanoAssert(arIndex);
2340 NanoAssert(_activation.isValidEntry(arIndex, ins));
2341             _activation.freeEntryAt(arIndex);        // free any stack space associated with the entry
2345      * Move regs around so that SavedRegs contains the highest-priority regs.
2347 void Assembler::evictScratchRegsExcept(RegisterMask ignore)
2349 // Find the top GpRegs that are candidates to put in SavedRegs.
2351 // 'tosave' is a binary heap stored in an array. The root is tosave[0],
2352 // left child is at i+1, right child is at i+2.
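        // The first loop below builds the heap by appending each candidate and bubbling
        // it up; the second repeatedly takes tosave[0], then moves the last element to
        // the root and sifts it down.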
2354 Register tosave[LastRegNum - FirstRegNum + 1];
2355 int len=0;
2356 RegAlloc *regs = &_allocator;
2357 RegisterMask evict_set = regs->activeMask() & GpRegs & ~ignore;
2358 for (Register r = lsReg(evict_set); evict_set; r = nextLsReg(evict_set, r)) {
2359 LIns *ins = regs->getActive(r);
2360 if (canRemat(ins)) {
2361 NanoAssert(ins->getReg() == r);
2362 evict(ins);
2364 else {
2365 int32_t pri = regs->getPriority(r);
2366 // add to heap by adding to end and bubbling up
2367 int j = len++;
2368 while (j > 0 && pri > regs->getPriority(tosave[j/2])) {
2369 tosave[j] = tosave[j/2];
2370 j /= 2;
2372 NanoAssert(size_t(j) < sizeof(tosave)/sizeof(tosave[0]));
2373 tosave[j] = r;
2377             // Now tosave[] has the live exprs in priority order.
2378 // Allocate each of the top priority exprs to a SavedReg.
2380 RegisterMask allow = SavedRegs;
2381 while (allow && len > 0) {
2382 // get the highest priority var
2383 Register hi = tosave[0];
2384 if (!(rmask(hi) & SavedRegs)) {
2385 LIns *ins = regs->getActive(hi);
2386 Register r = findRegFor(ins, allow);
2387 allow &= ~rmask(r);
2389 else {
2390 // hi is already in a saved reg, leave it alone.
2391 allow &= ~rmask(hi);
2394 // remove from heap by replacing root with end element and bubbling down.
2395 if (allow && --len > 0) {
2396 Register last = tosave[len];
2397 int j = 0;
2398 while (j+1 < len) {
2399 int child = j+1;
2400 if (j+2 < len && regs->getPriority(tosave[j+2]) > regs->getPriority(tosave[j+1]))
2401 child++;
2402 if (regs->getPriority(last) > regs->getPriority(tosave[child]))
2403 break;
2404 tosave[j] = tosave[child];
2405 j = child;
2407 tosave[j] = last;
2411 // now evict everything else.
2412 evictSomeActiveRegs(~(SavedRegs | ignore));
2415     // Generate code to restore any registers in 'regs' that are currently active.
2416 void Assembler::evictSomeActiveRegs(RegisterMask regs)
2418 RegisterMask evict_set = regs & _allocator.activeMask();
2419 for (Register r = lsReg(evict_set); evict_set; r = nextLsReg(evict_set, r))
2420 evict(_allocator.getActive(r));
2424 * Merge the current regstate with a previously stored version.
2426 * Situation Change to _allocator
2427 * --------- --------------------
2428 * !current & !saved
2429 * !current & saved add saved
2430 * current & !saved evict current (unionRegisterState does nothing)
2431 * current & saved & current==saved
2432 * current & saved & current!=saved evict current, add saved
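     * Registers present only in 'saved' are queued in regsTodo/insTodo during the
     * eviction pass and re-established afterwards via findSpecificRegFor.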
2434 void Assembler::intersectRegisterState(RegAlloc& saved)
2436 Register regsTodo[LastRegNum + 1];
2437 LIns* insTodo[LastRegNum + 1];
2438 int nTodo = 0;
2440 // Do evictions and pops first.
2441 verbose_only(bool shouldMention=false; )
2442 // The obvious thing to do here is to iterate from FirstRegNum to
2443 // LastRegNum. However, on ARM that causes lower-numbered integer
2444         // registers to be saved at higher addresses, which inhibits the
2445 // formation of load/store multiple instructions. Hence iterate the
2446 // loop the other way.
2447 RegisterMask reg_set = _allocator.activeMask() | saved.activeMask();
2448 for (Register r = msReg(reg_set); reg_set; r = nextMsReg(reg_set, r))
2450 LIns* curins = _allocator.getActive(r);
2451 LIns* savedins = saved.getActive(r);
2452 if (curins != savedins)
2454 if (savedins) {
2455 regsTodo[nTodo] = r;
2456 insTodo[nTodo] = savedins;
2457 nTodo++;
2459 if (curins) {
2460 //_nvprof("intersect-evict",1);
2461 verbose_only( shouldMention=true; )
2462 NanoAssert(curins->getReg() == r);
2463 evict(curins);
2466 #ifdef NANOJIT_IA32
2467 if (savedins && r == FST0) {
2468 verbose_only( shouldMention=true; )
2469 FSTP(FST0);
2471 #endif
2474 // Now reassign mainline registers.
2475 for (int i = 0; i < nTodo; i++) {
2476 findSpecificRegFor(insTodo[i], regsTodo[i]);
2478 verbose_only(
2479 if (shouldMention)
2480 verbose_outputf("## merging registers (intersect) with existing edge");
2485 * Merge the current state of the registers with a previously stored version.
2487 * Situation Change to _allocator
2488 * --------- --------------------
2489 * !current & !saved none
2490 * !current & saved add saved
2491 * current & !saved none (intersectRegisterState evicts current)
2492 * current & saved & current==saved none
2493 * current & saved & current!=saved evict current, add saved
2495 void Assembler::unionRegisterState(RegAlloc& saved)
2497 Register regsTodo[LastRegNum + 1];
2498 LIns* insTodo[LastRegNum + 1];
2499 int nTodo = 0;
2501 // Do evictions and pops first.
2502 verbose_only(bool shouldMention=false; )
2503 RegisterMask reg_set = _allocator.activeMask() | saved.activeMask();
2504 for (Register r = lsReg(reg_set); reg_set; r = nextLsReg(reg_set, r))
2506 LIns* curins = _allocator.getActive(r);
2507 LIns* savedins = saved.getActive(r);
2508 if (curins != savedins)
2510 if (savedins) {
2511 regsTodo[nTodo] = r;
2512 insTodo[nTodo] = savedins;
2513 nTodo++;
2515 if (curins && savedins) {
2516 //_nvprof("union-evict",1);
2517 verbose_only( shouldMention=true; )
2518 NanoAssert(curins->getReg() == r);
2519 evict(curins);
2522 #ifdef NANOJIT_IA32
2523 if (r == FST0) {
2524 if (savedins) {
2525 // Discard top of x87 stack.
2526 FSTP(FST0);
2528 else if (curins) {
2529 // Saved state did not have fpu reg allocated,
2530 // so we must evict here to keep x87 stack balanced.
2531 evict(curins);
2533 verbose_only( shouldMention=true; )
2535 #endif
2538 // Now reassign mainline registers.
2539 for (int i = 0; i < nTodo; i++) {
2540 findSpecificRegFor(insTodo[i], regsTodo[i]);
2542 verbose_only(
2543 if (shouldMention)
2544 verbose_outputf("## merging registers (union) with existing edge");
2548 // Scan table for instruction with the lowest priority, meaning it is used
2549 // furthest in the future.
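    // Rematerializable values (canRemat) are given priority 0 below, so they are
    // preferred victims: they can be regenerated rather than reloaded from a spill slot.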
2550 LIns* Assembler::findVictim(RegisterMask allow)
2552 NanoAssert(allow);
2553 LIns *ins, *vic = 0;
2554 int allow_pri = 0x7fffffff;
2555 RegisterMask vic_set = allow & _allocator.activeMask();
2556 for (Register r = lsReg(vic_set); vic_set; r = nextLsReg(vic_set, r))
2558 ins = _allocator.getActive(r);
2559 int pri = canRemat(ins) ? 0 : _allocator.getPriority(r);
2560 if (!vic || pri < allow_pri) {
2561 vic = ins;
2562 allow_pri = pri;
2565 NanoAssert(vic != 0);
2566 return vic;
2569 #ifdef NJ_VERBOSE
2570 char Assembler::outline[8192];
2571 char Assembler::outlineEOL[512];
2573 void Assembler::output()
2575 // The +1 is for the terminating NUL char.
2576             VMPI_strncat(outline, outlineEOL, sizeof(outline)-(VMPI_strlen(outline)+1));
2578 if (_outputCache) {
2579 char* str = new (alloc) char[VMPI_strlen(outline)+1];
2580 VMPI_strcpy(str, outline);
2581 _outputCache->insert(str);
2582 } else {
2583 _logc->printf("%s\n", outline);
2586 outline[0] = '\0';
2587 outlineEOL[0] = '\0';
2590 void Assembler::outputf(const char* format, ...)
2592 va_list args;
2593 va_start(args, format);
2595 outline[0] = '\0';
2596 vsprintf(outline, format, args);
2597             output();
                 va_end(args);
2600 void Assembler::setOutputForEOL(const char* format, ...)
2602 va_list args;
2603 va_start(args, format);
2605 outlineEOL[0] = '\0';
2606             vsprintf(outlineEOL, format, args);
                 va_end(args);
2608 #endif // NJ_VERBOSE
2610 void LabelStateMap::add(LIns *label, NIns *addr, RegAlloc &regs) {
2611 LabelState *st = new (alloc) LabelState(addr, regs);
2612 labels.put(label, st);
2615 LabelState* LabelStateMap::get(LIns *label) {
2616 return labels.get(label);
2619 #endif /* FEATURE_NANOJIT */