1 /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
2 /* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
3 /* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
16 * The Original Code is [Open Source Virtual Machine].
18 * The Initial Developer of the Original Code is
19 * MIPS Technologies Inc
20 * Portions created by the Initial Developer are Copyright (C) 2009
21 * the Initial Developer. All Rights Reserved.
23 * Contributor(s):
24 * Chris Dearman <chris@mips.com>
26 * Alternatively, the contents of this file may be used under the terms of
27 * either the GNU General Public License Version 2 or later (the "GPL"), or
28 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
40 #include "nanojit.h"
42 #if defined FEATURE_NANOJIT && defined NANOJIT_MIPS
44 namespace nanojit
46 #ifdef NJ_VERBOSE
47 const char *regNames[] = {
48 "$zr", "$at", "$v0", "$v1", "$a0", "$a1", "$a2", "$a3",
49 "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7",
50 "$s0", "$s1", "$s2", "$s3", "$s4", "$s5", "$s6", "$s7",
51 "$t8", "$t9", "$k0", "$k1", "$gp", "$sp", "$fp", "$ra",
53 "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7",
54 "$f8", "$f9", "$f10", "$f11", "$f12", "$f13", "$f14", "$f15",
55 "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23",
56 "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31"
59 const char *cname[16] = {
60 "f", "un", "eq", "ueq",
61 "olt", "ult", "ole", "ule",
62 "sf", "ngle", "seq", "ngl",
63 "lt", "nge", "le", "ngt"
66 const char *fname[32] = {
67 "resv", "resv", "resv", "resv",
68 "resv", "resv", "resv", "resv",
69 "resv", "resv", "resv", "resv",
70 "resv", "resv", "resv", "resv",
71 "s", "d", "resv", "resv",
72 "w", "l", "ps", "resv",
73 "resv", "resv", "resv", "resv",
74 "resv", "resv", "resv", "resv",
77 const char *oname[64] = {
78 "special", "regimm", "j", "jal", "beq", "bne", "blez", "bgtz",
79 "addi", "addiu", "slti", "sltiu", "andi", "ori", "xori", "lui",
80 "cop0", "cop1", "cop2", "cop1x", "beql", "bnel", "blezl", "bgtzl",
81 "resv", "resv", "resv", "resv", "special2", "jalx", "resv", "special3",
82 "lb", "lh", "lwl", "lw", "lbu", "lhu", "lwr", "resv",
83 "sb", "sh", "swl", "sw", "resv", "resv", "swr", "cache",
84 "ll", "lwc1", "lwc2", "pref", "resv", "ldc1", "ldc2", "resv",
85 "sc", "swc1", "swc2", "resv", "resv", "sdc1", "sdc2", "resv",
87 #endif
89 const Register Assembler::argRegs[] = { A0, A1, A2, A3 };
90 const Register Assembler::retRegs[] = { V0, V1 };
91 const Register Assembler::savedRegs[] = {
92 S0, S1, S2, S3, S4, S5, S6, S7,
93 #ifdef FPCALLEESAVED
94 FS0, FS1, FS2, FS3, FS4, FS5
95 #endif
98 #define USE(x) (void)x
99 #define BADOPCODE(op) NanoAssertMsgf(false, "unexpected opcode %s", lirNames[op])
101 // This function will get optimised by the compiler into a known value
102 static inline bool isLittleEndian(void)
104 const union {
105 uint32_t ival;
106 unsigned char cval[4];
107 } u = { 1 };
108 return u.cval[0] == 1;
111 // offsets to most/least significant parts of 64bit data in memory
112 // These functions will get optimised by the compiler into a known value
113 static inline int mswoff(void) {
114 return isLittleEndian() ? 4 : 0;
117 static inline int lswoff(void) {
118 return isLittleEndian() ? 0 : 4;
121 static inline Register mswregpair(Register r) {
122 return Register(r + (isLittleEndian() ? 1 : 0));
125 static inline Register lswregpair(Register r) {
126 return Register(r + (isLittleEndian() ? 0 : 1));
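    // Illustrative sketch (not from the original source): how the offset
    // helpers above split a double into its two 32-bit halves in memory.
    // The word at lswoff() holds the low mantissa bits, the word at mswoff()
    // holds the sign/exponent/high mantissa bits, on either endianness.
    // Wrapped in #if 0 so it has no effect on the build.
#if 0
    static void splitDouble(double d, int32_t &msw, int32_t &lsw)
    {
        union { double d; int32_t w[2]; } u;
        u.d = d;
        msw = u.w[mswoff() / 4];    // mswoff()/lswoff() are byte offsets,
        lsw = u.w[lswoff() / 4];    // divide by 4 to index 32-bit words
    }
#endif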
129 // These variables affect the code generator
130 // They can be defined as constants and the compiler will remove
131 // the unused paths through dead code elimination
132 // Alternatively they can be defined as variables which will allow
133 // the exact code generated to be determined at runtime
135 // cpu_has_fpu CPU has fpu
136 // cpu_has_movn CPU has movn
137 // cpu_has_cmov CPU has movf/movn instructions
138 // cpu_has_lsdc1 CPU has ldc1/sdc1 instructions
139 // cpu_has_lsdxc1     CPU has ldxc1/sdxc1 instructions
140 // cpu_has_fpuhazard hazard between c.xx.xx & bc1[tf]
142 // Currently the values are initialised based on preprocessor definitions
144 #ifdef DEBUG
145 // Don't allow the compiler to eliminate dead code for debug builds
146 #define _CONST
147 #else
148 #define _CONST const
149 #endif
151 #if NJ_SOFTFLOAT_SUPPORTED
152 _CONST bool cpu_has_fpu = false;
153 #else
154 _CONST bool cpu_has_fpu = true;
155 #endif
157 #if (__mips==4 || __mips==32 || __mips==64)
158 _CONST bool cpu_has_cmov = true;
159 #else
160 _CONST bool cpu_has_cmov = false;
161 #endif
163 #if __mips != 1
164 _CONST bool cpu_has_lsdc1 = true;
165 #else
166 _CONST bool cpu_has_lsdc1 = false;
167 #endif
169 #if (__mips==32 || __mips==64) && __mips_isa_rev>=2
170 _CONST bool cpu_has_lsdxc1 = true;
171 #else
172 _CONST bool cpu_has_lsdxc1 = false;
173 #endif
175 #if (__mips==1 || __mips==2 || __mips==3)
176 _CONST bool cpu_has_fpuhazard = true;
177 #else
178 _CONST bool cpu_has_fpuhazard = false;
179 #endif
180 #undef _CONST
182 /* Support routines */
184 debug_only (
185 // break to debugger when generating code to this address
186 static NIns *breakAddr;
187 static void codegenBreak(NIns *genAddr)
189 NanoAssert (breakAddr != genAddr);
193 // Equivalent to assembler %hi(), %lo()
194 uint16_t hi(uint32_t v)
196 uint16_t r = v >> 16;
197 if ((int16_t)(v) < 0)
198 r += 1;
199 return r;
202 int16_t lo(uint32_t v)
204 int16_t r = v;
205 return r;
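    // Worked example of the hi()/lo() split above (illustrative): the pair is
    // chosen so that (hi(v) << 16) + signextend(lo(v)) == v, which is what a
    // lui/addiu sequence computes.
    //   v = 0x00018000: lo(v) = -0x8000 (sign-extended), so hi(v) = 0x0002
    //                   and 0x00020000 + (-0x8000) == 0x00018000
    //   v = 0x00014000: lo(v) =  0x4000, hi(v) = 0x0001
    // This is why hi() adds 1 whenever the low half is negative as an int16_t,
    // rather than being a plain v >> 16.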
208 void Assembler::asm_li32(Register r, int32_t imm)
210 // general case generating a full 32-bit load
211 ADDIU(r, r, lo(imm));
212 LUI(r, hi(imm));
215 void Assembler::asm_li(Register r, int32_t imm)
217 #if !PEDANTIC
218 if (isU16(imm)) {
219 ORI(r, ZERO, imm);
220 return;
222 if (isS16(imm)) {
223 ADDIU(r, ZERO, imm);
224 return;
226 if ((imm & 0xffff) == 0) {
227 LUI(r, uint32_t(imm) >> 16);
228 return;
230 #endif
231 asm_li32(r, imm);
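    // Examples of the encodings chosen above (illustrative):
    //   imm = 0x00001234 -> ori   $r,$0,0x1234                  (fits unsigned 16-bit)
    //   imm = -16        -> addiu $r,$0,-16                      (fits signed 16-bit)
    //   imm = 0x12340000 -> lui   $r,0x1234                      (low half is zero)
    //   imm = 0x12345678 -> lui $r,0x1234; addiu $r,$r,0x5678    (full 32-bit load)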
234 // 64 bit immediate load to a register pair
235 void Assembler::asm_li_d(Register r, int32_t msw, int32_t lsw)
237 if (IsFpReg(r)) {
238 NanoAssert(cpu_has_fpu);
239 // li $at,lsw # iff lsw != 0
240 // mtc1 $at,$r # may use $0 instead of $at
241 // li $at,msw # iff (msw != 0) && (msw != lsw)
242 // mtc1 $at,$(r+1) # may use $0 instead of $at
243 if (msw == 0)
244 MTC1(ZERO, r+1);
245 else {
246 MTC1(AT, r+1);
247 // If the MSW & LSW values are different, reload AT
248 if (msw != lsw)
249 asm_li(AT, msw);
251 if (lsw == 0)
252 MTC1(ZERO, r);
253 else {
254 MTC1(AT, r);
255 asm_li(AT, lsw);
258 else {
260 * li $r.lo, lsw
261 * li $r.hi, msw # will be converted to move $r.hi,$r.lo if (msw==lsw)
263 if (msw == lsw)
264 MOVE(mswregpair(r), lswregpair(r));
265 else
266 asm_li(mswregpair(r), msw);
267 asm_li(lswregpair(r), lsw);
271 void Assembler::asm_move(Register d, Register s)
273 MOVE(d, s);
276 // General load/store operation
277 void Assembler::asm_ldst(int op, Register rt, int dr, Register rbase)
279 #if !PEDANTIC
280 if (isS16(dr)) {
281 LDST(op, rt, dr, rbase);
282 return;
284 #endif
286 // lui AT,hi(d)
287 // addu AT,rbase
288 // ldst rt,lo(d)(AT)
289 LDST(op, rt, lo(dr), AT);
290 ADDU(AT, AT, rbase);
291 LUI(AT, hi(dr));
294 void Assembler::asm_ldst64(bool store, Register r, int dr, Register rbase)
296 #if !PEDANTIC
297 if (isS16(dr) && isS16(dr+4)) {
298 if (IsGpReg(r)) {
299 LDST(store ? OP_SW : OP_LW, r+1, dr+4, rbase);
300 LDST(store ? OP_SW : OP_LW, r, dr, rbase);
302 else {
303 NanoAssert(cpu_has_fpu);
304 // NanoAssert((dr & 7) == 0);
305 if (cpu_has_lsdc1 && ((dr & 7) == 0)) {
306 // lsdc1 $fr,dr($rbase)
307 LDST(store ? OP_SDC1 : OP_LDC1, r, dr, rbase);
309 else {
310 // lswc1 $fr, dr+LSWOFF($rbase)
311 // lswc1 $fr+1,dr+MSWOFF($rbase)
312 LDST(store ? OP_SWC1 : OP_LWC1, r+1, dr+mswoff(), rbase);
313 LDST(store ? OP_SWC1 : OP_LWC1, r, dr+lswoff(), rbase);
315 return;
318 #endif
320 if (IsGpReg(r)) {
321 // lui $at,%hi(d)
322 // addu $at,$rbase
323 // ldsw $r, %lo(d)($at)
324 // ldst $r+1,%lo(d+4)($at)
325 LDST(store ? OP_SW : OP_LW, r+1, lo(dr+4), AT);
326 LDST(store ? OP_SW : OP_LW, r, lo(dr), AT);
327 ADDU(AT, AT, rbase);
328 LUI(AT, hi(dr));
330 else {
331 NanoAssert(cpu_has_fpu);
332 if (cpu_has_lsdxc1) {
333 // li $at,dr
334 // lsdcx1 $r,$at($rbase)
335 if (store)
336 SDXC1(r, AT, rbase);
337 else
338 LDXC1(r, AT, rbase);
339 asm_li(AT, dr);
341 else if (cpu_has_lsdc1) {
342 // lui $at,%hi(dr)
343 // addu $at,$rbase
344 // lsdc1 $r,%lo(dr)($at)
345 LDST(store ? OP_SDC1 : OP_LDC1, r, lo(dr), AT);
346 ADDU(AT, AT, rbase);
347 LUI(AT, hi(dr));
349 else {
350 // lui $at,%hi(d)
351 // addu $at,$rbase
352 // lswc1 $r, %lo(d+LSWOFF)($at)
353 // lswc1 $r+1,%lo(d+MSWOFF)($at)
354 LDST(store ? OP_SWC1 : OP_LWC1, r+1, lo(dr+mswoff()), AT);
355 LDST(store ? OP_SWC1 : OP_LWC1, r, lo(dr+lswoff()), AT);
356 ADDU(AT, AT, rbase);
357 LUI(AT, hi(dr));
362 void Assembler::asm_store_imm64(LIns *value, int dr, Register rbase)
364 NanoAssert(value->isImmD());
365 int32_t msw = value->immDhi();
366 int32_t lsw = value->immDlo();
368 // li $at,lsw # iff lsw != 0
369 // sw $at,off+LSWOFF($rbase) # may use $0 instead of $at
370 // li $at,msw # iff (msw != 0) && (msw != lsw)
371 // sw $at,off+MSWOFF($rbase) # may use $0 instead of $at
373 NanoAssert(isS16(dr) && isS16(dr+4));
375 if (lsw == 0)
376 SW(ZERO, dr+lswoff(), rbase);
377 else {
378 SW(AT, dr+lswoff(), rbase);
379 if (msw != lsw)
380 asm_li(AT, lsw);
382 if (msw == 0)
383 SW(ZERO, dr+mswoff(), rbase);
384 else {
385 SW(AT, dr+mswoff(), rbase);
386 // If the MSW & LSW values are different, reload AT
387 if (msw != lsw)
388 asm_li(AT, msw);
392 void Assembler::asm_regarg(ArgType ty, LIns* p, Register r)
394 NanoAssert(deprecated_isKnownReg(r));
395 if (ty == ARGTYPE_I || ty == ARGTYPE_UI) {
396 // arg goes in specific register
397 if (p->isImmI())
398 asm_li(r, p->immI());
399 else {
400 if (p->isExtant()) {
401 if (!p->deprecated_hasKnownReg()) {
402 // load it into the arg reg
403 int d = findMemFor(p);
404 if (p->isop(LIR_allocp))
405 ADDIU(r, FP, d);
406 else
407 asm_ldst(OP_LW, r, d, FP);
409 else
410 // it must be in a saved reg
411 MOVE(r, p->deprecated_getReg());
413 else {
414 // this is the last use, so fine to assign it
415 // to the scratch reg, it's dead after this point.
416 findSpecificRegFor(p, r);
420 else {
421 // Other argument types unsupported
422 NanoAssert(false);
426 void Assembler::asm_stkarg(LIns* arg, int stkd)
428 bool isF64 = arg->isD();
429 Register rr;
430 if (arg->isExtant() && (rr = arg->deprecated_getReg(), deprecated_isKnownReg(rr))) {
431 // The argument resides somewhere in registers, so we simply need to
432 // push it onto the stack.
433 if (!cpu_has_fpu || !isF64) {
434 NanoAssert(IsGpReg(rr));
435 SW(rr, stkd, SP);
437 else {
438 NanoAssert(cpu_has_fpu);
439 NanoAssert(IsFpReg(rr));
440 NanoAssert((stkd & 7) == 0);
441 asm_ldst64(true, rr, stkd, SP);
444 else {
445 // The argument does not reside in registers, so we need to get some
446 // memory for it and then copy it onto the stack.
447 int d = findMemFor(arg);
448 if (!isF64) {
449 SW(AT, stkd, SP);
450 if (arg->isop(LIR_allocp))
451 ADDIU(AT, FP, d);
452 else
453 LW(AT, d, FP);
455 else {
456 NanoAssert((stkd & 7) == 0);
457 SW(AT, stkd+4, SP);
458 LW(AT, d+4, FP);
459 SW(AT, stkd, SP);
460 LW(AT, d, FP);
465 // Encode a 64-bit floating-point argument using the appropriate ABI.
466 // This function operates in the same way as asm_arg, except that it will only
467 // handle arguments where (ArgType)ty == ARGTYPE_D.
468 void
469 Assembler::asm_arg_64(LIns* arg, Register& r, Register& fr, int& stkd)
471 // The stack offset must always be at least aligned to 4 bytes.
472 NanoAssert((stkd & 3) == 0);
473 #if NJ_SOFTFLOAT_SUPPORTED
474 NanoAssert(arg->isop(LIR_ii2d));
475 #else
476 NanoAssert(cpu_has_fpu);
477 #endif
479 // O32 ABI requires that 64-bit arguments are aligned on even-numbered
480 // registers, as A0:A1/FA0 or A2:A3/FA1. Use the stack offset to keep track
481 // of where we are.
482 if (stkd & 4) {
483 if (stkd < 16) {
484 r = Register(r + 1);
485 fr = Register(fr + 1);
487 stkd += 4;
490 if (stkd < 16) {
491 NanoAssert(fr == FA0 || fr == FA1 || fr == A2);
492 if (fr == FA0 || fr == FA1)
493 findSpecificRegFor(arg, fr);
494 else {
495 findSpecificRegFor(arg, FA1);
496 // Move it to the integer pair
497 Register fpupair = arg->getReg();
498 Register intpair = fr;
499 MFC1(mswregpair(intpair), Register(fpupair + 1)); // Odd fpu register contains sign,expt,manthi
500 MFC1(lswregpair(intpair), fpupair); // Even fpu register contains mantlo
502 r = Register(r + 2);
503 fr = Register(fr + 2);
505 else
506 asm_stkarg(arg, stkd);
508 stkd += 8;
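    // Worked example of the o32 placement implemented above (illustrative),
    // for a call f(int a, double b, int c, double d):
    //   a -> A0                   (stkd 0 -> 4)
    //   b -> A2:A3                (stkd padded to 8, then 8 -> 16; moved from
    //                              FA1 via mfc1 because the first argument
    //                              was integral)
    //   c -> stack, offset 16     (stkd 16 -> 20)
    //   d -> stack, offset 24     (stkd padded to 24, then 24 -> 32)
    // A1 is skipped so the double starts on an even register boundary, and
    // once stkd reaches 16 all remaining arguments go to the stack, with
    // doubles kept 8-byte aligned.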
511 /* Required functions */
513 #define FRAMESIZE 8
514 #define RA_OFFSET 4
515 #define FP_OFFSET 0
517 void Assembler::asm_store32(LOpcode op, LIns *value, int dr, LIns *base)
519 Register rt, rbase;
520 getBaseReg2(GpRegs, value, rt, GpRegs, base, rbase, dr);
522 switch (op) {
523 case LIR_sti:
524 asm_ldst(OP_SW, rt, dr, rbase);
525 break;
526 case LIR_sti2s:
527 asm_ldst(OP_SH, rt, dr, rbase);
528 break;
529 case LIR_sti2c:
530 asm_ldst(OP_SB, rt, dr, rbase);
531 break;
532 default:
533 BADOPCODE(op);
536 TAG("asm_store32(value=%p{%s}, dr=%d, base=%p{%s})",
537 value, lirNames[value->opcode()], dr, base, lirNames[base->opcode()]);
540 void Assembler::asm_ui2d(LIns *ins)
542 Register fr = deprecated_prepResultReg(ins, FpRegs);
543 Register v = findRegFor(ins->oprnd1(), GpRegs);
544 Register ft = registerAllocTmp(FpRegs & ~(rmask(fr))); // allocate temporary register for constant
546 // todo: support int value in memory, as per x86
547 NanoAssert(deprecated_isKnownReg(v));
549 // mtc1 $v,$ft
550 // bgez $v,1f
551 // cvt.d.w $fr,$ft
552 // lui $at,0x41f0 # (double)0x10000000LL = 0x41f0000000000000
553 // mtc1 $0,$ft
554 // mtc1 $at,$ft+1
555 // add.d $fr,$fr,$ft
556 // 1:
558 underrunProtect(6*4); // keep branch and destination together
559 NIns *here = _nIns;
560 ADD_D(fr,fr,ft);
561 MTC1(AT,ft+1);
562 MTC1(ZERO,ft);
563 LUI(AT,0x41f0);
564 CVT_D_W(fr,ft); // branch delay slot
565 BGEZ(v,here);
566 MTC1(v,ft);
568 TAG("asm_ui2d(ins=%p{%s})", ins, lirNames[ins->opcode()]);
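    // Worked example of the unsigned fixup above (illustrative): cvt.d.w
    // treats its source as signed, so for v = 0xffffffff it yields -1.0.
    // Since v is negative when viewed as signed, the bgez branch falls
    // through and 2^32 (0x41f0000000000000 == 4294967296.0) is added,
    // giving the correct unsigned value 4294967295.0. For v = 5 the branch
    // is taken and the result is simply 5.0.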
571 void Assembler::asm_d2i(LIns* ins)
573 NanoAssert(cpu_has_fpu);
575 Register rr = deprecated_prepResultReg(ins, GpRegs);
576 Register sr = findRegFor(ins->oprnd1(), FpRegs);
577 // trunc.w.d $sr,$sr
578 // mfc1 $rr,$sr
579 MFC1(rr,sr);
580 TRUNC_W_D(sr,sr);
581 TAG("asm_d2i(ins=%p{%s})", ins, lirNames[ins->opcode()]);
584 void Assembler::asm_fop(LIns *ins)
586 NanoAssert(cpu_has_fpu);
587 if (cpu_has_fpu) {
588 LIns* lhs = ins->oprnd1();
589 LIns* rhs = ins->oprnd2();
590 LOpcode op = ins->opcode();
592 // rr = ra OP rb
594 Register rr = deprecated_prepResultReg(ins, FpRegs);
595 Register ra = findRegFor(lhs, FpRegs);
596 Register rb = (rhs == lhs) ? ra : findRegFor(rhs, FpRegs & ~rmask(ra));
598 switch (op) {
599 case LIR_addd: ADD_D(rr, ra, rb); break;
600 case LIR_subd: SUB_D(rr, ra, rb); break;
601 case LIR_muld: MUL_D(rr, ra, rb); break;
602 case LIR_divd: DIV_D(rr, ra, rb); break;
603 default:
604 BADOPCODE(op);
607 TAG("asm_fop(ins=%p{%s})", ins, lirNames[ins->opcode()]);
610 void Assembler::asm_fneg(LIns *ins)
612 NanoAssert(cpu_has_fpu);
613 if (cpu_has_fpu) {
614 LIns* lhs = ins->oprnd1();
615 Register rr = deprecated_prepResultReg(ins, FpRegs);
616 Register sr = ( !lhs->isInReg()
617 ? findRegFor(lhs, FpRegs)
618 : lhs->deprecated_getReg() );
619 NEG_D(rr, sr);
621 TAG("asm_fneg(ins=%p{%s})", ins, lirNames[ins->opcode()]);
624 void Assembler::asm_immd(LIns *ins)
626 int d = deprecated_disp(ins);
627 Register rr = ins->deprecated_getReg();
629 deprecated_freeRsrcOf(ins);
631 if (cpu_has_fpu && deprecated_isKnownReg(rr)) {
632 if (d)
633 asm_spill(rr, d, true);
634 asm_li_d(rr, ins->immDhi(), ins->immDlo());
636 else {
637 NanoAssert(d);
638 asm_store_imm64(ins, d, FP);
640 TAG("asm_immd(ins=%p{%s})", ins, lirNames[ins->opcode()]);
643 #ifdef NANOJIT_64BIT
644 void
645 Assembler::asm_q2i(LIns *)
647 NanoAssert(0); // q2i shouldn't occur on 32-bit platforms
650 void Assembler::asm_ui2uq(LIns *ins)
652 USE(ins);
653 TODO(asm_ui2uq);
654 TAG("asm_ui2uq(ins=%p{%s})", ins, lirNames[ins->opcode()]);
656 #endif
658 void Assembler::asm_load64(LIns *ins)
660 NanoAssert(ins->isD());
662 LIns* base = ins->oprnd1();
663 int dr = ins->disp();
665 Register rd = ins->deprecated_getReg();
666 int ds = deprecated_disp(ins);
668 Register rbase = findRegFor(base, GpRegs);
669 NanoAssert(IsGpReg(rbase));
670 deprecated_freeRsrcOf(ins);
672 if (cpu_has_fpu && deprecated_isKnownReg(rd)) {
673 NanoAssert(IsFpReg(rd));
674 asm_ldst64 (false, rd, dr, rbase);
676 else {
677 // Either FPU is not available or the result needs to go into memory;
678 // in either case, FPU instructions are not required. Note that the
679 // result will never be loaded into registers if FPU is not available.
680 NanoAssert(!deprecated_isKnownReg(rd));
681 NanoAssert(ds != 0);
683 NanoAssert(isS16(dr) && isS16(dr+4));
684 NanoAssert(isS16(ds) && isS16(ds+4));
686 // Check that the offset is 8-byte (64-bit) aligned.
687 NanoAssert((ds & 0x7) == 0);
689 // FIXME: allocate a temporary to use for the copy
690 // to avoid load to use delay
691 // lw $at,dr($rbase)
692 // sw $at,ds($fp)
693 // lw $at,dr+4($rbase)
694 // sw $at,ds+4($fp)
696 SW(AT, ds+4, FP);
697 LW(AT, dr+4, rbase);
698 SW(AT, ds, FP);
699 LW(AT, dr, rbase);
702 TAG("asm_load64(ins=%p{%s})", ins, lirNames[ins->opcode()]);
705 void Assembler::asm_cond(LIns *ins)
707 Register r = deprecated_prepResultReg(ins, GpRegs);
708 LOpcode op = ins->opcode();
709 LIns *a = ins->oprnd1();
710 LIns *b = ins->oprnd2();
712 asm_cmp(op, a, b, r);
714 TAG("asm_cond(ins=%p{%s})", ins, lirNames[ins->opcode()]);
717 #if NJ_SOFTFLOAT_SUPPORTED
718 void Assembler::asm_qhi(LIns *ins)
720 Register rr = deprecated_prepResultReg(ins, GpRegs);
721 LIns *q = ins->oprnd1();
722 int d = findMemFor(q);
723 LW(rr, d+mswoff(), FP);
724 TAG("asm_qhi(ins=%p{%s})", ins, lirNames[ins->opcode()]);
727 void Assembler::asm_qlo(LIns *ins)
729 Register rr = deprecated_prepResultReg(ins, GpRegs);
730 LIns *q = ins->oprnd1();
731 int d = findMemFor(q);
732 LW(rr, d+lswoff(), FP);
733 TAG("asm_qlo(ins=%p{%s})", ins, lirNames[ins->opcode()]);
736 void Assembler::asm_qjoin(LIns *ins)
738 int d = findMemFor(ins);
739 NanoAssert(d && isS16(d));
740 LIns* lo = ins->oprnd1();
741 LIns* hi = ins->oprnd2();
743 Register r = findRegFor(hi, GpRegs);
744 SW(r, d+mswoff(), FP);
745 r = findRegFor(lo, GpRegs); // okay if r gets recycled.
746 SW(r, d+lswoff(), FP);
747 deprecated_freeRsrcOf(ins); // if we had a reg in use, flush it to mem
749 TAG("asm_qjoin(ins=%p{%s})", ins, lirNames[ins->opcode()]);
752 #endif
754 void Assembler::asm_neg_not(LIns *ins)
756 LOpcode op = ins->opcode();
757 Register rr = deprecated_prepResultReg(ins, GpRegs);
759 LIns* lhs = ins->oprnd1();
760 // If this is the last use of lhs in reg, we can re-use result reg.
761 // Else, lhs already has a register assigned.
762 Register ra = !lhs->isInReg() ? findSpecificRegFor(lhs, rr) : lhs->deprecated_getReg();
763 if (op == LIR_noti)
764 NOT(rr, ra);
765 else
766 NEGU(rr, ra);
767 TAG("asm_neg_not(ins=%p{%s})", ins, lirNames[ins->opcode()]);
770 void Assembler::asm_immi(LIns *ins)
772 Register rr = deprecated_prepResultReg(ins, GpRegs);
773 asm_li(rr, ins->immI());
774 TAG("asm_immi(ins=%p{%s})", ins, lirNames[ins->opcode()]);
777 void Assembler::asm_cmov(LIns *ins)
779 LIns* condval = ins->oprnd1();
780 LIns* iftrue = ins->oprnd2();
781 LIns* iffalse = ins->oprnd3();
783 NanoAssert(condval->isCmp());
784 NanoAssert(ins->opcode() == LIR_cmovi && iftrue->isI() && iffalse->isI());
786 const Register rr = deprecated_prepResultReg(ins, GpRegs);
788 const Register iftruereg = findRegFor(iftrue, GpRegs & ~rmask(rr));
789 MOVN(rr, iftruereg, AT);
790 /*const Register iffalsereg =*/ findSpecificRegFor(iffalse, rr);
791 asm_cmp(condval->opcode(), condval->oprnd1(), condval->oprnd2(), AT);
792 TAG("asm_cmov(ins=%p{%s})", ins, lirNames[ins->opcode()]);
795 void Assembler::asm_condd(LIns *ins)
797 NanoAssert(cpu_has_fpu);
798 if (cpu_has_fpu) {
799 Register r = deprecated_prepResultReg(ins, GpRegs);
800 LOpcode op = ins->opcode();
801 LIns *a = ins->oprnd1();
802 LIns *b = ins->oprnd2();
804 if (cpu_has_cmov) {
805 // c.xx.d $a,$b
806 // li $r,1
807 // movf $r,$0,$fcc0
808 MOVF(r, ZERO, 0);
809 ORI(r, ZERO, 1);
811 else {
812 // c.xx.d $a,$b
813 // [nop]
814 // bc1t 1f
815 // li $r,1
816 // move $r,$0
817 // 1:
818 NIns *here = _nIns;
819 verbose_only(verbose_outputf("%p:", here);)
820 underrunProtect(3*4);
821 MOVE(r, ZERO);
822 ORI(r, ZERO, 1); // branch delay slot
823 BC1T(here);
824 if (cpu_has_fpuhazard)
825 NOP();
827 asm_cmp(op, a, b, r);
829 TAG("asm_condd(ins=%p{%s})", ins, lirNames[ins->opcode()]);
832 void Assembler::asm_i2d(LIns *ins)
834 NanoAssert(cpu_has_fpu);
835 if (cpu_has_fpu) {
836 Register fr = deprecated_prepResultReg(ins, FpRegs);
837 Register v = findRegFor(ins->oprnd1(), GpRegs);
839 // mtc1 $v,$fr
840 // cvt.d.w $fr,$fr
841 CVT_D_W(fr,fr);
842 MTC1(v,fr);
844 TAG("asm_i2d(ins=%p{%s})", ins, lirNames[ins->opcode()]);
847 void Assembler::asm_ret(LIns *ins)
849 genEpilogue();
851 releaseRegisters();
852 assignSavedRegs();
854 LIns *value = ins->oprnd1();
855 if (ins->isop(LIR_reti)) {
856 findSpecificRegFor(value, V0);
858 else {
859 NanoAssert(ins->isop(LIR_retd));
860 #if NJ_SOFTFLOAT_SUPPORTED
861 NanoAssert(value->isop(LIR_ii2d));
862 findSpecificRegFor(value->oprnd1(), V0); // lo
863 findSpecificRegFor(value->oprnd2(), V1); // hi
864 #else
865 findSpecificRegFor(value, FV0);
866 #endif
868 TAG("asm_ret(ins=%p{%s})", ins, lirNames[ins->opcode()]);
871 void Assembler::asm_load32(LIns *ins)
873 LOpcode op = ins->opcode();
874 LIns* base = ins->oprnd1();
875 int d = ins->disp();
877 Register rres = deprecated_prepResultReg(ins, GpRegs);
878 Register rbase = getBaseReg(base, d, GpRegs);
880 switch (op) {
881 case LIR_lduc2ui: // 8-bit integer load, zero-extend to 32-bit
882 asm_ldst(OP_LBU, rres, d, rbase);
883 break;
884 case LIR_ldus2ui: // 16-bit integer load, zero-extend to 32-bit
885 asm_ldst(OP_LHU, rres, d, rbase);
886 break;
887 case LIR_ldc2i: // 8-bit integer load, sign-extend to 32-bit
888 asm_ldst(OP_LB, rres, d, rbase);
889 break;
890 case LIR_lds2i: // 16-bit integer load, sign-extend to 32-bit
891 asm_ldst(OP_LH, rres, d, rbase);
892 break;
893 case LIR_ldi: // 32-bit integer load
894 asm_ldst(OP_LW, rres, d, rbase);
895 break;
896 default:
897 BADOPCODE(op);
900 TAG("asm_load32(ins=%p{%s})", ins, lirNames[ins->opcode()]);
903 void Assembler::asm_param(LIns *ins)
905 uint32_t a = ins->paramArg();
906 uint32_t kind = ins->paramKind();
908 if (kind == 0) {
909 // ordinary param
910 // first 4 args A0..A3
911 if (a < 4) {
912 // incoming arg in register
913 deprecated_prepResultReg(ins, rmask(argRegs[a]));
914 } else {
915 // incoming arg is on stack
916 Register r = deprecated_prepResultReg(ins, GpRegs);
917 TODO(Check stack offset);
918 int d = FRAMESIZE + a * sizeof(intptr_t);
919 LW(r, d, FP);
922 else {
923 // saved param
924 deprecated_prepResultReg(ins, rmask(savedRegs[a]));
926 TAG("asm_param(ins=%p{%s})", ins, lirNames[ins->opcode()]);
929 void Assembler::asm_arith(LIns *ins)
931 LOpcode op = ins->opcode();
932 LIns* lhs = ins->oprnd1();
933 LIns* rhs = ins->oprnd2();
935 RegisterMask allow = GpRegs;
937 // We always need the result register and the first operand register.
938 Register rr = deprecated_prepResultReg(ins, allow);
940 // If this is the last use of lhs in reg, we can re-use the result reg.
941 // Else, lhs already has a register assigned.
942 Register ra = !lhs->isInReg() ? findSpecificRegFor(lhs, rr) : lhs->deprecated_getReg();
943 Register rb, t;
945 // Don't re-use the registers we've already allocated.
946 NanoAssert(deprecated_isKnownReg(rr));
947 NanoAssert(deprecated_isKnownReg(ra));
948 allow &= ~rmask(rr);
949 allow &= ~rmask(ra);
951 if (rhs->isImmI()) {
952 int32_t rhsc = rhs->immI();
953 if (isS16(rhsc)) {
954 // MIPS arith immediate ops sign-extend the imm16 value
955 switch (op) {
956 case LIR_addxovi:
957 case LIR_addjovi:
958 // add with overflow result into $at
959 // overflow is indicated by ((sign(rr)^sign(ra)) & (sign(rr)^sign(rhsc))
961 // [move $t,$ra] if (rr==ra)
962 // addiu $rr,$ra,rhsc
963 // [xor $at,$rr,$ra] if (rr!=ra)
964 // [xor $at,$rr,$t] if (rr==ra)
965 // [not $t,$rr] if (rhsc < 0)
966 // [and $at,$at,$t] if (rhsc < 0)
967 // [and $at,$at,$rr] if (rhsc >= 0)
968 // srl $at,$at,31
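                    // Worked example (illustrative): ra = 0x7fffffff, rhsc = 1
                    // gives rr = 0x80000000. sign(rr)^sign(ra) = 1, and since
                    // rhsc >= 0 the second term reduces to sign(rr) = 1, so
                    // after the final srl $at = 1: overflow. For ra = 5,
                    // rhsc = 1, rr = 6 both sign bits agree and $at = 0.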
970 t = registerAllocTmp(allow);
971 SRL(AT, AT, 31);
972 if (rhsc < 0) {
973 AND(AT, AT, t);
974 NOT(t, rr);
976 else
977 AND(AT, AT, rr);
978 if (rr == ra)
979 XOR(AT, rr, t);
980 else
981 XOR(AT, rr, ra);
982 ADDIU(rr, ra, rhsc);
983 if (rr == ra)
984 MOVE(t, ra);
985 goto done;
986 case LIR_addi:
987 ADDIU(rr, ra, rhsc);
988 goto done;
989 case LIR_subxovi:
990 case LIR_subjovi:
991 // subtract with overflow result into $at
992 // overflow is indicated by (sign(ra)^sign(rhsc)) & (sign(rr)^sign(ra))
994 // [move $t,$ra] if (rr==ra)
995 // addiu $rr,$ra,-rhsc
996 // [xor $at,$rr,$ra] if (rr!=ra)
997 // [xor $at,$rr,$t] if (rr==ra)
998 // [and $at,$at,$ra] if (rhsc >= 0 && rr!=ra)
999 // [and $at,$at,$t] if (rhsc >= 0 && rr==ra)
1000 // [not $t,$ra] if (rhsc < 0 && rr!=ra)
1001 // [not $t,$t] if (rhsc < 0 && rr==ra)
1002 // [and $at,$at,$t] if (rhsc < 0)
1003 // srl $at,$at,31
1004 if (isS16(-rhsc)) {
1005 t = registerAllocTmp(allow);
1006 SRL(AT,AT,31);
1007 if (rhsc < 0) {
1008 AND(AT, AT, t);
1009 if (rr == ra)
1010 NOT(t, t);
1011 else
1012 NOT(t, ra);
1014 else {
1015 if (rr == ra)
1016 AND(AT, AT, t);
1017 else
1018 AND(AT, AT, ra);
1020 if (rr == ra)
1021 XOR(AT, rr, t);
1022 else
1023 XOR(AT, rr, ra);
1024 ADDIU(rr, ra, -rhsc);
1025 if (rr == ra)
1026 MOVE(t, ra);
1027 goto done;
1029 break;
1030 case LIR_subi:
1031 if (isS16(-rhsc)) {
1032 ADDIU(rr, ra, -rhsc);
1033 goto done;
1035 break;
1036 case LIR_mulxovi:
1037 case LIR_muljovi:
1038 case LIR_muli:
1039 // FIXME: optimise constant multiply by 2^n
1040 // if ((rhsc & (rhsc-1)) == 0)
1041 // SLL(rr, ra, ffs(rhsc)-1);
1042 //goto done;
1043 break;
1044 default:
1045 break;
1048 if (isU16(rhsc)) {
1049 // MIPS logical immediate zero-extend the imm16 value
1050 switch (op) {
1051 case LIR_ori:
1052 ORI(rr, ra, rhsc);
1053 goto done;
1054 case LIR_andi:
1055 ANDI(rr, ra, rhsc);
1056 goto done;
1057 case LIR_xori:
1058 XORI(rr, ra, rhsc);
1059 goto done;
1060 default:
1061 break;
1065 // LIR shift ops only use the low 5 bits of the shift constant
1066 switch (op) {
1067 case LIR_lshi:
1068 SLL(rr, ra, rhsc&31);
1069 goto done;
1070 case LIR_rshui:
1071 SRL(rr, ra, rhsc&31);
1072 goto done;
1073 case LIR_rshi:
1074 SRA(rr, ra, rhsc&31);
1075 goto done;
1076 default:
1077 break;
1081 // general case, put rhs in register
1082 rb = (rhs == lhs) ? ra : findRegFor(rhs, allow);
1083 NanoAssert(deprecated_isKnownReg(rb));
1084 allow &= ~rmask(rb);
1086 // The register allocator will have set up one of these 4 cases
1087 // rr==ra && ra==rb r0 = r0 op r0
1088 // rr==ra && ra!=rb r0 = r0 op r1
1089 // rr!=ra && ra==rb r0 = r1 op r1
1090 // rr!=ra && ra!=rb && rr!=rb r0 = r1 op r2
1091 NanoAssert(ra == rb || rr != rb);
1093 switch (op) {
1094 case LIR_addxovi:
1095 case LIR_addjovi:
1096 // add with overflow result into $at
1097 // overflow is indicated by (sign(rr)^sign(ra)) & (sign(rr)^sign(rb))
1099 // [move $t,$ra] if (rr==ra)
1100 // addu $rr,$ra,$rb
1101 // ; Generate sign($rr)^sign($ra)
1102 // [xor $at,$rr,$t] sign($at)=sign($rr)^sign($t) if (rr==ra)
1103 // [xor $at,$rr,$ra] sign($at)=sign($rr)^sign($ra) if (rr!=ra)
1104 // ; Generate sign($rr)^sign($rb) if $ra!=$rb
1105 // [xor $t,$rr,$rb] if (ra!=rb)
1106 // [and $at,$t] if (ra!=rb)
1107 // srl $at,31
1109 t = ZERO;
1110 if (rr == ra || ra != rb)
1111 t = registerAllocTmp(allow);
1112 SRL(AT, AT, 31);
1113 if (ra != rb) {
1114 AND(AT, AT, t);
1115 XOR(t, rr, rb);
1117 if (rr == ra)
1118 XOR(AT, rr, t);
1119 else
1120 XOR(AT, rr, ra);
1121 ADDU(rr, ra, rb);
1122 if (rr == ra)
1123 MOVE(t, ra);
1124 break;
1125 case LIR_addi:
1126 ADDU(rr, ra, rb);
1127 break;
1128 case LIR_andi:
1129 AND(rr, ra, rb);
1130 break;
1131 case LIR_ori:
1132 OR(rr, ra, rb);
1133 break;
1134 case LIR_xori:
1135 XOR(rr, ra, rb);
1136 break;
1137 case LIR_subxovi:
1138 case LIR_subjovi:
1139 // subtract with overflow result into $at
1140 // overflow is indicated by (sign(ra)^sign(rb)) & (sign(rr)^sign(ra))
1142 // [move $t,$ra] if (rr==ra)
1143 // ; Generate sign($at)=sign($ra)^sign($rb)
1144 // xor $at,$ra,$rb
1145 // subu $rr,$ra,$rb
1146 // ; Generate sign($t)=sign($rr)^sign($ra)
1147 // [xor $t,$rr,$ra] if (rr!=ra)
1148 // [xor $t,$rr,$t] if (rr==ra)
1149 // and $at,$at,$t
1150 // srl $at,$at,31
1152 if (ra == rb) {
1153 // special case for (ra == rb) which can't overflow
1154 MOVE(AT, ZERO);
1155 SUBU(rr, ra, rb);
1157 else {
1158 t = registerAllocTmp(allow);
1159 SRL(AT, AT, 31);
1160 AND(AT, AT, t);
1161 if (rr == ra)
1162 XOR(t, rr, t);
1163 else
1164 XOR(t, rr, ra);
1165 SUBU(rr, ra, rb);
1166 XOR(AT, ra, rb);
1167 if (rr == ra)
1168 MOVE(t, ra);
1170 break;
1171 case LIR_subi:
1172 SUBU(rr, ra, rb);
1173 break;
1174 case LIR_lshi:
1175 // SLLV uses the low-order 5 bits of rb for the shift amount so no masking required
1176 SLLV(rr, ra, rb);
1177 break;
1178 case LIR_rshi:
1179 // SRAV uses the low-order 5 bits of rb for the shift amount so no masking required
1180 SRAV(rr, ra, rb);
1181 break;
1182 case LIR_rshui:
1183 // SRLV uses the low-order 5 bits of rb for the shift amount so no masking required
1184 SRLV(rr, ra, rb);
1185 break;
1186 case LIR_mulxovi:
1187 case LIR_muljovi:
1188 t = registerAllocTmp(allow);
1189 // Overflow indication required
1190 // Do a 32x32 signed multiply generating a 64 bit result
1191 // Compare bit31 of the result with the high order bits
1192 // mult $ra,$rb
1193 // mflo $rr # result to $rr
1194 // sra $t,$rr,31 # $t = 0x00000000 or 0xffffffff
1195 // mfhi $at
1196 // xor $at,$at,$t # sets $at to nonzero if overflow
1197 XOR(AT, AT, t);
1198 MFHI(AT);
1199 SRA(t, rr, 31);
1200 MFLO(rr);
1201 MULT(ra, rb);
1202 break;
1203 case LIR_muli:
1204 MUL(rr, ra, rb);
1205 break;
1206 default:
1207 BADOPCODE(op);
1209 done:
1210 TAG("asm_arith(ins=%p{%s})", ins, lirNames[ins->opcode()]);
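    // Illustrative C-level sketch (not part of the original source) of the
    // multiply-overflow test emitted for LIR_mulxovi/LIR_muljovi above: do
    // the full 32x32 -> 64-bit signed multiply and check that the high word
    // is just the sign-extension of the low word. Wrapped in #if 0 so it has
    // no effect on the build.
#if 0
    static bool mul_overflows(int32_t a, int32_t b)
    {
        int64_t full = int64_t(a) * int64_t(b);  // mult $a,$b -> HI:LO
        int32_t lo   = int32_t(full);            // mflo
        int32_t hi   = int32_t(full >> 32);      // mfhi
        return (hi ^ (lo >> 31)) != 0;           // sra 31; xor: nonzero iff overflow
    }
#endif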
1213 void Assembler::asm_store64(LOpcode op, LIns *value, int dr, LIns *base)
1215 // NanoAssert((dr & 7) == 0);
1216 #if NANOJIT_64BIT
1217 NanoAssert (op == LIR_stq || op == LIR_std2f || op == LIR_std);
1218 #else
1219 NanoAssert (op == LIR_std2f || op == LIR_std);
1220 #endif
1222 switch (op) {
1223 case LIR_std:
1224 if (cpu_has_fpu) {
1225 Register rbase = findRegFor(base, GpRegs);
1227 if (value->isImmD())
1228 asm_store_imm64(value, dr, rbase);
1229 else {
1230 Register fr = findRegFor(value, FpRegs);
1231 asm_ldst64(true, fr, dr, rbase);
1234 else {
1235 Register rbase = findRegFor(base, GpRegs);
1236 // *(uint64_t*)(rb+dr) = *(uint64_t*)(FP+da)
1238 int ds = findMemFor(value);
1240 // lw $at,ds(FP)
1241 // sw $at,dr($rbase)
1242 // lw $at,ds+4(FP)
1243 // sw $at,dr+4($rbase)
1244 SW(AT, dr+4, rbase);
1245 LW(AT, ds+4, FP);
1246 SW(AT, dr, rbase);
1247 LW(AT, ds, FP);
1250 break;
1251 case LIR_std2f:
1252 NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
1253 return;
1254 default:
1255 BADOPCODE(op);
1256 return;
1259 TAG("asm_store64(value=%p{%s}, dr=%d, base=%p{%s})",
1260 value, lirNames[value->opcode()], dr, base, lirNames[base->opcode()]);
1263 bool Assembler::canRemat(LIns* ins)
1265 return ins->isImmI() || ins->isop(LIR_allocp);
1268 void Assembler::asm_restore(LIns *i, Register r)
1270 int d;
1271 if (i->isop(LIR_allocp)) {
1272 d = deprecated_disp(i);
1273 if (isS16(d))
1274 ADDIU(r, FP, d);
1275 else {
1276 ADDU(r, FP, AT);
1277 asm_li(AT, d);
1280 else if (i->isImmI()) {
1281 asm_li(r, i->immI());
1283 else {
1284 d = findMemFor(i);
1285 if (IsFpReg(r)) {
1286 asm_ldst64(false, r, d, FP);
1288 else {
1289 asm_ldst(OP_LW, r, d, FP);
1292 TAG("asm_restore(i=%p{%s}, r=%d)", i, lirNames[i->opcode()], r);
1295 void Assembler::asm_cmp(LOpcode condop, LIns *a, LIns *b, Register cr)
1297 RegisterMask allow = isCmpDOpcode(condop) ? FpRegs : GpRegs;
1298 Register ra = findRegFor(a, allow);
1299 Register rb = (b==a) ? ra : findRegFor(b, allow & ~rmask(ra));
1301 // FIXME: Use slti if b is small constant
1303 /* Generate the condition code */
1304 switch (condop) {
1305 case LIR_eqi:
1306 SLTIU(cr,cr,1);
1307 XOR(cr,ra,rb);
1308 break;
1309 case LIR_lti:
1310 SLT(cr,ra,rb);
1311 break;
1312 case LIR_gti:
1313 SLT(cr,rb,ra);
1314 break;
1315 case LIR_lei:
1316 XORI(cr,cr,1);
1317 SLT(cr,rb,ra);
1318 break;
1319 case LIR_gei:
1320 XORI(cr,cr,1);
1321 SLT(cr,ra,rb);
1322 break;
1323 case LIR_ltui:
1324 SLTU(cr,ra,rb);
1325 break;
1326 case LIR_gtui:
1327 SLTU(cr,rb,ra);
1328 break;
1329 case LIR_leui:
1330 XORI(cr,cr,1);
1331 SLTU(cr,rb,ra);
1332 break;
1333 case LIR_geui:
1334 XORI(cr,cr,1);
1335 SLTU(cr,ra,rb);
1336 break;
1337 case LIR_eqd:
1338 C_EQ_D(ra,rb);
1339 break;
1340 case LIR_ltd:
1341 C_LT_D(ra,rb);
1342 break;
1343 case LIR_gtd:
1344 C_LT_D(rb,ra);
1345 break;
1346 case LIR_led:
1347 C_LE_D(ra,rb);
1348 break;
1349 case LIR_ged:
1350 C_LE_D(rb,ra);
1351 break;
1352 default:
1353 debug_only(outputf("%s",lirNames[condop]);)
1354 TODO(asm_cond);
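    // Note on the comparisons above (illustrative): instructions are emitted
    // in reverse, so at run time e.g. LIR_lei is
    //     slt   $cr,$rb,$ra      # cr = (b < a)
    //     xori  $cr,$cr,1        # cr = !(b < a) == (a <= b)
    // and LIR_eqi is
    //     xor   $cr,$ra,$rb      # zero iff a == b
    //     sltiu $cr,$cr,1        # cr = (cr == 0)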
1358 #define SEG(addr) (uint32_t(addr) & 0xf0000000)
1359 #define SEGOFFS(addr) (uint32_t(addr) & 0x0fffffff)
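    // Illustrative note: the j/jal encoding only carries a 26-bit word index;
    // the top four address bits come from the PC of the delay slot. SEG()
    // extracts those top four bits, so SEG(a) == SEG(b) means a j placed at a
    // can reach b directly; otherwise the longer lui/addiu/jr sequence below
    // is required.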
1362 // Check that the branch target is in range
1363 // Generate a trampoline if it isn't
1364 // Emit the branch delay slot instruction
1365 NIns* Assembler::asm_branchtarget(NIns * const targ)
1367 bool inrange;
1368 NIns *btarg = targ;
1370 // do initial underrun check here to ensure that inrange test is correct
1371 // allow
1372 if (targ)
1373 underrunProtect(2 * 4); // branch + delay slot
1375 // MIPS offsets are based on the address of the branch delay slot
1376 // which is the next instruction that will be generated
1377 ptrdiff_t bd = BOFFSET(targ-1);
1379 #if PEDANTIC
1380 inrange = false;
1381 #else
1382 inrange = (targ && isS16(bd));
1383 #endif
1385 // If the branch target is known and in range we can just generate a branch
1386 // Otherwise generate a branch to a trampoline that will be stored in the
1387 // literal area
1388 if (inrange)
1389 NOP();
1390 else {
1391 NIns *tramp = _nSlot;
1392 if (targ) {
1393 // Can the target be reached by a jump instruction?
1394 if (SEG(targ) == SEG(tramp)) {
1395 // [linkedinstructions]
1396 // bxxx trampoline
1397 // nop
1398 // ...
1399 // trampoline:
1400 // j targ
1401 // nop
1403 underrunProtect(4 * 4); // keep bxx and trampoline together
1405 NOP(); // delay slot
1407 // NB trampoline code is emitted in the correct order
1408 trampJ(targ);
1409 trampNOP(); // trampoline delay slot
1412 else {
1413 // [linkedinstructions]
1414 // bxxx trampoline
1415 // lui $at,%hi(targ)
1416 // ...
1417 // trampoline:
1418 // addiu $at,%lo(targ)
1419 // jr $at
1420 // nop
1422 underrunProtect(5 * 4); // keep bxx and trampoline together
1424 LUI(AT,hi(uint32_t(targ))); // delay slot
1426 // NB trampoline code is emitted in the correct order
1427 trampADDIU(AT, AT, lo(uint32_t(targ)));
1428 trampJR(AT);
1429 trampNOP(); // trampoline delay slot
1433 else {
1434 // Worst case is bxxx,lui / addiu,jr,nop as above.
1435 // Best case is that the branch to the trampoline can be replaced
1436 // with a branch to the target, in which case the trampoline will be abandoned.
1437 // Fixup handled in nPatchBranch
1439 underrunProtect(5 * 4); // keep bxx and trampoline together
1441 NOP(); // delay slot
1443 trampNOP();
1444 trampNOP();
1445 trampNOP();
1448 btarg = tramp;
1451 return btarg;
1455 NIns* Assembler::asm_bxx(bool branchOnFalse, LOpcode condop, Register ra, Register rb, NIns * const targ)
1457 NIns *patch = NULL;
1458 NIns *btarg = asm_branchtarget(targ);
1460 if (cpu_has_fpu && isCmpDOpcode(condop)) {
1461 // c.xx.d $ra,$rb
1462 // bc1x btarg
1463 switch (condop) {
1464 case LIR_eqd:
1465 if (branchOnFalse)
1466 BC1F(btarg);
1467 else
1468 BC1T(btarg);
1469 patch = _nIns;
1470 if (cpu_has_fpuhazard)
1471 NOP();
1472 C_EQ_D(ra, rb);
1473 break;
1474 case LIR_ltd:
1475 if (branchOnFalse)
1476 BC1F(btarg);
1477 else
1478 BC1T(btarg);
1479 patch = _nIns;
1480 if (cpu_has_fpuhazard)
1481 NOP();
1482 C_LT_D(ra, rb);
1483 break;
1484 case LIR_gtd:
1485 if (branchOnFalse)
1486 BC1F(btarg);
1487 else
1488 BC1T(btarg);
1489 patch = _nIns;
1490 if (cpu_has_fpuhazard)
1491 NOP();
1492 C_LT_D(rb, ra);
1493 break;
1494 case LIR_led:
1495 if (branchOnFalse)
1496 BC1F(btarg);
1497 else
1498 BC1T(btarg);
1499 patch = _nIns;
1500 if (cpu_has_fpuhazard)
1501 NOP();
1502 C_LE_D(ra, rb);
1503 break;
1504 case LIR_ged:
1505 if (branchOnFalse)
1506 BC1F(btarg);
1507 else
1508 BC1T(btarg);
1509 patch = _nIns;
1510 if (cpu_has_fpuhazard)
1511 NOP();
1512 C_LE_D(rb, ra);
1513 break;
1514 default:
1515 BADOPCODE(condop);
1516 break;
1519 else {
1520 // general case
1521 // s[lg]tu? $at,($ra,$rb|$rb,$ra)
1522 // b(ne|eq)z $at,btarg
1523 switch (condop) {
1524 case LIR_eqi:
1525 // special case
1526 // b(ne|eq) $ra,$rb,btarg
1527 if (branchOnFalse)
1528 BNE(ra, rb, btarg);
1529 else {
1530 if (ra == rb)
1531 B(btarg);
1532 else
1533 BEQ(ra, rb, btarg);
1535 patch = _nIns;
1536 break;
1537 case LIR_lti:
1538 if (branchOnFalse)
1539 BEQ(AT, ZERO, btarg);
1540 else
1541 BNE(AT, ZERO, btarg);
1542 patch = _nIns;
1543 SLT(AT, ra, rb);
1544 break;
1545 case LIR_gti:
1546 if (branchOnFalse)
1547 BEQ(AT, ZERO, btarg);
1548 else
1549 BNE(AT, ZERO, btarg);
1550 patch = _nIns;
1551 SLT(AT, rb, ra);
1552 break;
1553 case LIR_lei:
1554 if (branchOnFalse)
1555 BNE(AT, ZERO, btarg);
1556 else
1557 BEQ(AT, ZERO, btarg);
1558 patch = _nIns;
1559 SLT(AT, rb, ra);
1560 break;
1561 case LIR_gei:
1562 if (branchOnFalse)
1563 BNE(AT, ZERO, btarg);
1564 else
1565 BEQ(AT, ZERO, btarg);
1566 patch = _nIns;
1567 SLT(AT, ra, rb);
1568 break;
1569 case LIR_ltui:
1570 if (branchOnFalse)
1571 BEQ(AT, ZERO, btarg);
1572 else
1573 BNE(AT, ZERO, btarg);
1574 patch = _nIns;
1575 SLTU(AT, ra, rb);
1576 break;
1577 case LIR_gtui:
1578 if (branchOnFalse)
1579 BEQ(AT, ZERO, btarg);
1580 else
1581 BNE(AT, ZERO, btarg);
1582 patch = _nIns;
1583 SLTU(AT, rb, ra);
1584 break;
1585 case LIR_leui:
1586 if (branchOnFalse)
1587 BNE(AT, ZERO, btarg);
1588 else
1589 BEQ(AT, ZERO, btarg);
1590 patch = _nIns;
1591 SLT(AT, rb, ra);
1592 break;
1593 case LIR_geui:
1594 if (branchOnFalse)
1595 BNE(AT, ZERO, btarg);
1596 else
1597 BEQ(AT, ZERO, btarg);
1598 patch = _nIns;
1599 SLTU(AT, ra, rb);
1600 break;
1601 default:
1602 BADOPCODE(condop);
1605 TAG("asm_bxx(branchOnFalse=%d, condop=%s, ra=%s rb=%s targ=%p)",
1606 branchOnFalse, lirNames[condop], gpn(ra), gpn(rb), targ);
1607 return patch;
1610 NIns* Assembler::asm_branch_ov(LOpcode op, NIns* target)
1612 USE(op);
1613 NanoAssert(target != NULL);
1615 NIns* patch = asm_bxx(true, LIR_eqi, AT, ZERO, target);
1617 TAG("asm_branch_ov(op=%s, target=%p)", lirNames[op], target);
1618 return patch;
1621 NIns* Assembler::asm_branch(bool branchOnFalse, LIns *cond, NIns * const targ)
1623 NanoAssert(cond->isCmp());
1624 LOpcode condop = cond->opcode();
1625 RegisterMask allow = isCmpDOpcode(condop) ? FpRegs : GpRegs;
1626 LIns *a = cond->oprnd1();
1627 LIns *b = cond->oprnd2();
1628 Register ra = findRegFor(a, allow);
1629 Register rb = (b==a) ? ra : findRegFor(b, allow & ~rmask(ra));
1631 return asm_bxx(branchOnFalse, condop, ra, rb, targ);
1634 void Assembler::asm_j(NIns * const targ, bool bdelay)
1636 if (targ == NULL) {
1637 NanoAssert(bdelay);
1638 (void) asm_bxx(false, LIR_eqi, ZERO, ZERO, targ);
1640 else {
1641 NanoAssert(SEG(targ) == SEG(_nIns));
1642 if (bdelay) {
1643 underrunProtect(2*4); // j + delay
1644 NOP();
1646 J(targ);
1648 TAG("asm_j(targ=%p) bdelay=%d", targ, bdelay);
1651 void
1652 Assembler::asm_spill(Register rr, int d, bool quad)
1654 USE(quad);
1655 NanoAssert(d);
1656 if (IsFpReg(rr)) {
1657 NanoAssert(quad);
1658 asm_ldst64(true, rr, d, FP);
1660 else {
1661 NanoAssert(!quad);
1662 asm_ldst(OP_SW, rr, d, FP);
1664 TAG("asm_spill(rr=%d, d=%d, quad=%d)", rr, d, quad);
1667 void
1668 Assembler::asm_nongp_copy(Register dst, Register src)
1670 NanoAssert ((rmask(dst) & FpRegs) && (rmask(src) & FpRegs));
1671 MOV_D(dst, src);
1672 TAG("asm_nongp_copy(dst=%d src=%d)", dst, src);
1676 * asm_arg will encode the specified argument according to the current ABI, and
1677 * will update r and stkd as appropriate so that the next argument can be
1678 * encoded.
1680 * - doubles are 64-bit aligned. both in registers and on the stack.
1681 * If the next available argument register is A1, it is skipped
1682 * and the double is placed in A2:A3. If A0:A1 or A2:A3 are not
1683 * available, the double is placed on the stack, 64-bit aligned.
1684 * - 32-bit arguments are placed in registers and 32-bit aligned
1685 * on the stack.
1687 void
1688 Assembler::asm_arg(ArgType ty, LIns* arg, Register& r, Register& fr, int& stkd)
1690 // The stack offset must always be at least aligned to 4 bytes.
1691 NanoAssert((stkd & 3) == 0);
1693 if (ty == ARGTYPE_D) {
1694 // This task is fairly complex and so is delegated to asm_arg_64.
1695 asm_arg_64(arg, r, fr, stkd);
1696 } else {
1697 NanoAssert(ty == ARGTYPE_I || ty == ARGTYPE_UI);
1698 if (stkd < 16) {
1699 asm_regarg(ty, arg, r);
1700 fr = Register(fr + 1);
1701 r = Register(r + 1);
1703 else
1704 asm_stkarg(arg, stkd);
1705 // The o32 ABI calling convention is that if the first argument
1706 // is not a double, subsequent double values are passed in integer registers
1707 fr = r;
1708 stkd += 4;
1712 void
1713 Assembler::asm_call(LIns* ins)
1715 Register rr;
1716 LOpcode op = ins->opcode();
1718 switch (op) {
1719 case LIR_calld:
1720 NanoAssert(cpu_has_fpu);
1721 rr = FV0;
1722 break;
1723 case LIR_calli:
1724 rr = retRegs[0];
1725 break;
1726 default:
1727 BADOPCODE(op);
1728 return;
1731 deprecated_prepResultReg(ins, rmask(rr));
1733 // Do this after we've handled the call result, so we don't
1734 // force the call result to be spilled unnecessarily.
1736 evictScratchRegsExcept(0);
1738 const CallInfo* ci = ins->callInfo();
1739 ArgType argTypes[MAXARGS];
1740 uint32_t argc = ci->getArgTypes(argTypes);
1741 bool indirect = ci->isIndirect();
1743 // FIXME: Put one of the argument moves into the BDS slot
1745 underrunProtect(2*4); // jalr+delay
1746 NOP();
1747 JALR(T9);
1749 if (!indirect)
1750 // FIXME: If we can tell that we are calling non-PIC
1751 // (ie JIT) code, we could call direct instead of using t9
1752 asm_li(T9, ci->_address);
1753 else
1754 // Indirect call: we assign the address arg to t9
1755 // which matches the o32 ABI for calling functions
1756 asm_regarg(ARGTYPE_P, ins->arg(--argc), T9);
1758 // Encode the arguments, starting at A0 and with an empty argument stack.
1759 Register r = A0, fr = FA0;
1760 int stkd = 0;
1762 // Iterate through the argument list and encode each argument according to
1763 // the ABI.
1764 // Note that we loop through the arguments backwards as LIR specifies them
1765 // in reverse order.
1766 while(argc--)
1767 asm_arg(argTypes[argc], ins->arg(argc), r, fr, stkd);
1769 if (stkd > max_out_args)
1770 max_out_args = stkd;
1771 TAG("asm_call(ins=%p{%s})", ins, lirNames[ins->opcode()]);
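    // Illustrative note on the use of $t9 above: the o32 PIC convention is
    // that a callee receives its own entry address in $t9 (it rebuilds $gp
    // from it), so routing both direct and indirect calls through "jalr $t9"
    // keeps PIC callees working, at the cost of loading the address into $t9
    // even when a direct jal would have sufficed.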
1774 Register
1775 Assembler::nRegisterAllocFromSet(RegisterMask set)
1777 Register i;
1778 int n;
1780 // note, deliberate truncation of 64->32 bits
1781 if (set & 0xffffffff) {
1782 // gp reg
1783 n = ffs(int(set));
1784 NanoAssert(n != 0);
1785 i = Register(n - 1);
1787 else {
1788 // fp reg
1789 NanoAssert(cpu_has_fpu);
1790 n = ffs(int(set >> 32));
1791 NanoAssert(n != 0);
1792 i = Register(32 + n - 1);
1794 _allocator.free &= ~rmask(i);
1795 TAG("nRegisterAllocFromSet(set=%016llx) => %s", set, gpn(i));
1796 return i;
1799 void
1800 Assembler::nRegisterResetAll(RegAlloc& regs)
1802 regs.clear();
1803 regs.free = GpRegs;
1804 if (cpu_has_fpu)
1805 regs.free |= FpRegs;
1808 #define signextend16(s) ((int32_t(s)<<16)>>16)
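    // Worked examples (illustrative): signextend16(0x8000) == -32768,
    // signextend16(0x7fff) == 32767, signextend16(0x0123) == 0x0123.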
1810 void
1811 Assembler::nPatchBranch(NIns* branch, NIns* target)
1813 uint32_t op = (branch[0] >> 26) & 0x3f;
1814 uint32_t bdoffset = target-(branch+1);
1816 if (op == OP_BEQ || op == OP_BNE ||
1817 ((branch[0] & 0xfffe0000) == ((OP_COP1 << 26) | (COP1_BC << 21)))) {
1818 if (isS16(bdoffset)) {
1819 // The branch is in range, so just replace the offset in the instruction
1820 // The trampoline that was allocated is redundant and will remain unused
1821 branch[0] = (branch[0] & 0xffff0000) | (bdoffset & 0xffff);
1823 else {
1824 // The branch is pointing to a trampoline. Find out where that is
1825 NIns *tramp = branch + 1 + (signextend16(branch[0] & 0xffff));
1826 if (SEG(branch) == SEG(target)) {
1827 *tramp = J_FORMAT(OP_J,JINDEX(target));
1829 else {
1830 // Full 32-bit jump
1831 // bxx tramp
1832 // lui $at,(target>>16)&0xffff
1833 // ..
1834 // tramp:
1835 // ori $at,target & 0xffff
1836 // jr $at
1837 // nop
1838 branch[1] = U_FORMAT(OP_LUI,0,AT,hi(uint32_t(target)));
1839 tramp[0] = U_FORMAT(OP_ADDIU,AT,AT,lo(uint32_t(target)));
1840 tramp[1] = R_FORMAT(OP_SPECIAL,AT,0,0,0,SPECIAL_JR);
1844 else if (op == OP_J) {
1845 NanoAssert (SEG(branch) == SEG(target));
1846 branch[0] = J_FORMAT(OP_J,JINDEX(target));
1848 else
1849 TODO(unknown_patch);
1850 // TAG("nPatchBranch(branch=%p target=%p)", branch, target);
1853 void
1854 Assembler::nFragExit(LIns *guard)
1856 SideExit *exit = guard->record()->exit;
1857 Fragment *frag = exit->target;
1858 bool destKnown = (frag && frag->fragEntry);
1860 // Generate jump to epilogue and initialize lr.
1862 // If the guard already exists, use a simple jump.
1863 if (destKnown) {
1864 // j _fragEntry
1865 // move $v0,$zero
1866 underrunProtect(2 * 4); // j + branch delay
1867 MOVE(V0, ZERO);
1868 asm_j(frag->fragEntry, false);
1870 else {
1871 // Target doesn't exist. Jump to an epilogue for now.
1872 // This can be patched later.
1873 if (!_epilogue)
1874 _epilogue = genEpilogue();
1875 GuardRecord *lr = guard->record();
1876 // FIXME: _epilogue may be in another segment
1877 // lui $v0,%hi(lr)
1878 // j _epilogue
1879 // addiu $v0,%lo(lr)
1880 underrunProtect(2 * 4); // j + branch delay
1881 ADDIU(V0, V0, lo(int32_t(lr)));
1882 asm_j(_epilogue, false);
1883 LUI(V0, hi(int32_t(lr)));
1884 lr->jmp = _nIns;
1887 // profiling for the exit
1888 verbose_only(
1889 if (_logc->lcbits & LC_FragProfile) {
1890 // lui $fp,%hi(profCount)
1891 // lw $at,%lo(profCount)(fp)
1892 // addiu $at,1
1893 // sw $at,%lo(profCount)(fp)
1894 uint32_t profCount = uint32_t(&guard->record()->profCount);
1895 SW(AT, lo(profCount), FP);
1896 ADDIU(AT, AT, 1);
1897 LW(AT, lo(profCount), FP);
1898 LUI(FP, hi(profCount));
1902 // Pop the stack frame.
1903 MOVE(SP, FP);
1905 // return value is GuardRecord*
1906 TAG("nFragExit(guard=%p{%s})", guard, lirNames[guard->opcode()]);
1909 void
1910 Assembler::nInit(AvmCore*)
1912 nHints[LIR_calli] = rmask(V0);
1913 #if NJ_SOFTFLOAT_SUPPORTED
1914 nHints[LIR_hcalli] = rmask(V1);
1915 #endif
1916 nHints[LIR_calld] = rmask(FV0);
1917 nHints[LIR_paramp] = PREFER_SPECIAL;
1920 void Assembler::nBeginAssembly()
1922 max_out_args = 16; // Always reserve space for a0-a3
1925 // Increment the 32-bit profiling counter at pCtr, without
1926 // changing any registers.
1927 verbose_only(
1928 void Assembler::asm_inc_m32(uint32_t* /*pCtr*/)
1930 // TODO: implement this
1934 void
1935 Assembler::nativePageReset(void)
1937 _nSlot = 0;
1938 _nExitSlot = 0;
1939 TAG("nativePageReset()");
1942 void
1943 Assembler::nativePageSetup(void)
1945 NanoAssert(!_inExit);
1946 if (!_nIns)
1947 codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes));
1948 if (!_nExitIns)
1949 codeAlloc(exitStart, exitEnd, _nExitIns verbose_only(, exitBytes));
1951 // constpool starts at bottom of page and moves up
1952 // code starts at top of page and goes down,
1954 if (!_nSlot)
1955 _nSlot = codeStart;
1956 if (!_nExitSlot)
1957 _nExitSlot = exitStart;
1959 TAG("nativePageSetup()");
1963 NIns*
1964 Assembler::genPrologue(void)
1967 * Use a non-standard fp because we don't know the final frame size until now
1968 * addiu $sp,-FRAMESIZE
1969 * sw $ra,RA_OFFSET($sp)
1970 * sw $fp,FP_OFFSET($sp)
1971 * move $fp,$sp
1972 * addu $sp,-stackNeeded
1975 uint32_t stackNeeded = max_out_args + STACK_GRANULARITY * _activation.stackSlotsNeeded();
1976 uint32_t amt = alignUp(stackNeeded, NJ_ALIGN_STACK);
1978 if (amt) {
1979 if (isS16(-amt))
1980 ADDIU(SP, SP, -amt);
1981 else {
1982 ADDU(SP, SP, AT);
1983 asm_li(AT, -amt);
1987 NIns *patchEntry = _nIns; // FIXME: who uses this value and where should it point?
1989 MOVE(FP, SP);
1990 SW(FP, FP_OFFSET, SP);
1991 SW(RA, RA_OFFSET, SP); // No need to save for leaf functions
1992 ADDIU(SP, SP, -FRAMESIZE);
1994 TAG("genPrologue()");
1996 return patchEntry;
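    // Resulting frame layout (illustrative), from the code above together
    // with the FRAMESIZE/RA_OFFSET/FP_OFFSET definitions:
    //   old $sp                 -> (high addresses)
    //   new $fp + 4 (RA_OFFSET) :  saved $ra
    //   new $fp + 0 (FP_OFFSET) :  saved $fp        <- $fp = old $sp - FRAMESIZE
    //   below $fp               :  activation (spill) slots
    //   new $sp + 0             :  outgoing argument area (max_out_args bytes)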
1999 NIns*
2000 Assembler::genEpilogue(void)
2003 * move $sp,$fp
2004 * lw $ra,RA_OFFSET($sp)
2005 * lw $fp,FP_OFFSET($sp)
2006 * j $ra
2007 * addiu $sp,FRAMESIZE
2009 ADDIU(SP, SP, FRAMESIZE);
2010 JR(RA);
2011 LW(FP, FP_OFFSET, SP);
2012 LW(RA, RA_OFFSET, SP);
2013 MOVE(SP, FP);
2015 TAG("genEpilogue()");
2017 return _nIns;
2020 RegisterMask
2021 Assembler::nHint(LIns* ins)
2023 NanoAssert(ins->isop(LIR_paramp));
2024 RegisterMask prefer = 0;
2025 // FIXME: FLOAT parameters?
2026 if (ins->paramKind() == 0)
2027 if (ins->paramArg() < 4)
2028 prefer = rmask(argRegs[ins->paramArg()]);
2029 return prefer;
2032 void
2033 Assembler::underrunProtect(int bytes)
2035 NanoAssertMsg(bytes<=LARGEST_UNDERRUN_PROT, "constant LARGEST_UNDERRUN_PROT is too small");
2036 NanoAssert(_nSlot != 0);
2037 uintptr_t top = uintptr_t(_nSlot);
2038 uintptr_t pc = uintptr_t(_nIns);
2039 if (pc - bytes < top) {
2040 verbose_only(verbose_outputf(" %p:", _nIns);)
2041 NIns* target = _nIns;
2042 codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes));
2044 _nSlot = codeStart;
2046 // _nSlot points to the first empty position in the new code block
2047 // _nIns points just past the last empty position.
2048 asm_j(target, true);
2052 void
2053 Assembler::swapCodeChunks() {
2054 if (!_nExitIns)
2055 codeAlloc(exitStart, exitEnd, _nExitIns verbose_only(, exitBytes));
2056 if (!_nExitSlot)
2057 _nExitSlot = exitStart;
2058 SWAP(NIns*, _nIns, _nExitIns);
2059 SWAP(NIns*, _nSlot, _nExitSlot);
2060 SWAP(NIns*, codeStart, exitStart);
2061 SWAP(NIns*, codeEnd, exitEnd);
2062 verbose_only( SWAP(size_t, codeBytes, exitBytes); )
2066 #endif // FEATURE_NANOJIT && NANOJIT_MIPS