1 /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
2 /* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
3 /* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
16 * The Original Code is [Open Source Virtual Machine].
18 * The Initial Developer of the Original Code is
19 * MIPS Technologies Inc
20 * Portions created by the Initial Developer are Copyright (C) 2009
21 * the Initial Developer. All Rights Reserved.
23 * Contributor(s):
24 * Chris Dearman <chris@mips.com>
26 * Alternatively, the contents of this file may be used under the terms of
27 * either the GNU General Public License Version 2 or later (the "GPL"), or
28 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
40 #include "nanojit.h"
42 #if defined FEATURE_NANOJIT && defined NANOJIT_MIPS
44 namespace nanojit
46 #ifdef NJ_VERBOSE
47 const char *regNames[] = {
48 "$zr", "$at", "$v0", "$v1", "$a0", "$a1", "$a2", "$a3",
49 "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7",
50 "$s0", "$s1", "$s2", "$s3", "$s4", "$s5", "$s6", "$s7",
51 "$t8", "$t9", "$k0", "$k1", "$gp", "$sp", "$fp", "$ra",
53 "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7",
54 "$f8", "$f9", "$f10", "$f11", "$f12", "$f13", "$f14", "$f15",
55 "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23",
56 "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31"
59 const char *cname[16] = {
60 "f", "un", "eq", "ueq",
61 "olt", "ult", "ole", "ule",
62 "sf", "ngle", "seq", "ngl",
63 "lt", "nge", "le", "ngt"
66 const char *fname[32] = {
67 "resv", "resv", "resv", "resv",
68 "resv", "resv", "resv", "resv",
69 "resv", "resv", "resv", "resv",
70 "resv", "resv", "resv", "resv",
71 "s", "d", "resv", "resv",
72 "w", "l", "ps", "resv",
73 "resv", "resv", "resv", "resv",
74 "resv", "resv", "resv", "resv",
77 const char *oname[64] = {
78 "special", "regimm", "j", "jal", "beq", "bne", "blez", "bgtz",
79 "addi", "addiu", "slti", "sltiu", "andi", "ori", "xori", "lui",
80 "cop0", "cop1", "cop2", "cop1x", "beql", "bnel", "blezl", "bgtzl",
81 "resv", "resv", "resv", "resv", "special2", "jalx", "resv", "special3",
82 "lb", "lh", "lwl", "lw", "lbu", "lhu", "lwr", "resv",
83 "sb", "sh", "swl", "sw", "resv", "resv", "swr", "cache",
84 "ll", "lwc1", "lwc2", "pref", "resv", "ldc1", "ldc2", "resv",
85 "sc", "swc1", "swc2", "resv", "resv", "sdc1", "sdc2", "resv",
87 #endif
89 const Register Assembler::argRegs[] = { A0, A1, A2, A3 };
90 const Register Assembler::retRegs[] = { V0, V1 };
91 const Register Assembler::savedRegs[] = {
92 S0, S1, S2, S3, S4, S5, S6, S7,
93 #ifdef FPCALLEESAVED
94 FS0, FS1, FS2, FS3, FS4, FS5
95 #endif
98 #define USE(x) (void)x
99 #define BADOPCODE(op) NanoAssertMsgf(false, "unexpected opcode %s", lirNames[op])
101 // This function will get optimised by the compiler into a known value
102 static inline bool isLittleEndian(void)
104 const union {
105 uint32_t ival;
106 unsigned char cval[4];
107 } u = { 1 };
108 return u.cval[0] == 1;
111 // offsets to most/least significant parts of 64bit data in memory
112 // These functions will get optimised by the compiler into a known value
113 static inline int mswoff(void) {
114 return isLittleEndian() ? 4 : 0;
117 static inline int lswoff(void) {
118 return isLittleEndian() ? 0 : 4;
121 static inline Register mswregpair(Register r) {
122 return Register(r + (isLittleEndian() ? 1 : 0));
125 static inline Register lswregpair(Register r) {
126 return Register(r + (isLittleEndian() ? 0 : 1));
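    // Illustrative sketch (not from the original source): how the offset
    // helpers above split a double into its two 32-bit halves in memory.
    // The word at lswoff() holds the low mantissa bits, the word at mswoff()
    // holds the sign/exponent/high mantissa bits, on either endianness.
    // Wrapped in #if 0 so it has no effect on the build.
#if 0
    static void splitDouble(double d, int32_t &msw, int32_t &lsw)
    {
        union { double d; int32_t w[2]; } u;
        u.d = d;
        msw = u.w[mswoff() / 4];    // mswoff()/lswoff() are byte offsets,
        lsw = u.w[lswoff() / 4];    // divide by 4 to index 32-bit words
    }
#endif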
129 // These variables affect the code generator
130 // They can be defined as constants and the compiler will remove
131 // the unused paths through dead code elimination
132 // Alternatively they can be defined as variables which will allow
133 // the exact code generated to be determined at runtime
135 // cpu_has_fpu CPU has fpu
136 // cpu_has_movn CPU has movn
137 // cpu_has_cmov CPU has movf/movn instructions
138 // cpu_has_lsdc1 CPU has ldc1/sdc1 instructions
139 // cpu_has_lsdxc1     CPU has ldxc1/sdxc1 instructions
140 // cpu_has_fpuhazard hazard between c.xx.xx & bc1[tf]
142 // Currently the values are initialised based on preprocessor definitions
144 #ifdef DEBUG
145 // Don't allow the compiler to eliminate dead code for debug builds
146 #define _CONST
147 #else
148 #define _CONST const
149 #endif
151 #if NJ_SOFTFLOAT_SUPPORTED
152 _CONST bool cpu_has_fpu = false;
153 #else
154 _CONST bool cpu_has_fpu = true;
155 #endif
157 #if (__mips==4 || __mips==32 || __mips==64)
158 _CONST bool cpu_has_cmov = true;
159 #else
160 _CONST bool cpu_has_cmov = false;
161 #endif
163 #if __mips != 1
164 _CONST bool cpu_has_lsdc1 = true;
165 #else
166 _CONST bool cpu_has_lsdc1 = false;
167 #endif
169 #if (__mips==32 || __mips==64) && __mips_isa_rev>=2
170 _CONST bool cpu_has_lsdxc1 = true;
171 #else
172 _CONST bool cpu_has_lsdxc1 = false;
173 #endif
175 #if (__mips==1 || __mips==2 || __mips==3)
176 _CONST bool cpu_has_fpuhazard = true;
177 #else
178 _CONST bool cpu_has_fpuhazard = false;
179 #endif
180 #undef _CONST
182 /* Support routines */
184 debug_only (
185 // break to debugger when generating code to this address
186 static NIns *breakAddr;
187 static void codegenBreak(NIns *genAddr)
189 NanoAssert (breakAddr != genAddr);
193 // Equivalent to assembler %hi(), %lo()
194 uint16_t hi(uint32_t v)
196 uint16_t r = v >> 16;
197 if ((int16_t)(v) < 0)
198 r += 1;
199 return r;
202 int16_t lo(uint32_t v)
204 int16_t r = v;
205 return r;
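    // Worked example of the hi()/lo() split above (illustrative): the pair is
    // chosen so that (hi(v) << 16) + signextend(lo(v)) == v, which is what a
    // lui/addiu sequence computes.
    //   v = 0x00018000: lo(v) = -0x8000 (sign-extended), so hi(v) = 0x0002
    //                   and 0x00020000 + (-0x8000) == 0x00018000
    //   v = 0x00014000: lo(v) =  0x4000, hi(v) = 0x0001
    // This is why hi() adds 1 whenever the low half is negative as an int16_t,
    // rather than being a plain v >> 16.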
208 void Assembler::asm_li32(Register r, int32_t imm)
210 // general case generating a full 32-bit load
211 ADDIU(r, r, lo(imm));
212 LUI(r, hi(imm));
215 void Assembler::asm_li(Register r, int32_t imm)
217 #if !PEDANTIC
218 if (isU16(imm)) {
219 ORI(r, ZERO, imm);
220 return;
222 if (isS16(imm)) {
223 ADDIU(r, ZERO, imm);
224 return;
226 if ((imm & 0xffff) == 0) {
227 LUI(r, uint32_t(imm) >> 16);
228 return;
230 #endif
231 asm_li32(r, imm);
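    // Examples of the encodings chosen above (illustrative):
    //   imm = 0x00001234 -> ori   $r,$0,0x1234                  (fits unsigned 16-bit)
    //   imm = -16        -> addiu $r,$0,-16                      (fits signed 16-bit)
    //   imm = 0x12340000 -> lui   $r,0x1234                      (low half is zero)
    //   imm = 0x12345678 -> lui $r,0x1234; addiu $r,$r,0x5678    (full 32-bit load)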
234 // 64 bit immediate load to a register pair
235 void Assembler::asm_li_d(Register r, int32_t msw, int32_t lsw)
237 if (IsFpReg(r)) {
238 NanoAssert(cpu_has_fpu);
239 // li $at,lsw # iff lsw != 0
240 // mtc1 $at,$r # may use $0 instead of $at
241 // li $at,msw # iff (msw != 0) && (msw != lsw)
242 // mtc1 $at,$(r+1) # may use $0 instead of $at
243 if (msw == 0)
244 MTC1(ZERO, r+1);
245 else {
246 MTC1(AT, r+1);
247 // If the MSW & LSW values are different, reload AT
248 if (msw != lsw)
249 asm_li(AT, msw);
251 if (lsw == 0)
252 MTC1(ZERO, r);
253 else {
254 MTC1(AT, r);
255 asm_li(AT, lsw);
258 else {
260 * li $r.lo, lsw
261 * li $r.hi, msw # will be converted to move $r.hi,$r.lo if (msw==lsw)
263 if (msw == lsw)
264 MOVE(mswregpair(r), lswregpair(r));
265 else
266 asm_li(mswregpair(r), msw);
267 asm_li(lswregpair(r), lsw);
271 void Assembler::asm_move(Register d, Register s)
273 MOVE(d, s);
276 // General load/store operation
277 void Assembler::asm_ldst(int op, Register rt, int dr, Register rbase)
279 #if !PEDANTIC
280 if (isS16(dr)) {
281 LDST(op, rt, dr, rbase);
282 return;
284 #endif
286 // lui AT,hi(d)
287 // addu AT,rbase
288 // ldst rt,lo(d)(AT)
289 LDST(op, rt, lo(dr), AT);
290 ADDU(AT, AT, rbase);
291 LUI(AT, hi(dr));
294 void Assembler::asm_ldst64(bool store, Register r, int dr, Register rbase)
296 #if !PEDANTIC
297 if (isS16(dr) && isS16(dr+4)) {
298 if (IsGpReg(r)) {
299 LDST(store ? OP_SW : OP_LW, r+1, dr+4, rbase);
300 LDST(store ? OP_SW : OP_LW, r, dr, rbase);
302 else {
303 NanoAssert(cpu_has_fpu);
304 // NanoAssert((dr & 7) == 0);
305 if (cpu_has_lsdc1 && ((dr & 7) == 0)) {
306 // lsdc1 $fr,dr($rbase)
307 LDST(store ? OP_SDC1 : OP_LDC1, r, dr, rbase);
309 else {
310 // lswc1 $fr, dr+LSWOFF($rbase)
311 // lswc1 $fr+1,dr+MSWOFF($rbase)
312 LDST(store ? OP_SWC1 : OP_LWC1, r+1, dr+mswoff(), rbase);
313 LDST(store ? OP_SWC1 : OP_LWC1, r, dr+lswoff(), rbase);
315 return;
318 #endif
320 if (IsGpReg(r)) {
321 // lui $at,%hi(d)
322 // addu $at,$rbase
323 // ldsw $r, %lo(d)($at)
324 // ldst $r+1,%lo(d+4)($at)
325 LDST(store ? OP_SW : OP_LW, r+1, lo(dr+4), AT);
326 LDST(store ? OP_SW : OP_LW, r, lo(dr), AT);
327 ADDU(AT, AT, rbase);
328 LUI(AT, hi(dr));
330 else {
331 NanoAssert(cpu_has_fpu);
332 if (cpu_has_lsdxc1) {
333 // li $at,dr
334 // lsdcx1 $r,$at($rbase)
335 if (store)
336 SDXC1(r, AT, rbase);
337 else
338 LDXC1(r, AT, rbase);
339 asm_li(AT, dr);
341 else if (cpu_has_lsdc1) {
342 // lui $at,%hi(dr)
343 // addu $at,$rbase
344 // lsdc1 $r,%lo(dr)($at)
345 LDST(store ? OP_SDC1 : OP_LDC1, r, lo(dr), AT);
346 ADDU(AT, AT, rbase);
347 LUI(AT, hi(dr));
349 else {
350 // lui $at,%hi(d)
351 // addu $at,$rbase
352 // lswc1 $r, %lo(d+LSWOFF)($at)
353 // lswc1 $r+1,%lo(d+MSWOFF)($at)
354 LDST(store ? OP_SWC1 : OP_LWC1, r+1, lo(dr+mswoff()), AT);
355 LDST(store ? OP_SWC1 : OP_LWC1, r, lo(dr+lswoff()), AT);
356 ADDU(AT, AT, rbase);
357 LUI(AT, hi(dr));
362 void Assembler::asm_store_imm64(LIns *value, int dr, Register rbase)
364 NanoAssert(value->isImmD());
365 int32_t msw = value->immDhi();
366 int32_t lsw = value->immDlo();
368 // li $at,lsw # iff lsw != 0
369 // sw $at,off+LSWOFF($rbase) # may use $0 instead of $at
370 // li $at,msw # iff (msw != 0) && (msw != lsw)
371 // sw $at,off+MSWOFF($rbase) # may use $0 instead of $at
373 NanoAssert(isS16(dr) && isS16(dr+4));
375 if (lsw == 0)
376 SW(ZERO, dr+lswoff(), rbase);
377 else {
378 SW(AT, dr+lswoff(), rbase);
379 if (msw != lsw)
380 asm_li(AT, lsw);
382 if (msw == 0)
383 SW(ZERO, dr+mswoff(), rbase);
384 else {
385 SW(AT, dr+mswoff(), rbase);
386 // If the MSW & LSW values are different, reload AT
387 if (msw != lsw)
388 asm_li(AT, msw);
392 void Assembler::asm_regarg(ArgType ty, LIns* p, Register r)
394 NanoAssert(deprecated_isKnownReg(r));
395 if (ty == ARGTYPE_I || ty == ARGTYPE_UI) {
396 // arg goes in specific register
397 if (p->isImmI())
398 asm_li(r, p->immI());
399 else {
400 if (p->isExtant()) {
401 if (!p->deprecated_hasKnownReg()) {
402 // load it into the arg reg
403 int d = findMemFor(p);
404 if (p->isop(LIR_allocp))
405 ADDIU(r, FP, d);
406 else
407 asm_ldst(OP_LW, r, d, FP);
409 else
410 // it must be in a saved reg
411 MOVE(r, p->deprecated_getReg());
413 else {
414 // this is the last use, so fine to assign it
415 // to the scratch reg, it's dead after this point.
416 findSpecificRegFor(p, r);
420 else {
421 // Other argument types unsupported
422 NanoAssert(false);
426 void Assembler::asm_stkarg(LIns* arg, int stkd)
428 bool isF64 = arg->isD();
429 Register rr;
430 if (arg->isExtant() && (rr = arg->deprecated_getReg(), deprecated_isKnownReg(rr))) {
431 // The argument resides somewhere in registers, so we simply need to
432 // push it onto the stack.
433 if (!cpu_has_fpu || !isF64) {
434 NanoAssert(IsGpReg(rr));
435 SW(rr, stkd, SP);
437 else {
438 NanoAssert(cpu_has_fpu);
439 NanoAssert(IsFpReg(rr));
440 NanoAssert((stkd & 7) == 0);
441 asm_ldst64(true, rr, stkd, SP);
444 else {
445 // The argument does not reside in registers, so we need to get some
446 // memory for it and then copy it onto the stack.
447 int d = findMemFor(arg);
448 if (!isF64) {
449 SW(AT, stkd, SP);
450 if (arg->isop(LIR_allocp))
451 ADDIU(AT, FP, d);
452 else
453 LW(AT, d, FP);
455 else {
456 NanoAssert((stkd & 7) == 0);
457 SW(AT, stkd+4, SP);
458 LW(AT, d+4, FP);
459 SW(AT, stkd, SP);
460 LW(AT, d, FP);
465 // Encode a 64-bit floating-point argument using the appropriate ABI.
466 // This function operates in the same way as asm_arg, except that it will only
467 // handle arguments where (ArgType)ty == ARGTYPE_D.
468 void
469 Assembler::asm_arg_64(LIns* arg, Register& r, Register& fr, int& stkd)
471 // The stack offset must always be at least aligned to 4 bytes.
472 NanoAssert((stkd & 3) == 0);
473 #if NJ_SOFTFLOAT_SUPPORTED
474 NanoAssert(arg->isop(LIR_ii2d));
475 #else
476 NanoAssert(cpu_has_fpu);
477 #endif
479 // O32 ABI requires that 64-bit arguments are aligned on even-numbered
480 // registers, as A0:A1/FA0 or A2:A3/FA1. Use the stack offset to keep track
481 // of where we are.
482 if (stkd & 4) {
483 if (stkd < 16) {
484 r = Register(r + 1);
485 fr = Register(fr + 1);
487 stkd += 4;
490 if (stkd < 16) {
491 NanoAssert(fr == FA0 || fr == FA1 || fr == A2);
492 if (fr == FA0 || fr == FA1)
493 findSpecificRegFor(arg, fr);
494 else {
495 findSpecificRegFor(arg, FA1);
496 // Move it to the integer pair
497 Register fpupair = arg->getReg();
498 Register intpair = fr;
499 MFC1(mswregpair(intpair), Register(fpupair + 1)); // Odd fpu register contains sign,expt,manthi
500 MFC1(lswregpair(intpair), fpupair); // Even fpu register contains mantlo
502 r = Register(r + 2);
503 fr = Register(fr + 2);
505 else
506 asm_stkarg(arg, stkd);
508 stkd += 8;
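    // Worked example of the o32 placement implemented above (illustrative),
    // for a call f(int a, double b, int c, double d):
    //   a -> A0                   (stkd 0 -> 4)
    //   b -> A2:A3                (stkd padded to 8, then 8 -> 16; moved from
    //                              FA1 via mfc1 because the first argument
    //                              was integral)
    //   c -> stack, offset 16     (stkd 16 -> 20)
    //   d -> stack, offset 24     (stkd padded to 24, then 24 -> 32)
    // A1 is skipped so the double starts on an even register boundary, and
    // once stkd reaches 16 all remaining arguments go to the stack, with
    // doubles kept 8-byte aligned.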
511 /* Required functions */
513 #define FRAMESIZE 8
514 #define RA_OFFSET 4
515 #define FP_OFFSET 0
517 void Assembler::asm_store32(LOpcode op, LIns *value, int dr, LIns *base)
519 Register rt, rbase;
520 getBaseReg2(GpRegs, value, rt, GpRegs, base, rbase, dr);
522 switch (op) {
523 case LIR_sti:
524 asm_ldst(OP_SW, rt, dr, rbase);
525 break;
526 case LIR_sti2s:
527 asm_ldst(OP_SH, rt, dr, rbase);
528 break;
529 case LIR_sti2c:
530 asm_ldst(OP_SB, rt, dr, rbase);
531 break;
532 default:
533 BADOPCODE(op);
536 TAG("asm_store32(value=%p{%s}, dr=%d, base=%p{%s})",
537 value, lirNames[value->opcode()], dr, base, lirNames[base->opcode()]);
540 void Assembler::asm_ui2d(LIns *ins)
542 Register fr = deprecated_prepResultReg(ins, FpRegs);
543 Register v = findRegFor(ins->oprnd1(), GpRegs);
544 Register ft = registerAllocTmp(FpRegs & ~(rmask(fr))); // allocate temporary register for constant
546 // todo: support int value in memory, as per x86
547 NanoAssert(deprecated_isKnownReg(v));
549 // mtc1 $v,$ft
550 // bgez $v,1f
551 // cvt.d.w $fr,$ft
552 // lui $at,0x41f0 # (double)0x10000000LL = 0x41f0000000000000
553 // mtc1 $0,$ft
554 // mtc1 $at,$ft+1
555 // add.d $fr,$fr,$ft
556 // 1:
558 underrunProtect(6*4); // keep branch and destination together
559 NIns *here = _nIns;
560 ADD_D(fr,fr,ft);
561 MTC1(AT,ft+1);
562 MTC1(ZERO,ft);
563 LUI(AT,0x41f0);
564 CVT_D_W(fr,ft); // branch delay slot
565 BGEZ(v,here);
566 MTC1(v,ft);
568 TAG("asm_ui2d(ins=%p{%s})", ins, lirNames[ins->opcode()]);
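    // Worked example of the unsigned fixup above (illustrative): cvt.d.w
    // treats its source as signed, so for v = 0xffffffff it yields -1.0.
    // Since v is negative when viewed as signed, the bgez branch falls
    // through and 2^32 (0x41f0000000000000 == 4294967296.0) is added,
    // giving the correct unsigned value 4294967295.0. For v = 5 the branch
    // is taken and the result is simply 5.0.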
571 void Assembler::asm_d2i(LIns* ins)
573 NanoAssert(cpu_has_fpu);
575 Register rr = deprecated_prepResultReg(ins, GpRegs);
576 Register sr = findRegFor(ins->oprnd1(), FpRegs);
577 // trunc.w.d $sr,$sr
578 // mfc1 $rr,$sr
579 MFC1(rr,sr);
580 TRUNC_W_D(sr,sr);
581 TAG("asm_d2i(ins=%p{%s})", ins, lirNames[ins->opcode()]);
584 void Assembler::asm_fop(LIns *ins)
586 NanoAssert(cpu_has_fpu);
587 if (cpu_has_fpu) {
588 LIns* lhs = ins->oprnd1();
589 LIns* rhs = ins->oprnd2();
590 LOpcode op = ins->opcode();
592 // rr = ra OP rb
594 Register rr = deprecated_prepResultReg(ins, FpRegs);
595 Register ra = findRegFor(lhs, FpRegs);
596 Register rb = (rhs == lhs) ? ra : findRegFor(rhs, FpRegs & ~rmask(ra));
598 switch (op) {
599 case LIR_addd: ADD_D(rr, ra, rb); break;
600 case LIR_subd: SUB_D(rr, ra, rb); break;
601 case LIR_muld: MUL_D(rr, ra, rb); break;
602 case LIR_divd: DIV_D(rr, ra, rb); break;
603 default:
604 BADOPCODE(op);
607 TAG("asm_fop(ins=%p{%s})", ins, lirNames[ins->opcode()]);
610 void Assembler::asm_fneg(LIns *ins)
612 NanoAssert(cpu_has_fpu);
613 if (cpu_has_fpu) {
614 LIns* lhs = ins->oprnd1();
615 Register rr = deprecated_prepResultReg(ins, FpRegs);
616 Register sr = ( !lhs->isInReg()
617 ? findRegFor(lhs, FpRegs)
618 : lhs->deprecated_getReg() );
619 NEG_D(rr, sr);
621 TAG("asm_fneg(ins=%p{%s})", ins, lirNames[ins->opcode()]);
624 void Assembler::asm_immd(LIns *ins)
626 int d = deprecated_disp(ins);
627 Register rr = ins->deprecated_getReg();
629 deprecated_freeRsrcOf(ins);
631 if (cpu_has_fpu && deprecated_isKnownReg(rr)) {
632 if (d)
633 asm_spill(rr, d, true);
634 asm_li_d(rr, ins->immDhi(), ins->immDlo());
636 else {
637 NanoAssert(d);
638 asm_store_imm64(ins, d, FP);
640 TAG("asm_immd(ins=%p{%s})", ins, lirNames[ins->opcode()]);
643 #ifdef NANOJIT_64BIT
644 void
645 Assembler::asm_q2i(LIns *)
647 NanoAssert(0); // q2i shouldn't occur on 32-bit platforms
650 void Assembler::asm_ui2uq(LIns *ins)
652 USE(ins);
653 TODO(asm_ui2uq);
654 TAG("asm_ui2uq(ins=%p{%s})", ins, lirNames[ins->opcode()]);
656 #endif
658 void Assembler::asm_load64(LIns *ins)
660 NanoAssert(ins->isD());
662 LIns* base = ins->oprnd1();
663 int dr = ins->disp();
665 Register rd = ins->deprecated_getReg();
666 int ds = deprecated_disp(ins);
668 Register rbase = findRegFor(base, GpRegs);
669 NanoAssert(IsGpReg(rbase));
670 deprecated_freeRsrcOf(ins);
672 if (cpu_has_fpu && deprecated_isKnownReg(rd)) {
673 NanoAssert(IsFpReg(rd));
674 asm_ldst64 (false, rd, dr, rbase);
676 else {
677 // Either FPU is not available or the result needs to go into memory;
678 // in either case, FPU instructions are not required. Note that the
679 // result will never be loaded into registers if FPU is not available.
680 NanoAssert(!deprecated_isKnownReg(rd));
681 NanoAssert(ds != 0);
683 NanoAssert(isS16(dr) && isS16(dr+4));
684 NanoAssert(isS16(ds) && isS16(ds+4));
686 // Check that the offset is 8-byte (64-bit) aligned.
687 NanoAssert((ds & 0x7) == 0);
689 // FIXME: allocate a temporary to use for the copy
690 // to avoid load to use delay
691 // lw $at,dr($rbase)
692 // sw $at,ds($fp)
693 // lw $at,dr+4($rbase)
694 // sw $at,ds+4($fp)
696 SW(AT, ds+4, FP);
697 LW(AT, dr+4, rbase);
698 SW(AT, ds, FP);
699 LW(AT, dr, rbase);
702 TAG("asm_load64(ins=%p{%s})", ins, lirNames[ins->opcode()]);
705 void Assembler::asm_cond(LIns *ins)
707 Register r = deprecated_prepResultReg(ins, GpRegs);
708 LOpcode op = ins->opcode();
709 LIns *a = ins->oprnd1();
710 LIns *b = ins->oprnd2();
712 asm_cmp(op, a, b, r);
714 TAG("asm_cond(ins=%p{%s})", ins, lirNames[ins->opcode()]);
717 #if NJ_SOFTFLOAT_SUPPORTED
718 void Assembler::asm_qhi(LIns *ins)
720 Register rr = deprecated_prepResultReg(ins, GpRegs);
721 LIns *q = ins->oprnd1();
722 int d = findMemFor(q);
723 LW(rr, d+mswoff(), FP);
724 TAG("asm_qhi(ins=%p{%s})", ins, lirNames[ins->opcode()]);
727 void Assembler::asm_qlo(LIns *ins)
729 Register rr = deprecated_prepResultReg(ins, GpRegs);
730 LIns *q = ins->oprnd1();
731 int d = findMemFor(q);
732 LW(rr, d+lswoff(), FP);
733 TAG("asm_qlo(ins=%p{%s})", ins, lirNames[ins->opcode()]);
736 void Assembler::asm_qjoin(LIns *ins)
738 int d = findMemFor(ins);
739 NanoAssert(d && isS16(d));
740 LIns* lo = ins->oprnd1();
741 LIns* hi = ins->oprnd2();
743 Register r = findRegFor(hi, GpRegs);
744 SW(r, d+mswoff(), FP);
745 r = findRegFor(lo, GpRegs); // okay if r gets recycled.
746 SW(r, d+lswoff(), FP);
747 deprecated_freeRsrcOf(ins); // if we had a reg in use, flush it to mem
749 TAG("asm_qjoin(ins=%p{%s})", ins, lirNames[ins->opcode()]);
752 #endif
754 void Assembler::asm_neg_not(LIns *ins)
756 LOpcode op = ins->opcode();
757 Register rr = deprecated_prepResultReg(ins, GpRegs);
759 LIns* lhs = ins->oprnd1();
760 // If this is the last use of lhs in reg, we can re-use result reg.
761 // Else, lhs already has a register assigned.
762 Register ra = !lhs->isInReg() ? findSpecificRegFor(lhs, rr) : lhs->deprecated_getReg();
763 if (op == LIR_noti)
764 NOT(rr, ra);
765 else
766 NEGU(rr, ra);
767 TAG("asm_neg_not(ins=%p{%s})", ins, lirNames[ins->opcode()]);
770 void Assembler::asm_immi(LIns *ins)
772 Register rr = deprecated_prepResultReg(ins, GpRegs);
773 asm_li(rr, ins->immI());
774 TAG("asm_immi(ins=%p{%s})", ins, lirNames[ins->opcode()]);
777 void Assembler::asm_cmov(LIns *ins)
779 LIns* condval = ins->oprnd1();
780 LIns* iftrue = ins->oprnd2();
781 LIns* iffalse = ins->oprnd3();
783 NanoAssert(condval->isCmp());
784 NanoAssert(ins->opcode() == LIR_cmovi && iftrue->isI() && iffalse->isI());
786 const Register rr = deprecated_prepResultReg(ins, GpRegs);
788 const Register iftruereg = findRegFor(iftrue, GpRegs & ~rmask(rr));
789 MOVN(rr, iftruereg, AT);
790 /*const Register iffalsereg =*/ findSpecificRegFor(iffalse, rr);
791 asm_cmp(condval->opcode(), condval->oprnd1(), condval->oprnd2(), AT);
792 TAG("asm_cmov(ins=%p{%s})", ins, lirNames[ins->opcode()]);
795 void Assembler::asm_condd(LIns *ins)
797 NanoAssert(cpu_has_fpu);
798 if (cpu_has_fpu) {
799 Register r = deprecated_prepResultReg(ins, GpRegs);
800 LOpcode op = ins->opcode();
801 LIns *a = ins->oprnd1();
802 LIns *b = ins->oprnd2();
804 if (cpu_has_cmov) {
805 // c.xx.d $a,$b
806 // li $r,1
807 // movf $r,$0,$fcc0
808 MOVF(r, ZERO, 0);
809 ORI(r, ZERO, 1);
811 else {
812 // c.xx.d $a,$b
813 // [nop]
814 // bc1t 1f
815 // li $r,1
816 // move $r,$0
817 // 1:
818 NIns *here = _nIns;
819 verbose_only(verbose_outputf("%p:", here);)
820 underrunProtect(3*4);
821 MOVE(r, ZERO);
822 ORI(r, ZERO, 1); // branch delay slot
823 BC1T(here);
824 if (cpu_has_fpuhazard)
825 NOP();
827 asm_cmp(op, a, b, r);
829 TAG("asm_condd(ins=%p{%s})", ins, lirNames[ins->opcode()]);
832 void Assembler::asm_i2d(LIns *ins)
834 NanoAssert(cpu_has_fpu);
835 if (cpu_has_fpu) {
836 Register fr = deprecated_prepResultReg(ins, FpRegs);
837 Register v = findRegFor(ins->oprnd1(), GpRegs);
839 // mtc1 $v,$fr
840 // cvt.d.w $fr,$fr
841 CVT_D_W(fr,fr);
842 MTC1(v,fr);
844 TAG("asm_i2d(ins=%p{%s})", ins, lirNames[ins->opcode()]);
847 void Assembler::asm_ret(LIns *ins)
849 genEpilogue();
851 releaseRegisters();
852 assignSavedRegs();
854 LIns *value = ins->oprnd1();
855 if (ins->isop(LIR_reti)) {
856 findSpecificRegFor(value, V0);
858 else {
859 NanoAssert(ins->isop(LIR_retd));
860 #if NJ_SOFTFLOAT_SUPPORTED
861 NanoAssert(value->isop(LIR_ii2d));
862 findSpecificRegFor(value->oprnd1(), V0); // lo
863 findSpecificRegFor(value->oprnd2(), V1); // hi
864 #else
865 findSpecificRegFor(value, FV0);
866 #endif
868 TAG("asm_ret(ins=%p{%s})", ins, lirNames[ins->opcode()]);
871 void Assembler::asm_load32(LIns *ins)
873 LOpcode op = ins->opcode();
874 LIns* base = ins->oprnd1();
875 int d = ins->disp();
877 Register rres = deprecated_prepResultReg(ins, GpRegs);
878 Register rbase = getBaseReg(base, d, GpRegs);
880 switch (op) {
881 case LIR_lduc2ui: // 8-bit integer load, zero-extend to 32-bit
882 asm_ldst(OP_LBU, rres, d, rbase);
883 break;
884 case LIR_ldus2ui: // 16-bit integer load, zero-extend to 32-bit
885 asm_ldst(OP_LHU, rres, d, rbase);
886 break;
887 case LIR_ldc2i: // 8-bit integer load, sign-extend to 32-bit
888 asm_ldst(OP_LB, rres, d, rbase);
889 break;
890 case LIR_lds2i: // 16-bit integer load, sign-extend to 32-bit
891 asm_ldst(OP_LH, rres, d, rbase);
892 break;
893 case LIR_ldi: // 32-bit integer load
894 asm_ldst(OP_LW, rres, d, rbase);
895 break;
896 default:
897 BADOPCODE(op);
900 TAG("asm_load32(ins=%p{%s})", ins, lirNames[ins->opcode()]);
903 void Assembler::asm_param(LIns *ins)
905 uint32_t a = ins->paramArg();
906 uint32_t kind = ins->paramKind();
908 if (kind == 0) {
909 // ordinary param
910 // first 4 args A0..A3
911 if (a < 4) {
912 // incoming arg in register
913 deprecated_prepResultReg(ins, rmask(argRegs[a]));
914 } else {
915 // incoming arg is on stack
916 Register r = deprecated_prepResultReg(ins, GpRegs);
917 TODO(Check stack offset);
918 int d = FRAMESIZE + a * sizeof(intptr_t);
919 LW(r, d, FP);
922 else {
923 // saved param
924 deprecated_prepResultReg(ins, rmask(savedRegs[a]));
926 TAG("asm_param(ins=%p{%s})", ins, lirNames[ins->opcode()]);
929 void Assembler::asm_arith(LIns *ins)
931 LOpcode op = ins->opcode();
932 LIns* lhs = ins->oprnd1();
933 LIns* rhs = ins->oprnd2();
935 RegisterMask allow = GpRegs;
937 // We always need the result register and the first operand register.
938 Register rr = deprecated_prepResultReg(ins, allow);
940 // If this is the last use of lhs in reg, we can re-use the result reg.
941 // Else, lhs already has a register assigned.
942 Register ra = !lhs->isInReg() ? findSpecificRegFor(lhs, rr) : lhs->deprecated_getReg();
943 Register rb, t;
945 // Don't re-use the registers we've already allocated.
946 NanoAssert(deprecated_isKnownReg(rr));
947 NanoAssert(deprecated_isKnownReg(ra));
948 allow &= ~rmask(rr);
949 allow &= ~rmask(ra);
951 if (rhs->isImmI()) {
952 int32_t rhsc = rhs->immI();
953 if (isS16(rhsc)) {
954 // MIPS arith immediate ops sign-extend the imm16 value
955 switch (op) {
956 case LIR_addxovi:
957 case LIR_addjovi:
958 // add with overflow result into $at
959 // overflow is indicated by ((sign(rr)^sign(ra)) & (sign(rr)^sign(rhsc))
961 // [move $t,$ra] if (rr==ra)
962 // addiu $rr,$ra,rhsc
963 // [xor $at,$rr,$ra] if (rr!=ra)
964 // [xor $at,$rr,$t] if (rr==ra)
965 // [not $t,$rr] if (rhsc < 0)
966 // [and $at,$at,$t] if (rhsc < 0)
967 // [and $at,$at,$rr] if (rhsc >= 0)
968 // srl $at,$at,31
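                    // Worked example (illustrative): ra = 0x7fffffff, rhsc = 1
                    // gives rr = 0x80000000. sign(rr)^sign(ra) = 1, and since
                    // rhsc >= 0 the second term reduces to sign(rr) = 1, so
                    // after the final srl $at = 1: overflow. For ra = 5,
                    // rhsc = 1, rr = 6 both sign bits agree and $at = 0.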
970 t = registerAllocTmp(allow);
971 SRL(AT, AT, 31);
972 if (rhsc < 0) {
973 AND(AT, AT, t);
974 NOT(t, rr);
976 else
977 AND(AT, AT, rr);
978 if (rr == ra)
979 XOR(AT, rr, t);
980 else
981 XOR(AT, rr, ra);
982 ADDIU(rr, ra, rhsc);
983 if (rr == ra)
984 MOVE(t, ra);
985 goto done;
986 case LIR_addi:
987 ADDIU(rr, ra, rhsc);
988 goto done;
989 case LIR_subxovi:
990 case LIR_subjovi:
991 // subtract with overflow result into $at
992 // overflow is indicated by (sign(ra)^sign(rhsc)) & (sign(rr)^sign(ra))
994 // [move $t,$ra] if (rr==ra)
995 // addiu $rr,$ra,-rhsc
996 // [xor $at,$rr,$ra] if (rr!=ra)
997 // [xor $at,$rr,$t] if (rr==ra)
998 // [and $at,$at,$ra] if (rhsc >= 0 && rr!=ra)
999 // [and $at,$at,$t] if (rhsc >= 0 && rr==ra)
1000 // [not $t,$ra] if (rhsc < 0 && rr!=ra)
1001 // [not $t,$t] if (rhsc < 0 && rr==ra)
1002 // [and $at,$at,$t] if (rhsc < 0)
1003 // srl $at,$at,31
1004 if (isS16(-rhsc)) {
1005 t = registerAllocTmp(allow);
1006 SRL(AT,AT,31);
1007 if (rhsc < 0) {
1008 AND(AT, AT, t);
1009 if (rr == ra)
1010 NOT(t, t);
1011 else
1012 NOT(t, ra);
1014 else {
1015 if (rr == ra)
1016 AND(AT, AT, t);
1017 else
1018 AND(AT, AT, ra);
1020 if (rr == ra)
1021 XOR(AT, rr, t);
1022 else
1023 XOR(AT, rr, ra);
1024 ADDIU(rr, ra, -rhsc);
1025 if (rr == ra)
1026 MOVE(t, ra);
1027 goto done;
1029 break;
1030 case LIR_subi:
1031 if (isS16(-rhsc)) {
1032 ADDIU(rr, ra, -rhsc);
1033 goto done;
1035 break;
1036 case LIR_mulxovi:
1037 case LIR_muljovi:
1038 case LIR_muli:
1039 // FIXME: optimise constant multiply by 2^n
1040 // if ((rhsc & (rhsc-1)) == 0)
1041 // SLL(rr, ra, ffs(rhsc)-1);
1042 //goto done;
1043 break;
1044 default:
1045 break;
1048 if (isU16(rhsc)) {
1049 // MIPS logical immediate zero-extend the imm16 value
1050 switch (op) {
1051 case LIR_ori:
1052 ORI(rr, ra, rhsc);
1053 goto done;
1054 case LIR_andi:
1055 ANDI(rr, ra, rhsc);
1056 goto done;
1057 case LIR_xori:
1058 XORI(rr, ra, rhsc);
1059 goto done;
1060 default:
1061 break;
1065 // LIR shift ops only use the low 5 bits of the shift constant
1066 switch (op) {
1067 case LIR_lshi:
1068 SLL(rr, ra, rhsc&31);
1069 goto done;
1070 case LIR_rshui:
1071 SRL(rr, ra, rhsc&31);
1072 goto done;
1073 case LIR_rshi:
1074 SRA(rr, ra, rhsc&31);
1075 goto done;
1076 default:
1077 break;
1081 // general case, put rhs in register
1082 rb = (rhs == lhs) ? ra : findRegFor(rhs, allow);
1083 NanoAssert(deprecated_isKnownReg(rb));
1084 allow &= ~rmask(rb);
1086 // The register allocator will have set up one of these 4 cases
1087 // rr==ra && ra==rb r0 = r0 op r0
1088 // rr==ra && ra!=rb r0 = r0 op r1
1089 // rr!=ra && ra==rb r0 = r1 op r1
1090 // rr!=ra && ra!=rb && rr!=rb r0 = r1 op r2
1091 NanoAssert(ra == rb || rr != rb);
1093 switch (op) {
1094 case LIR_addxovi:
1095 case LIR_addjovi:
1096 // add with overflow result into $at
1097 // overflow is indicated by (sign(rr)^sign(ra)) & (sign(rr)^sign(rb))
1099 // [move $t,$ra] if (rr==ra)
1100 // addu $rr,$ra,$rb
1101 // ; Generate sign($rr)^sign($ra)
1102 // [xor $at,$rr,$t] sign($at)=sign($rr)^sign($t) if (rr==ra)
1103 // [xor $at,$rr,$ra] sign($at)=sign($rr)^sign($ra) if (rr!=ra)
1104 // ; Generate sign($rr)^sign($rb) if $ra!=$rb
1105 // [xor $t,$rr,$rb] if (ra!=rb)
1106 // [and $at,$t] if (ra!=rb)
1107 // srl $at,31
1109 t = ZERO;
1110 if (rr == ra || ra != rb)
1111 t = registerAllocTmp(allow);
1112 SRL(AT, AT, 31);
1113 if (ra != rb) {
1114 AND(AT, AT, t);
1115 XOR(t, rr, rb);
1117 if (rr == ra)
1118 XOR(AT, rr, t);
1119 else
1120 XOR(AT, rr, ra);
1121 ADDU(rr, ra, rb);
1122 if (rr == ra)
1123 MOVE(t, ra);
1124 break;
1125 case LIR_addi:
1126 ADDU(rr, ra, rb);
1127 break;
1128 case LIR_andi:
1129 AND(rr, ra, rb);
1130 break;
1131 case LIR_ori:
1132 OR(rr, ra, rb);
1133 break;
1134 case LIR_xori:
1135 XOR(rr, ra, rb);
1136 break;
1137 case LIR_subxovi:
1138 case LIR_subjovi:
1139 // subtract with overflow result into $at
1140 // overflow is indicated by (sign(ra)^sign(rb)) & (sign(rr)^sign(ra))
1142 // [move $t,$ra] if (rr==ra)
1143 // ; Generate sign($at)=sign($ra)^sign($rb)
1144 // xor $at,$ra,$rb
1145 // subu $rr,$ra,$rb
1146 // ; Generate sign($t)=sign($rr)^sign($ra)
1147 // [xor $t,$rr,$ra] if (rr!=ra)
1148 // [xor $t,$rr,$t] if (rr==ra)
1149 // and $at,$at,$t
1150 // srl $at,$at,31
1152 if (ra == rb) {
1153 // special case for (ra == rb) which can't overflow
1154 MOVE(AT, ZERO);
1155 SUBU(rr, ra, rb);
1157 else {
1158 t = registerAllocTmp(allow);
1159 SRL(AT, AT, 31);
1160 AND(AT, AT, t);
1161 if (rr == ra)
1162 XOR(t, rr, t);
1163 else
1164 XOR(t, rr, ra);
1165 SUBU(rr, ra, rb);
1166 XOR(AT, ra, rb);
1167 if (rr == ra)
1168 MOVE(t, ra);
1170 break;
1171 case LIR_subi:
1172 SUBU(rr, ra, rb);
1173 break;
1174 case LIR_lshi:
1175 // SLLV uses the low-order 5 bits of rb for the shift amount so no masking required
1176 SLLV(rr, ra, rb);
1177 break;
1178 case LIR_rshi:
1179 // SRAV uses the low-order 5 bits of rb for the shift amount so no masking required
1180 SRAV(rr, ra, rb);
1181 break;
1182 case LIR_rshui:
1183 // SRLV uses the low-order 5 bits of rb for the shift amount so no masking required
1184 SRLV(rr, ra, rb);
1185 break;
1186 case LIR_mulxovi:
1187 case LIR_muljovi:
1188 t = registerAllocTmp(allow);
1189 // Overflow indication required
1190 // Do a 32x32 signed multiply generating a 64 bit result
1191 // Compare bit31 of the result with the high order bits
1192 // mult $ra,$rb
1193 // mflo $rr # result to $rr
1194 // sra $t,$rr,31 # $t = 0x00000000 or 0xffffffff
1195 // mfhi $at
1196 // xor $at,$at,$t # sets $at to nonzero if overflow
1197 XOR(AT, AT, t);
1198 MFHI(AT);
1199 SRA(t, rr, 31);
1200 MFLO(rr);
1201 MULT(ra, rb);
1202 break;
1203 case LIR_muli:
1204 MUL(rr, ra, rb);
1205 break;
1206 default:
1207 BADOPCODE(op);
1209 done:
1210 TAG("asm_arith(ins=%p{%s})", ins, lirNames[ins->opcode()]);
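    // Illustrative C-level sketch (not part of the original source) of the
    // multiply-overflow test emitted for LIR_mulxovi/LIR_muljovi above: do
    // the full 32x32 -> 64-bit signed multiply and check that the high word
    // is just the sign-extension of the low word. Wrapped in #if 0 so it has
    // no effect on the build.
#if 0
    static bool mul_overflows(int32_t a, int32_t b)
    {
        int64_t full = int64_t(a) * int64_t(b);  // mult $a,$b -> HI:LO
        int32_t lo   = int32_t(full);            // mflo
        int32_t hi   = int32_t(full >> 32);      // mfhi
        return (hi ^ (lo >> 31)) != 0;           // sra 31; xor: nonzero iff overflow
    }
#endif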
1213 void Assembler::asm_store64(LOpcode op, LIns *value, int dr, LIns *base)
1215 // NanoAssert((dr & 7) == 0);
1216 #if NANOJIT_64BIT
1217 NanoAssert (op == LIR_stq || op == LIR_std2f || op == LIR_std);
1218 #else
1219 NanoAssert (op == LIR_std2f || op == LIR_std);
1220 #endif
1222 switch (op) {
1223 case LIR_std:
1224 if (cpu_has_fpu) {
1225 Register rbase = findRegFor(base, GpRegs);
1227 if (value->isImmD())
1228 asm_store_imm64(value, dr, rbase);
1229 else {
1230 Register fr = findRegFor(value, FpRegs);
1231 asm_ldst64(true, fr, dr, rbase);
1234 else {
1235 Register rbase = findRegFor(base, GpRegs);
1236 // *(uint64_t*)(rb+dr) = *(uint64_t*)(FP+da)
1238 int ds = findMemFor(value);
1240 // lw $at,ds(FP)
1241 // sw $at,dr($rbase)
1242 // lw $at,ds+4(FP)
1243 // sw $at,dr+4($rbase)
1244 SW(AT, dr+4, rbase);
1245 LW(AT, ds+4, FP);
1246 SW(AT, dr, rbase);
1247 LW(AT, ds, FP);
1250 break;
1251 case LIR_std2f:
1252 NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
1253 return;
1254 default:
1255 BADOPCODE(op);
1256 return;
1259 TAG("asm_store64(value=%p{%s}, dr=%d, base=%p{%s})",
1260 value, lirNames[value->opcode()], dr, base, lirNames[base->opcode()]);
1263 bool Assembler::canRemat(LIns* ins)
1265 return ins->isImmI() || ins->isop(LIR_allocp);
1268 void Assembler::asm_restore(LIns *i, Register r)
1270 int d;
1271 if (i->isop(LIR_allocp)) {
1272 d = deprecated_disp(i);
1273 if (isS16(d))
1274 ADDIU(r, FP, d);
1275 else {
1276 ADDU(r, FP, AT);
1277 asm_li(AT, d);
1280 else if (i->isImmI()) {
1281 asm_li(r, i->immI());
1283 else {
1284 d = findMemFor(i);
1285 if (IsFpReg(r)) {
1286 asm_ldst64(false, r, d, FP);
1288 else {
1289 asm_ldst(OP_LW, r, d, FP);
1292 TAG("asm_restore(i=%p{%s}, r=%d)", i, lirNames[i->opcode()], r);
1295 void Assembler::asm_cmp(LOpcode condop, LIns *a, LIns *b, Register cr)
1297 RegisterMask allow = isCmpDOpcode(condop) ? FpRegs : GpRegs;
1298 Register ra = findRegFor(a, allow);
1299 Register rb = (b==a) ? ra : findRegFor(b, allow & ~rmask(ra));
1301 // FIXME: Use slti if b is small constant
1303 /* Generate the condition code */
1304 switch (condop) {
1305 case LIR_eqi:
1306 SLTIU(cr,cr,1);
1307 XOR(cr,ra,rb);
1308 break;
1309 case LIR_lti:
1310 SLT(cr,ra,rb);
1311 break;
1312 case LIR_gti:
1313 SLT(cr,rb,ra);
1314 break;
1315 case LIR_lei:
1316 XORI(cr,cr,1);
1317 SLT(cr,rb,ra);
1318 break;
1319 case LIR_gei:
1320 XORI(cr,cr,1);
1321 SLT(cr,ra,rb);
1322 break;
1323 case LIR_ltui:
1324 SLTU(cr,ra,rb);
1325 break;
1326 case LIR_gtui:
1327 SLTU(cr,rb,ra);
1328 break;
1329 case LIR_leui:
1330 XORI(cr,cr,1);
1331 SLTU(cr,rb,ra);
1332 break;
1333 case LIR_geui:
1334 XORI(cr,cr,1);
1335 SLTU(cr,ra,rb);
1336 break;
1337 case LIR_eqd:
1338 C_EQ_D(ra,rb);
1339 break;
1340 case LIR_ltd:
1341 C_LT_D(ra,rb);
1342 break;
1343 case LIR_gtd:
1344 C_LT_D(rb,ra);
1345 break;
1346 case LIR_led:
1347 C_LE_D(ra,rb);
1348 break;
1349 case LIR_ged:
1350 C_LE_D(rb,ra);
1351 break;
1352 default:
1353 debug_only(outputf("%s",lirNames[condop]);)
1354 TODO(asm_cond);
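    // Note on the comparisons above (illustrative): instructions are emitted
    // in reverse, so at run time e.g. LIR_lei is
    //     slt   $cr,$rb,$ra      # cr = (b < a)
    //     xori  $cr,$cr,1        # cr = !(b < a) == (a <= b)
    // and LIR_eqi is
    //     xor   $cr,$ra,$rb      # zero iff a == b
    //     sltiu $cr,$cr,1        # cr = (cr == 0)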
1358 #define SEG(addr) (uint32_t(addr) & 0xf0000000)
1359 #define SEGOFFS(addr) (uint32_t(addr) & 0x0fffffff)
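    // Illustrative note: the j/jal encoding only carries a 26-bit word index;
    // the top four address bits come from the PC of the delay slot. SEG()
    // extracts those top four bits, so SEG(a) == SEG(b) means a j placed at a
    // can reach b directly; otherwise the longer lui/addiu/jr sequence below
    // is required.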
1362 // Check that the branch target is in range
1363 // Generate a trampoline if it isn't
1364 // Emit the branch delay slot instruction
1365 NIns* Assembler::asm_branchtarget(NIns * const targ)
1367 bool inrange;
1368 NIns *btarg = targ;
1370 // do initial underrun check here to ensure that inrange test is correct
1371 // allow
1372 if (targ)
1373 underrunProtect(2 * 4); // branch + delay slot
1375 // MIPS offsets are based on the address of the branch delay slot
1376 // which is the next instruction that will be generated
1377 ptrdiff_t bd = BOFFSET(targ-1);
1379 #if PEDANTIC
1380 inrange = false;
1381 #else
1382 inrange = (targ && isS16(bd));
1383 #endif
1385 // If the branch target is known and in range we can just generate a branch
1386 // Otherwise generate a branch to a trampoline that will be stored in the
1387 // literal area
1388 if (inrange)
1389 NOP();
1390 else {
1391 NIns *tramp = _nSlot;
1392 if (targ) {
1393 // Can the target be reached by a jump instruction?
1394 if (SEG(targ) == SEG(tramp)) {
1395 // [linkedinstructions]
1396 // bxxx trampoline
1397 // nop
1398 // ...
1399 // trampoline:
1400 // j targ
1401 // nop
1403 underrunProtect(4 * 4); // keep bxx and trampoline together
1405 NOP(); // delay slot
1407 // NB trampoline code is emitted in the correct order
1408 trampJ(targ);
1409 trampNOP(); // trampoline delay slot
1412 else {
1413 // [linkedinstructions]
1414 // bxxx trampoline
1415 // lui $at,%hi(targ)
1416 // ...
1417 // trampoline:
1418 // addiu $at,%lo(targ)
1419 // jr $at
1420 // nop
1422 underrunProtect(5 * 4); // keep bxx and trampoline together
1424 LUI(AT,hi(uint32_t(targ))); // delay slot
1426 // NB trampoline code is emitted in the correct order
1427 trampADDIU(AT, AT, lo(uint32_t(targ)));
1428 trampJR(AT);
1429 trampNOP(); // trampoline delay slot
1433 else {
1434 // Worst case is bxxx,lui / addiu,jr,nop as above.
1435 // Best case is that the branch to the trampoline can be replaced
1436 // with a branch to the target, in which case the trampoline will be abandoned.
1437 // Fixup handled in nPatchBranch
1439 underrunProtect(5 * 4); // keep bxx and trampoline together
1441 NOP(); // delay slot
1443 trampNOP();
1444 trampNOP();
1445 trampNOP();
1448 btarg = tramp;
1451 return btarg;
1455 NIns* Assembler::asm_bxx(bool branchOnFalse, LOpcode condop, Register ra, Register rb, NIns * const targ)
1457 NIns *patch = NULL;
1458 NIns *btarg = asm_branchtarget(targ);
1460 if (cpu_has_fpu && isCmpDOpcode(condop)) {
1461 // c.xx.d $ra,$rb
1462 // bc1x btarg
1463 switch (condop) {
1464 case LIR_eqd:
1465 if (branchOnFalse)
1466 BC1F(btarg);
1467 else
1468 BC1T(btarg);
1469 patch = _nIns;
1470 if (cpu_has_fpuhazard)
1471 NOP();
1472 C_EQ_D(ra, rb);
1473 break;
1474 case LIR_ltd:
1475 if (branchOnFalse)
1476 BC1F(btarg);
1477 else
1478 BC1T(btarg);
1479 patch = _nIns;
1480 if (cpu_has_fpuhazard)
1481 NOP();
1482 C_LT_D(ra, rb);
1483 break;
1484 case LIR_gtd:
1485 if (branchOnFalse)
1486 BC1F(btarg);
1487 else
1488 BC1T(btarg);
1489 patch = _nIns;
1490 if (cpu_has_fpuhazard)
1491 NOP();
1492 C_LT_D(rb, ra);
1493 break;
1494 case LIR_led:
1495 if (branchOnFalse)
1496 BC1F(btarg);
1497 else
1498 BC1T(btarg);
1499 patch = _nIns;
1500 if (cpu_has_fpuhazard)
1501 NOP();
1502 C_LE_D(ra, rb);
1503 break;
1504 case LIR_ged:
1505 if (branchOnFalse)
1506 BC1F(btarg);
1507 else
1508 BC1T(btarg);
1509 patch = _nIns;
1510 if (cpu_has_fpuhazard)
1511 NOP();
1512 C_LE_D(rb, ra);
1513 break;
1514 default:
1515 BADOPCODE(condop);
1516 break;
1519 else {
1520 // general case
1521 // s[lg]tu? $at,($ra,$rb|$rb,$ra)
1522 // b(ne|eq)z $at,btarg
1523 switch (condop) {
1524 case LIR_eqi:
1525 // special case
1526 // b(ne|eq) $ra,$rb,btarg
1527 if (branchOnFalse)
1528 BNE(ra, rb, btarg);
1529 else {
1530 if (ra == rb)
1531 B(btarg);
1532 else
1533 BEQ(ra, rb, btarg);
1535 patch = _nIns;
1536 break;
1537 case LIR_lti:
1538 if (branchOnFalse)
1539 BEQ(AT, ZERO, btarg);
1540 else
1541 BNE(AT, ZERO, btarg);
1542 patch = _nIns;
1543 SLT(AT, ra, rb);
1544 break;
1545 case LIR_gti:
1546 if (branchOnFalse)
1547 BEQ(AT, ZERO, btarg);
1548 else
1549 BNE(AT, ZERO, btarg);
1550 patch = _nIns;
1551 SLT(AT, rb, ra);
1552 break;
1553 case LIR_lei:
1554 if (branchOnFalse)
1555 BNE(AT, ZERO, btarg);
1556 else
1557 BEQ(AT, ZERO, btarg);
1558 patch = _nIns;
1559 SLT(AT, rb, ra);
1560 break;
1561 case LIR_gei:
1562 if (branchOnFalse)
1563 BNE(AT, ZERO, btarg);
1564 else
1565 BEQ(AT, ZERO, btarg);
1566 patch = _nIns;
1567 SLT(AT, ra, rb);
1568 break;
1569 case LIR_ltui:
1570 if (branchOnFalse)
1571 BEQ(AT, ZERO, btarg);
1572 else
1573 BNE(AT, ZERO, btarg);
1574 patch = _nIns;
1575 SLTU(AT, ra, rb);
1576 break;
1577 case LIR_gtui:
1578 if (branchOnFalse)
1579 BEQ(AT, ZERO, btarg);
1580 else
1581 BNE(AT, ZERO, btarg);
1582 patch = _nIns;
1583 SLTU(AT, rb, ra);
1584 break;
1585 case LIR_leui:
1586 if (branchOnFalse)
1587 BNE(AT, ZERO, btarg);
1588 else
1589 BEQ(AT, ZERO, btarg);
1590 patch = _nIns;
1591 SLT(AT, rb, ra);
1592 break;
1593 case LIR_geui:
1594 if (branchOnFalse)
1595 BNE(AT, ZERO, btarg);
1596 else
1597 BEQ(AT, ZERO, btarg);
1598 patch = _nIns;
1599 SLTU(AT, ra, rb);
1600 break;
1601 default:
1602 BADOPCODE(condop);
1605 TAG("asm_bxx(branchOnFalse=%d, condop=%s, ra=%s rb=%s targ=%p)",
1606 branchOnFalse, lirNames[condop], gpn(ra), gpn(rb), targ);
1607 return patch;
1610 NIns* Assembler::asm_branch_ov(LOpcode op, NIns* target)
1612 USE(op);
1613 NanoAssert(target != NULL);
1615 NIns* patch = asm_bxx(true, LIR_eqi, AT, ZERO, target);
1617 TAG("asm_branch_ov(op=%s, target=%p)", lirNames[op], target);
1618 return patch;
1621 NIns* Assembler::asm_branch(bool branchOnFalse, LIns *cond, NIns * const targ)
1623 NanoAssert(cond->isCmp());
1624 LOpcode condop = cond->opcode();
1625 RegisterMask allow = isCmpDOpcode(condop) ? FpRegs : GpRegs;
1626 LIns *a = cond->oprnd1();
1627 LIns *b = cond->oprnd2();
1628 Register ra = findRegFor(a, allow);
1629 Register rb = (b==a) ? ra : findRegFor(b, allow & ~rmask(ra));
1631 return asm_bxx(branchOnFalse, condop, ra, rb, targ);
1634 void Assembler::asm_j(NIns * const targ, bool bdelay)
1636 if (targ == NULL) {
1637 NanoAssert(bdelay);
1638 (void) asm_bxx(false, LIR_eqi, ZERO, ZERO, targ);
1640 else {
1641 NanoAssert(SEG(targ) == SEG(_nIns));
1642 if (bdelay) {
1643 underrunProtect(2*4); // j + delay
1644 NOP();
1646 J(targ);
1648 TAG("asm_j(targ=%p) bdelay=%d", targ, bdelay);
1651 void
1652 Assembler::asm_spill(Register rr, int d, bool quad)
1654 USE(quad);
1655 NanoAssert(d);
1656 if (IsFpReg(rr)) {
1657 NanoAssert(quad);
1658 asm_ldst64(true, rr, d, FP);
1660 else {
1661 NanoAssert(!quad);
1662 asm_ldst(OP_SW, rr, d, FP);
1664 TAG("asm_spill(rr=%d, d=%d, quad=%d)", rr, d, quad);
1667 void
1668 Assembler::asm_nongp_copy(Register dst, Register src)
1670 NanoAssert ((rmask(dst) & FpRegs) && (rmask(src) & FpRegs));
1671 MOV_D(dst, src);
1672 TAG("asm_nongp_copy(dst=%d src=%d)", dst, src);
1676 * asm_arg will encode the specified argument according to the current ABI, and
1677 * will update r and stkd as appropriate so that the next argument can be
1678 * encoded.
1680 * - doubles are 64-bit aligned. both in registers and on the stack.
1681 * If the next available argument register is A1, it is skipped
1682 * and the double is placed in A2:A3. If A0:A1 or A2:A3 are not
1683 * available, the double is placed on the stack, 64-bit aligned.
1684 * - 32-bit arguments are placed in registers and 32-bit aligned
1685 * on the stack.
1687 void
1688 Assembler::asm_arg(ArgType ty, LIns* arg, Register& r, Register& fr, int& stkd)
1690 // The stack offset must always be at least aligned to 4 bytes.
1691 NanoAssert((stkd & 3) == 0);
1693 if (ty == ARGTYPE_D) {
1694 // This task is fairly complex and so is delegated to asm_arg_64.
1695 asm_arg_64(arg, r, fr, stkd);
1696 } else {
1697 NanoAssert(ty == ARGTYPE_I || ty == ARGTYPE_UI);
1698 if (stkd < 16) {
1699 asm_regarg(ty, arg, r);
1700 fr = Register(fr + 1);
1701 r = Register(r + 1);
1703 else
1704 asm_stkarg(arg, stkd);
1705 // The o32 ABI calling convention is that if the first argument
1706 // is not a double, subsequent double values are passed in integer registers
1707 fr = r;
1708 stkd += 4;
1712 void
1713 Assembler::asm_call(LIns* ins)
1715 Register rr;
1716 LOpcode op = ins->opcode();
1718 switch (op) {
1719 case LIR_calld:
1720 NanoAssert(cpu_has_fpu);
1721 rr = FV0;
1722 break;
1723 case LIR_calli:
1724 rr = retRegs[0];
1725 break;
1726 default:
1727 BADOPCODE(op);
1728 return;
1731 deprecated_prepResultReg(ins, rmask(rr));
1733 // Do this after we've handled the call result, so we don't
1734 // force the call result to be spilled unnecessarily.
1736 evictScratchRegsExcept(0);
1738 const CallInfo* ci = ins->callInfo();
1739 ArgType argTypes[MAXARGS];
1740 uint32_t argc = ci->getArgTypes(argTypes);
1741 bool indirect = ci->isIndirect();
1743 // FIXME: Put one of the argument moves into the BDS slot
1745 underrunProtect(2*4); // jalr+delay
1746 NOP();
1747 JALR(T9);
1749 if (!indirect)
1750 // FIXME: If we can tell that we are calling non-PIC
1751 // (ie JIT) code, we could call direct instead of using t9
1752 asm_li(T9, ci->_address);
1753 else
1754 // Indirect call: we assign the address arg to t9
1755 // which matches the o32 ABI for calling functions
1756 asm_regarg(ARGTYPE_P, ins->arg(--argc), T9);
1758 // Encode the arguments, starting at A0 and with an empty argument stack.
1759 Register r = A0, fr = FA0;
1760 int stkd = 0;
1762 // Iterate through the argument list and encode each argument according to
1763 // the ABI.
1764 // Note that we loop through the arguments backwards as LIR specifies them
1765 // in reverse order.
1766 while(argc--)
1767 asm_arg(argTypes[argc], ins->arg(argc), r, fr, stkd);
1769 if (stkd > max_out_args)
1770 max_out_args = stkd;
1771 TAG("asm_call(ins=%p{%s})", ins, lirNames[ins->opcode()]);
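    // Illustrative note on the use of $t9 above: the o32 PIC convention is
    // that a callee receives its own entry address in $t9 (it rebuilds $gp
    // from it), so routing both direct and indirect calls through "jalr $t9"
    // keeps PIC callees working, at the cost of loading the address into $t9
    // even when a direct jal would have sufficed.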
1774 Register
1775 Assembler::nRegisterAllocFromSet(RegisterMask set)
1777 Register i;
1778 int n;
1780 // note, deliberate truncation of 64->32 bits
1781 if (set & 0xffffffff) {
1782 // gp reg
1783 n = ffs(int(set));
1784 NanoAssert(n != 0);
1785 i = Register(n - 1);
1787 else {
1788 // fp reg
1789 NanoAssert(cpu_has_fpu);
1790 n = ffs(int(set >> 32));
1791 NanoAssert(n != 0);
1792 i = Register(32 + n - 1);
1794 _allocator.free &= ~rmask(i);
1795 TAG("nRegisterAllocFromSet(set=%016llx) => %s", set, gpn(i));
1796 return i;
1799 void
1800 Assembler::nRegisterResetAll(RegAlloc& regs)
1802 regs.clear();
1803 regs.free = GpRegs;
1804 if (cpu_has_fpu)
1805 regs.free |= FpRegs;
1808 #define signextend16(s) ((int32_t(s)<<16)>>16)
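    // Worked examples (illustrative): signextend16(0x8000) == -32768,
    // signextend16(0x7fff) == 32767, signextend16(0x0123) == 0x0123.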
1810 void
1811 Assembler::nPatchBranch(NIns* branch, NIns* target)
1813 uint32_t op = (branch[0] >> 26) & 0x3f;
1814 uint32_t bdoffset = target-(branch+1);
1816 if (op == OP_BEQ || op == OP_BNE ||
1817 ((branch[0] & 0xfffe0000) == ((OP_COP1 << 26) | (COP1_BC << 21)))) {
1818 if (isS16(bdoffset)) {
1819 // The branch is in range, so just replace the offset in the instruction
1820 // The trampoline that was allocated is redundant and will remain unused
1821 branch[0] = (branch[0] & 0xffff0000) | (bdoffset & 0xffff);
1823 else {
1824 // The branch is pointing to a trampoline. Find out where that is
1825 NIns *tramp = branch + 1 + (signextend16(branch[0] & 0xffff));
1826 if (SEG(branch) == SEG(target)) {
1827 *tramp = J_FORMAT(OP_J,JINDEX(target));
1829 else {
1830 // Full 32-bit jump
1831 // bxx tramp
1832 // lui $at,(target>>16)&0xffff
1833 // ..
1834 // tramp:
1835 // ori $at,target & 0xffff
1836 // jr $at
1837 // nop
1838 branch[1] = U_FORMAT(OP_LUI,0,AT,hi(uint32_t(target)));
1839 tramp[0] = U_FORMAT(OP_ADDIU,AT,AT,lo(uint32_t(target)));
1840 tramp[1] = R_FORMAT(OP_SPECIAL,AT,0,0,0,SPECIAL_JR);
1844 else if (op == OP_J) {
1845 NanoAssert (SEG(branch) == SEG(target));
1846 branch[0] = J_FORMAT(OP_J,JINDEX(target));
1848 else
1849 TODO(unknown_patch);
1850 // TAG("nPatchBranch(branch=%p target=%p)", branch, target);
1853 void
1854 Assembler::nFragExit(LIns *guard)
1856 SideExit *exit = guard->record()->exit;
1857 Fragment *frag = exit->target;
1858 bool destKnown = (frag && frag->fragEntry);
1860 // Generate jump to epilogue and initialize lr.
1862 // If the guard already exists, use a simple jump.
1863 if (destKnown) {
1864 // j _fragEntry
1865 // move $v0,$zero
1866 underrunProtect(2 * 4); // j + branch delay
1867 MOVE(V0, ZERO);
1868 asm_j(frag->fragEntry, false);
1870 else {
1871 // Target doesn't exist. Jump to an epilogue for now.
1872 // This can be patched later.
1873 if (!_epilogue)
1874 _epilogue = genEpilogue();
1875 GuardRecord *lr = guard->record();
1876 // FIXME: _epilogue may be in another segment
1877 // lui $v0,%hi(lr)
1878 // j _epilogue
1879 // addiu $v0,%lo(lr)
1880 underrunProtect(2 * 4); // j + branch delay
1881 ADDIU(V0, V0, lo(int32_t(lr)));
1882 asm_j(_epilogue, false);
1883 LUI(V0, hi(int32_t(lr)));
1884 lr->jmp = _nIns;
1887 // profiling for the exit
1888 verbose_only(
1889 if (_logc->lcbits & LC_FragProfile) {
1890 // lui $fp,%hi(profCount)
1891 // lw $at,%lo(profCount)(fp)
1892 // addiu $at,1
1893 // sw $at,%lo(profCount)(fp)
1894 uint32_t profCount = uint32_t(&guard->record()->profCount);
1895 SW(AT, lo(profCount), FP);
1896 ADDIU(AT, AT, 1);
1897 LW(AT, lo(profCount), FP);
1898 LUI(FP, hi(profCount));
1902 // Pop the stack frame.
1903 MOVE(SP, FP);
1905 // return value is GuardRecord*
1906 TAG("nFragExit(guard=%p{%s})", guard, lirNames[guard->opcode()]);
1909 void
1910 Assembler::nInit(AvmCore*)
1912 nHints[LIR_calli] = rmask(V0);
1913 #if NJ_SOFTFLOAT_SUPPORTED
1914 nHints[LIR_hcalli] = rmask(V1);
1915 #endif
1916 nHints[LIR_calld] = rmask(FV0);
1917 nHints[LIR_paramp] = PREFER_SPECIAL;
1920 void Assembler::nBeginAssembly()
1922 max_out_args = 16; // Always reserve space for a0-a3
1925 // Increment the 32-bit profiling counter at pCtr, without
1926 // changing any registers.
1927 verbose_only(
1928 void Assembler::asm_inc_m32(uint32_t* /*pCtr*/)
1930 // TODO: implement this
1934 void
1935 Assembler::nativePageReset(void)
1937 _nSlot = 0;
1938 _nExitSlot = 0;
1939 TAG("nativePageReset()");
1942 void
1943 Assembler::nativePageSetup(void)
1945 NanoAssert(!_inExit);
1946 if (!_nIns)
1947 codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes));
1948 if (!_nExitIns)
1949 codeAlloc(exitStart, exitEnd, _nExitIns verbose_only(, exitBytes));
1951 // constpool starts at bottom of page and moves up
1952 // code starts at top of page and goes down,
1954 if (!_nSlot)
1955 _nSlot = codeStart;
1956 if (!_nExitSlot)
1957 _nExitSlot = exitStart;
1959 TAG("nativePageSetup()");
1963 NIns*
1964 Assembler::genPrologue(void)
1967 * Use a non-standard fp because we don't know the final frame size until now
1968 * addiu $sp,-FRAMESIZE
1969 * sw $ra,RA_OFFSET($sp)
1970 * sw $fp,FP_OFFSET($sp)
1971 * move $fp,$sp
1972 * addu $sp,-stackNeeded
1975 uint32_t stackNeeded = max_out_args + STACK_GRANULARITY * _activation.stackSlotsNeeded();
1976 uint32_t amt = alignUp(stackNeeded, NJ_ALIGN_STACK);
1978 if (amt) {
1979 if (isS16(-amt))
1980 ADDIU(SP, SP, -amt);
1981 else {
1982 ADDU(SP, SP, AT);
1983 asm_li(AT, -amt);
1987 NIns *patchEntry = _nIns; // FIXME: who uses this value and where should it point?
1989 MOVE(FP, SP);
1990 SW(FP, FP_OFFSET, SP);
1991 SW(RA, RA_OFFSET, SP); // No need to save for leaf functions
1992 ADDIU(SP, SP, -FRAMESIZE);
1994 TAG("genPrologue()");
1996 return patchEntry;
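    // Resulting frame layout (illustrative), from the code above together
    // with the FRAMESIZE/RA_OFFSET/FP_OFFSET definitions:
    //   old $sp                 -> (high addresses)
    //   new $fp + 4 (RA_OFFSET) :  saved $ra
    //   new $fp + 0 (FP_OFFSET) :  saved $fp        <- $fp = old $sp - FRAMESIZE
    //   below $fp               :  activation (spill) slots
    //   new $sp + 0             :  outgoing argument area (max_out_args bytes)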
1999 NIns*
2000 Assembler::genEpilogue(void)
2003 * move $sp,$fp
2004 * lw $ra,RA_OFFSET($sp)
2005 * lw $fp,FP_OFFSET($sp)
2006 * j $ra
2007 * addiu $sp,FRAMESIZE
2009 ADDIU(SP, SP, FRAMESIZE);
2010 JR(RA);
2011 LW(FP, FP_OFFSET, SP);
2012 LW(RA, RA_OFFSET, SP);
2013 MOVE(SP, FP);
2015 TAG("genEpilogue()");
2017 return _nIns;
2020 RegisterMask
2021 Assembler::nHint(LIns* ins)
2023 NanoAssert(ins->isop(LIR_paramp));
2024 RegisterMask prefer = 0;
2025 // FIXME: FLOAT parameters?
2026 if (ins->paramKind() == 0)
2027 if (ins->paramArg() < 4)
2028 prefer = rmask(argRegs[ins->paramArg()]);
2029 return prefer;
2032 void
2033 Assembler::underrunProtect(int bytes)
2035 NanoAssertMsg(bytes<=LARGEST_UNDERRUN_PROT, "constant LARGEST_UNDERRUN_PROT is too small");
2036 NanoAssert(_nSlot != 0);
2037 uintptr_t top = uintptr_t(_nSlot);
2038 uintptr_t pc = uintptr_t(_nIns);
2039 if (pc - bytes < top) {
2040 verbose_only(verbose_outputf(" %p:", _nIns);)
2041 NIns* target = _nIns;
2042 codeAlloc(codeStart, codeEnd, _nIns verbose_only(, codeBytes));
2044 _nSlot = codeStart;
2046 // _nSlot points to the first empty position in the new code block
2047 // _nIns points just past the last empty position.
2048 asm_j(target, true);
2052 void
2053 Assembler::swapCodeChunks() {
2054 if (!_nExitIns)
2055 codeAlloc(exitStart, exitEnd, _nExitIns verbose_only(, exitBytes));
2056 if (!_nExitSlot)
2057 _nExitSlot = exitStart;
2058 SWAP(NIns*, _nIns, _nExitIns);
2059 SWAP(NIns*, _nSlot, _nExitSlot);
2060 SWAP(NIns*, codeStart, exitStart);
2061 SWAP(NIns*, codeEnd, exitEnd);
2062 verbose_only( SWAP(size_t, codeBytes, exitBytes); )
2066 #endif // FEATURE_NANOJIT && NANOJIT_MIPS