/*
** PPC IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h
*/

/* -- Register allocator extensions --------------------------------------- */

/* Allocate a register with a hint. */
static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
{
  Reg r = IR(ref)->r;
  if (ra_noreg(r)) {
    if (!ra_hashint(r) && !iscrossref(as, ref))
      ra_sethint(IR(ref)->r, hint);  /* Propagate register hint. */
    r = ra_allocref(as, ref, allow);
  }
  ra_noweak(as, r);
  return r;
}

/* Allocate two source registers for three-operand instructions. */
static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
{
  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
  Reg left = irl->r, right = irr->r;
  if (ra_hasreg(left)) {
    ra_noweak(as, left);
    if (ra_noreg(right))
      right = ra_allocref(as, ir->op2, rset_exclude(allow, left));
    else
      ra_noweak(as, right);
  } else if (ra_hasreg(right)) {
    ra_noweak(as, right);
    left = ra_allocref(as, ir->op1, rset_exclude(allow, right));
  } else if (ra_hashint(right)) {
    right = ra_allocref(as, ir->op2, allow);
    left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));
  } else {
    left = ra_allocref(as, ir->op1, allow);
    right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));
  }
  return left | (right << 8);
}

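/* Note: ra_alloc2() packs both allocations into one Reg-sized value, with
** left in bits 0-7 and right in bits 8-15. Callers throughout this file
** unpack it with the recurring idiom:
**   Reg right, left = ra_alloc2(as, ir, RSET_GPR);
**   right = (left >> 8); left &= 255;
** This avoids an out-parameter and works because valid register ids
** always fit in 8 bits.
*/
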
/* -- Guard handling ------------------------------------------------------ */

/* Setup exit stubs after the end of each trace. */
static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
{
  ExitNo i;
  MCode *mxp = as->mctop;
  /* 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; bl <1; bl <1; ... */
  for (i = nexits-1; (int32_t)i >= 0; i--)
    *--mxp = PPCI_BL|(((-3-i)&0x00ffffffu)<<2);
  *--mxp = PPCI_LI|PPCF_T(RID_TMP)|as->T->traceno;  /* Read by exit handler. */
  mxp--;
  *mxp = PPCI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)&0x00ffffffu)<<2);
  *--mxp = PPCI_MFLR|PPCF_T(RID_TMP);
  as->mctop = mxp;
}

static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
{
  /* Keep this in-sync with exitstub_trace_addr(). */
  return as->mctop + exitno + 3;
}

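/* Note: the "+ 3" skips the three fixed instructions of the stub group
** (mflr r0; bl ->vm_exit_handler; li r0, traceno), so exit number N maps
** to the N-th trailing "bl <1". Roughly: each per-exit bl branches back to
** the mflr with its own return address in LR, the mflr saves that address
** to r0 before the handler call clobbers LR, and the handler derives the
** exit number from it and the trace number from the li word after the call.
*/
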
/* Emit conditional branch to exit for guard. */
static void asm_guardcc(ASMState *as, PPCCC cc)
{
  MCode *target = asm_exitstub_addr(as, as->snapno);
  MCode *p = as->mcp;
  if (LJ_UNLIKELY(p == as->invmcp)) {
    as->loopinv = 1;
    *p = PPCI_B | (((target-p) & 0x00ffffffu) << 2);
    emit_condbranch(as, PPCI_BC, cc^4, p);
    return;
  }
  emit_condbranch(as, PPCI_BC, cc, target);
}

/* -- Operand fusion ------------------------------------------------------ */

/* Limit linear search to this distance. Avoids O(n^2) behavior. */
#define CONFLICT_SEARCH_LIM	31

/* Check if there's no conflicting instruction between curins and ref. */
static int noconflict(ASMState *as, IRRef ref, IROp conflict)
{
  IRIns *ir = as->ir;
  IRRef i = as->curins;
  if (i > ref + CONFLICT_SEARCH_LIM)
    return 0;  /* Give up, ref is too far away. */
  while (--i > ref)
    if (ir[i].o == conflict)
      return 0;  /* Conflict found. */
  return 1;  /* Ok, no conflict. */
}

/* Fuse the array base of colocated arrays. */
static int32_t asm_fuseabase(ASMState *as, IRRef ref)
{
  IRIns *ir = IR(ref);
  if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
      !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
    return (int32_t)sizeof(GCtab);
  return 0;
}

/* Indicates load/store indexed is ok. */
#define AHUREF_LSX	((int32_t)0x80000000)

/* Fuse array/hash/upvalue reference into register+offset operand. */
static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
{
  IRIns *ir = IR(ref);
  if (ra_noreg(ir->r)) {
    if (ir->o == IR_AREF) {
      if (mayfuse(as, ref)) {
	if (irref_isk(ir->op2)) {
	  IRRef tab = IR(ir->op1)->op1;
	  int32_t ofs = asm_fuseabase(as, tab);
	  IRRef refa = ofs ? tab : ir->op1;
	  ofs += 8*IR(ir->op2)->i;
	  if (checki16(ofs)) {
	    *ofsp = ofs;
	    return ra_alloc1(as, refa, allow);
	  }
	}
	if (*ofsp == AHUREF_LSX) {
	  Reg base = ra_alloc1(as, ir->op1, allow);
	  Reg idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
	  return base | (idx << 8);
	}
      }
    } else if (ir->o == IR_HREFK) {
      if (mayfuse(as, ref)) {
	int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
	if (checki16(ofs)) {
	  *ofsp = ofs;
	  return ra_alloc1(as, ir->op1, allow);
	}
      }
    } else if (ir->o == IR_UREFC) {
      if (irref_isk(ir->op1)) {
	GCfunc *fn = ir_kfunc(IR(ir->op1));
	int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv);
	int32_t jgl = (intptr_t)J2G(as->J);
	if ((uint32_t)(ofs-jgl) < 65536) {
	  *ofsp = ofs-jgl-32768;
	  return RID_JGL;
	} else {
	  *ofsp = (int16_t)ofs;
	  return ra_allock(as, ofs-(int16_t)ofs, allow);
	}
      }
    }
  }
  *ofsp = 0;
  return ra_alloc1(as, ref, allow);
}

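/* Note on the *ofsp protocol: callers that can emit an indexed load/store
** pass in *ofsp == AHUREF_LSX. On return, either *ofsp holds a valid 16 bit
** displacement and the return value is a single base register, or (for the
** AREF case only) the LSX request is honored and base+index come back
** packed as base | (idx << 8), with *ofsp left as AHUREF_LSX.
*/
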
/* Fuse XLOAD/XSTORE reference into load/store operand. */
static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
			 RegSet allow)
{
  IRIns *ir = IR(ref);
  int32_t ofs = 0;
  Reg base;
  if (ra_noreg(ir->r) && mayfuse(as, ref)) {
    if (ir->o == IR_ADD) {
      if (irref_isk(ir->op2) && (ofs = IR(ir->op2)->i, checki16(ofs))) {
	ref = ir->op1;
      } else {
	Reg right, left = ra_alloc2(as, ir, allow);
	right = (left >> 8); left &= 255;
	emit_fab(as, PPCI_LWZX | ((pi >> 20) & 0x780), rt, left, right);
	return;
      }
    } else if (ir->o == IR_STRREF) {
      ofs = (int32_t)sizeof(GCstr);
      if (irref_isk(ir->op2)) {
	ofs += IR(ir->op2)->i;
	ref = ir->op1;
      } else if (irref_isk(ir->op1)) {
	ofs += IR(ir->op1)->i;
	ref = ir->op2;
      } else {
	/* NYI: Fuse ADD with constant. */
	Reg right, left = ra_alloc2(as, ir, allow);
	right = (left >> 8); left &= 255;
	emit_fai(as, pi, rt, rt, ofs);
	emit_tab(as, PPCI_ADD, rt, left, right);
	return;
      }
      if (!checki16(ofs)) {
	Reg left = ra_alloc1(as, ref, allow);
	Reg right = ra_allock(as, ofs, rset_exclude(allow, left));
	emit_fab(as, PPCI_LWZX | ((pi >> 20) & 0x780), rt, left, right);
	return;
      }
    }
  }
  base = ra_alloc1(as, ref, allow);
  emit_fai(as, pi, rt, base, ofs);
}

/* Fuse XLOAD/XSTORE reference into indexed-only load/store operand. */
static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
			  RegSet allow)
{
  IRIns *ira = IR(ref);
  Reg right, left;
  if (mayfuse(as, ref) && ira->o == IR_ADD && ra_noreg(ira->r)) {
    left = ra_alloc2(as, ira, allow);
    right = (left >> 8); left &= 255;
  } else {
    right = ra_alloc1(as, ref, allow);
    left = RID_R0;
  }
  emit_tab(as, pi, rt, left, right);
}

/* Fuse to multiply-add/sub instruction. */
static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
{
  IRRef lref = ir->op1, rref = ir->op2;
  IRIns *irm;
  if (lref != rref &&
      ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
	ra_noreg(irm->r)) ||
       (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
	(rref = lref, pi = pir, ra_noreg(irm->r))))) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    Reg add = ra_alloc1(as, rref, RSET_FPR);
    Reg right, left = ra_alloc2(as, irm, rset_exclude(RSET_FPR, add));
    right = (left >> 8); left &= 255;
    emit_facb(as, pi, dest, left, right, add);
    return 1;
  }
  return 0;
}

/* -- Calls --------------------------------------------------------------- */

/* Generate a call to a C function. */
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
{
  uint32_t n, nargs = CCI_NARGS(ci);
  int32_t ofs = 8;
  Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
  if ((void *)ci->func)
    emit_call(as, (void *)ci->func);
  for (n = 0; n < nargs; n++) {  /* Setup args. */
    IRRef ref = args[n];
    if (ref) {
      IRIns *ir = IR(ref);
      if (irt_isfp(ir->t)) {
	if (fpr <= REGARG_LASTFPR) {
	  lua_assert(rset_test(as->freeset, fpr));  /* Already evicted. */
	  ra_leftov(as, fpr, ref);
	  fpr++;
	} else {
	  Reg r = ra_alloc1(as, ref, RSET_FPR);
	  if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4;
	  emit_spstore(as, ir, r, ofs);
	  ofs += irt_isnum(ir->t) ? 8 : 4;
	}
      } else {
	if (gpr <= REGARG_LASTGPR) {
	  lua_assert(rset_test(as->freeset, gpr));  /* Already evicted. */
	  ra_leftov(as, gpr, ref);
	  gpr++;
	} else {
	  Reg r = ra_alloc1(as, ref, RSET_GPR);
	  emit_spstore(as, ir, r, ofs);
	  ofs += 4;
	}
      }
    } else {
      if (gpr <= REGARG_LASTGPR)
	gpr++;
      else
	ofs += 4;
    }
  }
  if ((ci->flags & CCI_VARARG))  /* Vararg calls need to know about FPR use. */
    emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
}

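/* Note: the final CR twiddle implements the 32 bit PowerPC SVR4/EABI rule
** that the caller of a vararg function must indicate in CR bit 6 whether
** any arguments were passed in FPRs: creqv 6,6,6 sets the bit (FPRs used),
** crxor 6,6,6 clears it, so the callee knows whether its prologue needs to
** save the FPR argument registers.
*/
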
/* Setup result reg/sp for call. Evict scratch regs. */
static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
  RegSet drop = RSET_SCRATCH;
  int hiop = ((ir+1)->o == IR_HIOP);
  if ((ci->flags & CCI_NOFPRCLOBBER))
    drop &= ~RSET_FPR;
  if (ra_hasreg(ir->r))
    rset_clear(drop, ir->r);  /* Dest reg handled below. */
  if (hiop && ra_hasreg((ir+1)->r))
    rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
  ra_evictset(as, drop);  /* Evictions must be performed first. */
  if (ra_used(ir)) {
    lua_assert(!irt_ispri(ir->t));
    if (irt_isfp(ir->t)) {
      if ((ci->flags & CCI_CASTU64)) {
	/* Use spill slot or temp slots. */
	int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
	Reg dest = ir->r;
	if (ra_hasreg(dest)) {
	  ra_free(as, dest);
	  ra_modified(as, dest);
	  emit_fai(as, PPCI_LFD, dest, RID_SP, ofs);
	}
	emit_tai(as, PPCI_STW, RID_RETHI, RID_SP, ofs);
	emit_tai(as, PPCI_STW, RID_RETLO, RID_SP, ofs+4);
      } else {
	ra_destreg(as, ir, RID_FPRET);
      }
    } else if (hiop) {
      ra_destpair(as, ir);
    } else {
      ra_destreg(as, ir, RID_RET);
    }
  }
}

static void asm_call(ASMState *as, IRIns *ir)
{
  IRRef args[CCI_NARGS_MAX];
  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
  asm_collectargs(as, ir, ci, args);
  asm_setupresult(as, ir, ci);
  asm_gencall(as, ci, args);
}

static void asm_callx(ASMState *as, IRIns *ir)
{
  IRRef args[CCI_NARGS_MAX];
  CCallInfo ci;
  IRRef func;
  IRIns *irf;
  ci.flags = asm_callx_flags(as, ir);
  asm_collectargs(as, ir, &ci, args);
  asm_setupresult(as, ir, &ci);
  func = ir->op2; irf = IR(func);
  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
  if (irref_isk(func)) {  /* Call to constant address. */
    ci.func = (ASMFunction)(void *)(irf->i);
  } else {  /* Need a non-argument register for indirect calls. */
    RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
    Reg freg = ra_alloc1(as, func, allow);
    *--as->mcp = PPCI_BCTRL;
    *--as->mcp = PPCI_MTCTR | PPCF_T(freg);
    ci.func = (ASMFunction)(void *)0;
  }
  asm_gencall(as, &ci, args);
}

static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
{
  const CCallInfo *ci = &lj_ir_callinfo[id];
  IRRef args[2];
  args[0] = ir->op1;
  args[1] = ir->op2;
  asm_setupresult(as, ir, ci);
  asm_gencall(as, ci, args);
}

/* -- Returns ------------------------------------------------------------- */

/* Return to lower frame. Guard that it goes to the right spot. */
static void asm_retf(ASMState *as, IRIns *ir)
{
  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
  void *pc = ir_kptr(IR(ir->op2));
  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
  as->topslot -= (BCReg)delta;
  if ((int32_t)as->topslot < 0) as->topslot = 0;
  emit_setgl(as, base, jit_base);
  emit_addptr(as, base, -8*delta);
  asm_guardcc(as, CC_NE);
  emit_ab(as, PPCI_CMPW, RID_TMP,
	  ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base)));
  emit_tai(as, PPCI_LWZ, RID_TMP, base, -8);
}

/* -- Type conversions ---------------------------------------------------- */

static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
{
  RegSet allow = RSET_FPR;
  Reg tmp = ra_scratch(as, rset_clear(allow, left));
  Reg fbias = ra_scratch(as, rset_clear(allow, tmp));
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg hibias = ra_allock(as, 0x43300000, rset_exclude(RSET_GPR, dest));
  asm_guardcc(as, CC_NE);
  emit_fab(as, PPCI_FCMPU, 0, tmp, left);
  emit_fab(as, PPCI_FSUB, tmp, tmp, fbias);
  emit_fai(as, PPCI_LFD, tmp, RID_SP, SPOFS_TMP);
  emit_tai(as, PPCI_STW, RID_TMP, RID_SP, SPOFS_TMPLO);
  emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
  emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000);
  emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
  emit_lsptr(as, PPCI_LFS, (fbias & 31),
	     (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
	     RSET_GPR);
  emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
  emit_fb(as, PPCI_FCTIWZ, tmp, left);
}

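/* Note on asm_tointg() (read bottom-up, since code is emitted backwards):
** it converts the double to an int with fctiwz, then rebuilds a double
** from that int via the usual bias trick -- storing 0x43300000 as the high
** word and (int ^ 0x80000000) as the low word yields 2^52 + 2^31 + int,
** and subtracting the constant 2^52+2^31 (loaded from the k64 table)
** leaves (double)int. The final fcmpu guards that this round-trip equals
** the original value, i.e. the conversion was exact (NaN fails, too).
*/
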
static void asm_tobit(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_FPR;
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, allow);
  Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
  Reg tmp = ra_scratch(as, rset_clear(allow, right));
  emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
  emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
  emit_fab(as, PPCI_FADD, tmp, left, right);
}

static void asm_conv(ASMState *as, IRIns *ir)
{
  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
  IRRef lref = ir->op1;
  lua_assert(irt_type(ir->t) != st);
  lua_assert(!(irt_isint64(ir->t) ||
	       (st == IRT_I64 || st == IRT_U64)));  /* Handled by SPLIT. */
  if (irt_isfp(ir->t)) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    if (stfp) {  /* FP to FP conversion. */
      if (st == IRT_NUM)  /* double -> float conversion. */
	emit_fb(as, PPCI_FRSP, dest, ra_alloc1(as, lref, RSET_FPR));
      else  /* float -> double conversion is a no-op on PPC. */
	ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
    } else {  /* Integer to FP conversion. */
      /* IRT_INT: Flip hibit, bias with 2^52, subtract 2^52+2^31. */
      /* IRT_U32: Bias with 2^52, subtract 2^52. */
      RegSet allow = RSET_GPR;
      Reg left = ra_alloc1(as, lref, allow);
      Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left));
      Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
      const float *kbias;
      if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest);
      emit_fab(as, PPCI_FSUB, dest, dest, fbias);
      emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
      kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000));
      if (st == IRT_U32) kbias++;
      emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias,
		 rset_clear(allow, hibias));
      emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP,
	       RID_SP, SPOFS_TMPLO);
      emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
      if (st != IRT_U32) emit_asi(as, PPCI_XORIS, RID_TMP, left, 0x8000);
    }
  } else if (stfp) {  /* FP to integer conversion. */
    if (irt_isguard(ir->t)) {
      /* Checked conversions are only supported from number to int. */
      lua_assert(irt_isint(ir->t) && st == IRT_NUM);
      asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
    } else {
      Reg dest = ra_dest(as, ir, RSET_GPR);
      Reg left = ra_alloc1(as, lref, RSET_FPR);
      Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
      if (irt_isu32(ir->t)) {
	/* Convert both x and x-2^31 to int and merge results. */
	Reg tmpi = ra_scratch(as, rset_exclude(RSET_GPR, dest));
	emit_asb(as, PPCI_OR, dest, dest, tmpi);  /* Select with mask idiom. */
	emit_asb(as, PPCI_AND, tmpi, tmpi, RID_TMP);
	emit_asb(as, PPCI_ANDC, dest, dest, RID_TMP);
	emit_tai(as, PPCI_LWZ, tmpi, RID_SP, SPOFS_TMPLO);  /* tmp = (int)(x) */
	emit_tai(as, PPCI_ADDIS, dest, dest, 0x8000);  /* dest += 2^31 */
	emit_asb(as, PPCI_SRAWI, RID_TMP, dest, 31);  /* mask = -(dest < 0) */
	emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
	emit_tai(as, PPCI_LWZ, dest,
		 RID_SP, SPOFS_TMPLO);  /* dest = (int)(x-2^31) */
	emit_fb(as, PPCI_FCTIWZ, tmp, left);
	emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
	emit_fb(as, PPCI_FCTIWZ, tmp, tmp);
	emit_fab(as, PPCI_FSUB, tmp, left, tmp);
	emit_lsptr(as, PPCI_LFS, (tmp & 31),
		   (void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)),
		   RSET_GPR);
      } else {
	emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
	emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
	emit_fb(as, PPCI_FCTIWZ, tmp, left);
      }
    }
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
      Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
      lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
      if ((ir->op2 & IRCONV_SEXT))
	emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left);
      else
	emit_rot(as, PPCI_RLWINM, dest, left, 0, st == IRT_U8 ? 24 : 16, 31);
    } else {  /* 32/64 bit integer conversions. */
      /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */
      ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
    }
  }
}

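/* Note on the bias constants: U64x(59800004,59800000) packs two single
** precision floats into one k64 slot. 0x59800004 is 2^52+2^31 (the bias
** for signed int, whose high bit was flipped first) and 0x59800000 is 2^52
** (the bias for u32). U64x(4f000000,00000000) is the float 2^31, used by
** the u32 result path above to split the input range in two.
*/
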
#if LJ_HASFFI
static void asm_conv64(ASMState *as, IRIns *ir)
{
  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
  IRCallID id;
  const CCallInfo *ci;
  IRRef args[2];
  args[0] = ir->op1;
  args[1] = (ir-1)->op1;
  if (st == IRT_NUM || st == IRT_FLOAT) {
    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
    ir--;
  } else {
    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
  }
  ci = &lj_ir_callinfo[id];
  asm_setupresult(as, ir, ci);
  asm_gencall(as, ci, args);
}
#endif

static void asm_strto(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_tonum];
  IRRef args[2];
  int32_t ofs;
  RegSet drop = RSET_SCRATCH;
  if (ra_hasreg(ir->r)) rset_set(drop, ir->r);  /* Spill dest reg (if any). */
  ra_evictset(as, drop);
  asm_guardcc(as, CC_EQ);
  emit_ai(as, PPCI_CMPWI, RID_RET, 0);  /* Test return status. */
  args[0] = ir->op1;      /* GCstr *str */
  args[1] = ASMREF_TMP1;  /* TValue *n */
  asm_gencall(as, ci, args);
  /* Store the result to the spill slot or temp slots. */
  ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
  emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
}

/* Get pointer to TValue. */
static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
{
  IRIns *ir = IR(ref);
  if (irt_isnum(ir->t)) {
    if (irref_isk(ref))  /* Use the number constant itself as a TValue. */
      ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
    else  /* Otherwise force a spill and use the spill slot. */
      emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir));
  } else {
    /* Otherwise use g->tmptv to hold the TValue. */
    RegSet allow = rset_exclude(RSET_GPR, dest);
    Reg type;
    emit_tai(as, PPCI_ADDI, dest, RID_JGL, offsetof(global_State, tmptv)-32768);
    if (!irt_ispri(ir->t)) {
      Reg src = ra_alloc1(as, ref, allow);
      emit_setgl(as, src, tmptv.gcr);
    }
    type = ra_allock(as, irt_toitype(ir->t), allow);
    emit_setgl(as, type, tmptv.it);
  }
}

static void asm_tostr(ASMState *as, IRIns *ir)
{
  IRRef args[2];
  args[0] = ASMREF_L;
  as->gcsteps++;
  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
    args[1] = ASMREF_TMP1;  /* const lua_Number * */
    asm_setupresult(as, ir, ci);  /* GCstr * */
    asm_gencall(as, ci, args);
    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
  } else {
    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
    args[1] = ir->op1;  /* int32_t k */
    asm_setupresult(as, ir, ci);  /* GCstr * */
    asm_gencall(as, ci, args);
  }
}

/* -- Memory references --------------------------------------------------- */

static void asm_aref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg idx, base;
  if (irref_isk(ir->op2)) {
    IRRef tab = IR(ir->op1)->op1;
    int32_t ofs = asm_fuseabase(as, tab);
    IRRef refa = ofs ? tab : ir->op1;
    ofs += 8*IR(ir->op2)->i;
    if (checki16(ofs)) {
      base = ra_alloc1(as, refa, RSET_GPR);
      emit_tai(as, PPCI_ADDI, dest, base, ofs);
      return;
    }
  }
  base = ra_alloc1(as, ir->op1, RSET_GPR);
  idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
  emit_tab(as, PPCI_ADD, dest, RID_TMP, base);
  emit_slwi(as, RID_TMP, idx, 3);
}

/* Inlined hash lookup. Specialized for key type and for const keys.
** The equivalent C code is:
**   Node *n = hashkey(t, key);
**   do {
**     if (lj_obj_equal(&n->key, key)) return &n->val;
**   } while ((n = nextnode(n)));
**   return niltv(L);
*/
static void asm_href(ASMState *as, IRIns *ir, IROp merge)
{
  RegSet allow = RSET_GPR;
  int destused = ra_used(ir);
  Reg dest = ra_dest(as, ir, allow);
  Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
  Reg key = RID_NONE, tmp1 = RID_TMP, tmp2;
  Reg tisnum = RID_NONE, tmpnum = RID_NONE;
  IRRef refkey = ir->op2;
  IRIns *irkey = IR(refkey);
  IRType1 kt = irkey->t;
  uint32_t khash;
  MCLabel l_end, l_loop, l_next;

  rset_clear(allow, tab);
  if (irt_isnum(kt)) {
    key = ra_alloc1(as, refkey, RSET_FPR);
    tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
    tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
    rset_clear(allow, tisnum);
  } else if (!irt_ispri(kt)) {
    key = ra_alloc1(as, refkey, allow);
    rset_clear(allow, key);
  }
  tmp2 = ra_scratch(as, allow);
  rset_clear(allow, tmp2);

  /* Key not found in chain: jump to exit (if merged) or load niltv. */
  l_end = emit_label(as);
  as->invmcp = NULL;
  if (merge == IR_NE)
    asm_guardcc(as, CC_EQ);
  else if (destused)
    emit_loada(as, dest, niltvg(J2G(as->J)));

  /* Follow hash chain until the end. */
  l_loop = --as->mcp;
  emit_ai(as, PPCI_CMPWI, dest, 0);
  emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(Node, next));
  l_next = emit_label(as);

  /* Type and value comparison. */
  if (merge == IR_EQ)
    asm_guardcc(as, CC_EQ);
  else
    emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
  if (irt_isnum(kt)) {
    emit_fab(as, PPCI_FCMPU, 0, tmpnum, key);
    emit_condbranch(as, PPCI_BC, CC_GE, l_next);
    emit_ab(as, PPCI_CMPLW, tmp1, tisnum);
    emit_fai(as, PPCI_LFD, tmpnum, dest, (int32_t)offsetof(Node, key.n));
  } else {
    if (!irt_ispri(kt)) {
      emit_ab(as, PPCI_CMPW, tmp2, key);
      emit_condbranch(as, PPCI_BC, CC_NE, l_next);
    }
    emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
    if (!irt_ispri(kt))
      emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
  }
  emit_tai(as, PPCI_LWZ, tmp1, dest, (int32_t)offsetof(Node, key.it));
  *l_loop = PPCI_BC | PPCF_Y | PPCF_CC(CC_NE) |
	    (((char *)as->mcp-(char *)l_loop) & 0xffffu);

  /* Load main position relative to tab->node into dest. */
  khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
  if (khash == 0) {
    emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
  } else {
    Reg tmphash = tmp1;
    if (irref_isk(refkey))
      tmphash = ra_allock(as, khash, allow);
    emit_tab(as, PPCI_ADD, dest, dest, tmp1);
    emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node));
    emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash);
    emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
    emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
    if (irref_isk(refkey)) {
      /* Nothing to do. */
    } else if (irt_isstr(kt)) {
      emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash));
    } else {  /* Must match with hash*() in lj_tab.c. */
      emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1);
      emit_rotlwi(as, tmp2, tmp2, HASH_ROT3);
      emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2);
      emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31);
      emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2);
      if (irt_isnum(kt)) {
	int32_t ofs = ra_spill(as, irkey);
	emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1);
	emit_rotlwi(as, dest, tmp1, HASH_ROT1);
	emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1);
	emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4);
	emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs);
      } else {
	emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
	emit_rotlwi(as, dest, tmp1, HASH_ROT1);
	emit_tai(as, PPCI_ADDI, tmp1, tmp2, HASH_BIAS);
	emit_tai(as, PPCI_ADDIS, tmp2, key, (HASH_BIAS + 32768)>>16);
      }
    }
  }
}

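/* Note: since code is emitted backwards, asm_href() lays out, in execution
** order: compute the main hash position from the key, then a loop that
** compares the Node key type/value and follows the next pointer until nil.
** The rotate/xor/subtract sequence above must stay bit-for-bit identical
** to the hash*() functions in lj_tab.c, or on-trace lookups would disagree
** with the interpreter about which chain a key lives in.
*/
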
static void asm_hrefk(ASMState *as, IRIns *ir)
{
  IRIns *kslot = IR(ir->op2);
  IRIns *irkey = IR(kslot->op1);
  int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
  int32_t kofs = ofs + (int32_t)offsetof(Node, key);
  Reg dest = (ra_used(ir)||ofs > 65535) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
  Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg key = RID_NONE, type = RID_TMP, idx = node;
  RegSet allow = rset_exclude(RSET_GPR, node);
  lua_assert(ofs % sizeof(Node) == 0);
  if (ofs > 65535) {
    idx = dest;
    rset_clear(allow, dest);
    kofs = (int32_t)offsetof(Node, key);
  } else if (ra_hasreg(dest)) {
    emit_tai(as, PPCI_ADDI, dest, node, ofs);
  }
  asm_guardcc(as, CC_NE);
  if (!irt_ispri(irkey->t)) {
    key = ra_scratch(as, allow);
    rset_clear(allow, key);
  }
  rset_clear(allow, type);
  if (irt_isnum(irkey->t)) {
    emit_cmpi(as, key, (int32_t)ir_knum(irkey)->u32.lo);
    asm_guardcc(as, CC_NE);
    emit_cmpi(as, type, (int32_t)ir_knum(irkey)->u32.hi);
  } else {
    if (ra_hasreg(key)) {
      emit_cmpi(as, key, irkey->i);  /* May use RID_TMP, i.e. type. */
      asm_guardcc(as, CC_NE);
    }
    emit_ai(as, PPCI_CMPWI, type, irt_toitype(irkey->t));
  }
  if (ra_hasreg(key)) emit_tai(as, PPCI_LWZ, key, idx, kofs+4);
  emit_tai(as, PPCI_LWZ, type, idx, kofs);
  if (ofs > 65535) {
    emit_tai(as, PPCI_ADDIS, dest, dest, (ofs + 32768) >> 16);
    emit_tai(as, PPCI_ADDI, dest, node, ofs);
  }
}

static void asm_newref(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
  IRRef args[3];
  args[0] = ASMREF_L;     /* lua_State *L */
  args[1] = ir->op1;      /* GCtab *t */
  args[2] = ASMREF_TMP1;  /* cTValue *key */
  asm_setupresult(as, ir, ci);  /* TValue * */
  asm_gencall(as, ci, args);
  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
}

static void asm_uref(ASMState *as, IRIns *ir)
{
  /* NYI: Check that UREFO is still open and not aliasing a slot. */
  Reg dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op1)) {
    GCfunc *fn = ir_kfunc(IR(ir->op1));
    MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
    emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR);
  } else {
    Reg uv = ra_scratch(as, RSET_GPR);
    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
    if (ir->o == IR_UREFC) {
      asm_guardcc(as, CC_NE);
      emit_ai(as, PPCI_CMPWI, RID_TMP, 1);
      emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv));
      emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
    } else {
      emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v));
    }
    emit_tai(as, PPCI_LWZ, uv, func,
	     (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
  }
}

static void asm_fref(ASMState *as, IRIns *ir)
{
  UNUSED(as); UNUSED(ir);
  lua_assert(!ra_used(ir));
}

static void asm_strref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  IRRef ref = ir->op2, refk = ir->op1;
  int32_t ofs = (int32_t)sizeof(GCstr);
  Reg r;
  if (irref_isk(ref)) {
    IRRef tmp = refk; refk = ref; ref = tmp;
  } else if (!irref_isk(refk)) {
    Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
    IRIns *irr = IR(ir->op2);
    if (ra_hasreg(irr->r)) {
      ra_noweak(as, irr->r);
      right = irr->r;
    } else if (mayfuse(as, irr->op2) &&
	       irr->o == IR_ADD && irref_isk(irr->op2) &&
	       checki16(ofs + IR(irr->op2)->i)) {
      ofs += IR(irr->op2)->i;
      right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left));
    } else {
      right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left));
    }
    emit_tai(as, PPCI_ADDI, dest, dest, ofs);
    emit_tab(as, PPCI_ADD, dest, left, right);
    return;
  }
  r = ra_alloc1(as, ref, RSET_GPR);
  ofs += IR(refk)->i;
  if (checki16(ofs))
    emit_tai(as, PPCI_ADDI, dest, r, ofs);
  else
    emit_tab(as, PPCI_ADD, dest, r,
	     ra_allock(as, ofs, rset_exclude(RSET_GPR, r)));
}

/* -- Loads and stores ---------------------------------------------------- */

static PPCIns asm_fxloadins(IRIns *ir)
{
  switch (irt_type(ir->t)) {
  case IRT_I8: return PPCI_LBZ;  /* Needs sign-extension. */
  case IRT_U8: return PPCI_LBZ;
  case IRT_I16: return PPCI_LHA;
  case IRT_U16: return PPCI_LHZ;
  case IRT_NUM: return PPCI_LFD;
  case IRT_FLOAT: return PPCI_LFS;
  default: return PPCI_LWZ;
  }
}

static PPCIns asm_fxstoreins(IRIns *ir)
{
  switch (irt_type(ir->t)) {
  case IRT_I8: case IRT_U8: return PPCI_STB;
  case IRT_I16: case IRT_U16: return PPCI_STH;
  case IRT_NUM: return PPCI_STFD;
  case IRT_FLOAT: return PPCI_STFS;
  default: return PPCI_STW;
  }
}

static void asm_fload(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
  PPCIns pi = asm_fxloadins(ir);
  int32_t ofs;
  if (ir->op2 == IRFL_TAB_ARRAY) {
    ofs = asm_fuseabase(as, ir->op1);
    if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
      emit_tai(as, PPCI_ADDI, dest, idx, ofs);
      return;
    }
  }
  ofs = field_ofs[ir->op2];
  lua_assert(!irt_isi8(ir->t));
  emit_tai(as, pi, dest, idx, ofs);
}

static void asm_fstore(ASMState *as, IRIns *ir)
{
  Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
  IRIns *irf = IR(ir->op1);
  Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
  int32_t ofs = field_ofs[irf->op2];
  PPCIns pi = asm_fxstoreins(ir);
  emit_tai(as, pi, src, idx, ofs);
}

static void asm_xload(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
  lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
  if (irt_isi8(ir->t))
    emit_as(as, PPCI_EXTSB, dest, dest);
  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
}

static void asm_xstore(ASMState *as, IRIns *ir)
{
  IRIns *irb;
  if (mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP &&
      ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) {
    /* Fuse BSWAP with XSTORE to stwbrx. */
    Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
    asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src));
  } else {
    Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
    asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
		 rset_exclude(RSET_GPR, src));
  }
}

static void asm_ahuvload(ASMState *as, IRIns *ir)
{
  IRType1 t = ir->t;
  Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx;
  RegSet allow = RSET_GPR;
  int32_t ofs = AHUREF_LSX;
  if (ra_used(ir)) {
    lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
    if (!irt_isnum(t)) ofs = 0;
    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
    rset_clear(allow, dest);
  }
  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
  if (irt_isnum(t)) {
    Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx));
    asm_guardcc(as, CC_GE);
    emit_ab(as, PPCI_CMPLW, type, tisnum);
    if (ra_hasreg(dest)) {
      if (ofs == AHUREF_LSX) {
	tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR,
						       (idx&255)), (idx>>8)));
	emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp);
      } else {
	emit_fai(as, PPCI_LFD, dest, idx, ofs);
      }
    }
  } else {
    asm_guardcc(as, CC_NE);
    emit_ai(as, PPCI_CMPWI, type, irt_toitype(t));
    if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, idx, ofs+4);
  }
  if (ofs == AHUREF_LSX) {
    emit_tab(as, PPCI_LWZX, type, (idx&255), tmp);
    emit_slwi(as, tmp, (idx>>8), 3);
  } else {
    emit_tai(as, PPCI_LWZ, type, idx, ofs);
  }
}

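/* Note: on 32 bit big-endian PPC a TValue is 8 bytes, with the type tag in
** the first word and the GC payload (or the low half of a double) in the
** second. That is why the loads above fetch the tag at ofs and the payload
** at ofs+4, and why the number check can be a single unsigned compare:
** the tag words of non-number types all lie at or above LJ_TISNUM, while
** the high word of any valid double lies below it, so one CC_GE guard
** rejects every non-number.
*/
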
static void asm_ahustore(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_GPR;
  Reg idx, src = RID_NONE, type = RID_NONE;
  int32_t ofs = AHUREF_LSX;
  if (irt_isnum(ir->t)) {
    src = ra_alloc1(as, ir->op2, RSET_FPR);
  } else {
    if (!irt_ispri(ir->t)) {
      src = ra_alloc1(as, ir->op2, allow);
      rset_clear(allow, src);
      ofs = 0;
    }
    type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
    rset_clear(allow, type);
  }
  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
  if (irt_isnum(ir->t)) {
    if (ofs == AHUREF_LSX) {
      emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP);
      emit_slwi(as, RID_TMP, (idx>>8), 3);
    } else {
      emit_fai(as, PPCI_STFD, src, idx, ofs);
    }
  } else {
    if (ra_hasreg(src))
      emit_tai(as, PPCI_STW, src, idx, ofs+4);
    if (ofs == AHUREF_LSX) {
      emit_tab(as, PPCI_STWX, type, (idx&255), RID_TMP);
      emit_slwi(as, RID_TMP, (idx>>8), 3);
    } else {
      emit_tai(as, PPCI_STW, type, idx, ofs);
    }
  }
}

static void asm_sload(ASMState *as, IRIns *ir)
{
  int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 0 : 4);
  IRType1 t = ir->t;
  Reg dest = RID_NONE, type = RID_NONE, base;
  RegSet allow = RSET_GPR;
  lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
  lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
  lua_assert(LJ_DUALNUM ||
	     !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
  if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
    dest = ra_scratch(as, RSET_FPR);
    asm_tointg(as, ir, dest);
    t.irt = IRT_NUM;  /* Continue with a regular number type check. */
  } else if (ra_used(ir)) {
    lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
    rset_clear(allow, dest);
    base = ra_alloc1(as, REF_BASE, allow);
    rset_clear(allow, base);
    if ((ir->op2 & IRSLOAD_CONVERT)) {
      if (irt_isint(t)) {
	emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
	dest = ra_scratch(as, RSET_FPR);
	emit_fai(as, PPCI_STFD, dest, RID_SP, SPOFS_TMP);
	emit_fb(as, PPCI_FCTIWZ, dest, dest);
	t.irt = IRT_NUM;  /* Check for original type. */
      } else {
	Reg tmp = ra_scratch(as, allow);
	Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, tmp));
	Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
	emit_fab(as, PPCI_FSUB, dest, dest, fbias);
	emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
	emit_lsptr(as, PPCI_LFS, (fbias & 31),
		   (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
		   rset_clear(allow, hibias));
	emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO);
	emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
	emit_asi(as, PPCI_XORIS, tmp, tmp, 0x8000);
	dest = tmp;
	t.irt = IRT_INT;  /* Check for original type. */
      }
    }
    goto dotypecheck;
  }
  base = ra_alloc1(as, REF_BASE, allow);
  rset_clear(allow, base);
dotypecheck:
  if (irt_isnum(t)) {
    if ((ir->op2 & IRSLOAD_TYPECHECK)) {
      Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
      asm_guardcc(as, CC_GE);
      emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum);
      type = RID_TMP;
    }
    if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4);
  } else {
    if ((ir->op2 & IRSLOAD_TYPECHECK)) {
      asm_guardcc(as, CC_NE);
      emit_ai(as, PPCI_CMPWI, RID_TMP, irt_toitype(t));
      type = RID_TMP;
    }
    if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, base, ofs);
  }
  if (ra_hasreg(type)) emit_tai(as, PPCI_LWZ, type, base, ofs-4);
}

/* -- Allocations --------------------------------------------------------- */

#if LJ_HASFFI
static void asm_cnew(ASMState *as, IRIns *ir)
{
  CTState *cts = ctype_ctsG(J2G(as->J));
  CTypeID typeid = (CTypeID)IR(ir->op1)->i;
  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
	      lj_ctype_size(cts, typeid) : (CTSize)IR(ir->op2)->i;
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
  IRRef args[2];
  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
  RegSet drop = RSET_SCRATCH;
  lua_assert(sz != CTSIZE_INVALID);

  args[0] = ASMREF_L;     /* lua_State *L */
  args[1] = ASMREF_TMP1;  /* MSize size */
  as->gcsteps++;

  if (ra_hasreg(ir->r))
    rset_clear(drop, ir->r);  /* Dest reg handled below. */
  ra_evictset(as, drop);
  if (ra_used(ir))
    ra_destreg(as, ir, RID_RET);  /* GCcdata * */

  /* Initialize immutable cdata object. */
  if (ir->o == IR_CNEWI) {
    int32_t ofs = sizeof(GCcdata);
    lua_assert(sz == 4 || sz == 8);
    if (sz == 8) {
      ofs += 4;
      lua_assert((ir+1)->o == IR_HIOP);
    }
    for (;;) {
      Reg r = ra_alloc1(as, ir->op2, allow);
      emit_tai(as, PPCI_STW, r, RID_RET, ofs);
      rset_clear(allow, r);
      if (ofs == sizeof(GCcdata)) break;
      ofs -= 4; ir++;
    }
  }
  /* Initialize gct and typeid. lj_mem_newgco() already sets marked. */
  emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
  emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, typeid));
  emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
  emit_ti(as, PPCI_LI, RID_TMP, typeid);  /* Lower 16 bit used. Sign-ext ok. */
  asm_gencall(as, ci, args);
  ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
	       ra_releasetmp(as, ASMREF_TMP1));
}
#else
#define asm_cnew(as, ir)	((void)0)
#endif

/* -- Write barriers ------------------------------------------------------ */

static void asm_tbar(ASMState *as, IRIns *ir)
{
  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab));
  Reg link = RID_TMP;
  MCLabel l_end = emit_label(as);
  emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist));
  emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked));
  emit_setgl(as, tab, gc.grayagain);
  lua_assert(LJ_GC_BLACK == 0x04);
  emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28);  /* Clear black bit. */
  emit_getgl(as, link, gc.grayagain);
  emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
  emit_asi(as, PPCI_ANDIDOT, RID_TMP, mark, LJ_GC_BLACK);
  emit_tai(as, PPCI_LBZ, mark, tab, (int32_t)offsetof(GCtab, marked));
}

static void asm_obar(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
  IRRef args[2];
  MCLabel l_end;
  Reg obj, val, tmp;
  /* No need for other object barriers (yet). */
  lua_assert(IR(ir->op1)->o == IR_UREFC);
  ra_evictset(as, RSET_SCRATCH);
  l_end = emit_label(as);
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ir->op1;      /* TValue *tv */
  asm_gencall(as, ci, args);
  emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
  obj = IR(ir->op1)->r;
  tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
  emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
  emit_asi(as, PPCI_ANDIDOT, tmp, tmp, LJ_GC_BLACK);
  emit_condbranch(as, PPCI_BC, CC_EQ, l_end);
  emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, LJ_GC_WHITES);
  val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
  emit_tai(as, PPCI_LBZ, tmp, obj,
	   (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
  emit_tai(as, PPCI_LBZ, RID_TMP, val, (int32_t)offsetof(GChead, marked));
}

/* -- Arithmetic and logic operations ------------------------------------- */

static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi)
{
  Reg dest = ra_dest(as, ir, RSET_FPR);
  Reg right, left = ra_alloc2(as, ir, RSET_FPR);
  right = (left >> 8); left &= 255;
  if (pi == PPCI_FMUL)
    emit_fac(as, pi, dest, left, right);
  else
    emit_fab(as, pi, dest, left, right);
}

static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
{
  Reg dest = ra_dest(as, ir, RSET_FPR);
  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
  emit_fb(as, pi, dest, left);
}

static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
{
  IRIns *irp = IR(ir->op1);
  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
    IRIns *irpp = IR(irp->op1);
    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
      IRRef args[2];
      args[0] = irpp->op1;
      args[1] = irp->op2;
      asm_setupresult(as, ir, ci);
      asm_gencall(as, ci, args);
      return 1;
    }
  }
  return 0;
}

static void asm_add(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD))
      asm_fparith(as, ir, PPCI_FADD);
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    PPCIns pi;
    if (irref_isk(ir->op2)) {
      int32_t k = IR(ir->op2)->i;
      if (checki16(k)) {
	pi = PPCI_ADDI;
	/* May fail due to spills/restores above, but simplifies the logic. */
	if (as->flagmcp == as->mcp) {
	  as->flagmcp = NULL;
	  as->mcp++;
	  pi = PPCI_ADDICDOT;
	}
	emit_tai(as, pi, dest, left, k);
	return;
      } else if ((k & 0xffff) == 0) {
	emit_tai(as, PPCI_ADDIS, dest, left, (k >> 16));
	return;
      } else if (!as->sectref) {
	emit_tai(as, PPCI_ADDIS, dest, dest, (k + 32768) >> 16);
	emit_tai(as, PPCI_ADDI, dest, left, k);
	return;
      }
    }
    pi = PPCI_ADD;
    /* May fail due to spills/restores above, but simplifies the logic. */
    if (as->flagmcp == as->mcp) {
      as->flagmcp = NULL;
      as->mcp++;
      pi |= PPCF_DOT;
    }
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_tab(as, pi, dest, left, right);
  }
}

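/* Note on the recurring flagmcp check: when a signed comparison against
** zero references the previous instruction, asm_intcomp_() records the
** compare's position in as->flagmcp. If the arithmetic op emitted directly
** above it produces that same value, the compare is deleted (as->mcp++)
** and the op is emitted in its record form (addic., add. etc.), which sets
** cr0 as a side effect. A spill or restore emitted in between breaks the
** match, hence the "may fail" caveat; the compare is then simply kept.
*/
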
static void asm_sub(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB))
      asm_fparith(as, ir, PPCI_FSUB);
  } else {
    PPCIns pi = PPCI_SUBF;
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg left, right;
    if (irref_isk(ir->op1)) {
      int32_t k = IR(ir->op1)->i;
      if (checki16(k)) {
	right = ra_alloc1(as, ir->op2, RSET_GPR);
	emit_tai(as, PPCI_SUBFIC, dest, right, k);
	return;
      }
    }
    /* May fail due to spills/restores above, but simplifies the logic. */
    if (as->flagmcp == as->mcp) {
      as->flagmcp = NULL;
      as->mcp++;
      pi |= PPCF_DOT;
    }
    left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_tab(as, pi, dest, right, left);  /* Subtract right _from_ left. */
  }
}

static void asm_mul(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    asm_fparith(as, ir, PPCI_FMUL);
  } else {
    PPCIns pi = PPCI_MULLW;
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    if (irref_isk(ir->op2)) {
      int32_t k = IR(ir->op2)->i;
      if (checki16(k)) {
	emit_tai(as, PPCI_MULLI, dest, left, k);
	return;
      }
    }
    /* May fail due to spills/restores above, but simplifies the logic. */
    if (as->flagmcp == as->mcp) {
      as->flagmcp = NULL;
      as->mcp++;
      pi |= PPCF_DOT;
    }
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_tab(as, pi, dest, left, right);
  }
}

static void asm_neg(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    asm_fpunary(as, ir, PPCI_FNEG);
  } else {
    Reg dest, left;
    PPCIns pi = PPCI_NEG;
    if (as->flagmcp == as->mcp) {
      as->flagmcp = NULL;
      as->mcp++;
      pi |= PPCF_DOT;
    }
    dest = ra_dest(as, ir, RSET_GPR);
    left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    emit_tab(as, pi, dest, left, 0);
  }
}

static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
{
  Reg dest, left, right;
  if (as->flagmcp == as->mcp) {
    as->flagmcp = NULL;
    as->mcp++;
  }
  asm_guardcc(as, CC_SO);
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc2(as, ir, RSET_GPR);
  right = (left >> 8); left &= 255;
  if (pi == PPCI_SUBFO) { Reg tmp = left; left = right; right = tmp; }
  emit_tab(as, pi|PPCF_DOT, dest, left, right);
}

#if LJ_HASFFI
static void asm_add64(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
  PPCIns pi = PPCI_ADDE;
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    if (k == 0)
      pi = PPCI_ADDZE;
    else if (k == -1)
      pi = PPCI_ADDME;
    else
      goto needright;
    right = 0;
  } else {
  needright:
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  }
  emit_tab(as, pi, dest, left, right);
  ir--;
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc1(as, ir->op1, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    if (checki16(k)) {
      emit_tai(as, PPCI_ADDIC, dest, left, k);
      return;
    }
  }
  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  emit_tab(as, PPCI_ADDC, dest, left, right);
}

static void asm_sub64(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left, right = ra_alloc1(as, ir->op2, RSET_GPR);
  PPCIns pi = PPCI_SUBFE;
  if (irref_isk(ir->op1)) {
    int32_t k = IR(ir->op1)->i;
    if (k == 0)
      pi = PPCI_SUBFZE;
    else if (k == -1)
      pi = PPCI_SUBFME;
    else
      goto needleft;
    left = 0;
  } else {
  needleft:
    left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, right));
  }
  emit_tab(as, pi, dest, right, left);  /* Subtract right _from_ left. */
  ir--;
  dest = ra_dest(as, ir, RSET_GPR);
  right = ra_alloc1(as, ir->op2, RSET_GPR);
  if (irref_isk(ir->op1)) {
    int32_t k = IR(ir->op1)->i;
    if (checki16(k)) {
      emit_tai(as, PPCI_SUBFIC, dest, right, k);
      return;
    }
  }
  left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, right));
  emit_tab(as, PPCI_SUBFC, dest, right, left);
}

static void asm_neg64(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
  emit_tab(as, PPCI_SUBFZE, dest, left, 0);
  ir--;
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc1(as, ir->op1, RSET_GPR);
  emit_tai(as, PPCI_SUBFIC, dest, left, 0);
}
#endif

static void asm_bitnot(ASMState *as, IRIns *ir)
{
  Reg dest, left, right;
  PPCIns pi = PPCI_NOR;
  if (as->flagmcp == as->mcp) {
    as->flagmcp = NULL;
    as->mcp++;
    pi |= PPCF_DOT;
  }
  dest = ra_dest(as, ir, RSET_GPR);
  if (mayfuse(as, ir->op1)) {
    IRIns *irl = IR(ir->op1);
    if (irl->o == IR_BAND)
      pi ^= (PPCI_NOR ^ PPCI_NAND);
    else if (irl->o == IR_BXOR)
      pi ^= (PPCI_NOR ^ PPCI_EQV);
    else if (irl->o != IR_BOR)
      goto nofuse;
    left = ra_hintalloc(as, irl->op1, dest, RSET_GPR);
    right = ra_alloc1(as, irl->op2, rset_exclude(RSET_GPR, left));
  } else {
  nofuse:
    left = right = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
  }
  emit_asb(as, pi, dest, left, right);
}

static void asm_bitswap(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  IRIns *irx;
  if (mayfuse(as, ir->op1) && (irx = IR(ir->op1))->o == IR_XLOAD &&
      ra_noreg(irx->r) && (irt_isint(irx->t) || irt_isu32(irx->t))) {
    /* Fuse BSWAP with XLOAD to lwbrx. */
    asm_fusexrefx(as, PPCI_LWBRX, dest, irx->op1, RSET_GPR);
  } else {
    Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
    Reg tmp = dest;
    if (tmp == left) {
      tmp = RID_TMP;
      emit_mr(as, dest, RID_TMP);
    }
    emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 16, 23);
    emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 0, 7);
    emit_rotlwi(as, tmp, left, 8);
  }
}

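/* Note: the fallback is the classic three-instruction PPC byteswap (read
** bottom-up). For input bytes b0 b1 b2 b3, rotlwi tmp,left,8 gives
** b1 b2 b3 b0; rlwimi with shift 24 and mask 0-7 patches the top byte to
** b3; rlwimi with shift 24 and mask 16-23 patches byte 2 to b1, leaving
** b3 b2 b1 b0 without needing a scratch register (unless dest aliases
** the source).
*/
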
static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    Reg tmp = left;
    if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
      if (!checku16(k)) {
	emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
	if ((k & 0xffff) == 0) return;
      }
      emit_asi(as, pik, dest, left, k);
      return;
    }
  }
  /* May fail due to spills/restores above, but simplifies the logic. */
  if (as->flagmcp == as->mcp) {
    as->flagmcp = NULL;
    as->mcp++;
    pi |= PPCF_DOT;
  }
  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  emit_asb(as, pi, dest, left, right);
}

/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
{
  IRIns *ir;
  Reg left;
  if (mayfuse(as, ref) && (ir = IR(ref), ra_noreg(ir->r)) &&
      irref_isk(ir->op2)) {
    int32_t sh = (IR(ir->op2)->i & 31);
    switch (ir->o) {
    case IR_BSHL:
      if ((mask & ((1u<<sh)-1))) goto nofuse;
      break;
    case IR_BSHR:
      if ((mask & ~((~0u)>>sh))) goto nofuse;
      sh = ((32-sh)&31);
      break;
    case IR_BROL:
      break;
    default:
      goto nofuse;
    }
    left = ra_alloc1(as, ir->op1, RSET_GPR);
    *--as->mcp = pi | PPCF_T(left) | PPCF_B(sh);
    return;
  }
nofuse:
  left = ra_alloc1(as, ref, RSET_GPR);
  *--as->mcp = pi | PPCF_T(left);
}

static void asm_bitand(ASMState *as, IRIns *ir)
{
  Reg dest, left, right;
  IRRef lref = ir->op1;
  PPCIns dot = 0;
  IRRef op2;
  if (as->flagmcp == as->mcp) {
    as->flagmcp = NULL;
    as->mcp++;
    dot = PPCF_DOT;
  }
  dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    if (k) {
      /* First check for a contiguous bitmask as used by rlwinm. */
      uint32_t s1 = lj_ffs((uint32_t)k);
      uint32_t k1 = ((uint32_t)k >> s1);
      if ((k1 & (k1+1)) == 0) {
	asm_fuseandsh(as, PPCI_RLWINM|dot | PPCF_A(dest) |
		      PPCF_MB(31-lj_fls((uint32_t)k)) | PPCF_ME(31-s1),
		      k, lref);
	return;
      }
      if (~(uint32_t)k) {
	uint32_t s2 = lj_ffs(~(uint32_t)k);
	uint32_t k2 = (~(uint32_t)k >> s2);
	if ((k2 & (k2+1)) == 0) {
	  asm_fuseandsh(as, PPCI_RLWINM|dot | PPCF_A(dest) |
			PPCF_MB(32-s2) | PPCF_ME(30-lj_fls(~(uint32_t)k)),
			k, lref);
	  return;
	}
      }
    }
    if (checku16(k)) {
      left = ra_alloc1(as, lref, RSET_GPR);
      emit_asi(as, PPCI_ANDIDOT, dest, left, k);
      return;
    } else if ((k & 0xffff) == 0) {
      left = ra_alloc1(as, lref, RSET_GPR);
      emit_asi(as, PPCI_ANDISDOT, dest, left, (k >> 16));
      return;
    }
  }
  op2 = ir->op2;
  if (mayfuse(as, op2) && IR(op2)->o == IR_BNOT && ra_noreg(IR(op2)->r)) {
    dot ^= (PPCI_AND ^ PPCI_ANDC);
    op2 = IR(op2)->op1;
  }
  left = ra_hintalloc(as, lref, dest, RSET_GPR);
  right = ra_alloc1(as, op2, rset_exclude(RSET_GPR, left));
  emit_asb(as, PPCI_AND ^ dot, dest, left, right);
}

static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
{
  Reg dest, left;
  Reg dot = 0;
  if (as->flagmcp == as->mcp) {
    as->flagmcp = NULL;
    as->mcp++;
    dot = PPCF_DOT;
  }
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc1(as, ir->op1, RSET_GPR);
  if (irref_isk(ir->op2)) {  /* Constant shifts. */
    int32_t shift = (IR(ir->op2)->i & 31);
    if (pik == 0)  /* SLWI */
      emit_rot(as, PPCI_RLWINM|dot, dest, left, shift, 0, 31-shift);
    else if (pik == 1)  /* SRWI */
      emit_rot(as, PPCI_RLWINM|dot, dest, left, (32-shift)&31, shift, 31);
    else
      emit_asb(as, pik|dot, dest, left, shift);
  } else {
    Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_asb(as, pi|dot, dest, left, right);
  }
}

static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
{
  if (irt_isnum(ir->t)) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    Reg tmp = dest;
    Reg right, left = ra_alloc2(as, ir, RSET_FPR);
    right = (left >> 8); left &= 255;
    if (tmp == left || tmp == right)
      tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR,
							dest), left), right));
    emit_facb(as, PPCI_FSEL, dest, tmp,
	      ismax ? left : right, ismax ? right : left);
    emit_fab(as, PPCI_FSUB, tmp, left, right);
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg tmp1 = RID_TMP, tmp2 = dest;
    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
    right = (left >> 8); left &= 255;
    if (tmp2 == left || tmp2 == right)
      tmp2 = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR,
							dest), left), right));
    emit_tab(as, PPCI_ADD, dest, tmp2, right);
    emit_asb(as, ismax ? PPCI_ANDC : PPCI_AND, tmp2, tmp2, tmp1);
    emit_tab(as, PPCI_SUBFE, tmp1, tmp1, tmp1);
    emit_tab(as, PPCI_SUBFC, tmp2, tmp2, tmp1);
    emit_asi(as, PPCI_XORIS, tmp2, right, 0x8000);
    emit_asi(as, PPCI_XORIS, tmp1, left, 0x8000);
  }
}

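/* Note on the integer path of asm_min_max() (read bottom-up): both operands
** are biased to unsigned with xoris 0x8000, subfc computes left-right, and
** subfe materializes a mask from the carry: 0 if left >= right, -1 if
** left < right. Masking left-right with AND (min) or ANDC (max) and adding
** right back yields right plus either left-right or 0, i.e. the selected
** operand, all without a branch. The FP path gets the same effect from
** fsel on the sign of left-right.
*/
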
1627 /* -- Comparisons --------------------------------------------------------- */
1629 #define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */
1630 #define CC_TWO 0x80 /* Check two flags for FP comparison. */
1632 /* Map of comparisons to flags. ORDER IR. */
1633 static const uint8_t asm_compmap[IR_ABC+1] = {
1634 /* op int cc FP cc */
1635 /* LT */ CC_GE + (CC_GE<<4),
1636 /* GE */ CC_LT + (CC_LE<<4) + CC_TWO,
1637 /* LE */ CC_GT + (CC_GE<<4) + CC_TWO,
1638 /* GT */ CC_LE + (CC_LE<<4),
1639 /* ULT */ CC_GE + CC_UNSIGNED + (CC_GT<<4) + CC_TWO,
1640 /* UGE */ CC_LT + CC_UNSIGNED + (CC_LT<<4),
1641 /* ULE */ CC_GT + CC_UNSIGNED + (CC_GT<<4),
1642 /* UGT */ CC_LE + CC_UNSIGNED + (CC_LT<<4) + CC_TWO,
1643 /* EQ */ CC_NE + (CC_NE<<4),
1644 /* NE */ CC_EQ + (CC_EQ<<4),
1645 /* ABC */ CC_LE + CC_UNSIGNED + (CC_LT<<4) + CC_TWO /* Same as UGT. */
1648 static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc)
1650 Reg right, left = ra_alloc1(as, lref, RSET_GPR);
1651 if (irref_isk(rref)) {
1652 int32_t k = IR(rref)->i;
1653 if ((cc & CC_UNSIGNED) == 0) { /* Signed comparison with constant. */
1654 if (checki16(k)) {
1655 emit_tai(as, PPCI_CMPWI, cr, left, k);
1656 /* Signed comparison with zero and referencing previous ins? */
1657 if (k == 0 && lref == as->curins-1)
1658 as->flagmcp = as->mcp; /* Allow elimination of the compare. */
1659 return;
1660 } else if ((cc & 3) == (CC_EQ & 3)) { /* Use CMPLWI for EQ or NE. */
1661 if (checku16(k)) {
1662 emit_tai(as, PPCI_CMPLWI, cr, left, k);
1663 return;
1664 } else if (!as->sectref && ra_noreg(IR(rref)->r)) {
1665 emit_tai(as, PPCI_CMPLWI, cr, RID_TMP, k);
1666 emit_asi(as, PPCI_XORIS, RID_TMP, left, (k >> 16));
1667 return;
1670 } else { /* Unsigned comparison with constant. */
1671 if (checku16(k)) {
1672 emit_tai(as, PPCI_CMPLWI, cr, left, k);
1673 return;
1677 right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left));
1678 emit_tab(as, (cc & CC_UNSIGNED) ? PPCI_CMPLW : PPCI_CMPW, cr, left, right);
1681 static void asm_comp(ASMState *as, IRIns *ir)
1683 PPCCC cc = asm_compmap[ir->o];
1684 if (irt_isnum(ir->t)) {
1685 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
1686 right = (left >> 8); left &= 255;
1687 asm_guardcc(as, (cc >> 4));
1688 if ((cc & CC_TWO))
1689 emit_tab(as, PPCI_CROR, ((cc>>4)&3), ((cc>>4)&3), (CC_EQ&3));
1690 emit_fab(as, PPCI_FCMPU, 0, left, right);
1691 } else {
1692 IRRef lref = ir->op1, rref = ir->op2;
1693 if (irref_isk(lref) && !irref_isk(rref)) {
1694 /* Swap constants to the right (only for ABC). */
1695 IRRef tmp = lref; lref = rref; rref = tmp;
1696 if ((cc & 2) == 0) cc ^= 1; /* LT <-> GT, LE <-> GE */
1698 asm_guardcc(as, cc);
1699 asm_intcomp_(as, lref, rref, 0, cc);
1703 #if LJ_HASFFI
1704 /* 64 bit integer comparisons. */
1705 static void asm_comp64(ASMState *as, IRIns *ir)
1707 PPCCC cc = asm_compmap[(ir-1)->o];
1708 if ((cc&3) == (CC_EQ&3)) {
1709 asm_guardcc(as, cc);
1710 emit_tab(as, (cc&4) ? PPCI_CRAND : PPCI_CROR,
1711 (CC_EQ&3), (CC_EQ&3), 4+(CC_EQ&3));
1712 } else {
1713 asm_guardcc(as, CC_EQ);
1714 emit_tab(as, PPCI_CROR, (CC_EQ&3), (CC_EQ&3), ((cc^~(cc>>2))&1));
1715 emit_tab(as, (cc&4) ? PPCI_CRAND : PPCI_CRANDC,
1716 (CC_EQ&3), (CC_EQ&3), 4+(cc&3));
1718 /* Loword comparison sets cr1 and is unsigned, except for equality. */
1719 asm_intcomp_(as, (ir-1)->op1, (ir-1)->op2, 4,
1720 cc | ((cc&3) == (CC_EQ&3) ? 0 : CC_UNSIGNED));
1721 /* Hiword comparison sets cr0. */
1722 asm_intcomp_(as, ir->op1, ir->op2, 0, cc);
1723 as->flagmcp = NULL; /* Doesn't work here. */
1725 #endif
1727 /* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
1729 /* Hiword op of a split 64 bit op. Previous op must be the loword op. */
static void asm_hiop(ASMState *as, IRIns *ir)
{
#if LJ_HASFFI
  /* HIOP is marked as a store because it needs its own DCE logic. */
  int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
  if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
  if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
    as->curins--;  /* Always skip the CONV. */
    if (usehi || uselo)
      asm_conv64(as, ir);
    return;
  } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
    as->curins--;  /* Always skip the loword comparison. */
    asm_comp64(as, ir);
    return;
  }
  if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
  switch ((ir-1)->o) {
  case IR_ADD: as->curins--; asm_add64(as, ir); break;
  case IR_SUB: as->curins--; asm_sub64(as, ir); break;
  case IR_NEG: as->curins--; asm_neg64(as, ir); break;
  case IR_CALLN:
  case IR_CALLXS:
    if (!uselo)
      ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
    break;
  case IR_CNEWI:
    /* Nothing to do here. Handled by lo op itself. */
    break;
  default: lua_assert(0); break;
  }
#else
  UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused without FFI. */
#endif
}

/* -- Stack handling ------------------------------------------------------ */

/* Check Lua stack size for overflow. Use exit handler as fallback. */
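/*
** The emitted code, read in execution order, is roughly:
**
**   lwz tmp, g->jit_L           ; current lua_State *L
**   lwz tmp, tmp->maxstack
**   subf RID_TMP, pbase, tmp    ; free stack space in bytes
**   cmplwi RID_TMP, 8*topslot   ; 8 bytes per stack slot
**   blt ->exit[exitno]
**
** plus an optional spill/reload of tmp around it when no GPR is free
** (the reload sits between cmplwi and blt, which is fine since lwz
** leaves cr0 alone). pbase itself may first be reloaded from
** g->jit_base for side traces.
*/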
static void asm_stack_check(ASMState *as, BCReg topslot,
                            IRIns *irp, RegSet allow, ExitNo exitno)
{
  /* Try to get an unused temp. register, otherwise spill/restore RID_RET*. */
  Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE;
  rset_clear(allow, pbase);
  tmp = allow ? rset_pickbot(allow) :
                (pbase == RID_RETHI ? RID_RETLO : RID_RETHI);
  emit_condbranch(as, PPCI_BC, CC_LT, asm_exitstub_addr(as, exitno));
  if (allow == RSET_EMPTY)  /* Restore temp. register. */
    emit_tai(as, PPCI_LWZ, tmp, RID_SP, SPOFS_TMPW);
  else
    ra_modified(as, tmp);
  emit_ai(as, PPCI_CMPLWI, RID_TMP, (int32_t)(8*topslot));
  emit_tab(as, PPCI_SUBF, RID_TMP, pbase, tmp);
  emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
  if (pbase == RID_TMP)
    emit_getgl(as, RID_TMP, jit_base);
  emit_getgl(as, tmp, jit_L);
  if (allow == RSET_EMPTY)  /* Spill temp. register. */
    emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
}

/* Restore Lua stack from on-trace state. */
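/*
** Slots are 8 byte TValues at BASE + 8*(slot-1). On big-endian PPC the
** type tag is the first word (ofs) and the payload the second (ofs+4);
** numbers span both words as a double. Frame links recorded in the
** snapshot are stored in place of the type word of their slot.
*/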
static void asm_stack_restore(ASMState *as, SnapShot *snap)
{
  SnapEntry *map = &as->T->snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  SnapEntry *flinks = map + nent + snap->depth;
  /* Store the value of all modified slots to the Lua stack. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    int32_t ofs = 8*((int32_t)s-1);
    IRRef ref = snap_ref(sn);
    IRIns *ir = IR(ref);
    if ((sn & SNAP_NORESTORE))
      continue;
    if (irt_isnum(ir->t)) {
      Reg src = ra_alloc1(as, ref, RSET_FPR);
      emit_fai(as, PPCI_STFD, src, RID_BASE, ofs);
    } else {
      Reg type;
      RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
      lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
      if (!irt_ispri(ir->t)) {
        Reg src = ra_alloc1(as, ref, allow);
        rset_clear(allow, src);
        emit_tai(as, PPCI_STW, src, RID_BASE, ofs+4);
      }
      if ((sn & (SNAP_CONT|SNAP_FRAME))) {
        if (s == 0) continue;  /* Do not overwrite link to previous frame. */
        type = ra_allock(as, (int32_t)(*flinks--), allow);
      } else {
        type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
      }
      emit_tai(as, PPCI_STW, type, RID_BASE, ofs);
    }
    checkmclim(as);
  }
  lua_assert(map + nent == flinks);
}

/* -- GC handling --------------------------------------------------------- */

/* Check GC threshold and do one or more GC steps. */
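/*
** In execution order: load g->gc.total and g->gc.threshold, branch
** around the whole block (a blt with the PPCF_Y prediction hint set)
** while below the threshold, otherwise call lj_gc_step_jit(g, steps)
** and leave the trace if it returns nonzero. Note that g itself is
** addressed off RID_JGL with a -32768 bias.
*/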
static void asm_gc_check(ASMState *as)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
  IRRef args[2];
  MCLabel l_end;
  Reg tmp;
  ra_evictset(as, RSET_SCRATCH);
  l_end = emit_label(as);
  /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
  asm_guardcc(as, CC_NE);  /* Assumes asm_snap_prep() already done. */
  emit_ai(as, PPCI_CMPWI, RID_RET, 0);
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ASMREF_TMP2;  /* MSize steps     */
  asm_gencall(as, ci, args);
  emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
  tmp = ra_releasetmp(as, ASMREF_TMP2);
  emit_loadi(as, tmp, (int32_t)as->gcsteps);
  /* Jump around GC step if GC total < GC threshold. */
  emit_condbranch(as, PPCI_BC|PPCF_Y, CC_LT, l_end);
  emit_ab(as, PPCI_CMPLW, RID_TMP, tmp);
  emit_getgl(as, tmp, gc.threshold);
  emit_getgl(as, RID_TMP, gc.total);
  as->gcsteps = 0;
  checkmclim(as);
}

/* -- Loop handling ------------------------------------------------------- */

/* Fixup the loop branch. */
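/*
** PPC branch targets are word-aligned byte offsets: bc carries a signed
** 16 bit field (a 14 bit word displacement << 2), b a signed 26 bit
** field (a 24 bit word displacement << 2). target-p counts 4 byte MCode
** words, so the +2/+1 below account for the branch sitting at p-2 resp.
** p-1.
*/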
static void asm_loop_fixup(ASMState *as)
{
  MCode *p = as->mctop;
  MCode *target = as->mcp;
  if (as->loopinv) {  /* Inverted loop branch? */
    /* asm_guardcc already inverted the cond branch and patched the final b. */
    p[-2] = (p[-2] & (0xffff0000u & ~PPCF_Y)) | (((target-p+2) & 0x3fffu) << 2);
  } else {
    p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2);
  }
}

/* -- Head of trace ------------------------------------------------------- */

/* Coalesce BASE register for a root trace. */
static void asm_head_root_base(ASMState *as)
{
  IRIns *ir = IR(REF_BASE);
  Reg r = ir->r;
  if (ra_hasreg(r)) {
    ra_free(as, r);
    if (rset_test(as->modset, r))
      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
    if (r != RID_BASE)
      emit_mr(as, r, RID_BASE);
  }
}

/* Coalesce BASE register for a side trace. */
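/*
** Three cases, from cheapest to most expensive: the parent trace left
** BASE in the same register (nothing to emit), the parent's register is
** still free here (a single mr), or BASE has to be reloaded from
** g->jit_base.
*/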
static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
{
  IRIns *ir = IR(REF_BASE);
  Reg r = ir->r;
  if (ra_hasreg(r)) {
    ra_free(as, r);
    if (rset_test(as->modset, r))
      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
    if (irp->r == r) {
      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
    } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
      rset_clear(allow, irp->r);
      emit_mr(as, r, irp->r);  /* Move from coalesced parent reg. */
    } else {
      emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
    }
  }
  return allow;
}

/* -- Tail of trace ------------------------------------------------------- */

/* Fixup the tail code. */
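/*
** The tail reserves three instruction slots. When the linked trace
** expects a different frame size, the first two become an addi/stwu
** pair that moves the stack pointer while storing a back chain word
** (presumably to keep the ABI frame walkable); otherwise they turn into
** nops. The last slot is the branch to the linked trace or back to the
** interpreter.
*/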
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
  MCode *p = as->mctop;
  MCode *target;
  int32_t spadj = as->T->spadjust;
  if (spadj == 0) {
    *--p = PPCI_NOP;
    *--p = PPCI_NOP;
    as->mctop = p;
  } else {
    /* Patch stack adjustment. */
    lua_assert(checki16(CFRAME_SIZE+spadj));
    p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj);
    p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj;
  }
  /* Patch exit branch. */
  target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
  p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2);
}

/* Prepare tail of code. */
static void asm_tail_prep(ASMState *as)
{
  MCode *p = as->mctop - 1;  /* Leave room for exit branch. */
  if (as->loopref) {
    as->invmcp = as->mcp = p;
  } else {
    as->mcp = p-2;  /* Leave room for stack pointer adjustment. */
    as->invmcp = NULL;
  }
}

/* -- Instruction dispatch ------------------------------------------------ */

/* Assemble a single instruction. */
static void asm_ir(ASMState *as, IRIns *ir)
{
  switch ((IROp)ir->o) {
  /* Miscellaneous ops. */
  case IR_LOOP: asm_loop(as); break;
  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
  case IR_USE:
    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
  case IR_PHI: asm_phi(as, ir); break;
  case IR_HIOP: asm_hiop(as, ir); break;

  /* Guarded assertions. */
  case IR_EQ: case IR_NE:
    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
      as->curins--;
      asm_href(as, ir-1, (IROp)ir->o);
      break;
    }
    /* fallthrough */
  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
  case IR_ABC:
    asm_comp(as, ir);
    break;

  case IR_RETF: asm_retf(as, ir); break;

  /* Bit ops. */
  case IR_BNOT: asm_bitnot(as, ir); break;
  case IR_BSWAP: asm_bitswap(as, ir); break;

  case IR_BAND: asm_bitand(as, ir); break;
  case IR_BOR: asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break;
  case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break;

  case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break;
  case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break;
  case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break;
  case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31),
                             PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break;
  case IR_BROR: lua_assert(0); break;

  /* Arithmetic ops. */
  case IR_ADD: asm_add(as, ir); break;
  case IR_SUB: asm_sub(as, ir); break;
  case IR_MUL: asm_mul(as, ir); break;
  case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break;
  case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
  case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
  case IR_NEG: asm_neg(as, ir); break;

  case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break;
  case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
  case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
  case IR_MIN: asm_min_max(as, ir, 0); break;
  case IR_MAX: asm_min_max(as, ir, 1); break;
  case IR_FPMATH:
    if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
      break;
    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
    break;

  /* Overflow-checking arithmetic ops. */
  case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break;
  case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break;
  case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break;

  /* Memory references. */
  case IR_AREF: asm_aref(as, ir); break;
  case IR_HREF: asm_href(as, ir, 0); break;
  case IR_HREFK: asm_hrefk(as, ir); break;
  case IR_NEWREF: asm_newref(as, ir); break;
  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
  case IR_FREF: asm_fref(as, ir); break;
  case IR_STRREF: asm_strref(as, ir); break;

  /* Loads and stores. */
  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
    asm_ahuvload(as, ir);
    break;
  case IR_FLOAD: asm_fload(as, ir); break;
  case IR_XLOAD: asm_xload(as, ir); break;
  case IR_SLOAD: asm_sload(as, ir); break;

  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
  case IR_FSTORE: asm_fstore(as, ir); break;
  case IR_XSTORE: asm_xstore(as, ir); break;

  /* Allocations. */
  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
  case IR_TNEW: asm_tnew(as, ir); break;
  case IR_TDUP: asm_tdup(as, ir); break;
  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;

  /* Write barriers. */
  case IR_TBAR: asm_tbar(as, ir); break;
  case IR_OBAR: asm_obar(as, ir); break;

  /* Type conversions. */
  case IR_CONV: asm_conv(as, ir); break;
  case IR_TOBIT: asm_tobit(as, ir); break;
  case IR_TOSTR: asm_tostr(as, ir); break;
  case IR_STRTO: asm_strto(as, ir); break;

  /* Calls. */
  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
  case IR_CALLXS: asm_callx(as, ir); break;
  case IR_CARG: break;

  default:
    setintV(&as->J->errinfo, ir->o);
    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
    break;
  }
}

/* -- Trace setup --------------------------------------------------------- */

/* Ensure there are enough stack slots for call arguments. */
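/*
** Arguments beyond the REGARG_NUMGPR/REGARG_NUMFPR register arguments go
** to stack slots; nslots starts at 2, presumably for the back chain and
** LR save word of the ABI frame. Overflowing FP arguments take a
** doubleword-aligned slot pair, hence the (nslots+3)&~1 rounding below.
** E.g. with 8 GPR argument registers, a call with 10 integer arguments
** needs nslots = 2+2 = 4.
*/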
static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
  IRRef args[CCI_NARGS_MAX];
  uint32_t i, nargs = (int)CCI_NARGS(ci);
  int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
  asm_collectargs(as, ir, ci, args);
  for (i = 0; i < nargs; i++)
    if (args[i] && irt_isfp(IR(args[i])->t)) {
      if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1;
    } else {
      if (ngpr > 0) ngpr--; else nslots++;
    }
  if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
    as->evenspill = nslots;
  return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
}

static void asm_setup_target(ASMState *as)
{
  asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
}

/* -- Trace patching ------------------------------------------------------ */

/* Patch exit jumps of existing machine code to a new target. */
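/*
** Exit branches are located by pattern matching instead of side tables:
** a bc to the exit stub has primary opcode 0x40000000 plus the stub's
** 16 bit displacement, a b has opcode PPCI_B plus a 26 bit one.
** Conditional branches whose new displacement still fits are redirected
** in place, with the PPCF_Y prediction hint re-derived from the
** displacement sign; everything else keeps going through the stub,
** whose long-range b is always repointed. Branches testing the sticky
** SO bit (overflow guards) are instead aimed one instruction early, at
** an mcrxr prepended to the just-compiled target trace to clear XER.
*/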
void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
{
  MCode *p = T->mcode;
  MCode *pe = (MCode *)((char *)p + T->szmcode);
  MCode *px = exitstub_trace_addr(T, exitno);
  MCode *cstart = NULL;
  MCode *mcarea = lj_mcode_patch(J, p, 0);
  int clearso = 0;
  for (; p < pe; p++) {
    /* Look for exitstub branch, try to replace with branch to target. */
    uint32_t ins = *p;
    if ((ins & 0xfc000000u) == 0x40000000u &&
        ((ins ^ ((char *)px-(char *)p)) & 0xffffu) == 0) {
      ptrdiff_t delta = (char *)target - (char *)p;
      if (((ins >> 16) & 3) == (CC_SO&3)) {
        clearso = sizeof(MCode);
        delta -= sizeof(MCode);
      }
      /* Many, but not all, short-range branches can be patched directly. */
      if (((delta + 0x8000) >> 16) == 0) {
        *p = (ins & 0xffdf0000u) | ((uint32_t)delta & 0xffffu) |
             ((delta & 0x8000) * (PPCF_Y/0x8000));
        if (!cstart) cstart = p;
      }
    } else if ((ins & 0xfc000000u) == PPCI_B &&
               ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) {
      ptrdiff_t delta = (char *)target - (char *)p;
      lua_assert(((delta + 0x02000000) >> 26) == 0);
      *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
      if (!cstart) cstart = p;
    }
  }
  {  /* Always patch long-range branch in exit stub itself. */
    ptrdiff_t delta = (char *)target - (char *)px - clearso;
    lua_assert(((delta + 0x02000000) >> 26) == 0);
    *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
  }
  if (!cstart) cstart = px;
  asm_cache_flush(cstart, px+1);
  if (clearso) {  /* Extend the current trace. Ugly workaround. */
    MCode *pp = J->cur.mcode;
    J->cur.szmcode += sizeof(MCode);
    *--pp = PPCI_MCRXR;  /* Clear SO flag. */
    J->cur.mcode = pp;
    asm_cache_flush(pp, pp+1);
  }
  lj_mcode_patch(J, mcarea, 1);
}