PPC: Fix fusion of floating-point XLOAD/XSTORE.
src/lj_asm_ppc.h
/*
** PPC IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2012 Mike Pall. See Copyright Notice in luajit.h
*/

/* -- Register allocator extensions --------------------------------------- */

/* Allocate a register with a hint. */
static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
{
  Reg r = IR(ref)->r;
  if (ra_noreg(r)) {
    if (!ra_hashint(r) && !iscrossref(as, ref))
      ra_sethint(IR(ref)->r, hint);  /* Propagate register hint. */
    r = ra_allocref(as, ref, allow);
  }
  ra_noweak(as, r);
  return r;
}

/* Allocate two source registers for three-operand instructions. */
static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
{
  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
  Reg left = irl->r, right = irr->r;
  if (ra_hasreg(left)) {
    ra_noweak(as, left);
    if (ra_noreg(right))
      right = ra_allocref(as, ir->op2, rset_exclude(allow, left));
    else
      ra_noweak(as, right);
  } else if (ra_hasreg(right)) {
    ra_noweak(as, right);
    left = ra_allocref(as, ir->op1, rset_exclude(allow, right));
  } else if (ra_hashint(right)) {
    right = ra_allocref(as, ir->op2, allow);
    left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));
  } else {
    left = ra_allocref(as, ir->op1, allow);
    right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));
  }
  return left | (right << 8);
}
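/* [Editor annotation -- not part of the original source]
** ra_alloc2() packs both allocations into one Reg: the left register in
** bits 0-7 and the right register in bits 8-15. Callers unpack it with
** the recurring idiom:
**
**   Reg right, left = ra_alloc2(as, ir, allow);
**   right = (left >> 8); left &= 255;
*/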
/* -- Guard handling ------------------------------------------------------ */

/* Setup exit stubs after the end of each trace. */
static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
{
  ExitNo i;
  MCode *mxp = as->mctop;
  /* 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; bl <1; bl <1; ... */
  for (i = nexits-1; (int32_t)i >= 0; i--)
    *--mxp = PPCI_BL|(((-3-i)&0x00ffffffu)<<2);
  *--mxp = PPCI_LI|PPCF_T(RID_TMP)|as->T->traceno;  /* Read by exit handler. */
  mxp--;
  *mxp = PPCI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)&0x00ffffffu)<<2);
  *--mxp = PPCI_MFLR|PPCF_T(RID_TMP);
  as->mctop = mxp;
}

static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
{
  /* Keep this in-sync with exitstub_trace_addr(). */
  return as->mctop + exitno + 3;
}
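/* [Editor annotation -- not part of the original source]
** The stubs are emitted bottom-up, so after asm_exitstub_setup() the
** layout starting at as->mctop is:
**
**   mctop+0: mflr r0
**   mctop+1: bl ->vm_exit_handler
**   mctop+2: li r0, traceno
**   mctop+3: bl <1   (exit 0)
**   mctop+4: bl <1   (exit 1)
**   ...
**
** Each per-exit stub is a single bl back to the mflr (label 1), which is
** why asm_exitstub_addr() returns as->mctop + exitno + 3.
*/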
/* Emit conditional branch to exit for guard. */
static void asm_guardcc(ASMState *as, PPCCC cc)
{
  MCode *target = asm_exitstub_addr(as, as->snapno);
  MCode *p = as->mcp;
  if (LJ_UNLIKELY(p == as->invmcp)) {
    as->loopinv = 1;
    *p = PPCI_B | (((target-p) & 0x00ffffffu) << 2);
    emit_condbranch(as, PPCI_BC, cc^4, p);
    return;
  }
  emit_condbranch(as, PPCI_BC, cc, target);
}
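/* [Editor annotation -- not part of the original source]
** Reminder: the assembler emits machine code backwards (as->mcp is
** decremented), so consecutive emit_*() calls appear in reverse execution
** order throughout this file. asm_guardcc() uses this for loop inversion:
** if the guard would land on the loop-closing branch (as->invmcp), the
** condition is inverted (cc^4 flips the sense of the test) and an
** unconditional branch to the exit stub is placed behind it.
*/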
/* -- Operand fusion ------------------------------------------------------ */

/* Limit linear search to this distance. Avoids O(n^2) behavior. */
#define CONFLICT_SEARCH_LIM	31

/* Check if there's no conflicting instruction between curins and ref. */
static int noconflict(ASMState *as, IRRef ref, IROp conflict)
{
  IRIns *ir = as->ir;
  IRRef i = as->curins;
  if (i > ref + CONFLICT_SEARCH_LIM)
    return 0;  /* Give up, ref is too far away. */
  while (--i > ref)
    if (ir[i].o == conflict)
      return 0;  /* Conflict found. */
  return 1;  /* Ok, no conflict. */
}
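/* [Editor annotation -- not part of the original source]
** Rationale: a fused reference must not be moved across an instruction
** that could invalidate it, e.g. asm_fuseabase() below refuses to fuse a
** colocated array base across an intervening NEWREF, which may resize the
** table. The backwards scan is capped at CONFLICT_SEARCH_LIM instructions
** to keep assembly linear in the trace length.
*/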
/* Fuse the array base of colocated arrays. */
static int32_t asm_fuseabase(ASMState *as, IRRef ref)
{
  IRIns *ir = IR(ref);
  if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
      !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
    return (int32_t)sizeof(GCtab);
  return 0;
}

/* Indicates load/store indexed is ok. */
#define AHUREF_LSX	((int32_t)0x80000000)

/* Fuse array/hash/upvalue reference into register+offset operand. */
static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
{
  IRIns *ir = IR(ref);
  if (ra_noreg(ir->r)) {
    if (ir->o == IR_AREF) {
      if (mayfuse(as, ref)) {
	if (irref_isk(ir->op2)) {
	  IRRef tab = IR(ir->op1)->op1;
	  int32_t ofs = asm_fuseabase(as, tab);
	  IRRef refa = ofs ? tab : ir->op1;
	  ofs += 8*IR(ir->op2)->i;
	  if (checki16(ofs)) {
	    *ofsp = ofs;
	    return ra_alloc1(as, refa, allow);
	  }
	}
	if (*ofsp == AHUREF_LSX) {
	  Reg base = ra_alloc1(as, ir->op1, allow);
	  Reg idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
	  return base | (idx << 8);
	}
      }
    } else if (ir->o == IR_HREFK) {
      if (mayfuse(as, ref)) {
	int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
	if (checki16(ofs)) {
	  *ofsp = ofs;
	  return ra_alloc1(as, ir->op1, allow);
	}
      }
    } else if (ir->o == IR_UREFC) {
      if (irref_isk(ir->op1)) {
	GCfunc *fn = ir_kfunc(IR(ir->op1));
	int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv);
	int32_t jgl = (intptr_t)J2G(as->J);
	if ((uint32_t)(ofs-jgl) < 65536) {
	  *ofsp = ofs-jgl-32768;
	  return RID_JGL;
	} else {
	  *ofsp = (int16_t)ofs;
	  return ra_allock(as, ofs-(int16_t)ofs, allow);
	}
      }
    }
  }
  *ofsp = 0;
  return ra_alloc1(as, ref, allow);
}
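/* [Editor annotation -- not part of the original source]
** asm_fuseahuref() has a two-way contract with its caller. If the caller
** initializes *ofsp to AHUREF_LSX it accepts an indexed (reg+reg) operand
** and may get back base | (idx << 8) with *ofsp left as AHUREF_LSX.
** Otherwise a single base register is returned and *ofsp receives a
** 16 bit displacement. Constant upvalue TValues within 64KB of the
** global state are addressed off RID_JGL with a -32768 biased offset.
*/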
/* Fuse XLOAD/XSTORE reference into load/store operand. */
static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
			 RegSet allow)
{
  IRIns *ir = IR(ref);
  int32_t ofs = 0;
  Reg base;
  if (ra_noreg(ir->r) && mayfuse(as, ref)) {
    if (ir->o == IR_ADD) {
      if (irref_isk(ir->op2) && (ofs = IR(ir->op2)->i, checki16(ofs))) {
	ref = ir->op1;
      } else {
	Reg right, left = ra_alloc2(as, ir, allow);
	right = (left >> 8); left &= 255;
	emit_fab(as, PPCI_LWZX | ((pi >> 20) & 0x780), rt, left, right);
	return;
      }
    } else if (ir->o == IR_STRREF) {
      ofs = (int32_t)sizeof(GCstr);
      if (irref_isk(ir->op2)) {
	ofs += IR(ir->op2)->i;
	ref = ir->op1;
      } else if (irref_isk(ir->op1)) {
	ofs += IR(ir->op1)->i;
	ref = ir->op2;
      } else {
	/* NYI: Fuse ADD with constant. */
	Reg tmp, right, left = ra_alloc2(as, ir, allow);
	right = (left >> 8); left &= 255;
	tmp = ra_scratch(as, rset_exclude(rset_exclude(allow, left), right));
	emit_fai(as, pi, rt, tmp, ofs);
	emit_tab(as, PPCI_ADD, tmp, left, right);
	return;
      }
      if (!checki16(ofs)) {
	Reg left = ra_alloc1(as, ref, allow);
	Reg right = ra_allock(as, ofs, rset_exclude(allow, left));
	emit_fab(as, PPCI_LWZX | ((pi >> 20) & 0x780), rt, left, right);
	return;
      }
    }
  }
  base = ra_alloc1(as, ref, allow);
  emit_fai(as, pi, rt, base, ofs);
}
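/* [Editor annotation -- not part of the original source]
** The expression PPCI_LWZX | ((pi >> 20) & 0x780) is the heart of this
** fix: it derives the X-form (indexed) opcode that matches the D-form
** opcode pi, for FP as well as integer accesses. The primary opcode
** (bits 26-31 of pi) is shifted down into the X-form XO field, e.g. for
** pi = PPCI_LFD (primary opcode 50): (pi >> 20) & 0x780 = 0x480, and
** LWZX (XO 23) plus 0x480 yields XO 599, which is LFDX. The same mapping
** holds for LWZ->LWZX, LBZ->LBZX, LHZ->LHZX, LHA->LHAX, LFS->LFSX,
** STW->STWX, STB->STBX, STH->STHX, STFS->STFSX and STFD->STFDX.
*/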
/* Fuse XLOAD/XSTORE reference into indexed-only load/store operand. */
static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
			  RegSet allow)
{
  IRIns *ira = IR(ref);
  Reg right, left;
  if (mayfuse(as, ref) && ira->o == IR_ADD && ra_noreg(ira->r)) {
    left = ra_alloc2(as, ira, allow);
    right = (left >> 8); left &= 255;
  } else {
    right = ra_alloc1(as, ref, allow);
    left = RID_R0;
  }
  emit_tab(as, pi, rt, left, right);
}

/* Fuse to multiply-add/sub instruction. */
static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
{
  IRRef lref = ir->op1, rref = ir->op2;
  IRIns *irm;
  if (lref != rref &&
      ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
	ra_noreg(irm->r)) ||
       (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
	(rref = lref, pi = pir, ra_noreg(irm->r))))) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    Reg add = ra_alloc1(as, rref, RSET_FPR);
    Reg right, left = ra_alloc2(as, irm, rset_exclude(RSET_FPR, add));
    right = (left >> 8); left &= 255;
    emit_facb(as, pi, dest, left, right, add);
    return 1;
  }
  return 0;
}
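/* [Editor annotation -- not part of the original source]
** asm_fusemadd() merges a +/- b*c into one fused multiply-add with a
** single rounding. If the MUL is the left operand, pi is used directly
** (FMADD resp. FMSUB); if it is the right operand, the operands are
** swapped and pir is used instead, e.g. a - b*c becomes FNMSUB,
** i.e. -(b*c - a).
*/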
/* -- Calls --------------------------------------------------------------- */

/* Generate a call to a C function. */
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
{
  uint32_t n, nargs = CCI_NARGS(ci);
  int32_t ofs = 8;
  Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
  if ((void *)ci->func)
    emit_call(as, (void *)ci->func);
  for (n = 0; n < nargs; n++) {  /* Setup args. */
    IRRef ref = args[n];
    if (ref) {
      IRIns *ir = IR(ref);
      if (irt_isfp(ir->t)) {
	if (fpr <= REGARG_LASTFPR) {
	  lua_assert(rset_test(as->freeset, fpr));  /* Already evicted. */
	  ra_leftov(as, fpr, ref);
	  fpr++;
	} else {
	  Reg r = ra_alloc1(as, ref, RSET_FPR);
	  if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4;
	  emit_spstore(as, ir, r, ofs);
	  ofs += irt_isnum(ir->t) ? 8 : 4;
	}
      } else {
	if (gpr <= REGARG_LASTGPR) {
	  lua_assert(rset_test(as->freeset, gpr));  /* Already evicted. */
	  ra_leftov(as, gpr, ref);
	  gpr++;
	} else {
	  Reg r = ra_alloc1(as, ref, RSET_GPR);
	  emit_spstore(as, ir, r, ofs);
	  ofs += 4;
	}
      }
    } else {
      if (gpr <= REGARG_LASTGPR)
	gpr++;
      else
	ofs += 4;
    }
  }
  if ((ci->flags & CCI_VARARG))  /* Vararg calls need to know about FPR use. */
    emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
}
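/* [Editor annotation -- not part of the original source]
** Argument setup above follows the 32 bit SVR4 PPC ABI: integer args go
** in REGARG_FIRSTGPR..REGARG_LASTGPR (r3-r10), FP args in f1-f8, the
** rest on the stack from offset 8 with doubles 8-byte aligned. The
** trailing CREQV/CRXOR on CR bit 6 tells a vararg callee whether any FP
** arguments were passed in registers.
*/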
/* Setup result reg/sp for call. Evict scratch regs. */
static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
  RegSet drop = RSET_SCRATCH;
  int hiop = ((ir+1)->o == IR_HIOP);
  if ((ci->flags & CCI_NOFPRCLOBBER))
    drop &= ~RSET_FPR;
  if (ra_hasreg(ir->r))
    rset_clear(drop, ir->r);  /* Dest reg handled below. */
  if (hiop && ra_hasreg((ir+1)->r))
    rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
  ra_evictset(as, drop);  /* Evictions must be performed first. */
  if (ra_used(ir)) {
    lua_assert(!irt_ispri(ir->t));
    if (irt_isfp(ir->t)) {
      if ((ci->flags & CCI_CASTU64)) {
	/* Use spill slot or temp slots. */
	int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
	Reg dest = ir->r;
	if (ra_hasreg(dest)) {
	  ra_free(as, dest);
	  ra_modified(as, dest);
	  emit_fai(as, PPCI_LFD, dest, RID_SP, ofs);
	}
	emit_tai(as, PPCI_STW, RID_RETHI, RID_SP, ofs);
	emit_tai(as, PPCI_STW, RID_RETLO, RID_SP, ofs+4);
      } else {
	ra_destreg(as, ir, RID_FPRET);
      }
    } else if (hiop) {
      ra_destpair(as, ir);
    } else {
      ra_destreg(as, ir, RID_RET);
    }
  }
}

static void asm_call(ASMState *as, IRIns *ir)
{
  IRRef args[CCI_NARGS_MAX];
  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
  asm_collectargs(as, ir, ci, args);
  asm_setupresult(as, ir, ci);
  asm_gencall(as, ci, args);
}

static void asm_callx(ASMState *as, IRIns *ir)
{
  IRRef args[CCI_NARGS_MAX];
  CCallInfo ci;
  IRRef func;
  IRIns *irf;
  ci.flags = asm_callx_flags(as, ir);
  asm_collectargs(as, ir, &ci, args);
  asm_setupresult(as, ir, &ci);
  func = ir->op2; irf = IR(func);
  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
  if (irref_isk(func)) {  /* Call to constant address. */
    ci.func = (ASMFunction)(void *)(irf->i);
  } else {  /* Need a non-argument register for indirect calls. */
    RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
    Reg freg = ra_alloc1(as, func, allow);
    *--as->mcp = PPCI_BCTRL;
    *--as->mcp = PPCI_MTCTR | PPCF_T(freg);
    ci.func = (ASMFunction)(void *)0;
  }
  asm_gencall(as, &ci, args);
}

static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
{
  const CCallInfo *ci = &lj_ir_callinfo[id];
  IRRef args[2];
  args[0] = ir->op1;
  args[1] = ir->op2;
  asm_setupresult(as, ir, ci);
  asm_gencall(as, ci, args);
}

/* -- Returns ------------------------------------------------------------- */

/* Return to lower frame. Guard that it goes to the right spot. */
static void asm_retf(ASMState *as, IRIns *ir)
{
  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
  void *pc = ir_kptr(IR(ir->op2));
  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
  as->topslot -= (BCReg)delta;
  if ((int32_t)as->topslot < 0) as->topslot = 0;
  emit_setgl(as, base, jit_base);
  emit_addptr(as, base, -8*delta);
  asm_guardcc(as, CC_NE);
  emit_ab(as, PPCI_CMPW, RID_TMP,
	  ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base)));
  emit_tai(as, PPCI_LWZ, RID_TMP, base, -8);
}

/* -- Type conversions ---------------------------------------------------- */

static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
{
  RegSet allow = RSET_FPR;
  Reg tmp = ra_scratch(as, rset_clear(allow, left));
  Reg fbias = ra_scratch(as, rset_clear(allow, tmp));
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg hibias = ra_allock(as, 0x43300000, rset_exclude(RSET_GPR, dest));
  asm_guardcc(as, CC_NE);
  emit_fab(as, PPCI_FCMPU, 0, tmp, left);
  emit_fab(as, PPCI_FSUB, tmp, tmp, fbias);
  emit_fai(as, PPCI_LFD, tmp, RID_SP, SPOFS_TMP);
  emit_tai(as, PPCI_STW, RID_TMP, RID_SP, SPOFS_TMPLO);
  emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
  emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000);
  emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
  emit_lsptr(as, PPCI_LFS, (fbias & 31),
	     (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
	     RSET_GPR);
  emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
  emit_fb(as, PPCI_FCTIWZ, tmp, left);
}
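/* [Editor annotation -- not part of the original source]
** asm_tointg() and the int->FP path of asm_conv() below use the 2^52
** bias trick. The constant U64x(59800004,59800000) is a pair of single
** precision floats: 2^52+2^31 and 2^52. Storing the high word 0x43300000
** (the biased exponent of 2^52) above an integer low word x builds the
** double 2^52 + (uint32_t)x; for signed values the sign bit is flipped
** first (XORIS 0x8000, i.e. adding 2^31) and the larger bias 2^52+2^31
** is subtracted. asm_tointg() additionally converts with FCTIWZ, rebuilds
** a double from the result via the same trick and guards with FCMPU that
** it compares equal to the input, i.e. the number was an exact integer.
*/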
static void asm_tobit(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_FPR;
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, allow);
  Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
  Reg tmp = ra_scratch(as, rset_clear(allow, right));
  emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
  emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
  emit_fab(as, PPCI_FADD, tmp, left, right);
}

static void asm_conv(ASMState *as, IRIns *ir)
{
  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
  IRRef lref = ir->op1;
  lua_assert(irt_type(ir->t) != st);
  lua_assert(!(irt_isint64(ir->t) ||
	       (st == IRT_I64 || st == IRT_U64)));  /* Handled by SPLIT. */
  if (irt_isfp(ir->t)) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    if (stfp) {  /* FP to FP conversion. */
      if (st == IRT_NUM)  /* double -> float conversion. */
	emit_fb(as, PPCI_FRSP, dest, ra_alloc1(as, lref, RSET_FPR));
      else  /* float -> double conversion is a no-op on PPC. */
	ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
    } else {  /* Integer to FP conversion. */
      /* IRT_INT: Flip hibit, bias with 2^52, subtract 2^52+2^31. */
      /* IRT_U32: Bias with 2^52, subtract 2^52. */
      RegSet allow = RSET_GPR;
      Reg left = ra_alloc1(as, lref, allow);
      Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left));
      Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
      const float *kbias;
      if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest);
      emit_fab(as, PPCI_FSUB, dest, dest, fbias);
      emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
      kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000));
      if (st == IRT_U32) kbias++;
      emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias,
		 rset_clear(allow, hibias));
      emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP,
	       RID_SP, SPOFS_TMPLO);
      emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
      if (st != IRT_U32) emit_asi(as, PPCI_XORIS, RID_TMP, left, 0x8000);
    }
  } else if (stfp) {  /* FP to integer conversion. */
    if (irt_isguard(ir->t)) {
      /* Checked conversions are only supported from number to int. */
      lua_assert(irt_isint(ir->t) && st == IRT_NUM);
      asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
    } else {
      Reg dest = ra_dest(as, ir, RSET_GPR);
      Reg left = ra_alloc1(as, lref, RSET_FPR);
      Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
      if (irt_isu32(ir->t)) {
	/* Convert both x and x-2^31 to int and merge results. */
	Reg tmpi = ra_scratch(as, rset_exclude(RSET_GPR, dest));
	emit_asb(as, PPCI_OR, dest, dest, tmpi);  /* Select with mask idiom. */
	emit_asb(as, PPCI_AND, tmpi, tmpi, RID_TMP);
	emit_asb(as, PPCI_ANDC, dest, dest, RID_TMP);
	emit_tai(as, PPCI_LWZ, tmpi, RID_SP, SPOFS_TMPLO);  /* tmp = (int)(x) */
	emit_tai(as, PPCI_ADDIS, dest, dest, 0x8000);  /* dest += 2^31 */
	emit_asb(as, PPCI_SRAWI, RID_TMP, dest, 31);  /* mask = -(dest < 0) */
	emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
	emit_tai(as, PPCI_LWZ, dest,
		 RID_SP, SPOFS_TMPLO);  /* dest = (int)(x-2^31) */
	emit_fb(as, PPCI_FCTIWZ, tmp, left);
	emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
	emit_fb(as, PPCI_FCTIWZ, tmp, tmp);
	emit_fab(as, PPCI_FSUB, tmp, left, tmp);
	emit_lsptr(as, PPCI_LFS, (tmp & 31),
		   (void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)),
		   RSET_GPR);
      } else {
	emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
	emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
	emit_fb(as, PPCI_FCTIWZ, tmp, left);
      }
    }
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
      Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
      lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
      if ((ir->op2 & IRCONV_SEXT))
	emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left);
      else
	emit_rot(as, PPCI_RLWINM, dest, left, 0, st == IRT_U8 ? 24 : 16, 31);
    } else {  /* 32/64 bit integer conversions. */
      /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */
      ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
    }
  }
}

#if LJ_HASFFI
static void asm_conv64(ASMState *as, IRIns *ir)
{
  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
  IRCallID id;
  const CCallInfo *ci;
  IRRef args[2];
  args[0] = ir->op1;
  args[1] = (ir-1)->op1;
  if (st == IRT_NUM || st == IRT_FLOAT) {
    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
    ir--;
  } else {
    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
  }
  ci = &lj_ir_callinfo[id];
  asm_setupresult(as, ir, ci);
  asm_gencall(as, ci, args);
}
#endif

static void asm_strto(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_tonum];
  IRRef args[2];
  int32_t ofs;
  RegSet drop = RSET_SCRATCH;
  if (ra_hasreg(ir->r)) rset_set(drop, ir->r);  /* Spill dest reg (if any). */
  ra_evictset(as, drop);
  asm_guardcc(as, CC_EQ);
  emit_ai(as, PPCI_CMPWI, RID_RET, 0);  /* Test return status. */
  args[0] = ir->op1;      /* GCstr *str */
  args[1] = ASMREF_TMP1;  /* TValue *n  */
  asm_gencall(as, ci, args);
  /* Store the result to the spill slot or temp slots. */
  ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
  emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
}

/* Get pointer to TValue. */
static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
{
  IRIns *ir = IR(ref);
  if (irt_isnum(ir->t)) {
    if (irref_isk(ref))  /* Use the number constant itself as a TValue. */
      ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
    else  /* Otherwise force a spill and use the spill slot. */
      emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir));
  } else {
    /* Otherwise use g->tmptv to hold the TValue. */
    RegSet allow = rset_exclude(RSET_GPR, dest);
    Reg type;
    emit_tai(as, PPCI_ADDI, dest, RID_JGL, offsetof(global_State, tmptv)-32768);
    if (!irt_ispri(ir->t)) {
      Reg src = ra_alloc1(as, ref, allow);
      emit_setgl(as, src, tmptv.gcr);
    }
    type = ra_allock(as, irt_toitype(ir->t), allow);
    emit_setgl(as, type, tmptv.it);
  }
}

static void asm_tostr(ASMState *as, IRIns *ir)
{
  IRRef args[2];
  args[0] = ASMREF_L;
  as->gcsteps++;
  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
    args[1] = ASMREF_TMP1;  /* const lua_Number * */
    asm_setupresult(as, ir, ci);  /* GCstr * */
    asm_gencall(as, ci, args);
    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
  } else {
    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
    args[1] = ir->op1;  /* int32_t k */
    asm_setupresult(as, ir, ci);  /* GCstr * */
    asm_gencall(as, ci, args);
  }
}

/* -- Memory references --------------------------------------------------- */

static void asm_aref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg idx, base;
  if (irref_isk(ir->op2)) {
    IRRef tab = IR(ir->op1)->op1;
    int32_t ofs = asm_fuseabase(as, tab);
    IRRef refa = ofs ? tab : ir->op1;
    ofs += 8*IR(ir->op2)->i;
    if (checki16(ofs)) {
      base = ra_alloc1(as, refa, RSET_GPR);
      emit_tai(as, PPCI_ADDI, dest, base, ofs);
      return;
    }
  }
  base = ra_alloc1(as, ir->op1, RSET_GPR);
  idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
  emit_tab(as, PPCI_ADD, dest, RID_TMP, base);
  emit_slwi(as, RID_TMP, idx, 3);
}

/* Inlined hash lookup. Specialized for key type and for const keys.
** The equivalent C code is:
**   Node *n = hashkey(t, key);
**   do {
**     if (lj_obj_equal(&n->key, key)) return &n->val;
**   } while ((n = nextnode(n)));
**   return niltv(L);
*/
static void asm_href(ASMState *as, IRIns *ir, IROp merge)
{
  RegSet allow = RSET_GPR;
  int destused = ra_used(ir);
  Reg dest = ra_dest(as, ir, allow);
  Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
  Reg key = RID_NONE, tmp1 = RID_TMP, tmp2;
  Reg tisnum = RID_NONE, tmpnum = RID_NONE;
  IRRef refkey = ir->op2;
  IRIns *irkey = IR(refkey);
  IRType1 kt = irkey->t;
  uint32_t khash;
  MCLabel l_end, l_loop, l_next;

  rset_clear(allow, tab);
  if (irt_isnum(kt)) {
    key = ra_alloc1(as, refkey, RSET_FPR);
    tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
    tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
    rset_clear(allow, tisnum);
  } else if (!irt_ispri(kt)) {
    key = ra_alloc1(as, refkey, allow);
    rset_clear(allow, key);
  }
  tmp2 = ra_scratch(as, allow);
  rset_clear(allow, tmp2);

  /* Key not found in chain: jump to exit (if merged) or load niltv. */
  l_end = emit_label(as);
  as->invmcp = NULL;
  if (merge == IR_NE)
    asm_guardcc(as, CC_EQ);
  else if (destused)
    emit_loada(as, dest, niltvg(J2G(as->J)));

  /* Follow hash chain until the end. */
  l_loop = --as->mcp;
  emit_ai(as, PPCI_CMPWI, dest, 0);
  emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(Node, next));
  l_next = emit_label(as);

  /* Type and value comparison. */
  if (merge == IR_EQ)
    asm_guardcc(as, CC_EQ);
  else
    emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
  if (irt_isnum(kt)) {
    emit_fab(as, PPCI_FCMPU, 0, tmpnum, key);
    emit_condbranch(as, PPCI_BC, CC_GE, l_next);
    emit_ab(as, PPCI_CMPLW, tmp1, tisnum);
    emit_fai(as, PPCI_LFD, tmpnum, dest, (int32_t)offsetof(Node, key.n));
  } else {
    if (!irt_ispri(kt)) {
      emit_ab(as, PPCI_CMPW, tmp2, key);
      emit_condbranch(as, PPCI_BC, CC_NE, l_next);
    }
    emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
    if (!irt_ispri(kt))
      emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
  }
  emit_tai(as, PPCI_LWZ, tmp1, dest, (int32_t)offsetof(Node, key.it));
  *l_loop = PPCI_BC | PPCF_Y | PPCF_CC(CC_NE) |
	    (((char *)as->mcp-(char *)l_loop) & 0xffffu);

  /* Load main position relative to tab->node into dest. */
  khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
  if (khash == 0) {
    emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
  } else {
    Reg tmphash = tmp1;
    if (irref_isk(refkey))
      tmphash = ra_allock(as, khash, allow);
    emit_tab(as, PPCI_ADD, dest, dest, tmp1);
    emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node));
    emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash);
    emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
    emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
    if (irref_isk(refkey)) {
      /* Nothing to do. */
    } else if (irt_isstr(kt)) {
      emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash));
    } else {  /* Must match with hash*() in lj_tab.c. */
      emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1);
      emit_rotlwi(as, tmp2, tmp2, HASH_ROT3);
      emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2);
      emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31);
      emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2);
      if (irt_isnum(kt)) {
	int32_t ofs = ra_spill(as, irkey);
	emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1);
	emit_rotlwi(as, dest, tmp1, HASH_ROT1);
	emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1);
	emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4);
	emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs);
      } else {
	emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
	emit_rotlwi(as, dest, tmp1, HASH_ROT1);
	emit_tai(as, PPCI_ADDI, tmp1, tmp2, HASH_BIAS);
	emit_tai(as, PPCI_ADDIS, tmp2, key, (HASH_BIAS + 32768)>>16);
      }
    }
  }
}
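/* [Editor annotation -- not part of the original source]
** Read asm_href() bottom-up for execution order: compute the main hash
** position (inlined to match the hash*() functions in lj_tab.c, string
** keys simply loading the precomputed GCstr hash), AND it with t->hmask,
** scale by sizeof(Node) to index t->node, then walk the chain comparing
** key type and payload, branching to l_end on a hit or leaving the loop
** when the next pointer is NULL.
*/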
static void asm_hrefk(ASMState *as, IRIns *ir)
{
  IRIns *kslot = IR(ir->op2);
  IRIns *irkey = IR(kslot->op1);
  int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
  int32_t kofs = ofs + (int32_t)offsetof(Node, key);
  Reg dest = (ra_used(ir)||ofs > 65535) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
  Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg key = RID_NONE, type = RID_TMP, idx = node;
  RegSet allow = rset_exclude(RSET_GPR, node);
  lua_assert(ofs % sizeof(Node) == 0);
  if (ofs > 65535) {
    idx = dest;
    rset_clear(allow, dest);
    kofs = (int32_t)offsetof(Node, key);
  } else if (ra_hasreg(dest)) {
    emit_tai(as, PPCI_ADDI, dest, node, ofs);
  }
  asm_guardcc(as, CC_NE);
  if (!irt_ispri(irkey->t)) {
    key = ra_scratch(as, allow);
    rset_clear(allow, key);
  }
  rset_clear(allow, type);
  if (irt_isnum(irkey->t)) {
    emit_cmpi(as, key, (int32_t)ir_knum(irkey)->u32.lo);
    asm_guardcc(as, CC_NE);
    emit_cmpi(as, type, (int32_t)ir_knum(irkey)->u32.hi);
  } else {
    if (ra_hasreg(key)) {
      emit_cmpi(as, key, irkey->i);  /* May use RID_TMP, i.e. type. */
      asm_guardcc(as, CC_NE);
    }
    emit_ai(as, PPCI_CMPWI, type, irt_toitype(irkey->t));
  }
  if (ra_hasreg(key)) emit_tai(as, PPCI_LWZ, key, idx, kofs+4);
  emit_tai(as, PPCI_LWZ, type, idx, kofs);
  if (ofs > 65535) {
    emit_tai(as, PPCI_ADDIS, dest, dest, (ofs + 32768) >> 16);
    emit_tai(as, PPCI_ADDI, dest, node, ofs);
  }
}

static void asm_newref(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
  IRRef args[3];
  args[0] = ASMREF_L;     /* lua_State *L */
  args[1] = ir->op1;      /* GCtab *t     */
  args[2] = ASMREF_TMP1;  /* cTValue *key */
  asm_setupresult(as, ir, ci);  /* TValue * */
  asm_gencall(as, ci, args);
  asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
}

static void asm_uref(ASMState *as, IRIns *ir)
{
  /* NYI: Check that UREFO is still open and not aliasing a slot. */
  Reg dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op1)) {
    GCfunc *fn = ir_kfunc(IR(ir->op1));
    MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
    emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR);
  } else {
    Reg uv = ra_scratch(as, RSET_GPR);
    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
    if (ir->o == IR_UREFC) {
      asm_guardcc(as, CC_NE);
      emit_ai(as, PPCI_CMPWI, RID_TMP, 1);
      emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv));
      emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
    } else {
      emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v));
    }
    emit_tai(as, PPCI_LWZ, uv, func,
	     (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
  }
}

static void asm_fref(ASMState *as, IRIns *ir)
{
  UNUSED(as); UNUSED(ir);
  lua_assert(!ra_used(ir));
}

static void asm_strref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  IRRef ref = ir->op2, refk = ir->op1;
  int32_t ofs = (int32_t)sizeof(GCstr);
  Reg r;
  if (irref_isk(ref)) {
    IRRef tmp = refk; refk = ref; ref = tmp;
  } else if (!irref_isk(refk)) {
    Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
    IRIns *irr = IR(ir->op2);
    if (ra_hasreg(irr->r)) {
      ra_noweak(as, irr->r);
      right = irr->r;
    } else if (mayfuse(as, irr->op2) &&
	       irr->o == IR_ADD && irref_isk(irr->op2) &&
	       checki16(ofs + IR(irr->op2)->i)) {
      ofs += IR(irr->op2)->i;
      right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left));
    } else {
      right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left));
    }
    emit_tai(as, PPCI_ADDI, dest, dest, ofs);
    emit_tab(as, PPCI_ADD, dest, left, right);
    return;
  }
  r = ra_alloc1(as, ref, RSET_GPR);
  ofs += IR(refk)->i;
  if (checki16(ofs))
    emit_tai(as, PPCI_ADDI, dest, r, ofs);
  else
    emit_tab(as, PPCI_ADD, dest, r,
	     ra_allock(as, ofs, rset_exclude(RSET_GPR, r)));
}

/* -- Loads and stores ---------------------------------------------------- */

static PPCIns asm_fxloadins(IRIns *ir)
{
  switch (irt_type(ir->t)) {
  case IRT_I8: return PPCI_LBZ;  /* Needs sign-extension. */
  case IRT_U8: return PPCI_LBZ;
  case IRT_I16: return PPCI_LHA;
  case IRT_U16: return PPCI_LHZ;
  case IRT_NUM: return PPCI_LFD;
  case IRT_FLOAT: return PPCI_LFS;
  default: return PPCI_LWZ;
  }
}

static PPCIns asm_fxstoreins(IRIns *ir)
{
  switch (irt_type(ir->t)) {
  case IRT_I8: case IRT_U8: return PPCI_STB;
  case IRT_I16: case IRT_U16: return PPCI_STH;
  case IRT_NUM: return PPCI_STFD;
  case IRT_FLOAT: return PPCI_STFS;
  default: return PPCI_STW;
  }
}

static void asm_fload(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
  PPCIns pi = asm_fxloadins(ir);
  int32_t ofs;
  if (ir->op2 == IRFL_TAB_ARRAY) {
    ofs = asm_fuseabase(as, ir->op1);
    if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
      emit_tai(as, PPCI_ADDI, dest, idx, ofs);
      return;
    }
  }
  ofs = field_ofs[ir->op2];
  lua_assert(!irt_isi8(ir->t));
  emit_tai(as, pi, dest, idx, ofs);
}

static void asm_fstore(ASMState *as, IRIns *ir)
{
  Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
  IRIns *irf = IR(ir->op1);
  Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
  int32_t ofs = field_ofs[irf->op2];
  PPCIns pi = asm_fxstoreins(ir);
  emit_tai(as, pi, src, idx, ofs);
}

static void asm_xload(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
  lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
  if (irt_isi8(ir->t))
    emit_as(as, PPCI_EXTSB, dest, dest);
  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
}

static void asm_xstore(ASMState *as, IRIns *ir)
{
  IRIns *irb;
  if (mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP &&
      ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) {
    /* Fuse BSWAP with XSTORE to stwbrx. */
    Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
    asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src));
  } else {
    Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
    asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
		 rset_exclude(RSET_GPR, src));
  }
}

static void asm_ahuvload(ASMState *as, IRIns *ir)
{
  IRType1 t = ir->t;
  Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx;
  RegSet allow = RSET_GPR;
  int32_t ofs = AHUREF_LSX;
  if (ra_used(ir)) {
    lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
    if (!irt_isnum(t)) ofs = 0;
    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
    rset_clear(allow, dest);
  }
  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
  if (irt_isnum(t)) {
    Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx));
    asm_guardcc(as, CC_GE);
    emit_ab(as, PPCI_CMPLW, type, tisnum);
    if (ra_hasreg(dest)) {
      if (ofs == AHUREF_LSX) {
	tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR,
						       (idx&255)), (idx>>8)));
	emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp);
      } else {
	emit_fai(as, PPCI_LFD, dest, idx, ofs);
      }
    }
  } else {
    asm_guardcc(as, CC_NE);
    emit_ai(as, PPCI_CMPWI, type, irt_toitype(t));
    if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, idx, ofs+4);
  }
  if (ofs == AHUREF_LSX) {
    emit_tab(as, PPCI_LWZX, type, (idx&255), tmp);
    emit_slwi(as, tmp, (idx>>8), 3);
  } else {
    emit_tai(as, PPCI_LWZ, type, idx, ofs);
  }
}
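/* [Editor annotation -- not part of the original source]
** On this 32 bit target a TValue is 8 bytes: the high word holds the
** type tag (at offset ofs, with the payload at ofs+4), while numbers use
** the whole slot as a double. Number tags compare below LJ_TISNUM as an
** unsigned integer, hence the CMPLW type, tisnum with a CC_GE guard;
** every other type is checked for exact equality with irt_toitype().
*/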
static void asm_ahustore(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_GPR;
  Reg idx, src = RID_NONE, type = RID_NONE;
  int32_t ofs = AHUREF_LSX;
  if (irt_isnum(ir->t)) {
    src = ra_alloc1(as, ir->op2, RSET_FPR);
  } else {
    if (!irt_ispri(ir->t)) {
      src = ra_alloc1(as, ir->op2, allow);
      rset_clear(allow, src);
      ofs = 0;
    }
    type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
    rset_clear(allow, type);
  }
  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
  if (irt_isnum(ir->t)) {
    if (ofs == AHUREF_LSX) {
      emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP);
      emit_slwi(as, RID_TMP, (idx>>8), 3);
    } else {
      emit_fai(as, PPCI_STFD, src, idx, ofs);
    }
  } else {
    if (ra_hasreg(src))
      emit_tai(as, PPCI_STW, src, idx, ofs+4);
    if (ofs == AHUREF_LSX) {
      emit_tab(as, PPCI_STWX, type, (idx&255), RID_TMP);
      emit_slwi(as, RID_TMP, (idx>>8), 3);
    } else {
      emit_tai(as, PPCI_STW, type, idx, ofs);
    }
  }
}

static void asm_sload(ASMState *as, IRIns *ir)
{
  int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 0 : 4);
  IRType1 t = ir->t;
  Reg dest = RID_NONE, type = RID_NONE, base;
  RegSet allow = RSET_GPR;
  lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
  lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
  lua_assert(LJ_DUALNUM ||
	     !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
  if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
    dest = ra_scratch(as, RSET_FPR);
    asm_tointg(as, ir, dest);
    t.irt = IRT_NUM;  /* Continue with a regular number type check. */
  } else if (ra_used(ir)) {
    lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
    rset_clear(allow, dest);
    base = ra_alloc1(as, REF_BASE, allow);
    rset_clear(allow, base);
    if ((ir->op2 & IRSLOAD_CONVERT)) {
      if (irt_isint(t)) {
	emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
	dest = ra_scratch(as, RSET_FPR);
	emit_fai(as, PPCI_STFD, dest, RID_SP, SPOFS_TMP);
	emit_fb(as, PPCI_FCTIWZ, dest, dest);
	t.irt = IRT_NUM;  /* Check for original type. */
      } else {
	Reg tmp = ra_scratch(as, allow);
	Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, tmp));
	Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
	emit_fab(as, PPCI_FSUB, dest, dest, fbias);
	emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
	emit_lsptr(as, PPCI_LFS, (fbias & 31),
		   (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
		   rset_clear(allow, hibias));
	emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO);
	emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
	emit_asi(as, PPCI_XORIS, tmp, tmp, 0x8000);
	dest = tmp;
	t.irt = IRT_INT;  /* Check for original type. */
      }
    }
    goto dotypecheck;
  }
  base = ra_alloc1(as, REF_BASE, allow);
  rset_clear(allow, base);
dotypecheck:
  if (irt_isnum(t)) {
    if ((ir->op2 & IRSLOAD_TYPECHECK)) {
      Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
      asm_guardcc(as, CC_GE);
      emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum);
      type = RID_TMP;
    }
    if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4);
  } else {
    if ((ir->op2 & IRSLOAD_TYPECHECK)) {
      asm_guardcc(as, CC_NE);
      emit_ai(as, PPCI_CMPWI, RID_TMP, irt_toitype(t));
      type = RID_TMP;
    }
    if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, base, ofs);
  }
  if (ra_hasreg(type)) emit_tai(as, PPCI_LWZ, type, base, ofs-4);
}
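/* [Editor annotation -- not part of the original source]
** Stack slot s lives at BASE+8*(s-1): as computed above, ofs addresses
** the payload word and ofs-4 the type word, so the FP load starts at
** ofs-4 to fetch the full 8 byte TValue. The IRSLOAD_CONVERT paths reuse
** the FCTIWZ and 2^52 bias sequences from asm_conv(), and the converted
** value is still type-checked against its original (pre-conversion) type.
*/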
/* -- Allocations --------------------------------------------------------- */

#if LJ_HASFFI
static void asm_cnew(ASMState *as, IRIns *ir)
{
  CTState *cts = ctype_ctsG(J2G(as->J));
  CTypeID typeid = (CTypeID)IR(ir->op1)->i;
  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
	      lj_ctype_size(cts, typeid) : (CTSize)IR(ir->op2)->i;
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
  IRRef args[2];
  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
  RegSet drop = RSET_SCRATCH;
  lua_assert(sz != CTSIZE_INVALID);

  args[0] = ASMREF_L;     /* lua_State *L */
  args[1] = ASMREF_TMP1;  /* MSize size   */
  as->gcsteps++;

  if (ra_hasreg(ir->r))
    rset_clear(drop, ir->r);  /* Dest reg handled below. */
  ra_evictset(as, drop);
  if (ra_used(ir))
    ra_destreg(as, ir, RID_RET);  /* GCcdata * */

  /* Initialize immutable cdata object. */
  if (ir->o == IR_CNEWI) {
    int32_t ofs = sizeof(GCcdata);
    lua_assert(sz == 4 || sz == 8);
    if (sz == 8) {
      ofs += 4;
      lua_assert((ir+1)->o == IR_HIOP);
    }
    for (;;) {
      Reg r = ra_alloc1(as, ir->op2, allow);
      emit_tai(as, PPCI_STW, r, RID_RET, ofs);
      rset_clear(allow, r);
      if (ofs == sizeof(GCcdata)) break;
      ofs -= 4; ir++;
    }
  }
  /* Initialize gct and typeid. lj_mem_newgco() already sets marked. */
  emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
  emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, typeid));
  emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
  emit_ti(as, PPCI_LI, RID_TMP, typeid);  /* Lower 16 bit used. Sign-ext ok. */
  asm_gencall(as, ci, args);
  ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
	       ra_releasetmp(as, ASMREF_TMP1));
}
#else
#define asm_cnew(as, ir)	((void)0)
#endif

/* -- Write barriers ------------------------------------------------------ */

static void asm_tbar(ASMState *as, IRIns *ir)
{
  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab));
  Reg link = RID_TMP;
  MCLabel l_end = emit_label(as);
  emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist));
  emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked));
  emit_setgl(as, tab, gc.grayagain);
  lua_assert(LJ_GC_BLACK == 0x04);
  emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28);  /* Clear black bit. */
  emit_getgl(as, link, gc.grayagain);
  emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
  emit_asi(as, PPCI_ANDIDOT, RID_TMP, mark, LJ_GC_BLACK);
  emit_tai(as, PPCI_LBZ, mark, tab, (int32_t)offsetof(GCtab, marked));
}

static void asm_obar(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
  IRRef args[2];
  MCLabel l_end;
  Reg obj, val, tmp;
  /* No need for other object barriers (yet). */
  lua_assert(IR(ir->op1)->o == IR_UREFC);
  ra_evictset(as, RSET_SCRATCH);
  l_end = emit_label(as);
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ir->op1;      /* TValue *tv      */
  asm_gencall(as, ci, args);
  emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
  obj = IR(ir->op1)->r;
  tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
  emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
  emit_asi(as, PPCI_ANDIDOT, tmp, tmp, LJ_GC_BLACK);
  emit_condbranch(as, PPCI_BC, CC_EQ, l_end);
  emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, LJ_GC_WHITES);
  val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
  emit_tai(as, PPCI_LBZ, tmp, obj,
	   (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
  emit_tai(as, PPCI_LBZ, RID_TMP, val, (int32_t)offsetof(GChead, marked));
}

/* -- Arithmetic and logic operations ------------------------------------- */

static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi)
{
  Reg dest = ra_dest(as, ir, RSET_FPR);
  Reg right, left = ra_alloc2(as, ir, RSET_FPR);
  right = (left >> 8); left &= 255;
  if (pi == PPCI_FMUL)
    emit_fac(as, pi, dest, left, right);
  else
    emit_fab(as, pi, dest, left, right);
}

static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
{
  Reg dest = ra_dest(as, ir, RSET_FPR);
  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
  emit_fb(as, pi, dest, left);
}

static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
{
  IRIns *irp = IR(ir->op1);
  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
    IRIns *irpp = IR(irp->op1);
    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
	irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
      IRRef args[2];
      args[0] = irpp->op1;
      args[1] = irp->op2;
      asm_setupresult(as, ir, ci);
      asm_gencall(as, ci, args);
      return 1;
    }
  }
  return 0;
}

static void asm_add(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD))
      asm_fparith(as, ir, PPCI_FADD);
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    PPCIns pi;
    if (irref_isk(ir->op2)) {
      int32_t k = IR(ir->op2)->i;
      if (checki16(k)) {
	pi = PPCI_ADDI;
	/* May fail due to spills/restores above, but simplifies the logic. */
	if (as->flagmcp == as->mcp) {
	  as->flagmcp = NULL;
	  as->mcp++;
	  pi = PPCI_ADDICDOT;
	}
	emit_tai(as, pi, dest, left, k);
	return;
      } else if ((k & 0xffff) == 0) {
	emit_tai(as, PPCI_ADDIS, dest, left, (k >> 16));
	return;
      } else if (!as->sectref) {
	emit_tai(as, PPCI_ADDIS, dest, dest, (k + 32768) >> 16);
	emit_tai(as, PPCI_ADDI, dest, left, k);
	return;
      }
    }
    pi = PPCI_ADD;
    /* May fail due to spills/restores above, but simplifies the logic. */
    if (as->flagmcp == as->mcp) {
      as->flagmcp = NULL;
      as->mcp++;
      pi |= PPCF_DOT;
    }
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_tab(as, pi, dest, left, right);
  }
}

static void asm_sub(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB))
      asm_fparith(as, ir, PPCI_FSUB);
  } else {
    PPCIns pi = PPCI_SUBF;
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg left, right;
    if (irref_isk(ir->op1)) {
      int32_t k = IR(ir->op1)->i;
      if (checki16(k)) {
	right = ra_alloc1(as, ir->op2, RSET_GPR);
	emit_tai(as, PPCI_SUBFIC, dest, right, k);
	return;
      }
    }
    /* May fail due to spills/restores above, but simplifies the logic. */
    if (as->flagmcp == as->mcp) {
      as->flagmcp = NULL;
      as->mcp++;
      pi |= PPCF_DOT;
    }
    left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_tab(as, pi, dest, right, left);  /* Subtract right _from_ left. */
  }
}

static void asm_mul(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    asm_fparith(as, ir, PPCI_FMUL);
  } else {
    PPCIns pi = PPCI_MULLW;
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    if (irref_isk(ir->op2)) {
      int32_t k = IR(ir->op2)->i;
      if (checki16(k)) {
	emit_tai(as, PPCI_MULLI, dest, left, k);
	return;
      }
    }
    /* May fail due to spills/restores above, but simplifies the logic. */
    if (as->flagmcp == as->mcp) {
      as->flagmcp = NULL;
      as->mcp++;
      pi |= PPCF_DOT;
    }
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_tab(as, pi, dest, left, right);
  }
}

static void asm_neg(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    asm_fpunary(as, ir, PPCI_FNEG);
  } else {
    Reg dest, left;
    PPCIns pi = PPCI_NEG;
    if (as->flagmcp == as->mcp) {
      as->flagmcp = NULL;
      as->mcp++;
      pi |= PPCF_DOT;
    }
    dest = ra_dest(as, ir, RSET_GPR);
    left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    emit_tab(as, pi, dest, left, 0);
  }
}

static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
{
  Reg dest, left, right;
  if (as->flagmcp == as->mcp) {
    as->flagmcp = NULL;
    as->mcp++;
  }
  asm_guardcc(as, CC_SO);
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc2(as, ir, RSET_GPR);
  right = (left >> 8); left &= 255;
  if (pi == PPCI_SUBFO) { Reg tmp = left; left = right; right = tmp; }
  emit_tab(as, pi|PPCF_DOT, dest, left, right);
}

#if LJ_HASFFI
static void asm_add64(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
  PPCIns pi = PPCI_ADDE;
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    if (k == 0)
      pi = PPCI_ADDZE;
    else if (k == -1)
      pi = PPCI_ADDME;
    else
      goto needright;
    right = 0;
  } else {
  needright:
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  }
  emit_tab(as, pi, dest, left, right);
  ir--;
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc1(as, ir->op1, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    if (checki16(k)) {
      emit_tai(as, PPCI_ADDIC, dest, left, k);
      return;
    }
  }
  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  emit_tab(as, PPCI_ADDC, dest, left, right);
}

static void asm_sub64(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left, right = ra_alloc1(as, ir->op2, RSET_GPR);
  PPCIns pi = PPCI_SUBFE;
  if (irref_isk(ir->op1)) {
    int32_t k = IR(ir->op1)->i;
    if (k == 0)
      pi = PPCI_SUBFZE;
    else if (k == -1)
      pi = PPCI_SUBFME;
    else
      goto needleft;
    left = 0;
  } else {
  needleft:
    left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, right));
  }
  emit_tab(as, pi, dest, right, left);  /* Subtract right _from_ left. */
  ir--;
  dest = ra_dest(as, ir, RSET_GPR);
  right = ra_alloc1(as, ir->op2, RSET_GPR);
  if (irref_isk(ir->op1)) {
    int32_t k = IR(ir->op1)->i;
    if (checki16(k)) {
      emit_tai(as, PPCI_SUBFIC, dest, right, k);
      return;
    }
  }
  left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, right));
  emit_tab(as, PPCI_SUBFC, dest, right, left);
}

static void asm_neg64(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
  emit_tab(as, PPCI_SUBFZE, dest, left, 0);
  ir--;
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc1(as, ir->op1, RSET_GPR);
  emit_tai(as, PPCI_SUBFIC, dest, left, 0);
}
#endif

static void asm_bitnot(ASMState *as, IRIns *ir)
{
  Reg dest, left, right;
  PPCIns pi = PPCI_NOR;
  if (as->flagmcp == as->mcp) {
    as->flagmcp = NULL;
    as->mcp++;
    pi |= PPCF_DOT;
  }
  dest = ra_dest(as, ir, RSET_GPR);
  if (mayfuse(as, ir->op1)) {
    IRIns *irl = IR(ir->op1);
    if (irl->o == IR_BAND)
      pi ^= (PPCI_NOR ^ PPCI_NAND);
    else if (irl->o == IR_BXOR)
      pi ^= (PPCI_NOR ^ PPCI_EQV);
    else if (irl->o != IR_BOR)
      goto nofuse;
    left = ra_hintalloc(as, irl->op1, dest, RSET_GPR);
    right = ra_alloc1(as, irl->op2, rset_exclude(RSET_GPR, left));
  } else {
  nofuse:
    left = right = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
  }
  emit_asb(as, pi, dest, left, right);
}

static void asm_bitswap(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  IRIns *irx;
  if (mayfuse(as, ir->op1) && (irx = IR(ir->op1))->o == IR_XLOAD &&
      ra_noreg(irx->r) && (irt_isint(irx->t) || irt_isu32(irx->t))) {
    /* Fuse BSWAP with XLOAD to lwbrx. */
    asm_fusexrefx(as, PPCI_LWBRX, dest, irx->op1, RSET_GPR);
  } else {
    Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
    Reg tmp = dest;
    if (tmp == left) {
      tmp = RID_TMP;
      emit_mr(as, dest, RID_TMP);
    }
    emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 16, 23);
    emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 0, 7);
    emit_rotlwi(as, tmp, left, 8);
  }
}
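/* [Editor annotation -- not part of the original source]
** The non-fused path of asm_bitswap() is the classic three-instruction
** PPC byte swap; in execution order (i.e. reading bottom-up):
**
**   rotlwi tmp, left, 8           ; bytes ABCD -> BCDA
**   rlwimi tmp, left, 24, 0, 7    ; insert byte D into bits 0-7   -> DCDA
**   rlwimi tmp, left, 24, 16, 23  ; insert byte B into bits 16-23 -> DCBA
*/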
static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    Reg tmp = left;
    if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
      if (!checku16(k)) {
	emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
	if ((k & 0xffff) == 0) return;
      }
      emit_asi(as, pik, dest, left, k);
      return;
    }
  }
  /* May fail due to spills/restores above, but simplifies the logic. */
  if (as->flagmcp == as->mcp) {
    as->flagmcp = NULL;
    as->mcp++;
    pi |= PPCF_DOT;
  }
  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  emit_asb(as, pi, dest, left, right);
}

/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
{
  IRIns *ir;
  Reg left;
  if (mayfuse(as, ref) && (ir = IR(ref), ra_noreg(ir->r)) &&
      irref_isk(ir->op2) && ir->o >= IR_BSHL && ir->o <= IR_BROR) {
    int32_t sh = (IR(ir->op2)->i & 31);
    switch (ir->o) {
    case IR_BSHL:
      if ((mask & ((1u<<sh)-1))) goto nofuse;
      break;
    case IR_BSHR:
      if ((mask & ~((~0u)>>sh))) goto nofuse;
      sh = ((32-sh)&31);
      break;
    case IR_BROL:
      break;
    default:
      goto nofuse;
    }
    left = ra_alloc1(as, ir->op1, RSET_GPR);
    *--as->mcp = pi | PPCF_T(left) | PPCF_B(sh);
    return;
  }
nofuse:
  left = ra_alloc1(as, ref, RSET_GPR);
  *--as->mcp = pi | PPCF_T(left);
}

static void asm_bitand(ASMState *as, IRIns *ir)
{
  Reg dest, left, right;
  IRRef lref = ir->op1;
  PPCIns dot = 0;
  IRRef op2;
  if (as->flagmcp == as->mcp) {
    as->flagmcp = NULL;
    as->mcp++;
    dot = PPCF_DOT;
  }
  dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    if (k) {
      /* First check for a contiguous bitmask as used by rlwinm. */
      uint32_t s1 = lj_ffs((uint32_t)k);
      uint32_t k1 = ((uint32_t)k >> s1);
      if ((k1 & (k1+1)) == 0) {
	asm_fuseandsh(as, PPCI_RLWINM|dot | PPCF_A(dest) |
			  PPCF_MB(31-lj_fls((uint32_t)k)) | PPCF_ME(31-s1),
		      k, lref);
	return;
      }
      if (~(uint32_t)k) {
	uint32_t s2 = lj_ffs(~(uint32_t)k);
	uint32_t k2 = (~(uint32_t)k >> s2);
	if ((k2 & (k2+1)) == 0) {
	  asm_fuseandsh(as, PPCI_RLWINM|dot | PPCF_A(dest) |
			    PPCF_MB(32-s2) | PPCF_ME(30-lj_fls(~(uint32_t)k)),
			k, lref);
	  return;
	}
      }
    }
    if (checku16(k)) {
      left = ra_alloc1(as, lref, RSET_GPR);
      emit_asi(as, PPCI_ANDIDOT, dest, left, k);
      return;
    } else if ((k & 0xffff) == 0) {
      left = ra_alloc1(as, lref, RSET_GPR);
      emit_asi(as, PPCI_ANDISDOT, dest, left, (k >> 16));
      return;
    }
  }
  op2 = ir->op2;
  if (mayfuse(as, op2) && IR(op2)->o == IR_BNOT && ra_noreg(IR(op2)->r)) {
    dot ^= (PPCI_AND ^ PPCI_ANDC);
    op2 = IR(op2)->op1;
  }
  left = ra_hintalloc(as, lref, dest, RSET_GPR);
  right = ra_alloc1(as, op2, rset_exclude(RSET_GPR, left));
  emit_asb(as, PPCI_AND ^ dot, dest, left, right);
}

static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
{
  Reg dest, left;
  Reg dot = 0;
  if (as->flagmcp == as->mcp) {
    as->flagmcp = NULL;
    as->mcp++;
    dot = PPCF_DOT;
  }
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc1(as, ir->op1, RSET_GPR);
  if (irref_isk(ir->op2)) {  /* Constant shifts. */
    int32_t shift = (IR(ir->op2)->i & 31);
    if (pik == 0)  /* SLWI */
      emit_rot(as, PPCI_RLWINM|dot, dest, left, shift, 0, 31-shift);
    else if (pik == 1)  /* SRWI */
      emit_rot(as, PPCI_RLWINM|dot, dest, left, (32-shift)&31, shift, 31);
    else
      emit_asb(as, pik|dot, dest, left, shift);
  } else {
    Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_asb(as, pi|dot, dest, left, right);
  }
}

static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
{
  if (irt_isnum(ir->t)) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    Reg tmp = dest;
    Reg right, left = ra_alloc2(as, ir, RSET_FPR);
    right = (left >> 8); left &= 255;
    if (tmp == left || tmp == right)
      tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR,
				       dest), left), right));
    emit_facb(as, PPCI_FSEL, dest, tmp,
	      ismax ? left : right, ismax ? right : left);
    emit_fab(as, PPCI_FSUB, tmp, left, right);
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg tmp1 = RID_TMP, tmp2 = dest;
    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
    right = (left >> 8); left &= 255;
    if (tmp2 == left || tmp2 == right)
      tmp2 = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR,
					dest), left), right));
    emit_tab(as, PPCI_ADD, dest, tmp2, right);
    emit_asb(as, ismax ? PPCI_ANDC : PPCI_AND, tmp2, tmp2, tmp1);
    emit_tab(as, PPCI_SUBFE, tmp1, tmp1, tmp1);
    emit_tab(as, PPCI_SUBFC, tmp2, tmp2, tmp1);
    emit_asi(as, PPCI_XORIS, tmp2, right, 0x8000);
    emit_asi(as, PPCI_XORIS, tmp1, left, 0x8000);
  }
}
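/* [Editor annotation -- not part of the original source]
** The integer min/max above is branchless; in execution order: XORIS
** 0x8000 flips both sign bits so a carry-based unsigned compare works
** for signed operands, SUBFC computes tmp2 = left-right and sets carry
** iff left >= right, SUBFE turns the carry into a mask of all zeros
** (left >= right) or all ones (left < right), AND resp. ANDC then keeps
** either 0 or left-right, and the final ADD of right produces
** min(left,right) resp. max(left,right). The FP variant instead selects
** with FSEL on the sign of left-right.
*/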
1628 /* -- Comparisons --------------------------------------------------------- */
1630 #define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */
1631 #define CC_TWO 0x80 /* Check two flags for FP comparison. */
1633 /* Map of comparisons to flags. ORDER IR. */
1634 static const uint8_t asm_compmap[IR_ABC+1] = {
1635 /* op int cc FP cc */
1636 /* LT */ CC_GE + (CC_GE<<4),
1637 /* GE */ CC_LT + (CC_LE<<4) + CC_TWO,
1638 /* LE */ CC_GT + (CC_GE<<4) + CC_TWO,
1639 /* GT */ CC_LE + (CC_LE<<4),
1640 /* ULT */ CC_GE + CC_UNSIGNED + (CC_GT<<4) + CC_TWO,
1641 /* UGE */ CC_LT + CC_UNSIGNED + (CC_LT<<4),
1642 /* ULE */ CC_GT + CC_UNSIGNED + (CC_GT<<4),
1643 /* UGT */ CC_LE + CC_UNSIGNED + (CC_LT<<4) + CC_TWO,
1644 /* EQ */ CC_NE + (CC_NE<<4),
1645 /* NE */ CC_EQ + (CC_EQ<<4),
1646 /* ABC */ CC_LE + CC_UNSIGNED + (CC_LT<<4) + CC_TWO /* Same as UGT. */
1649 static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc)
1651 Reg right, left = ra_alloc1(as, lref, RSET_GPR);
1652 if (irref_isk(rref)) {
1653 int32_t k = IR(rref)->i;
1654 if ((cc & CC_UNSIGNED) == 0) { /* Signed comparison with constant. */
1655 if (checki16(k)) {
1656 emit_tai(as, PPCI_CMPWI, cr, left, k);
1657 /* Signed comparison with zero and referencing previous ins? */
1658 if (k == 0 && lref == as->curins-1)
1659 as->flagmcp = as->mcp; /* Allow elimination of the compare. */
1660 return;
1661 } else if ((cc & 3) == (CC_EQ & 3)) { /* Use CMPLWI for EQ or NE. */
1662 if (checku16(k)) {
1663 emit_tai(as, PPCI_CMPLWI, cr, left, k);
1664 return;
1665 } else if (!as->sectref && ra_noreg(IR(rref)->r)) {
1666 emit_tai(as, PPCI_CMPLWI, cr, RID_TMP, k);
1667 emit_asi(as, PPCI_XORIS, RID_TMP, left, (k >> 16));
1668 return;
1671 } else { /* Unsigned comparison with constant. */
1672 if (checku16(k)) {
1673 emit_tai(as, PPCI_CMPLWI, cr, left, k);
1674 return;
1678 right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left));
1679 emit_tab(as, (cc & CC_UNSIGNED) ? PPCI_CMPLW : PPCI_CMPW, cr, left, right);
1682 static void asm_comp(ASMState *as, IRIns *ir)
1684 PPCCC cc = asm_compmap[ir->o];
1685 if (irt_isnum(ir->t)) {
1686 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
1687 right = (left >> 8); left &= 255;
1688 asm_guardcc(as, (cc >> 4));
1689 if ((cc & CC_TWO))
1690 emit_tab(as, PPCI_CROR, ((cc>>4)&3), ((cc>>4)&3), (CC_EQ&3));
1691 emit_fab(as, PPCI_FCMPU, 0, left, right);
1692 } else {
1693 IRRef lref = ir->op1, rref = ir->op2;
1694 if (irref_isk(lref) && !irref_isk(rref)) {
1695 /* Swap constants to the right (only for ABC). */
1696 IRRef tmp = lref; lref = rref; rref = tmp;
1697 if ((cc & 2) == 0) cc ^= 1; /* LT <-> GT, LE <-> GE */
1699 asm_guardcc(as, cc);
1700 asm_intcomp_(as, lref, rref, 0, cc);
1704 #if LJ_HASFFI
1705 /* 64 bit integer comparisons. */
1706 static void asm_comp64(ASMState *as, IRIns *ir)
1708 PPCCC cc = asm_compmap[(ir-1)->o];
1709 if ((cc&3) == (CC_EQ&3)) {
1710 asm_guardcc(as, cc);
1711 emit_tab(as, (cc&4) ? PPCI_CRAND : PPCI_CROR,
1712 (CC_EQ&3), (CC_EQ&3), 4+(CC_EQ&3));
1713 } else {
1714 asm_guardcc(as, CC_EQ);
1715 emit_tab(as, PPCI_CROR, (CC_EQ&3), (CC_EQ&3), ((cc^~(cc>>2))&1));
1716 emit_tab(as, (cc&4) ? PPCI_CRAND : PPCI_CRANDC,
1717 (CC_EQ&3), (CC_EQ&3), 4+(cc&3));
1719 /* Loword comparison sets cr1 and is unsigned, except for equality. */
1720 asm_intcomp_(as, (ir-1)->op1, (ir-1)->op2, 4,
1721 cc | ((cc&3) == (CC_EQ&3) ? 0 : CC_UNSIGNED));
1722 /* Hiword comparison sets cr0. */
1723 asm_intcomp_(as, ir->op1, ir->op2, 0, cc);
1724 as->flagmcp = NULL; /* Doesn't work here. */
1726 #endif
/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */

/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
static void asm_hiop(ASMState *as, IRIns *ir)
{
#if LJ_HASFFI
  /* HIOP is marked as a store because it needs its own DCE logic. */
  int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
  if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
  if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
    as->curins--;  /* Always skip the CONV. */
    if (usehi || uselo)
      asm_conv64(as, ir);
    return;
  } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
    as->curins--;  /* Always skip the loword comparison. */
    asm_comp64(as, ir);
    return;
  }
  if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
  switch ((ir-1)->o) {
  case IR_ADD: as->curins--; asm_add64(as, ir); break;
  case IR_SUB: as->curins--; asm_sub64(as, ir); break;
  case IR_NEG: as->curins--; asm_neg64(as, ir); break;
  case IR_CALLN:
  case IR_CALLXS:
    if (!uselo)
      ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
    break;
  case IR_CNEWI:
    /* Nothing to do here. Handled by lo op itself. */
    break;
  default: lua_assert(0); break;
  }
#else
  UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused without FFI. */
#endif
}
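
/* Editor's note (sketch of the surrounding machinery): on 32 bit targets
** lj_opt_split rewrites every 64 bit IR op into a loword op immediately
** followed by an IR_HIOP carrying the hiword operands. Since the
** assembler walks the IR backwards, asm_hiop() sees the pair first and
** decrements as->curins to swallow the loword op whenever both halves
** are best emitted together, e.g. as an addc/adde style carry chain for
** a 64 bit add.
*/
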
/* -- Stack handling ------------------------------------------------------ */

/* Check Lua stack size for overflow. Use exit handler as fallback. */
static void asm_stack_check(ASMState *as, BCReg topslot,
                            IRIns *irp, RegSet allow, ExitNo exitno)
{
  /* Try to get an unused temp. register, otherwise spill/restore RID_RET*. */
  Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE;
  rset_clear(allow, pbase);
  tmp = allow ? rset_pickbot(allow) :
                (pbase == RID_RETHI ? RID_RETLO : RID_RETHI);
  emit_condbranch(as, PPCI_BC, CC_LT, asm_exitstub_addr(as, exitno));
  if (allow == RSET_EMPTY)  /* Restore temp. register. */
    emit_tai(as, PPCI_LWZ, tmp, RID_SP, SPOFS_TMPW);
  else
    ra_modified(as, tmp);
  emit_ai(as, PPCI_CMPLWI, RID_TMP, (int32_t)(8*topslot));
  emit_tab(as, PPCI_SUBF, RID_TMP, pbase, tmp);
  emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
  if (pbase == RID_TMP)
    emit_getgl(as, RID_TMP, jit_base);
  emit_getgl(as, tmp, jit_L);
  if (allow == RSET_EMPTY)  /* Spill temp. register. */
    emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
}
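
/* Editor's sketch of the emitted check in execution order (the spill and
** restore of 'tmp' only appear when no free register was available):
**   stw    tmp, SPOFS_TMPW(sp)    ; optional spill
**   lwz    tmp, g->jit_L          ; current lua_State
**   lwz    TMP, g->jit_base       ; only if BASE must be reloaded
**   lwz    tmp, L->maxstack(tmp)
**   subf   TMP, pbase, tmp        ; TMP = maxstack - base
**   cmplwi TMP, 8*topslot         ; room for topslot 8 byte slots?
**   lwz    tmp, SPOFS_TMPW(sp)    ; optional restore (CR is set already)
**   blt -> exit stub              ; overflow: let the exit handler cope
*/
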
/* Restore Lua stack from on-trace state. */
static void asm_stack_restore(ASMState *as, SnapShot *snap)
{
  SnapEntry *map = &as->T->snapmap[snap->mapofs];
  SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
  MSize n, nent = snap->nent;
  /* Store the value of all modified slots to the Lua stack. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    int32_t ofs = 8*((int32_t)s-1);
    IRRef ref = snap_ref(sn);
    IRIns *ir = IR(ref);
    if ((sn & SNAP_NORESTORE))
      continue;
    if (irt_isnum(ir->t)) {
      Reg src = ra_alloc1(as, ref, RSET_FPR);
      emit_fai(as, PPCI_STFD, src, RID_BASE, ofs);
    } else {
      Reg type;
      RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
      lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
      if (!irt_ispri(ir->t)) {
        Reg src = ra_alloc1(as, ref, allow);
        rset_clear(allow, src);
        emit_tai(as, PPCI_STW, src, RID_BASE, ofs+4);
      }
      if ((sn & (SNAP_CONT|SNAP_FRAME))) {
        if (s == 0) continue;  /* Do not overwrite link to previous frame. */
        type = ra_allock(as, (int32_t)(*flinks--), allow);
      } else {
        type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
      }
      emit_tai(as, PPCI_STW, type, RID_BASE, ofs);
    }
    checkmclim(as);
  }
  lua_assert(map + nent == flinks);
}
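
/* Editor's note (sketch, big-endian 8 byte TValue layout): slot s lives
** at BASE + 8*(s-1). Numbers are stored whole with stfd; anything else
** becomes a (type tag, payload) pair:
**   stw src,  ofs+4(BASE)   ; payload word
**   stw type, ofs(BASE)     ; type tag, or frame link for CONT/FRAME slots
*/
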
/* -- GC handling --------------------------------------------------------- */

/* Check GC threshold and do one or more GC steps. */
static void asm_gc_check(ASMState *as)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
  IRRef args[2];
  MCLabel l_end;
  Reg tmp;
  ra_evictset(as, RSET_SCRATCH);
  l_end = emit_label(as);
  /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
  asm_guardcc(as, CC_NE);  /* Assumes asm_snap_prep() already done. */
  emit_ai(as, PPCI_CMPWI, RID_RET, 0);
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ASMREF_TMP2;  /* MSize steps     */
  asm_gencall(as, ci, args);
  emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
  tmp = ra_releasetmp(as, ASMREF_TMP2);
  emit_loadi(as, tmp, (int32_t)as->gcsteps);
  /* Jump around GC step if GC total < GC threshold. */
  emit_condbranch(as, PPCI_BC|PPCF_Y, CC_LT, l_end);
  emit_ab(as, PPCI_CMPLW, RID_TMP, tmp);
  emit_getgl(as, tmp, gc.threshold);
  emit_getgl(as, RID_TMP, gc.total);
  as->gcsteps = 0;
  checkmclim(as);
}
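
/* Editor's sketch of the resulting check in execution order:
**   lwz    TMP, g->gc.total
**   lwz    tmp, g->gc.threshold
**   cmplw  TMP, tmp
**   blt+   l_end               ; predicted: no GC step needed
**   li     tmp, gcsteps        ; (or lis/ori for large counts)
**   addi   TMP1, JGL, -32768   ; undo the 32K bias to recover g
**   bl     lj_gc_step_jit
**   cmpwi  RET, 0
**   bne -> exit                ; the GC demands a trace exit
** l_end:
*/
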
/* -- Loop handling ------------------------------------------------------- */

/* Fixup the loop branch. */
static void asm_loop_fixup(ASMState *as)
{
  MCode *p = as->mctop;
  MCode *target = as->mcp;
  if (as->loopinv) {  /* Inverted loop branch? */
    /* asm_guardcc already inverted the cond branch and patched the final b. */
    p[-2] = (p[-2] & (0xffff0000u & ~PPCF_Y)) | (((target-p+2) & 0x3fffu) << 2);
  } else {
    p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2);
  }
}
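
/* Editor's note on the encodings above (sketch): an unconditional 'b'
** holds a 24 bit signed word offset (mask 0x00ffffff), a conditional
** 'bc' only a 14 bit one (mask 0x3fff). The +1/+2 terms rebase target-p
** on the branch's own address, p[-1] resp. p[-2], and masking with
** ~PPCF_Y drops the static prediction hint that was chosen while the
** branch still pointed forward.
*/
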
/* -- Head of trace ------------------------------------------------------- */

/* Coalesce BASE register for a root trace. */
static void asm_head_root_base(ASMState *as)
{
  IRIns *ir = IR(REF_BASE);
  Reg r = ir->r;
  if (ra_hasreg(r)) {
    ra_free(as, r);
    if (rset_test(as->modset, r))
      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
    if (r != RID_BASE)
      emit_mr(as, r, RID_BASE);
  }
}

/* Coalesce BASE register for a side trace. */
static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
{
  IRIns *ir = IR(REF_BASE);
  Reg r = ir->r;
  if (ra_hasreg(r)) {
    ra_free(as, r);
    if (rset_test(as->modset, r))
      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
    if (irp->r == r) {
      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
    } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
      rset_clear(allow, irp->r);
      emit_mr(as, r, irp->r);  /* Move from coalesced parent reg. */
    } else {
      emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
    }
  }
  return allow;
}
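
/* Editor's note: a side trace begins with the register state its parent
** left behind at the exit, so the three cases above are: BASE already
** sits in the same register (nothing to emit), BASE sits in a different
** parent register that is still free here (a single 'mr'), or no usable
** parent register survives and BASE is reloaded from g->jit_base.
*/
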
/* -- Tail of trace ------------------------------------------------------- */

/* Fixup the tail code. */
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
  MCode *p = as->mctop;
  MCode *target;
  int32_t spadj = as->T->spadjust;
  if (spadj == 0) {
    *--p = PPCI_NOP;
    *--p = PPCI_NOP;
    as->mctop = p;
  } else {
    /* Patch stack adjustment. */
    lua_assert(checki16(CFRAME_SIZE+spadj));
    p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj);
    p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj;
  }
  /* Patch exit branch. */
  target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
  p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2);
}
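
/* Editor's sketch of the patched tail for spadj != 0:
**   addi TMP, sp, CFRAME_SIZE+spadj   ; recompute the frame pointer word
**   stwu TMP, spadj(sp)               ; store it and adjust sp in one go
**   b -> linked trace or lj_vm_exit_interp
** For spadj == 0 the two reserved slots are NOPed out and cut off by
** moving mctop down.
*/
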
/* Prepare tail of code. */
static void asm_tail_prep(ASMState *as)
{
  MCode *p = as->mctop - 1;  /* Leave room for exit branch. */
  if (as->loopref) {
    as->invmcp = as->mcp = p;
  } else {
    as->mcp = p-2;  /* Leave room for stack pointer adjustment. */
    as->invmcp = NULL;
  }
}

/* -- Instruction dispatch ------------------------------------------------ */

/* Assemble a single instruction. */
static void asm_ir(ASMState *as, IRIns *ir)
{
  switch ((IROp)ir->o) {
  /* Miscellaneous ops. */
  case IR_LOOP: asm_loop(as); break;
  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
  case IR_USE:
    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
  case IR_PHI: asm_phi(as, ir); break;
  case IR_HIOP: asm_hiop(as, ir); break;

  /* Guarded assertions. */
  case IR_EQ: case IR_NE:
    if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
      as->curins--;
      asm_href(as, ir-1, (IROp)ir->o);
      break;
    }
    /* fallthrough */
  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
  case IR_ABC:
    asm_comp(as, ir);
    break;

  case IR_RETF: asm_retf(as, ir); break;

  /* Bit ops. */
  case IR_BNOT: asm_bitnot(as, ir); break;
  case IR_BSWAP: asm_bitswap(as, ir); break;

  case IR_BAND: asm_bitand(as, ir); break;
  case IR_BOR: asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break;
  case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break;

  case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break;
  case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break;
  case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break;
  case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31),
                             PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break;
  case IR_BROR: lua_assert(0); break;

  /* Arithmetic ops. */
  case IR_ADD: asm_add(as, ir); break;
  case IR_SUB: asm_sub(as, ir); break;
  case IR_MUL: asm_mul(as, ir); break;
  case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break;
  case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
  case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
  case IR_NEG: asm_neg(as, ir); break;

  case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break;
  case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
  case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
  case IR_MIN: asm_min_max(as, ir, 0); break;
  case IR_MAX: asm_min_max(as, ir, 1); break;
  case IR_FPMATH:
    if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
      break;
    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
    break;

  /* Overflow-checking arithmetic ops. */
  case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break;
  case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break;
  case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break;

  /* Memory references. */
  case IR_AREF: asm_aref(as, ir); break;
  case IR_HREF: asm_href(as, ir, 0); break;
  case IR_HREFK: asm_hrefk(as, ir); break;
  case IR_NEWREF: asm_newref(as, ir); break;
  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
  case IR_FREF: asm_fref(as, ir); break;
  case IR_STRREF: asm_strref(as, ir); break;

  /* Loads and stores. */
  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
    asm_ahuvload(as, ir);
    break;
  case IR_FLOAD: asm_fload(as, ir); break;
  case IR_XLOAD: asm_xload(as, ir); break;
  case IR_SLOAD: asm_sload(as, ir); break;

  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
  case IR_FSTORE: asm_fstore(as, ir); break;
  case IR_XSTORE: asm_xstore(as, ir); break;

  /* Allocations. */
  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
  case IR_TNEW: asm_tnew(as, ir); break;
  case IR_TDUP: asm_tdup(as, ir); break;
  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;

  /* Write barriers. */
  case IR_TBAR: asm_tbar(as, ir); break;
  case IR_OBAR: asm_obar(as, ir); break;

  /* Type conversions. */
  case IR_CONV: asm_conv(as, ir); break;
  case IR_TOBIT: asm_tobit(as, ir); break;
  case IR_TOSTR: asm_tostr(as, ir); break;
  case IR_STRTO: asm_strto(as, ir); break;

  /* Calls. */
  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
  case IR_CALLXS: asm_callx(as, ir); break;
  case IR_CARG: break;

  default:
    setintV(&as->J->errinfo, ir->o);
    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
    break;
  }
}
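
/* Editor's note on the IR_EQ/IR_NE special case above: when the guard
** directly tests the result of the immediately preceding IR_HREF, the
** hash lookup and the key comparison are fused, i.e. asm_href() gets the
** comparison op as its merge argument instead of first materializing the
** looked-up node pointer.
*/
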
/* -- Trace setup --------------------------------------------------------- */

/* Ensure there are enough stack slots for call arguments. */
static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
  IRRef args[CCI_NARGS_MAX];
  uint32_t i, nargs = (int)CCI_NARGS(ci);
  int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
  asm_collectargs(as, ir, ci, args);
  for (i = 0; i < nargs; i++)
    if (args[i] && irt_isfp(IR(args[i])->t)) {
      if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1;
    } else {
      if (ngpr > 0) ngpr--; else nslots++;
    }
  if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
    as->evenspill = nslots;
  return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
}
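
/* Editor's worked example (sketch): an FP argument that no longer fits
** in a register takes an aligned pair of stack slots, which is what
** 'nslots = (nslots+3) & ~1' computes: round up to even, then add two.
** E.g. with all FPRs taken and nslots == 5, one more double gives
** (5+3) & ~1 == 8, i.e. the value occupies slots 6-7.
*/
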
static void asm_setup_target(ASMState *as)
{
  asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
}

/* -- Trace patching ------------------------------------------------------ */

/* Patch exit jumps of existing machine code to a new target. */
void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
{
  MCode *p = T->mcode;
  MCode *pe = (MCode *)((char *)p + T->szmcode);
  MCode *px = exitstub_trace_addr(T, exitno);
  MCode *cstart = NULL;
  MCode *mcarea = lj_mcode_patch(J, p, 0);
  int clearso = 0;
  for (; p < pe; p++) {
    /* Look for exitstub branch, try to replace with branch to target. */
    uint32_t ins = *p;
    if ((ins & 0xfc000000u) == 0x40000000u &&
        ((ins ^ ((char *)px-(char *)p)) & 0xffffu) == 0) {
      ptrdiff_t delta = (char *)target - (char *)p;
      if (((ins >> 16) & 3) == (CC_SO&3)) {
        clearso = sizeof(MCode);
        delta -= sizeof(MCode);
      }
      /* Many, but not all short-range branches can be patched directly. */
      if (((delta + 0x8000) >> 16) == 0) {
        *p = (ins & 0xffdf0000u) | ((uint32_t)delta & 0xffffu) |
             ((delta & 0x8000) * (PPCF_Y/0x8000));
        if (!cstart) cstart = p;
      }
    } else if ((ins & 0xfc000000u) == PPCI_B &&
               ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) {
      ptrdiff_t delta = (char *)target - (char *)p;
      lua_assert(((delta + 0x02000000) >> 26) == 0);
      *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
      if (!cstart) cstart = p;
    }
  }
  {  /* Always patch long-range branch in exit stub itself. */
    ptrdiff_t delta = (char *)target - (char *)px - clearso;
    lua_assert(((delta + 0x02000000) >> 26) == 0);
    *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
  }
  if (!cstart) cstart = px;
  lj_mcode_sync(cstart, px+1);
  if (clearso) {  /* Extend the current trace. Ugly workaround. */
    MCode *pp = J->cur.mcode;
    J->cur.szmcode += sizeof(MCode);
    *--pp = PPCI_MCRXR;  /* Clear SO flag. */
    J->cur.mcode = pp;
    lj_mcode_sync(pp, pp+1);
  }
  lj_mcode_patch(J, mcarea, 1);
}
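
/* Editor's note on the patch logic (sketch): a 'bc' to the exit stub is
** recognized by its primary opcode (0x40000000) plus a BD field that
** still encodes the stub address relative to the instruction. If the new
** target fits the 16 bit conditional range, the BD field is rewritten in
** place and the PPCF_Y hint is recomputed from the offset's sign;
** otherwise the stub's own long-range 'b', which is always patched at
** the end, picks up the redirect. Branches guarding on CC_SO land one
** instruction early, on an mcrxr prepended to the target trace, so the
** SO flag is cleared again before the resumed code runs.
*/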