/*
** PPC IR assembler (SSA IR -> machine code).
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/

/* -- Register allocator extensions --------------------------------------- */

/* Allocate a register with a hint. */
static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
    if (!ra_hashint(r) && !iscrossref(as, ref))
      ra_sethint(IR(ref)->r, hint);  /* Propagate register hint. */
    r = ra_allocref(as, ref, allow);

/* Allocate two source registers for three-operand instructions. */
static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
  Reg left = irl->r, right = irr->r;
  if (ra_hasreg(left)) {
    right = ra_allocref(as, ir->op2, rset_exclude(allow, left));
  } else if (ra_hasreg(right)) {
    left = ra_allocref(as, ir->op1, rset_exclude(allow, right));
  } else if (ra_hashint(right)) {
    right = ra_allocref(as, ir->op2, allow);
    left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));
    left = ra_allocref(as, ir->op1, allow);
    right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));
  return left | (right << 8);
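/* Note: ra_alloc2() packs both registers into one value: the left register
** in the low byte and the right register shifted left by 8. Callers below
** unpack it with: right = (left >> 8); left &= 255;
*/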
/* -- Guard handling ------------------------------------------------------ */

/* Setup exit stubs after the end of each trace. */
static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
  MCode *mxp = as->mctop;
  if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
  /* 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; bl <1; bl <1; ... */
  for (i = nexits-1; (int32_t)i >= 0; i--)
    *--mxp = PPCI_BL|(((-3-i)&0x00ffffffu)<<2);
  *--mxp = PPCI_LI|PPCF_T(RID_TMP)|as->T->traceno;  /* Read by exit handler. */
  *mxp = PPCI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)&0x00ffffffu)<<2);
  *--mxp = PPCI_MFLR|PPCF_T(RID_TMP);

static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
  /* Keep this in-sync with exitstub_trace_addr(). */
  return as->mctop + exitno + 3;

/* Emit conditional branch to exit for guard. */
static void asm_guardcc(ASMState *as, PPCCC cc)
  MCode *target = asm_exitstub_addr(as, as->snapno);
  if (LJ_UNLIKELY(p == as->invmcp)) {
    *p = PPCI_B | (((target-p) & 0x00ffffffu) << 2);
    emit_condbranch(as, PPCI_BC, cc^4, p);
  emit_condbranch(as, PPCI_BC, cc, target);
/* -- Operand fusion ------------------------------------------------------ */

/* Limit linear search to this distance. Avoids O(n^2) behavior. */
#define CONFLICT_SEARCH_LIM	31

/* Check if there's no conflicting instruction between curins and ref. */
static int noconflict(ASMState *as, IRRef ref, IROp conflict)
  if (i > ref + CONFLICT_SEARCH_LIM)
    return 0;  /* Give up, ref is too far away. */
  if (ir[i].o == conflict)
    return 0;  /* Conflict found. */
  return 1;  /* Ok, no conflict. */

/* Fuse the array base of colocated arrays. */
static int32_t asm_fuseabase(ASMState *as, IRRef ref)
  if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
      !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
    return (int32_t)sizeof(GCtab);
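/* Colocated arrays are allocated directly after the GCtab header, so their
** base can be fused as tab + sizeof(GCtab) instead of loading t->array
** (see the matching IRFL_TAB_ARRAY special case in asm_fload() below).
*/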
/* Indicates load/store indexed is ok. */
#define AHUREF_LSX	((int32_t)0x80000000)

/* Fuse array/hash/upvalue reference into register+offset operand. */
static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
  if (ra_noreg(ir->r)) {
    if (ir->o == IR_AREF) {
      if (mayfuse(as, ref)) {
        if (irref_isk(ir->op2)) {
          IRRef tab = IR(ir->op1)->op1;
          int32_t ofs = asm_fuseabase(as, tab);
          IRRef refa = ofs ? tab : ir->op1;
          ofs += 8*IR(ir->op2)->i;
            return ra_alloc1(as, refa, allow);
        if (*ofsp == AHUREF_LSX) {
          Reg base = ra_alloc1(as, ir->op1, allow);
          Reg idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
          return base | (idx << 8);
    } else if (ir->o == IR_HREFK) {
      if (mayfuse(as, ref)) {
        int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
          return ra_alloc1(as, ir->op1, allow);
    } else if (ir->o == IR_UREFC) {
      if (irref_isk(ir->op1)) {
        GCfunc *fn = ir_kfunc(IR(ir->op1));
        int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv);
        int32_t jgl = (intptr_t)J2G(as->J);
        if ((uint32_t)(ofs-jgl) < 65536) {
          *ofsp = ofs-jgl-32768;
          *ofsp = (int16_t)ofs;
          return ra_allock(as, ofs-(int16_t)ofs, allow);
    } else if (ir->o == IR_TMPREF) {
      *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768);
  return ra_alloc1(as, ref, allow);

/* Fuse XLOAD/XSTORE reference into load/store operand. */
static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
                         RegSet allow, int32_t ofs)
  if (ra_noreg(ir->r) && canfuse(as, ir)) {
    if (ir->o == IR_ADD) {
      if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) {
      } else if (ofs == 0) {
        Reg right, left = ra_alloc2(as, ir, allow);
        right = (left >> 8); left &= 255;
        emit_fab(as, PPCI_LWZX | ((pi >> 20) & 0x780), rt, left, right);
    } else if (ir->o == IR_STRREF) {
      lj_assertA(ofs == 0, "bad usage");
      ofs = (int32_t)sizeof(GCstr);
      if (irref_isk(ir->op2)) {
        ofs += IR(ir->op2)->i;
      } else if (irref_isk(ir->op1)) {
        ofs += IR(ir->op1)->i;
        /* NYI: Fuse ADD with constant. */
        Reg tmp, right, left = ra_alloc2(as, ir, allow);
        right = (left >> 8); left &= 255;
        tmp = ra_scratch(as, rset_exclude(rset_exclude(allow, left), right));
        emit_fai(as, pi, rt, tmp, ofs);
        emit_tab(as, PPCI_ADD, tmp, left, right);
  if (!checki16(ofs)) {
    Reg left = ra_alloc1(as, ref, allow);
    Reg right = ra_allock(as, ofs, rset_exclude(allow, left));
    emit_fab(as, PPCI_LWZX | ((pi >> 20) & 0x780), rt, left, right);
  base = ra_alloc1(as, ref, allow);
  emit_fai(as, pi, rt, base, ofs);

/* Fuse XLOAD/XSTORE reference into indexed-only load/store operand. */
static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
  IRIns *ira = IR(ref);
  if (canfuse(as, ira) && ira->o == IR_ADD && ra_noreg(ira->r)) {
    left = ra_alloc2(as, ira, allow);
    right = (left >> 8); left &= 255;
    right = ra_alloc1(as, ref, allow);
  emit_tab(as, pi, rt, left, right);

/* Fuse to multiply-add/sub instruction. */
static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
  IRRef lref = ir->op1, rref = ir->op2;
  if ((as->flags & JIT_F_OPT_FMA) &&
      ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
       (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
        (rref = lref, pi = pir, ra_noreg(irm->r))))) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    Reg add = ra_alloc1(as, rref, RSET_FPR);
    Reg right, left = ra_alloc2(as, irm, rset_exclude(RSET_FPR, add));
    right = (left >> 8); left &= 255;
    emit_facb(as, pi, dest, left, right, add);

/* -- Calls --------------------------------------------------------------- */

/* Generate a call to a C function. */
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
  uint32_t n, nargs = CCI_XNARGS(ci);
  Reg gpr = REGARG_FIRSTGPR;
  Reg fpr = REGARG_FIRSTFPR;
  if ((void *)ci->func)
    emit_call(as, (void *)ci->func);
  for (n = 0; n < nargs; n++) {  /* Setup args. */
    if (irt_isfp(ir->t)) {
      if (fpr <= REGARG_LASTFPR) {
        lj_assertA(rset_test(as->freeset, fpr),
                   "reg %d not free", fpr);  /* Already evicted. */
        ra_leftov(as, fpr, ref);
        Reg r = ra_alloc1(as, ref, RSET_FPR);
        if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4;
        emit_spstore(as, ir, r, ofs);
        ofs += irt_isnum(ir->t) ? 8 : 4;
      if (gpr <= REGARG_LASTGPR) {
        lj_assertA(rset_test(as->freeset, gpr),
                   "reg %d not free", gpr);  /* Already evicted. */
        ra_leftov(as, gpr, ref);
        Reg r = ra_alloc1(as, ref, RSET_GPR);
        emit_spstore(as, ir, r, ofs);
  if (gpr <= REGARG_LASTGPR)
  if ((ci->flags & CCI_VARARG))  /* Vararg calls need to know about FPR use. */
    emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
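/* Note: per the 32 bit PowerPC ELF ABI a vararg callee checks CR bit 6 to
** know whether FP arguments were passed in registers: CREQV 6,6,6 sets the
** bit (FPRs were used), CRXOR 6,6,6 clears it.
*/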
/* Setup result reg/sp for call. Evict scratch regs. */
static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
  RegSet drop = RSET_SCRATCH;
  int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
  if ((ci->flags & CCI_NOFPRCLOBBER))
  if (ra_hasreg(ir->r))
    rset_clear(drop, ir->r);  /* Dest reg handled below. */
  if (hiop && ra_hasreg((ir+1)->r))
    rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
  ra_evictset(as, drop);  /* Evictions must be performed first. */
    lj_assertA(!irt_ispri(ir->t), "PRI dest");
    if (!LJ_SOFTFP && irt_isfp(ir->t)) {
      if ((ci->flags & CCI_CASTU64)) {
        /* Use spill slot or temp slots. */
        int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
        if (ra_hasreg(dest)) {
          ra_modified(as, dest);
          emit_fai(as, PPCI_LFD, dest, RID_SP, ofs);
        emit_tai(as, PPCI_STW, RID_RETHI, RID_SP, ofs);
        emit_tai(as, PPCI_STW, RID_RETLO, RID_SP, ofs+4);
        ra_destreg(as, ir, RID_FPRET);
      ra_destreg(as, ir, RID_RET);

static void asm_callx(ASMState *as, IRIns *ir)
  IRRef args[CCI_NARGS_MAX*2];
  ci.flags = asm_callx_flags(as, ir);
  asm_collectargs(as, ir, &ci, args);
  asm_setupresult(as, ir, &ci);
  func = ir->op2; irf = IR(func);
  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
  if (irref_isk(func)) {  /* Call to constant address. */
    ci.func = (ASMFunction)(void *)(intptr_t)(irf->i);
  } else {  /* Need a non-argument register for indirect calls. */
    RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
    Reg freg = ra_alloc1(as, func, allow);
    *--as->mcp = PPCI_BCTRL;
    *--as->mcp = PPCI_MTCTR | PPCF_T(freg);
    ci.func = (ASMFunction)(void *)0;
  asm_gencall(as, &ci, args);

/* -- Returns ------------------------------------------------------------- */

/* Return to lower frame. Guard that it goes to the right spot. */
static void asm_retf(ASMState *as, IRIns *ir)
  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
  void *pc = ir_kptr(IR(ir->op2));
  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
  as->topslot -= (BCReg)delta;
  if ((int32_t)as->topslot < 0) as->topslot = 0;
  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
  emit_setgl(as, base, jit_base);
  emit_addptr(as, base, -8*delta);
  asm_guardcc(as, CC_NE);
  emit_ab(as, PPCI_CMPW, RID_TMP,
          ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base)));
  emit_tai(as, PPCI_LWZ, RID_TMP, base, -8);

/* -- Buffer operations --------------------------------------------------- */

static void asm_bufhdr_write(ASMState *as, Reg sb)
  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
  irgc.ot = IRT(0, IRT_PGC);  /* GC type. */
  emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
  emit_rot(as, PPCI_RLWIMI, RID_TMP, tmp, 0, 31-lj_fls(SBUF_MASK_FLAG), 31);
  emit_getgl(as, RID_TMP, cur_L);
  emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));

/* -- Type conversions ---------------------------------------------------- */

static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
  RegSet allow = RSET_FPR;
  Reg tmp = ra_scratch(as, rset_clear(allow, left));
  Reg fbias = ra_scratch(as, rset_clear(allow, tmp));
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg hibias = ra_allock(as, 0x43300000, rset_exclude(RSET_GPR, dest));
  asm_guardcc(as, CC_NE);
  emit_fab(as, PPCI_FCMPU, 0, tmp, left);
  emit_fab(as, PPCI_FSUB, tmp, tmp, fbias);
  emit_fai(as, PPCI_LFD, tmp, RID_SP, SPOFS_TMP);
  emit_tai(as, PPCI_STW, RID_TMP, RID_SP, SPOFS_TMPLO);
  emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
  emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000);
  emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
  emit_lsptr(as, PPCI_LFS, (fbias & 31),
             (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR);
  emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
  emit_fb(as, PPCI_FCTIWZ, tmp, left);
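/* Note: asm_tointg() implements the checked number-to-int conversion.
** Read bottom-up (reverse emission order) it roughly does:
**   i = (int32_t)n;                 // FCTIWZ via the stack temp
**   if ((double)i != n) goto exit;  // rebuild the double and FCMPU
** The rebuild uses the usual bias trick: the pair {0x43300000, i^0x80000000}
** is reloaded as a double and the 2^52+2^31 constant is subtracted off.
*/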
static void asm_tobit(ASMState *as, IRIns *ir)
  RegSet allow = RSET_FPR;
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, allow);
  Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
  Reg tmp = ra_scratch(as, rset_clear(allow, right));
  emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
  emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
  emit_fab(as, PPCI_FADD, tmp, left, right);
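/* Note: TOBIT relies on the standard FP bias trick: op2 is expected to be
** the 2^52+2^51 bias constant, so the FADD leaves the integer result in the
** low word of the double, which is then reloaded from the stack temp.
*/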
static void asm_conv(ASMState *as, IRIns *ir)
  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
  IRRef lref = ir->op1;
  /* 64 bit integer conversions are handled by SPLIT. */
  lj_assertA(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64)),
             "IR %04d has unsplit 64 bit type",
             (int)(ir - as->ir) - REF_BIAS);
  /* FP conversions are handled by SPLIT. */
  lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT),
             "IR %04d has FP type",
             (int)(ir - as->ir) - REF_BIAS);
  /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
  lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
  if (irt_isfp(ir->t)) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    if (stfp) {  /* FP to FP conversion. */
      if (st == IRT_NUM)  /* double -> float conversion. */
        emit_fb(as, PPCI_FRSP, dest, ra_alloc1(as, lref, RSET_FPR));
      else  /* float -> double conversion is a no-op on PPC. */
        ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
    } else {  /* Integer to FP conversion. */
      /* IRT_INT: Flip hibit, bias with 2^52, subtract 2^52+2^31. */
      /* IRT_U32: Bias with 2^52, subtract 2^52. */
      RegSet allow = RSET_GPR;
      Reg left = ra_alloc1(as, lref, allow);
      Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left));
      Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
      if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest);
      emit_fab(as, PPCI_FSUB, dest, dest, fbias);
      emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
      emit_lsptr(as, PPCI_LFS, (fbias & 31),
                 &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31],
                 rset_clear(allow, hibias));
      emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP,
               RID_SP, SPOFS_TMPLO);
      emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
      if (st != IRT_U32) emit_asi(as, PPCI_XORIS, RID_TMP, left, 0x8000);
  } else if (stfp) {  /* FP to integer conversion. */
    if (irt_isguard(ir->t)) {
      /* Checked conversions are only supported from number to int. */
      lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
                 "bad type for checked CONV");
      asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
      Reg dest = ra_dest(as, ir, RSET_GPR);
      Reg left = ra_alloc1(as, lref, RSET_FPR);
      Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
      if (irt_isu32(ir->t)) {
        /* Convert both x and x-2^31 to int and merge results. */
        Reg tmpi = ra_scratch(as, rset_exclude(RSET_GPR, dest));
        emit_asb(as, PPCI_OR, dest, dest, tmpi);  /* Select with mask idiom. */
        emit_asb(as, PPCI_AND, tmpi, tmpi, RID_TMP);
        emit_asb(as, PPCI_ANDC, dest, dest, RID_TMP);
        emit_tai(as, PPCI_LWZ, tmpi, RID_SP, SPOFS_TMPLO);  /* tmp = (int)(x) */
        emit_tai(as, PPCI_ADDIS, dest, dest, 0x8000);  /* dest += 2^31 */
        emit_asb(as, PPCI_SRAWI, RID_TMP, dest, 31);  /* mask = -(dest < 0) */
        emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
        emit_tai(as, PPCI_LWZ, dest,
                 RID_SP, SPOFS_TMPLO);  /* dest = (int)(x-2^31) */
        emit_fb(as, PPCI_FCTIWZ, tmp, left);
        emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
        emit_fb(as, PPCI_FCTIWZ, tmp, tmp);
        emit_fab(as, PPCI_FSUB, tmp, left, tmp);
        emit_lsptr(as, PPCI_LFS, (tmp & 31),
                   (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
        emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
        emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
        emit_fb(as, PPCI_FCTIWZ, tmp, left);
    Reg dest = ra_dest(as, ir, RSET_GPR);
    if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
      Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
      lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
      if ((ir->op2 & IRCONV_SEXT))
        emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left);
        emit_rot(as, PPCI_RLWINM, dest, left, 0, st == IRT_U8 ? 24 : 16, 31);
    } else {  /* 32/64 bit integer conversions. */
      /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */
      ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */

static void asm_strto(ASMState *as, IRIns *ir)
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
  int32_t ofs = SPOFS_TMP;
  ra_evictset(as, RSET_SCRATCH);
  if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
      (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) {
    for (i = 0; i < 2; i++) {
      emit_spload(as, ir+i, r, sps_scale((ir+i)->s));
    ofs = sps_scale(ir->s & ~1);
    Reg rhi = ra_dest(as, ir+1, RSET_GPR);
    Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
    emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs);
    emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4);
  RegSet drop = RSET_SCRATCH;
  if (ra_hasreg(ir->r)) rset_set(drop, ir->r);  /* Spill dest reg (if any). */
  ra_evictset(as, drop);
  if (ir->s) ofs = sps_scale(ir->s);
  asm_guardcc(as, CC_EQ);
  emit_ai(as, PPCI_CMPWI, RID_RET, 0);  /* Test return status. */
  args[0] = ir->op1;      /* GCstr *str */
  args[1] = ASMREF_TMP1;  /* TValue *n  */
  asm_gencall(as, ci, args);
  /* Store the result to the spill slot or temp slots. */
  emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);

/* -- Memory references --------------------------------------------------- */

/* Get pointer to TValue. */
static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
  int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768);
  if ((mode & IRTMPREF_IN1)) {
    if (irt_isnum(ir->t)) {
      if ((mode & IRTMPREF_OUT1)) {
        lj_assertA(irref_isk(ref), "unsplit FP op");
        emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
                 ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
                 ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
        Reg src = ra_alloc1(as, ref, RSET_FPR);
        emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
        emit_fai(as, PPCI_STFD, src, RID_JGL, tmpofs);
    } else if (irref_isk(ref)) {
      /* Use the number constant itself as a TValue. */
      ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
      lj_assertA(0, "unsplit FP op");
      /* Otherwise force a spill and use the spill slot. */
      emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir));
    /* Otherwise use g->tmptv to hold the TValue. */
      emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
      if (!irt_ispri(ir->t)) {
        Reg src = ra_alloc1(as, ref, RSET_GPR);
        emit_setgl(as, src, tmptv.gcr);
      if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
        type = ra_alloc1(as, ref+1, RSET_GPR);
        type = ra_allock(as, irt_toitype(ir->t), RSET_GPR);
      emit_setgl(as, type, tmptv.it);
    emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);

static void asm_aref(ASMState *as, IRIns *ir)
  Reg dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op2)) {
    IRRef tab = IR(ir->op1)->op1;
    int32_t ofs = asm_fuseabase(as, tab);
    IRRef refa = ofs ? tab : ir->op1;
    ofs += 8*IR(ir->op2)->i;
      base = ra_alloc1(as, refa, RSET_GPR);
      emit_tai(as, PPCI_ADDI, dest, base, ofs);
  base = ra_alloc1(as, ir->op1, RSET_GPR);
  idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
  emit_tab(as, PPCI_ADD, dest, RID_TMP, base);
  emit_slwi(as, RID_TMP, idx, 3);
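/* Array slots are 8 byte TValues here, so a variable index is scaled with a
** left shift by 3 (the emit_slwi above) and constant indexes are folded into
** the 8*i displacement.
*/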
/* Inlined hash lookup. Specialized for key type and for const keys.
** The equivalent C code is:
**   Node *n = hashkey(t, key);
**   do {
**     if (lj_obj_equal(&n->key, key)) return &n->val;
**   } while ((n = nextnode(n)));
*/
static void asm_href(ASMState *as, IRIns *ir, IROp merge)
  RegSet allow = RSET_GPR;
  int destused = ra_used(ir);
  Reg dest = ra_dest(as, ir, allow);
  Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
  Reg key = RID_NONE, tmp1 = RID_TMP, tmp2;
  Reg tisnum = RID_NONE, tmpnum = RID_NONE;
  IRRef refkey = ir->op2;
  IRIns *irkey = IR(refkey);
  int isk = irref_isk(refkey);
  IRType1 kt = irkey->t;
  MCLabel l_end, l_loop, l_next;
  rset_clear(allow, tab);
    key = ra_alloc1(as, refkey, allow);
    rset_clear(allow, key);
    if (irkey[1].o == IR_HIOP) {
      if (ra_hasreg((irkey+1)->r)) {
        tmpnum = (irkey+1)->r;
        ra_noweak(as, tmpnum);
        tmpnum = ra_allocref(as, refkey+1, allow);
      rset_clear(allow, tmpnum);
    key = ra_alloc1(as, refkey, RSET_FPR);
    tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
    tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
    rset_clear(allow, tisnum);
  } else if (!irt_ispri(kt)) {
    key = ra_alloc1(as, refkey, allow);
    rset_clear(allow, key);
  tmp2 = ra_scratch(as, allow);
  rset_clear(allow, tmp2);

  /* Key not found in chain: jump to exit (if merged) or load niltv. */
  l_end = emit_label(as);
    asm_guardcc(as, CC_EQ);
    emit_loada(as, dest, niltvg(J2G(as->J)));

  /* Follow hash chain until the end. */
  emit_ai(as, PPCI_CMPWI, dest, 0);
  emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(Node, next));
  l_next = emit_label(as);

  /* Type and value comparison. */
    asm_guardcc(as, CC_EQ);
    emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
  if (!LJ_SOFTFP && irt_isnum(kt)) {
    emit_fab(as, PPCI_FCMPU, 0, tmpnum, key);
    emit_condbranch(as, PPCI_BC, CC_GE, l_next);
    emit_ab(as, PPCI_CMPLW, tmp1, tisnum);
    emit_fai(as, PPCI_LFD, tmpnum, dest, (int32_t)offsetof(Node, key.n));
    if (!irt_ispri(kt)) {
      emit_ab(as, PPCI_CMPW, tmp2, key);
      emit_condbranch(as, PPCI_BC, CC_NE, l_next);
    if (LJ_SOFTFP && ra_hasreg(tmpnum))
      emit_ab(as, PPCI_CMPW, tmp1, tmpnum);
    emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
      emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
  emit_tai(as, PPCI_LWZ, tmp1, dest, (int32_t)offsetof(Node, key.it));
  *l_loop = PPCI_BC | PPCF_Y | PPCF_CC(CC_NE) |
            (((char *)as->mcp-(char *)l_loop) & 0xffffu);

  /* Load main position relative to tab->node into dest. */
  khash = isk ? ir_khash(as, irkey) : 1;
    emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
    tmphash = ra_allock(as, khash, allow);
    emit_tab(as, PPCI_ADD, dest, dest, tmp1);
    emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node));
    emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash);
    emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
    emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
    } else if (irt_isstr(kt)) {
      emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, sid));
    } else {  /* Must match with hash*() in lj_tab.c. */
      emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1);
      emit_rotlwi(as, tmp2, tmp2, HASH_ROT3);
      emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2);
      emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31);
      emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2);
      if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
        emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
        emit_rotlwi(as, dest, tmp1, HASH_ROT1);
        emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum);
        int32_t ofs = ra_spill(as, irkey);
        emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1);
        emit_rotlwi(as, dest, tmp1, HASH_ROT1);
        emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1);
        emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4);
        emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs);
        emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
        emit_rotlwi(as, dest, tmp1, HASH_ROT1);
        emit_tai(as, PPCI_ADDI, tmp1, tmp2, HASH_BIAS);
        emit_tai(as, PPCI_ADDIS, tmp2, key, (HASH_BIAS + 32768)>>16);

static void asm_hrefk(ASMState *as, IRIns *ir)
  IRIns *kslot = IR(ir->op2);
  IRIns *irkey = IR(kslot->op1);
  int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
  int32_t kofs = ofs + (int32_t)offsetof(Node, key);
  Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
  Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg key = RID_NONE, type = RID_TMP, idx = node;
  RegSet allow = rset_exclude(RSET_GPR, node);
  lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
    rset_clear(allow, dest);
    kofs = (int32_t)offsetof(Node, key);
  } else if (ra_hasreg(dest)) {
    emit_tai(as, PPCI_ADDI, dest, node, ofs);
  asm_guardcc(as, CC_NE);
  if (!irt_ispri(irkey->t)) {
    key = ra_scratch(as, allow);
    rset_clear(allow, key);
  rset_clear(allow, type);
  if (irt_isnum(irkey->t)) {
    emit_cmpi(as, key, (int32_t)ir_knum(irkey)->u32.lo);
    asm_guardcc(as, CC_NE);
    emit_cmpi(as, type, (int32_t)ir_knum(irkey)->u32.hi);
    if (ra_hasreg(key)) {
      emit_cmpi(as, key, irkey->i);  /* May use RID_TMP, i.e. type. */
      asm_guardcc(as, CC_NE);
    emit_ai(as, PPCI_CMPWI, type, irt_toitype(irkey->t));
  if (ra_hasreg(key)) emit_tai(as, PPCI_LWZ, key, idx, kofs+4);
  emit_tai(as, PPCI_LWZ, type, idx, kofs);
    emit_tai(as, PPCI_ADDIS, dest, dest, (ofs + 32768) >> 16);
    emit_tai(as, PPCI_ADDI, dest, node, ofs);

static void asm_uref(ASMState *as, IRIns *ir)
  Reg dest = ra_dest(as, ir, RSET_GPR);
  int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC);
  if (irref_isk(ir->op1) && !guarded) {
    GCfunc *fn = ir_kfunc(IR(ir->op1));
    MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
    emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR);
      asm_guardcc(as, ir->o == IR_UREFC ? CC_NE : CC_EQ);
      emit_ai(as, PPCI_CMPWI, RID_TMP, 1);
    if (ir->o == IR_UREFC)
      emit_tai(as, PPCI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv));
      emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(GCupval, v));
      emit_tai(as, PPCI_LBZ, RID_TMP, dest, (int32_t)offsetof(GCupval, closed));
    if (irref_isk(ir->op1)) {
      GCfunc *fn = ir_kfunc(IR(ir->op1));
      int32_t k = (int32_t)gcrefu(fn->l.uvptr[(ir->op2 >> 8)]);
      emit_loadi(as, dest, k);
      emit_tai(as, PPCI_LWZ, dest, ra_alloc1(as, ir->op1, RSET_GPR),
               (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));

static void asm_fref(ASMState *as, IRIns *ir)
  UNUSED(as); UNUSED(ir);
  lj_assertA(!ra_used(ir), "unfused FREF");

static void asm_strref(ASMState *as, IRIns *ir)
  Reg dest = ra_dest(as, ir, RSET_GPR);
  IRRef ref = ir->op2, refk = ir->op1;
  int32_t ofs = (int32_t)sizeof(GCstr);
  if (irref_isk(ref)) {
    IRRef tmp = refk; refk = ref; ref = tmp;
  } else if (!irref_isk(refk)) {
    Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
    IRIns *irr = IR(ir->op2);
    if (ra_hasreg(irr->r)) {
      ra_noweak(as, irr->r);
    } else if (mayfuse(as, irr->op2) &&
               irr->o == IR_ADD && irref_isk(irr->op2) &&
               checki16(ofs + IR(irr->op2)->i)) {
      ofs += IR(irr->op2)->i;
      right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left));
      right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_tai(as, PPCI_ADDI, dest, dest, ofs);
    emit_tab(as, PPCI_ADD, dest, left, right);
  r = ra_alloc1(as, ref, RSET_GPR);
    emit_tai(as, PPCI_ADDI, dest, r, ofs);
    emit_tab(as, PPCI_ADD, dest, r,
             ra_allock(as, ofs, rset_exclude(RSET_GPR, r)));

/* -- Loads and stores ---------------------------------------------------- */

static PPCIns asm_fxloadins(ASMState *as, IRIns *ir)
  switch (irt_type(ir->t)) {
  case IRT_I8: return PPCI_LBZ;  /* Needs sign-extension. */
  case IRT_U8: return PPCI_LBZ;
  case IRT_I16: return PPCI_LHA;
  case IRT_U16: return PPCI_LHZ;
  case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_LFD;
  case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS;
  default: return PPCI_LWZ;

static PPCIns asm_fxstoreins(ASMState *as, IRIns *ir)
  switch (irt_type(ir->t)) {
  case IRT_I8: case IRT_U8: return PPCI_STB;
  case IRT_I16: case IRT_U16: return PPCI_STH;
  case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_STFD;
  case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS;
  default: return PPCI_STW;

static void asm_fload(ASMState *as, IRIns *ir)
  Reg dest = ra_dest(as, ir, RSET_GPR);
  PPCIns pi = asm_fxloadins(as, ir);
  if (ir->op1 == REF_NIL) {  /* FLOAD from GG_State with offset. */
    ofs = (ir->op2 << 2) - 32768 - GG_OFS(g);
    idx = ra_alloc1(as, ir->op1, RSET_GPR);
    if (ir->op2 == IRFL_TAB_ARRAY) {
      ofs = asm_fuseabase(as, ir->op1);
      if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
        emit_tai(as, PPCI_ADDI, dest, idx, ofs);
    ofs = field_ofs[ir->op2];
  lj_assertA(!irt_isi8(ir->t), "unsupported FLOAD I8");
  emit_tai(as, pi, dest, idx, ofs);

static void asm_fstore(ASMState *as, IRIns *ir)
  if (ir->r != RID_SINK) {
    Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
    IRIns *irf = IR(ir->op1);
    Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
    int32_t ofs = field_ofs[irf->op2];
    PPCIns pi = asm_fxstoreins(as, ir);
    emit_tai(as, pi, src, idx, ofs);

static void asm_xload(ASMState *as, IRIns *ir)
  Reg dest = ra_dest(as, ir,
                     (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
  lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
    emit_as(as, PPCI_EXTSB, dest, dest);
  asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0);

static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
  if (ir->r == RID_SINK)
  if (ofs == 0 && mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP &&
      ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) {
    /* Fuse BSWAP with XSTORE to stwbrx. */
    Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
    asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src));
    Reg src = ra_alloc1(as, ir->op2,
                        (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
    asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1,
                 rset_exclude(RSET_GPR, src), ofs);

#define asm_xstore(as, ir)	asm_xstore_(as, ir, 0)

static void asm_ahuvload(ASMState *as, IRIns *ir)
  Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx;
  RegSet allow = RSET_GPR;
  int32_t ofs = AHUREF_LSX;
  if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) {
    if (ra_used(ir+1)) {
      type = ra_dest(as, ir+1, allow);
      rset_clear(allow, type);
    lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
               irt_isint(ir->t) || irt_isaddr(ir->t),
               "bad load type %d", irt_type(ir->t));
    if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0;
    dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
    rset_clear(allow, dest);
  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
  if (ir->o == IR_VLOAD) {
    ofs = ofs != AHUREF_LSX ? ofs + 8 * ir->op2 :
          ir->op2 ? 8 * ir->op2 : AHUREF_LSX;
    Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx));
    asm_guardcc(as, CC_GE);
    emit_ab(as, PPCI_CMPLW, type, tisnum);
    if (ra_hasreg(dest)) {
      if (!LJ_SOFTFP && ofs == AHUREF_LSX) {
        tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR,
                         (idx&255)), (idx>>8)));
        emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp);
        emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx,
      asm_guardcc(as, CC_NE);
      emit_ai(as, PPCI_CMPWI, type, irt_toitype(t));
    if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, idx, ofs+4);
  if (ofs == AHUREF_LSX) {
    emit_tab(as, PPCI_LWZX, type, (idx&255), tmp);
    emit_slwi(as, tmp, (idx>>8), 3);
    emit_tai(as, PPCI_LWZ, type, idx, ofs);

static void asm_ahustore(ASMState *as, IRIns *ir)
  RegSet allow = RSET_GPR;
  Reg idx, src = RID_NONE, type = RID_NONE;
  int32_t ofs = AHUREF_LSX;
  if (ir->r == RID_SINK)
  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
    src = ra_alloc1(as, ir->op2, RSET_FPR);
    if (!irt_ispri(ir->t)) {
      src = ra_alloc1(as, ir->op2, allow);
      rset_clear(allow, src);
    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
      type = ra_alloc1(as, (ir+1)->op2, allow);
      type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
    rset_clear(allow, type);
  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
    if (ofs == AHUREF_LSX) {
      emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP);
      emit_slwi(as, RID_TMP, (idx>>8), 3);
      emit_fai(as, PPCI_STFD, src, idx, ofs);
      emit_tai(as, PPCI_STW, src, idx, ofs+4);
    if (ofs == AHUREF_LSX) {
      emit_tab(as, PPCI_STWX, type, (idx&255), RID_TMP);
      emit_slwi(as, RID_TMP, (idx>>8), 3);
      emit_tai(as, PPCI_STW, type, idx, ofs);

static void asm_sload(ASMState *as, IRIns *ir)
  int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 0 : 4);
  Reg dest = RID_NONE, type = RID_NONE, base;
  RegSet allow = RSET_GPR;
  int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
  lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
             "bad parent SLOAD");  /* Handled by asm_head_side(). */
  lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
             "inconsistent SLOAD variant");
  lj_assertA(LJ_DUALNUM ||
             (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)),
  lj_assertA(!(ir->op2 & IRSLOAD_CONVERT),
             "unsplit SLOAD convert");  /* Handled by LJ_SOFTFP SPLIT. */
  if (hiop && ra_used(ir+1)) {
    type = ra_dest(as, ir+1, allow);
    rset_clear(allow, type);
  if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
    dest = ra_scratch(as, RSET_FPR);
    asm_tointg(as, ir, dest);
    t.irt = IRT_NUM;  /* Continue with a regular number type check. */
    lj_assertA(irt_isnum(t) || irt_isint(t) || irt_isaddr(t),
               "bad SLOAD type %d", irt_type(ir->t));
    dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
    rset_clear(allow, dest);
    base = ra_alloc1(as, REF_BASE, allow);
    rset_clear(allow, base);
    if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) {
      emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
      dest = ra_scratch(as, RSET_FPR);
      emit_fai(as, PPCI_STFD, dest, RID_SP, SPOFS_TMP);
      emit_fb(as, PPCI_FCTIWZ, dest, dest);
      t.irt = IRT_NUM;  /* Check for original type. */
      Reg tmp = ra_scratch(as, allow);
      Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, tmp));
      Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
      emit_fab(as, PPCI_FSUB, dest, dest, fbias);
      emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
      emit_lsptr(as, PPCI_LFS, (fbias & 31),
                 (void *)&as->J->k32[LJ_K32_2P52_2P31],
                 rset_clear(allow, hibias));
      emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO);
      emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
      emit_asi(as, PPCI_XORIS, tmp, tmp, 0x8000);
      t.irt = IRT_INT;  /* Check for original type. */
    base = ra_alloc1(as, REF_BASE, allow);
    rset_clear(allow, base);
    if ((ir->op2 & IRSLOAD_TYPECHECK)) {
      Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
      asm_guardcc(as, CC_GE);
      emit_ab(as, PPCI_CMPLW, type, tisnum);
    if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest,
                                  base, ofs-(LJ_SOFTFP?0:4));
    if ((ir->op2 & IRSLOAD_TYPECHECK)) {
      asm_guardcc(as, CC_NE);
      if ((ir->op2 & IRSLOAD_KEYINDEX)) {
        emit_ai(as, PPCI_CMPWI, RID_TMP, (LJ_KEYINDEX & 0xffff));
        emit_asi(as, PPCI_XORIS, RID_TMP, RID_TMP, (LJ_KEYINDEX >> 16));
        emit_ai(as, PPCI_CMPWI, RID_TMP, irt_toitype(t));
    if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, base, ofs);
  if (ra_hasreg(type)) emit_tai(as, PPCI_LWZ, type, base, ofs-4);

/* -- Allocations --------------------------------------------------------- */

static void asm_cnew(ASMState *as, IRIns *ir)
  CTState *cts = ctype_ctsG(J2G(as->J));
  CTypeID id = (CTypeID)IR(ir->op1)->i;
  CTInfo info = lj_ctype_info(cts, id, &sz);
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
  RegSet drop = RSET_SCRATCH;
  lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
             "bad CNEW/CNEWI operands");
  if (ra_hasreg(ir->r))
    rset_clear(drop, ir->r);  /* Dest reg handled below. */
  ra_evictset(as, drop);
    ra_destreg(as, ir, RID_RET);  /* GCcdata * */

  /* Initialize immutable cdata object. */
  if (ir->o == IR_CNEWI) {
    RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
    int32_t ofs = sizeof(GCcdata);
    lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
      lj_assertA((ir+1)->o == IR_HIOP, "expected HIOP for CNEWI");
      Reg r = ra_alloc1(as, ir->op2, allow);
      emit_tai(as, PPCI_STW, r, RID_RET, ofs);
      rset_clear(allow, r);
      if (ofs == sizeof(GCcdata)) break;
  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
    args[0] = ASMREF_L;     /* lua_State *L */
    args[1] = ir->op1;      /* CTypeID id   */
    args[2] = ir->op2;      /* CTSize sz    */
    args[3] = ASMREF_TMP1;  /* CTSize align */
    asm_gencall(as, ci, args);
    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));

  /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
  emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
  emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
  emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
  emit_ti(as, PPCI_LI, RID_TMP, id);  /* Lower 16 bit used. Sign-ext ok. */
  args[0] = ASMREF_L;     /* lua_State *L */
  args[1] = ASMREF_TMP1;  /* MSize size   */
  asm_gencall(as, ci, args);
  ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
               ra_releasetmp(as, ASMREF_TMP1));

/* -- Write barriers ------------------------------------------------------ */

static void asm_tbar(ASMState *as, IRIns *ir)
  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab));
  MCLabel l_end = emit_label(as);
  emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist));
  emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked));
  emit_setgl(as, tab, gc.grayagain);
  lj_assertA(LJ_GC_BLACK == 0x04, "bad LJ_GC_BLACK");
  emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28);  /* Clear black bit. */
  emit_getgl(as, link, gc.grayagain);
  emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
  emit_asi(as, PPCI_ANDIDOT, RID_TMP, mark, LJ_GC_BLACK);
  emit_tai(as, PPCI_LBZ, mark, tab, (int32_t)offsetof(GCtab, marked));

static void asm_obar(ASMState *as, IRIns *ir)
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
  /* No need for other object barriers (yet). */
  lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
  ra_evictset(as, RSET_SCRATCH);
  l_end = emit_label(as);
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ir->op1;      /* TValue *tv      */
  asm_gencall(as, ci, args);
  emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
  obj = IR(ir->op1)->r;
  tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
  emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
  emit_asi(as, PPCI_ANDIDOT, tmp, tmp, LJ_GC_BLACK);
  emit_condbranch(as, PPCI_BC, CC_EQ, l_end);
  emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, LJ_GC_WHITES);
  val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
  emit_tai(as, PPCI_LBZ, tmp, obj,
           (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
  emit_tai(as, PPCI_LBZ, RID_TMP, val, (int32_t)offsetof(GChead, marked));
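/* The barrier only has work to do when the upvalue object is black and the
** stored value is white; both conditional branches above skip to l_end
** otherwise, and lj_gc_barrieruv() is called with g and the TValue pointer.
*/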
/* -- Arithmetic and logic operations ------------------------------------- */

static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi)
  Reg dest = ra_dest(as, ir, RSET_FPR);
  Reg right, left = ra_alloc2(as, ir, RSET_FPR);
  right = (left >> 8); left &= 255;
  if (pi == PPCI_FMUL)
    emit_fac(as, pi, dest, left, right);
    emit_fab(as, pi, dest, left, right);

static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
  Reg dest = ra_dest(as, ir, RSET_FPR);
  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
  emit_fb(as, pi, dest, left);

static void asm_fpmath(ASMState *as, IRIns *ir)
  if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
    asm_fpunary(as, ir, PPCI_FSQRT);
    asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);

static void asm_add(ASMState *as, IRIns *ir)
  if (irt_isnum(ir->t)) {
    if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD))
      asm_fparith(as, ir, PPCI_FADD);
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    if (irref_isk(ir->op2)) {
      int32_t k = IR(ir->op2)->i;
        /* May fail due to spills/restores above, but simplifies the logic. */
        if (as->flagmcp == as->mcp) {
        emit_tai(as, pi, dest, left, k);
      } else if ((k & 0xffff) == 0) {
        emit_tai(as, PPCI_ADDIS, dest, left, (k >> 16));
      } else if (!as->sectref) {
        emit_tai(as, PPCI_ADDIS, dest, dest, (k + 32768) >> 16);
        emit_tai(as, PPCI_ADDI, dest, left, k);
    /* May fail due to spills/restores above, but simplifies the logic. */
    if (as->flagmcp == as->mcp) {
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_tab(as, pi, dest, left, right);

static void asm_sub(ASMState *as, IRIns *ir)
  if (irt_isnum(ir->t)) {
    if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB))
      asm_fparith(as, ir, PPCI_FSUB);
    PPCIns pi = PPCI_SUBF;
    Reg dest = ra_dest(as, ir, RSET_GPR);
    if (irref_isk(ir->op1)) {
      int32_t k = IR(ir->op1)->i;
        right = ra_alloc1(as, ir->op2, RSET_GPR);
        emit_tai(as, PPCI_SUBFIC, dest, right, k);
    /* May fail due to spills/restores above, but simplifies the logic. */
    if (as->flagmcp == as->mcp) {
    left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_tab(as, pi, dest, right, left);  /* Subtract right _from_ left. */

static void asm_mul(ASMState *as, IRIns *ir)
  if (irt_isnum(ir->t)) {
    asm_fparith(as, ir, PPCI_FMUL);
    PPCIns pi = PPCI_MULLW;
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    if (irref_isk(ir->op2)) {
      int32_t k = IR(ir->op2)->i;
        emit_tai(as, PPCI_MULLI, dest, left, k);
      /* May fail due to spills/restores above, but simplifies the logic. */
      if (as->flagmcp == as->mcp) {
      right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
      emit_tab(as, pi, dest, left, right);

#define asm_fpdiv(as, ir)	asm_fparith(as, ir, PPCI_FDIV)

static void asm_neg(ASMState *as, IRIns *ir)
  if (irt_isnum(ir->t)) {
    asm_fpunary(as, ir, PPCI_FNEG);
    PPCIns pi = PPCI_NEG;
    if (as->flagmcp == as->mcp) {
    dest = ra_dest(as, ir, RSET_GPR);
    left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    emit_tab(as, pi, dest, left, 0);

#define asm_abs(as, ir)		asm_fpunary(as, ir, PPCI_FABS)

static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
  Reg dest, left, right;
  if (as->flagmcp == as->mcp) {
  asm_guardcc(as, CC_SO);
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc2(as, ir, RSET_GPR);
  right = (left >> 8); left &= 255;
  if (pi == PPCI_SUBFO) { Reg tmp = left; left = right; right = tmp; }
  emit_tab(as, pi|PPCF_DOT, dest, left, right);
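/* Overflow-checked arithmetic uses the o. (overflow-enabled, recording)
** instruction forms: the dot form copies the XER summary-overflow bit into
** CR0, and the guard above exits the trace on CC_SO.
*/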
#define asm_addov(as, ir)	asm_arithov(as, ir, PPCI_ADDO)
#define asm_subov(as, ir)	asm_arithov(as, ir, PPCI_SUBFO)
#define asm_mulov(as, ir)	asm_arithov(as, ir, PPCI_MULLWO)

static void asm_add64(ASMState *as, IRIns *ir)
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
  PPCIns pi = PPCI_ADDE;
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  emit_tab(as, pi, dest, left, right);
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc1(as, ir->op1, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
      emit_tai(as, PPCI_ADDIC, dest, left, k);
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_tab(as, PPCI_ADDC, dest, left, right);

static void asm_sub64(ASMState *as, IRIns *ir)
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left, right = ra_alloc1(as, ir->op2, RSET_GPR);
  PPCIns pi = PPCI_SUBFE;
  if (irref_isk(ir->op1)) {
    int32_t k = IR(ir->op1)->i;
    left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, right));
  emit_tab(as, pi, dest, right, left);  /* Subtract right _from_ left. */
  dest = ra_dest(as, ir, RSET_GPR);
  right = ra_alloc1(as, ir->op2, RSET_GPR);
  if (irref_isk(ir->op1)) {
    int32_t k = IR(ir->op1)->i;
      emit_tai(as, PPCI_SUBFIC, dest, right, k);
    left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, right));
    emit_tab(as, PPCI_SUBFC, dest, right, left);

static void asm_neg64(ASMState *as, IRIns *ir)
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
  emit_tab(as, PPCI_SUBFZE, dest, left, 0);
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc1(as, ir->op1, RSET_GPR);
  emit_tai(as, PPCI_SUBFIC, dest, left, 0);

static void asm_bnot(ASMState *as, IRIns *ir)
  Reg dest, left, right;
  PPCIns pi = PPCI_NOR;
  if (as->flagmcp == as->mcp) {
  dest = ra_dest(as, ir, RSET_GPR);
  if (mayfuse(as, ir->op1)) {
    IRIns *irl = IR(ir->op1);
    if (irl->o == IR_BAND)
      pi ^= (PPCI_NOR ^ PPCI_NAND);
    else if (irl->o == IR_BXOR)
      pi ^= (PPCI_NOR ^ PPCI_EQV);
    else if (irl->o != IR_BOR)
    left = ra_hintalloc(as, irl->op1, dest, RSET_GPR);
    right = ra_alloc1(as, irl->op2, rset_exclude(RSET_GPR, left));
  left = right = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
  emit_asb(as, pi, dest, left, right);

static void asm_bswap(ASMState *as, IRIns *ir)
  Reg dest = ra_dest(as, ir, RSET_GPR);
  if (mayfuse(as, ir->op1) && (irx = IR(ir->op1))->o == IR_XLOAD &&
      ra_noreg(irx->r) && (irt_isint(irx->t) || irt_isu32(irx->t))) {
    /* Fuse BSWAP with XLOAD to lwbrx. */
    asm_fusexrefx(as, PPCI_LWBRX, dest, irx->op1, RSET_GPR);
    Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
      emit_mr(as, dest, RID_TMP);
    emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 16, 23);
    emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 0, 7);
    emit_rotlwi(as, tmp, left, 8);

/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
  if (mayfuse(as, ref) && (ir = IR(ref), ra_noreg(ir->r)) &&
      irref_isk(ir->op2) && ir->o >= IR_BSHL && ir->o <= IR_BROR) {
    int32_t sh = (IR(ir->op2)->i & 31);
      if ((mask & ((1u<<sh)-1))) goto nofuse;
      if ((mask & ~((~0u)>>sh))) goto nofuse;
    left = ra_alloc1(as, ir->op1, RSET_GPR);
    *--as->mcp = pi | PPCF_T(left) | PPCF_B(sh);
  left = ra_alloc1(as, ref, RSET_GPR);
  *--as->mcp = pi | PPCF_T(left);

static void asm_band(ASMState *as, IRIns *ir)
  Reg dest, left, right;
  IRRef lref = ir->op1;
  if (as->flagmcp == as->mcp) {
  dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
      /* First check for a contiguous bitmask as used by rlwinm. */
      uint32_t s1 = lj_ffs((uint32_t)k);
      uint32_t k1 = ((uint32_t)k >> s1);
      if ((k1 & (k1+1)) == 0) {
        asm_fuseandsh(as, PPCI_RLWINM|dot | PPCF_A(dest) |
                      PPCF_MB(31-lj_fls((uint32_t)k)) | PPCF_ME(31-s1),
      uint32_t s2 = lj_ffs(~(uint32_t)k);
      uint32_t k2 = (~(uint32_t)k >> s2);
      if ((k2 & (k2+1)) == 0) {
        asm_fuseandsh(as, PPCI_RLWINM|dot | PPCF_A(dest) |
                      PPCF_MB(32-s2) | PPCF_ME(30-lj_fls(~(uint32_t)k)),
      left = ra_alloc1(as, lref, RSET_GPR);
      emit_asi(as, PPCI_ANDIDOT, dest, left, k);
    } else if ((k & 0xffff) == 0) {
      left = ra_alloc1(as, lref, RSET_GPR);
      emit_asi(as, PPCI_ANDISDOT, dest, left, (k >> 16));
  if (mayfuse(as, op2) && IR(op2)->o == IR_BNOT && ra_noreg(IR(op2)->r)) {
    dot ^= (PPCI_AND ^ PPCI_ANDC);
  left = ra_hintalloc(as, lref, dest, RSET_GPR);
  right = ra_alloc1(as, op2, rset_exclude(RSET_GPR, left));
  emit_asb(as, PPCI_AND ^ dot, dest, left, right);

static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
      emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
      if ((k & 0xffff) == 0) return;
      emit_asi(as, pik, dest, left, k);
  /* May fail due to spills/restores above, but simplifies the logic. */
  if (as->flagmcp == as->mcp) {
  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  emit_asb(as, pi, dest, left, right);

#define asm_bor(as, ir)		asm_bitop(as, ir, PPCI_OR, PPCI_ORI)
#define asm_bxor(as, ir)	asm_bitop(as, ir, PPCI_XOR, PPCI_XORI)

static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
  if (as->flagmcp == as->mcp) {
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc1(as, ir->op1, RSET_GPR);
  if (irref_isk(ir->op2)) {  /* Constant shifts. */
    int32_t shift = (IR(ir->op2)->i & 31);
    if (pik == 0)  /* SLWI */
      emit_rot(as, PPCI_RLWINM|dot, dest, left, shift, 0, 31-shift);
    else if (pik == 1)  /* SRWI */
      emit_rot(as, PPCI_RLWINM|dot, dest, left, (32-shift)&31, shift, 31);
      emit_asb(as, pik|dot, dest, left, shift);
    Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_asb(as, pi|dot, dest, left, right);

#define asm_bshl(as, ir)	asm_bitshift(as, ir, PPCI_SLW, 0)
#define asm_bshr(as, ir)	asm_bitshift(as, ir, PPCI_SRW, 1)
#define asm_bsar(as, ir)	asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI)
#define asm_brol(as, ir) \
  asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \
               PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
#define asm_bror(as, ir)	lj_assertA(0, "unexpected BROR")

static void asm_sfpmin_max(ASMState *as, IRIns *ir)
  CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp];
  MCLabel l_right, l_end;
  Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR);
  Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR);
  Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR);
  PPCCC cond = (IROp)ir->o == IR_MIN ? CC_EQ : CC_NE;
  righthi = (lefthi >> 8); lefthi &= 255;
  rightlo = (leftlo >> 8); leftlo &= 255;
  args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
  args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
  l_end = emit_label(as);
  if (desthi != righthi) emit_mr(as, desthi, righthi);
  if (destlo != rightlo) emit_mr(as, destlo, rightlo);
  l_right = emit_label(as);
  if (l_end != l_right) emit_jmp(as, l_end);
  if (desthi != lefthi) emit_mr(as, desthi, lefthi);
  if (destlo != leftlo) emit_mr(as, destlo, leftlo);
  if (l_right == as->mcp+1) {
    cond ^= 4; l_right = l_end; ++as->mcp;
  emit_condbranch(as, PPCI_BC, cond, l_right);
  ra_evictset(as, RSET_SCRATCH);
  emit_cmpi(as, RID_RET, 1);
  asm_gencall(as, &ci, args);

static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    Reg right, left = ra_alloc2(as, ir, RSET_FPR);
    right = (left >> 8); left &= 255;
    if (tmp == left || tmp == right)
      tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR,
                        dest), left), right));
    emit_facb(as, PPCI_FSEL, dest, tmp, left, right);
    emit_fab(as, PPCI_FSUB, tmp, ismax ? left : right, ismax ? right : left);
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg tmp1 = RID_TMP, tmp2 = dest;
    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
    right = (left >> 8); left &= 255;
    if (tmp2 == left || tmp2 == right)
      tmp2 = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR,
                         dest), left), right));
    emit_tab(as, PPCI_ADD, dest, tmp2, right);
    emit_asb(as, ismax ? PPCI_ANDC : PPCI_AND, tmp2, tmp2, tmp1);
    emit_tab(as, PPCI_SUBFE, tmp1, tmp1, tmp1);
    emit_tab(as, PPCI_SUBFC, tmp2, tmp2, tmp1);
    emit_asi(as, PPCI_XORIS, tmp2, right, 0x8000);
    emit_asi(as, PPCI_XORIS, tmp1, left, 0x8000);
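/* Sketch of the branchless integer min/max above, in execution order
** (the reverse of the emit_*() calls):
**   tmp1 = left ^ 0x80000000; tmp2 = right ^ 0x80000000;  // bias for signed cmp
**   tmp2 = tmp1 - tmp2;               // == left - right, carry if left >= right
**   tmp1 = (left < right) ? -1 : 0;   // mask built from the carry via subfe
**   tmp2 &= tmp1 (min) or tmp2 &= ~tmp1 (max);
**   dest = right + tmp2;              // right + (left-right) or right + 0
*/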
#define asm_min(as, ir)		asm_min_max(as, ir, 0)
#define asm_max(as, ir)		asm_min_max(as, ir, 1)

/* -- Comparisons --------------------------------------------------------- */

#define CC_UNSIGNED	0x08	/* Unsigned integer comparison. */
#define CC_TWO		0x80	/* Check two flags for FP comparison. */

/* Map of comparisons to flags. ORDER IR. */
static const uint8_t asm_compmap[IR_ABC+1] = {
  /* op     int cc                              FP cc */
  /* LT  */ CC_GE + (CC_GE<<4),
  /* GE  */ CC_LT + (CC_LE<<4) + CC_TWO,
  /* LE  */ CC_GT + (CC_GE<<4) + CC_TWO,
  /* GT  */ CC_LE + (CC_LE<<4),
  /* ULT */ CC_GE + CC_UNSIGNED + (CC_GT<<4) + CC_TWO,
  /* UGE */ CC_LT + CC_UNSIGNED + (CC_LT<<4),
  /* ULE */ CC_GT + CC_UNSIGNED + (CC_GT<<4),
  /* UGT */ CC_LE + CC_UNSIGNED + (CC_LT<<4) + CC_TWO,
  /* EQ  */ CC_NE + (CC_NE<<4),
  /* NE  */ CC_EQ + (CC_EQ<<4),
  /* ABC */ CC_LE + CC_UNSIGNED + (CC_LT<<4) + CC_TWO  /* Same as UGT. */
};
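/* Each entry stores the condition under which the guard *exits*, i.e. the
** negation of the IR comparison: low nibble = integer CC, bits 4-7 = FP CC,
** plus the CC_UNSIGNED/CC_TWO modifier flags defined above.
*/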

static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc)
{
  Reg right, left = ra_alloc1(as, lref, RSET_GPR);
  if (irref_isk(rref)) {
    int32_t k = IR(rref)->i;
    if ((cc & CC_UNSIGNED) == 0) {  /* Signed comparison with constant. */
      if (checki16(k)) {
	emit_tai(as, PPCI_CMPWI, cr, left, k);
	/* Signed comparison with zero and referencing previous ins? */
	if (k == 0 && lref == as->curins-1)
	  as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
	return;
      } else if ((cc & 3) == (CC_EQ & 3)) {  /* Use CMPLWI for EQ or NE. */
	if (checku16(k)) {
	  emit_tai(as, PPCI_CMPLWI, cr, left, k);
	  return;
	} else if (!as->sectref && ra_noreg(IR(rref)->r)) {
	  emit_tai(as, PPCI_CMPLWI, cr, RID_TMP, k);
	  emit_asi(as, PPCI_XORIS, RID_TMP, left, (k >> 16));
	  return;
	}
      }
    } else {  /* Unsigned comparison with constant. */
      if (checku16(k)) {
	emit_tai(as, PPCI_CMPLWI, cr, left, k);
	return;
      }
    }
  }
  right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left));
  emit_tab(as, (cc & CC_UNSIGNED) ? PPCI_CMPLW : PPCI_CMPW, cr, left, right);
}
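
/*
** The xoris/cmplwi pair in the EQ/NE path above handles constants that do
** not fit into 16 bits: xoris flips the upper halfword of 'left' with the
** upper halfword of k, so the intermediate result equals the lower
** halfword of k if and only if left == k, which a single cmplwi against
** the low halfword can then decide.
*/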

static void asm_comp(ASMState *as, IRIns *ir)
{
  PPCCC cc = asm_compmap[ir->o];
  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
    Reg right, left = ra_alloc2(as, ir, RSET_FPR);
    right = (left >> 8); left &= 255;
    asm_guardcc(as, (cc >> 4));
    if ((cc & CC_TWO))
      emit_tab(as, PPCI_CROR, ((cc>>4)&3), ((cc>>4)&3), (CC_EQ&3));
    emit_fab(as, PPCI_FCMPU, 0, left, right);
  } else {
    IRRef lref = ir->op1, rref = ir->op2;
    if (irref_isk(lref) && !irref_isk(rref)) {
      /* Swap constants to the right (only for ABC). */
      IRRef tmp = lref; lref = rref; rref = tmp;
      if ((cc & 2) == 0) cc ^= 1;  /* LT <-> GT, LE <-> GE */
    }
    asm_guardcc(as, cc);
    asm_intcomp_(as, lref, rref, 0, cc);
  }
}

#define asm_equal(as, ir)	asm_comp(as, ir)

/* SFP comparisons. */
static void asm_sfpcomp(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
  RegSet drop = RSET_SCRATCH;
  Reg r;
  IRRef args[4];
  args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
  args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;

  for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) {
    if (!rset_test(as->freeset, r) &&
	regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
      rset_clear(drop, r);  /* Keep argument in its register. */
  }
  ra_evictset(as, drop);
  asm_setupresult(as, ir, ci);
  switch ((IROp)ir->o) {
  case IR_ULT:
    asm_guardcc(as, CC_EQ);
    emit_ai(as, PPCI_CMPWI, RID_RET, 0);
    /* fallthrough */
  case IR_ULE:
    asm_guardcc(as, CC_EQ);
    emit_ai(as, PPCI_CMPWI, RID_RET, 1);
    break;
  case IR_GE: case IR_GT:
    asm_guardcc(as, CC_EQ);
    emit_ai(as, PPCI_CMPWI, RID_RET, 2);
    /* fallthrough */
  default:
    asm_guardcc(as, (asm_compmap[ir->o] & 0xf));
    emit_ai(as, PPCI_CMPWI, RID_RET, 0);
    break;
  }
  asm_gencall(as, ci, args);
}
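
/*
** The case selection above assumes the usual softfp comparison result
** encoding: a negative value for "less than", 0 for "equal", 1 for
** "greater than" and 2 for unordered (NaN operands).  IR_ULT and
** IR_GE/IR_GT need the extra compare because a single test against zero
** cannot separate the unordered result from an ordered one.
*/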

/* 64 bit integer comparisons. */
static void asm_comp64(ASMState *as, IRIns *ir)
{
  PPCCC cc = asm_compmap[(ir-1)->o];
  if ((cc&3) == (CC_EQ&3)) {
    asm_guardcc(as, cc);
    emit_tab(as, (cc&4) ? PPCI_CRAND : PPCI_CROR,
	     (CC_EQ&3), (CC_EQ&3), 4+(CC_EQ&3));
  } else {
    asm_guardcc(as, CC_EQ);
    emit_tab(as, PPCI_CROR, (CC_EQ&3), (CC_EQ&3), ((cc^~(cc>>2))&1));
    emit_tab(as, (cc&4) ? PPCI_CRAND : PPCI_CRANDC,
	     (CC_EQ&3), (CC_EQ&3), 4+(cc&3));
  }
  /* Loword comparison sets cr1 and is unsigned, except for equality. */
  asm_intcomp_(as, (ir-1)->op1, (ir-1)->op2, 4,
	       cc | ((cc&3) == (CC_EQ&3) ? 0 : CC_UNSIGNED));
  /* Hiword comparison sets cr0. */
  asm_intcomp_(as, ir->op1, ir->op2, 0, cc);
  as->flagmcp = NULL;  /* Doesn't work here. */
}
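
/*
** Sketch of the 64 bit comparison above: the hiword compare sets cr0 and
** the loword compare sets cr1 (the loword is compared unsigned unless the
** whole comparison is EQ/NE).  The crand/cror/crandc ops then combine
** "the hiword decides, unless the hiwords are equal, in which case the
** loword decides" into a single cr0 bit for the guard branch to test.
*/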

/* -- Split register ops -------------------------------------------------- */

/* Hiword op of a split 32/32 bit op. Previous op must be the loword op. */
static void asm_hiop(ASMState *as, IRIns *ir)
{
  /* HIOP is marked as a store because it needs its own DCE logic. */
  int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
  if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
#if LJ_HASFFI || LJ_SOFTFP
  if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
    as->curins--;  /* Always skip the CONV. */
#if LJ_HASFFI && !LJ_SOFTFP
    if (usehi || uselo)
      asm_conv64(as, ir);
    return;
#endif
  } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
    as->curins--;  /* Always skip the loword comparison. */
#if LJ_SOFTFP
    if (!irt_isint(ir->t)) {
      asm_sfpcomp(as, ir-1);
      return;
    }
#endif
#if LJ_HASFFI
    asm_comp64(as, ir);
#endif
    return;
#if LJ_SOFTFP
  } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
    as->curins--;  /* Always skip the loword min/max. */
    if (uselo || usehi)
      asm_sfpmin_max(as, ir-1);
    return;
#endif
  } else if ((ir-1)->o == IR_XSTORE) {
    as->curins--;  /* Handle both stores here. */
    if ((ir-1)->r != RID_SINK) {
      asm_xstore_(as, ir, 0);
      asm_xstore_(as, ir-1, 4);
    }
    return;
  }
#endif
  if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
  switch ((ir-1)->o) {
#if LJ_HASFFI
  case IR_ADD: as->curins--; asm_add64(as, ir); break;
  case IR_SUB: as->curins--; asm_sub64(as, ir); break;
  case IR_NEG: as->curins--; asm_neg64(as, ir); break;
  case IR_CNEWI:
    /* Nothing to do here. Handled by lo op itself. */
    break;
#endif
#if LJ_SOFTFP
  case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
  case IR_STRTO:
    if (!uselo)
      ra_allocref(as, ir->op1, RSET_GPR);  /* Mark lo op as used. */
    break;
  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
    /* Nothing to do here. Handled by lo op itself. */
    break;
#endif
  case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
    if (!uselo)
      ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
    break;
  default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
  }
}
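
/*
** Background for asm_hiop above: on 32 bit targets, 64 bit IR operations
** (FFI int64 arithmetic and, under soft-float, FP values) are split into a
** loword op immediately followed by a HIOP.  The assembler walks the IR
** backwards, so asm_hiop sees the pair first and can either handle both
** halves at once (decrementing as->curins to skip the loword op) or merely
** mark the loword result as used.
*/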

/* -- Profiling ----------------------------------------------------------- */

static void asm_prof(ASMState *as, IRIns *ir)
{
  UNUSED(ir);
  asm_guardcc(as, CC_NE);
  emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE);
  emit_lsglptr(as, PPCI_LBZ, RID_TMP,
	       (int32_t)offsetof(global_State, hookmask));
}

/* -- Stack handling ------------------------------------------------------ */

/* Check Lua stack size for overflow. Use exit handler as fallback. */
static void asm_stack_check(ASMState *as, BCReg topslot,
			    IRIns *irp, RegSet allow, ExitNo exitno)
{
  /* Try to get an unused temp. register, otherwise spill/restore RID_RET*. */
  Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE;
  rset_clear(allow, pbase);
  tmp = allow ? rset_pickbot(allow) :
		(pbase == RID_RETHI ? RID_RETLO : RID_RETHI);
  emit_condbranch(as, PPCI_BC, CC_LT, asm_exitstub_addr(as, exitno));
  if (allow == RSET_EMPTY)  /* Restore temp. register. */
    emit_tai(as, PPCI_LWZ, tmp, RID_SP, SPOFS_TMPW);
  else
    ra_modified(as, tmp);
  emit_ai(as, PPCI_CMPLWI, RID_TMP, (int32_t)(8*topslot));
  emit_tab(as, PPCI_SUBF, RID_TMP, pbase, tmp);
  emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
  if (pbase == RID_TMP)
    emit_getgl(as, RID_TMP, jit_base);
  emit_getgl(as, tmp, cur_L);
  if (allow == RSET_EMPTY)  /* Spill temp. register. */
    emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
}
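
/*
** The stack check above emits, roughly and in execution order:
**
**   [stw  tmp, SPOFS_TMPW(sp)]        ; spill tmp if no free register
**   lwz   tmp, ->cur_L                ; current lua_State
**   [lwz  rTMP, ->jit_base]           ; reload BASE if pbase is RID_TMP
**   lwz   tmp, L->maxstack(tmp)
**   subf  rTMP, pbase, tmp            ; remaining stack space in bytes
**   cmplwi rTMP, 8*topslot
**   [lwz  tmp, SPOFS_TMPW(sp)]        ; restore spilled tmp
**   blt   ->exit(exitno)              ; not enough space -> take the exit
**
** Bracketed instructions are only emitted in the corresponding cases.
*/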

/* Restore Lua stack from on-trace state. */
static void asm_stack_restore(ASMState *as, SnapShot *snap)
{
  SnapEntry *map = &as->T->snapmap[snap->mapofs];
  SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
  MSize n, nent = snap->nent;
  /* Store the value of all modified slots to the Lua stack. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    int32_t ofs = 8*((int32_t)s-1);
    IRRef ref = snap_ref(sn);
    IRIns *ir = IR(ref);
    if ((sn & SNAP_NORESTORE))
      continue;
    if (irt_isnum(ir->t)) {
#if LJ_SOFTFP
      Reg tmp;
      RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
      /* LJ_SOFTFP: must be a number constant. */
      lj_assertA(irref_isk(ref), "unsplit FP op");
      tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow);
      emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0));
      if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
      tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
      emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
#else
      Reg src = ra_alloc1(as, ref, RSET_FPR);
      emit_fai(as, PPCI_STFD, src, RID_BASE, ofs);
#endif
    } else {
      Reg type;
      RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
      lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
		 "restore of IR type %d", irt_type(ir->t));
      if (!irt_ispri(ir->t)) {
	Reg src = ra_alloc1(as, ref, allow);
	rset_clear(allow, src);
	emit_tai(as, PPCI_STW, src, RID_BASE, ofs+4);
      }
      if ((sn & (SNAP_CONT|SNAP_FRAME))) {
	if (s == 0) continue;  /* Do not overwrite link to previous frame. */
	type = ra_allock(as, (int32_t)(*flinks--), allow);
#if LJ_SOFTFP
      } else if ((sn & SNAP_SOFTFPNUM)) {
	type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
#endif
      } else if ((sn & SNAP_KEYINDEX)) {
	type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow);
      } else {
	type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
      }
      emit_tai(as, PPCI_STW, type, RID_BASE, ofs);
    }
    checkmclim(as);
  }
  lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
}

/* -- GC handling --------------------------------------------------------- */

/* Marker to prevent patching the GC check exit. */
#define PPC_NOPATCH_GC_CHECK	PPCI_ORIS

/* Check GC threshold and do one or more GC steps. */
static void asm_gc_check(ASMState *as)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
  IRRef args[2];
  MCLabel l_end;
  Reg tmp;
  ra_evictset(as, RSET_SCRATCH);
  l_end = emit_label(as);
  /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
  asm_guardcc(as, CC_NE);  /* Assumes asm_snap_prep() already done. */
  *--as->mcp = PPC_NOPATCH_GC_CHECK;
  emit_ai(as, PPCI_CMPWI, RID_RET, 0);
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ASMREF_TMP2;  /* MSize steps     */
  asm_gencall(as, ci, args);
  emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
  tmp = ra_releasetmp(as, ASMREF_TMP2);
  emit_loadi(as, tmp, as->gcsteps);
  /* Jump around GC step if GC total < GC threshold. */
  emit_condbranch(as, PPCI_BC|PPCF_Y, CC_LT, l_end);
  emit_ab(as, PPCI_CMPLW, RID_TMP, tmp);
  emit_getgl(as, tmp, gc.threshold);
  emit_getgl(as, RID_TMP, gc.total);
  as->gcsteps = 0;
  checkmclim(as);
}

/* -- Loop handling ------------------------------------------------------- */

/* Fixup the loop branch. */
static void asm_loop_fixup(ASMState *as)
{
  MCode *p = as->mctop;
  MCode *target = as->mcp;
  if (as->loopinv) {  /* Inverted loop branch? */
    /* asm_guardcc already inverted the cond branch and patched the final b. */
    p[-2] = (p[-2] & (0xffff0000u & ~PPCF_Y)) | (((target-p+2) & 0x3fffu) << 2);
  } else {
    p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2);
  }
}

/* Fixup the tail of the loop. */
static void asm_loop_tail_fixup(ASMState *as)
{
  UNUSED(as);  /* Nothing to do. */
}

/* -- Head of trace ------------------------------------------------------- */

/* Coalesce BASE register for a root trace. */
static void asm_head_root_base(ASMState *as)
{
  IRIns *ir = IR(REF_BASE);
  Reg r = ir->r;
  if (ra_hasreg(r)) {
    ra_free(as, r);
    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
    if (r != RID_BASE)
      emit_mr(as, r, RID_BASE);
  }
}

/* Coalesce BASE register for a side trace. */
static Reg asm_head_side_base(ASMState *as, IRIns *irp)
{
  IRIns *ir = IR(REF_BASE);
  Reg r = ir->r;
  if (ra_hasreg(r)) {
    ra_free(as, r);
    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
    if (irp->r == r) {
      return r;  /* Same BASE register already coalesced. */
    } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
      emit_mr(as, r, irp->r);  /* Move from coalesced parent reg. */
      return irp->r;
    } else {
      emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
    }
  }
  return RID_BASE;
}

/* -- Tail of trace ------------------------------------------------------- */

/* Fixup the tail code. */
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
  MCode *p = as->mctop;
  MCode *target;
  int32_t spadj = as->T->spadjust;
  if (spadj == 0) {
    *--p = PPCI_NOP;
    *--p = PPCI_NOP;
    as->mctop = p;
  } else {
    /* Patch stack adjustment. */
    lj_assertA(checki16(CFRAME_SIZE+spadj), "stack adjustment out of range");
    p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj);
    p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj;
  }
  /* Patch exit branch. */
  target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
  p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2);
}
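
/*
** Tail layout (see also asm_tail_prep below): up to three words are
** reserved at the end of the trace:
**   addi rTMP, sp, CFRAME_SIZE+spadj   ; recompute the frame back-chain
**   stwu rTMP, spadj(sp)               ; store it and adjust the stack
**   b    <linked trace or ->vm_exit_interp>
** When no stack adjustment is needed, the first two words are overwritten
** with nops and as->mctop is shrunk, so only the branch remains.
*/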

/* Prepare tail of code. */
static void asm_tail_prep(ASMState *as)
{
  MCode *p = as->mctop - 1;  /* Leave room for exit branch. */
  if (as->loopref) {
    as->invmcp = as->mcp = p;
  } else {
    as->mcp = p-2;  /* Leave room for stack pointer adjustment. */
    as->invmcp = NULL;
  }
}

/* -- Trace setup --------------------------------------------------------- */

/* Ensure there are enough stack slots for call arguments. */
static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
  IRRef args[CCI_NARGS_MAX*2];
  uint32_t i, nargs = CCI_XNARGS(ci);
  int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
  asm_collectargs(as, ir, ci, args);
  for (i = 0; i < nargs; i++)
    if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) {
      if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1;
    } else {
      if (ngpr > 0) ngpr--; else nslots++;
    }
  if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
    as->evenspill = nslots;
  return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) :
					   REGSP_HINT(RID_RET);
}

static void asm_setup_target(ASMState *as)
{
  asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
}

/* -- Trace patching ------------------------------------------------------ */

/* Patch exit jumps of existing machine code to a new target. */
void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
{
  MCode *p = T->mcode;
  MCode *pe = (MCode *)((char *)p + T->szmcode);
  MCode *px = exitstub_trace_addr(T, exitno);
  MCode *cstart = NULL;
  MCode *mcarea = lj_mcode_patch(J, p, 0);
  int clearso = 0, patchlong = 1;
  for (; p < pe; p++) {
    /* Look for exitstub branch, try to replace with branch to target. */
    uint32_t ins = *p;
    if ((ins & 0xfc000000u) == 0x40000000u &&
	((ins ^ ((char *)px-(char *)p)) & 0xffffu) == 0) {
      ptrdiff_t delta = (char *)target - (char *)p;
      if (((ins >> 16) & 3) == (CC_SO&3)) {
	clearso = sizeof(MCode);
	delta -= sizeof(MCode);
      }
      /* Many, but not all short-range branches can be patched directly. */
      if (p[-1] == PPC_NOPATCH_GC_CHECK) {
	patchlong = 0;
      } else if (((delta + 0x8000) >> 16) == 0) {
	*p = (ins & 0xffdf0000u) | ((uint32_t)delta & 0xffffu) |
	     ((delta & 0x8000) * (PPCF_Y/0x8000));
	if (!cstart) cstart = p;
      }
    } else if ((ins & 0xfc000000u) == PPCI_B &&
	       ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) {
      ptrdiff_t delta = (char *)target - (char *)p;
      lj_assertJ(((delta + 0x02000000) >> 26) == 0,
		 "branch target out of range");
      *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
      if (!cstart) cstart = p;
    }
  }
  /* Always patch long-range branch in exit stub itself. Except, if we can't. */
  if (patchlong) {
    ptrdiff_t delta = (char *)target - (char *)px - clearso;
    lj_assertJ(((delta + 0x02000000) >> 26) == 0,
	       "branch target out of range");
    *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
  }
  if (!cstart) cstart = px;
  lj_mcode_sync(cstart, px+1);
  if (clearso) {  /* Extend the current trace. Ugly workaround. */
    MCode *pp = J->cur.mcode;
    J->cur.szmcode += sizeof(MCode);
    *--pp = PPCI_MCRXR;  /* Clear SO flag. */
    J->cur.mcode = pp;
    lj_mcode_sync(pp, pp+1);
  }
  lj_mcode_patch(J, mcarea, 1);
}
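
/*
** Note on the clearso workaround above: an exit branch that tests the SO
** (summary overflow) bit cannot simply be redirected into the side trace,
** presumably because a stale SO bit could falsely trigger later overflow
** guards there.  The branch is therefore pointed one instruction in front
** of the side trace, where an mcrxr is prepended (growing J->cur.mcode by
** one word) to clear XER[SO] before execution falls into the trace proper.
*/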