/*
** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_split_c
#define LUA_CORE

#include "lj_obj.h"

#if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI))

#include "lj_err.h"
#include "lj_buf.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_ircall.h"
#include "lj_iropt.h"
#include "lj_dispatch.h"
#include "lj_vm.h"

/* SPLIT pass:
**
** This pass splits up 64 bit IR instructions into multiple 32 bit IR
** instructions. It's only active for soft-float targets or for 32 bit CPUs
** which lack native 64 bit integer operations (the FFI is currently the
** only emitter for 64 bit integer instructions).
**
** Splitting the IR in a separate pass keeps each 32 bit IR assembler
** backend simple. Only a small amount of extra functionality needs to be
** implemented. This is much easier than adding support for allocating
** register pairs to each backend (believe me, I tried). A few simple, but
** important optimizations can be performed by the SPLIT pass, which would
** be tedious to do in the backend.
**
** The basic idea is to replace each 64 bit IR instruction with its 32 bit
** equivalent plus an extra HIOP instruction. The split IR is not passed
** through FOLD or any other optimizations, so each HIOP is guaranteed to
** immediately follow its counterpart. The actual functionality of HIOP is
** inferred from the previous instruction.
**
** The operands of HIOP hold the hiword input references. The output of HIOP
** is the hiword output reference, which is also used to hold the hiword
** register or spill slot information. The register allocator treats this
** instruction independently of any other instruction, which improves code
** quality compared to using fixed register pairs.
**
** It's easier to split up some instructions into two regular 32 bit
** instructions. E.g. XLOAD is split up into two XLOADs with two different
** addresses. Obviously 64 bit constants need to be split up into two 32 bit
** constants, too. Some hiword instructions can be entirely omitted, e.g.
** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls
** are split up into two 32 bit arguments each.
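**
** E.g. the 64 bit constant 0x0000000100000002 is split into the two 32 bit
** constants +2 (loword) and +1 (hiword), and references to it are
** substituted with the matching word pair.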
**
** On soft-float targets, floating-point instructions are directly converted
** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX).
** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump).
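**
** E.g. a soft-float 'num ADD 0001 0002' becomes a call (schematically, with
** hi1/hi2 denoting the hiword refs of the two operands):
**
**   0003  nil CARG   0001  hi1
**   0004  nil CARG   0003  0002
**   0005  nil CARG   0004  hi2
**   0006  int CALLN  0005  softfp_add
**   0007  sfp HIOP   0006  0006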
**
** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with
** two int64_t fields:
**
** 0100  p32 ADD    base  +8
** 0101  i64 XLOAD  0100
** 0102  i64 ADD    0101  +1
** 0103  p32 ADD    base  +16
** 0104  i64 XSTORE 0103  0102
**
**       mov rax, [esi+0x8]
**       add rax, +0x01
**       mov [esi+0x10], rax
**
** Here's the transformed IR and the x86 machine code after the SPLIT pass:
**
** 0100  p32 ADD    base  +8
** 0101  int XLOAD  0100
** 0102  p32 ADD    base  +12
** 0103  int XLOAD  0102
** 0104  int ADD    0101  +1
** 0105  int HIOP   0103  +0
** 0106  p32 ADD    base  +16
** 0107  int XSTORE 0106  0104
** 0108  int HIOP   0106  0105
**
**       mov eax, [esi+0x8]
**       mov ecx, [esi+0xc]
**       add eax, +0x01
**       adc ecx, +0x00
**       mov [esi+0x10], eax
**       mov [esi+0x14], ecx
**
** You may notice the reassociated hiword address computation, which is
** later fused into the mov operands by the assembler.
*/

/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)		(&J->cur.ir[(ref)])

/* Directly emit the transformed IR without updating chains etc. */
static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2)
{
  IRRef nref = lj_ir_nextins(J);
  IRIns *ir = IR(nref);
  ir->ot = ot;
  ir->op1 = op1;
  ir->op2 = op2;
  return nref;
}

#if LJ_SOFTFP
/* Emit a (checked) number to integer conversion. */
static IRRef split_num2int(jit_State *J, IRRef lo, IRRef hi, int check)
{
  IRRef tmp, res;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), lo, hi);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hi, lo);
#endif
  res = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_softfp_d2i);
  if (check) {
    tmp = split_emit(J, IRTI(IR_CALLN), res, IRCALL_softfp_i2d);
    split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
    split_emit(J, IRTGI(IR_EQ), tmp, lo);
    split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), tmp+1, hi);
  }
  return res;
}
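
/* The checked variant converts the integer result back with softfp_i2d and
** emits guarded comparisons of both words against the original number, so
** the trace exits if the conversion was inexact. Roughly, with the d2i
** result in 0005 and lo/hi denoting the original words (illustrative refs):
**
**   0006    int CALLN  0005  softfp_i2d
**   0007    sfp HIOP   0006  0006
**   0008 >  int EQ     0006  lo
**   0009 >  sfp HIOP   0007  hi
*/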

/* Emit a CALLN with one split 64 bit argument. */
static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
                          IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
#endif

/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
                           IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}

/* Emit a CALLN with two split 64 bit arguments. */
static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
                           IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J,
    IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
    tmp, tmp);
}

/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */
static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
{
  IRRef nref = oir[ref].prev;
  IRIns *ir = IR(nref);
  int32_t ofs = 4;
  if (ir->o == IR_KPTR)
    return lj_ir_kptr(J, (char *)ir_kptr(ir) + ofs);
  if (ir->o == IR_ADD && irref_isk(ir->op2) && !irt_isphi(oir[ref].t)) {
    /* Reassociate address. */
    ofs += IR(ir->op2)->i;
    nref = ir->op1;
    if (ofs == 0) return nref;
  }
  return split_emit(J, IRT(IR_ADD, IRT_PTR), nref, lj_ir_kint(J, ofs));
}
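
/* E.g. on LJ_LE the hiword address of '0100 p32 ADD base +8' is
** reassociated to 'ADD base +12' instead of stacking another '+4' ADD on
** top of the loword address (cf. the example in the header comment).
*/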

#if LJ_HASFFI
static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
                            IRIns *oir, IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize constant shifts. */
    int32_t k = (IR(kref)->i & 63);
    IRRef lo = nir->op1, hi = hisubst[ir->op1];
    if (op == IR_BROL || op == IR_BROR) {
      if (op == IR_BROR) k = (-k & 63);
      if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }
      if (k == 0) {
      passthrough:
        J->cur.nins--;
        ir->prev = lo;
        return hi;
      } else {
        TRef k1, k2;
        IRRef t1, t2, t3, t4;
        J->cur.nins--;
        k1 = lj_ir_kint(J, k);
        k2 = lj_ir_kint(J, (-k & 31));
        t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
        t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
        t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
        t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
        ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);
        return split_emit(J, IRTI(IR_BOR), t2, t3);
      }
    } else if (k == 0) {
      goto passthrough;
    } else if (k < 32) {
      if (op == IR_BSHL) {
        IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
        IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
        return split_emit(J, IRTI(IR_BOR), t1, t2);
      } else {
        IRRef t1 = ir->prev, t2;
        lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage");
        nir->o = IR_BSHR;
        t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
        ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
        return split_emit(J, IRTI(op), hi, kref);
      }
    } else {
      if (op == IR_BSHL) {
        if (k == 32)
          J->cur.nins--;
        else
          lo = ir->prev;
        ir->prev = lj_ir_kint(J, 0);
        return lo;
      } else {
        lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage");
        if (k == 32) {
          J->cur.nins--;
          ir->prev = hi;
        } else {
          nir->op1 = hi;
        }
        if (op == IR_BSHR)
          return lj_ir_kint(J, 0);
        else
          return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));
      }
    }
  }
  return split_call_li(J, hisubst, oir, ir,
                       op - IR_BSHL + IRCALL_lj_carith_shl64);
}
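
/* Constant 64 bit shifts decompose into 32 bit shifts and ORs. E.g. for a
** shift count k with 0 < k < 32:
**
**   BSHL: hi' = (hi << k) | (lo >> (32-k)),  lo' = lo << k
**   BSHR: lo' = (lo >> k) | (hi << (32-k)),  hi' = hi >> k
**
** Rotates with k >= 32 first swap the two words and then rotate by k-32;
** BROR by k is treated as BROL by 64-k. Non-constant shift counts fall
** back to a call (e.g. lj_carith_shl64).
*/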

static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
                         IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef hi, kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize bit operations with lo constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      if (op == IR_BAND) k = ~k;
      if (k == 0) {
        J->cur.nins--;
        ir->prev = nir->op1;
      } else if (op == IR_BXOR) {
        nir->o = IR_BNOT;
        nir->op2 = 0;
      } else {
        J->cur.nins--;
        ir->prev = kref;
      }
    }
  }
  hi = hisubst[ir->op1];
  kref = hisubst[ir->op2];
  if (irref_isk(kref)) {  /* Optimize bit operations with hi constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      if (op == IR_BAND) k = ~k;
      if (k == 0) {
        return hi;
      } else if (op == IR_BXOR) {
        return split_emit(J, IRTI(IR_BNOT), hi, 0);
      } else {
        return kref;
      }
    }
  }
  return split_emit(J, IRTI(op), hi, kref);
}
#endif
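
/* E.g. BAND with constant 0 in one word folds that word to 0, BAND with -1
** passes the other operand's word through, and BXOR with -1 turns into
** BNOT; the lo and hi halves are folded independently.
*/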

/* Substitute references of a snapshot. */
static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
{
  SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
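  /* A SnapEntry keeps slot and flags in the upper 16 bits and the IR
  ** reference in the lower 16 bits, so only the reference is replaced. */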
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    IRIns *ir = &oir[snap_ref(sn)];
    if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn))))
      map[n] = ((sn & 0xffff0000) | ir->prev);
  }
}

/* Transform the old IR to the new IR. */
static void split_ir(jit_State *J)
{
  IRRef nins = J->cur.nins, nk = J->cur.nk;
  MSize irlen = nins - nk;
  MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
  IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need);
  IRRef1 *hisubst;
  IRRef ref, snref;
  SnapShot *snap;

  /* Copy old IR to buffer. */
  memcpy(oir, IR(nk), irlen*sizeof(IRIns));
  /* Bias hiword substitution table and old IR. Loword kept in field prev. */
  hisubst = (IRRef1 *)&oir[irlen] - nk;
  oir -= nk;
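  /* After biasing, oir[ref] and hisubst[ref] can be indexed directly with
  ** any IR reference in the range nk <= ref < nins. */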

  /* Remove all IR instructions, but retain IR constants. */
  J->cur.nins = REF_FIRST;
  J->loopref = 0;

  /* Process constants and fixed references. */
  for (ref = nk; ref <= REF_BASE; ref++) {
    IRIns *ir = &oir[ref];
    if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
      /* Split up 64 bit constant. */
      TValue tv = *ir_k64(ir);
      ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
      hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
    } else {
      ir->prev = ref;  /* Identity substitution for loword. */
      hisubst[ref] = 0;
    }
    /* 64 bit constants occupy two IR slots, so skip the second slot. */
    if (irt_is64(ir->t) && ir->o != IR_KNULL)
      ref++;
  }

  /* Process old IR instructions. */
  snap = J->cur.snap;
  snref = snap->ref;
  for (ref = REF_FIRST; ref < nins; ref++) {
    IRIns *ir = &oir[ref];
    IRRef nref = lj_ir_nextins(J);
    IRIns *nir = IR(nref);
    IRRef hi = 0;

    if (ref >= snref) {
      snap->ref = nref;
      split_subst_snap(J, snap++, oir);
      snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0;
    }

    /* Copy-substitute old instruction to new instruction. */
    nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
    nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev;
    ir->prev = nref;  /* Loword substitution. */
    nir->o = ir->o;
    nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);
    hisubst[ref] = 0;

    /* Split 64 bit instructions. */
#if LJ_SOFTFP
    if (irt_isnum(ir->t)) {
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
      switch (ir->o) {
      case IR_ADD:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
        break;
      case IR_SUB:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
        break;
      case IR_MUL:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
        break;
      case IR_DIV:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
        break;
      case IR_POW:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_pow);
        break;
      case IR_FPMATH:
        hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
        break;
      case IR_LDEXP:
        hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
        break;
      case IR_NEG: case IR_ABS:
        nir->o = IR_CONV;  /* Pass through loword. */
        nir->op2 = (IRT_INT << 5) | IRT_INT;
        hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
                        hisubst[ir->op1],
                        lj_ir_kint(J, (int32_t)(0x7fffffffu + (ir->o == IR_NEG))));
        break;
      case IR_SLOAD:
        if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from int to number. */
          nir->op2 &= ~IRSLOAD_CONVERT;
          ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref,
                                       IRCALL_softfp_i2d);
          hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
          break;
        }
        /* fallthrough */
      case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
      case IR_STRTO:
        hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        break;
      case IR_FLOAD:
        lj_assertJ(ir->op1 == REF_NIL, "expected FLOAD from GG_State");
        hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4));
        nir->op2 += LJ_BE*4;
        break;
      case IR_XLOAD: {
        IRIns inslo = *nir;  /* Save/undo the emit of the lo XLOAD. */
        J->cur.nins--;
        hi = split_ptr(J, oir, ir->op1);  /* Insert the hiref ADD. */
#if LJ_BE
        hi = split_emit(J, IRT(IR_XLOAD, IRT_INT), hi, ir->op2);
        inslo.t.irt = IRT_SOFTFP | (inslo.t.irt & IRT_GUARD);
#endif
        nref = lj_ir_nextins(J);
        nir = IR(nref);
        *nir = inslo;  /* Re-emit lo XLOAD. */
#if LJ_LE
        hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
        ir->prev = nref;
#else
        ir->prev = hi; hi = nref;
#endif
        break;
      }
      case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE:
        split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
        break;
      case IR_CONV: {  /* Conversion to number. Others handled below. */
        IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
        UNUSED(st);
#if LJ_32 && LJ_HASFFI
        if (st == IRT_I64 || st == IRT_U64) {
          hi = split_call_l(J, hisubst, oir, ir,
                 st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d);
          break;
        }
#endif
        lj_assertJ(st == IRT_INT ||
                   (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)),
                   "bad source type for CONV");
        nir->o = IR_CALLN;
#if LJ_32 && LJ_HASFFI
        nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
                   st == IRT_FLOAT ? IRCALL_softfp_f2d :
                   IRCALL_softfp_ui2d;
#else
        nir->op2 = IRCALL_softfp_i2d;
#endif
        hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        break;
      }
      case IR_CALLN:
      case IR_CALLL:
      case IR_CALLS:
      case IR_CALLXS:
        goto split_call;
      case IR_PHI:
        if (nir->op1 == nir->op2)
          J->cur.nins--;  /* Drop useless PHIs. */
        if (hisubst[ir->op1] != hisubst[ir->op2])
          split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
                     hisubst[ir->op1], hisubst[ir->op2]);
        break;
      case IR_HIOP:
        J->cur.nins--;  /* Drop joining HIOP. */
        ir->prev = nir->op1;
        hi = nir->op2;
        break;
      default:
        lj_assertJ(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX,
                   "bad IR op %d", ir->o);
        hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
                        hisubst[ir->op1], hisubst[ir->op2]);
        break;
      }
    } else
#endif
#if LJ_32 && LJ_HASFFI
    if (irt_isint64(ir->t)) {
      IRRef hiref = hisubst[ir->op1];
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      switch (ir->o) {
      case IR_ADD:
      case IR_SUB:
        /* Use plain op for hiword if loword cannot produce a carry/borrow. */
        if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
          ir->prev = nir->op1;  /* Pass through loword. */
          nir->op1 = hiref; nir->op2 = hisubst[ir->op2];
          hi = nref;
          break;
        }
        /* fallthrough */
      case IR_NEG:
        hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
        break;
      case IR_MUL:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
        break;
      case IR_DIV:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
                                              IRCALL_lj_carith_divu64);
        break;
      case IR_MOD:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
                                              IRCALL_lj_carith_modu64);
        break;
      case IR_POW:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
                                              IRCALL_lj_carith_powu64);
        break;
      case IR_BNOT:
        hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
        break;
      case IR_BSWAP:
        ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
        hi = nref;
        break;
      case IR_BAND: case IR_BOR: case IR_BXOR:
        hi = split_bitop(J, hisubst, nir, ir);
        break;
      case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
        hi = split_bitshift(J, hisubst, oir, nir, ir);
        break;
      case IR_FLOAD:
        lj_assertJ(ir->op2 == IRFL_CDATA_INT64, "only INT64 supported");
        hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
#if LJ_BE
        ir->prev = hi; hi = nref;
#endif
        break;
      case IR_XLOAD:
        hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2);
#if LJ_BE
        ir->prev = hi; hi = nref;
#endif
        break;
      case IR_XSTORE:
        split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]);
        break;
      case IR_CONV: {  /* Conversion to 64 bit integer. Others handled below. */
        IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_SOFTFP
        if (st == IRT_NUM) {  /* NUM to 64 bit int conv. */
          hi = split_call_l(J, hisubst, oir, ir,
                 irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul);
        } else if (st == IRT_FLOAT) {  /* FLOAT to 64 bit int conv. */
          nir->o = IR_CALLN;
          nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
          hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
        }
#else
        if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit int conv. */
          hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
        }
#endif
        else if (st == IRT_I64 || st == IRT_U64) {  /* 64/64 bit cast. */
          /* Drop cast, since assembler doesn't care. But fwd both parts. */
          hi = hiref;
          goto fwdlo;
        } else if ((ir->op2 & IRCONV_SEXT)) {  /* Sign-extend to 64 bit. */
          IRRef k31 = lj_ir_kint(J, 31);
          nir = IR(nref);  /* May have been reallocated. */
          ir->prev = nir->op1;  /* Pass through loword. */
          nir->o = IR_BSAR;  /* hi = bsar(lo, 31). */
          nir->op2 = k31;
          hi = nref;
        } else {  /* Zero-extend to 64 bit. */
          hi = lj_ir_kint(J, 0);
          goto fwdlo;
        }
        break;
      }
      case IR_CALLXS:
        goto split_call;
      case IR_PHI: {
        IRRef hiref2;
        if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
            nir->op1 == nir->op2)
          J->cur.nins--;  /* Drop useless PHIs. */
        hiref2 = hisubst[ir->op2];
        if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2))
          split_emit(J, IRTI(IR_PHI), hiref, hiref2);
        break;
      }
      case IR_HIOP:
        J->cur.nins--;  /* Drop joining HIOP. */
        ir->prev = nir->op1;
        hi = nir->op2;
        break;
      default:
        lj_assertJ(ir->o <= IR_NE, "bad IR op %d", ir->o);  /* Comparisons. */
        split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
        break;
      }
    } else
#endif
#if LJ_SOFTFP
    if (ir->o == IR_SLOAD) {
      if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from number to int. */
        nir->op2 &= ~IRSLOAD_CONVERT;
        if (!(nir->op2 & IRSLOAD_TYPECHECK))
          nir->t.irt = IRT_INT;  /* Drop guard. */
        split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t));
      }
    } else if (ir->o == IR_TOBIT) {
      IRRef tmp, op1 = ir->op1;
      J->cur.nins--;
#if LJ_LE
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
      ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
    } else if (ir->o == IR_TOSTR || ir->o == IR_TMPREF) {
      if (hisubst[ir->op1]) {
        if (irref_isk(ir->op1))
          nir->op1 = ir->op1;
        else
          split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref);
      }
    } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) {
      if (irref_isk(ir->op2) && hisubst[ir->op2])
        nir->op2 = ir->op2;
    } else
#endif
    if (ir->o == IR_CONV) {  /* See above, too. */
      IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_32 && LJ_HASFFI
      if (st == IRT_I64 || st == IRT_U64) {  /* Conversion from 64 bit int. */
#if LJ_SOFTFP
        if (irt_isfloat(ir->t)) {
          split_call_l(J, hisubst, oir, ir,
                       st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f);
          J->cur.nins--;  /* Drop unused HIOP. */
        }
#else
        if (irt_isfp(ir->t)) {  /* 64 bit integer to FP conversion. */
          ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
                                hisubst[ir->op1], nref);
        }
#endif
        else {  /* Truncate to lower 32 bits. */
        fwdlo:
          ir->prev = nir->op1;  /* Forward loword. */
          /* Replace with NOP to avoid messing up the snapshot logic. */
          nir->ot = IRT(IR_NOP, IRT_NIL);
          nir->op1 = nir->op2 = 0;
        }
      }
#endif
#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
      else if (irt_isfloat(ir->t)) {
        if (st == IRT_NUM) {
          split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
          J->cur.nins--;  /* Drop unused HIOP. */
        } else {
          nir->o = IR_CALLN;
          nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
        }
      } else if (st == IRT_FLOAT) {
        nir->o = IR_CALLN;
        nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
      } else
#endif
#if LJ_SOFTFP
      if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
        if (irt_isguard(ir->t)) {
          lj_assertJ(st == IRT_NUM && irt_isint(ir->t), "bad CONV types");
          J->cur.nins--;
          ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
        } else {
          split_call_l(J, hisubst, oir, ir,
#if LJ_32 && LJ_HASFFI
            st == IRT_NUM ?
              (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
              (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)
#else
            IRCALL_softfp_d2i
#endif
          );
          J->cur.nins--;  /* Drop unused HIOP. */
        }
      }
#endif
    } else if (ir->o == IR_CALLXS) {
      IRRef hiref;
    split_call:
      hiref = hisubst[ir->op1];
      if (hiref) {
        IROpT ot = nir->ot;
        IRRef op2 = nir->op2;
        nir->ot = IRT(IR_CARG, IRT_NIL);
#if LJ_LE
        nir->op2 = hiref;
#else
        nir->op2 = nir->op1; nir->op1 = hiref;
#endif
        ir->prev = nref = split_emit(J, ot, nref, op2);
      }
      if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
        hi = split_emit(J,
          IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
          nref, nref);
    } else if (ir->o == IR_CARG) {
      IRRef hiref = hisubst[ir->op1];
      if (hiref) {
        IRRef op2 = nir->op2;
#if LJ_LE
        nir->op2 = hiref;
#else
        nir->op2 = nir->op1; nir->op1 = hiref;
#endif
        ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
        nir = IR(nref);
      }
      hiref = hisubst[ir->op2];
      if (hiref) {
#if !LJ_TARGET_X86
        int carg = 0;
        IRIns *cir;
        for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1))
          carg++;
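        /* Most non-x86 32 bit ABIs pass 64 bit arguments in aligned
        ** register pairs or stack slots, so pad with a dummy CARG when
        ** needed to keep the two halves together. */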
        if ((carg & 1) == 0) {  /* Align 64 bit arguments. */
          IRRef op2 = nir->op2;
          nir->op2 = REF_NIL;
          nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
          nir = IR(nref);
        }
#endif
#if LJ_BE
        { IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; }
#endif
        ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref);
      }
    } else if (ir->o == IR_CNEWI) {
      if (hisubst[ir->op2])
        split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]);
    } else if (ir->o == IR_LOOP) {
      J->loopref = nref;  /* Needed by assembler. */
    }
    hisubst[ref] = hi;  /* Store hiword substitution. */
  }
  if (snref == nins) {  /* Substitution for last snapshot. */
    snap->ref = J->cur.nins;
    split_subst_snap(J, snap, oir);
  }

  /* Add PHI marks. */
  for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) {
    IRIns *ir = IR(ref);
    if (ir->o != IR_PHI) break;
    if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t);
    if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t);
  }
}

/* Protected callback for split pass. */
static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud)
{
  jit_State *J = (jit_State *)ud;
  split_ir(J);
  UNUSED(L); UNUSED(dummy);
  return NULL;
}

#if defined(LUA_USE_ASSERT) || LJ_SOFTFP
/* Slow, but sure way to check whether a SPLIT pass is needed. */
static int split_needsplit(jit_State *J)
{
  IRIns *ir, *irend;
  IRRef ref;
  for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++)
    if (LJ_SOFTFP ? irt_is64orfp(ir->t) : irt_isint64(ir->t))
      return 1;
  if (LJ_SOFTFP) {
    for (ref = J->chain[IR_SLOAD]; ref; ref = IR(ref)->prev)
      if ((IR(ref)->op2 & IRSLOAD_CONVERT))
        return 1;
    if (J->chain[IR_TOBIT])
      return 1;
  }
  for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) {
    IRType st = (IR(ref)->op2 & IRCONV_SRCMASK);
    if ((LJ_SOFTFP && (st == IRT_NUM || st == IRT_FLOAT)) ||
        st == IRT_I64 || st == IRT_U64)
      return 1;
  }
  return 0;  /* Nope. */
}
#endif

/* SPLIT pass. */
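/* The pass runs in a protected context: if emitting the new IR throws,
** e.g. on IR buffer overflow or out of memory, the trace is reset below
** and the error is rethrown. */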
void lj_opt_split(jit_State *J)
{
#if LJ_SOFTFP
  if (!J->needsplit)
    J->needsplit = split_needsplit(J);
#else
  lj_assertJ(J->needsplit >= split_needsplit(J), "bad SPLIT state");
#endif
  if (J->needsplit) {
    int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
    if (errcode) {
      /* Completely reset the trace to avoid inconsistent dump on abort. */
      J->cur.nins = J->cur.nk = REF_BASE;
      J->cur.nsnap = 0;
      lj_err_throw(J->L, errcode);  /* Propagate errors. */
    }
  }
}

#undef IR

#endif