beta-0.89.2
[luatex.git] / source / libs / luajit / LuaJIT-src / src / lj_opt_split.c
blob81ded6c0a01c045ae0f68d81811d9e49a76f08d3
1 /*
2 ** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions.
3 ** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
4 */
6 #define lj_opt_split_c
7 #define LUA_CORE
9 #include "lj_obj.h"
11 #if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI))
13 #include "lj_err.h"
14 #include "lj_buf.h"
15 #include "lj_ir.h"
16 #include "lj_jit.h"
17 #include "lj_ircall.h"
18 #include "lj_iropt.h"
19 #include "lj_vm.h"
21 /* SPLIT pass:
23 ** This pass splits up 64 bit IR instructions into multiple 32 bit IR
24 ** instructions. It's only active for soft-float targets or for 32 bit CPUs
25 ** which lack native 64 bit integer operations (the FFI is currently the
26 ** only emitter for 64 bit integer instructions).
28 ** Splitting the IR in a separate pass keeps each 32 bit IR assembler
29 ** backend simple. Only a small amount of extra functionality needs to be
30 ** implemented. This is much easier than adding support for allocating
31 ** register pairs to each backend (believe me, I tried). A few simple, but
32 ** important optimizations can be performed by the SPLIT pass, which would
33 ** be tedious to do in the backend.
35 ** The basic idea is to replace each 64 bit IR instruction with its 32 bit
36 ** equivalent plus an extra HIOP instruction. The split IR is not passed
37 ** through FOLD or any other optimizations, so each HIOP is guaranteed to
38 ** immediately follow its counterpart. The actual functionality of HIOP is
39 ** inferred from the previous instruction.
41 ** The operands of HIOP hold the hiword input references. The output of HIOP
42 ** is the hiword output reference, which is also used to hold the hiword
43 ** register or spill slot information. The register allocator treats this
44 ** instruction independently of any other instruction, which improves code
45 ** quality compared to using fixed register pairs.
47 ** It's easier to split up some instructions into two regular 32 bit
48 ** instructions. E.g. XLOAD is split up into two XLOADs with two different
49 ** addresses. Obviously 64 bit constants need to be split up into two 32 bit
50 ** constants, too. Some hiword instructions can be entirely omitted, e.g.
51 ** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls
52 ** are split up into two 32 bit arguments each.
54 ** On soft-float targets, floating-point instructions are directly converted
55 ** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX).
56 ** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump).
58 ** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with
59 ** two int64_t fields:
61 ** 0100 p32 ADD base +8
62 ** 0101 i64 XLOAD 0100
63 ** 0102 i64 ADD 0101 +1
64 ** 0103 p32 ADD base +16
65 ** 0104 i64 XSTORE 0103 0102
67 ** mov rax, [esi+0x8]
68 ** add rax, +0x01
69 ** mov [esi+0x10], rax
71 ** Here's the transformed IR and the x86 machine code after the SPLIT pass:
73 ** 0100 p32 ADD base +8
74 ** 0101 int XLOAD 0100
75 ** 0102 p32 ADD base +12
76 ** 0103 int XLOAD 0102
77 ** 0104 int ADD 0101 +1
78 ** 0105 int HIOP 0103 +0
79 ** 0106 p32 ADD base +16
80 ** 0107 int XSTORE 0106 0104
81 ** 0108 int HIOP 0106 0105
83 ** mov eax, [esi+0x8]
84 ** mov ecx, [esi+0xc]
85 ** add eax, +0x01
86 ** adc ecx, +0x00
87 ** mov [esi+0x10], eax
88 ** mov [esi+0x14], ecx
90 ** You may notice the reassociated hiword address computation, which is
91 ** later fused into the mov operands by the assembler.
94 /* Some local macros to save typing. Undef'd at the end. */
95 #define IR(ref) (&J->cur.ir[(ref)])
97 /* Directly emit the transformed IR without updating chains etc. */
98 static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2)
100 IRRef nref = lj_ir_nextins(J);
101 IRIns *ir = IR(nref);
102 ir->ot = ot;
103 ir->op1 = op1;
104 ir->op2 = op2;
105 return nref;
108 #if LJ_SOFTFP
109 /* Emit a (checked) number to integer conversion. */
110 static IRRef split_num2int(jit_State *J, IRRef lo, IRRef hi, int check)
112 IRRef tmp, res;
113 #if LJ_LE
114 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), lo, hi);
115 #else
116 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hi, lo);
117 #endif
118 res = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_softfp_d2i);
119 if (check) {
120 tmp = split_emit(J, IRTI(IR_CALLN), res, IRCALL_softfp_i2d);
121 split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
122 split_emit(J, IRTGI(IR_EQ), tmp, lo);
123 split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), tmp+1, hi);
125 return res;
128 /* Emit a CALLN with one split 64 bit argument. */
129 static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
130 IRIns *ir, IRCallID id)
132 IRRef tmp, op1 = ir->op1;
133 J->cur.nins--;
134 #if LJ_LE
135 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
136 #else
137 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
138 #endif
139 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
140 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
142 #endif
144 /* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
145 static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
146 IRIns *ir, IRCallID id)
148 IRRef tmp, op1 = ir->op1, op2 = ir->op2;
149 J->cur.nins--;
150 #if LJ_LE
151 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
152 #else
153 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
154 #endif
155 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
156 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
157 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
160 /* Emit a CALLN with two split 64 bit arguments. */
161 static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
162 IRIns *ir, IRCallID id)
164 IRRef tmp, op1 = ir->op1, op2 = ir->op2;
165 J->cur.nins--;
166 #if LJ_LE
167 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
168 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
169 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
170 #else
171 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
172 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
173 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
174 #endif
175 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
176 return split_emit(J,
177 IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
178 tmp, tmp);
181 /* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */
182 static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
184 IRRef nref = oir[ref].prev;
185 IRIns *ir = IR(nref);
186 int32_t ofs = 4;
187 if (ir->o == IR_KPTR)
188 return lj_ir_kptr(J, (char *)ir_kptr(ir) + ofs);
189 if (ir->o == IR_ADD && irref_isk(ir->op2) && !irt_isphi(oir[ref].t)) {
190 /* Reassociate address. */
191 ofs += IR(ir->op2)->i;
192 nref = ir->op1;
193 if (ofs == 0) return nref;
195 return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs));
198 #if LJ_HASFFI
199 static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
200 IRIns *oir, IRIns *nir, IRIns *ir)
202 IROp op = ir->o;
203 IRRef kref = nir->op2;
204 if (irref_isk(kref)) { /* Optimize constant shifts. */
205 int32_t k = (IR(kref)->i & 63);
206 IRRef lo = nir->op1, hi = hisubst[ir->op1];
207 if (op == IR_BROL || op == IR_BROR) {
208 if (op == IR_BROR) k = (-k & 63);
209 if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }
210 if (k == 0) {
211 passthrough:
212 J->cur.nins--;
213 ir->prev = lo;
214 return hi;
215 } else {
216 TRef k1, k2;
217 IRRef t1, t2, t3, t4;
218 J->cur.nins--;
219 k1 = lj_ir_kint(J, k);
220 k2 = lj_ir_kint(J, (-k & 31));
221 t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
222 t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
223 t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
224 t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
225 ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);
226 return split_emit(J, IRTI(IR_BOR), t2, t3);
228 } else if (k == 0) {
229 goto passthrough;
230 } else if (k < 32) {
231 if (op == IR_BSHL) {
232 IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
233 IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
234 return split_emit(J, IRTI(IR_BOR), t1, t2);
235 } else {
236 IRRef t1 = ir->prev, t2;
237 lua_assert(op == IR_BSHR || op == IR_BSAR);
238 nir->o = IR_BSHR;
239 t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
240 ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
241 return split_emit(J, IRTI(op), hi, kref);
243 } else {
244 if (op == IR_BSHL) {
245 if (k == 32)
246 J->cur.nins--;
247 else
248 lo = ir->prev;
249 ir->prev = lj_ir_kint(J, 0);
250 return lo;
251 } else {
252 lua_assert(op == IR_BSHR || op == IR_BSAR);
253 if (k == 32) {
254 J->cur.nins--;
255 ir->prev = hi;
256 } else {
257 nir->op1 = hi;
259 if (op == IR_BSHR)
260 return lj_ir_kint(J, 0);
261 else
262 return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));
266 return split_call_li(J, hisubst, oir, ir,
267 op - IR_BSHL + IRCALL_lj_carith_shl64);
270 static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
271 IRIns *nir, IRIns *ir)
273 IROp op = ir->o;
274 IRRef hi, kref = nir->op2;
275 if (irref_isk(kref)) { /* Optimize bit operations with lo constant. */
276 int32_t k = IR(kref)->i;
277 if (k == 0 || k == -1) {
278 if (op == IR_BAND) k = ~k;
279 if (k == 0) {
280 J->cur.nins--;
281 ir->prev = nir->op1;
282 } else if (op == IR_BXOR) {
283 nir->o = IR_BNOT;
284 nir->op2 = 0;
285 } else {
286 J->cur.nins--;
287 ir->prev = kref;
291 hi = hisubst[ir->op1];
292 kref = hisubst[ir->op2];
293 if (irref_isk(kref)) { /* Optimize bit operations with hi constant. */
294 int32_t k = IR(kref)->i;
295 if (k == 0 || k == -1) {
296 if (op == IR_BAND) k = ~k;
297 if (k == 0) {
298 return hi;
299 } else if (op == IR_BXOR) {
300 return split_emit(J, IRTI(IR_BNOT), hi, 0);
301 } else {
302 return kref;
306 return split_emit(J, IRTI(op), hi, kref);
308 #endif
310 /* Substitute references of a snapshot. */
311 static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
313 SnapEntry *map = &J->cur.snapmap[snap->mapofs];
314 MSize n, nent = snap->nent;
315 for (n = 0; n < nent; n++) {
316 SnapEntry sn = map[n];
317 IRIns *ir = &oir[snap_ref(sn)];
318 if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn))))
319 map[n] = ((sn & 0xffff0000) | ir->prev);
323 /* Transform the old IR to the new IR. */
324 static void split_ir(jit_State *J)
326 IRRef nins = J->cur.nins, nk = J->cur.nk;
327 MSize irlen = nins - nk;
328 MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
329 IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need);
330 IRRef1 *hisubst;
331 IRRef ref, snref;
332 SnapShot *snap;
334 /* Copy old IR to buffer. */
335 memcpy(oir, IR(nk), irlen*sizeof(IRIns));
336 /* Bias hiword substitution table and old IR. Loword kept in field prev. */
337 hisubst = (IRRef1 *)&oir[irlen] - nk;
338 oir -= nk;
340 /* Remove all IR instructions, but retain IR constants. */
341 J->cur.nins = REF_FIRST;
342 J->loopref = 0;
344 /* Process constants and fixed references. */
345 for (ref = nk; ref <= REF_BASE; ref++) {
346 IRIns *ir = &oir[ref];
347 if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
348 /* Split up 64 bit constant. */
349 TValue tv = *ir_k64(ir);
350 ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
351 hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
352 } else {
353 ir->prev = ref; /* Identity substitution for loword. */
354 hisubst[ref] = 0;
358 /* Process old IR instructions. */
359 snap = J->cur.snap;
360 snref = snap->ref;
361 for (ref = REF_FIRST; ref < nins; ref++) {
362 IRIns *ir = &oir[ref];
363 IRRef nref = lj_ir_nextins(J);
364 IRIns *nir = IR(nref);
365 IRRef hi = 0;
367 if (ref >= snref) {
368 snap->ref = nref;
369 split_subst_snap(J, snap++, oir);
370 snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0;
373 /* Copy-substitute old instruction to new instruction. */
374 nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
375 nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev;
376 ir->prev = nref; /* Loword substitution. */
377 nir->o = ir->o;
378 nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);
379 hisubst[ref] = 0;
381 /* Split 64 bit instructions. */
382 #if LJ_SOFTFP
383 if (irt_isnum(ir->t)) {
384 nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */
385 /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
386 switch (ir->o) {
387 case IR_ADD:
388 hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
389 break;
390 case IR_SUB:
391 hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
392 break;
393 case IR_MUL:
394 hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
395 break;
396 case IR_DIV:
397 hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
398 break;
399 case IR_POW:
400 hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
401 break;
402 case IR_FPMATH:
403 /* Try to rejoin pow from EXP2, MUL and LOG2. */
404 if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) {
405 IRIns *irp = IR(nir->op1);
406 if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) {
407 IRIns *irm4 = IR(irp->op1);
408 IRIns *irm3 = IR(irm4->op1);
409 IRIns *irm12 = IR(irm3->op1);
410 IRIns *irl1 = IR(irm12->op1);
411 if (irm12->op1 > J->loopref && irl1->o == IR_CALLN &&
412 irl1->op2 == IRCALL_lj_vm_log2) {
413 IRRef tmp = irl1->op1; /* Recycle first two args from LOG2. */
414 IRRef arg3 = irm3->op2, arg4 = irm4->op2;
415 J->cur.nins--;
416 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3);
417 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4);
418 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow);
419 hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
420 break;
424 hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
425 break;
426 case IR_ATAN2:
427 hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2);
428 break;
429 case IR_LDEXP:
430 hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
431 break;
432 case IR_NEG: case IR_ABS:
433 nir->o = IR_CONV; /* Pass through loword. */
434 nir->op2 = (IRT_INT << 5) | IRT_INT;
435 hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
436 hisubst[ir->op1], hisubst[ir->op2]);
437 break;
438 case IR_SLOAD:
439 if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */
440 nir->op2 &= ~IRSLOAD_CONVERT;
441 ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref,
442 IRCALL_softfp_i2d);
443 hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
444 break;
446 /* fallthrough */
447 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
448 case IR_STRTO:
449 hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
450 break;
451 case IR_XLOAD: {
452 IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */
453 J->cur.nins--;
454 hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */
455 nref = lj_ir_nextins(J);
456 nir = IR(nref);
457 *nir = inslo; /* Re-emit lo XLOAD immediately before hi XLOAD. */
458 hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
459 #if LJ_LE
460 ir->prev = nref;
461 #else
462 ir->prev = hi; hi = nref;
463 #endif
464 break;
466 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE:
467 split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
468 break;
469 case IR_CONV: { /* Conversion to number. Others handled below. */
470 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
471 UNUSED(st);
472 #if LJ_32 && LJ_HASFFI
473 if (st == IRT_I64 || st == IRT_U64) {
474 hi = split_call_l(J, hisubst, oir, ir,
475 st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d);
476 break;
478 #endif
479 lua_assert(st == IRT_INT ||
480 (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)));
481 nir->o = IR_CALLN;
482 #if LJ_32 && LJ_HASFFI
483 nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
484 st == IRT_FLOAT ? IRCALL_softfp_f2d :
485 IRCALL_softfp_ui2d;
486 #else
487 nir->op2 = IRCALL_softfp_i2d;
488 #endif
489 hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
490 break;
492 case IR_CALLN:
493 case IR_CALLL:
494 case IR_CALLS:
495 case IR_CALLXS:
496 goto split_call;
497 case IR_PHI:
498 if (nir->op1 == nir->op2)
499 J->cur.nins--; /* Drop useless PHIs. */
500 if (hisubst[ir->op1] != hisubst[ir->op2])
501 split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
502 hisubst[ir->op1], hisubst[ir->op2]);
503 break;
504 case IR_HIOP:
505 J->cur.nins--; /* Drop joining HIOP. */
506 ir->prev = nir->op1;
507 hi = nir->op2;
508 break;
509 default:
510 lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX);
511 hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
512 hisubst[ir->op1], hisubst[ir->op2]);
513 break;
515 } else
516 #endif
517 #if LJ_32 && LJ_HASFFI
518 if (irt_isint64(ir->t)) {
519 IRRef hiref = hisubst[ir->op1];
520 nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */
521 switch (ir->o) {
522 case IR_ADD:
523 case IR_SUB:
524 /* Use plain op for hiword if loword cannot produce a carry/borrow. */
525 if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
526 ir->prev = nir->op1; /* Pass through loword. */
527 nir->op1 = hiref; nir->op2 = hisubst[ir->op2];
528 hi = nref;
529 break;
531 /* fallthrough */
532 case IR_NEG:
533 hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
534 break;
535 case IR_MUL:
536 hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
537 break;
538 case IR_DIV:
539 hi = split_call_ll(J, hisubst, oir, ir,
540 irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
541 IRCALL_lj_carith_divu64);
542 break;
543 case IR_MOD:
544 hi = split_call_ll(J, hisubst, oir, ir,
545 irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
546 IRCALL_lj_carith_modu64);
547 break;
548 case IR_POW:
549 hi = split_call_ll(J, hisubst, oir, ir,
550 irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
551 IRCALL_lj_carith_powu64);
552 break;
553 case IR_BNOT:
554 hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
555 break;
556 case IR_BSWAP:
557 ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
558 hi = nref;
559 break;
560 case IR_BAND: case IR_BOR: case IR_BXOR:
561 hi = split_bitop(J, hisubst, nir, ir);
562 break;
563 case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
564 hi = split_bitshift(J, hisubst, oir, nir, ir);
565 break;
566 case IR_FLOAD:
567 lua_assert(ir->op2 == IRFL_CDATA_INT64);
568 hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
569 #if LJ_BE
570 ir->prev = hi; hi = nref;
571 #endif
572 break;
573 case IR_XLOAD:
574 hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2);
575 #if LJ_BE
576 ir->prev = hi; hi = nref;
577 #endif
578 break;
579 case IR_XSTORE:
580 split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]);
581 break;
582 case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */
583 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
584 #if LJ_SOFTFP
585 if (st == IRT_NUM) { /* NUM to 64 bit int conv. */
586 hi = split_call_l(J, hisubst, oir, ir,
587 irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul);
588 } else if (st == IRT_FLOAT) { /* FLOAT to 64 bit int conv. */
589 nir->o = IR_CALLN;
590 nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
591 hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
593 #else
594 if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */
595 hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
597 #endif
598 else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */
599 /* Drop cast, since assembler doesn't care. */
600 goto fwdlo;
601 } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */
602 IRRef k31 = lj_ir_kint(J, 31);
603 nir = IR(nref); /* May have been reallocated. */
604 ir->prev = nir->op1; /* Pass through loword. */
605 nir->o = IR_BSAR; /* hi = bsar(lo, 31). */
606 nir->op2 = k31;
607 hi = nref;
608 } else { /* Zero-extend to 64 bit. */
609 hi = lj_ir_kint(J, 0);
610 goto fwdlo;
612 break;
614 case IR_CALLXS:
615 goto split_call;
616 case IR_PHI: {
617 IRRef hiref2;
618 if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
619 nir->op1 == nir->op2)
620 J->cur.nins--; /* Drop useless PHIs. */
621 hiref2 = hisubst[ir->op2];
622 if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2))
623 split_emit(J, IRTI(IR_PHI), hiref, hiref2);
624 break;
626 case IR_HIOP:
627 J->cur.nins--; /* Drop joining HIOP. */
628 ir->prev = nir->op1;
629 hi = nir->op2;
630 break;
631 default:
632 lua_assert(ir->o <= IR_NE); /* Comparisons. */
633 split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
634 break;
636 } else
637 #endif
638 #if LJ_SOFTFP
639 if (ir->o == IR_SLOAD) {
640 if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from number to int. */
641 nir->op2 &= ~IRSLOAD_CONVERT;
642 if (!(nir->op2 & IRSLOAD_TYPECHECK))
643 nir->t.irt = IRT_INT; /* Drop guard. */
644 split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
645 ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t));
647 } else if (ir->o == IR_TOBIT) {
648 IRRef tmp, op1 = ir->op1;
649 J->cur.nins--;
650 #if LJ_LE
651 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
652 #else
653 tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
654 #endif
655 ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
656 } else if (ir->o == IR_TOSTR) {
657 if (hisubst[ir->op1]) {
658 if (irref_isk(ir->op1))
659 nir->op1 = ir->op1;
660 else
661 split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref);
663 } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) {
664 if (irref_isk(ir->op2) && hisubst[ir->op2])
665 nir->op2 = ir->op2;
666 } else
667 #endif
668 if (ir->o == IR_CONV) { /* See above, too. */
669 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
670 #if LJ_32 && LJ_HASFFI
671 if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */
672 #if LJ_SOFTFP
673 if (irt_isfloat(ir->t)) {
674 split_call_l(J, hisubst, oir, ir,
675 st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f);
676 J->cur.nins--; /* Drop unused HIOP. */
678 #else
679 if (irt_isfp(ir->t)) { /* 64 bit integer to FP conversion. */
680 ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
681 hisubst[ir->op1], nref);
683 #endif
684 else { /* Truncate to lower 32 bits. */
685 fwdlo:
686 ir->prev = nir->op1; /* Forward loword. */
687 /* Replace with NOP to avoid messing up the snapshot logic. */
688 nir->ot = IRT(IR_NOP, IRT_NIL);
689 nir->op1 = nir->op2 = 0;
692 #endif
693 #if LJ_SOFTFP && LJ_32 && LJ_HASFFI
694 else if (irt_isfloat(ir->t)) {
695 if (st == IRT_NUM) {
696 split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
697 J->cur.nins--; /* Drop unused HIOP. */
698 } else {
699 nir->o = IR_CALLN;
700 nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
702 } else if (st == IRT_FLOAT) {
703 nir->o = IR_CALLN;
704 nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
705 } else
706 #endif
707 #if LJ_SOFTFP
708 if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
709 if (irt_isguard(ir->t)) {
710 lua_assert(st == IRT_NUM && irt_isint(ir->t));
711 J->cur.nins--;
712 ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
713 } else {
714 split_call_l(J, hisubst, oir, ir,
715 #if LJ_32 && LJ_HASFFI
716 st == IRT_NUM ?
717 (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
718 (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)
719 #else
720 IRCALL_softfp_d2i
721 #endif
723 J->cur.nins--; /* Drop unused HIOP. */
726 #endif
727 } else if (ir->o == IR_CALLXS) {
728 IRRef hiref;
729 split_call:
730 hiref = hisubst[ir->op1];
731 if (hiref) {
732 IROpT ot = nir->ot;
733 IRRef op2 = nir->op2;
734 nir->ot = IRT(IR_CARG, IRT_NIL);
735 #if LJ_LE
736 nir->op2 = hiref;
737 #else
738 nir->op2 = nir->op1; nir->op1 = hiref;
739 #endif
740 ir->prev = nref = split_emit(J, ot, nref, op2);
742 if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
743 hi = split_emit(J,
744 IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
745 nref, nref);
746 } else if (ir->o == IR_CARG) {
747 IRRef hiref = hisubst[ir->op1];
748 if (hiref) {
749 IRRef op2 = nir->op2;
750 #if LJ_LE
751 nir->op2 = hiref;
752 #else
753 nir->op2 = nir->op1; nir->op1 = hiref;
754 #endif
755 ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
756 nir = IR(nref);
758 hiref = hisubst[ir->op2];
759 if (hiref) {
760 #if !LJ_TARGET_X86
761 int carg = 0;
762 IRIns *cir;
763 for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1))
764 carg++;
765 if ((carg & 1) == 0) { /* Align 64 bit arguments. */
766 IRRef op2 = nir->op2;
767 nir->op2 = REF_NIL;
768 nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
769 nir = IR(nref);
771 #endif
772 #if LJ_BE
773 { IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; }
774 #endif
775 ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref);
777 } else if (ir->o == IR_CNEWI) {
778 if (hisubst[ir->op2])
779 split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]);
780 } else if (ir->o == IR_LOOP) {
781 J->loopref = nref; /* Needed by assembler. */
783 hisubst[ref] = hi; /* Store hiword substitution. */
785 if (snref == nins) { /* Substitution for last snapshot. */
786 snap->ref = J->cur.nins;
787 split_subst_snap(J, snap, oir);
790 /* Add PHI marks. */
791 for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) {
792 IRIns *ir = IR(ref);
793 if (ir->o != IR_PHI) break;
794 if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t);
795 if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t);
799 /* Protected callback for split pass. */
800 static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud)
802 jit_State *J = (jit_State *)ud;
803 split_ir(J);
804 UNUSED(L); UNUSED(dummy);
805 return NULL;
808 #if defined(LUA_USE_ASSERT) || LJ_SOFTFP
809 /* Slow, but sure way to check whether a SPLIT pass is needed. */
810 static int split_needsplit(jit_State *J)
812 IRIns *ir, *irend;
813 IRRef ref;
814 for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++)
815 if (LJ_SOFTFP ? irt_is64orfp(ir->t) : irt_isint64(ir->t))
816 return 1;
817 if (LJ_SOFTFP) {
818 for (ref = J->chain[IR_SLOAD]; ref; ref = IR(ref)->prev)
819 if ((IR(ref)->op2 & IRSLOAD_CONVERT))
820 return 1;
821 if (J->chain[IR_TOBIT])
822 return 1;
824 for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) {
825 IRType st = (IR(ref)->op2 & IRCONV_SRCMASK);
826 if ((LJ_SOFTFP && (st == IRT_NUM || st == IRT_FLOAT)) ||
827 st == IRT_I64 || st == IRT_U64)
828 return 1;
830 return 0; /* Nope. */
832 #endif
834 /* SPLIT pass. */
835 void lj_opt_split(jit_State *J)
837 #if LJ_SOFTFP
838 if (!J->needsplit)
839 J->needsplit = split_needsplit(J);
840 #else
841 lua_assert(J->needsplit >= split_needsplit(J)); /* Verify flag. */
842 #endif
843 if (J->needsplit) {
844 int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
845 if (errcode) {
846 /* Completely reset the trace to avoid inconsistent dump on abort. */
847 J->cur.nins = J->cur.nk = REF_BASE;
848 J->cur.nsnap = 0;
849 lj_err_throw(J->L, errcode); /* Propagate errors. */
854 #undef IR
856 #endif