/*
** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions.
** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_split_c
#define LUA_CORE

#include "lj_obj.h"

#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI))

#include "lj_err.h"
#include "lj_buf.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_ircall.h"
#include "lj_iropt.h"
#include "lj_vm.h"
/* SPLIT pass:
**
** This pass splits up 64 bit IR instructions into multiple 32 bit IR
** instructions. It's only active for soft-float targets or for 32 bit CPUs
** which lack native 64 bit integer operations (the FFI is currently the
** only emitter for 64 bit integer instructions).
**
** Splitting the IR in a separate pass keeps each 32 bit IR assembler
** backend simple. Only a small amount of extra functionality needs to be
** implemented. This is much easier than adding support for allocating
** register pairs to each backend (believe me, I tried). A few simple, but
** important optimizations can be performed by the SPLIT pass, which would
** be tedious to do in the backend.
**
** The basic idea is to replace each 64 bit IR instruction with its 32 bit
** equivalent plus an extra HIOP instruction. The split IR is not passed
** through FOLD or any other optimizations, so each HIOP is guaranteed to
** immediately follow its counterpart. The actual functionality of HIOP is
** inferred from the previous instruction.
**
** The operands of HIOP hold the hiword input references. The output of HIOP
** is the hiword output reference, which is also used to hold the hiword
** register or spill slot information. The register allocator treats this
** instruction independently of any other instruction, which improves code
** quality compared to using fixed register pairs.
**
** It's easier to split up some instructions into two regular 32 bit
** instructions. E.g. XLOAD is split up into two XLOADs with two different
** addresses. Obviously 64 bit constants need to be split up into two 32 bit
** constants, too. Some hiword instructions can be entirely omitted, e.g.
** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls
** are split up into two 32 bit arguments each.
**
** On soft-float targets, floating-point instructions are directly converted
** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX).
** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump).
**
** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with
** two int64_t fields:
**
** 0100    p32 ADD    base  +8
** 0101    i64 XLOAD  0100
** 0102    i64 ADD    0101  +1
** 0103    p32 ADD    base  +16
** 0104    i64 XSTORE 0103  0102
**
** mov rax, [esi+0x8]
** add rax, +0x01
** mov [esi+0x10], rax
**
** Here's the transformed IR and the x86 machine code after the SPLIT pass:
**
** 0100    p32 ADD    base  +8
** 0101    int XLOAD  0100
** 0102    p32 ADD    base  +12
** 0103    int XLOAD  0102
** 0104    int ADD    0101  +1
** 0105    int HIOP   0103  +0
** 0106    p32 ADD    base  +16
** 0107    int XSTORE 0106  0104
** 0108    int HIOP   0106  0105
**
** mov eax, [esi+0x8]
** mov ecx, [esi+0xc]
** add eax, +0x01
** adc ecx, +0x00
** mov [esi+0x10], eax
** mov [esi+0x14], ecx
**
** You may notice the reassociated hiword address computation, which is
** later fused into the mov operands by the assembler.
*/
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)		(&J->cur.ir[(ref)])

/* Directly emit the transformed IR without updating chains etc. */
static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2)
{
  IRRef nref = lj_ir_nextins(J);
  IRIns *ir = IR(nref);
  ir->ot = ot;
  ir->op1 = op1;
  ir->op2 = op2;
  return nref;
}
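
/* Annotation (illustrative, hypothetical refs): the transform loop below
** pairs each copy-substituted loword op with a hiword op emitted through
** split_emit. For a 64 bit integer ADD this yields:
**
**   0104 int ADD  0101 +1    <- loword op (copy-substituted)
**   0105 int HIOP 0103 +0    <- hiword op, emitted via split_emit
**
** split_emit bumps J->cur.nins but deliberately skips chain updates,
** since the split IR is never fed back into FOLD or CSE.
*/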

#if LJ_SOFTFP
/* Emit a (checked) number to integer conversion. */
static IRRef split_num2int(jit_State *J, IRRef lo, IRRef hi, int check)
{
  IRRef tmp, res;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), lo, hi);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hi, lo);
#endif
  res = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_softfp_d2i);
  if (check) {
    tmp = split_emit(J, IRTI(IR_CALLN), res, IRCALL_softfp_i2d);
    split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
    split_emit(J, IRTGI(IR_EQ), tmp, lo);
    split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), tmp+1, hi);
  }
  return res;
}
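
/* Annotation (hypothetical refs, LJ_LE argument order): for a checked
** conversion the calls above emit roughly:
**
**   0021 nil CARG  lo   hi
**   0022 int CALLN 0021 softfp_d2i   <- res
**   0023 int CALLN 0022 softfp_i2d   <- convert back for the check
**   0024 sfp HIOP  0023 0023
**   0025 int EQ    0023 lo           <- guard: loword round-trips
**   0026 sfp HIOP  0024 hi           <- guard: hiword round-trips
*/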

/* Emit a CALLN with one split 64 bit argument. */
static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			  IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
#endif

/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			   IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}

/* Emit a CALLN with two split 64 bit arguments. */
static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			   IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J,
    IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
    tmp, tmp);
}
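
/* Annotation (hypothetical refs and names, LJ_LE argument order): for a
** soft-float ADD the sequence above emits roughly:
**
**   0030 nil CARG  a.lo a.hi
**   0031 nil CARG  0030 b.lo
**   0032 nil CARG  0031 b.hi
**   0033 int CALLN 0032 softfp_add   <- loword result (ir->prev)
**   0034 sfp HIOP  0033 0033         <- hiword result (returned)
**
** The J->cur.nins-- first undoes the copy-substituted loword op emitted
** by the main loop, since the call sequence replaces it entirely.
*/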

/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */
static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
{
  IRRef nref = oir[ref].prev;
  IRIns *ir = IR(nref);
  int32_t ofs = 4;
  if (ir->o == IR_KPTR)
    return lj_ir_kptr(J, (char *)ir_kptr(ir) + ofs);
  if (ir->o == IR_ADD && irref_isk(ir->op2) && !irt_isphi(oir[ref].t)) {
    /* Reassociate address. */
    ofs += IR(ir->op2)->i;
    nref = ir->op1;
    if (ofs == 0) return nref;
  }
  return split_emit(J, IRT(IR_ADD, IRT_PTR), nref, lj_ir_kint(J, ofs));
}
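
/* Annotation, echoing the file header example: for a LE hiword access of
** [base+8] the code above emits a single ADD base +12 instead of
** (base+8)+4, which the backend can later fuse into the addressing mode.
*/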

#if LJ_HASFFI
static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
			    IRIns *oir, IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize constant shifts. */
    int32_t k = (IR(kref)->i & 63);
    IRRef lo = nir->op1, hi = hisubst[ir->op1];
    if (op == IR_BROL || op == IR_BROR) {
      if (op == IR_BROR) k = (-k & 63);
      if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }
      if (k == 0) {
      passthrough:
	J->cur.nins--;
	ir->prev = lo;
	return hi;
      } else {
	TRef k1, k2;
	IRRef t1, t2, t3, t4;
	J->cur.nins--;
	k1 = lj_ir_kint(J, k);
	k2 = lj_ir_kint(J, (-k & 31));
	t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
	t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
	t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
	t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
	ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);
	return split_emit(J, IRTI(IR_BOR), t2, t3);
      }
    } else if (k == 0) {
      goto passthrough;
    } else if (k < 32) {
      if (op == IR_BSHL) {
	IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
	IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
	return split_emit(J, IRTI(IR_BOR), t1, t2);
      } else {
	IRRef t1 = ir->prev, t2;
	lua_assert(op == IR_BSHR || op == IR_BSAR);
	nir->o = IR_BSHR;
	t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
	ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
	return split_emit(J, IRTI(op), hi, kref);
      }
    } else {
      if (op == IR_BSHL) {
	if (k == 32)
	  J->cur.nins--;
	else
	  lo = ir->prev;
	ir->prev = lj_ir_kint(J, 0);
	return lo;
      } else {
	lua_assert(op == IR_BSHR || op == IR_BSAR);
	if (k == 32) {
	  J->cur.nins--;
	  ir->prev = hi;
	} else {
	  nir->op1 = hi;
	}
	if (op == IR_BSHR)
	  return lj_ir_kint(J, 0);
	else
	  return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));
      }
    }
  }
  return split_call_li(J, hisubst, oir, ir,
		       op - IR_BSHL + IRCALL_lj_carith_shl64);
}
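
/* Annotation (worked out from the code above, not from the original
** source): for a constant shift 0 < k < 32 the 64 bit result is composed
** from the 32 bit halves as:
**
**   BSHL: lo' = lo << k                       hi' = (hi << k) | (lo >> (32-k))
**   BSHR: lo' = (lo >> k) | (hi << (32-k))    hi' = hi >> k   (BSAR: >>s)
**
** For k >= 32 one half is a shift by k-32 (the emitted 32 bit shift keeps
** the original count, assuming the IR shift ops mask the count to 5 bits)
** and the other half becomes 0, or hi >> 31 for BSAR. Rotates swap lo/hi
** first when k >= 32 and build both words from two shifts each.
** Non-constant shifts fall back to the lj_carith_*64 helper call at the
** end of the function.
*/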

static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
			 IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef hi, kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize bit operations with lo constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      if (op == IR_BAND) k = ~k;
      if (k == 0) {
	J->cur.nins--;
	ir->prev = nir->op1;
      } else if (op == IR_BXOR) {
	nir->o = IR_BNOT;
	nir->op2 = 0;
      } else {
	J->cur.nins--;
	ir->prev = kref;
      }
    }
  }
  hi = hisubst[ir->op1];
  kref = hisubst[ir->op2];
  if (irref_isk(kref)) {  /* Optimize bit operations with hi constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      if (op == IR_BAND) k = ~k;
      if (k == 0) {
	return hi;
      } else if (op == IR_BXOR) {
	return split_emit(J, IRTI(IR_BNOT), hi, 0);
      } else {
	return kref;
      }
    }
  }
  return split_emit(J, IRTI(op), hi, kref);
}
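
/* Annotation: per 32 bit half, the constant cases above fold to
** x & 0 -> 0, x & -1 -> x, x | 0 -> x, x | -1 -> -1, x ^ 0 -> x and
** x ^ -1 -> BNOT x; everything else emits a plain 32 bit op for the
** hiword.
*/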
#endif

/* Substitute references of a snapshot. */
static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
{
  SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    IRIns *ir = &oir[snap_ref(sn)];
    if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn))))
      map[n] = ((sn & 0xffff0000) | ir->prev);
  }
}
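
/* Annotation: a SnapEntry keeps slot and flag bits in its upper 16 bits
** and the IR reference in the lower 16 bits, so only the low half is
** rewritten with the loword substitution (ir->prev). SNAP_SOFTFPNUM
** entries additionally rely on the "hi ref == lo ref + 1" invariant
** that split_ir establishes for soft-float number results.
*/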

/* Transform the old IR to the new IR. */
static void split_ir(jit_State *J)
{
  IRRef nins = J->cur.nins, nk = J->cur.nk;
  MSize irlen = nins - nk;
  MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
  IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need);
  IRRef1 *hisubst;
  IRRef ref, snref;
  SnapShot *snap;

  /* Copy old IR to buffer. */
  memcpy(oir, IR(nk), irlen*sizeof(IRIns));
  /* Bias hiword substitution table and old IR. Loword kept in field prev. */
  hisubst = (IRRef1 *)&oir[irlen] - nk;
  oir -= nk;

  /* Remove all IR instructions, but retain IR constants. */
  J->cur.nins = REF_FIRST;
  J->loopref = 0;

  /* Process constants and fixed references. */
  for (ref = nk; ref <= REF_BASE; ref++) {
    IRIns *ir = &oir[ref];
    if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
      /* Split up 64 bit constant. */
      TValue tv = *ir_k64(ir);
      ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
      hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
    } else {
      ir->prev = ref;  /* Identity substitution for loword. */
      hisubst[ref] = 0;
    }
  }
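
  /* Annotation (hypothetical value): a KINT64 0x123456789 is split into
  ** KINT 0x23456789 (loword, stored in ir->prev) and KINT 0x1 (hiword,
  ** stored in hisubst[ref]).
  */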

  /* Process old IR instructions. */
  snap = J->cur.snap;
  snref = snap->ref;
  for (ref = REF_FIRST; ref < nins; ref++) {
    IRIns *ir = &oir[ref];
    IRRef nref = lj_ir_nextins(J);
    IRIns *nir = IR(nref);
    IRRef hi = 0;

    if (ref >= snref) {
      snap->ref = nref;
      split_subst_snap(J, snap++, oir);
      snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0;
    }

    /* Copy-substitute old instruction to new instruction. */
    nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
    nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev;
    ir->prev = nref;  /* Loword substitution. */
    nir->o = ir->o;
    nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);
    hisubst[ref] = 0;

    /* Split 64 bit instructions. */
#if LJ_SOFTFP
    if (irt_isnum(ir->t)) {
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
      switch (ir->o) {
      case IR_ADD:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
	break;
      case IR_SUB:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
	break;
      case IR_MUL:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
	break;
      case IR_DIV:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
	break;
      case IR_POW:
	hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
	break;
      case IR_FPMATH:
	/* Try to rejoin pow from EXP2, MUL and LOG2. */
	if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) {
	  IRIns *irp = IR(nir->op1);
	  if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) {
	    IRIns *irm4 = IR(irp->op1);
	    IRIns *irm3 = IR(irm4->op1);
	    IRIns *irm12 = IR(irm3->op1);
	    IRIns *irl1 = IR(irm12->op1);
	    if (irm12->op1 > J->loopref && irl1->o == IR_CALLN &&
		irl1->op2 == IRCALL_lj_vm_log2) {
	      IRRef tmp = irl1->op1;  /* Recycle first two args from LOG2. */
	      IRRef arg3 = irm3->op2, arg4 = irm4->op2;
	      J->cur.nins--;
	      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3);
	      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4);
	      ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow);
	      hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
	      break;
	    }
	  }
	}
	hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
	break;
      case IR_ATAN2:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2);
	break;
      case IR_LDEXP:
	hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
	break;
      case IR_NEG: case IR_ABS:
	nir->o = IR_CONV;  /* Pass through loword. */
	nir->op2 = (IRT_INT << 5) | IRT_INT;
	hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
			hisubst[ir->op1], hisubst[ir->op2]);
	break;
      case IR_SLOAD:
	if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from int to number. */
	  nir->op2 &= ~IRSLOAD_CONVERT;
	  ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref,
				       IRCALL_softfp_i2d);
	  hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	  break;
	}
	/* fallthrough */
      case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
      case IR_STRTO:
	hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	break;
      case IR_XLOAD: {
	IRIns inslo = *nir;  /* Save/undo the emit of the lo XLOAD. */
	J->cur.nins--;
	hi = split_ptr(J, oir, ir->op1);  /* Insert the hiref ADD. */
#if LJ_BE
	hi = split_emit(J, IRT(IR_XLOAD, IRT_INT), hi, ir->op2);
	inslo.t.irt = IRT_SOFTFP | (inslo.t.irt & IRT_GUARD);
#endif
	nref = lj_ir_nextins(J);
	nir = IR(nref);
	*nir = inslo;  /* Re-emit lo XLOAD. */
#if LJ_LE
	hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
	ir->prev = nref;
#else
	ir->prev = hi; hi = nref;
#endif
	break;
      }
      case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE:
	split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
	break;
      case IR_CONV: {  /* Conversion to number. Others handled below. */
	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
	UNUSED(st);
#if LJ_32 && LJ_HASFFI
	if (st == IRT_I64 || st == IRT_U64) {
	  hi = split_call_l(J, hisubst, oir, ir,
		 st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d);
	  break;
	}
#endif
	lua_assert(st == IRT_INT ||
		   (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)));
	nir->o = IR_CALLN;
#if LJ_32 && LJ_HASFFI
	nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
		   st == IRT_FLOAT ? IRCALL_softfp_f2d :
		   IRCALL_softfp_ui2d;
#else
	nir->op2 = IRCALL_softfp_i2d;
#endif
	hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	break;
      }
      case IR_CALLN:
      case IR_CALLL:
      case IR_CALLS:
      case IR_CALLXS:
	goto split_call;
      case IR_PHI:
	if (nir->op1 == nir->op2)
	  J->cur.nins--;  /* Drop useless PHIs. */
	if (hisubst[ir->op1] != hisubst[ir->op2])
	  split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
		     hisubst[ir->op1], hisubst[ir->op2]);
	break;
      case IR_HIOP:
	J->cur.nins--;  /* Drop joining HIOP. */
	ir->prev = nir->op1;
	hi = nir->op2;
	break;
      default:
	lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX);
	hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
			hisubst[ir->op1], hisubst[ir->op2]);
	break;
      }
    } else
#endif
#if LJ_32 && LJ_HASFFI
    if (irt_isint64(ir->t)) {
      IRRef hiref = hisubst[ir->op1];
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      switch (ir->o) {
      case IR_ADD:
      case IR_SUB:
	/* Use plain op for hiword if loword cannot produce a carry/borrow. */
	if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
	  ir->prev = nir->op1;  /* Pass through loword. */
	  nir->op1 = hiref; nir->op2 = hisubst[ir->op2];
	  hi = nref;
	  break;
	}
	/* fallthrough */
      case IR_NEG:
	hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
	break;
      case IR_MUL:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
	break;
      case IR_DIV:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
					      IRCALL_lj_carith_divu64);
	break;
      case IR_MOD:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
					      IRCALL_lj_carith_modu64);
	break;
      case IR_POW:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
					      IRCALL_lj_carith_powu64);
	break;
      case IR_BNOT:
	hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
	break;
      case IR_BSWAP:
	ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
	hi = nref;
	break;
      case IR_BAND: case IR_BOR: case IR_BXOR:
	hi = split_bitop(J, hisubst, nir, ir);
	break;
      case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
	hi = split_bitshift(J, hisubst, oir, nir, ir);
	break;
      case IR_FLOAD:
	lua_assert(ir->op2 == IRFL_CDATA_INT64);
	hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
#if LJ_BE
	ir->prev = hi; hi = nref;
#endif
	break;
      case IR_XLOAD:
	hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2);
#if LJ_BE
	ir->prev = hi; hi = nref;
#endif
	break;
      case IR_XSTORE:
	split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]);
	break;
      case IR_CONV: {  /* Conversion to 64 bit integer. Others handled below. */
	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_SOFTFP
	if (st == IRT_NUM) {  /* NUM to 64 bit int conv. */
	  hi = split_call_l(J, hisubst, oir, ir,
		 irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul);
	} else if (st == IRT_FLOAT) {  /* FLOAT to 64 bit int conv. */
	  nir->o = IR_CALLN;
	  nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
	  hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
	}
#else
	if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit int conv. */
	  hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
	}
#endif
	else if (st == IRT_I64 || st == IRT_U64) {  /* 64/64 bit cast. */
	  /* Drop cast, since assembler doesn't care. But fwd both parts. */
	  hi = hiref;
	  goto fwdlo;
	} else if ((ir->op2 & IRCONV_SEXT)) {  /* Sign-extend to 64 bit. */
	  IRRef k31 = lj_ir_kint(J, 31);
	  nir = IR(nref);  /* May have been reallocated. */
	  ir->prev = nir->op1;  /* Pass through loword. */
	  nir->o = IR_BSAR;  /* hi = bsar(lo, 31). */
	  nir->op2 = k31;
	  hi = nref;
	} else {  /* Zero-extend to 64 bit. */
	  hi = lj_ir_kint(J, 0);
	  goto fwdlo;
	}
	break;
      }
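      /* Annotation: the extension cases above mirror standard 32->64 bit
      ** widening: sign-extension rewrites the copy-substituted op into
      ** hi = lo >> 31 (arithmetic shift replicates the sign bit), while
      ** zero-extension just pairs the forwarded loword with constant 0.
      */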
      case IR_CALLXS:
	goto split_call;
      case IR_PHI: {
	IRRef hiref2;
	if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
	    nir->op1 == nir->op2)
	  J->cur.nins--;  /* Drop useless PHIs. */
	hiref2 = hisubst[ir->op2];
	if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2))
	  split_emit(J, IRTI(IR_PHI), hiref, hiref2);
	break;
      }
      case IR_HIOP:
	J->cur.nins--;  /* Drop joining HIOP. */
	ir->prev = nir->op1;
	hi = nir->op2;
	break;
      default:
	lua_assert(ir->o <= IR_NE);  /* Comparisons. */
	split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
	break;
      }
    } else
#endif
#if LJ_SOFTFP
    if (ir->o == IR_SLOAD) {
      if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from number to int. */
	nir->op2 &= ~IRSLOAD_CONVERT;
	if (!(nir->op2 & IRSLOAD_TYPECHECK))
	  nir->t.irt = IRT_INT;  /* Drop guard. */
	split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t));
      }
    } else if (ir->o == IR_TOBIT) {
      IRRef tmp, op1 = ir->op1;
      J->cur.nins--;
#if LJ_LE
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
      ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
    } else if (ir->o == IR_TOSTR) {
      if (hisubst[ir->op1]) {
	if (irref_isk(ir->op1))
	  nir->op1 = ir->op1;
	else
	  split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref);
      }
    } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) {
      if (irref_isk(ir->op2) && hisubst[ir->op2])
	nir->op2 = ir->op2;
    } else
#endif
    if (ir->o == IR_CONV) {  /* See above, too. */
      IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_32 && LJ_HASFFI
      if (st == IRT_I64 || st == IRT_U64) {  /* Conversion from 64 bit int. */
#if LJ_SOFTFP
	if (irt_isfloat(ir->t)) {
	  split_call_l(J, hisubst, oir, ir,
		       st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f);
	  J->cur.nins--;  /* Drop unused HIOP. */
	}
#else
	if (irt_isfp(ir->t)) {  /* 64 bit integer to FP conversion. */
	  ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
				hisubst[ir->op1], nref);
	}
#endif
	else {  /* Truncate to lower 32 bits. */
	fwdlo:
	  ir->prev = nir->op1;  /* Forward loword. */
	  /* Replace with NOP to avoid messing up the snapshot logic. */
	  nir->ot = IRT(IR_NOP, IRT_NIL);
	  nir->op1 = nir->op2 = 0;
	}
      }
#endif
#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
      else if (irt_isfloat(ir->t)) {
	if (st == IRT_NUM) {
	  split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
	  J->cur.nins--;  /* Drop unused HIOP. */
	} else {
	  nir->o = IR_CALLN;
	  nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
	}
      } else if (st == IRT_FLOAT) {
	nir->o = IR_CALLN;
	nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
      } else
#endif
#if LJ_SOFTFP
      if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
	if (irt_isguard(ir->t)) {
	  lua_assert(st == IRT_NUM && irt_isint(ir->t));
	  J->cur.nins--;
	  ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
	} else {
	  split_call_l(J, hisubst, oir, ir,
#if LJ_32 && LJ_HASFFI
	    st == IRT_NUM ?
	      (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
	      (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)
#else
	    IRCALL_softfp_d2i
#endif
	  );
	  J->cur.nins--;  /* Drop unused HIOP. */
	}
      }
#endif
    } else if (ir->o == IR_CALLXS) {
      IRRef hiref;
    split_call:
      hiref = hisubst[ir->op1];
      if (hiref) {
	IROpT ot = nir->ot;
	IRRef op2 = nir->op2;
	nir->ot = IRT(IR_CARG, IRT_NIL);
#if LJ_LE
	nir->op2 = hiref;
#else
	nir->op2 = nir->op1; nir->op1 = hiref;
#endif
	ir->prev = nref = split_emit(J, ot, nref, op2);
      }
      if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
	hi = split_emit(J,
	  IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
	  nref, nref);
    } else if (ir->o == IR_CARG) {
      IRRef hiref = hisubst[ir->op1];
      if (hiref) {
	IRRef op2 = nir->op2;
#if LJ_LE
	nir->op2 = hiref;
#else
	nir->op2 = nir->op1; nir->op1 = hiref;
#endif
	ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
	nir = IR(nref);
      }
      hiref = hisubst[ir->op2];
      if (hiref) {
#if !LJ_TARGET_X86
	int carg = 0;
	IRIns *cir;
	for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1))
	  carg++;
	if ((carg & 1) == 0) {  /* Align 64 bit arguments. */
	  IRRef op2 = nir->op2;
	  nir->op2 = REF_NIL;
	  nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
	  nir = IR(nref);
	}
#endif
#if LJ_BE
	{ IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; }
#endif
	ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref);
      }
    } else if (ir->o == IR_CNEWI) {
      if (hisubst[ir->op2])
	split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]);
    } else if (ir->o == IR_LOOP) {
      J->loopref = nref;  /* Needed by assembler. */
    }
    hisubst[ref] = hi;  /* Store hiword substitution. */
  }
  if (snref == nins) {  /* Substitution for last snapshot. */
    snap->ref = J->cur.nins;
    split_subst_snap(J, snap, oir);
  }

  /* Add PHI marks. */
  for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) {
    IRIns *ir = IR(ref);
    if (ir->o != IR_PHI) break;
    if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t);
    if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t);
  }
}

/* Protected callback for split pass. */
static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud)
{
  jit_State *J = (jit_State *)ud;
  split_ir(J);
  UNUSED(L); UNUSED(dummy);
  return NULL;
}

#if defined(LUA_USE_ASSERT) || LJ_SOFTFP
/* Slow, but sure way to check whether a SPLIT pass is needed. */
static int split_needsplit(jit_State *J)
{
  IRIns *ir, *irend;
  IRRef ref;
  for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++)
    if (LJ_SOFTFP ? irt_is64orfp(ir->t) : irt_isint64(ir->t))
      return 1;
  if (LJ_SOFTFP) {
    for (ref = J->chain[IR_SLOAD]; ref; ref = IR(ref)->prev)
      if ((IR(ref)->op2 & IRSLOAD_CONVERT))
	return 1;
    if (J->chain[IR_TOBIT])
      return 1;
  }
  for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) {
    IRType st = (IR(ref)->op2 & IRCONV_SRCMASK);
    if ((LJ_SOFTFP && (st == IRT_NUM || st == IRT_FLOAT)) ||
	st == IRT_I64 || st == IRT_U64)
      return 1;
  }
  return 0;  /* Nope. */
}
#endif

/* SPLIT pass. */
void lj_opt_split(jit_State *J)
{
#if LJ_SOFTFP
  if (!J->needsplit)
    J->needsplit = split_needsplit(J);
#else
  lua_assert(J->needsplit >= split_needsplit(J));  /* Verify flag. */
#endif
  if (J->needsplit) {
    int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
    if (errcode) {
      /* Completely reset the trace to avoid inconsistent dump on abort. */
      J->cur.nins = J->cur.nk = REF_BASE;
      J->cur.nsnap = 0;
      lj_err_throw(J->L, errcode);  /* Propagate errors. */
    }
  }
}

#undef IR

#endif