PPC: Compile math.sqrt() to fsqrt instruction.
[luajit-2.0/celess22.git] / src / lj_opt_split.c
blobda2c71849c77221758c44326566a0c22e03f1239
1 /*
2 ** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions.
3 ** Copyright (C) 2005-2012 Mike Pall. See Copyright Notice in luajit.h
4 */
6 #define lj_opt_split_c
7 #define LUA_CORE
9 #include "lj_obj.h"
11 #if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI))
13 #include "lj_err.h"
14 #include "lj_str.h"
15 #include "lj_ir.h"
16 #include "lj_jit.h"
17 #include "lj_ircall.h"
18 #include "lj_iropt.h"
19 #include "lj_vm.h"
21 /* SPLIT pass:
23 ** This pass splits up 64 bit IR instructions into multiple 32 bit IR
24 ** instructions. It's only active for soft-float targets or for 32 bit CPUs
25 ** which lack native 64 bit integer operations (the FFI is currently the
26 ** only emitter for 64 bit integer instructions).
28 ** Splitting the IR in a separate pass keeps each 32 bit IR assembler
29 ** backend simple. Only a small amount of extra functionality needs to be
30 ** implemented. This is much easier than adding support for allocating
31 ** register pairs to each backend (believe me, I tried). A few simple, but
32 ** important optimizations can be performed by the SPLIT pass, which would
33 ** be tedious to do in the backend.
35 ** The basic idea is to replace each 64 bit IR instruction with its 32 bit
36 ** equivalent plus an extra HIOP instruction. The split IR is not passed
37 ** through FOLD or any other optimizations, so each HIOP is guaranteed to
38 ** immediately follow its counterpart. The actual functionality of HIOP is
39 ** inferred from the previous instruction.
41 ** The operands of HIOP hold the hiword input references. The output of HIOP
42 ** is the hiword output reference, which is also used to hold the hiword
43 ** register or spill slot information. The register allocator treats this
44 ** instruction independently of any other instruction, which improves code
45 ** quality compared to using fixed register pairs.
47 ** It's easier to split up some instructions into two regular 32 bit
48 ** instructions. E.g. XLOAD is split up into two XLOADs with two different
49 ** addresses. Obviously 64 bit constants need to be split up into two 32 bit
50 ** constants, too. Some hiword instructions can be entirely omitted, e.g.
51 ** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls
52 ** are split up into two 32 bit arguments each.
54 ** On soft-float targets, floating-point instructions are directly converted
55 ** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX).
56 ** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump).
58 ** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with
59 ** two int64_t fields:
61 ** 0100 p32 ADD base +8
62 ** 0101 i64 XLOAD 0100
63 ** 0102 i64 ADD 0101 +1
64 ** 0103 p32 ADD base +16
65 ** 0104 i64 XSTORE 0103 0102
67 ** mov rax, [esi+0x8]
68 ** add rax, +0x01
69 ** mov [esi+0x10], rax
71 ** Here's the transformed IR and the x86 machine code after the SPLIT pass:
73 ** 0100 p32 ADD base +8
74 ** 0101 int XLOAD 0100
75 ** 0102 p32 ADD base +12
76 ** 0103 int XLOAD 0102
77 ** 0104 int ADD 0101 +1
78 ** 0105 int HIOP 0103 +0
79 ** 0106 p32 ADD base +16
80 ** 0107 int XSTORE 0106 0104
81 ** 0108 int HIOP 0106 0105
83 ** mov eax, [esi+0x8]
84 ** mov ecx, [esi+0xc]
85 ** add eax, +0x01
86 ** adc ecx, +0x00
87 ** mov [esi+0x10], eax
88 ** mov [esi+0x14], ecx
90 ** You may notice the reassociated hiword address computation, which is
91 ** later fused into the mov operands by the assembler.
94 /* Some local macros to save typing. Undef'd at the end. */
95 #define IR(ref) (&J->cur.ir[(ref)])
97 /* Directly emit the transformed IR without updating chains etc. */
98 static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2)
100 IRRef nref = lj_ir_nextins(J);
101 IRIns *ir = IR(nref);
102 ir->ot = ot;
103 ir->op1 = op1;
104 ir->op2 = op2;
105 return nref;
#if LJ_SOFTFP
/* Emit a (checked) number to integer conversion.
** lo/hi are the loword/hiword refs of the source number.
** If check is set, the result is converted back to a number and compared
** against the original (lo, hi) pair with guarded EQ/HIOP instructions,
** so the trace exits when the conversion is not exact.
** Returns the ref of the integer result.
*/
static IRRef split_num2int(jit_State *J, IRRef lo, IRRef hi, int check)
{
  IRRef tmp, res;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), lo, hi);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hi, lo);
#endif
  res = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_softfp_d2i);
  if (check) {
    /* Round-trip: int -> number, then guard that both words still match. */
    tmp = split_emit(J, IRTI(IR_CALLN), res, IRCALL_softfp_i2d);
    split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
    split_emit(J, IRTGI(IR_EQ), tmp, lo);
    /* Note: relies on hi ref = lo ref + 1 for the re-converted number. */
    split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), tmp+1, hi);
  }
  return res;
}
/* Emit a CALLN with one split 64 bit argument.
** Replaces the just-emitted copy of ir (hence the nins--) with a
** CARG/CALLN pair passing the loword and hiword of op1, followed by a
** HIOP carrying the hiword result. Returns the HIOP ref.
*/
static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			  IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1;
  J->cur.nins--;  /* Undo the copy-substituted instruction; re-emit as call. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
/* Emit a CALLN with one split 64 bit argument and a 32 bit argument.
** Like split_call_l, but appends the (already substituted) 32 bit op2
** as an extra CARG. Returns the ref of the trailing HIOP.
*/
static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			   IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;  /* Undo the copy-substituted instruction; re-emit as call. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
#endif
/* Emit a CALLN with two split 64 bit arguments.
** Both operands are passed as lo/hi word pairs (word order depends on
** endianness). The HIOP type is SOFTFP for number results, INT otherwise.
** Returns the ref of the trailing HIOP.
*/
static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			   IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;  /* Undo the copy-substituted instruction; re-emit as call. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J,
    IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
    tmp, tmp);
}
/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword).
** Computes address+4 for the second word of a 64 bit memory access.
** Folds the offset into a KPTR constant, or reassociates an existing
** constant-offset ADD (unless the address is a PHI, which must not be
** rewritten). Otherwise emits a fresh ADD of the offset.
*/
static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
{
  IRRef nref = oir[ref].prev;  /* Loword substitution of the address. */
  IRIns *ir = IR(nref);
  int32_t ofs = 4;  /* Second word lives 4 bytes past the base address. */
  if (ir->o == IR_KPTR)
    return lj_ir_kptr(J, (char *)ir_kptr(ir) + ofs);
  if (ir->o == IR_ADD && irref_isk(ir->op2) && !irt_isphi(oir[ref].t)) {
    /* Reassociate address. */
    ofs += IR(ir->op2)->i;
    nref = ir->op1;
    if (ofs == 0) return nref;
  }
  return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs));
}
/* Transform the old IR to the new IR.
** Copies the whole IR to a scratch buffer (oir), clears the trace IR and
** re-emits every instruction in 32 bit form. For each old ref, oir[ref].prev
** holds the loword substitution and hisubst[ref] the hiword substitution
** (0 if the value fits in 32 bits). Snapshot maps are rewritten at the end.
*/
static void split_ir(jit_State *J)
{
  IRRef nins = J->cur.nins, nk = J->cur.nk;
  MSize irlen = nins - nk;
  /* One extra slot: oir[nins] substitutes the last snapshot's ref. */
  MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
  IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need);
  IRRef1 *hisubst;
  IRRef ref;

  /* Copy old IR to buffer. */
  memcpy(oir, IR(nk), irlen*sizeof(IRIns));
  /* Bias hiword substitution table and old IR. Loword kept in field prev. */
  hisubst = (IRRef1 *)&oir[irlen] - nk;
  oir -= nk;

  /* Remove all IR instructions, but retain IR constants. */
  J->cur.nins = REF_FIRST;
  J->loopref = 0;

  /* Process constants and fixed references. */
  for (ref = nk; ref <= REF_BASE; ref++) {
    IRIns *ir = &oir[ref];
    if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
      /* Split up 64 bit constant. */
      TValue tv = *ir_k64(ir);
      ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
      hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
    } else {
      ir->prev = ref;  /* Identity substitution for loword. */
      hisubst[ref] = 0;
    }
  }

  /* Process old IR instructions. */
  for (ref = REF_FIRST; ref < nins; ref++) {
    IRIns *ir = &oir[ref];
    IRRef nref = lj_ir_nextins(J);
    IRIns *nir = IR(nref);
    IRRef hi = 0;

    /* Copy-substitute old instruction to new instruction. */
    nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
    nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev;
    ir->prev = nref;  /* Loword substitution. */
    nir->o = ir->o;
    nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);
    hisubst[ref] = 0;

    /* Split 64 bit instructions. */
#if LJ_SOFTFP
    if (irt_isnum(ir->t)) {  /* Soft-float number ops become calls. */
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
      switch (ir->o) {
      case IR_ADD:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
	break;
      case IR_SUB:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
	break;
      case IR_MUL:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
	break;
      case IR_DIV:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
	break;
      case IR_POW:
	hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
	break;
      case IR_FPMATH:
	/* Try to rejoin pow from EXP2, MUL and LOG2. */
	if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) {
	  IRIns *irp = IR(nir->op1);
	  if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) {
	    IRIns *irm4 = IR(irp->op1);
	    IRIns *irm3 = IR(irm4->op1);
	    IRIns *irm12 = IR(irm3->op1);
	    IRIns *irl1 = IR(irm12->op1);
	    if (irm12->op1 > J->loopref && irl1->o == IR_CALLN &&
		irl1->op2 == IRCALL_lj_vm_log2) {
	      IRRef tmp = irl1->op1;  /* Recycle first two args from LOG2. */
	      IRRef arg3 = irm3->op2, arg4 = irm4->op2;
	      J->cur.nins--;
	      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3);
	      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4);
	      ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow);
	      hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
	      break;
	    }
	  }
	}
	/* IRCALL_lj_vm_floor + op2 indexes the per-IRFPM call table. */
	hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
	break;
      case IR_ATAN2:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2);
	break;
      case IR_LDEXP:
	hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
	break;
      case IR_NEG: case IR_ABS:
	/* Sign-bit manipulation on the hiword only. */
	nir->o = IR_CONV;  /* Pass through loword. */
	nir->op2 = (IRT_INT << 5) | IRT_INT;
	hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
			hisubst[ir->op1], hisubst[ir->op2]);
	break;
      case IR_SLOAD:
	if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from int to number. */
	  nir->op2 &= ~IRSLOAD_CONVERT;
	  ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref,
				       IRCALL_softfp_i2d);
	  hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	  break;
	}
	/* fallthrough */
      case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
      case IR_STRTO:
	hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	break;
      case IR_XLOAD: {
	IRIns inslo = *nir;  /* Save/undo the emit of the lo XLOAD. */
	J->cur.nins--;
	hi = split_ptr(J, oir, ir->op1);  /* Insert the hiref ADD. */
	nref = lj_ir_nextins(J);
	nir = IR(nref);
	*nir = inslo;  /* Re-emit lo XLOAD immediately before hi XLOAD. */
	hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
#if LJ_LE
	ir->prev = nref;
#else
	ir->prev = hi; hi = nref;  /* BE: first load is the hiword. */
#endif
	break;
	}
      case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE:
	split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
	break;
      case IR_CONV: {  /* Conversion to number. Others handled below. */
	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
	UNUSED(st);
#if LJ_32 && LJ_HASFFI
	if (st == IRT_I64 || st == IRT_U64) {
	  hi = split_call_l(J, hisubst, oir, ir,
		 st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d);
	  break;
	}
#endif
	lua_assert(st == IRT_INT ||
		   (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)));
	nir->o = IR_CALLN;
#if LJ_32 && LJ_HASFFI
	nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
		   st == IRT_FLOAT ? IRCALL_softfp_f2d :
		   IRCALL_softfp_ui2d;
#else
	nir->op2 = IRCALL_softfp_i2d;
#endif
	hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	break;
	}
      case IR_CALLN:
      case IR_CALLL:
      case IR_CALLS:
      case IR_CALLXS:
	goto split_call;
      case IR_PHI:
	if (nir->op1 == nir->op2)
	  J->cur.nins--;  /* Drop useless PHIs. */
	if (hisubst[ir->op1] != hisubst[ir->op2])
	  split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
		     hisubst[ir->op1], hisubst[ir->op2]);
	break;
      case IR_HIOP:
	J->cur.nins--;  /* Drop joining HIOP. */
	ir->prev = nir->op1;
	hi = nir->op2;
	break;
      default:
	lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX);
	hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
			hisubst[ir->op1], hisubst[ir->op2]);
	break;
      }
    } else
#endif
#if LJ_32 && LJ_HASFFI
    if (irt_isint64(ir->t)) {  /* 64 bit integer ops from the FFI. */
      IRRef hiref = hisubst[ir->op1];
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      switch (ir->o) {
      case IR_ADD:
      case IR_SUB:
	/* Use plain op for hiword if loword cannot produce a carry/borrow. */
	if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
	  ir->prev = nir->op1;  /* Pass through loword. */
	  nir->op1 = hiref; nir->op2 = hisubst[ir->op2];
	  hi = nref;
	  break;
	}
	/* fallthrough */
      case IR_NEG:
	hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
	break;
      case IR_MUL:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
	break;
      case IR_DIV:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
					      IRCALL_lj_carith_divu64);
	break;
      case IR_MOD:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
					      IRCALL_lj_carith_modu64);
	break;
      case IR_POW:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
					      IRCALL_lj_carith_powu64);
	break;
      case IR_FLOAD:
	lua_assert(ir->op2 == IRFL_CDATA_INT64);
	hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
#if LJ_BE
	ir->prev = hi; hi = nref;  /* BE: first load is the hiword. */
#endif
	break;
      case IR_XLOAD:
	hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2);
#if LJ_BE
	ir->prev = hi; hi = nref;  /* BE: first load is the hiword. */
#endif
	break;
      case IR_XSTORE:
	split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]);
	break;
      case IR_CONV: {  /* Conversion to 64 bit integer. Others handled below. */
	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_SOFTFP
	if (st == IRT_NUM) {  /* NUM to 64 bit int conv. */
	  hi = split_call_l(J, hisubst, oir, ir,
		 irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul);
	} else if (st == IRT_FLOAT) {  /* FLOAT to 64 bit int conv. */
	  nir->o = IR_CALLN;
	  nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
	  hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
	}
#else
	if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit int conv. */
	  hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
	}
#endif
	else if (st == IRT_I64 || st == IRT_U64) {  /* 64/64 bit cast. */
	  /* Drop cast, since assembler doesn't care. */
	  goto fwdlo;
	} else if ((ir->op2 & IRCONV_SEXT)) {  /* Sign-extend to 64 bit. */
	  IRRef k31 = lj_ir_kint(J, 31);
	  nir = IR(nref);  /* May have been reallocated. */
	  ir->prev = nir->op1;  /* Pass through loword. */
	  nir->o = IR_BSAR;  /* hi = bsar(lo, 31). */
	  nir->op2 = k31;
	  hi = nref;
	} else {  /* Zero-extend to 64 bit. */
	  hi = lj_ir_kint(J, 0);
	  goto fwdlo;
	}
	break;
	}
      case IR_CALLXS:
	goto split_call;
      case IR_PHI: {
	IRRef hiref2;
	if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
	    nir->op1 == nir->op2)
	  J->cur.nins--;  /* Drop useless PHIs. */
	hiref2 = hisubst[ir->op2];
	if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2))
	  split_emit(J, IRTI(IR_PHI), hiref, hiref2);
	break;
	}
      case IR_HIOP:
	J->cur.nins--;  /* Drop joining HIOP. */
	ir->prev = nir->op1;
	hi = nir->op2;
	break;
      default:
	lua_assert(ir->o <= IR_NE);  /* Comparisons. */
	split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
	break;
      }
    } else
#endif
#if LJ_SOFTFP
    if (ir->o == IR_SLOAD) {
      if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from number to int. */
	nir->op2 &= ~IRSLOAD_CONVERT;
	if (!(nir->op2 & IRSLOAD_TYPECHECK))
	  nir->t.irt = IRT_INT;  /* Drop guard. */
	split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t));
      }
    } else if (ir->o == IR_TOBIT) {
      IRRef tmp, op1 = ir->op1;
      J->cur.nins--;  /* Re-emit as a call taking the split number. */
#if LJ_LE
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
      ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
    } else if (ir->o == IR_TOSTR) {
      if (hisubst[ir->op1]) {
	if (irref_isk(ir->op1))
	  nir->op1 = ir->op1;  /* Constant: keep the original 64 bit ref. */
	else
	  split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref);
      }
    } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) {
      /* Constant number key: keep the original ref for the hash lookup. */
      if (irref_isk(ir->op2) && hisubst[ir->op2])
	nir->op2 = ir->op2;
    } else
#endif
    if (ir->o == IR_CONV) {  /* See above, too. */
      IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_32 && LJ_HASFFI
      if (st == IRT_I64 || st == IRT_U64) {  /* Conversion from 64 bit int. */
#if LJ_SOFTFP
	if (irt_isfloat(ir->t)) {
	  split_call_l(J, hisubst, oir, ir,
		       st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f);
	  J->cur.nins--;  /* Drop unused HIOP. */
	}
#else
	if (irt_isfp(ir->t)) {  /* 64 bit integer to FP conversion. */
	  ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
				hisubst[ir->op1], nref);
	}
#endif
	else {  /* Truncate to lower 32 bits. */
	fwdlo:
	  ir->prev = nir->op1;  /* Forward loword. */
	  /* Replace with NOP to avoid messing up the snapshot logic. */
	  nir->ot = IRT(IR_NOP, IRT_NIL);
	  nir->op1 = nir->op2 = 0;
	}
      }
#endif
#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
      else if (irt_isfloat(ir->t)) {
	if (st == IRT_NUM) {
	  split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
	  J->cur.nins--;  /* Drop unused HIOP. */
	} else {
	  nir->o = IR_CALLN;
	  nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
	}
      } else if (st == IRT_FLOAT) {
	nir->o = IR_CALLN;
	nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
      } else
#endif
#if LJ_SOFTFP
      if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
	if (irt_isguard(ir->t)) {
	  lua_assert(st == IRT_NUM && irt_isint(ir->t));
	  J->cur.nins--;
	  ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
	} else {
	  split_call_l(J, hisubst, oir, ir,
#if LJ_32 && LJ_HASFFI
	    st == IRT_NUM ?
	      (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
	      (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)
#else
	    IRCALL_softfp_d2i
#endif
	  );
	  J->cur.nins--;  /* Drop unused HIOP. */
	}
      }
#endif
    } else if (ir->o == IR_CALLXS) {
      IRRef hiref;
    split_call:
      hiref = hisubst[ir->op1];
      if (hiref) {  /* Split 64 bit function argument into a CARG pair. */
	IROpT ot = nir->ot;
	IRRef op2 = nir->op2;
	nir->ot = IRT(IR_CARG, IRT_NIL);
#if LJ_LE
	nir->op2 = hiref;
#else
	nir->op2 = nir->op1; nir->op1 = hiref;
#endif
	ir->prev = nref = split_emit(J, ot, nref, op2);
      }
      if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
	hi = split_emit(J,
	  IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
	  nref, nref);
    } else if (ir->o == IR_CARG) {
      IRRef hiref = hisubst[ir->op1];
      if (hiref) {  /* Split 64 bit argument in op1. */
	IRRef op2 = nir->op2;
#if LJ_LE
	nir->op2 = hiref;
#else
	nir->op2 = nir->op1; nir->op1 = hiref;
#endif
	ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
	nir = IR(nref);
      }
      hiref = hisubst[ir->op2];
      if (hiref) {  /* Split 64 bit argument in op2. */
#if !LJ_TARGET_X86
	/* Count preceding CARGs to keep 64 bit args register-pair aligned. */
	int carg = 0;
	IRIns *cir;
	for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1))
	  carg++;
	if ((carg & 1) == 0) {  /* Align 64 bit arguments. */
	  IRRef op2 = nir->op2;
	  nir->op2 = REF_NIL;
	  nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
	  nir = IR(nref);
	}
#endif
#if LJ_BE
	{ IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; }
#endif
	ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref);
      }
    } else if (ir->o == IR_CNEWI) {
      if (hisubst[ir->op2])
	split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]);
    } else if (ir->o == IR_LOOP) {
      J->loopref = nref;  /* Needed by assembler. */
    }
    hisubst[ref] = hi;  /* Store hiword substitution. */
  }

  /* Add PHI marks. */
  for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) {
    IRIns *ir = IR(ref);
    if (ir->o != IR_PHI) break;  /* PHIs are contiguous at the end. */
    if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t);
    if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t);
  }

  /* Substitute snapshot maps. */
  oir[nins].prev = J->cur.nins;  /* Substitution for last snapshot. */
  {
    SnapNo i, nsnap = J->cur.nsnap;
    for (i = 0; i < nsnap; i++) {
      SnapShot *snap = &J->cur.snap[i];
      SnapEntry *map = &J->cur.snapmap[snap->mapofs];
      MSize n, nent = snap->nent;
      snap->ref = snap->ref == REF_FIRST ? REF_FIRST : oir[snap->ref].prev;
      for (n = 0; n < nent; n++) {
	SnapEntry sn = map[n];
	IRIns *ir = &oir[snap_ref(sn)];
	/* Keep split 64 bit constants as-is for SNAP_SOFTFPNUM entries. */
	if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn))))
	  map[n] = ((sn & 0xffff0000) | ir->prev);
      }
    }
  }
}
666 /* Protected callback for split pass. */
667 static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud)
669 jit_State *J = (jit_State *)ud;
670 split_ir(J);
671 UNUSED(L); UNUSED(dummy);
672 return NULL;
#if defined(LUA_USE_ASSERT) || LJ_SOFTFP
/* Slow, but sure way to check whether a SPLIT pass is needed.
** Scans every instruction for 64 bit (or, on soft-float, FP) types, plus
** the SLOAD/TOBIT/CONV chains for operations the pass must rewrite.
** Returns 1 if a split is required, 0 otherwise.
*/
static int split_needsplit(jit_State *J)
{
  IRIns *ir, *irend;
  IRRef ref;
  for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++)
    if (LJ_SOFTFP ? irt_is64orfp(ir->t) : irt_isint64(ir->t))
      return 1;
  if (LJ_SOFTFP) {
    /* Converting SLOADs and TOBIT need soft-float calls, too. */
    for (ref = J->chain[IR_SLOAD]; ref; ref = IR(ref)->prev)
      if ((IR(ref)->op2 & IRSLOAD_CONVERT))
	return 1;
    if (J->chain[IR_TOBIT])
      return 1;
  }
  for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) {
    IRType st = (IR(ref)->op2 & IRCONV_SRCMASK);
    if ((LJ_SOFTFP && (st == IRT_NUM || st == IRT_FLOAT)) ||
	st == IRT_I64 || st == IRT_U64)
      return 1;
  }
  return 0;  /* Nope. */
}
#endif
/* SPLIT pass.
** Entry point: decides whether the pass is needed and runs split_ir in a
** protected call. On error the trace IR is wiped before rethrowing, so an
** aborted dump never shows a half-split trace.
*/
void lj_opt_split(jit_State *J)
{
#if LJ_SOFTFP
  if (!J->needsplit)
    J->needsplit = split_needsplit(J);
#else
  lua_assert(J->needsplit >= split_needsplit(J));  /* Verify flag. */
#endif
  if (J->needsplit) {
    int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
    if (errcode) {
      /* Completely reset the trace to avoid inconsistent dump on abort. */
      J->cur.nins = J->cur.nk = REF_BASE;
      J->cur.nsnap = 0;
      lj_err_throw(J->L, errcode);  /* Propagate errors. */
    }
  }
}
721 #undef IR
723 #endif