2 ** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions.
3 ** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h
11 #if LJ_HASJIT && LJ_HASFFI && LJ_32
22 ** This pass splits up 64 bit IR instructions into multiple 32 bit IR
23 ** instructions. It's only active for 32 bit CPUs which lack native 64 bit
24 ** operations. The FFI is currently the only emitter for 64 bit
25 ** instructions, so this pass is disabled if the FFI is disabled.
27 ** Splitting the IR in a separate pass keeps each 32 bit IR assembler
28 ** backend simple. Only a small amount of extra functionality needs to be
29 ** implemented. This is much easier than adding support for allocating
30 ** register pairs to each backend (believe me, I tried). A few simple, but
31 ** important optimizations can be performed by the SPLIT pass, which would
32 ** be tedious to do in the backend.
34 ** The basic idea is to replace each 64 bit IR instruction with its 32 bit
35 ** equivalent plus an extra HIOP instruction. The split IR is not passed
36 ** through FOLD or any other optimizations, so each HIOP is guaranteed to
37 ** immediately follow its counterpart. The actual functionality of HIOP is
38 ** inferred from the previous instruction.
40 ** The operands of HIOP hold the hiword input references. The output of HIOP
41 ** is the hiword output reference, which is also used to hold the hiword
42 ** register or spill slot information. The register allocator treats this
43 ** instruction independently of any other instruction, which improves code
44 ** quality compared to using fixed register pairs.
46 ** It's easier to split up some instructions into two regular 32 bit
47 ** instructions. E.g. XLOAD is split up into two XLOADs with two different
48 ** addresses. Obviously 64 bit constants need to be split up into two 32 bit
49 ** constants, too. Some hiword instructions can be entirely omitted, e.g.
50 ** when zero-extending a 32 bit value to 64 bits.
52 ** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with
53 ** two int64_t fields:
55 ** 0100 p32 ADD base +8
56 ** 0101 i64 XLOAD 0100
57 ** 0102 i64 ADD 0101 +1
58 ** 0103 p32 ADD base +16
59 ** 0104 i64 XSTORE 0103 0102
63 ** mov [esi+0x10], rax
65 ** Here's the transformed IR and the x86 machine code after the SPLIT pass:
67 ** 0100 p32 ADD base +8
68 ** 0101 int XLOAD 0100
69 ** 0102 p32 ADD base +12
70 ** 0103 int XLOAD 0102
71 ** 0104 int ADD 0101 +1
72 ** 0105 int HIOP 0103 +0
73 ** 0106 p32 ADD base +16
74 ** 0107 int XSTORE 0106 0104
75 ** 0108 p32 ADD base +20
76 ** 0109 int XSTORE 0108 0105
82 ** mov [esi+0x10], eax
83 ** mov [esi+0x14], ecx
85 ** You may notice the reassociated hiword address computation, which is
86 ** later fused into the mov operands by the assembler.
89 /* Some local macros to save typing. Undef'd at the end. */
90 #define IR(ref) (&J->cur.ir[(ref)])
92 /* Directly emit the transformed IR without updating chains etc. */
93 static IRRef
split_emit(jit_State
*J
, uint16_t ot
, IRRef1 op1
, IRRef1 op2
)
95 IRRef nref
= lj_ir_nextins(J
);
103 /* Emit a CALLN with two split 64 bit arguments. */
104 static IRRef
split_call64(jit_State
*J
, IRRef1
*hisubst
, IRIns
*oir
,
105 IRIns
*ir
, IRCallID id
)
107 IRRef tmp
, op1
= ir
->op1
, op2
= ir
->op2
;
110 tmp
= split_emit(J
, IRT(IR_CARG
, IRT_NIL
), oir
[op1
].prev
, hisubst
[op1
]);
111 tmp
= split_emit(J
, IRT(IR_CARG
, IRT_NIL
), tmp
, oir
[op2
].prev
);
112 tmp
= split_emit(J
, IRT(IR_CARG
, IRT_NIL
), tmp
, hisubst
[op2
]);
114 tmp
= split_emit(J
, IRT(IR_CARG
, IRT_NIL
), hisubst
[op1
], oir
[op1
].prev
);
115 tmp
= split_emit(J
, IRT(IR_CARG
, IRT_NIL
), tmp
, hisubst
[op2
]);
116 tmp
= split_emit(J
, IRT(IR_CARG
, IRT_NIL
), tmp
, oir
[op2
].prev
);
118 ir
->prev
= tmp
= split_emit(J
, IRTI(IR_CALLN
), tmp
, id
);
119 return split_emit(J
, IRTI(IR_HIOP
), tmp
, tmp
);
122 /* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */
123 static IRRef
split_ptr(jit_State
*J
, IRRef ref
)
127 if (ir
->o
== IR_ADD
&& irref_isk(ir
->op2
)) { /* Reassociate address. */
128 ofs
+= IR(ir
->op2
)->i
;
130 if (ofs
== 0) return ref
;
132 return split_emit(J
, IRTI(IR_ADD
), ref
, lj_ir_kint(J
, ofs
));
135 /* Transform the old IR to the new IR. */
/* NOTE(review): this block was damaged in extraction -- each original
** source line is split across several physical lines, the leading numbers
** are the original line numbers, and whole lines (braces, case labels,
** #if LJ_LE / LJ_BE guards, break statements, some declarations) are
** missing.  The comments added below are review notes only; restore the
** code itself from the upstream file before compiling. */
136 static void split_ir(jit_State
*J
)
138 IRRef nins
= J
->cur
.nins
, nk
= J
->cur
.nk
;
139 MSize irlen
= nins
- nk
;
140 MSize need
= (irlen
+1)*(sizeof(IRIns
) + sizeof(IRRef1
));
141 IRIns
*oir
= (IRIns
*)lj_str_needbuf(J
->L
, &G(J
->L
)->tmpbuf
, need
);
145 /* Copy old IR to buffer. */
146 memcpy(oir
, IR(nk
), irlen
*sizeof(IRIns
));
147 /* Bias hiword substitution table and old IR. Loword kept in field prev. */
148 hisubst
= (IRRef1
*)&oir
[irlen
] - nk
;
/* NOTE(review): the matching bias of oir itself (presumably 'oir -= nk;',
** original line 149) is missing -- oir is indexed by raw refs below. */
151 /* Remove all IR instructions, but retain IR constants. */
152 J
->cur
.nins
= REF_FIRST
;
154 /* Process constants and fixed references. */
155 for (ref
= nk
; ref
<= REF_BASE
; ref
++) {
156 IRIns
*ir
= &oir
[ref
];
157 if (ir
->o
== IR_KINT64
) { /* Split up 64 bit constant. */
158 TValue tv
= *ir_k64(ir
);
159 ir
->prev
= lj_ir_kint(J
, (int32_t)tv
.u32
.lo
);
160 hisubst
[ref
] = lj_ir_kint(J
, (int32_t)tv
.u32
.hi
);
162 ir
->prev
= (IRRef1
)ref
; /* Identity substitution for loword. */
/* NOTE(review): the else keyword and closing braces of this loop
** (original lines 161, 163-164) are missing. */
166 /* Process old IR instructions. */
167 for (ref
= REF_FIRST
; ref
< nins
; ref
++) {
168 IRIns
*ir
= &oir
[ref
];
169 IRRef nref
= lj_ir_nextins(J
);
170 IRIns
*nir
= IR(nref
);
172 /* Copy-substitute old instruction to new instruction. */
173 nir
->op1
= ir
->op1
< nk
? ir
->op1
: oir
[ir
->op1
].prev
;
174 nir
->op2
= ir
->op2
< nk
? ir
->op2
: oir
[ir
->op2
].prev
;
175 ir
->prev
= nref
; /* Loword substitution. */
/* NOTE(review): the copy of the opcode itself (original line 176,
** presumably 'nir->o = ir->o;') is missing. */
177 nir
->t
.irt
= ir
->t
.irt
& ~(IRT_MARK
|IRT_ISPHI
);
179 /* Split 64 bit instructions. */
180 if (irt_isint64(ir
->t
)) {
181 IRRef hi
= hisubst
[ir
->op1
];
182 nir
->t
.irt
= IRT_INT
| (nir
->t
.irt
& IRT_GUARD
); /* Turn into INT op. */
/* NOTE(review): the switch on ir->o and its first case labels
** (original lines 183-185) are missing here. */
186 /* Use plain op for hiword if loword cannot produce a carry/borrow. */
187 if (irref_isk(nir
->op2
) && IR(nir
->op2
)->i
== 0) {
188 ir
->prev
= nir
->op1
; /* Pass through loword. */
189 nir
->op1
= hi
; nir
->op2
= hisubst
[ir
->op2
];
/* Generic 64 bit arithmetic: emit a HIOP carrying the hiword operands
** (per the file header, HIOP's operands are the hiword inputs). */
195 hi
= split_emit(J
, IRTI(IR_HIOP
), hi
, hisubst
[ir
->op2
]);
/* 64 bit multiply is lowered to a runtime call. */
198 hi
= split_call64(J
, hisubst
, oir
, ir
, IRCALL_lj_carith_mul64
);
/* 64 bit divide: signed/unsigned helper chosen by the result type. */
201 hi
= split_call64(J
, hisubst
, oir
, ir
,
202 irt_isi64(ir
->t
) ? IRCALL_lj_carith_divi64
:
203 IRCALL_lj_carith_divu64
);
/* 64 bit modulo helper call. */
206 hi
= split_call64(J
, hisubst
, oir
, ir
,
207 irt_isi64(ir
->t
) ? IRCALL_lj_carith_modi64
:
208 IRCALL_lj_carith_modu64
);
/* 64 bit power helper call. */
211 hi
= split_call64(J
, hisubst
, oir
, ir
,
212 irt_isi64(ir
->t
) ? IRCALL_lj_carith_powi64
:
213 IRCALL_lj_carith_powu64
);
/* XLOAD: emit a second load for the other 32 bit word via split_ptr(). */
216 hi
= split_emit(J
, IRTI(IR_XLOAD
), split_ptr(J
, nir
->op1
), ir
->op2
);
/* NOTE(review): the word swap below is presumably big-endian only --
** the surrounding #if LJ_BE guard appears to have been lost. */
218 ir
->prev
= hi
; hi
= nref
;
/* XSTORE: pick the hiword value to store; the two variants below look
** endian-dependent (#if LJ_LE/#else guards lost -- TODO confirm). */
223 hi
= hisubst
[ir
->op2
];
225 hi
= nir
->op2
; nir
->op2
= hisubst
[ir
->op2
];
227 split_emit(J
, IRTI(IR_XSTORE
), split_ptr(J
, nir
->op1
), hi
);
229 case IR_CONV
: { /* Conversion to 64 bit integer. Others handled below. */
230 IRType st
= (IRType
)(ir
->op2
& IRCONV_SRCMASK
);
231 if (st
== IRT_NUM
|| st
== IRT_FLOAT
) { /* FP to 64 bit int conv. */
232 hi
= split_emit(J
, IRTI(IR_HIOP
), nir
->op1
, nref
);
233 } else if (st
== IRT_I64
|| st
== IRT_U64
) { /* 64/64 bit cast. */
234 /* Drop cast, since assembler doesn't care. */
/* NOTE(review): the statements implementing the drop (original lines
** 235-236) are missing. */
237 } else if ((ir
->op2
& IRCONV_SEXT
)) { /* Sign-extend to 64 bit. */
238 IRRef k31
= lj_ir_kint(J
, 31);
239 nir
= IR(nref
); /* May have been reallocated. */
240 ir
->prev
= nir
->op1
; /* Pass through loword. */
241 nir
->o
= IR_BSAR
; /* hi = bsar(lo, 31). */
/* NOTE(review): presumably 'nir->op2 = k31;' and the hi assignment
** (original lines 242-243) are missing. */
244 } else { /* Zero-extend to 64 bit. */
245 hisubst
[ref
] = lj_ir_kint(J
, 0);
/* NOTE(review): PHI handling -- drop a useless loword PHI, then emit the
** hiword PHI if it is needed; the case label is missing above. */
252 if ((irref_isk(nir
->op1
) && irref_isk(nir
->op2
)) ||
253 nir
->op1
== nir
->op2
)
254 J
->cur
.nins
--; /* Drop useless PHIs. */
255 hi2
= hisubst
[ir
->op2
];
256 if (!((irref_isk(hi
) && irref_isk(hi2
)) || hi
== hi2
))
257 split_emit(J
, IRTI(IR_PHI
), hi
, hi2
);
/* Remaining 64 bit ops must be comparisons (asserted <= IR_NE). */
261 lua_assert(ir
->o
<= IR_NE
);
262 split_emit(J
, IRTGI(IR_HIOP
), hi
, hisubst
[ir
->op2
]); /* Comparisons. */
265 hisubst
[ref
] = hi
; /* Store hiword substitution. */
266 } else if (ir
->o
== IR_CONV
) { /* See above, too. */
267 IRType st
= (IRType
)(ir
->op2
& IRCONV_SRCMASK
);
268 if (st
== IRT_I64
|| st
== IRT_U64
) { /* Conversion from 64 bit int. */
269 if (irt_isfp(ir
->t
)) { /* 64 bit integer to FP conversion. */
270 ir
->prev
= split_emit(J
, IRT(IR_HIOP
, irt_type(ir
->t
)),
271 hisubst
[ir
->op1
], nref
);
272 } else { /* Truncate to lower 32 bits. */
274 ir
->prev
= nir
->op1
; /* Forward loword. */
275 /* Replace with NOP to avoid messing up the snapshot logic. */
276 nir
->ot
= IRT(IR_NOP
, IRT_NIL
);
277 nir
->op1
= nir
->op2
= 0;
280 } else if (ir
->o
== IR_LOOP
) {
281 J
->loopref
= nref
; /* Needed by assembler. */
/* Walk the trailing PHI instructions and set the PHI flag on their
** operands (the per-iteration declaration of 'ir', original line 287,
** is missing below). */
286 for (ref
= J
->cur
.nins
-1; ref
>= REF_FIRST
; ref
--) {
288 if (ir
->o
!= IR_PHI
) break;
289 if (!irref_isk(ir
->op1
)) irt_setphi(IR(ir
->op1
)->t
);
290 if (ir
->op2
> J
->loopref
) irt_setphi(IR(ir
->op2
)->t
);
293 /* Substitute snapshot maps. */
294 oir
[nins
].prev
= J
->cur
.nins
; /* Substitution for last snapshot. */
296 SnapNo i
, nsnap
= J
->cur
.nsnap
;
297 for (i
= 0; i
< nsnap
; i
++) {
298 SnapShot
*snap
= &J
->cur
.snap
[i
];
299 SnapEntry
*map
= &J
->cur
.snapmap
[snap
->mapofs
];
300 MSize n
, nent
= snap
->nent
;
301 snap
->ref
= oir
[snap
->ref
].prev
;
302 for (n
= 0; n
< nent
; n
++) {
303 SnapEntry sn
= map
[n
];
304 map
[n
] = ((sn
& 0xffff0000) | oir
[snap_ref(sn
)].prev
);
310 /* Protected callback for split pass. */
311 static TValue
*cpsplit(lua_State
*L
, lua_CFunction dummy
, void *ud
)
313 jit_State
*J
= (jit_State
*)ud
;
315 UNUSED(L
); UNUSED(dummy
);
#ifdef LUA_USE_ASSERT
/* Slow, but sure way to check whether a SPLIT pass is needed.
** Returns 1 if the IR contains any 64 bit integer instruction or any
** CONV from a 64 bit integer source, 0 otherwise. Debug-only helper to
** validate the J->needsplit flag.
** (Reconstructed: declarations, both 'return 1;' statements and the
** closing #endif were lost in extraction.)
*/
static int split_needsplit(jit_State *J)
{
  IRIns *ir, *irend;
  IRRef ref;
  for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++)
    if (irt_isint64(ir->t))
      return 1;  /* 64 bit integer instruction present. */
  for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev)
    if ((IR(ref)->op2 & IRCONV_SRCMASK) == IRT_I64 ||
	(IR(ref)->op2 & IRCONV_SRCMASK) == IRT_U64)
      return 1;  /* Conversion from a 64 bit integer present. */
  return 0;  /* Nope. */
}
#endif
337 void lj_opt_split(jit_State
*J
)
339 lua_assert(J
->needsplit
>= split_needsplit(J
)); /* Verify flag. */
341 int errcode
= lj_vm_cpcall(J
->L
, NULL
, J
, cpsplit
);
343 /* Completely reset the trace to avoid inconsistent dump on abort. */
344 J
->cur
.nins
= J
->cur
.nk
= REF_BASE
;
346 lj_err_throw(J
->L
, errcode
); /* Propagate errors. */