Add IR_XBAR, a barrier against XLOAD/XSTORE optimizations.
[luajit-2.0.git] / src / lj_opt_fold.c
blobe5ed7adee349132a6855b304e5706a0b112d6476
1 /*
2 ** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
3 ** ABCelim: Array Bounds Check Elimination.
4 ** CSE: Common-Subexpression Elimination.
5 ** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h
6 */
8 #define lj_opt_fold_c
9 #define LUA_CORE
11 #include "lj_obj.h"
13 #if LJ_HASJIT
15 #include "lj_str.h"
16 #include "lj_tab.h"
17 #include "lj_ir.h"
18 #include "lj_jit.h"
19 #include "lj_iropt.h"
20 #include "lj_trace.h"
21 #include "lj_carith.h"
22 #include "lj_vm.h"
24 /* Here's a short description how the FOLD engine processes instructions:
26 ** The FOLD engine receives a single instruction stored in fins (J->fold.ins).
27 ** The instruction and its operands are used to select matching fold rules.
28 ** These are applied iteratively until a fixed point is reached.
30 ** The 8 bit opcode of the instruction itself plus the opcodes of the
31 ** two instructions referenced by its operands form a 24 bit key
32 ** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits).
34 ** This key is used for partial matching against the fold rules. The
35 ** left/right operand fields of the key are successively masked with
36 ** the 'any' wildcard, from most specific to least specific:
38 ** ins left right
39 ** ins any right
40 ** ins left any
41 ** ins any any
43 ** The masked key is used to lookup a matching fold rule in a semi-perfect
44 ** hash table. If a matching rule is found, the related fold function is run.
45 ** Multiple rules can share the same fold function. A fold rule may return
46 ** one of several special values:
48 ** - NEXTFOLD means no folding was applied, because an additional test
49 ** inside the fold function failed. Matching continues against less
50 ** specific fold rules. Finally the instruction is passed on to CSE.
52 ** - RETRYFOLD means the instruction was modified in-place. Folding is
53 ** retried as if this instruction had just been received.
55 ** All other return values are terminal actions -- no further folding is
56 ** applied:
58 ** - INTFOLD(i) returns a reference to the integer constant i.
60 ** - LEFTFOLD and RIGHTFOLD return the left/right operand reference
61 ** without emitting an instruction.
63 ** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit
64 ** it without passing through any further optimizations.
66 ** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have
67 ** no result (e.g. guarded assertions): FAILFOLD means the guard would
68 ** always fail, i.e. the current trace is pointless. DROPFOLD means
69 ** the guard is always true and has been eliminated. CONDFOLD is a
70 ** shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail).
72 ** - Any other return value is interpreted as an IRRef or TRef. This
73 ** can be a reference to an existing or a newly created instruction.
74 ** Only the least-significant 16 bits (IRRef1) are used to form a TRef
75 ** which is finally returned to the caller.
77 ** The FOLD engine receives instructions both from the trace recorder and
78 ** substituted instructions from LOOP unrolling. This means all types
79 ** of instructions may end up here, even though the recorder bypasses
80 ** FOLD in some cases. Thus all loads, stores and allocations must have
81 ** an any/any rule to avoid being passed on to CSE.
83 ** Carefully read the following requirements before adding or modifying
84 ** any fold rules:
86 ** Requirement #1: All fold rules must preserve their destination type.
88 ** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result).
89 ** Never use lj_ir_knumint() which can have either a KINT or KNUM result.
91 ** Requirement #2: Fold rules should not create *new* instructions which
92 ** reference operands *across* PHIs.
94 ** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the
95 ** left operand is a PHI. Then fleft->op1 would point across the PHI
96 ** frontier to an invariant instruction. Adding a PHI for this instruction
97 ** would be counterproductive. The solution is to add a barrier which
98 ** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case.
99 ** The only exception is for recurrences with high latencies like
100 ** repeated int->num->int conversions.
102 ** One could relax this condition a bit if the referenced instruction is
103 ** a PHI, too. But this often leads to worse code due to excessive
104 ** register shuffling.
106 ** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though.
107 ** Even returning fleft->op1 would be ok, because a new PHI will added,
108 ** if needed. But again, this leads to excessive register shuffling and
109 ** should be avoided.
111 ** Requirement #3: The set of all fold rules must be monotonic to guarantee
112 ** termination.
114 ** The goal is optimization, so one primarily wants to add strength-reducing
115 ** rules. This means eliminating an instruction or replacing an instruction
116 ** with one or more simpler instructions. Don't add fold rules which point
117 ** into the other direction.
119 ** Some rules (like commutativity) do not directly reduce the strength of
120 ** an instruction, but enable other fold rules (e.g. by moving constants
121 ** to the right operand). These rules must be made unidirectional to avoid
122 ** cycles.
124 ** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it.
127 /* Some local macros to save typing. Undef'd at the end. */
128 #define IR(ref) (&J->cur.ir[(ref)])
129 #define fins (&J->fold.ins)
130 #define fleft (&J->fold.left)
131 #define fright (&J->fold.right)
132 #define knumleft (ir_knum(fleft)->n)
133 #define knumright (ir_knum(fright)->n)
135 /* Pass IR on to next optimization in chain (FOLD). */
136 #define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
138 /* Fold function type. Fastcall on x86 significantly reduces their size. */
139 typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);
141 /* Macros for the fold specs, so buildvm can recognize them. */
142 #define LJFOLD(x)
143 #define LJFOLDX(x)
144 #define LJFOLDF(name) static TRef LJ_FASTCALL fold_##name(jit_State *J)
145 /* Note: They must be at the start of a line or buildvm ignores them! */
147 /* Barrier to prevent using operands across PHIs. */
148 #define PHIBARRIER(ir) if (irt_isphi((ir)->t)) return NEXTFOLD
150 /* Barrier to prevent folding across a GC step.
151 ** GC steps can only happen at the head of a trace and at LOOP.
152 ** And the GC is only driven forward if there is at least one allocation.
154 #define gcstep_barrier(J, ref) \
155 ((ref) < J->chain[IR_LOOP] && \
156 (J->chain[IR_SNEW] || J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
157 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR]))
159 /* -- Constant folding for FP numbers ------------------------------------- */
161 LJFOLD(ADD KNUM KNUM)
162 LJFOLD(SUB KNUM KNUM)
163 LJFOLD(MUL KNUM KNUM)
164 LJFOLD(DIV KNUM KNUM)
165 LJFOLD(NEG KNUM KNUM)
166 LJFOLD(ABS KNUM KNUM)
167 LJFOLD(ATAN2 KNUM KNUM)
168 LJFOLD(LDEXP KNUM KNUM)
169 LJFOLD(MIN KNUM KNUM)
170 LJFOLD(MAX KNUM KNUM)
171 LJFOLDF(kfold_numarith)
173 lua_Number a = knumleft;
174 lua_Number b = knumright;
175 lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
176 return lj_ir_knum(J, y);
179 LJFOLD(FPMATH KNUM any)
180 LJFOLDF(kfold_fpmath)
182 lua_Number a = knumleft;
183 lua_Number y = lj_vm_foldfpm(a, fins->op2);
184 return lj_ir_knum(J, y);
187 LJFOLD(POW KNUM KINT)
188 LJFOLDF(kfold_numpow)
190 lua_Number a = knumleft;
191 lua_Number b = cast_num(fright->i);
192 lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
193 return lj_ir_knum(J, y);
196 /* Must not use kfold_kref for numbers (could be NaN). */
197 LJFOLD(EQ KNUM KNUM)
198 LJFOLD(NE KNUM KNUM)
199 LJFOLD(LT KNUM KNUM)
200 LJFOLD(GE KNUM KNUM)
201 LJFOLD(LE KNUM KNUM)
202 LJFOLD(GT KNUM KNUM)
203 LJFOLD(ULT KNUM KNUM)
204 LJFOLD(UGE KNUM KNUM)
205 LJFOLD(ULE KNUM KNUM)
206 LJFOLD(UGT KNUM KNUM)
207 LJFOLDF(kfold_numcomp)
209 return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o));
212 /* -- Constant folding for 32 bit integers -------------------------------- */
214 static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
216 switch (op) {
217 case IR_ADD: k1 += k2; break;
218 case IR_SUB: k1 -= k2; break;
219 case IR_MUL: k1 *= k2; break;
220 case IR_BAND: k1 &= k2; break;
221 case IR_BOR: k1 |= k2; break;
222 case IR_BXOR: k1 ^= k2; break;
223 case IR_BSHL: k1 <<= (k2 & 31); break;
224 case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break;
225 case IR_BSAR: k1 >>= (k2 & 31); break;
226 case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break;
227 case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
228 default: lua_assert(0); break;
230 return k1;
233 LJFOLD(ADD KINT KINT)
234 LJFOLD(SUB KINT KINT)
235 LJFOLD(MUL KINT KINT)
236 LJFOLD(BAND KINT KINT)
237 LJFOLD(BOR KINT KINT)
238 LJFOLD(BXOR KINT KINT)
239 LJFOLD(BSHL KINT KINT)
240 LJFOLD(BSHR KINT KINT)
241 LJFOLD(BSAR KINT KINT)
242 LJFOLD(BROL KINT KINT)
243 LJFOLD(BROR KINT KINT)
244 LJFOLDF(kfold_intarith)
246 return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
249 LJFOLD(BNOT KINT)
250 LJFOLDF(kfold_bnot)
252 return INTFOLD(~fleft->i);
255 LJFOLD(BSWAP KINT)
256 LJFOLDF(kfold_bswap)
258 return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i));
261 LJFOLD(LT KINT KINT)
262 LJFOLD(GE KINT KINT)
263 LJFOLD(LE KINT KINT)
264 LJFOLD(GT KINT KINT)
265 LJFOLD(ULT KINT KINT)
266 LJFOLD(UGE KINT KINT)
267 LJFOLD(ULE KINT KINT)
268 LJFOLD(UGT KINT KINT)
269 LJFOLD(ABC KINT KINT)
270 LJFOLDF(kfold_intcomp)
272 int32_t a = fleft->i, b = fright->i;
273 switch ((IROp)fins->o) {
274 case IR_LT: return CONDFOLD(a < b);
275 case IR_GE: return CONDFOLD(a >= b);
276 case IR_LE: return CONDFOLD(a <= b);
277 case IR_GT: return CONDFOLD(a > b);
278 case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b);
279 case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b);
280 case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
281 case IR_ABC:
282 case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
283 default: lua_assert(0); return FAILFOLD;
287 LJFOLD(UGE any KINT)
288 LJFOLDF(kfold_intcomp0)
290 if (fright->i == 0)
291 return DROPFOLD;
292 return NEXTFOLD;
295 /* -- Constant folding for 64 bit integers -------------------------------- */
297 static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
299 switch (op) {
300 #if LJ_64 || LJ_HASFFI
301 case IR_ADD: k1 += k2; break;
302 case IR_SUB: k1 -= k2; break;
303 #endif
304 #if LJ_HASFFI
305 case IR_MUL: k1 *= k2; break;
306 case IR_BAND: k1 &= k2; break;
307 case IR_BOR: k1 |= k2; break;
308 case IR_BXOR: k1 ^= k2; break;
309 #endif
310 default: UNUSED(k2); lua_assert(0); break;
312 return k1;
315 LJFOLD(ADD KINT64 KINT64)
316 LJFOLD(SUB KINT64 KINT64)
317 LJFOLD(MUL KINT64 KINT64)
318 LJFOLD(BAND KINT64 KINT64)
319 LJFOLD(BOR KINT64 KINT64)
320 LJFOLD(BXOR KINT64 KINT64)
321 LJFOLDF(kfold_int64arith)
323 return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64,
324 ir_k64(fright)->u64, (IROp)fins->o));
327 LJFOLD(DIV KINT64 KINT64)
328 LJFOLD(MOD KINT64 KINT64)
329 LJFOLD(POW KINT64 KINT64)
330 LJFOLDF(kfold_int64arith2)
332 #if LJ_HASFFI
333 uint64_t k1 = ir_k64(fleft)->u64, k2 = ir_k64(fright)->u64;
334 if (irt_isi64(fins->t)) {
335 k1 = fins->o == IR_DIV ? lj_carith_divi64((int64_t)k1, (int64_t)k2) :
336 fins->o == IR_MOD ? lj_carith_modi64((int64_t)k1, (int64_t)k2) :
337 lj_carith_powi64((int64_t)k1, (int64_t)k2);
338 } else {
339 k1 = fins->o == IR_DIV ? lj_carith_divu64(k1, k2) :
340 fins->o == IR_MOD ? lj_carith_modu64(k1, k2) :
341 lj_carith_powu64(k1, k2);
343 return INT64FOLD(k1);
344 #else
345 UNUSED(J); lua_assert(0); return FAILFOLD;
346 #endif
349 LJFOLD(BSHL KINT64 KINT)
350 LJFOLD(BSHR KINT64 KINT)
351 LJFOLD(BSAR KINT64 KINT)
352 LJFOLD(BROL KINT64 KINT)
353 LJFOLD(BROR KINT64 KINT)
354 LJFOLDF(kfold_int64shift)
356 #if LJ_HASFFI || LJ_64
357 uint64_t k = ir_k64(fleft)->u64;
358 int32_t sh = (fright->i & 63);
359 switch ((IROp)fins->o) {
360 case IR_BSHL: k <<= sh; break;
361 #if LJ_HASFFI
362 case IR_BSHR: k >>= sh; break;
363 case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
364 case IR_BROL: k = lj_rol(k, sh); break;
365 case IR_BROR: k = lj_ror(k, sh); break;
366 #endif
367 default: lua_assert(0); break;
369 return INT64FOLD(k);
370 #else
371 UNUSED(J); lua_assert(0); return FAILFOLD;
372 #endif
375 LJFOLD(BNOT KINT64)
376 LJFOLDF(kfold_bnot64)
378 #if LJ_HASFFI
379 return INT64FOLD(~ir_k64(fleft)->u64);
380 #else
381 UNUSED(J); lua_assert(0); return FAILFOLD;
382 #endif
385 LJFOLD(BSWAP KINT64)
386 LJFOLDF(kfold_bswap64)
388 #if LJ_HASFFI
389 return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64));
390 #else
391 UNUSED(J); lua_assert(0); return FAILFOLD;
392 #endif
395 LJFOLD(LT KINT64 KINT)
396 LJFOLD(GE KINT64 KINT)
397 LJFOLD(LE KINT64 KINT)
398 LJFOLD(GT KINT64 KINT)
399 LJFOLD(ULT KINT64 KINT)
400 LJFOLD(UGE KINT64 KINT)
401 LJFOLD(ULE KINT64 KINT)
402 LJFOLD(UGT KINT64 KINT)
403 LJFOLDF(kfold_int64comp)
405 #if LJ_HASFFI
406 uint64_t a = ir_k64(fleft)->u64, b = ir_k64(fright)->u64;
407 switch ((IROp)fins->o) {
408 case IR_LT: return CONDFOLD(a < b);
409 case IR_GE: return CONDFOLD(a >= b);
410 case IR_LE: return CONDFOLD(a <= b);
411 case IR_GT: return CONDFOLD(a > b);
412 case IR_ULT: return CONDFOLD((uint64_t)a < (uint64_t)b);
413 case IR_UGE: return CONDFOLD((uint64_t)a >= (uint64_t)b);
414 case IR_ULE: return CONDFOLD((uint64_t)a <= (uint64_t)b);
415 case IR_UGT: return CONDFOLD((uint64_t)a > (uint64_t)b);
416 default: lua_assert(0); return FAILFOLD;
418 #else
419 UNUSED(J); lua_assert(0); return FAILFOLD;
420 #endif
423 LJFOLD(UGE any KINT64)
424 LJFOLDF(kfold_int64comp0)
426 #if LJ_HASFFI
427 if (ir_k64(fright)->u64 == 0)
428 return DROPFOLD;
429 return NEXTFOLD;
430 #else
431 UNUSED(J); lua_assert(0); return FAILFOLD;
432 #endif
435 /* -- Constant folding for strings ---------------------------------------- */
437 LJFOLD(SNEW KKPTR KINT)
438 LJFOLDF(kfold_snew_kptr)
440 GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i);
441 return lj_ir_kstr(J, s);
444 LJFOLD(SNEW any KINT)
445 LJFOLDF(kfold_snew_empty)
447 if (fright->i == 0)
448 return lj_ir_kstr(J, lj_str_new(J->L, "", 0));
449 return NEXTFOLD;
452 LJFOLD(STRREF KGC KINT)
453 LJFOLDF(kfold_strref)
455 GCstr *str = ir_kstr(fleft);
456 lua_assert((MSize)fright->i < str->len);
457 return lj_ir_kkptr(J, (char *)strdata(str) + fright->i);
460 LJFOLD(STRREF SNEW any)
461 LJFOLDF(kfold_strref_snew)
463 PHIBARRIER(fleft);
464 if (irref_isk(fins->op2) && fright->i == 0) {
465 return fleft->op1; /* strref(snew(ptr, len), 0) ==> ptr */
466 } else {
467 /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
468 IRIns *ir = IR(fleft->op1);
469 IRRef1 str = ir->op1; /* IRIns * is not valid across emitir. */
470 lua_assert(ir->o == IR_STRREF);
471 PHIBARRIER(ir);
472 fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */
473 fins->op1 = str;
474 fins->ot = IRT(IR_STRREF, IRT_P32);
475 return RETRYFOLD;
477 return NEXTFOLD;
480 LJFOLD(CALLN CARG IRCALL_lj_str_cmp)
481 LJFOLDF(kfold_strcmp)
483 if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
484 GCstr *a = ir_kstr(IR(fleft->op1));
485 GCstr *b = ir_kstr(IR(fleft->op2));
486 return INTFOLD(lj_str_cmp(a, b));
488 return NEXTFOLD;
491 /* -- Constant folding of pointer arithmetic ------------------------------ */
493 LJFOLD(ADD KGC KINT)
494 LJFOLD(ADD KGC KINT64)
495 LJFOLDF(kfold_add_kgc)
497 GCobj *o = ir_kgc(fleft);
498 #if LJ_64
499 ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
500 #else
501 ptrdiff_t ofs = fright->i;
502 #endif
503 return lj_ir_kkptr(J, (char *)o + ofs);
506 LJFOLD(ADD KPTR KINT)
507 LJFOLD(ADD KPTR KINT64)
508 LJFOLD(ADD KKPTR KINT)
509 LJFOLD(ADD KKPTR KINT64)
510 LJFOLDF(kfold_add_kptr)
512 void *p = ir_kptr(fleft);
513 #if LJ_64
514 ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
515 #else
516 ptrdiff_t ofs = fright->i;
517 #endif
518 return lj_ir_kptr_(J, fleft->o, (char *)p + ofs);
521 /* -- Constant folding of conversions ------------------------------------- */
523 LJFOLD(TOBIT KNUM KNUM)
524 LJFOLDF(kfold_tobit)
526 TValue tv;
527 tv.n = knumleft + knumright;
528 return INTFOLD((int32_t)tv.u32.lo);
531 LJFOLD(CONV KINT IRCONV_NUM_INT)
532 LJFOLDF(kfold_conv_kint_num)
534 return lj_ir_knum(J, cast_num(fleft->i));
537 LJFOLD(CONV KINT IRCONV_NUM_U32)
538 LJFOLDF(kfold_conv_kintu32_num)
540 return lj_ir_knum(J, cast_num((uint32_t)fleft->i));
543 LJFOLD(CONV KINT IRCONV_I64_INT)
544 LJFOLD(CONV KINT IRCONV_U64_INT)
545 LJFOLDF(kfold_conv_kint_i64)
547 return INT64FOLD((uint64_t)(int64_t)fleft->i);
550 LJFOLD(CONV KINT64 IRCONV_NUM_I64)
551 LJFOLDF(kfold_conv_kint64_num_i64)
553 return lj_ir_knum(J, cast_num((int64_t)ir_kint64(fleft)->u64));
556 LJFOLD(CONV KINT64 IRCONV_NUM_U64)
557 LJFOLDF(kfold_conv_kint64_num_u64)
559 return lj_ir_knum(J, cast_num(ir_kint64(fleft)->u64));
562 LJFOLD(CONV KINT64 IRCONV_INT_I64)
563 LJFOLD(CONV KINT64 IRCONV_U32_I64)
564 LJFOLDF(kfold_conv_kint64_int_i64)
566 return INTFOLD((int32_t)ir_kint64(fleft)->u64);
569 LJFOLD(CONV KNUM IRCONV_INT_NUM)
570 LJFOLDF(kfold_conv_knum_int_num)
572 lua_Number n = knumleft;
573 if (!(fins->op2 & IRCONV_TRUNC)) {
574 int32_t k = lj_num2int(n);
575 if (irt_isguard(fins->t) && n != cast_num(k)) {
576 /* We're about to create a guard which always fails, like CONV +1.5.
577 ** Some pathological loops cause this during LICM, e.g.:
578 ** local x,k,t = 0,1.5,{1,[1.5]=2}
579 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
580 ** assert(x == 300)
582 return FAILFOLD;
584 return INTFOLD(k);
585 } else {
586 return INTFOLD((int32_t)n);
590 LJFOLD(CONV KNUM IRCONV_U32_NUM)
591 LJFOLDF(kfold_conv_knum_u32_num)
593 lua_assert((fins->op2 & IRCONV_TRUNC));
594 return INTFOLD((int32_t)(uint32_t)knumleft);
597 LJFOLD(CONV KNUM IRCONV_I64_NUM)
598 LJFOLDF(kfold_conv_knum_i64_num)
600 lua_assert((fins->op2 & IRCONV_TRUNC));
601 return INT64FOLD((uint64_t)(int64_t)knumleft);
604 LJFOLD(CONV KNUM IRCONV_U64_NUM)
605 LJFOLDF(kfold_conv_knum_u64_num)
607 lua_assert((fins->op2 & IRCONV_TRUNC));
608 return INT64FOLD(lj_num2u64(knumleft));
611 LJFOLD(TOSTR KNUM)
612 LJFOLDF(kfold_tostr_knum)
614 return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft));
617 LJFOLD(TOSTR KINT)
618 LJFOLDF(kfold_tostr_kint)
620 return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i));
623 LJFOLD(STRTO KGC)
624 LJFOLDF(kfold_strto)
626 TValue n;
627 if (lj_str_tonum(ir_kstr(fleft), &n))
628 return lj_ir_knum(J, numV(&n));
629 return FAILFOLD;
632 /* -- Constant folding of equality checks --------------------------------- */
634 /* Don't constant-fold away FLOAD checks against KNULL. */
635 LJFOLD(EQ FLOAD KNULL)
636 LJFOLD(NE FLOAD KNULL)
637 LJFOLDX(lj_opt_cse)
639 /* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
640 LJFOLD(EQ any KNULL)
641 LJFOLD(NE any KNULL)
642 LJFOLD(EQ KNULL any)
643 LJFOLD(NE KNULL any)
644 LJFOLD(EQ KINT KINT) /* Constants are unique, so same refs <==> same value. */
645 LJFOLD(NE KINT KINT)
646 LJFOLD(EQ KINT64 KINT64)
647 LJFOLD(NE KINT64 KINT64)
648 LJFOLD(EQ KGC KGC)
649 LJFOLD(NE KGC KGC)
650 LJFOLDF(kfold_kref)
652 return CONDFOLD((fins->op1 == fins->op2) ^ (fins->o == IR_NE));
655 /* -- Algebraic shortcuts ------------------------------------------------- */
657 LJFOLD(FPMATH FPMATH IRFPM_FLOOR)
658 LJFOLD(FPMATH FPMATH IRFPM_CEIL)
659 LJFOLD(FPMATH FPMATH IRFPM_TRUNC)
660 LJFOLDF(shortcut_round)
662 IRFPMathOp op = (IRFPMathOp)fleft->op2;
663 if (op == IRFPM_FLOOR || op == IRFPM_CEIL || op == IRFPM_TRUNC)
664 return LEFTFOLD; /* round(round_left(x)) = round_left(x) */
665 return NEXTFOLD;
668 LJFOLD(ABS ABS KNUM)
669 LJFOLDF(shortcut_left)
671 return LEFTFOLD; /* f(g(x)) ==> g(x) */
674 LJFOLD(ABS NEG KNUM)
675 LJFOLDF(shortcut_dropleft)
677 PHIBARRIER(fleft);
678 fins->op1 = fleft->op1; /* abs(neg(x)) ==> abs(x) */
679 return RETRYFOLD;
682 /* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
683 LJFOLD(NEG NEG KNUM)
684 LJFOLD(BNOT BNOT)
685 LJFOLD(BSWAP BSWAP)
686 LJFOLDF(shortcut_leftleft)
688 PHIBARRIER(fleft); /* See above. Fold would be ok, but not beneficial. */
689 return fleft->op1; /* f(g(x)) ==> x */
692 /* -- FP algebraic simplifications ---------------------------------------- */
694 /* FP arithmetic is tricky -- there's not much to simplify.
695 ** Please note the following common pitfalls before sending "improvements":
696 ** x+0 ==> x is INVALID for x=-0
697 ** 0-x ==> -x is INVALID for x=+0
698 ** x*0 ==> 0 is INVALID for x=-0, x=+-Inf or x=NaN
701 LJFOLD(ADD NEG any)
702 LJFOLDF(simplify_numadd_negx)
704 PHIBARRIER(fleft);
705 fins->o = IR_SUB; /* (-a) + b ==> b - a */
706 fins->op1 = fins->op2;
707 fins->op2 = fleft->op1;
708 return RETRYFOLD;
711 LJFOLD(ADD any NEG)
712 LJFOLDF(simplify_numadd_xneg)
714 PHIBARRIER(fright);
715 fins->o = IR_SUB; /* a + (-b) ==> a - b */
716 fins->op2 = fright->op1;
717 return RETRYFOLD;
720 LJFOLD(SUB any KNUM)
721 LJFOLDF(simplify_numsub_k)
723 lua_Number n = knumright;
724 if (n == 0.0) /* x - (+-0) ==> x */
725 return LEFTFOLD;
726 return NEXTFOLD;
729 LJFOLD(SUB NEG KNUM)
730 LJFOLDF(simplify_numsub_negk)
732 PHIBARRIER(fleft);
733 fins->op2 = fleft->op1; /* (-x) - k ==> (-k) - x */
734 fins->op1 = (IRRef1)lj_ir_knum(J, -knumright);
735 return RETRYFOLD;
738 LJFOLD(SUB any NEG)
739 LJFOLDF(simplify_numsub_xneg)
741 PHIBARRIER(fright);
742 fins->o = IR_ADD; /* a - (-b) ==> a + b */
743 fins->op2 = fright->op1;
744 return RETRYFOLD;
747 LJFOLD(MUL any KNUM)
748 LJFOLD(DIV any KNUM)
749 LJFOLDF(simplify_nummuldiv_k)
751 lua_Number n = knumright;
752 if (n == 1.0) { /* x o 1 ==> x */
753 return LEFTFOLD;
754 } else if (n == -1.0) { /* x o -1 ==> -x */
755 fins->o = IR_NEG;
756 fins->op2 = (IRRef1)lj_ir_knum_neg(J);
757 return RETRYFOLD;
758 } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */
759 fins->o = IR_ADD;
760 fins->op2 = fins->op1;
761 return RETRYFOLD;
763 return NEXTFOLD;
766 LJFOLD(MUL NEG KNUM)
767 LJFOLD(DIV NEG KNUM)
768 LJFOLDF(simplify_nummuldiv_negk)
770 PHIBARRIER(fleft);
771 fins->op1 = fleft->op1; /* (-a) o k ==> a o (-k) */
772 fins->op2 = (IRRef1)lj_ir_knum(J, -knumright);
773 return RETRYFOLD;
776 LJFOLD(MUL NEG NEG)
777 LJFOLD(DIV NEG NEG)
778 LJFOLDF(simplify_nummuldiv_negneg)
780 PHIBARRIER(fleft);
781 PHIBARRIER(fright);
782 fins->op1 = fleft->op1; /* (-a) o (-b) ==> a o b */
783 fins->op2 = fright->op1;
784 return RETRYFOLD;
787 LJFOLD(POW any KINT)
788 LJFOLDF(simplify_numpow_xk)
790 int32_t k = fright->i;
791 TRef ref = fins->op1;
792 if (k == 0) /* x ^ 0 ==> 1 */
793 return lj_ir_knum_one(J); /* Result must be a number, not an int. */
794 if (k == 1) /* x ^ 1 ==> x */
795 return LEFTFOLD;
796 if ((uint32_t)(k+65536) > 2*65536u) /* Limit code explosion. */
797 return NEXTFOLD;
798 if (k < 0) { /* x ^ (-k) ==> (1/x) ^ k. */
799 ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
800 k = -k;
802 /* Unroll x^k for 1 <= k <= 65536. */
803 for (; (k & 1) == 0; k >>= 1) /* Handle leading zeros. */
804 ref = emitir(IRTN(IR_MUL), ref, ref);
805 if ((k >>= 1) != 0) { /* Handle trailing bits. */
806 TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
807 for (; k != 1; k >>= 1) {
808 if (k & 1)
809 ref = emitir(IRTN(IR_MUL), ref, tmp);
810 tmp = emitir(IRTN(IR_MUL), tmp, tmp);
812 ref = emitir(IRTN(IR_MUL), ref, tmp);
814 return ref;
817 LJFOLD(POW KNUM any)
818 LJFOLDF(simplify_numpow_kx)
820 lua_Number n = knumleft;
821 if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
822 fins->o = IR_CONV;
823 fins->op1 = fins->op2;
824 fins->op2 = IRCONV_NUM_INT;
825 fins->op2 = (IRRef1)lj_opt_fold(J);
826 fins->op1 = (IRRef1)lj_ir_knum_one(J);
827 fins->o = IR_LDEXP;
828 return RETRYFOLD;
830 return NEXTFOLD;
833 /* -- Simplify conversions ------------------------------------------------ */
835 LJFOLD(CONV CONV IRCONV_NUM_INT) /* _NUM */
836 LJFOLDF(shortcut_conv_num_int)
838 PHIBARRIER(fleft);
839 /* Only safe with a guarded conversion to int. */
840 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM && irt_isguard(fleft->t))
841 return fleft->op1; /* f(g(x)) ==> x */
842 return NEXTFOLD;
845 LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */
846 LJFOLDF(simplify_conv_int_num)
848 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
849 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT)
850 return fleft->op1;
851 return NEXTFOLD;
854 LJFOLD(CONV CONV IRCONV_U32_NUM) /* _U32*/
855 LJFOLDF(simplify_conv_u32_num)
857 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
858 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
859 return fleft->op1;
860 return NEXTFOLD;
863 LJFOLD(CONV CONV IRCONV_I64_NUM) /* _INT or _U32*/
864 LJFOLD(CONV CONV IRCONV_U64_NUM) /* _INT or _U32*/
865 LJFOLDF(simplify_conv_i64_num)
867 PHIBARRIER(fleft);
868 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
869 /* Reduce to a sign-extension. */
870 fins->op1 = fleft->op1;
871 fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
872 return RETRYFOLD;
873 } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
874 #if LJ_TARGET_X64
875 return fleft->op1;
876 #else
877 /* Reduce to a zero-extension. */
878 fins->op1 = fleft->op1;
879 fins->op2 = (IRT_I64<<5)|IRT_U32;
880 return RETRYFOLD;
881 #endif
883 return NEXTFOLD;
886 /* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
887 LJFOLD(TOBIT CONV KNUM)
888 LJFOLDF(simplify_tobit_conv)
890 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
891 (fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
892 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
893 lua_assert(irt_isnum(fleft->t));
894 return fleft->op1;
896 return NEXTFOLD;
899 /* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
900 LJFOLD(FPMATH CONV IRFPM_FLOOR)
901 LJFOLD(FPMATH CONV IRFPM_CEIL)
902 LJFOLD(FPMATH CONV IRFPM_TRUNC)
903 LJFOLDF(simplify_floor_conv)
905 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
906 (fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
907 return LEFTFOLD;
908 return NEXTFOLD;
911 /* Strength reduction of widening. */
912 LJFOLD(CONV any IRCONV_I64_INT)
913 LJFOLDF(simplify_conv_sext)
915 IRRef ref = fins->op1;
916 int64_t ofs = 0;
917 if (!(fins->op2 & IRCONV_SEXT))
918 return NEXTFOLD;
919 PHIBARRIER(fleft);
920 if (fleft->o == IR_XLOAD && (irt_isu8(fleft->t) || irt_isu16(fleft->t)))
921 goto ok_reduce;
922 if (fleft->o == IR_ADD && irref_isk(fleft->op2)) {
923 ofs = (int64_t)IR(fleft->op2)->i;
924 ref = fleft->op1;
926 /* Use scalar evolution analysis results to strength-reduce sign-extension. */
927 if (ref == J->scev.idx) {
928 IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
929 lua_assert(irt_isint(J->scev.t));
930 if (lo && IR(lo)->i + ofs >= 0) {
931 ok_reduce:
932 #if LJ_TARGET_X64
933 /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
934 return LEFTFOLD;
935 #else
936 /* Reduce to a (cheaper) zero-extension. */
937 fins->op2 &= ~IRCONV_SEXT;
938 return RETRYFOLD;
939 #endif
942 return NEXTFOLD;
945 /* Special CSE rule for CONV. */
946 LJFOLD(CONV any any)
947 LJFOLDF(cse_conv)
949 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
950 IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
951 uint8_t guard = irt_isguard(fins->t);
952 IRRef ref = J->chain[IR_CONV];
953 while (ref > op1) {
954 IRIns *ir = IR(ref);
955 /* Commoning with stronger checks is ok. */
956 if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
957 irt_isguard(ir->t) >= guard)
958 return ref;
959 ref = ir->prev;
962 return EMITFOLD; /* No fallthrough to regular CSE. */
965 /* FP conversion narrowing. */
966 LJFOLD(TOBIT ADD KNUM)
967 LJFOLD(TOBIT SUB KNUM)
968 LJFOLD(CONV ADD IRCONV_INT_NUM)
969 LJFOLD(CONV SUB IRCONV_INT_NUM)
970 LJFOLD(CONV ADD IRCONV_I64_NUM)
971 LJFOLD(CONV SUB IRCONV_I64_NUM)
972 LJFOLDF(narrow_convert)
974 PHIBARRIER(fleft);
975 /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
976 if (J->chain[IR_LOOP])
977 return NEXTFOLD;
978 lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT);
979 return lj_opt_narrow_convert(J);
982 /* -- Integer algebraic simplifications ----------------------------------- */
984 LJFOLD(ADD any KINT)
985 LJFOLD(ADDOV any KINT)
986 LJFOLD(SUBOV any KINT)
987 LJFOLDF(simplify_intadd_k)
989 if (fright->i == 0) /* i o 0 ==> i */
990 return LEFTFOLD;
991 return NEXTFOLD;
994 LJFOLD(SUB any KINT)
995 LJFOLDF(simplify_intsub_k)
997 if (fright->i == 0) /* i - 0 ==> i */
998 return LEFTFOLD;
999 fins->o = IR_ADD; /* i - k ==> i + (-k) */
1000 fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i); /* Overflow for -2^31 ok. */
1001 return RETRYFOLD;
1004 LJFOLD(ADD any KINT64)
1005 LJFOLDF(simplify_intadd_k64)
1007 if (ir_kint64(fright)->u64 == 0) /* i + 0 ==> i */
1008 return LEFTFOLD;
1009 return NEXTFOLD;
1012 LJFOLD(SUB any KINT64)
1013 LJFOLDF(simplify_intsub_k64)
1015 uint64_t k = ir_kint64(fright)->u64;
1016 if (k == 0) /* i - 0 ==> i */
1017 return LEFTFOLD;
1018 fins->o = IR_ADD; /* i - k ==> i + (-k) */
1019 fins->op2 = (IRRef1)lj_ir_kint64(J, (uint64_t)-(int64_t)k);
1020 return RETRYFOLD;
1023 static TRef simplify_intmul_k(jit_State *J, int32_t k)
1025 /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2.
1026 ** But this is mainly intended for simple address arithmetic.
1027 ** Also it's easier for the backend to optimize the original multiplies.
1029 if (k == 1) { /* i * 1 ==> i */
1030 return LEFTFOLD;
1031 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */
1032 fins->o = IR_BSHL;
1033 fins->op2 = lj_ir_kint(J, lj_fls((uint32_t)k));
1034 return RETRYFOLD;
1036 return NEXTFOLD;
1039 LJFOLD(MUL any KINT)
1040 LJFOLDF(simplify_intmul_k32)
1042 if (fright->i == 0) /* i * 0 ==> 0 */
1043 return INTFOLD(0);
1044 else if (fright->i > 0)
1045 return simplify_intmul_k(J, fright->i);
1046 return NEXTFOLD;
1049 LJFOLD(MUL any KINT64)
1050 LJFOLDF(simplify_intmul_k64)
1053 if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */
1054 return INT64FOLD(0);
1055 #if LJ_64
1056 /* NYI: SPLIT for BSHL and 32 bit backend support. */
1057 else if (ir_kint64(fright)->u64 < 0x80000000u)
1058 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
1059 #endif
1060 return NEXTFOLD;
1063 LJFOLD(SUB any any)
1064 LJFOLD(SUBOV any any)
1065 LJFOLDF(simplify_intsub)
1067 if (fins->op1 == fins->op2 && !irt_isnum(fins->t)) /* i - i ==> 0 */
1068 return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
1069 return NEXTFOLD;
1072 LJFOLD(SUB ADD any)
1073 LJFOLDF(simplify_intsubadd_leftcancel)
1075 if (!irt_isnum(fins->t)) {
1076 PHIBARRIER(fleft);
1077 if (fins->op2 == fleft->op1) /* (i + j) - i ==> j */
1078 return fleft->op2;
1079 if (fins->op2 == fleft->op2) /* (i + j) - j ==> i */
1080 return fleft->op1;
1082 return NEXTFOLD;
1085 LJFOLD(SUB SUB any)
1086 LJFOLDF(simplify_intsubsub_leftcancel)
1088 if (!irt_isnum(fins->t)) {
1089 PHIBARRIER(fleft);
1090 if (fins->op1 == fleft->op1) { /* (i - j) - i ==> 0 - j */
1091 fins->op1 = (IRRef1)lj_ir_kint(J, 0);
1092 fins->op2 = fleft->op2;
1093 return RETRYFOLD;
1096 return NEXTFOLD;
1099 LJFOLD(SUB any SUB)
1100 LJFOLDF(simplify_intsubsub_rightcancel)
1102 if (!irt_isnum(fins->t)) {
1103 PHIBARRIER(fright);
1104 if (fins->op1 == fright->op1) /* i - (i - j) ==> j */
1105 return fright->op2;
1107 return NEXTFOLD;
1110 LJFOLD(SUB any ADD)
1111 LJFOLDF(simplify_intsubadd_rightcancel)
1113 if (!irt_isnum(fins->t)) {
1114 PHIBARRIER(fright);
1115 if (fins->op1 == fright->op1) { /* i - (i + j) ==> 0 - j */
1116 fins->op2 = fright->op2;
1117 fins->op1 = (IRRef1)lj_ir_kint(J, 0);
1118 return RETRYFOLD;
1120 if (fins->op1 == fright->op2) { /* i - (j + i) ==> 0 - j */
1121 fins->op2 = fright->op1;
1122 fins->op1 = (IRRef1)lj_ir_kint(J, 0);
1123 return RETRYFOLD;
1126 return NEXTFOLD;
1129 LJFOLD(SUB ADD ADD)
1130 LJFOLDF(simplify_intsubaddadd_cancel)
1132 if (!irt_isnum(fins->t)) {
1133 PHIBARRIER(fleft);
1134 PHIBARRIER(fright);
1135 if (fleft->op1 == fright->op1) { /* (i + j1) - (i + j2) ==> j1 - j2 */
1136 fins->op1 = fleft->op2;
1137 fins->op2 = fright->op2;
1138 return RETRYFOLD;
1140 if (fleft->op1 == fright->op2) { /* (i + j1) - (j2 + i) ==> j1 - j2 */
1141 fins->op1 = fleft->op2;
1142 fins->op2 = fright->op1;
1143 return RETRYFOLD;
1145 if (fleft->op2 == fright->op1) { /* (j1 + i) - (i + j2) ==> j1 - j2 */
1146 fins->op1 = fleft->op1;
1147 fins->op2 = fright->op2;
1148 return RETRYFOLD;
1150 if (fleft->op2 == fright->op2) { /* (j1 + i) - (j2 + i) ==> j1 - j2 */
1151 fins->op1 = fleft->op1;
1152 fins->op2 = fright->op1;
1153 return RETRYFOLD;
1156 return NEXTFOLD;
1159 LJFOLD(BAND any KINT)
1160 LJFOLD(BAND any KINT64)
1161 LJFOLDF(simplify_band_k)
1163 int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
1164 (int64_t)ir_k64(fright)->u64;
1165 if (k == 0) /* i & 0 ==> 0 */
1166 return RIGHTFOLD;
1167 if (k == -1) /* i & -1 ==> i */
1168 return LEFTFOLD;
1169 return NEXTFOLD;
1172 LJFOLD(BOR any KINT)
1173 LJFOLD(BOR any KINT64)
1174 LJFOLDF(simplify_bor_k)
1176 int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
1177 (int64_t)ir_k64(fright)->u64;
1178 if (k == 0) /* i | 0 ==> i */
1179 return LEFTFOLD;
1180 if (k == -1) /* i | -1 ==> -1 */
1181 return RIGHTFOLD;
1182 return NEXTFOLD;
1185 LJFOLD(BXOR any KINT)
1186 LJFOLD(BXOR any KINT64)
1187 LJFOLDF(simplify_bxor_k)
1189 int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
1190 (int64_t)ir_k64(fright)->u64;
1191 if (k == 0) /* i xor 0 ==> i */
1192 return LEFTFOLD;
1193 if (k == -1) { /* i xor -1 ==> ~i */
1194 fins->o = IR_BNOT;
1195 fins->op2 = 0;
1196 return RETRYFOLD;
1198 return NEXTFOLD;
1201 LJFOLD(BSHL any KINT)
1202 LJFOLD(BSHR any KINT)
1203 LJFOLD(BSAR any KINT)
1204 LJFOLD(BROL any KINT)
1205 LJFOLD(BROR any KINT)
1206 LJFOLDF(simplify_shift_ik)
1208 int32_t mask = irt_is64(fins->t) ? 63 : 31;
1209 int32_t k = (fright->i & mask);
1210 if (k == 0) /* i o 0 ==> i */
1211 return LEFTFOLD;
1212 if (k == 1 && fins->o == IR_BSHL) { /* i << 1 ==> i + i */
1213 fins->o = IR_ADD;
1214 fins->op2 = fins->op1;
1215 return RETRYFOLD;
1217 if (k != fright->i) { /* i o k ==> i o (k & mask) */
1218 fins->op2 = (IRRef1)lj_ir_kint(J, k);
1219 return RETRYFOLD;
1221 if (fins->o == IR_BROR) { /* bror(i, k) ==> brol(i, (-k)&mask) */
1222 fins->o = IR_BROL;
1223 fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&mask);
1224 return RETRYFOLD;
1226 return NEXTFOLD;
1229 LJFOLD(BSHL any BAND)
1230 LJFOLD(BSHR any BAND)
1231 LJFOLD(BSAR any BAND)
1232 LJFOLD(BROL any BAND)
1233 LJFOLD(BROR any BAND)
1234 LJFOLDF(simplify_shift_andk)
1236 IRIns *irk = IR(fright->op2);
1237 PHIBARRIER(fright);
1238 if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
1239 irk->o == IR_KINT) { /* i o (j & mask) ==> i o j */
1240 int32_t mask = irt_is64(fins->t) ? 63 : 31;
1241 int32_t k = irk->i & mask;
1242 if (k == mask) {
1243 fins->op2 = fright->op1;
1244 return RETRYFOLD;
1247 return NEXTFOLD;
1250 LJFOLD(BSHL KINT any)
1251 LJFOLD(BSHR KINT any)
1252 LJFOLD(BSHL KINT64 any)
1253 LJFOLD(BSHR KINT64 any)
1254 LJFOLDF(simplify_shift1_ki)
1256 int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
1257 (int64_t)ir_k64(fleft)->u64;
1258 if (k == 0) /* 0 o i ==> 0 */
1259 return LEFTFOLD;
1260 return NEXTFOLD;
1263 LJFOLD(BSAR KINT any)
1264 LJFOLD(BROL KINT any)
1265 LJFOLD(BROR KINT any)
1266 LJFOLD(BSAR KINT64 any)
1267 LJFOLD(BROL KINT64 any)
1268 LJFOLD(BROR KINT64 any)
1269 LJFOLDF(simplify_shift2_ki)
1271 int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
1272 (int64_t)ir_k64(fleft)->u64;
1273 if (k == 0 || k == -1) /* 0 o i ==> 0; -1 o i ==> -1 */
1274 return LEFTFOLD;
1275 return NEXTFOLD;
1278 /* -- Reassociation ------------------------------------------------------- */
1280 LJFOLD(ADD ADD KINT)
1281 LJFOLD(MUL MUL KINT)
1282 LJFOLD(BAND BAND KINT)
1283 LJFOLD(BOR BOR KINT)
1284 LJFOLD(BXOR BXOR KINT)
1285 LJFOLDF(reassoc_intarith_k)
1287 IRIns *irk = IR(fleft->op2);
1288 if (irk->o == IR_KINT) {
1289 int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
1290 if (k == irk->i) /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
1291 return LEFTFOLD;
1292 PHIBARRIER(fleft);
1293 fins->op1 = fleft->op1;
1294 fins->op2 = (IRRef1)lj_ir_kint(J, k);
1295 return RETRYFOLD; /* (i o k1) o k2 ==> i o (k1 o k2) */
1297 return NEXTFOLD;
1300 LJFOLD(ADD ADD KINT64)
1301 LJFOLD(MUL MUL KINT64)
1302 LJFOLD(BAND BAND KINT64)
1303 LJFOLD(BOR BOR KINT64)
1304 LJFOLD(BXOR BXOR KINT64)
1305 LJFOLDF(reassoc_intarith_k64)
1307 #if LJ_HASFFI || LJ_64
1308 IRIns *irk = IR(fleft->op2);
1309 if (irk->o == IR_KINT64) {
1310 uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
1311 ir_k64(fright)->u64, (IROp)fins->o);
1312 PHIBARRIER(fleft);
1313 fins->op1 = fleft->op1;
1314 fins->op2 = (IRRef1)lj_ir_kint64(J, k);
1315 return RETRYFOLD; /* (i o k1) o k2 ==> i o (k1 o k2) */
1317 return NEXTFOLD;
1318 #else
1319 UNUSED(J); lua_assert(0); return FAILFOLD;
1320 #endif
1323 LJFOLD(MIN MIN any)
1324 LJFOLD(MAX MAX any)
1325 LJFOLD(BAND BAND any)
1326 LJFOLD(BOR BOR any)
1327 LJFOLDF(reassoc_dup)
1329 PHIBARRIER(fleft);
1330 if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
1331 return LEFTFOLD; /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */
1332 return NEXTFOLD;
1335 LJFOLD(BXOR BXOR any)
1336 LJFOLDF(reassoc_bxor)
1338 PHIBARRIER(fleft);
1339 if (fins->op2 == fleft->op1) /* (a xor b) xor a ==> b */
1340 return fleft->op2;
1341 if (fins->op2 == fleft->op2) /* (a xor b) xor b ==> a */
1342 return fleft->op1;
1343 return NEXTFOLD;
1346 LJFOLD(BSHL BSHL KINT)
1347 LJFOLD(BSHR BSHR KINT)
1348 LJFOLD(BSAR BSAR KINT)
1349 LJFOLD(BROL BROL KINT)
1350 LJFOLD(BROR BROR KINT)
1351 LJFOLDF(reassoc_shift)
1353 IRIns *irk = IR(fleft->op2);
1354 PHIBARRIER(fleft); /* The (shift any KINT) rule covers k2 == 0 and more. */
1355 if (irk->o == IR_KINT) { /* (i o k1) o k2 ==> i o (k1 + k2) */
1356 int32_t mask = irt_is64(fins->t) ? 63 : 31;
1357 int32_t k = (irk->i & mask) + (fright->i & mask);
1358 if (k > mask) { /* Combined shift too wide? */
1359 if (fins->o == IR_BSHL || fins->o == IR_BSHR)
1360 return mask == 31 ? INTFOLD(0) : INT64FOLD(0);
1361 else if (fins->o == IR_BSAR)
1362 k = mask;
1363 else
1364 k &= mask;
1366 fins->op1 = fleft->op1;
1367 fins->op2 = (IRRef1)lj_ir_kint(J, k);
1368 return RETRYFOLD;
1370 return NEXTFOLD;
1373 LJFOLD(MIN MIN KNUM)
1374 LJFOLD(MAX MAX KNUM)
1375 LJFOLDF(reassoc_minmax_k)
1377 IRIns *irk = IR(fleft->op2);
1378 if (irk->o == IR_KNUM) {
1379 lua_Number a = ir_knum(irk)->n;
1380 lua_Number b = knumright;
1381 lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
1382 if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
1383 return LEFTFOLD;
1384 PHIBARRIER(fleft);
1385 fins->op1 = fleft->op1;
1386 fins->op2 = (IRRef1)lj_ir_knum(J, y);
1387 return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */
1389 return NEXTFOLD;
1392 LJFOLD(MIN MAX any)
1393 LJFOLD(MAX MIN any)
1394 LJFOLDF(reassoc_minmax_left)
1396 if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
1397 return RIGHTFOLD; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */
1398 return NEXTFOLD;
1401 LJFOLD(MIN any MAX)
1402 LJFOLD(MAX any MIN)
1403 LJFOLDF(reassoc_minmax_right)
1405 if (fins->op1 == fright->op1 || fins->op1 == fright->op2)
1406 return LEFTFOLD; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */
1407 return NEXTFOLD;
1410 /* -- Array bounds check elimination -------------------------------------- */
1412 /* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
1413 ** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
1414 ** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
1416 LJFOLD(ABC any ADD)
1417 LJFOLDF(abc_fwd)
1419 if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
1420 if (irref_isk(fright->op2)) {
1421 IRIns *add2 = IR(fright->op1);
1422 if (add2->o == IR_ADD && irref_isk(add2->op2) &&
1423 IR(fright->op2)->i == -IR(add2->op2)->i) {
1424 IRRef ref = J->chain[IR_ABC];
1425 IRRef lim = add2->op1;
1426 if (fins->op1 > lim) lim = fins->op1;
1427 while (ref > lim) {
1428 IRIns *ir = IR(ref);
1429 if (ir->op1 == fins->op1 && ir->op2 == add2->op1)
1430 return DROPFOLD;
1431 ref = ir->prev;
1436 return NEXTFOLD;
1439 /* Eliminate ABC for constants.
1440 ** ABC(asize, k1), ABC(asize k2) ==> ABC(asize, max(k1, k2))
1441 ** Drop second ABC if k2 is lower. Otherwise patch first ABC with k2.
1443 LJFOLD(ABC any KINT)
1444 LJFOLDF(abc_k)
1446 if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
1447 IRRef ref = J->chain[IR_ABC];
1448 IRRef asize = fins->op1;
1449 while (ref > asize) {
1450 IRIns *ir = IR(ref);
1451 if (ir->op1 == asize && irref_isk(ir->op2)) {
1452 int32_t k = IR(ir->op2)->i;
1453 if (fright->i > k)
1454 ir->op2 = fins->op2;
1455 return DROPFOLD;
1457 ref = ir->prev;
1459 return EMITFOLD; /* Already performed CSE. */
1461 return NEXTFOLD;
1464 /* Eliminate invariant ABC inside loop. */
1465 LJFOLD(ABC any any)
1466 LJFOLDF(abc_invar)
1468 if (!irt_isint(fins->t) && J->chain[IR_LOOP]) /* Currently marked as PTR. */
1469 return DROPFOLD;
1470 return NEXTFOLD;
1473 /* -- Commutativity ------------------------------------------------------- */
1475 /* The refs of commutative ops are canonicalized. Lower refs go to the right.
1476 ** Rationale behind this:
1477 ** - It (also) moves constants to the right.
1478 ** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices).
1479 ** - It helps CSE to find more matches.
1480 ** - The assembler generates better code with constants at the right.
1483 LJFOLD(ADD any any)
1484 LJFOLD(MUL any any)
1485 LJFOLD(ADDOV any any)
1486 LJFOLDF(comm_swap)
1488 if (fins->op1 < fins->op2) { /* Move lower ref to the right. */
1489 IRRef1 tmp = fins->op1;
1490 fins->op1 = fins->op2;
1491 fins->op2 = tmp;
1492 return RETRYFOLD;
1494 return NEXTFOLD;
1497 LJFOLD(EQ any any)
1498 LJFOLD(NE any any)
1499 LJFOLDF(comm_equal)
1501 /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
1502 if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
1503 return CONDFOLD(fins->o == IR_EQ);
1504 return fold_comm_swap(J);
1507 LJFOLD(LT any any)
1508 LJFOLD(GE any any)
1509 LJFOLD(LE any any)
1510 LJFOLD(GT any any)
1511 LJFOLD(ULT any any)
1512 LJFOLD(UGE any any)
1513 LJFOLD(ULE any any)
1514 LJFOLD(UGT any any)
1515 LJFOLDF(comm_comp)
1517 /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
1518 if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
1519 return CONDFOLD((fins->o ^ (fins->o >> 1)) & 1);
1520 if (fins->op1 < fins->op2) { /* Move lower ref to the right. */
1521 IRRef1 tmp = fins->op1;
1522 fins->op1 = fins->op2;
1523 fins->op2 = tmp;
1524 fins->o ^= 3; /* GT <-> LT, GE <-> LE, does not affect U */
1525 return RETRYFOLD;
1527 return NEXTFOLD;
1530 LJFOLD(BAND any any)
1531 LJFOLD(BOR any any)
1532 LJFOLD(MIN any any)
1533 LJFOLD(MAX any any)
1534 LJFOLDF(comm_dup)
1536 if (fins->op1 == fins->op2) /* x o x ==> x */
1537 return LEFTFOLD;
1538 return fold_comm_swap(J);
1541 LJFOLD(BXOR any any)
1542 LJFOLDF(comm_bxor)
1544 if (fins->op1 == fins->op2) /* i xor i ==> 0 */
1545 return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
1546 return fold_comm_swap(J);
1549 /* -- Simplification of compound expressions ------------------------------ */
1551 static TRef kfold_xload(jit_State *J, IRIns *ir, const void *p)
1553 int32_t k;
1554 #if !LJ_TARGET_X86ORX64
1555 #error "Missing support for unaligned loads"
1556 #endif
1557 switch (irt_type(ir->t)) {
1558 case IRT_NUM: return lj_ir_knum_u64(J, *(uint64_t *)p);
1559 case IRT_I8: k = (int32_t)*(int8_t *)p; break;
1560 case IRT_U8: k = (int32_t)*(uint8_t *)p; break;
1561 case IRT_I16: k = (int32_t)*(int16_t *)p; break;
1562 case IRT_U16: k = (int32_t)*(uint16_t *)p; break;
1563 case IRT_INT: case IRT_U32: k = *(int32_t *)p; break;
1564 case IRT_I64: case IRT_U64: return lj_ir_kint64(J, *(uint64_t *)p);
1565 default: return 0;
1567 return lj_ir_kint(J, k);
1570 /* Turn: string.sub(str, a, b) == kstr
1571 ** into: string.byte(str, a) == string.byte(kstr, 1) etc.
1572 ** Note: this creates unaligned XLOADs!
1574 LJFOLD(EQ SNEW KGC)
1575 LJFOLD(NE SNEW KGC)
1576 LJFOLDF(merge_eqne_snew_kgc)
1578 GCstr *kstr = ir_kstr(fright);
1579 int32_t len = (int32_t)kstr->len;
1580 lua_assert(irt_isstr(fins->t));
1581 if (len <= 4) { /* Handle string lengths 0, 1, 2, 3, 4. */
1582 IROp op = (IROp)fins->o;
1583 IRRef strref = fleft->op1;
1584 lua_assert(IR(strref)->o == IR_STRREF);
1585 if (op == IR_EQ) {
1586 emitir(IRTGI(IR_EQ), fleft->op2, lj_ir_kint(J, len));
1587 /* Caveat: fins/fleft/fright is no longer valid after emitir. */
1588 } else {
1589 /* NE is not expanded since this would need an OR of two conds. */
1590 if (!irref_isk(fleft->op2)) /* Only handle the constant length case. */
1591 return NEXTFOLD;
1592 if (IR(fleft->op2)->i != len)
1593 return DROPFOLD;
1595 if (len > 0) {
1596 /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
1597 uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, IRT_I8) :
1598 len == 2 ? IRT(IR_XLOAD, IRT_U16) :
1599 IRTI(IR_XLOAD));
1600 TRef tmp = emitir(ot, strref,
1601 IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0));
1602 TRef val = kfold_xload(J, IR(tref_ref(tmp)), strdata(kstr));
1603 if (len == 3)
1604 tmp = emitir(IRTI(IR_BAND), tmp,
1605 lj_ir_kint(J, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
1606 fins->op1 = (IRRef1)tmp;
1607 fins->op2 = (IRRef1)val;
1608 fins->ot = (IROpT)IRTGI(op);
1609 return RETRYFOLD;
1610 } else {
1611 return DROPFOLD;
1614 return NEXTFOLD;
1617 /* -- Loads --------------------------------------------------------------- */
1619 /* Loads cannot be folded or passed on to CSE in general.
1620 ** Alias analysis is needed to check for forwarding opportunities.
1622 ** Caveat: *all* loads must be listed here or they end up at CSE!
1625 LJFOLD(ALOAD any)
1626 LJFOLDX(lj_opt_fwd_aload)
1628 /* From HREF fwd (see below). Must eliminate, not supported by fwd/backend. */
1629 LJFOLD(HLOAD KKPTR)
1630 LJFOLDF(kfold_hload_kkptr)
1632 UNUSED(J);
1633 lua_assert(ir_kptr(fleft) == niltvg(J2G(J)));
1634 return TREF_NIL;
1637 LJFOLD(HLOAD any)
1638 LJFOLDX(lj_opt_fwd_hload)
1640 LJFOLD(ULOAD any)
1641 LJFOLDX(lj_opt_fwd_uload)
1643 LJFOLD(CALLL any IRCALL_lj_tab_len)
1644 LJFOLDX(lj_opt_fwd_tab_len)
1646 /* Upvalue refs are really loads, but there are no corresponding stores.
1647 ** So CSE is ok for them, except for UREFO across a GC step (see below).
1648 ** If the referenced function is const, its upvalue addresses are const, too.
1649 ** This can be used to improve CSE by looking for the same address,
1650 ** even if the upvalues originate from a different function.
1652 LJFOLD(UREFO KGC any)
1653 LJFOLD(UREFC KGC any)
1654 LJFOLDF(cse_uref)
1656 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
1657 IRRef ref = J->chain[fins->o];
1658 GCfunc *fn = ir_kfunc(fleft);
1659 GCupval *uv = gco2uv(gcref(fn->l.uvptr[(fins->op2 >> 8)]));
1660 while (ref > 0) {
1661 IRIns *ir = IR(ref);
1662 if (irref_isk(ir->op1)) {
1663 GCfunc *fn2 = ir_kfunc(IR(ir->op1));
1664 if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
1665 if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
1666 break;
1667 return ref;
1670 ref = ir->prev;
1673 return EMITFOLD;
1676 LJFOLD(HREF TNEW any)
1677 LJFOLDF(fwd_href_tnew)
1679 if (lj_opt_fwd_href_nokey(J))
1680 return lj_ir_kkptr(J, niltvg(J2G(J)));
1681 return NEXTFOLD;
1684 LJFOLD(HREF TDUP KPRI)
1685 LJFOLD(HREF TDUP KGC)
1686 LJFOLD(HREF TDUP KNUM)
1687 LJFOLDF(fwd_href_tdup)
1689 TValue keyv;
1690 lj_ir_kvalue(J->L, &keyv, fright);
1691 if (lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv) == niltvg(J2G(J)) &&
1692 lj_opt_fwd_href_nokey(J))
1693 return lj_ir_kkptr(J, niltvg(J2G(J)));
1694 return NEXTFOLD;
1697 /* We can safely FOLD/CSE array/hash refs and field loads, since there
1698 ** are no corresponding stores. But we need to check for any NEWREF with
1699 ** an aliased table, as it may invalidate all of the pointers and fields.
1700 ** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
1701 ** FLOADs. And NEWREF itself is treated like a store (see below).
1703 LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
1704 LJFOLDF(fload_tab_tnew_asize)
1706 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
1707 return INTFOLD(fleft->op1);
1708 return NEXTFOLD;
1711 LJFOLD(FLOAD TNEW IRFL_TAB_HMASK)
1712 LJFOLDF(fload_tab_tnew_hmask)
1714 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
1715 return INTFOLD((1 << fleft->op2)-1);
1716 return NEXTFOLD;
1719 LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE)
1720 LJFOLDF(fload_tab_tdup_asize)
1722 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
1723 return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize);
1724 return NEXTFOLD;
1727 LJFOLD(FLOAD TDUP IRFL_TAB_HMASK)
1728 LJFOLDF(fload_tab_tdup_hmask)
1730 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
1731 return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask);
1732 return NEXTFOLD;
1735 LJFOLD(HREF any any)
1736 LJFOLD(FLOAD any IRFL_TAB_ARRAY)
1737 LJFOLD(FLOAD any IRFL_TAB_NODE)
1738 LJFOLD(FLOAD any IRFL_TAB_ASIZE)
1739 LJFOLD(FLOAD any IRFL_TAB_HMASK)
1740 LJFOLDF(fload_tab_ah)
1742 TRef tr = lj_opt_cse(J);
1743 return lj_opt_fwd_tptr(J, tref_ref(tr)) ? tr : EMITFOLD;
1746 /* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
1747 LJFOLD(FLOAD KGC IRFL_STR_LEN)
1748 LJFOLDF(fload_str_len_kgc)
1750 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1751 return INTFOLD((int32_t)ir_kstr(fleft)->len);
1752 return NEXTFOLD;
1755 LJFOLD(FLOAD SNEW IRFL_STR_LEN)
1756 LJFOLDF(fload_str_len_snew)
1758 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
1759 PHIBARRIER(fleft);
1760 return fleft->op2;
1762 return NEXTFOLD;
1765 /* The C type ID of cdata objects is immutable. */
1766 LJFOLD(FLOAD KGC IRFL_CDATA_TYPEID)
1767 LJFOLDF(fload_cdata_typeid_kgc)
1769 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1770 return INTFOLD((int32_t)ir_kcdata(fleft)->typeid);
1771 return NEXTFOLD;
1774 /* Get the contents of immutable cdata objects. */
1775 LJFOLD(FLOAD KGC IRFL_CDATA_PTR)
1776 LJFOLD(FLOAD KGC IRFL_CDATA_INT64)
1777 LJFOLDF(fload_cdata_int64_kgc)
1779 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
1780 void *p = cdataptr(ir_kcdata(fleft));
1781 if (irt_is64(fins->t))
1782 return INT64FOLD(*(uint64_t *)p);
1783 else
1784 return INTFOLD(*(int32_t *)p);
1786 return NEXTFOLD;
1789 LJFOLD(FLOAD CNEW IRFL_CDATA_TYPEID)
1790 LJFOLD(FLOAD CNEWI IRFL_CDATA_TYPEID)
1791 LJFOLDF(fload_cdata_typeid_cnew)
1793 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1794 return fleft->op1; /* No PHI barrier needed. CNEW/CNEWI op1 is const. */
1795 return NEXTFOLD;
1798 /* Pointer and int64 cdata objects are immutable. */
1799 LJFOLD(FLOAD CNEWI IRFL_CDATA_PTR)
1800 LJFOLD(FLOAD CNEWI IRFL_CDATA_INT64)
1801 LJFOLDF(fload_cdata_ptr_int64_cnew)
1803 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1804 return fleft->op2; /* Fold even across PHI to avoid allocations. */
1805 return NEXTFOLD;
1808 LJFOLD(FLOAD any IRFL_STR_LEN)
1809 LJFOLD(FLOAD any IRFL_CDATA_TYPEID)
1810 LJFOLD(FLOAD any IRFL_CDATA_PTR)
1811 LJFOLD(FLOAD any IRFL_CDATA_INT64)
1812 LJFOLD(VLOAD any any) /* Vararg loads have no corresponding stores. */
1813 LJFOLDX(lj_opt_cse)
1815 /* All other field loads need alias analysis. */
1816 LJFOLD(FLOAD any any)
1817 LJFOLDX(lj_opt_fwd_fload)
1819 /* This is for LOOP only. Recording handles SLOADs internally. */
1820 LJFOLD(SLOAD any any)
1821 LJFOLDF(fwd_sload)
1823 if ((fins->op2 & IRSLOAD_FRAME)) {
1824 TRef tr = lj_opt_cse(J);
1825 return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
1826 } else {
1827 lua_assert(J->slot[fins->op1] != 0);
1828 return J->slot[fins->op1];
1832 /* Only fold for KKPTR. The pointer _and_ the contents must be const. */
1833 LJFOLD(XLOAD KKPTR any)
1834 LJFOLDF(xload_kptr)
1836 TRef tr = kfold_xload(J, fins, ir_kptr(fleft));
1837 return tr ? tr : NEXTFOLD;
1840 LJFOLD(XLOAD any any)
1841 LJFOLDX(lj_opt_fwd_xload)
1843 /* -- Write barriers ------------------------------------------------------ */
1845 /* Write barriers are amenable to CSE, but not across any incremental
1846 ** GC steps.
1848 ** The same logic applies to open upvalue references, because a stack
1849 ** may be resized during a GC step (not the current stack, but maybe that
1850 ** of a coroutine).
1852 LJFOLD(TBAR any)
1853 LJFOLD(OBAR any any)
1854 LJFOLD(UREFO any any)
1855 LJFOLDF(barrier_tab)
1857 TRef tr = lj_opt_cse(J);
1858 if (gcstep_barrier(J, tref_ref(tr))) /* CSE across GC step? */
1859 return EMITFOLD; /* Raw emit. Assumes fins is left intact by CSE. */
1860 return tr;
1863 LJFOLD(TBAR TNEW)
1864 LJFOLD(TBAR TDUP)
1865 LJFOLDF(barrier_tnew_tdup)
1867 /* New tables are always white and never need a barrier. */
1868 if (fins->op1 < J->chain[IR_LOOP]) /* Except across a GC step. */
1869 return NEXTFOLD;
1870 return DROPFOLD;
1873 /* -- Stores and allocations ---------------------------------------------- */
1875 /* Stores and allocations cannot be folded or passed on to CSE in general.
1876 ** But some stores can be eliminated with dead-store elimination (DSE).
1878 ** Caveat: *all* stores and allocs must be listed here or they end up at CSE!
1881 LJFOLD(ASTORE any any)
1882 LJFOLD(HSTORE any any)
1883 LJFOLDX(lj_opt_dse_ahstore)
1885 LJFOLD(USTORE any any)
1886 LJFOLDX(lj_opt_dse_ustore)
1888 LJFOLD(FSTORE any any)
1889 LJFOLDX(lj_opt_dse_fstore)
1891 LJFOLD(XSTORE any any)
1892 LJFOLDX(lj_opt_dse_xstore)
1894 LJFOLD(NEWREF any any) /* Treated like a store. */
1895 LJFOLD(CALLS any any)
1896 LJFOLD(CALLL any any) /* Safeguard fallback. */
1897 LJFOLD(CALLXS any any)
1898 LJFOLD(XBAR)
1899 LJFOLD(RETF any any) /* Modifies BASE. */
1900 LJFOLD(TNEW any any)
1901 LJFOLD(TDUP any)
1902 LJFOLD(CNEW any any)
1903 LJFOLDX(lj_ir_emit)
1905 /* ------------------------------------------------------------------------ */
1907 /* Every entry in the generated hash table is a 32 bit pattern:
1909 ** xxxxxxxx iiiiiii lllllll rrrrrrrrrr
1911 ** xxxxxxxx = 8 bit index into fold function table
1912 ** iiiiiii = 7 bit folded instruction opcode
1913 ** lllllll = 7 bit left instruction opcode
1914 ** rrrrrrrrrr = 8 bit right instruction opcode or 10 bits from literal field
1917 #include "lj_folddef.h"
1919 /* ------------------------------------------------------------------------ */
1921 /* Fold IR instruction. */
1922 TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
1924 uint32_t key, any;
1925 IRRef ref;
1927 if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
1928 lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
1929 JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT);
1930 /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
1931 if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
1932 return lj_opt_cse(J);
1934 /* Forwarding or CSE disabled? Emit raw IR for loads, except for SLOAD. */
1935 if ((J->flags & (JIT_F_OPT_FWD|JIT_F_OPT_CSE)) !=
1936 (JIT_F_OPT_FWD|JIT_F_OPT_CSE) &&
1937 irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD)
1938 return lj_ir_emit(J);
1940 /* DSE disabled? Emit raw IR for stores. */
1941 if (!(J->flags & JIT_F_OPT_DSE) && irm_kind(lj_ir_mode[fins->o]) == IRM_S)
1942 return lj_ir_emit(J);
1945 /* Fold engine start/retry point. */
1946 retry:
1947 /* Construct key from opcode and operand opcodes (unless literal/none). */
1948 key = ((uint32_t)fins->o << 17);
1949 if (fins->op1 >= J->cur.nk) {
1950 key += (uint32_t)IR(fins->op1)->o << 10;
1951 *fleft = *IR(fins->op1);
1953 if (fins->op2 >= J->cur.nk) {
1954 key += (uint32_t)IR(fins->op2)->o;
1955 *fright = *IR(fins->op2);
1956 } else {
1957 key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */
1960 /* Check for a match in order from most specific to least specific. */
1961 any = 0;
1962 for (;;) {
1963 uint32_t k = key | (any & 0x1ffff);
1964 uint32_t h = fold_hashkey(k);
1965 uint32_t fh = fold_hash[h]; /* Lookup key in semi-perfect hash table. */
1966 if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
1967 ref = (IRRef)tref_ref(fold_func[fh >> 24](J));
1968 if (ref != NEXTFOLD)
1969 break;
1971 if (any == 0xfffff) /* Exhausted folding. Pass on to CSE. */
1972 return lj_opt_cse(J);
1973 any = (any | (any >> 10)) ^ 0xffc00;
1976 /* Return value processing, ordered by frequency. */
1977 if (LJ_LIKELY(ref >= MAX_FOLD))
1978 return TREF(ref, irt_t(IR(ref)->t));
1979 if (ref == RETRYFOLD)
1980 goto retry;
1981 if (ref == KINTFOLD)
1982 return lj_ir_kint(J, fins->i);
1983 if (ref == FAILFOLD)
1984 lj_trace_err(J, LJ_TRERR_GFAIL);
1985 lua_assert(ref == DROPFOLD);
1986 return REF_DROP;
1989 /* -- Common-Subexpression Elimination ------------------------------------ */
1991 /* CSE an IR instruction. This is very fast due to the skip-list chains. */
1992 TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
1994 /* Avoid narrow to wide store-to-load forwarding stall */
1995 IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
1996 IROp op = fins->o;
1997 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
1998 /* Limited search for same operands in per-opcode chain. */
1999 IRRef ref = J->chain[op];
2000 IRRef lim = fins->op1;
2001 if (fins->op2 > lim) lim = fins->op2; /* Relies on lit < REF_BIAS. */
2002 while (ref > lim) {
2003 if (IR(ref)->op12 == op12)
2004 return TREF(ref, irt_t(IR(ref)->t)); /* Common subexpression found. */
2005 ref = IR(ref)->prev;
2008 /* Otherwise emit IR (inlined for speed). */
2010 IRRef ref = lj_ir_nextins(J);
2011 IRIns *ir = IR(ref);
2012 ir->prev = J->chain[op];
2013 ir->op12 = op12;
2014 J->chain[op] = (IRRef1)ref;
2015 ir->o = fins->o;
2016 J->guardemit.irt |= fins->t.irt;
2017 return TREF(ref, irt_t((ir->t = fins->t)));
2021 /* CSE with explicit search limit. */
2022 TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim)
2024 IRRef ref = J->chain[fins->o];
2025 IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
2026 while (ref > lim) {
2027 if (IR(ref)->op12 == op12)
2028 return ref;
2029 ref = IR(ref)->prev;
2031 return lj_ir_emit(J);
2034 /* ------------------------------------------------------------------------ */
2036 #undef IR
2037 #undef fins
2038 #undef fleft
2039 #undef fright
2040 #undef knumleft
2041 #undef knumright
2042 #undef emitir
2044 #endif