Fix FOLD rule for CONV.flt.num(CONV.num.flt(x)) => x.
[luajit-2.0.git] / src / lj_opt_fold.c
blob e04fa480292127ac51d595bdddb0366ea9191b01
1 /*
2 ** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
3 ** ABCelim: Array Bounds Check Elimination.
4 ** CSE: Common-Subexpression Elimination.
5 ** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h
6 */
8 #define lj_opt_fold_c
9 #define LUA_CORE
11 #include <math.h>
13 #include "lj_obj.h"
15 #if LJ_HASJIT
17 #include "lj_str.h"
18 #include "lj_tab.h"
19 #include "lj_ir.h"
20 #include "lj_jit.h"
21 #include "lj_iropt.h"
22 #include "lj_trace.h"
23 #include "lj_carith.h"
24 #include "lj_vm.h"
26 /* Here's a short description of how the FOLD engine processes instructions:
28 ** The FOLD engine receives a single instruction stored in fins (J->fold.ins).
29 ** The instruction and its operands are used to select matching fold rules.
30 ** These are applied iteratively until a fixed point is reached.
32 ** The 8 bit opcode of the instruction itself plus the opcodes of the
33 ** two instructions referenced by its operands form a 24 bit key
34 ** 'ins left right' (unused operands -> 0, literals -> lowest 10 bits).
36 ** This key is used for partial matching against the fold rules. The
37 ** left/right operand fields of the key are successively masked with
38 ** the 'any' wildcard, from most specific to least specific:
40 ** ins left right
41 ** ins any right
42 ** ins left any
43 ** ins any any
45 ** The masked key is used to lookup a matching fold rule in a semi-perfect
46 ** hash table. If a matching rule is found, the related fold function is run.
47 ** Multiple rules can share the same fold function. A fold rule may return
48 ** one of several special values:
50 ** - NEXTFOLD means no folding was applied, because an additional test
51 ** inside the fold function failed. Matching continues against less
52 ** specific fold rules. Finally the instruction is passed on to CSE.
54 ** - RETRYFOLD means the instruction was modified in-place. Folding is
55 ** retried as if this instruction had just been received.
57 ** All other return values are terminal actions -- no further folding is
58 ** applied:
60 ** - INTFOLD(i) returns a reference to the integer constant i.
62 ** - LEFTFOLD and RIGHTFOLD return the left/right operand reference
63 ** without emitting an instruction.
65 ** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit
66 ** it without passing through any further optimizations.
68 ** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have
69 ** no result (e.g. guarded assertions): FAILFOLD means the guard would
70 ** always fail, i.e. the current trace is pointless. DROPFOLD means
71 ** the guard is always true and has been eliminated. CONDFOLD is a
72 ** shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail).
74 ** - Any other return value is interpreted as an IRRef or TRef. This
75 ** can be a reference to an existing or a newly created instruction.
76 ** Only the least-significant 16 bits (IRRef1) are used to form a TRef
77 ** which is finally returned to the caller.
79 ** The FOLD engine receives instructions both from the trace recorder and
80 ** substituted instructions from LOOP unrolling. This means all types
81 ** of instructions may end up here, even though the recorder bypasses
82 ** FOLD in some cases. Thus all loads, stores and allocations must have
83 ** an any/any rule to avoid being passed on to CSE.
85 ** Carefully read the following requirements before adding or modifying
86 ** any fold rules:
88 ** Requirement #1: All fold rules must preserve their destination type.
90 ** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result).
91 ** Never use lj_ir_knumint() which can have either a KINT or KNUM result.
93 ** Requirement #2: Fold rules should not create *new* instructions which
94 ** reference operands *across* PHIs.
96 ** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the
97 ** left operand is a PHI. Then fleft->op1 would point across the PHI
98 ** frontier to an invariant instruction. Adding a PHI for this instruction
99 ** would be counterproductive. The solution is to add a barrier which
100 ** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case.
101 ** The only exception is for recurrences with high latencies like
102 ** repeated int->num->int conversions.
104 ** One could relax this condition a bit if the referenced instruction is
105 ** a PHI, too. But this often leads to worse code due to excessive
106 ** register shuffling.
108 ** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though.
109 ** Even returning fleft->op1 would be ok, because a new PHI will be added,
110 ** if needed. But again, this leads to excessive register shuffling and
111 ** should be avoided.
113 ** Requirement #3: The set of all fold rules must be monotonic to guarantee
114 ** termination.
116 ** The goal is optimization, so one primarily wants to add strength-reducing
117 ** rules. This means eliminating an instruction or replacing an instruction
118 ** with one or more simpler instructions. Don't add fold rules which point
119 ** into the other direction.
121 ** Some rules (like commutativity) do not directly reduce the strength of
122 ** an instruction, but enable other fold rules (e.g. by moving constants
123 ** to the right operand). These rules must be made unidirectional to avoid
124 ** cycles.
126 ** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it.
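/* A simplified sketch (not part of the build) of how the lookup key and the
** 'any' wildcard masking described above work. It assumes both operands are
** instruction references; literal operands instead contribute their lowest
** 10 bits, exactly as in lj_opt_fold() at the end of this file.
*/
#if 0
static void fold_key_sketch(jit_State *J)
{
  uint32_t key = ((uint32_t)fins->o << 17) |          /* ins */
                 ((uint32_t)IR(fins->op1)->o << 10) | /* left */
                 (uint32_t)IR(fins->op2)->o;          /* right */
  uint32_t any = 0;
  for (;;) {
    uint32_t k = key | (any & 0x1ffff);  /* OR wildcard bits into the key. */
    /* Probe the semi-perfect hash table with k. The sequence of 'any' values
    ** 0, 0xffc00, 0x3ff, 0xfffff yields the shapes 'ins left right',
    ** 'ins any right', 'ins left any' and 'ins any any', in that order.
    */
    if (any == 0xfffff) break;  /* All four shapes tried: give up, go to CSE. */
    any = (any | (any >> 10)) ^ 0xffc00;
  }
}
#endif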
129 /* Some local macros to save typing. Undef'd at the end. */
130 #define IR(ref) (&J->cur.ir[(ref)])
131 #define fins (&J->fold.ins)
132 #define fleft (&J->fold.left)
133 #define fright (&J->fold.right)
134 #define knumleft (ir_knum(fleft)->n)
135 #define knumright (ir_knum(fright)->n)
137 /* Pass IR on to next optimization in chain (FOLD). */
138 #define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
140 /* Fold function type. Fastcall on x86 significantly reduces their size. */
141 typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);
143 /* Macros for the fold specs, so buildvm can recognize them. */
144 #define LJFOLD(x)
145 #define LJFOLDX(x)
146 #define LJFOLDF(name) static TRef LJ_FASTCALL fold_##name(jit_State *J)
147 /* Note: They must be at the start of a line or buildvm ignores them! */
149 /* Barrier to prevent using operands across PHIs. */
150 #define PHIBARRIER(ir) if (irt_isphi((ir)->t)) return NEXTFOLD
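/* Example of Requirement #2 from the header comment: a rule that rewrites
** fins->op1 to fleft->op1 must issue PHIBARRIER(fleft) first, so it never
** reaches across the PHI frontier into the invariant part of a loop (see
** e.g. shortcut_dropleft below for the abs(neg(x)) ==> abs(x) case).
*/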
152 /* Barrier to prevent folding across a GC step.
153 ** GC steps can only happen at the head of a trace and at LOOP.
154 ** And the GC is only driven forward if there is at least one allocation.
156 #define gcstep_barrier(J, ref) \
157 ((ref) < J->chain[IR_LOOP] && \
158 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
159 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
160 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR]))
162 /* -- Constant folding for FP numbers ------------------------------------- */
164 LJFOLD(ADD KNUM KNUM)
165 LJFOLD(SUB KNUM KNUM)
166 LJFOLD(MUL KNUM KNUM)
167 LJFOLD(DIV KNUM KNUM)
168 LJFOLD(NEG KNUM KNUM)
169 LJFOLD(ABS KNUM KNUM)
170 LJFOLD(ATAN2 KNUM KNUM)
171 LJFOLD(LDEXP KNUM KNUM)
172 LJFOLD(MIN KNUM KNUM)
173 LJFOLD(MAX KNUM KNUM)
174 LJFOLDF(kfold_numarith)
176 lua_Number a = knumleft;
177 lua_Number b = knumright;
178 lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
179 return lj_ir_knum(J, y);
182 LJFOLD(LDEXP KNUM KINT)
183 LJFOLDF(kfold_ldexp)
185 #if LJ_TARGET_X86ORX64
186 UNUSED(J);
187 return NEXTFOLD;
188 #else
189 return lj_ir_knum(J, ldexp(knumleft, fright->i));
190 #endif
193 LJFOLD(FPMATH KNUM any)
194 LJFOLDF(kfold_fpmath)
196 lua_Number a = knumleft;
197 lua_Number y = lj_vm_foldfpm(a, fins->op2);
198 return lj_ir_knum(J, y);
201 LJFOLD(POW KNUM KINT)
202 LJFOLDF(kfold_numpow)
204 lua_Number a = knumleft;
205 lua_Number b = (lua_Number)fright->i;
206 lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
207 return lj_ir_knum(J, y);
210 /* Must not use kfold_kref for numbers (could be NaN). */
211 LJFOLD(EQ KNUM KNUM)
212 LJFOLD(NE KNUM KNUM)
213 LJFOLD(LT KNUM KNUM)
214 LJFOLD(GE KNUM KNUM)
215 LJFOLD(LE KNUM KNUM)
216 LJFOLD(GT KNUM KNUM)
217 LJFOLD(ULT KNUM KNUM)
218 LJFOLD(UGE KNUM KNUM)
219 LJFOLD(ULE KNUM KNUM)
220 LJFOLD(UGT KNUM KNUM)
221 LJFOLDF(kfold_numcomp)
223 return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o));
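/* Note on the rule above and the KNUM caveat: two identical KNUM references
** may both hold NaN, and NaN == NaN is false, so EQ/NE on numbers must be
** folded by value here instead of by reference equality as kfold_kref does
** further down.
*/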
226 /* -- Constant folding for 32 bit integers -------------------------------- */
228 static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
230 switch (op) {
231 case IR_ADD: k1 += k2; break;
232 case IR_SUB: k1 -= k2; break;
233 case IR_MUL: k1 *= k2; break;
234 case IR_MOD: k1 = lj_vm_modi(k1, k2); break;
235 case IR_NEG: k1 = -k1; break;
236 case IR_BAND: k1 &= k2; break;
237 case IR_BOR: k1 |= k2; break;
238 case IR_BXOR: k1 ^= k2; break;
239 case IR_BSHL: k1 <<= (k2 & 31); break;
240 case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break;
241 case IR_BSAR: k1 >>= (k2 & 31); break;
242 case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break;
243 case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
244 case IR_MIN: k1 = k1 < k2 ? k1 : k2; break;
245 case IR_MAX: k1 = k1 > k2 ? k1 : k2; break;
246 default: lua_assert(0); break;
248 return k1;
251 LJFOLD(ADD KINT KINT)
252 LJFOLD(SUB KINT KINT)
253 LJFOLD(MUL KINT KINT)
254 LJFOLD(MOD KINT KINT)
255 LJFOLD(NEG KINT KINT)
256 LJFOLD(BAND KINT KINT)
257 LJFOLD(BOR KINT KINT)
258 LJFOLD(BXOR KINT KINT)
259 LJFOLD(BSHL KINT KINT)
260 LJFOLD(BSHR KINT KINT)
261 LJFOLD(BSAR KINT KINT)
262 LJFOLD(BROL KINT KINT)
263 LJFOLD(BROR KINT KINT)
264 LJFOLD(MIN KINT KINT)
265 LJFOLD(MAX KINT KINT)
266 LJFOLDF(kfold_intarith)
268 return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
271 LJFOLD(ADDOV KINT KINT)
272 LJFOLD(SUBOV KINT KINT)
273 LJFOLD(MULOV KINT KINT)
274 LJFOLDF(kfold_intovarith)
276 lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i,
277 fins->o - IR_ADDOV);
278 int32_t k = lj_num2int(n);
279 if (n != (lua_Number)k)
280 return FAILFOLD;
281 return INTFOLD(k);
284 LJFOLD(BNOT KINT)
285 LJFOLDF(kfold_bnot)
287 return INTFOLD(~fleft->i);
290 LJFOLD(BSWAP KINT)
291 LJFOLDF(kfold_bswap)
293 return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i));
296 LJFOLD(LT KINT KINT)
297 LJFOLD(GE KINT KINT)
298 LJFOLD(LE KINT KINT)
299 LJFOLD(GT KINT KINT)
300 LJFOLD(ULT KINT KINT)
301 LJFOLD(UGE KINT KINT)
302 LJFOLD(ULE KINT KINT)
303 LJFOLD(UGT KINT KINT)
304 LJFOLD(ABC KINT KINT)
305 LJFOLDF(kfold_intcomp)
307 int32_t a = fleft->i, b = fright->i;
308 switch ((IROp)fins->o) {
309 case IR_LT: return CONDFOLD(a < b);
310 case IR_GE: return CONDFOLD(a >= b);
311 case IR_LE: return CONDFOLD(a <= b);
312 case IR_GT: return CONDFOLD(a > b);
313 case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b);
314 case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b);
315 case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
316 case IR_ABC:
317 case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
318 default: lua_assert(0); return FAILFOLD;
322 LJFOLD(UGE any KINT)
323 LJFOLDF(kfold_intcomp0)
325 if (fright->i == 0)
326 return DROPFOLD;
327 return NEXTFOLD;
330 /* -- Constant folding for 64 bit integers -------------------------------- */
332 static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
334 switch (op) {
335 #if LJ_64 || LJ_HASFFI
336 case IR_ADD: k1 += k2; break;
337 case IR_SUB: k1 -= k2; break;
338 #endif
339 #if LJ_HASFFI
340 case IR_MUL: k1 *= k2; break;
341 case IR_BAND: k1 &= k2; break;
342 case IR_BOR: k1 |= k2; break;
343 case IR_BXOR: k1 ^= k2; break;
344 #endif
345 default: UNUSED(k2); lua_assert(0); break;
347 return k1;
350 LJFOLD(ADD KINT64 KINT64)
351 LJFOLD(SUB KINT64 KINT64)
352 LJFOLD(MUL KINT64 KINT64)
353 LJFOLD(BAND KINT64 KINT64)
354 LJFOLD(BOR KINT64 KINT64)
355 LJFOLD(BXOR KINT64 KINT64)
356 LJFOLDF(kfold_int64arith)
358 return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64,
359 ir_k64(fright)->u64, (IROp)fins->o));
362 LJFOLD(DIV KINT64 KINT64)
363 LJFOLD(MOD KINT64 KINT64)
364 LJFOLD(POW KINT64 KINT64)
365 LJFOLDF(kfold_int64arith2)
367 #if LJ_HASFFI
368 uint64_t k1 = ir_k64(fleft)->u64, k2 = ir_k64(fright)->u64;
369 if (irt_isi64(fins->t)) {
370 k1 = fins->o == IR_DIV ? lj_carith_divi64((int64_t)k1, (int64_t)k2) :
371 fins->o == IR_MOD ? lj_carith_modi64((int64_t)k1, (int64_t)k2) :
372 lj_carith_powi64((int64_t)k1, (int64_t)k2);
373 } else {
374 k1 = fins->o == IR_DIV ? lj_carith_divu64(k1, k2) :
375 fins->o == IR_MOD ? lj_carith_modu64(k1, k2) :
376 lj_carith_powu64(k1, k2);
378 return INT64FOLD(k1);
379 #else
380 UNUSED(J); lua_assert(0); return FAILFOLD;
381 #endif
384 LJFOLD(BSHL KINT64 KINT)
385 LJFOLD(BSHR KINT64 KINT)
386 LJFOLD(BSAR KINT64 KINT)
387 LJFOLD(BROL KINT64 KINT)
388 LJFOLD(BROR KINT64 KINT)
389 LJFOLDF(kfold_int64shift)
391 #if LJ_HASFFI || LJ_64
392 uint64_t k = ir_k64(fleft)->u64;
393 int32_t sh = (fright->i & 63);
394 switch ((IROp)fins->o) {
395 case IR_BSHL: k <<= sh; break;
396 #if LJ_HASFFI
397 case IR_BSHR: k >>= sh; break;
398 case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
399 case IR_BROL: k = lj_rol(k, sh); break;
400 case IR_BROR: k = lj_ror(k, sh); break;
401 #endif
402 default: lua_assert(0); break;
404 return INT64FOLD(k);
405 #else
406 UNUSED(J); lua_assert(0); return FAILFOLD;
407 #endif
410 LJFOLD(BNOT KINT64)
411 LJFOLDF(kfold_bnot64)
413 #if LJ_HASFFI
414 return INT64FOLD(~ir_k64(fleft)->u64);
415 #else
416 UNUSED(J); lua_assert(0); return FAILFOLD;
417 #endif
420 LJFOLD(BSWAP KINT64)
421 LJFOLDF(kfold_bswap64)
423 #if LJ_HASFFI
424 return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64));
425 #else
426 UNUSED(J); lua_assert(0); return FAILFOLD;
427 #endif
430 LJFOLD(LT KINT64 KINT)
431 LJFOLD(GE KINT64 KINT)
432 LJFOLD(LE KINT64 KINT)
433 LJFOLD(GT KINT64 KINT)
434 LJFOLD(ULT KINT64 KINT)
435 LJFOLD(UGE KINT64 KINT)
436 LJFOLD(ULE KINT64 KINT)
437 LJFOLD(UGT KINT64 KINT)
438 LJFOLDF(kfold_int64comp)
440 #if LJ_HASFFI
441 uint64_t a = ir_k64(fleft)->u64, b = ir_k64(fright)->u64;
442 switch ((IROp)fins->o) {
443 case IR_LT: return CONDFOLD(a < b);
444 case IR_GE: return CONDFOLD(a >= b);
445 case IR_LE: return CONDFOLD(a <= b);
446 case IR_GT: return CONDFOLD(a > b);
447 case IR_ULT: return CONDFOLD((uint64_t)a < (uint64_t)b);
448 case IR_UGE: return CONDFOLD((uint64_t)a >= (uint64_t)b);
449 case IR_ULE: return CONDFOLD((uint64_t)a <= (uint64_t)b);
450 case IR_UGT: return CONDFOLD((uint64_t)a > (uint64_t)b);
451 default: lua_assert(0); return FAILFOLD;
453 #else
454 UNUSED(J); lua_assert(0); return FAILFOLD;
455 #endif
458 LJFOLD(UGE any KINT64)
459 LJFOLDF(kfold_int64comp0)
461 #if LJ_HASFFI
462 if (ir_k64(fright)->u64 == 0)
463 return DROPFOLD;
464 return NEXTFOLD;
465 #else
466 UNUSED(J); lua_assert(0); return FAILFOLD;
467 #endif
470 /* -- Constant folding for strings ---------------------------------------- */
472 LJFOLD(SNEW KKPTR KINT)
473 LJFOLDF(kfold_snew_kptr)
475 GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i);
476 return lj_ir_kstr(J, s);
479 LJFOLD(SNEW any KINT)
480 LJFOLDF(kfold_snew_empty)
482 if (fright->i == 0)
483 return lj_ir_kstr(J, &J2G(J)->strempty);
484 return NEXTFOLD;
487 LJFOLD(STRREF KGC KINT)
488 LJFOLDF(kfold_strref)
490 GCstr *str = ir_kstr(fleft);
491 lua_assert((MSize)fright->i < str->len);
492 return lj_ir_kkptr(J, (char *)strdata(str) + fright->i);
495 LJFOLD(STRREF SNEW any)
496 LJFOLDF(kfold_strref_snew)
498 PHIBARRIER(fleft);
499 if (irref_isk(fins->op2) && fright->i == 0) {
500 return fleft->op1; /* strref(snew(ptr, len), 0) ==> ptr */
501 } else {
502 /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
503 IRIns *ir = IR(fleft->op1);
504 IRRef1 str = ir->op1; /* IRIns * is not valid across emitir. */
505 lua_assert(ir->o == IR_STRREF);
506 PHIBARRIER(ir);
507 fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */
508 fins->op1 = str;
509 fins->ot = IRT(IR_STRREF, IRT_P32);
510 return RETRYFOLD;
512 return NEXTFOLD;
515 LJFOLD(CALLN CARG IRCALL_lj_str_cmp)
516 LJFOLDF(kfold_strcmp)
518 if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
519 GCstr *a = ir_kstr(IR(fleft->op1));
520 GCstr *b = ir_kstr(IR(fleft->op2));
521 return INTFOLD(lj_str_cmp(a, b));
523 return NEXTFOLD;
526 /* -- Constant folding of pointer arithmetic ------------------------------ */
528 LJFOLD(ADD KGC KINT)
529 LJFOLD(ADD KGC KINT64)
530 LJFOLDF(kfold_add_kgc)
532 GCobj *o = ir_kgc(fleft);
533 #if LJ_64
534 ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
535 #else
536 ptrdiff_t ofs = fright->i;
537 #endif
538 return lj_ir_kkptr(J, (char *)o + ofs);
541 LJFOLD(ADD KPTR KINT)
542 LJFOLD(ADD KPTR KINT64)
543 LJFOLD(ADD KKPTR KINT)
544 LJFOLD(ADD KKPTR KINT64)
545 LJFOLDF(kfold_add_kptr)
547 void *p = ir_kptr(fleft);
548 #if LJ_64
549 ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
550 #else
551 ptrdiff_t ofs = fright->i;
552 #endif
553 return lj_ir_kptr_(J, fleft->o, (char *)p + ofs);
556 /* -- Constant folding of conversions ------------------------------------- */
558 LJFOLD(TOBIT KNUM KNUM)
559 LJFOLDF(kfold_tobit)
561 return INTFOLD(lj_num2bit(knumleft));
564 LJFOLD(CONV KINT IRCONV_NUM_INT)
565 LJFOLDF(kfold_conv_kint_num)
567 return lj_ir_knum(J, (lua_Number)fleft->i);
570 LJFOLD(CONV KINT IRCONV_NUM_U32)
571 LJFOLDF(kfold_conv_kintu32_num)
573 return lj_ir_knum(J, (lua_Number)(uint32_t)fleft->i);
576 LJFOLD(CONV KINT IRCONV_I64_INT)
577 LJFOLD(CONV KINT IRCONV_U64_INT)
578 LJFOLDF(kfold_conv_kint_i64)
580 if ((fins->op2 & IRCONV_SEXT))
581 return INT64FOLD((uint64_t)(int64_t)fleft->i);
582 else
583 return INT64FOLD((uint64_t)(int64_t)(uint32_t)fleft->i);
586 LJFOLD(CONV KINT64 IRCONV_NUM_I64)
587 LJFOLDF(kfold_conv_kint64_num_i64)
589 return lj_ir_knum(J, (lua_Number)(int64_t)ir_kint64(fleft)->u64);
592 LJFOLD(CONV KINT64 IRCONV_NUM_U64)
593 LJFOLDF(kfold_conv_kint64_num_u64)
595 return lj_ir_knum(J, (lua_Number)ir_kint64(fleft)->u64);
598 LJFOLD(CONV KINT64 IRCONV_INT_I64)
599 LJFOLD(CONV KINT64 IRCONV_U32_I64)
600 LJFOLDF(kfold_conv_kint64_int_i64)
602 return INTFOLD((int32_t)ir_kint64(fleft)->u64);
605 LJFOLD(CONV KNUM IRCONV_INT_NUM)
606 LJFOLDF(kfold_conv_knum_int_num)
608 lua_Number n = knumleft;
609 if (!(fins->op2 & IRCONV_TRUNC)) {
610 int32_t k = lj_num2int(n);
611 if (irt_isguard(fins->t) && n != (lua_Number)k) {
612 /* We're about to create a guard which always fails, like CONV +1.5.
613 ** Some pathological loops cause this during LICM, e.g.:
614 ** local x,k,t = 0,1.5,{1,[1.5]=2}
615 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
616 ** assert(x == 300)
618 return FAILFOLD;
620 return INTFOLD(k);
621 } else {
622 return INTFOLD((int32_t)n);
626 LJFOLD(CONV KNUM IRCONV_U32_NUM)
627 LJFOLDF(kfold_conv_knum_u32_num)
629 lua_assert((fins->op2 & IRCONV_TRUNC));
630 return INTFOLD((int32_t)(uint32_t)knumleft);
633 LJFOLD(CONV KNUM IRCONV_I64_NUM)
634 LJFOLDF(kfold_conv_knum_i64_num)
636 lua_assert((fins->op2 & IRCONV_TRUNC));
637 return INT64FOLD((uint64_t)(int64_t)knumleft);
640 LJFOLD(CONV KNUM IRCONV_U64_NUM)
641 LJFOLDF(kfold_conv_knum_u64_num)
643 lua_assert((fins->op2 & IRCONV_TRUNC));
644 return INT64FOLD(lj_num2u64(knumleft));
647 LJFOLD(TOSTR KNUM)
648 LJFOLDF(kfold_tostr_knum)
650 return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft));
653 LJFOLD(TOSTR KINT)
654 LJFOLDF(kfold_tostr_kint)
656 return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i));
659 LJFOLD(STRTO KGC)
660 LJFOLDF(kfold_strto)
662 TValue n;
663 if (lj_str_tonum(ir_kstr(fleft), &n))
664 return lj_ir_knum(J, numV(&n));
665 return FAILFOLD;
668 /* -- Constant folding of equality checks --------------------------------- */
670 /* Don't constant-fold away FLOAD checks against KNULL. */
671 LJFOLD(EQ FLOAD KNULL)
672 LJFOLD(NE FLOAD KNULL)
673 LJFOLDX(lj_opt_cse)
675 /* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
676 LJFOLD(EQ any KNULL)
677 LJFOLD(NE any KNULL)
678 LJFOLD(EQ KNULL any)
679 LJFOLD(NE KNULL any)
680 LJFOLD(EQ KINT KINT) /* Constants are unique, so same refs <==> same value. */
681 LJFOLD(NE KINT KINT)
682 LJFOLD(EQ KINT64 KINT64)
683 LJFOLD(NE KINT64 KINT64)
684 LJFOLD(EQ KGC KGC)
685 LJFOLD(NE KGC KGC)
686 LJFOLDF(kfold_kref)
688 return CONDFOLD((fins->op1 == fins->op2) ^ (fins->o == IR_NE));
691 /* -- Algebraic shortcuts ------------------------------------------------- */
693 LJFOLD(FPMATH FPMATH IRFPM_FLOOR)
694 LJFOLD(FPMATH FPMATH IRFPM_CEIL)
695 LJFOLD(FPMATH FPMATH IRFPM_TRUNC)
696 LJFOLDF(shortcut_round)
698 IRFPMathOp op = (IRFPMathOp)fleft->op2;
699 if (op == IRFPM_FLOOR || op == IRFPM_CEIL || op == IRFPM_TRUNC)
700 return LEFTFOLD; /* round(round_left(x)) = round_left(x) */
701 return NEXTFOLD;
704 LJFOLD(ABS ABS KNUM)
705 LJFOLDF(shortcut_left)
707 return LEFTFOLD; /* f(g(x)) ==> g(x) */
710 LJFOLD(ABS NEG KNUM)
711 LJFOLDF(shortcut_dropleft)
713 PHIBARRIER(fleft);
714 fins->op1 = fleft->op1; /* abs(neg(x)) ==> abs(x) */
715 return RETRYFOLD;
718 /* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
719 LJFOLD(NEG NEG any)
720 LJFOLD(BNOT BNOT)
721 LJFOLD(BSWAP BSWAP)
722 LJFOLDF(shortcut_leftleft)
724 PHIBARRIER(fleft); /* See above. Fold would be ok, but not beneficial. */
725 return fleft->op1; /* f(g(x)) ==> x */
728 /* -- FP algebraic simplifications ---------------------------------------- */
730 /* FP arithmetic is tricky -- there's not much to simplify.
731 ** Please note the following common pitfalls before sending "improvements":
732 ** x+0 ==> x is INVALID for x=-0
733 ** 0-x ==> -x is INVALID for x=+0
734 ** x*0 ==> 0 is INVALID for x=-0, x=+-Inf or x=NaN
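/* A minimal sketch (not part of the build) of the first pitfall above:
** under IEEE 754, (-0.0) + 0.0 is +0.0, so folding x+0 ==> x would change
** the sign of a zero result.
*/
#if 0
static void fp_zero_pitfall(void)
{
  double x = -0.0;
  double exact = x + 0.0;   /* What the VM computes: +0.0. */
  double folded = x;        /* What x+0 ==> x would yield: -0.0. */
  /* Observable difference: 1.0/exact is +inf, 1.0/folded is -inf. */
}
#endif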
737 LJFOLD(ADD NEG any)
738 LJFOLDF(simplify_numadd_negx)
740 PHIBARRIER(fleft);
741 fins->o = IR_SUB; /* (-a) + b ==> b - a */
742 fins->op1 = fins->op2;
743 fins->op2 = fleft->op1;
744 return RETRYFOLD;
747 LJFOLD(ADD any NEG)
748 LJFOLDF(simplify_numadd_xneg)
750 PHIBARRIER(fright);
751 fins->o = IR_SUB; /* a + (-b) ==> a - b */
752 fins->op2 = fright->op1;
753 return RETRYFOLD;
756 LJFOLD(SUB any KNUM)
757 LJFOLDF(simplify_numsub_k)
759 lua_Number n = knumright;
760 if (n == 0.0) /* x - (+-0) ==> x */
761 return LEFTFOLD;
762 return NEXTFOLD;
765 LJFOLD(SUB NEG KNUM)
766 LJFOLDF(simplify_numsub_negk)
768 PHIBARRIER(fleft);
769 fins->op2 = fleft->op1; /* (-x) - k ==> (-k) - x */
770 fins->op1 = (IRRef1)lj_ir_knum(J, -knumright);
771 return RETRYFOLD;
774 LJFOLD(SUB any NEG)
775 LJFOLDF(simplify_numsub_xneg)
777 PHIBARRIER(fright);
778 fins->o = IR_ADD; /* a - (-b) ==> a + b */
779 fins->op2 = fright->op1;
780 return RETRYFOLD;
783 LJFOLD(MUL any KNUM)
784 LJFOLD(DIV any KNUM)
785 LJFOLDF(simplify_nummuldiv_k)
787 lua_Number n = knumright;
788 if (n == 1.0) { /* x o 1 ==> x */
789 return LEFTFOLD;
790 } else if (n == -1.0) { /* x o -1 ==> -x */
791 fins->o = IR_NEG;
792 fins->op2 = (IRRef1)lj_ir_knum_neg(J);
793 return RETRYFOLD;
794 } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */
795 fins->o = IR_ADD;
796 fins->op2 = fins->op1;
797 return RETRYFOLD;
799 return NEXTFOLD;
802 LJFOLD(MUL NEG KNUM)
803 LJFOLD(DIV NEG KNUM)
804 LJFOLDF(simplify_nummuldiv_negk)
806 PHIBARRIER(fleft);
807 fins->op1 = fleft->op1; /* (-a) o k ==> a o (-k) */
808 fins->op2 = (IRRef1)lj_ir_knum(J, -knumright);
809 return RETRYFOLD;
812 LJFOLD(MUL NEG NEG)
813 LJFOLD(DIV NEG NEG)
814 LJFOLDF(simplify_nummuldiv_negneg)
816 PHIBARRIER(fleft);
817 PHIBARRIER(fright);
818 fins->op1 = fleft->op1; /* (-a) o (-b) ==> a o b */
819 fins->op2 = fright->op1;
820 return RETRYFOLD;
823 LJFOLD(POW any KINT)
824 LJFOLDF(simplify_numpow_xk)
826 int32_t k = fright->i;
827 TRef ref = fins->op1;
828 if (k == 0) /* x ^ 0 ==> 1 */
829 return lj_ir_knum_one(J); /* Result must be a number, not an int. */
830 if (k == 1) /* x ^ 1 ==> x */
831 return LEFTFOLD;
832 if ((uint32_t)(k+65536) > 2*65536u) /* Limit code explosion. */
833 return NEXTFOLD;
834 if (k < 0) { /* x ^ (-k) ==> (1/x) ^ k. */
835 ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
836 k = -k;
838 /* Unroll x^k for 1 <= k <= 65536. */
839 for (; (k & 1) == 0; k >>= 1) /* Handle leading zeros. */
840 ref = emitir(IRTN(IR_MUL), ref, ref);
841 if ((k >>= 1) != 0) { /* Handle trailing bits. */
842 TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
843 for (; k != 1; k >>= 1) {
844 if (k & 1)
845 ref = emitir(IRTN(IR_MUL), ref, tmp);
846 tmp = emitir(IRTN(IR_MUL), tmp, tmp);
848 ref = emitir(IRTN(IR_MUL), ref, tmp);
850 return ref;
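/* Worked example for the unrolling above, assuming k = 12 (binary 1100):
** the first loop squares away the low zero bits (ref = x^2, then x^4, k = 3),
** then k >>= 1 leaves 1, so tmp = ref*ref = x^8 and the final multiply gives
** ref = x^4 * x^8 = x^12, using four IR_MUL instructions in total.
*/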
853 LJFOLD(POW KNUM any)
854 LJFOLDF(simplify_numpow_kx)
856 lua_Number n = knumleft;
857 if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
858 fins->o = IR_CONV;
859 #if LJ_TARGET_X86ORX64
860 fins->op1 = fins->op2;
861 fins->op2 = IRCONV_NUM_INT;
862 fins->op2 = (IRRef1)lj_opt_fold(J);
863 #endif
864 fins->op1 = (IRRef1)lj_ir_knum_one(J);
865 fins->o = IR_LDEXP;
866 return RETRYFOLD;
868 return NEXTFOLD;
871 /* -- Simplify conversions ------------------------------------------------ */
873 LJFOLD(CONV CONV IRCONV_NUM_INT) /* _NUM */
874 LJFOLDF(shortcut_conv_num_int)
876 PHIBARRIER(fleft);
877 /* Only safe with a guarded conversion to int. */
878 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM && irt_isguard(fleft->t))
879 return fleft->op1; /* f(g(x)) ==> x */
880 return NEXTFOLD;
883 LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */
884 LJFOLDF(simplify_conv_int_num)
886 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
887 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT)
888 return fleft->op1;
889 return NEXTFOLD;
892 LJFOLD(CONV CONV IRCONV_U32_NUM) /* _U32*/
893 LJFOLDF(simplify_conv_u32_num)
895 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
896 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
897 return fleft->op1;
898 return NEXTFOLD;
901 LJFOLD(CONV CONV IRCONV_I64_NUM) /* _INT or _U32*/
902 LJFOLD(CONV CONV IRCONV_U64_NUM) /* _INT or _U32*/
903 LJFOLDF(simplify_conv_i64_num)
905 PHIBARRIER(fleft);
906 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
907 /* Reduce to a sign-extension. */
908 fins->op1 = fleft->op1;
909 fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
910 return RETRYFOLD;
911 } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
912 #if LJ_TARGET_X64
913 return fleft->op1;
914 #else
915 /* Reduce to a zero-extension. */
916 fins->op1 = fleft->op1;
917 fins->op2 = (IRT_I64<<5)|IRT_U32;
918 return RETRYFOLD;
919 #endif
921 return NEXTFOLD;
924 LJFOLD(CONV CONV IRCONV_INT_I64) /* _INT */
925 LJFOLD(CONV CONV IRCONV_INT_U64) /* _INT */
926 LJFOLDF(simplify_conv_int_i64)
928 PHIBARRIER(fleft);
929 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT)
930 return fleft->op1;
931 return NEXTFOLD;
934 LJFOLD(CONV CONV IRCONV_FLOAT_NUM) /* _FLOAT */
935 LJFOLDF(simplify_conv_flt_num)
937 PHIBARRIER(fleft);
938 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_FLOAT)
939 return fleft->op1;
940 return NEXTFOLD;
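/* Note: this appears to be the rule referenced by the commit message,
** CONV.flt.num(CONV.num.flt(x)) ==> x. Dropping the round-trip is only safe
** when the inner conversion really has a FLOAT source, since every float is
** exactly representable as a num (double).
*/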
943 /* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
944 LJFOLD(TOBIT CONV KNUM)
945 LJFOLDF(simplify_tobit_conv)
947 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
948 (fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
949 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
950 lua_assert(irt_isnum(fleft->t));
951 return fleft->op1;
953 return NEXTFOLD;
956 /* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
957 LJFOLD(FPMATH CONV IRFPM_FLOOR)
958 LJFOLD(FPMATH CONV IRFPM_CEIL)
959 LJFOLD(FPMATH CONV IRFPM_TRUNC)
960 LJFOLDF(simplify_floor_conv)
962 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
963 (fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
964 return LEFTFOLD;
965 return NEXTFOLD;
968 /* Strength reduction of widening. */
969 LJFOLD(CONV any IRCONV_I64_INT)
970 LJFOLDF(simplify_conv_sext)
972 IRRef ref = fins->op1;
973 int64_t ofs = 0;
974 if (!(fins->op2 & IRCONV_SEXT))
975 return NEXTFOLD;
976 PHIBARRIER(fleft);
977 if (fleft->o == IR_XLOAD && (irt_isu8(fleft->t) || irt_isu16(fleft->t)))
978 goto ok_reduce;
979 if (fleft->o == IR_ADD && irref_isk(fleft->op2)) {
980 ofs = (int64_t)IR(fleft->op2)->i;
981 ref = fleft->op1;
983 /* Use scalar evolution analysis results to strength-reduce sign-extension. */
984 if (ref == J->scev.idx) {
985 IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
986 lua_assert(irt_isint(J->scev.t));
987 if (lo && IR(lo)->i + ofs >= 0) {
988 ok_reduce:
989 #if LJ_TARGET_X64
990 /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
991 return LEFTFOLD;
992 #else
993 /* Reduce to a (cheaper) zero-extension. */
994 fins->op2 &= ~IRCONV_SEXT;
995 return RETRYFOLD;
996 #endif
999 return NEXTFOLD;
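/* Note on the rule above: if scalar evolution shows the index (plus the
** constant offset) can never be negative, sign- and zero-extension produce
** the same result, so the SEXT is either dropped entirely (x64, where 32 bit
** ops zero-extend implicitly) or weakened to a cheaper zero-extension.
*/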
1002 /* Strength reduction of narrowing. */
1003 LJFOLD(CONV ADD IRCONV_INT_I64)
1004 LJFOLD(CONV SUB IRCONV_INT_I64)
1005 LJFOLD(CONV MUL IRCONV_INT_I64)
1006 LJFOLD(CONV ADD IRCONV_INT_U64)
1007 LJFOLD(CONV SUB IRCONV_INT_U64)
1008 LJFOLD(CONV MUL IRCONV_INT_U64)
1009 LJFOLDF(simplify_conv_narrow)
1011 IROp op = (IROp)fleft->o;
1012 IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
1013 PHIBARRIER(fleft);
1014 op1 = emitir(IRTI(IR_CONV), op1, mode);
1015 op2 = emitir(IRTI(IR_CONV), op2, mode);
1016 fins->ot = IRTI(op);
1017 fins->op1 = op1;
1018 fins->op2 = op2;
1019 return RETRYFOLD;
1022 /* Special CSE rule for CONV. */
1023 LJFOLD(CONV any any)
1024 LJFOLDF(cse_conv)
1026 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
1027 IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
1028 uint8_t guard = irt_isguard(fins->t);
1029 IRRef ref = J->chain[IR_CONV];
1030 while (ref > op1) {
1031 IRIns *ir = IR(ref);
1032 /* Commoning with stronger checks is ok. */
1033 if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
1034 irt_isguard(ir->t) >= guard)
1035 return ref;
1036 ref = ir->prev;
1039 return EMITFOLD; /* No fallthrough to regular CSE. */
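/* Example for the "stronger checks" rule above: an earlier *guarded*
** CONV int<-num of the same operand may stand in for a later unguarded one,
** but an unguarded result is never reused for a guarded request, since that
** would silently drop the check.
*/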
1042 /* FP conversion narrowing. */
1043 LJFOLD(TOBIT ADD KNUM)
1044 LJFOLD(TOBIT SUB KNUM)
1045 LJFOLD(CONV ADD IRCONV_INT_NUM)
1046 LJFOLD(CONV SUB IRCONV_INT_NUM)
1047 LJFOLD(CONV ADD IRCONV_I64_NUM)
1048 LJFOLD(CONV SUB IRCONV_I64_NUM)
1049 LJFOLDF(narrow_convert)
1051 PHIBARRIER(fleft);
1052 /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
1053 if (J->chain[IR_LOOP])
1054 return NEXTFOLD;
1055 lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT);
1056 return lj_opt_narrow_convert(J);
1059 /* -- Integer algebraic simplifications ----------------------------------- */
1061 LJFOLD(ADD any KINT)
1062 LJFOLD(ADDOV any KINT)
1063 LJFOLD(SUBOV any KINT)
1064 LJFOLDF(simplify_intadd_k)
1066 if (fright->i == 0) /* i o 0 ==> i */
1067 return LEFTFOLD;
1068 return NEXTFOLD;
1071 LJFOLD(MULOV any KINT)
1072 LJFOLDF(simplify_intmul_k)
1074 if (fright->i == 0) /* i * 0 ==> 0 */
1075 return RIGHTFOLD;
1076 if (fright->i == 1) /* i * 1 ==> i */
1077 return LEFTFOLD;
1078 if (fright->i == 2) { /* i * 2 ==> i + i */
1079 fins->o = IR_ADDOV;
1080 fins->op2 = fins->op1;
1081 return RETRYFOLD;
1083 return NEXTFOLD;
1086 LJFOLD(SUB any KINT)
1087 LJFOLDF(simplify_intsub_k)
1089 if (fright->i == 0) /* i - 0 ==> i */
1090 return LEFTFOLD;
1091 fins->o = IR_ADD; /* i - k ==> i + (-k) */
1092 fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i); /* Overflow for -2^31 ok. */
1093 return RETRYFOLD;
1096 LJFOLD(SUB KINT any)
1097 LJFOLD(SUB KINT64 any)
1098 LJFOLDF(simplify_intsub_kleft)
1100 if (fleft->o == IR_KINT ? (fleft->i == 0) : (ir_kint64(fleft)->u64 == 0)) {
1101 fins->o = IR_NEG; /* 0 - i ==> -i */
1102 fins->op1 = fins->op2;
1103 return RETRYFOLD;
1105 return NEXTFOLD;
1108 LJFOLD(ADD any KINT64)
1109 LJFOLDF(simplify_intadd_k64)
1111 if (ir_kint64(fright)->u64 == 0) /* i + 0 ==> i */
1112 return LEFTFOLD;
1113 return NEXTFOLD;
1116 LJFOLD(SUB any KINT64)
1117 LJFOLDF(simplify_intsub_k64)
1119 uint64_t k = ir_kint64(fright)->u64;
1120 if (k == 0) /* i - 0 ==> i */
1121 return LEFTFOLD;
1122 fins->o = IR_ADD; /* i - k ==> i + (-k) */
1123 fins->op2 = (IRRef1)lj_ir_kint64(J, (uint64_t)-(int64_t)k);
1124 return RETRYFOLD;
1127 static TRef simplify_intmul_k(jit_State *J, int32_t k)
1129 /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2.
1130 ** But this is mainly intended for simple address arithmetic.
1131 ** Also it's easier for the backend to optimize the original multiplies.
1133 if (k == 1) { /* i * 1 ==> i */
1134 return LEFTFOLD;
1135 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */
1136 fins->o = IR_BSHL;
1137 fins->op2 = lj_ir_kint(J, lj_fls((uint32_t)k));
1138 return RETRYFOLD;
1140 return NEXTFOLD;
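/* Example for the helper above: i * 8 has k = 8, (k & (k-1)) == 0 and
** lj_fls(8) == 3, so the multiply is strength-reduced to i << 3.
*/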
1143 LJFOLD(MUL any KINT)
1144 LJFOLDF(simplify_intmul_k32)
1146 if (fright->i == 0) /* i * 0 ==> 0 */
1147 return INTFOLD(0);
1148 else if (fright->i > 0)
1149 return simplify_intmul_k(J, fright->i);
1150 return NEXTFOLD;
1153 LJFOLD(MUL any KINT64)
1154 LJFOLDF(simplify_intmul_k64)
1156 if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */
1157 return INT64FOLD(0);
1158 #if LJ_64
1159 /* NYI: SPLIT for BSHL and 32 bit backend support. */
1160 else if (ir_kint64(fright)->u64 < 0x80000000u)
1161 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
1162 #endif
1163 return NEXTFOLD;
1166 LJFOLD(MOD any KINT)
1167 LJFOLDF(simplify_intmod_k)
1169 int32_t k = fright->i;
1170 lua_assert(k != 0);
1171 if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */
1172 fins->o = IR_BAND;
1173 fins->op2 = lj_ir_kint(J, k-1);
1174 return RETRYFOLD;
1176 return NEXTFOLD;
1179 LJFOLD(MOD KINT any)
1180 LJFOLDF(simplify_intmod_kleft)
1182 if (fleft->i == 0)
1183 return INTFOLD(0);
1184 return NEXTFOLD;
1187 LJFOLD(SUB any any)
1188 LJFOLD(SUBOV any any)
1189 LJFOLDF(simplify_intsub)
1191 if (fins->op1 == fins->op2 && !irt_isnum(fins->t)) /* i - i ==> 0 */
1192 return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
1193 return NEXTFOLD;
1196 LJFOLD(SUB ADD any)
1197 LJFOLDF(simplify_intsubadd_leftcancel)
1199 if (!irt_isnum(fins->t)) {
1200 PHIBARRIER(fleft);
1201 if (fins->op2 == fleft->op1) /* (i + j) - i ==> j */
1202 return fleft->op2;
1203 if (fins->op2 == fleft->op2) /* (i + j) - j ==> i */
1204 return fleft->op1;
1206 return NEXTFOLD;
1209 LJFOLD(SUB SUB any)
1210 LJFOLDF(simplify_intsubsub_leftcancel)
1212 if (!irt_isnum(fins->t)) {
1213 PHIBARRIER(fleft);
1214 if (fins->op1 == fleft->op1) { /* (i - j) - i ==> 0 - j */
1215 fins->op1 = (IRRef1)lj_ir_kint(J, 0);
1216 fins->op2 = fleft->op2;
1217 return RETRYFOLD;
1220 return NEXTFOLD;
1223 LJFOLD(SUB any SUB)
1224 LJFOLDF(simplify_intsubsub_rightcancel)
1226 if (!irt_isnum(fins->t)) {
1227 PHIBARRIER(fright);
1228 if (fins->op1 == fright->op1) /* i - (i - j) ==> j */
1229 return fright->op2;
1231 return NEXTFOLD;
1234 LJFOLD(SUB any ADD)
1235 LJFOLDF(simplify_intsubadd_rightcancel)
1237 if (!irt_isnum(fins->t)) {
1238 PHIBARRIER(fright);
1239 if (fins->op1 == fright->op1) { /* i - (i + j) ==> 0 - j */
1240 fins->op2 = fright->op2;
1241 fins->op1 = (IRRef1)lj_ir_kint(J, 0);
1242 return RETRYFOLD;
1244 if (fins->op1 == fright->op2) { /* i - (j + i) ==> 0 - j */
1245 fins->op2 = fright->op1;
1246 fins->op1 = (IRRef1)lj_ir_kint(J, 0);
1247 return RETRYFOLD;
1250 return NEXTFOLD;
1253 LJFOLD(SUB ADD ADD)
1254 LJFOLDF(simplify_intsubaddadd_cancel)
1256 if (!irt_isnum(fins->t)) {
1257 PHIBARRIER(fleft);
1258 PHIBARRIER(fright);
1259 if (fleft->op1 == fright->op1) { /* (i + j1) - (i + j2) ==> j1 - j2 */
1260 fins->op1 = fleft->op2;
1261 fins->op2 = fright->op2;
1262 return RETRYFOLD;
1264 if (fleft->op1 == fright->op2) { /* (i + j1) - (j2 + i) ==> j1 - j2 */
1265 fins->op1 = fleft->op2;
1266 fins->op2 = fright->op1;
1267 return RETRYFOLD;
1269 if (fleft->op2 == fright->op1) { /* (j1 + i) - (i + j2) ==> j1 - j2 */
1270 fins->op1 = fleft->op1;
1271 fins->op2 = fright->op2;
1272 return RETRYFOLD;
1274 if (fleft->op2 == fright->op2) { /* (j1 + i) - (j2 + i) ==> j1 - j2 */
1275 fins->op1 = fleft->op1;
1276 fins->op2 = fright->op1;
1277 return RETRYFOLD;
1280 return NEXTFOLD;
1283 LJFOLD(BAND any KINT)
1284 LJFOLD(BAND any KINT64)
1285 LJFOLDF(simplify_band_k)
1287 int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
1288 (int64_t)ir_k64(fright)->u64;
1289 if (k == 0) /* i & 0 ==> 0 */
1290 return RIGHTFOLD;
1291 if (k == -1) /* i & -1 ==> i */
1292 return LEFTFOLD;
1293 return NEXTFOLD;
1296 LJFOLD(BOR any KINT)
1297 LJFOLD(BOR any KINT64)
1298 LJFOLDF(simplify_bor_k)
1300 int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
1301 (int64_t)ir_k64(fright)->u64;
1302 if (k == 0) /* i | 0 ==> i */
1303 return LEFTFOLD;
1304 if (k == -1) /* i | -1 ==> -1 */
1305 return RIGHTFOLD;
1306 return NEXTFOLD;
1309 LJFOLD(BXOR any KINT)
1310 LJFOLD(BXOR any KINT64)
1311 LJFOLDF(simplify_bxor_k)
1313 int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
1314 (int64_t)ir_k64(fright)->u64;
1315 if (k == 0) /* i xor 0 ==> i */
1316 return LEFTFOLD;
1317 if (k == -1) { /* i xor -1 ==> ~i */
1318 fins->o = IR_BNOT;
1319 fins->op2 = 0;
1320 return RETRYFOLD;
1322 return NEXTFOLD;
1325 LJFOLD(BSHL any KINT)
1326 LJFOLD(BSHR any KINT)
1327 LJFOLD(BSAR any KINT)
1328 LJFOLD(BROL any KINT)
1329 LJFOLD(BROR any KINT)
1330 LJFOLDF(simplify_shift_ik)
1332 int32_t mask = irt_is64(fins->t) ? 63 : 31;
1333 int32_t k = (fright->i & mask);
1334 if (k == 0) /* i o 0 ==> i */
1335 return LEFTFOLD;
1336 if (k == 1 && fins->o == IR_BSHL) { /* i << 1 ==> i + i */
1337 fins->o = IR_ADD;
1338 fins->op2 = fins->op1;
1339 return RETRYFOLD;
1341 if (k != fright->i) { /* i o k ==> i o (k & mask) */
1342 fins->op2 = (IRRef1)lj_ir_kint(J, k);
1343 return RETRYFOLD;
1345 #ifndef LJ_TARGET_UNIFYROT
1346 if (fins->o == IR_BROR) { /* bror(i, k) ==> brol(i, (-k)&mask) */
1347 fins->o = IR_BROL;
1348 fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&mask);
1349 return RETRYFOLD;
1351 #endif
1352 return NEXTFOLD;
1355 LJFOLD(BSHL any BAND)
1356 LJFOLD(BSHR any BAND)
1357 LJFOLD(BSAR any BAND)
1358 LJFOLD(BROL any BAND)
1359 LJFOLD(BROR any BAND)
1360 LJFOLDF(simplify_shift_andk)
1362 IRIns *irk = IR(fright->op2);
1363 PHIBARRIER(fright);
1364 if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
1365 irk->o == IR_KINT) { /* i o (j & mask) ==> i o j */
1366 int32_t mask = irt_is64(fins->t) ? 63 : 31;
1367 int32_t k = irk->i & mask;
1368 if (k == mask) {
1369 fins->op2 = fright->op1;
1370 return RETRYFOLD;
1373 return NEXTFOLD;
1376 LJFOLD(BSHL KINT any)
1377 LJFOLD(BSHR KINT any)
1378 LJFOLD(BSHL KINT64 any)
1379 LJFOLD(BSHR KINT64 any)
1380 LJFOLDF(simplify_shift1_ki)
1382 int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
1383 (int64_t)ir_k64(fleft)->u64;
1384 if (k == 0) /* 0 o i ==> 0 */
1385 return LEFTFOLD;
1386 return NEXTFOLD;
1389 LJFOLD(BSAR KINT any)
1390 LJFOLD(BROL KINT any)
1391 LJFOLD(BROR KINT any)
1392 LJFOLD(BSAR KINT64 any)
1393 LJFOLD(BROL KINT64 any)
1394 LJFOLD(BROR KINT64 any)
1395 LJFOLDF(simplify_shift2_ki)
1397 int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
1398 (int64_t)ir_k64(fleft)->u64;
1399 if (k == 0 || k == -1) /* 0 o i ==> 0; -1 o i ==> -1 */
1400 return LEFTFOLD;
1401 return NEXTFOLD;
1404 /* -- Reassociation ------------------------------------------------------- */
1406 LJFOLD(ADD ADD KINT)
1407 LJFOLD(MUL MUL KINT)
1408 LJFOLD(BAND BAND KINT)
1409 LJFOLD(BOR BOR KINT)
1410 LJFOLD(BXOR BXOR KINT)
1411 LJFOLDF(reassoc_intarith_k)
1413 IRIns *irk = IR(fleft->op2);
1414 if (irk->o == IR_KINT) {
1415 int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
1416 if (k == irk->i) /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
1417 return LEFTFOLD;
1418 PHIBARRIER(fleft);
1419 fins->op1 = fleft->op1;
1420 fins->op2 = (IRRef1)lj_ir_kint(J, k);
1421 return RETRYFOLD; /* (i o k1) o k2 ==> i o (k1 o k2) */
1423 return NEXTFOLD;
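/* Examples for the rule above: (x + 3) + 5 reassociates to x + 8, while
** (x | 0xff) | 0x0f hits the LEFTFOLD shortcut because 0xff | 0x0f == 0xff,
** so no new constant needs to be interned.
*/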
1426 LJFOLD(ADD ADD KINT64)
1427 LJFOLD(MUL MUL KINT64)
1428 LJFOLD(BAND BAND KINT64)
1429 LJFOLD(BOR BOR KINT64)
1430 LJFOLD(BXOR BXOR KINT64)
1431 LJFOLDF(reassoc_intarith_k64)
1433 #if LJ_HASFFI || LJ_64
1434 IRIns *irk = IR(fleft->op2);
1435 if (irk->o == IR_KINT64) {
1436 uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
1437 ir_k64(fright)->u64, (IROp)fins->o);
1438 PHIBARRIER(fleft);
1439 fins->op1 = fleft->op1;
1440 fins->op2 = (IRRef1)lj_ir_kint64(J, k);
1441 return RETRYFOLD; /* (i o k1) o k2 ==> i o (k1 o k2) */
1443 return NEXTFOLD;
1444 #else
1445 UNUSED(J); lua_assert(0); return FAILFOLD;
1446 #endif
1449 LJFOLD(MIN MIN any)
1450 LJFOLD(MAX MAX any)
1451 LJFOLD(BAND BAND any)
1452 LJFOLD(BOR BOR any)
1453 LJFOLDF(reassoc_dup)
1455 if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
1456 return LEFTFOLD; /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */
1457 return NEXTFOLD;
1460 LJFOLD(BXOR BXOR any)
1461 LJFOLDF(reassoc_bxor)
1463 PHIBARRIER(fleft);
1464 if (fins->op2 == fleft->op1) /* (a xor b) xor a ==> b */
1465 return fleft->op2;
1466 if (fins->op2 == fleft->op2) /* (a xor b) xor b ==> a */
1467 return fleft->op1;
1468 return NEXTFOLD;
1471 LJFOLD(BSHL BSHL KINT)
1472 LJFOLD(BSHR BSHR KINT)
1473 LJFOLD(BSAR BSAR KINT)
1474 LJFOLD(BROL BROL KINT)
1475 LJFOLD(BROR BROR KINT)
1476 LJFOLDF(reassoc_shift)
1478 IRIns *irk = IR(fleft->op2);
1479 PHIBARRIER(fleft); /* The (shift any KINT) rule covers k2 == 0 and more. */
1480 if (irk->o == IR_KINT) { /* (i o k1) o k2 ==> i o (k1 + k2) */
1481 int32_t mask = irt_is64(fins->t) ? 63 : 31;
1482 int32_t k = (irk->i & mask) + (fright->i & mask);
1483 if (k > mask) { /* Combined shift too wide? */
1484 if (fins->o == IR_BSHL || fins->o == IR_BSHR)
1485 return mask == 31 ? INTFOLD(0) : INT64FOLD(0);
1486 else if (fins->o == IR_BSAR)
1487 k = mask;
1488 else
1489 k &= mask;
1491 fins->op1 = fleft->op1;
1492 fins->op2 = (IRRef1)lj_ir_kint(J, k);
1493 return RETRYFOLD;
1495 return NEXTFOLD;
1498 LJFOLD(MIN MIN KNUM)
1499 LJFOLD(MAX MAX KNUM)
1500 LJFOLD(MIN MIN KINT)
1501 LJFOLD(MAX MAX KINT)
1502 LJFOLDF(reassoc_minmax_k)
1504 IRIns *irk = IR(fleft->op2);
1505 if (irk->o == IR_KNUM) {
1506 lua_Number a = ir_knum(irk)->n;
1507 lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD);
1508 if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
1509 return LEFTFOLD;
1510 PHIBARRIER(fleft);
1511 fins->op1 = fleft->op1;
1512 fins->op2 = (IRRef1)lj_ir_knum(J, y);
1513 return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */
1514 } else if (irk->o == IR_KINT) {
1515 int32_t a = irk->i;
1516 int32_t y = kfold_intop(a, fright->i, fins->o);
1517 if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
1518 return LEFTFOLD;
1519 PHIBARRIER(fleft);
1520 fins->op1 = fleft->op1;
1521 fins->op2 = (IRRef1)lj_ir_kint(J, y);
1522 return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */
1524 return NEXTFOLD;
1527 LJFOLD(MIN MAX any)
1528 LJFOLD(MAX MIN any)
1529 LJFOLDF(reassoc_minmax_left)
1531 if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
1532 return RIGHTFOLD; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */
1533 return NEXTFOLD;
1536 LJFOLD(MIN any MAX)
1537 LJFOLD(MAX any MIN)
1538 LJFOLDF(reassoc_minmax_right)
1540 if (fins->op1 == fright->op1 || fins->op1 == fright->op2)
1541 return LEFTFOLD; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */
1542 return NEXTFOLD;
1545 /* -- Array bounds check elimination -------------------------------------- */
1547 /* Eliminate ABC across PHIs to handle the t[i-1] forwarding case.
1548 ** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
1549 ** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
1551 LJFOLD(ABC any ADD)
1552 LJFOLDF(abc_fwd)
1554 if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
1555 if (irref_isk(fright->op2)) {
1556 IRIns *add2 = IR(fright->op1);
1557 if (add2->o == IR_ADD && irref_isk(add2->op2) &&
1558 IR(fright->op2)->i == -IR(add2->op2)->i) {
1559 IRRef ref = J->chain[IR_ABC];
1560 IRRef lim = add2->op1;
1561 if (fins->op1 > lim) lim = fins->op1;
1562 while (ref > lim) {
1563 IRIns *ir = IR(ref);
1564 if (ir->op1 == fins->op1 && ir->op2 == add2->op1)
1565 return DROPFOLD;
1566 ref = ir->prev;
1571 return NEXTFOLD;
1574 /* Eliminate ABC for constants.
1575 ** ABC(asize, k1), ABC(asize, k2) ==> ABC(asize, max(k1, k2))
1576 ** Drop second ABC if k2 is lower. Otherwise patch first ABC with k2.
1578 LJFOLD(ABC any KINT)
1579 LJFOLDF(abc_k)
1581 if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
1582 IRRef ref = J->chain[IR_ABC];
1583 IRRef asize = fins->op1;
1584 while (ref > asize) {
1585 IRIns *ir = IR(ref);
1586 if (ir->op1 == asize && irref_isk(ir->op2)) {
1587 int32_t k = IR(ir->op2)->i;
1588 if (fright->i > k)
1589 ir->op2 = fins->op2;
1590 return DROPFOLD;
1592 ref = ir->prev;
1594 return EMITFOLD; /* Already performed CSE. */
1596 return NEXTFOLD;
1599 /* Eliminate invariant ABC inside loop. */
1600 LJFOLD(ABC any any)
1601 LJFOLDF(abc_invar)
1603 if (!irt_isint(fins->t) && J->chain[IR_LOOP]) /* Currently marked as PTR. */
1604 return DROPFOLD;
1605 return NEXTFOLD;
1608 /* -- Commutativity ------------------------------------------------------- */
1610 /* The refs of commutative ops are canonicalized. Lower refs go to the right.
1611 ** Rationale behind this:
1612 ** - It (also) moves constants to the right.
1613 ** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices).
1614 ** - It helps CSE to find more matches.
1615 ** - The assembler generates better code with constants at the right.
1618 LJFOLD(ADD any any)
1619 LJFOLD(MUL any any)
1620 LJFOLD(ADDOV any any)
1621 LJFOLD(MULOV any any)
1622 LJFOLDF(comm_swap)
1624 if (fins->op1 < fins->op2) { /* Move lower ref to the right. */
1625 IRRef1 tmp = fins->op1;
1626 fins->op1 = fins->op2;
1627 fins->op2 = tmp;
1628 return RETRYFOLD;
1630 return NEXTFOLD;
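/* Note on the swap above: constant refs live below REF_BIAS and therefore
** compare lower than any instruction ref, so moving the lower ref to the
** right in particular moves constants to the right operand, as promised in
** the rationale above.
*/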
1633 LJFOLD(EQ any any)
1634 LJFOLD(NE any any)
1635 LJFOLDF(comm_equal)
1637 /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
1638 if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
1639 return CONDFOLD(fins->o == IR_EQ);
1640 return fold_comm_swap(J);
1643 LJFOLD(LT any any)
1644 LJFOLD(GE any any)
1645 LJFOLD(LE any any)
1646 LJFOLD(GT any any)
1647 LJFOLD(ULT any any)
1648 LJFOLD(UGE any any)
1649 LJFOLD(ULE any any)
1650 LJFOLD(UGT any any)
1651 LJFOLDF(comm_comp)
1653 /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
1654 if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
1655 return CONDFOLD((fins->o ^ (fins->o >> 1)) & 1);
1656 if (fins->op1 < fins->op2) { /* Move lower ref to the right. */
1657 IRRef1 tmp = fins->op1;
1658 fins->op1 = fins->op2;
1659 fins->op2 = tmp;
1660 fins->o ^= 3; /* GT <-> LT, GE <-> LE, does not affect U */
1661 return RETRYFOLD;
1663 return NEXTFOLD;
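/* Both bit tricks above depend on the layout of the comparison opcodes in
** lj_ir.h (LT, GE, LE, GT, ULT, UGE, ULE, UGT in one aligned block):
** (o ^ (o >> 1)) & 1 selects exactly the reflexive comparisons
** (GE, LE, UGE, ULE), and o ^= 3 exchanges LT <-> GT and GE <-> LE within
** both the signed and the unsigned group.
*/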
1666 LJFOLD(BAND any any)
1667 LJFOLD(BOR any any)
1668 LJFOLD(MIN any any)
1669 LJFOLD(MAX any any)
1670 LJFOLDF(comm_dup)
1672 if (fins->op1 == fins->op2) /* x o x ==> x */
1673 return LEFTFOLD;
1674 return fold_comm_swap(J);
1677 LJFOLD(BXOR any any)
1678 LJFOLDF(comm_bxor)
1680 if (fins->op1 == fins->op2) /* i xor i ==> 0 */
1681 return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
1682 return fold_comm_swap(J);
1685 /* -- Simplification of compound expressions ------------------------------ */
1687 static TRef kfold_xload(jit_State *J, IRIns *ir, const void *p)
1689 int32_t k;
1690 switch (irt_type(ir->t)) {
1691 case IRT_NUM: return lj_ir_knum_u64(J, *(uint64_t *)p);
1692 case IRT_I8: k = (int32_t)*(int8_t *)p; break;
1693 case IRT_U8: k = (int32_t)*(uint8_t *)p; break;
1694 case IRT_I16: k = (int32_t)(int16_t)lj_getu16(p); break;
1695 case IRT_U16: k = (int32_t)(uint16_t)lj_getu16(p); break;
1696 case IRT_INT: case IRT_U32: k = (int32_t)lj_getu32(p); break;
1697 case IRT_I64: case IRT_U64: return lj_ir_kint64(J, *(uint64_t *)p);
1698 default: return 0;
1700 return lj_ir_kint(J, k);
1703 /* Turn: string.sub(str, a, b) == kstr
1704 ** into: string.byte(str, a) == string.byte(kstr, 1) etc.
1705 ** Note: this creates unaligned XLOADs on x86/x64.
1707 LJFOLD(EQ SNEW KGC)
1708 LJFOLD(NE SNEW KGC)
1709 LJFOLDF(merge_eqne_snew_kgc)
1711 GCstr *kstr = ir_kstr(fright);
1712 int32_t len = (int32_t)kstr->len;
1713 lua_assert(irt_isstr(fins->t));
1715 #if LJ_TARGET_X86ORX64
1716 #define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */
1717 #define FOLD_SNEW_TYPE8 IRT_I8 /* Creates shorter immediates. */
1718 #else
1719 #define FOLD_SNEW_MAX_LEN 1 /* Handle string lengths 0 or 1. */
1720 #define FOLD_SNEW_TYPE8 IRT_U8 /* Prefer unsigned loads. */
1721 #endif
1723 if (len <= FOLD_SNEW_MAX_LEN) {
1724 IROp op = (IROp)fins->o;
1725 IRRef strref = fleft->op1;
1726 lua_assert(IR(strref)->o == IR_STRREF);
1727 if (op == IR_EQ) {
1728 emitir(IRTGI(IR_EQ), fleft->op2, lj_ir_kint(J, len));
1729 /* Caveat: fins/fleft/fright is no longer valid after emitir. */
1730 } else {
1731 /* NE is not expanded since this would need an OR of two conds. */
1732 if (!irref_isk(fleft->op2)) /* Only handle the constant length case. */
1733 return NEXTFOLD;
1734 if (IR(fleft->op2)->i != len)
1735 return DROPFOLD;
1737 if (len > 0) {
1738 /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
1739 uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, FOLD_SNEW_TYPE8) :
1740 len == 2 ? IRT(IR_XLOAD, IRT_U16) :
1741 IRTI(IR_XLOAD));
1742 TRef tmp = emitir(ot, strref,
1743 IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0));
1744 TRef val = kfold_xload(J, IR(tref_ref(tmp)), strdata(kstr));
1745 if (len == 3)
1746 tmp = emitir(IRTI(IR_BAND), tmp,
1747 lj_ir_kint(J, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
1748 fins->op1 = (IRRef1)tmp;
1749 fins->op2 = (IRRef1)val;
1750 fins->ot = (IROpT)IRTGI(op);
1751 return RETRYFOLD;
1752 } else {
1753 return DROPFOLD;
1756 return NEXTFOLD;
1759 /* -- Loads --------------------------------------------------------------- */
1761 /* Loads cannot be folded or passed on to CSE in general.
1762 ** Alias analysis is needed to check for forwarding opportunities.
1764 ** Caveat: *all* loads must be listed here or they end up at CSE!
1767 LJFOLD(ALOAD any)
1768 LJFOLDX(lj_opt_fwd_aload)
1770 /* From HREF fwd (see below). Must eliminate, not supported by fwd/backend. */
1771 LJFOLD(HLOAD KKPTR)
1772 LJFOLDF(kfold_hload_kkptr)
1774 UNUSED(J);
1775 lua_assert(ir_kptr(fleft) == niltvg(J2G(J)));
1776 return TREF_NIL;
1779 LJFOLD(HLOAD any)
1780 LJFOLDX(lj_opt_fwd_hload)
1782 LJFOLD(ULOAD any)
1783 LJFOLDX(lj_opt_fwd_uload)
1785 LJFOLD(CALLL any IRCALL_lj_tab_len)
1786 LJFOLDX(lj_opt_fwd_tab_len)
1788 /* Upvalue refs are really loads, but there are no corresponding stores.
1789 ** So CSE is ok for them, except for UREFO across a GC step (see below).
1790 ** If the referenced function is const, its upvalue addresses are const, too.
1791 ** This can be used to improve CSE by looking for the same address,
1792 ** even if the upvalues originate from a different function.
1794 LJFOLD(UREFO KGC any)
1795 LJFOLD(UREFC KGC any)
1796 LJFOLDF(cse_uref)
1798 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
1799 IRRef ref = J->chain[fins->o];
1800 GCfunc *fn = ir_kfunc(fleft);
1801 GCupval *uv = gco2uv(gcref(fn->l.uvptr[(fins->op2 >> 8)]));
1802 while (ref > 0) {
1803 IRIns *ir = IR(ref);
1804 if (irref_isk(ir->op1)) {
1805 GCfunc *fn2 = ir_kfunc(IR(ir->op1));
1806 if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
1807 if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
1808 break;
1809 return ref;
1812 ref = ir->prev;
1815 return EMITFOLD;
1818 LJFOLD(HREF TNEW any)
1819 LJFOLDF(fwd_href_tnew)
1821 if (lj_opt_fwd_href_nokey(J))
1822 return lj_ir_kkptr(J, niltvg(J2G(J)));
1823 return NEXTFOLD;
1826 LJFOLD(HREF TDUP KPRI)
1827 LJFOLD(HREF TDUP KGC)
1828 LJFOLD(HREF TDUP KNUM)
1829 LJFOLDF(fwd_href_tdup)
1831 TValue keyv;
1832 lj_ir_kvalue(J->L, &keyv, fright);
1833 if (lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv) == niltvg(J2G(J)) &&
1834 lj_opt_fwd_href_nokey(J))
1835 return lj_ir_kkptr(J, niltvg(J2G(J)));
1836 return NEXTFOLD;
1839 /* We can safely FOLD/CSE array/hash refs and field loads, since there
1840 ** are no corresponding stores. But we need to check for any NEWREF with
1841 ** an aliased table, as it may invalidate all of the pointers and fields.
1842 ** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
1843 ** FLOADs. And NEWREF itself is treated like a store (see below).
1845 LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
1846 LJFOLDF(fload_tab_tnew_asize)
1848 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
1849 return INTFOLD(fleft->op1);
1850 return NEXTFOLD;
1853 LJFOLD(FLOAD TNEW IRFL_TAB_HMASK)
1854 LJFOLDF(fload_tab_tnew_hmask)
1856 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
1857 return INTFOLD((1 << fleft->op2)-1);
1858 return NEXTFOLD;
1861 LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE)
1862 LJFOLDF(fload_tab_tdup_asize)
1864 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
1865 return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize);
1866 return NEXTFOLD;
1869 LJFOLD(FLOAD TDUP IRFL_TAB_HMASK)
1870 LJFOLDF(fload_tab_tdup_hmask)
1872 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
1873 return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask);
1874 return NEXTFOLD;
1877 LJFOLD(HREF any any)
1878 LJFOLD(FLOAD any IRFL_TAB_ARRAY)
1879 LJFOLD(FLOAD any IRFL_TAB_NODE)
1880 LJFOLD(FLOAD any IRFL_TAB_ASIZE)
1881 LJFOLD(FLOAD any IRFL_TAB_HMASK)
1882 LJFOLDF(fload_tab_ah)
1884 TRef tr = lj_opt_cse(J);
1885 return lj_opt_fwd_tptr(J, tref_ref(tr)) ? tr : EMITFOLD;
1888 /* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
1889 LJFOLD(FLOAD KGC IRFL_STR_LEN)
1890 LJFOLDF(fload_str_len_kgc)
1892 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1893 return INTFOLD((int32_t)ir_kstr(fleft)->len);
1894 return NEXTFOLD;
1897 LJFOLD(FLOAD SNEW IRFL_STR_LEN)
1898 LJFOLDF(fload_str_len_snew)
1900 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
1901 PHIBARRIER(fleft);
1902 return fleft->op2;
1904 return NEXTFOLD;
1907 /* The C type ID of cdata objects is immutable. */
1908 LJFOLD(FLOAD KGC IRFL_CDATA_TYPEID)
1909 LJFOLDF(fload_cdata_typeid_kgc)
1911 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1912 return INTFOLD((int32_t)ir_kcdata(fleft)->typeid);
1913 return NEXTFOLD;
1916 /* Get the contents of immutable cdata objects. */
1917 LJFOLD(FLOAD KGC IRFL_CDATA_PTR)
1918 LJFOLD(FLOAD KGC IRFL_CDATA_INT64)
1919 LJFOLDF(fload_cdata_int64_kgc)
1921 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
1922 void *p = cdataptr(ir_kcdata(fleft));
1923 if (irt_is64(fins->t))
1924 return INT64FOLD(*(uint64_t *)p);
1925 else
1926 return INTFOLD(*(int32_t *)p);
1928 return NEXTFOLD;
1931 LJFOLD(FLOAD CNEW IRFL_CDATA_TYPEID)
1932 LJFOLD(FLOAD CNEWI IRFL_CDATA_TYPEID)
1933 LJFOLDF(fload_cdata_typeid_cnew)
1935 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1936 return fleft->op1; /* No PHI barrier needed. CNEW/CNEWI op1 is const. */
1937 return NEXTFOLD;
1940 /* Pointer and int64 cdata objects are immutable. */
1941 LJFOLD(FLOAD CNEWI IRFL_CDATA_PTR)
1942 LJFOLD(FLOAD CNEWI IRFL_CDATA_INT64)
1943 LJFOLDF(fload_cdata_ptr_int64_cnew)
1945 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1946 return fleft->op2; /* Fold even across PHI to avoid allocations. */
1947 return NEXTFOLD;
1950 LJFOLD(FLOAD any IRFL_STR_LEN)
1951 LJFOLD(FLOAD any IRFL_CDATA_TYPEID)
1952 LJFOLD(FLOAD any IRFL_CDATA_PTR)
1953 LJFOLD(FLOAD any IRFL_CDATA_INT64)
1954 LJFOLD(VLOAD any any) /* Vararg loads have no corresponding stores. */
1955 LJFOLDX(lj_opt_cse)
1957 /* All other field loads need alias analysis. */
1958 LJFOLD(FLOAD any any)
1959 LJFOLDX(lj_opt_fwd_fload)
1961 /* This is for LOOP only. Recording handles SLOADs internally. */
1962 LJFOLD(SLOAD any any)
1963 LJFOLDF(fwd_sload)
1965 if ((fins->op2 & IRSLOAD_FRAME)) {
1966 TRef tr = lj_opt_cse(J);
1967 return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
1968 } else {
1969 lua_assert(J->slot[fins->op1] != 0);
1970 return J->slot[fins->op1];
1974 /* Only fold for KKPTR. The pointer _and_ the contents must be const. */
1975 LJFOLD(XLOAD KKPTR any)
1976 LJFOLDF(xload_kptr)
1978 TRef tr = kfold_xload(J, fins, ir_kptr(fleft));
1979 return tr ? tr : NEXTFOLD;
1982 LJFOLD(XLOAD any any)
1983 LJFOLDX(lj_opt_fwd_xload)
1985 /* -- Write barriers ------------------------------------------------------ */
1987 /* Write barriers are amenable to CSE, but not across any incremental
1988 ** GC steps.
1990 ** The same logic applies to open upvalue references, because a stack
1991 ** may be resized during a GC step (not the current stack, but maybe that
1992 ** of a coroutine).
1994 LJFOLD(TBAR any)
1995 LJFOLD(OBAR any any)
1996 LJFOLD(UREFO any any)
1997 LJFOLDF(barrier_tab)
1999 TRef tr = lj_opt_cse(J);
2000 if (gcstep_barrier(J, tref_ref(tr))) /* CSE across GC step? */
2001 return EMITFOLD; /* Raw emit. Assumes fins is left intact by CSE. */
2002 return tr;
2005 LJFOLD(TBAR TNEW)
2006 LJFOLD(TBAR TDUP)
2007 LJFOLDF(barrier_tnew_tdup)
2009 /* New tables are always white and never need a barrier. */
2010 if (fins->op1 < J->chain[IR_LOOP]) /* Except across a GC step. */
2011 return NEXTFOLD;
2012 return DROPFOLD;
2015 /* -- Stores and allocations ---------------------------------------------- */
2017 /* Stores and allocations cannot be folded or passed on to CSE in general.
2018 ** But some stores can be eliminated with dead-store elimination (DSE).
2020 ** Caveat: *all* stores and allocs must be listed here or they end up at CSE!
2023 LJFOLD(ASTORE any any)
2024 LJFOLD(HSTORE any any)
2025 LJFOLDX(lj_opt_dse_ahstore)
2027 LJFOLD(USTORE any any)
2028 LJFOLDX(lj_opt_dse_ustore)
2030 LJFOLD(FSTORE any any)
2031 LJFOLDX(lj_opt_dse_fstore)
2033 LJFOLD(XSTORE any any)
2034 LJFOLDX(lj_opt_dse_xstore)
2036 LJFOLD(NEWREF any any) /* Treated like a store. */
2037 LJFOLD(CALLS any any)
2038 LJFOLD(CALLL any any) /* Safeguard fallback. */
2039 LJFOLD(CALLXS any any)
2040 LJFOLD(XBAR)
2041 LJFOLD(RETF any any) /* Modifies BASE. */
2042 LJFOLD(TNEW any any)
2043 LJFOLD(TDUP any)
2044 LJFOLD(CNEW any any)
2045 LJFOLD(XSNEW any any)
2046 LJFOLDX(lj_ir_emit)
2048 /* ------------------------------------------------------------------------ */
2050 /* Every entry in the generated hash table is a 32 bit pattern:
2052 ** xxxxxxxx iiiiiii lllllll rrrrrrrrrr
2054 ** xxxxxxxx = 8 bit index into fold function table
2055 ** iiiiiii = 7 bit folded instruction opcode
2056 ** lllllll = 7 bit left instruction opcode
2057 ** rrrrrrrrrr = 8 bit right instruction opcode or 10 bits from literal field
2060 #include "lj_folddef.h"
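/* A minimal sketch (not part of the build) of how an entry fh from the
** generated fold_hash[] table is taken apart, mirroring the lookup loop in
** lj_opt_fold() below.
*/
#if 0
static void fold_entry_sketch(uint32_t fh)
{
  uint32_t func_idx = fh >> 24;       /* Index into fold_func[]. */
  uint32_t rule_key = fh & 0xffffff;  /* 'ins left right' pattern to match. */
  UNUSED(func_idx); UNUSED(rule_key);
}
#endif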
2062 /* ------------------------------------------------------------------------ */
2064 /* Fold IR instruction. */
2065 TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
2067 uint32_t key, any;
2068 IRRef ref;
2070 if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
2071 lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
2072 JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT);
2073 /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
2074 if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
2075 return lj_opt_cse(J);
2077 /* Forwarding or CSE disabled? Emit raw IR for loads, except for SLOAD. */
2078 if ((J->flags & (JIT_F_OPT_FWD|JIT_F_OPT_CSE)) !=
2079 (JIT_F_OPT_FWD|JIT_F_OPT_CSE) &&
2080 irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD)
2081 return lj_ir_emit(J);
2083 /* DSE disabled? Emit raw IR for stores. */
2084 if (!(J->flags & JIT_F_OPT_DSE) && irm_kind(lj_ir_mode[fins->o]) == IRM_S)
2085 return lj_ir_emit(J);
2088 /* Fold engine start/retry point. */
2089 retry:
2090 /* Construct key from opcode and operand opcodes (unless literal/none). */
2091 key = ((uint32_t)fins->o << 17);
2092 if (fins->op1 >= J->cur.nk) {
2093 key += (uint32_t)IR(fins->op1)->o << 10;
2094 *fleft = *IR(fins->op1);
2096 if (fins->op2 >= J->cur.nk) {
2097 key += (uint32_t)IR(fins->op2)->o;
2098 *fright = *IR(fins->op2);
2099 } else {
2100 key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */
2103 /* Check for a match in order from most specific to least specific. */
2104 any = 0;
2105 for (;;) {
2106 uint32_t k = key | (any & 0x1ffff);
2107 uint32_t h = fold_hashkey(k);
2108 uint32_t fh = fold_hash[h]; /* Lookup key in semi-perfect hash table. */
2109 if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
2110 ref = (IRRef)tref_ref(fold_func[fh >> 24](J));
2111 if (ref != NEXTFOLD)
2112 break;
2114 if (any == 0xfffff) /* Exhausted folding. Pass on to CSE. */
2115 return lj_opt_cse(J);
2116 any = (any | (any >> 10)) ^ 0xffc00;
2119 /* Return value processing, ordered by frequency. */
2120 if (LJ_LIKELY(ref >= MAX_FOLD))
2121 return TREF(ref, irt_t(IR(ref)->t));
2122 if (ref == RETRYFOLD)
2123 goto retry;
2124 if (ref == KINTFOLD)
2125 return lj_ir_kint(J, fins->i);
2126 if (ref == FAILFOLD)
2127 lj_trace_err(J, LJ_TRERR_GFAIL);
2128 lua_assert(ref == DROPFOLD);
2129 return REF_DROP;
2132 /* -- Common-Subexpression Elimination ------------------------------------ */
2134 /* CSE an IR instruction. This is very fast due to the skip-list chains. */
2135 TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
2137 /* Avoid narrow to wide store-to-load forwarding stall */
2138 IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
2139 IROp op = fins->o;
2140 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
2141 /* Limited search for same operands in per-opcode chain. */
2142 IRRef ref = J->chain[op];
2143 IRRef lim = fins->op1;
2144 if (fins->op2 > lim) lim = fins->op2; /* Relies on lit < REF_BIAS. */
2145 while (ref > lim) {
2146 if (IR(ref)->op12 == op12)
2147 return TREF(ref, irt_t(IR(ref)->t)); /* Common subexpression found. */
2148 ref = IR(ref)->prev;
2151 /* Otherwise emit IR (inlined for speed). */
2153 IRRef ref = lj_ir_nextins(J);
2154 IRIns *ir = IR(ref);
2155 ir->prev = J->chain[op];
2156 ir->op12 = op12;
2157 J->chain[op] = (IRRef1)ref;
2158 ir->o = fins->o;
2159 J->guardemit.irt |= fins->t.irt;
2160 return TREF(ref, irt_t((ir->t = fins->t)));
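/* Note on the chains used above: J->chain[op] holds the most recent
** instruction with opcode op and each instruction's prev field links to the
** previous one with the same opcode. The search stops at the larger operand
** ref, because a matching instruction must have been emitted after both of
** its operands.
*/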
2164 /* CSE with explicit search limit. */
2165 TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim)
2167 IRRef ref = J->chain[fins->o];
2168 IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
2169 while (ref > lim) {
2170 if (IR(ref)->op12 == op12)
2171 return ref;
2172 ref = IR(ref)->prev;
2174 return lj_ir_emit(J);
2177 /* ------------------------------------------------------------------------ */
2179 #undef IR
2180 #undef fins
2181 #undef fleft
2182 #undef fright
2183 #undef knumleft
2184 #undef knumright
2185 #undef emitir
2187 #endif