/*
** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
** ABCelim: Array Bounds Check Elimination.
** CSE: Common-Subexpression Elimination.
** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_fold_c
#define LUA_CORE

#include <math.h>

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_str.h"
#include "lj_tab.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#include "lj_carith.h"
#include "lj_vm.h"

/* Here's a short description of how the FOLD engine processes instructions:
**
** The FOLD engine receives a single instruction stored in fins (J->fold.ins).
** The instruction and its operands are used to select matching fold rules.
** These are applied iteratively until a fixed point is reached.
**
** The 8 bit opcode of the instruction itself plus the opcodes of the
** two instructions referenced by its operands form a 24 bit key
** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits).
**
** This key is used for partial matching against the fold rules. The
** left/right operand fields of the key are successively masked with
** the 'any' wildcard, from most specific to least specific:
**
**   ins left right
**   ins any  right
**   ins left any
**   ins any  any
**
** The masked key is used to lookup a matching fold rule in a semi-perfect
** hash table. If a matching rule is found, the related fold function is run.
** Multiple rules can share the same fold function. A fold rule may return
** one of several special values:
**
** - NEXTFOLD means no folding was applied, because an additional test
**   inside the fold function failed. Matching continues against less
**   specific fold rules. Finally the instruction is passed on to CSE.
**
** - RETRYFOLD means the instruction was modified in-place. Folding is
**   retried as if this instruction had just been received.
**
** All other return values are terminal actions -- no further folding is
** applied:
**
** - INTFOLD(i) returns a reference to the integer constant i.
**
** - LEFTFOLD and RIGHTFOLD return the left/right operand reference
**   without emitting an instruction.
**
** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit
**   it without passing through any further optimizations.
**
** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have
**   no result (e.g. guarded assertions): FAILFOLD means the guard would
**   always fail, i.e. the current trace is pointless. DROPFOLD means
**   the guard is always true and has been eliminated. CONDFOLD is a
**   shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail).
**
** - Any other return value is interpreted as an IRRef or TRef. This
**   can be a reference to an existing or a newly created instruction.
**   Only the least-significant 16 bits (IRRef1) are used to form a TRef
**   which is finally returned to the caller.
**
** The FOLD engine receives instructions both from the trace recorder and
** substituted instructions from LOOP unrolling. This means all types
** of instructions may end up here, even though the recorder bypasses
** FOLD in some cases. Thus all loads, stores and allocations must have
** an any/any rule to avoid being passed on to CSE.
**
** Carefully read the following requirements before adding or modifying
** any fold rules:
**
** Requirement #1: All fold rules must preserve their destination type.
**
** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result).
** Never use lj_ir_knumint() which can have either a KINT or KNUM result.
**
** Requirement #2: Fold rules should not create *new* instructions which
** reference operands *across* PHIs.
**
** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the
** left operand is a PHI. Then fleft->op1 would point across the PHI
** frontier to an invariant instruction. Adding a PHI for this instruction
** would be counterproductive. The solution is to add a barrier which
** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case.
** The only exception is for recurrences with high latencies like
** repeated int->num->int conversions.
**
** One could relax this condition a bit if the referenced instruction is
** a PHI, too. But this often leads to worse code due to excessive
** register shuffling.
**
** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though.
** Even returning fleft->op1 would be ok, because a new PHI will be added,
** if needed. But again, this leads to excessive register shuffling and
** should be avoided.
**
** Requirement #3: The set of all fold rules must be monotonic to guarantee
** termination.
**
** The goal is optimization, so one primarily wants to add strength-reducing
** rules. This means eliminating an instruction or replacing an instruction
** with one or more simpler instructions. Don't add fold rules which point
** into the other direction.
**
** Some rules (like commutativity) do not directly reduce the strength of
** an instruction, but enable other fold rules (e.g. by moving constants
** to the right operand). These rules must be made unidirectional to avoid
** cycles.
**
** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it.
*/

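/* An illustrative walk-through (editorial; the operand shapes are made up,
** but the rules named do exist below): suppose fins is an ADD whose left
** operand was created by another ADD and whose right operand is a KINT
** constant. The first probe uses the exact key 'ADD ADD KINT' and runs
** reassoc_intarith_k. If that returns NEXTFOLD, the left field is
** wildcarded and 'ADD any KINT' runs simplify_intadd_k. The next probe,
** 'ADD ADD any', finds no rule, and finally 'ADD any any' runs comm_swap,
** before the instruction would be passed on to CSE.
*/
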
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)		(&J->cur.ir[(ref)])
#define fins		(&J->fold.ins)
#define fleft		(&J->fold.left)
#define fright		(&J->fold.right)
#define knumleft	(ir_knum(fleft)->n)
#define knumright	(ir_knum(fright)->n)

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Fold function type. Fastcall on x86 significantly reduces their size. */
typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);

/* Macros for the fold specs, so buildvm can recognize them. */
#define LJFOLD(x)
#define LJFOLDX(x)
#define LJFOLDF(name)	static TRef LJ_FASTCALL fold_##name(jit_State *J)
/* Note: They must be at the start of a line or buildvm ignores them! */

/* Barrier to prevent using operands across PHIs. */
#define PHIBARRIER(ir)	if (irt_isphi((ir)->t)) return NEXTFOLD

/* Barrier to prevent folding across a GC step.
** GC steps can only happen at the head of a trace and at LOOP.
** And the GC is only driven forward if there is at least one allocation.
*/
#define gcstep_barrier(J, ref) \
  ((ref) < J->chain[IR_LOOP] && \
   (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
    J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
    J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR]))

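/* Reading the barrier (editorial sketch): a ref below J->chain[IR_LOOP] was
** emitted in the loop-invariant part of the trace, so the GC step at LOOP
** may run between that instruction and the current one. If the trace also
** contains at least one allocation (SNEW..TOSTR above), that step can
** actually drive the GC forward, so reusing the older ref -- e.g. when
** CSEing a UREFO or a write barrier below -- would be unsafe.
*/
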
/* -- Constant folding for FP numbers ------------------------------------- */

LJFOLD(ADD KNUM KNUM)
LJFOLD(SUB KNUM KNUM)
LJFOLD(MUL KNUM KNUM)
LJFOLD(DIV KNUM KNUM)
LJFOLD(NEG KNUM KNUM)
LJFOLD(ABS KNUM KNUM)
LJFOLD(ATAN2 KNUM KNUM)
LJFOLD(LDEXP KNUM KNUM)
LJFOLD(MIN KNUM KNUM)
LJFOLD(MAX KNUM KNUM)
LJFOLDF(kfold_numarith)
{
  lua_Number a = knumleft;
  lua_Number b = knumright;
  lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
  return lj_ir_knum(J, y);
}

LJFOLD(LDEXP KNUM KINT)
LJFOLDF(kfold_ldexp)
{
#if LJ_TARGET_X86ORX64
  UNUSED(J);
  return NEXTFOLD;
#else
  return lj_ir_knum(J, ldexp(knumleft, fright->i));
#endif
}

LJFOLD(FPMATH KNUM any)
LJFOLDF(kfold_fpmath)
{
  lua_Number a = knumleft;
  lua_Number y = lj_vm_foldfpm(a, fins->op2);
  return lj_ir_knum(J, y);
}

LJFOLD(POW KNUM KINT)
LJFOLDF(kfold_numpow)
{
  lua_Number a = knumleft;
  lua_Number b = (lua_Number)fright->i;
  lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
  return lj_ir_knum(J, y);
}

/* Must not use kfold_kref for numbers (could be NaN). */
LJFOLD(EQ KNUM KNUM)
LJFOLD(NE KNUM KNUM)
LJFOLD(LT KNUM KNUM)
LJFOLD(GE KNUM KNUM)
LJFOLD(LE KNUM KNUM)
LJFOLD(GT KNUM KNUM)
LJFOLD(ULT KNUM KNUM)
LJFOLD(UGE KNUM KNUM)
LJFOLD(ULE KNUM KNUM)
LJFOLD(UGT KNUM KNUM)
LJFOLDF(kfold_numcomp)
{
  return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o));
}

/* -- Constant folding for 32 bit integers -------------------------------- */

static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
{
  switch (op) {
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
  case IR_MUL: k1 *= k2; break;
  case IR_MOD: k1 = lj_vm_modi(k1, k2); break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
  case IR_BSHL: k1 <<= (k2 & 31); break;
  case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break;
  case IR_BSAR: k1 >>= (k2 & 31); break;
  case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break;
  case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
  case IR_MIN: k1 = k1 < k2 ? k1 : k2; break;
  case IR_MAX: k1 = k1 > k2 ? k1 : k2; break;
  default: lua_assert(0); break;
  }
  return k1;
}

LJFOLD(ADD KINT KINT)
LJFOLD(SUB KINT KINT)
LJFOLD(MUL KINT KINT)
LJFOLD(MOD KINT KINT)
LJFOLD(BAND KINT KINT)
LJFOLD(BOR KINT KINT)
LJFOLD(BXOR KINT KINT)
LJFOLD(BSHL KINT KINT)
LJFOLD(BSHR KINT KINT)
LJFOLD(BSAR KINT KINT)
LJFOLD(BROL KINT KINT)
LJFOLD(BROR KINT KINT)
LJFOLD(MIN KINT KINT)
LJFOLD(MAX KINT KINT)
LJFOLDF(kfold_intarith)
{
  return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
}

LJFOLD(ADDOV KINT KINT)
LJFOLD(SUBOV KINT KINT)
LJFOLD(MULOV KINT KINT)
LJFOLDF(kfold_intovarith)
{
  lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i,
				 fins->o - IR_ADDOV);
  int32_t k = lj_num2int(n);
  if (n != (lua_Number)k)
    return FAILFOLD;
  return INTFOLD(k);
}

LJFOLD(BNOT KINT)
LJFOLDF(kfold_bnot)
{
  return INTFOLD(~fleft->i);
}

LJFOLD(BSWAP KINT)
LJFOLDF(kfold_bswap)
{
  return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i));
}

LJFOLD(LT KINT KINT)
LJFOLD(GE KINT KINT)
LJFOLD(LE KINT KINT)
LJFOLD(GT KINT KINT)
LJFOLD(ULT KINT KINT)
LJFOLD(UGE KINT KINT)
LJFOLD(ULE KINT KINT)
LJFOLD(UGT KINT KINT)
LJFOLD(ABC KINT KINT)
LJFOLDF(kfold_intcomp)
{
  int32_t a = fleft->i, b = fright->i;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD(a < b);
  case IR_GE: return CONDFOLD(a >= b);
  case IR_LE: return CONDFOLD(a <= b);
  case IR_GT: return CONDFOLD(a > b);
  case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b);
  case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b);
  case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
  case IR_ABC:
  case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
  default: lua_assert(0); return FAILFOLD;
  }
}

LJFOLD(UGE any KINT)
LJFOLDF(kfold_intcomp0)
{
  if (fright->i == 0)
    return DROPFOLD;
  return NEXTFOLD;
}

/* -- Constant folding for 64 bit integers -------------------------------- */

static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
{
  switch (op) {
#if LJ_64 || LJ_HASFFI
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
#endif
#if LJ_HASFFI
  case IR_MUL: k1 *= k2; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
#endif
  default: UNUSED(k2); lua_assert(0); break;
  }
  return k1;
}

LJFOLD(ADD KINT64 KINT64)
LJFOLD(SUB KINT64 KINT64)
LJFOLD(MUL KINT64 KINT64)
LJFOLD(BAND KINT64 KINT64)
LJFOLD(BOR KINT64 KINT64)
LJFOLD(BXOR KINT64 KINT64)
LJFOLDF(kfold_int64arith)
{
  return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64,
				    ir_k64(fright)->u64, (IROp)fins->o));
}

LJFOLD(DIV KINT64 KINT64)
LJFOLD(MOD KINT64 KINT64)
LJFOLD(POW KINT64 KINT64)
LJFOLDF(kfold_int64arith2)
{
#if LJ_HASFFI
  uint64_t k1 = ir_k64(fleft)->u64, k2 = ir_k64(fright)->u64;
  if (irt_isi64(fins->t)) {
    k1 = fins->o == IR_DIV ? lj_carith_divi64((int64_t)k1, (int64_t)k2) :
	 fins->o == IR_MOD ? lj_carith_modi64((int64_t)k1, (int64_t)k2) :
			     lj_carith_powi64((int64_t)k1, (int64_t)k2);
  } else {
    k1 = fins->o == IR_DIV ? lj_carith_divu64(k1, k2) :
	 fins->o == IR_MOD ? lj_carith_modu64(k1, k2) :
			     lj_carith_powu64(k1, k2);
  }
  return INT64FOLD(k1);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(BSHL KINT64 KINT)
LJFOLD(BSHR KINT64 KINT)
LJFOLD(BSAR KINT64 KINT)
LJFOLD(BROL KINT64 KINT)
LJFOLD(BROR KINT64 KINT)
LJFOLDF(kfold_int64shift)
{
#if LJ_HASFFI || LJ_64
  uint64_t k = ir_k64(fleft)->u64;
  int32_t sh = (fright->i & 63);
  switch ((IROp)fins->o) {
  case IR_BSHL: k <<= sh; break;
#if LJ_HASFFI
  case IR_BSHR: k >>= sh; break;
  case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
  case IR_BROL: k = lj_rol(k, sh); break;
  case IR_BROR: k = lj_ror(k, sh); break;
#endif
  default: lua_assert(0); break;
  }
  return INT64FOLD(k);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(BNOT KINT64)
LJFOLDF(kfold_bnot64)
{
#if LJ_HASFFI
  return INT64FOLD(~ir_k64(fleft)->u64);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(BSWAP KINT64)
LJFOLDF(kfold_bswap64)
{
#if LJ_HASFFI
  return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64));
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(LT KINT64 KINT64)
LJFOLD(GE KINT64 KINT64)
LJFOLD(LE KINT64 KINT64)
LJFOLD(GT KINT64 KINT64)
LJFOLD(ULT KINT64 KINT64)
LJFOLD(UGE KINT64 KINT64)
LJFOLD(ULE KINT64 KINT64)
LJFOLD(UGT KINT64 KINT64)
LJFOLDF(kfold_int64comp)
{
#if LJ_HASFFI
  uint64_t a = ir_k64(fleft)->u64, b = ir_k64(fright)->u64;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD(a < b);
  case IR_GE: return CONDFOLD(a >= b);
  case IR_LE: return CONDFOLD(a <= b);
  case IR_GT: return CONDFOLD(a > b);
  case IR_ULT: return CONDFOLD((uint64_t)a < (uint64_t)b);
  case IR_UGE: return CONDFOLD((uint64_t)a >= (uint64_t)b);
  case IR_ULE: return CONDFOLD((uint64_t)a <= (uint64_t)b);
  case IR_UGT: return CONDFOLD((uint64_t)a > (uint64_t)b);
  default: lua_assert(0); return FAILFOLD;
  }
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(UGE any KINT64)
LJFOLDF(kfold_int64comp0)
{
#if LJ_HASFFI
  if (ir_k64(fright)->u64 == 0)
    return DROPFOLD;
  return NEXTFOLD;
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

/* -- Constant folding for strings ---------------------------------------- */

LJFOLD(SNEW KKPTR KINT)
LJFOLDF(kfold_snew_kptr)
{
  GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i);
  return lj_ir_kstr(J, s);
}

LJFOLD(SNEW any KINT)
LJFOLDF(kfold_snew_empty)
{
  if (fright->i == 0)
    return lj_ir_kstr(J, &J2G(J)->strempty);
  return NEXTFOLD;
}

LJFOLD(STRREF KGC KINT)
LJFOLDF(kfold_strref)
{
  GCstr *str = ir_kstr(fleft);
  lua_assert((MSize)fright->i < str->len);
  return lj_ir_kkptr(J, (char *)strdata(str) + fright->i);
}

LJFOLD(STRREF SNEW any)
LJFOLDF(kfold_strref_snew)
{
  PHIBARRIER(fleft);
  if (irref_isk(fins->op2) && fright->i == 0) {
    return fleft->op1;  /* strref(snew(ptr, len), 0) ==> ptr */
  } else {
    /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
    IRIns *ir = IR(fleft->op1);
    IRRef1 str = ir->op1;  /* IRIns * is not valid across emitir. */
    lua_assert(ir->o == IR_STRREF);
    PHIBARRIER(ir);
    fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2);  /* Clobbers fins! */
    fins->op1 = str;
    fins->ot = IRT(IR_STRREF, IRT_P32);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(CALLN CARG IRCALL_lj_str_cmp)
LJFOLDF(kfold_strcmp)
{
  if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
    GCstr *a = ir_kstr(IR(fleft->op1));
    GCstr *b = ir_kstr(IR(fleft->op2));
    return INTFOLD(lj_str_cmp(a, b));
  }
  return NEXTFOLD;
}

/* -- Constant folding of pointer arithmetic ------------------------------ */

LJFOLD(ADD KGC KINT)
LJFOLD(ADD KGC KINT64)
LJFOLDF(kfold_add_kgc)
{
  GCobj *o = ir_kgc(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
  return lj_ir_kkptr(J, (char *)o + ofs);
}

LJFOLD(ADD KPTR KINT)
LJFOLD(ADD KPTR KINT64)
LJFOLD(ADD KKPTR KINT)
LJFOLD(ADD KKPTR KINT64)
LJFOLDF(kfold_add_kptr)
{
  void *p = ir_kptr(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
  return lj_ir_kptr_(J, fleft->o, (char *)p + ofs);
}

/* -- Constant folding of conversions ------------------------------------- */

LJFOLD(TOBIT KNUM KNUM)
LJFOLDF(kfold_tobit)
{
  return INTFOLD(lj_num2bit(knumleft));
}

LJFOLD(CONV KINT IRCONV_NUM_INT)
LJFOLDF(kfold_conv_kint_num)
{
  return lj_ir_knum(J, (lua_Number)fleft->i);
}

LJFOLD(CONV KINT IRCONV_NUM_U32)
LJFOLDF(kfold_conv_kintu32_num)
{
  return lj_ir_knum(J, (lua_Number)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT IRCONV_I64_INT)
LJFOLD(CONV KINT IRCONV_U64_INT)
LJFOLDF(kfold_conv_kint_i64)
{
  if ((fins->op2 & IRCONV_SEXT))
    return INT64FOLD((uint64_t)(int64_t)fleft->i);
  else
    return INT64FOLD((uint64_t)(int64_t)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT64 IRCONV_NUM_I64)
LJFOLDF(kfold_conv_kint64_num_i64)
{
  return lj_ir_knum(J, (lua_Number)(int64_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_NUM_U64)
LJFOLDF(kfold_conv_kint64_num_u64)
{
  return lj_ir_knum(J, (lua_Number)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_INT_I64)
LJFOLD(CONV KINT64 IRCONV_U32_I64)
LJFOLDF(kfold_conv_kint64_int_i64)
{
  return INTFOLD((int32_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KNUM IRCONV_INT_NUM)
LJFOLDF(kfold_conv_knum_int_num)
{
  lua_Number n = knumleft;
  if (!(fins->op2 & IRCONV_TRUNC)) {
    int32_t k = lj_num2int(n);
    if (irt_isguard(fins->t) && n != (lua_Number)k) {
      /* We're about to create a guard which always fails, like CONV +1.5.
      ** Some pathological loops cause this during LICM, e.g.:
      **   local x,k,t = 0,1.5,{1,[1.5]=2}
      **   for i=1,200 do x = x + t[k]; k = k == 1 and 1.5 or 1 end
      **   assert(x == 300)
      */
      return FAILFOLD;
    }
    return INTFOLD(k);
  } else {
    return INTFOLD((int32_t)n);
  }
}

LJFOLD(CONV KNUM IRCONV_U32_NUM)
LJFOLDF(kfold_conv_knum_u32_num)
{
  lua_assert((fins->op2 & IRCONV_TRUNC));
  return INTFOLD((int32_t)(uint32_t)knumleft);
}

LJFOLD(CONV KNUM IRCONV_I64_NUM)
LJFOLDF(kfold_conv_knum_i64_num)
{
  lua_assert((fins->op2 & IRCONV_TRUNC));
  return INT64FOLD((uint64_t)(int64_t)knumleft);
}

LJFOLD(CONV KNUM IRCONV_U64_NUM)
LJFOLDF(kfold_conv_knum_u64_num)
{
  lua_assert((fins->op2 & IRCONV_TRUNC));
  return INT64FOLD(lj_num2u64(knumleft));
}

LJFOLD(TOSTR KNUM)
LJFOLDF(kfold_tostr_knum)
{
  return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft));
}

LJFOLD(TOSTR KINT)
LJFOLDF(kfold_tostr_kint)
{
  return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i));
}

LJFOLD(STRTO KGC)
LJFOLDF(kfold_strto)
{
  TValue n;
  if (lj_str_tonum(ir_kstr(fleft), &n))
    return lj_ir_knum(J, numV(&n));
  return FAILFOLD;
}

/* -- Constant folding of equality checks --------------------------------- */

/* Don't constant-fold away FLOAD checks against KNULL. */
LJFOLD(EQ FLOAD KNULL)
LJFOLD(NE FLOAD KNULL)
LJFOLDX(lj_opt_cse)

/* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
LJFOLD(EQ any KNULL)
LJFOLD(NE any KNULL)
LJFOLD(EQ KNULL any)
LJFOLD(NE KNULL any)
LJFOLD(EQ KINT KINT)  /* Constants are unique, so same refs <==> same value. */
LJFOLD(NE KINT KINT)
LJFOLD(EQ KINT64 KINT64)
LJFOLD(NE KINT64 KINT64)
LJFOLD(EQ KGC KGC)
LJFOLD(NE KGC KGC)
LJFOLDF(kfold_kref)
{
  return CONDFOLD((fins->op1 == fins->op2) ^ (fins->o == IR_NE));
}

/* -- Algebraic shortcuts ------------------------------------------------- */

LJFOLD(FPMATH FPMATH IRFPM_FLOOR)
LJFOLD(FPMATH FPMATH IRFPM_CEIL)
LJFOLD(FPMATH FPMATH IRFPM_TRUNC)
LJFOLDF(shortcut_round)
{
  IRFPMathOp op = (IRFPMathOp)fleft->op2;
  if (op == IRFPM_FLOOR || op == IRFPM_CEIL || op == IRFPM_TRUNC)
    return LEFTFOLD;  /* round(round_left(x)) = round_left(x) */
  return NEXTFOLD;
}

LJFOLD(ABS ABS KNUM)
LJFOLDF(shortcut_left)
{
  return LEFTFOLD;  /* f(g(x)) ==> g(x) */
}

LJFOLD(ABS NEG KNUM)
LJFOLDF(shortcut_dropleft)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* abs(neg(x)) ==> abs(x) */
  return RETRYFOLD;
}

/* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
LJFOLD(NEG NEG any)
LJFOLD(BNOT BNOT)
LJFOLD(BSWAP BSWAP)
LJFOLDF(shortcut_leftleft)
{
  PHIBARRIER(fleft);  /* See above. Fold would be ok, but not beneficial. */
  return fleft->op1;  /* f(g(x)) ==> x */
}

/* -- FP algebraic simplifications ---------------------------------------- */

/* FP arithmetic is tricky -- there's not much to simplify.
** Please note the following common pitfalls before sending "improvements":
**   x+0 ==> x  is INVALID for x=-0
**   0-x ==> -x is INVALID for x=+0
**   x*0 ==> 0  is INVALID for x=-0, x=+-Inf or x=NaN
*/

LJFOLD(ADD NEG any)
LJFOLDF(simplify_numadd_negx)
{
  PHIBARRIER(fleft);
  fins->o = IR_SUB;  /* (-a) + b ==> b - a */
  fins->op1 = fins->op2;
  fins->op2 = fleft->op1;
  return RETRYFOLD;
}

LJFOLD(ADD any NEG)
LJFOLDF(simplify_numadd_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_SUB;  /* a + (-b) ==> a - b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(SUB any KNUM)
LJFOLDF(simplify_numsub_k)
{
  lua_Number n = knumright;
  if (n == 0.0)  /* x - (+-0) ==> x */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB NEG KNUM)
LJFOLDF(simplify_numsub_negk)
{
  PHIBARRIER(fleft);
  fins->op2 = fleft->op1;  /* (-x) - k ==> (-k) - x */
  fins->op1 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(SUB any NEG)
LJFOLDF(simplify_numsub_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_ADD;  /* a - (-b) ==> a + b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(MUL any KNUM)
LJFOLD(DIV any KNUM)
LJFOLDF(simplify_nummuldiv_k)
{
  lua_Number n = knumright;
  if (n == 1.0) {  /* x o 1 ==> x */
    return LEFTFOLD;
  } else if (n == -1.0) {  /* x o -1 ==> -x */
    fins->o = IR_NEG;
    fins->op2 = (IRRef1)lj_ir_knum_neg(J);
    return RETRYFOLD;
  } else if (fins->o == IR_MUL && n == 2.0) {  /* x * 2 ==> x + x */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(MUL NEG KNUM)
LJFOLD(DIV NEG KNUM)
LJFOLDF(simplify_nummuldiv_negk)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* (-a) o k ==> a o (-k) */
  fins->op2 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(MUL NEG NEG)
LJFOLD(DIV NEG NEG)
LJFOLDF(simplify_nummuldiv_negneg)
{
  PHIBARRIER(fleft);
  PHIBARRIER(fright);
  fins->op1 = fleft->op1;  /* (-a) o (-b) ==> a o b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(POW any KINT)
LJFOLDF(simplify_numpow_xk)
{
  int32_t k = fright->i;
  TRef ref = fins->op1;
  if (k == 0)  /* x ^ 0 ==> 1 */
    return lj_ir_knum_one(J);  /* Result must be a number, not an int. */
  if (k == 1)  /* x ^ 1 ==> x */
    return LEFTFOLD;
  if ((uint32_t)(k+65536) > 2*65536u)  /* Limit code explosion. */
    return NEXTFOLD;
  if (k < 0) {  /* x ^ (-k) ==> (1/x) ^ k. */
    ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
    k = -k;
  }
  /* Unroll x^k for 1 <= k <= 65536. */
  for (; (k & 1) == 0; k >>= 1)  /* Handle leading zeros. */
    ref = emitir(IRTN(IR_MUL), ref, ref);
  if ((k >>= 1) != 0) {  /* Handle trailing bits. */
    TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
    for (; k != 1; k >>= 1) {
      if (k & 1)
	ref = emitir(IRTN(IR_MUL), ref, tmp);
      tmp = emitir(IRTN(IR_MUL), tmp, tmp);
    }
    ref = emitir(IRTN(IR_MUL), ref, tmp);
  }
  return ref;
}

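/* Worked example for the unrolling above (editorial, value chosen for
** exposition): k = 13 = 0b1101. The first loop sees an odd k and is skipped.
** After k >>= 1 (k = 6 = 0b110), tmp starts as x^2. Iteration 1: bit 0 of k
** is clear, so only tmp is squared to x^4, k = 3. Iteration 2: bit set, so
** ref = x * x^4, tmp becomes x^8, k = 1 and the loop exits. The final
** multiply yields ref = x^5 * x^8 = x^13 -- five IR_MULs instead of twelve.
*/
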
LJFOLD(POW KNUM any)
LJFOLDF(simplify_numpow_kx)
{
  lua_Number n = knumleft;
  if (n == 2.0) {  /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
    fins->o = IR_CONV;
#if LJ_TARGET_X86ORX64
    fins->op1 = fins->op2;
    fins->op2 = IRCONV_NUM_INT;
    fins->op2 = (IRRef1)lj_opt_fold(J);
#endif
    fins->op1 = (IRRef1)lj_ir_knum_one(J);
    fins->o = IR_LDEXP;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* -- Simplify conversions ------------------------------------------------ */

LJFOLD(CONV CONV IRCONV_NUM_INT)  /* _NUM */
LJFOLDF(shortcut_conv_num_int)
{
  PHIBARRIER(fleft);
  /* Only safe with a guarded conversion to int. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM && irt_isguard(fleft->t))
    return fleft->op1;  /* f(g(x)) ==> x */
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_NUM)  /* _INT */
LJFOLDF(simplify_conv_int_num)
{
  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT)
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_U32_NUM)  /* _U32 */
LJFOLDF(simplify_conv_u32_num)
{
  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_I64_NUM)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U64_NUM)  /* _INT or _U32 */
LJFOLDF(simplify_conv_i64_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
    /* Reduce to a sign-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
    return RETRYFOLD;
  } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
#if LJ_TARGET_X64
    return fleft->op1;
#else
    /* Reduce to a zero-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = (IRT_I64<<5)|IRT_U32;
    return RETRYFOLD;
#endif
  }
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_I64)  /* _INT */
LJFOLD(CONV CONV IRCONV_INT_U64)  /* _INT */
LJFOLDF(simplify_conv_int_i64)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT)
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_NUM_FLOAT)  /* _NUM */
LJFOLDF(simplify_conv_flt_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM)
    return fleft->op1;
  return NEXTFOLD;
}

/* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(TOBIT CONV KNUM)
LJFOLDF(simplify_tobit_conv)
{
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
      (fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
    /* Fold even across PHI to avoid expensive num->int conversions in loop. */
    lua_assert(irt_isnum(fleft->t));
    return fleft->op1;
  }
  return NEXTFOLD;
}

/* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(FPMATH CONV IRFPM_FLOOR)
LJFOLD(FPMATH CONV IRFPM_CEIL)
LJFOLD(FPMATH CONV IRFPM_TRUNC)
LJFOLDF(simplify_floor_conv)
{
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
      (fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
    return LEFTFOLD;
  return NEXTFOLD;
}

/* Strength reduction of widening. */
LJFOLD(CONV any IRCONV_I64_INT)
LJFOLDF(simplify_conv_sext)
{
  IRRef ref = fins->op1;
  int64_t ofs = 0;
  if (!(fins->op2 & IRCONV_SEXT))
    return NEXTFOLD;
  PHIBARRIER(fleft);
  if (fleft->o == IR_XLOAD && (irt_isu8(fleft->t) || irt_isu16(fleft->t)))
    goto ok_reduce;
  if (fleft->o == IR_ADD && irref_isk(fleft->op2)) {
    ofs = (int64_t)IR(fleft->op2)->i;
    ref = fleft->op1;
  }
  /* Use scalar evolution analysis results to strength-reduce sign-extension. */
  if (ref == J->scev.idx) {
    IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
    lua_assert(irt_isint(J->scev.t));
    if (lo && IR(lo)->i + ofs >= 0) {
    ok_reduce:
#if LJ_TARGET_X64
      /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
      return LEFTFOLD;
#else
      /* Reduce to a (cheaper) zero-extension. */
      fins->op2 &= ~IRCONV_SEXT;
      return RETRYFOLD;
#endif
    }
  }
  return NEXTFOLD;
}

/* Strength reduction of narrowing. */
LJFOLD(CONV ADD IRCONV_INT_I64)
LJFOLD(CONV SUB IRCONV_INT_I64)
LJFOLD(CONV MUL IRCONV_INT_I64)
LJFOLD(CONV ADD IRCONV_INT_U64)
LJFOLD(CONV SUB IRCONV_INT_U64)
LJFOLD(CONV MUL IRCONV_INT_U64)
LJFOLDF(simplify_conv_narrow)
{
  IROp op = (IROp)fleft->o;
  IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
  PHIBARRIER(fleft);
  op1 = emitir(IRTI(IR_CONV), op1, mode);
  op2 = emitir(IRTI(IR_CONV), op2, mode);
  fins->ot = IRTI(op);
  fins->op1 = op1;
  fins->op2 = op2;
  return RETRYFOLD;
}

/* Special CSE rule for CONV. */
LJFOLD(CONV any any)
LJFOLDF(cse_conv)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
    uint8_t guard = irt_isguard(fins->t);
    IRRef ref = J->chain[IR_CONV];
    while (ref > op1) {
      IRIns *ir = IR(ref);
      /* Commoning with stronger checks is ok. */
      if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
	  irt_isguard(ir->t) >= guard)
	return ref;
      ref = ir->prev;
    }
  }
  return EMITFOLD;  /* No fallthrough to regular CSE. */
}

/* FP conversion narrowing. */
LJFOLD(TOBIT ADD KNUM)
LJFOLD(TOBIT SUB KNUM)
LJFOLD(CONV ADD IRCONV_INT_NUM)
LJFOLD(CONV SUB IRCONV_INT_NUM)
LJFOLD(CONV ADD IRCONV_I64_NUM)
LJFOLD(CONV SUB IRCONV_I64_NUM)
LJFOLDF(narrow_convert)
{
  PHIBARRIER(fleft);
  /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
  if (J->chain[IR_LOOP])
    return NEXTFOLD;
  lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT);
  return lj_opt_narrow_convert(J);
}

/* -- Integer algebraic simplifications ----------------------------------- */

LJFOLD(ADD any KINT)
LJFOLD(ADDOV any KINT)
LJFOLD(SUBOV any KINT)
LJFOLDF(simplify_intadd_k)
{
  if (fright->i == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(MULOV any KINT)
LJFOLDF(simplify_intmul_k)
{
  if (fright->i == 0)  /* i * 0 ==> 0 */
    return RIGHTFOLD;
  if (fright->i == 1)  /* i * 1 ==> i */
    return LEFTFOLD;
  if (fright->i == 2) {  /* i * 2 ==> i + i */
    fins->o = IR_ADDOV;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any KINT)
LJFOLDF(simplify_intsub_k)
{
  if (fright->i == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i);  /* Overflow for -2^31 ok. */
  return RETRYFOLD;
}

LJFOLD(SUB KINT any)
LJFOLD(SUB KINT64 any)
LJFOLDF(simplify_intsub_kleft)
{
  if (fleft->o == IR_KINT ? (fleft->i == 0) : (ir_kint64(fleft)->u64 == 0)) {
    fins->o = IR_NEG;  /* 0 - i ==> -i */
    fins->op1 = fins->op2;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(ADD any KINT64)
LJFOLDF(simplify_intadd_k64)
{
  if (ir_kint64(fright)->u64 == 0)  /* i + 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB any KINT64)
LJFOLDF(simplify_intsub_k64)
{
  uint64_t k = ir_kint64(fright)->u64;
  if (k == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint64(J, (uint64_t)-(int64_t)k);
  return RETRYFOLD;
}

static TRef simplify_intmul_k(jit_State *J, int32_t k)
{
  /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2.
  ** But this is mainly intended for simple address arithmetic.
  ** Also it's easier for the backend to optimize the original multiplies.
  */
  if (k == 1) {  /* i * 1 ==> i */
    return LEFTFOLD;
  } else if ((k & (k-1)) == 0) {  /* i * 2^k ==> i << k */
    fins->o = IR_BSHL;
    fins->op2 = lj_ir_kint(J, lj_fls((uint32_t)k));
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(MUL any KINT)
LJFOLDF(simplify_intmul_k32)
{
  if (fright->i == 0)  /* i * 0 ==> 0 */
    return INTFOLD(0);
  else if (fright->i > 0)
    return simplify_intmul_k(J, fright->i);
  return NEXTFOLD;
}

LJFOLD(MUL any KINT64)
LJFOLDF(simplify_intmul_k64)
{
  if (ir_kint64(fright)->u64 == 0)  /* i * 0 ==> 0 */
    return INT64FOLD(0);
#if LJ_64
  /* NYI: SPLIT for BSHL and 32 bit backend support. */
  else if (ir_kint64(fright)->u64 < 0x80000000u)
    return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
#endif
  return NEXTFOLD;
}

LJFOLD(MOD any KINT)
LJFOLDF(simplify_intmod_k)
{
  int32_t k = fright->i;
  lua_assert(k != 0);
  if (k > 0 && (k & (k-1)) == 0) {  /* i % (2^k) ==> i & (2^k-1) */
    fins->o = IR_BAND;
    fins->op2 = lj_ir_kint(J, k-1);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(MOD KINT any)
LJFOLDF(simplify_intmod_kleft)
{
  if (fleft->i == 0)
    return INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB any any)
LJFOLD(SUBOV any any)
LJFOLDF(simplify_intsub)
{
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))  /* i - i ==> 0 */
    return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB ADD any)
LJFOLDF(simplify_intsubadd_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1)  /* (i + j) - i ==> j */
      return fleft->op2;
    if (fins->op2 == fleft->op2)  /* (i + j) - j ==> i */
      return fleft->op1;
  }
  return NEXTFOLD;
}

LJFOLD(SUB SUB any)
LJFOLDF(simplify_intsubsub_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1) {  /* (i - j) - i ==> 0 - j */
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      fins->op2 = fleft->op2;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB any SUB)
LJFOLDF(simplify_intsubsub_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1)  /* i - (i - j) ==> j */
      return fright->op2;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any ADD)
LJFOLDF(simplify_intsubadd_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1) {  /* i - (i + j) ==> 0 - j */
      fins->op2 = fright->op2;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
    if (fins->op1 == fright->op2) {  /* i - (j + i) ==> 0 - j */
      fins->op2 = fright->op1;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB ADD ADD)
LJFOLDF(simplify_intsubaddadd_cancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    PHIBARRIER(fright);
    if (fleft->op1 == fright->op1) {  /* (i + j1) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op1 == fright->op2) {  /* (i + j1) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op1) {  /* (j1 + i) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op2) {  /* (j1 + i) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BAND any KINT)
LJFOLD(BAND any KINT64)
LJFOLDF(simplify_band_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
				     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i & 0 ==> 0 */
    return RIGHTFOLD;
  if (k == -1)  /* i & -1 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BOR any KINT)
LJFOLD(BOR any KINT64)
LJFOLDF(simplify_bor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
				     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i | 0 ==> i */
    return LEFTFOLD;
  if (k == -1)  /* i | -1 ==> -1 */
    return RIGHTFOLD;
  return NEXTFOLD;
}

LJFOLD(BXOR any KINT)
LJFOLD(BXOR any KINT64)
LJFOLDF(simplify_bxor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
				     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i xor 0 ==> i */
    return LEFTFOLD;
  if (k == -1) {  /* i xor -1 ==> ~i */
    fins->o = IR_BNOT;
    fins->op2 = 0;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BSHL any KINT)
LJFOLD(BSHR any KINT)
LJFOLD(BSAR any KINT)
LJFOLD(BROL any KINT)
LJFOLD(BROR any KINT)
LJFOLDF(simplify_shift_ik)
{
  int32_t mask = irt_is64(fins->t) ? 63 : 31;
  int32_t k = (fright->i & mask);
  if (k == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  if (k == 1 && fins->o == IR_BSHL) {  /* i << 1 ==> i + i */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  if (k != fright->i) {  /* i o k ==> i o (k & mask) */
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
#ifndef LJ_TARGET_UNIFYROT
  if (fins->o == IR_BROR) {  /* bror(i, k) ==> brol(i, (-k)&mask) */
    fins->o = IR_BROL;
    fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&mask);
    return RETRYFOLD;
  }
#endif
  return NEXTFOLD;
}

LJFOLD(BSHL any BAND)
LJFOLD(BSHR any BAND)
LJFOLD(BSAR any BAND)
LJFOLD(BROL any BAND)
LJFOLD(BROR any BAND)
LJFOLDF(simplify_shift_andk)
{
  IRIns *irk = IR(fright->op2);
  PHIBARRIER(fright);
  if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
      irk->o == IR_KINT) {  /* i o (j & mask) ==> i o j */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = irk->i & mask;
    if (k == mask) {
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BSHL KINT any)
LJFOLD(BSHR KINT any)
LJFOLD(BSHL KINT64 any)
LJFOLD(BSHR KINT64 any)
LJFOLDF(simplify_shift1_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
				    (int64_t)ir_k64(fleft)->u64;
  if (k == 0)  /* 0 o i ==> 0 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSAR KINT any)
LJFOLD(BROL KINT any)
LJFOLD(BROR KINT any)
LJFOLD(BSAR KINT64 any)
LJFOLD(BROL KINT64 any)
LJFOLD(BROR KINT64 any)
LJFOLDF(simplify_shift2_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
				    (int64_t)ir_k64(fleft)->u64;
  if (k == 0 || k == -1)  /* 0 o i ==> 0; -1 o i ==> -1 */
    return LEFTFOLD;
  return NEXTFOLD;
}

/* -- Reassociation ------------------------------------------------------- */

LJFOLD(ADD ADD KINT)
LJFOLD(MUL MUL KINT)
LJFOLD(BAND BAND KINT)
LJFOLD(BOR BOR KINT)
LJFOLD(BXOR BXOR KINT)
LJFOLDF(reassoc_intarith_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT) {
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    if (k == irk->i)  /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
}

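/* Example (editorial): for fins = ADD(t, KINT 8) where t = ADD(i, KINT 4),
** the rule above computes 4+8 = 12 and rewrites fins in place to
** ADD(i, KINT 12), then retries -- one addition instead of two. The early
** LEFTFOLD exit covers cases like BOR where k2 adds no bits beyond k1, so
** the outer instruction is simply replaced by the inner one.
*/
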
LJFOLD(ADD ADD KINT64)
LJFOLD(MUL MUL KINT64)
LJFOLD(BAND BAND KINT64)
LJFOLD(BOR BOR KINT64)
LJFOLD(BXOR BXOR KINT64)
LJFOLDF(reassoc_intarith_k64)
{
#if LJ_HASFFI || LJ_64
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT64) {
    uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
				  ir_k64(fright)->u64, (IROp)fins->o);
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint64(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(MIN MIN any)
LJFOLD(MAX MAX any)
LJFOLD(BAND BAND any)
LJFOLD(BOR BOR any)
LJFOLDF(reassoc_dup)
{
  if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
    return LEFTFOLD;  /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */
  return NEXTFOLD;
}

LJFOLD(BXOR BXOR any)
LJFOLDF(reassoc_bxor)
{
  PHIBARRIER(fleft);
  if (fins->op2 == fleft->op1)  /* (a xor b) xor a ==> b */
    return fleft->op2;
  if (fins->op2 == fleft->op2)  /* (a xor b) xor b ==> a */
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(BSHL BSHL KINT)
LJFOLD(BSHR BSHR KINT)
LJFOLD(BSAR BSAR KINT)
LJFOLD(BROL BROL KINT)
LJFOLD(BROR BROR KINT)
LJFOLDF(reassoc_shift)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);  /* The (shift any KINT) rule covers k2 == 0 and more. */
  if (irk->o == IR_KINT) {  /* (i o k1) o k2 ==> i o (k1 + k2) */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = (irk->i & mask) + (fright->i & mask);
    if (k > mask) {  /* Combined shift too wide? */
      if (fins->o == IR_BSHL || fins->o == IR_BSHR)
	return mask == 31 ? INTFOLD(0) : INT64FOLD(0);
      else if (fins->o == IR_BSAR)
	k = mask;
      else
	k &= mask;
    }
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(MIN MIN KNUM)
LJFOLD(MAX MAX KNUM)
LJFOLD(MIN MIN KINT)
LJFOLD(MAX MAX KINT)
LJFOLDF(reassoc_minmax_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KNUM) {
    lua_Number a = ir_knum(irk)->n;
    lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD);
    if (a == y)  /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_knum(J, y);
    return RETRYFOLD;  /* (x o k1) o k2 ==> x o (k1 o k2) */
  } else if (irk->o == IR_KINT) {
    int32_t a = irk->i;
    int32_t y = kfold_intop(a, fright->i, fins->o);
    if (a == y)  /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, y);
    return RETRYFOLD;  /* (x o k1) o k2 ==> x o (k1 o k2) */
  }
  return NEXTFOLD;
}

LJFOLD(MIN MAX any)
LJFOLD(MAX MIN any)
LJFOLDF(reassoc_minmax_left)
{
  if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
    return RIGHTFOLD;  /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */
  return NEXTFOLD;
}

LJFOLD(MIN any MAX)
LJFOLD(MAX any MIN)
LJFOLDF(reassoc_minmax_right)
{
  if (fins->op1 == fright->op1 || fins->op1 == fright->op2)
    return LEFTFOLD;  /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */
  return NEXTFOLD;
}

/* -- Array bounds check elimination -------------------------------------- */

/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
*/
LJFOLD(ABC any ADD)
LJFOLDF(abc_fwd)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    if (irref_isk(fright->op2)) {
      IRIns *add2 = IR(fright->op1);
      if (add2->o == IR_ADD && irref_isk(add2->op2) &&
	  IR(fright->op2)->i == -IR(add2->op2)->i) {
	IRRef ref = J->chain[IR_ABC];
	IRRef lim = add2->op1;
	if (fins->op1 > lim) lim = fins->op1;
	while (ref > lim) {
	  IRIns *ir = IR(ref);
	  if (ir->op1 == fins->op1 && ir->op2 == add2->op1)
	    return DROPFOLD;
	  ref = ir->prev;
	}
      }
    }
  }
  return NEXTFOLD;
}

/* Eliminate ABC for constants.
** ABC(asize, k1), ABC(asize, k2) ==> ABC(asize, max(k1, k2))
** Drop second ABC if k2 is lower. Otherwise patch first ABC with k2.
*/
LJFOLD(ABC any KINT)
LJFOLDF(abc_k)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    IRRef ref = J->chain[IR_ABC];
    IRRef asize = fins->op1;
    while (ref > asize) {
      IRIns *ir = IR(ref);
      if (ir->op1 == asize && irref_isk(ir->op2)) {
	int32_t k = IR(ir->op2)->i;
	if (fright->i > k)
	  ir->op2 = fins->op2;
	return DROPFOLD;
      }
      ref = ir->prev;
    }
    return EMITFOLD;  /* Already performed CSE. */
  }
  return NEXTFOLD;
}

/* Eliminate invariant ABC inside loop. */
LJFOLD(ABC any any)
LJFOLDF(abc_invar)
{
  if (!irt_isint(fins->t) && J->chain[IR_LOOP])  /* Currently marked as PTR. */
    return DROPFOLD;
  return NEXTFOLD;
}

/* -- Commutativity ------------------------------------------------------- */

/* The refs of commutative ops are canonicalized. Lower refs go to the right.
** Rationale behind this:
** - It (also) moves constants to the right.
** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices).
** - It helps CSE to find more matches.
** - The assembler generates better code with constants at the right.
*/

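/* Example (editorial): constant refs live below REF_BIAS and thus below all
** non-constant instructions, so ADD(KINT, x) always has op1 < op2 and
** comm_swap below canonicalizes it to ADD(x, KINT). This is why the
** simplification rules above only need to match a constant on the right.
*/
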
LJFOLD(ADD any any)
LJFOLD(MUL any any)
LJFOLD(ADDOV any any)
LJFOLD(MULOV any any)
LJFOLDF(comm_swap)
{
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(EQ any any)
LJFOLD(NE any any)
LJFOLDF(comm_equal)
{
  /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD(fins->o == IR_EQ);
  return fold_comm_swap(J);
}

LJFOLD(LT any any)
LJFOLD(GE any any)
LJFOLD(LE any any)
LJFOLD(GT any any)
LJFOLD(ULT any any)
LJFOLD(UGE any any)
LJFOLD(ULE any any)
LJFOLD(UGT any any)
LJFOLDF(comm_comp)
{
  /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD((fins->o ^ (fins->o >> 1)) & 1);
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    fins->o ^= 3;  /* GT <-> LT, GE <-> LE, does not affect U */
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BAND any any)
LJFOLD(BOR any any)
LJFOLD(MIN any any)
LJFOLD(MAX any any)
LJFOLDF(comm_dup)
{
  if (fins->op1 == fins->op2)  /* x o x ==> x */
    return LEFTFOLD;
  return fold_comm_swap(J);
}

LJFOLD(BXOR any any)
LJFOLDF(comm_bxor)
{
  if (fins->op1 == fins->op2)  /* i xor i ==> 0 */
    return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
  return fold_comm_swap(J);
}

/* -- Simplification of compound expressions ------------------------------ */

static TRef kfold_xload(jit_State *J, IRIns *ir, const void *p)
{
  int32_t k;
  switch (irt_type(ir->t)) {
  case IRT_NUM: return lj_ir_knum_u64(J, *(uint64_t *)p);
  case IRT_I8: k = (int32_t)*(int8_t *)p; break;
  case IRT_U8: k = (int32_t)*(uint8_t *)p; break;
  case IRT_I16: k = (int32_t)(int16_t)lj_getu16(p); break;
  case IRT_U16: k = (int32_t)(uint16_t)lj_getu16(p); break;
  case IRT_INT: case IRT_U32: k = (int32_t)lj_getu32(p); break;
  case IRT_I64: case IRT_U64: return lj_ir_kint64(J, *(uint64_t *)p);
  default: return 0;
  }
  return lj_ir_kint(J, k);
}

/* Turn: string.sub(str, a, b) == kstr
** into: string.byte(str, a) == string.byte(kstr, 1) etc.
** Note: this creates unaligned XLOADs on x86/x64.
*/
LJFOLD(EQ SNEW KGC)
LJFOLD(NE SNEW KGC)
LJFOLDF(merge_eqne_snew_kgc)
{
  GCstr *kstr = ir_kstr(fright);
  int32_t len = (int32_t)kstr->len;
  lua_assert(irt_isstr(fins->t));

#if LJ_TARGET_X86ORX64
#define FOLD_SNEW_MAX_LEN	4  /* Handle string lengths 0, 1, 2, 3, 4. */
#define FOLD_SNEW_TYPE8		IRT_I8	/* Creates shorter immediates. */
#else
#define FOLD_SNEW_MAX_LEN	1  /* Handle string lengths 0 or 1. */
#define FOLD_SNEW_TYPE8		IRT_U8  /* Prefer unsigned loads. */
#endif

  if (len <= FOLD_SNEW_MAX_LEN) {
    IROp op = (IROp)fins->o;
    IRRef strref = fleft->op1;
    lua_assert(IR(strref)->o == IR_STRREF);
    if (op == IR_EQ) {
      emitir(IRTGI(IR_EQ), fleft->op2, lj_ir_kint(J, len));
      /* Caveat: fins/fleft/fright is no longer valid after emitir. */
    } else {
      /* NE is not expanded since this would need an OR of two conds. */
      if (!irref_isk(fleft->op2))  /* Only handle the constant length case. */
	return NEXTFOLD;
      if (IR(fleft->op2)->i != len)
	return DROPFOLD;
    }
    if (len > 0) {
      /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
      uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, FOLD_SNEW_TYPE8) :
			       len == 2 ? IRT(IR_XLOAD, IRT_U16) :
			       IRTI(IR_XLOAD));
      TRef tmp = emitir(ot, strref,
			IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0));
      TRef val = kfold_xload(J, IR(tref_ref(tmp)), strdata(kstr));
      if (len == 3)
	tmp = emitir(IRTI(IR_BAND), tmp,
		     lj_ir_kint(J, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
      fins->op1 = (IRRef1)tmp;
      fins->op2 = (IRRef1)val;
      fins->ot = (IROpT)IRTGI(op);
      return RETRYFOLD;
    } else {
      return DROPFOLD;
    }
  }
  return NEXTFOLD;
}

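/* Example (editorial): on x86/x64, s:sub(i, i+2) == "abc" becomes a guard
** that the SNEW length equals 3, a readonly unaligned 4 byte XLOAD of the
** string data, a BAND masking off the byte beyond the string, and an integer
** compare against the first 4 bytes of the constant "abc" -- whose 4th byte
** is already the terminating NUL, so only the loaded side needs the mask.
*/
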
/* -- Loads --------------------------------------------------------------- */

/* Loads cannot be folded or passed on to CSE in general.
** Alias analysis is needed to check for forwarding opportunities.
**
** Caveat: *all* loads must be listed here or they end up at CSE!
*/

LJFOLD(ALOAD any)
LJFOLDX(lj_opt_fwd_aload)

/* From HREF fwd (see below). Must eliminate, not supported by fwd/backend. */
LJFOLD(HLOAD KKPTR)
LJFOLDF(kfold_hload_kkptr)
{
  UNUSED(J);
  lua_assert(ir_kptr(fleft) == niltvg(J2G(J)));
  return TREF_NIL;
}

LJFOLD(HLOAD any)
LJFOLDX(lj_opt_fwd_hload)

LJFOLD(ULOAD any)
LJFOLDX(lj_opt_fwd_uload)

LJFOLD(CALLL any IRCALL_lj_tab_len)
LJFOLDX(lj_opt_fwd_tab_len)

/* Upvalue refs are really loads, but there are no corresponding stores.
** So CSE is ok for them, except for UREFO across a GC step (see below).
** If the referenced function is const, its upvalue addresses are const, too.
** This can be used to improve CSE by looking for the same address,
** even if the upvalues originate from a different function.
*/
LJFOLD(UREFO KGC any)
LJFOLD(UREFC KGC any)
LJFOLDF(cse_uref)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref = J->chain[fins->o];
    GCfunc *fn = ir_kfunc(fleft);
    GCupval *uv = gco2uv(gcref(fn->l.uvptr[(fins->op2 >> 8)]));
    while (ref > 0) {
      IRIns *ir = IR(ref);
      if (irref_isk(ir->op1)) {
	GCfunc *fn2 = ir_kfunc(IR(ir->op1));
	if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
	  if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
	    break;
	  return ref;
	}
      }
      ref = ir->prev;
    }
  }
  return EMITFOLD;
}

LJFOLD(HREF TNEW any)
LJFOLDF(fwd_href_tnew)
{
  if (lj_opt_fwd_href_nokey(J))
    return lj_ir_kkptr(J, niltvg(J2G(J)));
  return NEXTFOLD;
}

LJFOLD(HREF TDUP KPRI)
LJFOLD(HREF TDUP KGC)
LJFOLD(HREF TDUP KNUM)
LJFOLDF(fwd_href_tdup)
{
  TValue keyv;
  lj_ir_kvalue(J->L, &keyv, fright);
  if (lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv) == niltvg(J2G(J)) &&
      lj_opt_fwd_href_nokey(J))
    return lj_ir_kkptr(J, niltvg(J2G(J)));
  return NEXTFOLD;
}

/* We can safely FOLD/CSE array/hash refs and field loads, since there
** are no corresponding stores. But we need to check for any NEWREF with
** an aliased table, as it may invalidate all of the pointers and fields.
** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
** FLOADs. And NEWREF itself is treated like a store (see below).
*/
LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tnew_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD(fleft->op1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TNEW IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tnew_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((1 << fleft->op2)-1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tdup_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tdup_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask);
  return NEXTFOLD;
}

LJFOLD(HREF any any)
LJFOLD(FLOAD any IRFL_TAB_ARRAY)
LJFOLD(FLOAD any IRFL_TAB_NODE)
LJFOLD(FLOAD any IRFL_TAB_ASIZE)
LJFOLD(FLOAD any IRFL_TAB_HMASK)
LJFOLDF(fload_tab_ah)
{
  TRef tr = lj_opt_cse(J);
  return lj_opt_fwd_tptr(J, tref_ref(tr)) ? tr : EMITFOLD;
}

/* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
LJFOLD(FLOAD KGC IRFL_STR_LEN)
LJFOLDF(fload_str_len_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kstr(fleft)->len);
  return NEXTFOLD;
}

LJFOLD(FLOAD SNEW IRFL_STR_LEN)
LJFOLDF(fload_str_len_snew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    PHIBARRIER(fleft);
    return fleft->op2;
  }
  return NEXTFOLD;
}

/* The C type ID of cdata objects is immutable. */
LJFOLD(FLOAD KGC IRFL_CDATA_TYPEID)
LJFOLDF(fload_cdata_typeid_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kcdata(fleft)->typeid);
  return NEXTFOLD;
}

/* Get the contents of immutable cdata objects. */
LJFOLD(FLOAD KGC IRFL_CDATA_PTR)
LJFOLD(FLOAD KGC IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_int64_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    void *p = cdataptr(ir_kcdata(fleft));
    if (irt_is64(fins->t))
      return INT64FOLD(*(uint64_t *)p);
    else
      return INTFOLD(*(int32_t *)p);
  }
  return NEXTFOLD;
}

LJFOLD(FLOAD CNEW IRFL_CDATA_TYPEID)
LJFOLD(FLOAD CNEWI IRFL_CDATA_TYPEID)
LJFOLDF(fload_cdata_typeid_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op1;  /* No PHI barrier needed. CNEW/CNEWI op1 is const. */
  return NEXTFOLD;
}

/* Pointer and int64 cdata objects are immutable. */
LJFOLD(FLOAD CNEWI IRFL_CDATA_PTR)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_ptr_int64_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op2;  /* Fold even across PHI to avoid allocations. */
  return NEXTFOLD;
}

LJFOLD(FLOAD any IRFL_STR_LEN)
LJFOLD(FLOAD any IRFL_CDATA_TYPEID)
LJFOLD(FLOAD any IRFL_CDATA_PTR)
LJFOLD(FLOAD any IRFL_CDATA_INT64)
LJFOLD(VLOAD any any)  /* Vararg loads have no corresponding stores. */
LJFOLDX(lj_opt_cse)

/* All other field loads need alias analysis. */
LJFOLD(FLOAD any any)
LJFOLDX(lj_opt_fwd_fload)

/* This is for LOOP only. Recording handles SLOADs internally. */
LJFOLD(SLOAD any any)
LJFOLDF(fwd_sload)
{
  if ((fins->op2 & IRSLOAD_FRAME)) {
    TRef tr = lj_opt_cse(J);
    return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
  } else {
    lua_assert(J->slot[fins->op1] != 0);
    return J->slot[fins->op1];
  }
}

/* Only fold for KKPTR. The pointer _and_ the contents must be const. */
LJFOLD(XLOAD KKPTR any)
LJFOLDF(xload_kptr)
{
  TRef tr = kfold_xload(J, fins, ir_kptr(fleft));
  return tr ? tr : NEXTFOLD;
}

LJFOLD(XLOAD any any)
LJFOLDX(lj_opt_fwd_xload)

/* -- Write barriers ------------------------------------------------------ */

/* Write barriers are amenable to CSE, but not across any incremental
** GC steps.
**
** The same logic applies to open upvalue references, because a stack
** may be resized during a GC step (not the current stack, but maybe that
** of a coroutine).
*/
LJFOLD(TBAR any)
LJFOLD(OBAR any any)
LJFOLD(UREFO any any)
LJFOLDF(barrier_tab)
{
  TRef tr = lj_opt_cse(J);
  if (gcstep_barrier(J, tref_ref(tr)))  /* CSE across GC step? */
    return EMITFOLD;  /* Raw emit. Assumes fins is left intact by CSE. */
  return tr;
}

LJFOLD(TBAR TNEW)
LJFOLD(TBAR TDUP)
LJFOLDF(barrier_tnew_tdup)
{
  /* New tables are always white and never need a barrier. */
  if (fins->op1 < J->chain[IR_LOOP])  /* Except across a GC step. */
    return NEXTFOLD;
  return DROPFOLD;
}

/* -- Stores and allocations ---------------------------------------------- */

/* Stores and allocations cannot be folded or passed on to CSE in general.
** But some stores can be eliminated with dead-store elimination (DSE).
**
** Caveat: *all* stores and allocs must be listed here or they end up at CSE!
*/

LJFOLD(ASTORE any any)
LJFOLD(HSTORE any any)
LJFOLDX(lj_opt_dse_ahstore)

LJFOLD(USTORE any any)
LJFOLDX(lj_opt_dse_ustore)

LJFOLD(FSTORE any any)
LJFOLDX(lj_opt_dse_fstore)

LJFOLD(XSTORE any any)
LJFOLDX(lj_opt_dse_xstore)

LJFOLD(NEWREF any any)  /* Treated like a store. */
LJFOLD(CALLS any any)
LJFOLD(CALLL any any)  /* Safeguard fallback. */
LJFOLD(CALLXS any any)
LJFOLD(XBAR)
LJFOLD(RETF any any)  /* Modifies BASE. */
LJFOLD(TNEW any any)
LJFOLD(TDUP any)
LJFOLD(CNEW any any)
LJFOLD(XSNEW any any)
LJFOLDX(lj_ir_emit)

/* ------------------------------------------------------------------------ */

/* Every entry in the generated hash table is a 32 bit pattern:
**
** xxxxxxxx iiiiiii lllllll rrrrrrrrrr
**
**   xxxxxxxx = 8 bit index into fold function table
**    iiiiiii = 7 bit folded instruction opcode
**    lllllll = 7 bit left instruction opcode
** rrrrrrrrrr = 8 bit right instruction opcode or 10 bits from literal field
*/

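/* Example (editorial): the entry buildvm generates for the rule
** LJFOLD(ADD any KINT) above packs the table index of fold_simplify_intadd_k
** into the x-field, IR_ADD into the i-field, all-ones into the l-field (the
** key probing in lj_opt_fold() below fills wildcarded fields with ones) and
** IR_KINT into the r-field.
*/
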
#include "lj_folddef.h"

/* ------------------------------------------------------------------------ */

/* Fold IR instruction. */
TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
{
  uint32_t key, any;
  IRRef ref;

  if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
    lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
		JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT);
    /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
    if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
      return lj_opt_cse(J);

    /* Forwarding or CSE disabled? Emit raw IR for loads, except for SLOAD. */
    if ((J->flags & (JIT_F_OPT_FWD|JIT_F_OPT_CSE)) !=
	(JIT_F_OPT_FWD|JIT_F_OPT_CSE) &&
	irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD)
      return lj_ir_emit(J);

    /* DSE disabled? Emit raw IR for stores. */
    if (!(J->flags & JIT_F_OPT_DSE) && irm_kind(lj_ir_mode[fins->o]) == IRM_S)
      return lj_ir_emit(J);
  }

  /* Fold engine start/retry point. */
retry:
  /* Construct key from opcode and operand opcodes (unless literal/none). */
  key = ((uint32_t)fins->o << 17);
  if (fins->op1 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op1)->o << 10;
    *fleft = *IR(fins->op1);
  }
  if (fins->op2 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op2)->o;
    *fright = *IR(fins->op2);
  } else {
    key += (fins->op2 & 0x3ffu);  /* Literal mask. Must include IRCONV_*MASK. */
  }

  /* Check for a match in order from most specific to least specific. */
  any = 0;
  for (;;) {
    uint32_t k = key | (any & 0x1ffff);
    uint32_t h = fold_hashkey(k);
    uint32_t fh = fold_hash[h];  /* Lookup key in semi-perfect hash table. */
    if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
      ref = (IRRef)tref_ref(fold_func[fh >> 24](J));
      if (ref != NEXTFOLD)
	break;
    }
    if (any == 0xfffff)  /* Exhausted folding. Pass on to CSE. */
      return lj_opt_cse(J);
    any = (any | (any >> 10)) ^ 0xffc00;
  }

  /* Return value processing, ordered by frequency. */
  if (LJ_LIKELY(ref >= MAX_FOLD))
    return TREF(ref, irt_t(IR(ref)->t));
  if (ref == RETRYFOLD)
    goto retry;
  if (ref == KINTFOLD)
    return lj_ir_kint(J, fins->i);
  if (ref == FAILFOLD)
    lj_trace_err(J, LJ_TRERR_GFAIL);
  lua_assert(ref == DROPFOLD);
  return REF_DROP;
}

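/* Note on the wildcard iteration above (editorial): 'any' steps through 0,
** 0xffc00, 0x3ff and 0xfffff. OR-ed into the key this wildcards nothing,
** then the 7 bit left field (bits 10-16 after masking with 0x1ffff), then
** the 10 bit right field, then both -- the most-specific to least-specific
** probe order described in the comment at the top of this file.
*/
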
/* -- Common-Subexpression Elimination ------------------------------------ */

/* CSE an IR instruction. This is very fast due to the skip-list chains. */
TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
{
  /* Avoid narrow to wide store-to-load forwarding stall */
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  IROp op = fins->o;
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    /* Limited search for same operands in per-opcode chain. */
    IRRef ref = J->chain[op];
    IRRef lim = fins->op1;
    if (fins->op2 > lim) lim = fins->op2;  /* Relies on lit < REF_BIAS. */
    while (ref > lim) {
      if (IR(ref)->op12 == op12)
	return TREF(ref, irt_t(IR(ref)->t));  /* Common subexpression found. */
      ref = IR(ref)->prev;
    }
  }
  /* Otherwise emit IR (inlined for speed). */
  {
    IRRef ref = lj_ir_nextins(J);
    IRIns *ir = IR(ref);
    ir->prev = J->chain[op];
    ir->op12 = op12;
    J->chain[op] = (IRRef1)ref;
    ir->o = fins->o;
    J->guardemit.irt |= fins->t.irt;
    return TREF(ref, irt_t((ir->t = fins->t)));
  }
}

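/* How the chains work (editorial): J->chain[op] holds the ref of the most
** recent instruction with opcode op, and each instruction's prev field links
** to the previous one with the same opcode. The search can stop at
** lim = max(op1, op2) because an instruction always has a higher ref than
** the operands it references -- anything at or below lim cannot match.
*/
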
/* CSE with explicit search limit. */
TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim)
{
  IRRef ref = J->chain[fins->o];
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  while (ref > lim) {
    if (IR(ref)->op12 == op12)
      return ref;
    ref = IR(ref)->prev;
  }
  return lj_ir_emit(J);
}

/* ------------------------------------------------------------------------ */

#undef IR
#undef fins
#undef fleft
#undef fright
#undef knumleft
#undef knumright
#undef emitir

#endif