2 ** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
3 ** CSE: Common-Subexpression Elimination.
4 ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
21 /* Here's a short description of how the FOLD engine processes instructions:
23 ** The FOLD engine receives a single instruction stored in fins (J->fold.ins).
24 ** The instruction and its operands are used to select matching fold rules.
25 ** These are applied iteratively until a fixed point is reached.
27 ** The 8 bit opcode of the instruction itself plus the opcodes of the
28 ** two instructions referenced by its operands form a 24 bit key
29 ** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits).
31 ** This key is used for partial matching against the fold rules. The
32 ** left/right operand fields of the key are successively masked with
33 ** the 'any' wildcard, from most specific to least specific:
40 ** The masked key is used to look up a matching fold rule in a semi-perfect
41 ** hash table. If a matching rule is found, the related fold function is run.
42 ** Multiple rules can share the same fold function. A fold rule may return
43 ** one of several special values:
45 ** - NEXTFOLD means no folding was applied, because an additional test
46 ** inside the fold function failed. Matching continues against less
47 ** specific fold rules. Finally the instruction is passed on to CSE.
49 ** - RETRYFOLD means the instruction was modified in-place. Folding is
50 ** retried as if this instruction had just been received.
52 ** All other return values are terminal actions -- no further folding is
55 ** - INTFOLD(i) returns a reference to the integer constant i.
57 ** - LEFTFOLD and RIGHTFOLD return the left/right operand reference
58 ** without emitting an instruction.
60 ** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit
61 ** it without passing through any further optimizations.
63 ** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have
64 ** no result (e.g. guarded assertions): FAILFOLD means the guard would
65 ** always fail, i.e. the current trace is pointless. DROPFOLD means
66 ** the guard is always true and has been eliminated. CONDFOLD is a
67 ** shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail).
69 ** - Any other return value is interpreted as an IRRef or TRef. This
70 ** can be a reference to an existing or a newly created instruction.
71 ** Only the least-significant 16 bits (IRRef1) are used to form a TRef
72 ** which is finally returned to the caller.
74 ** The FOLD engine receives instructions both from the trace recorder and
75 ** substituted instructions from LOOP unrolling. This means all types
76 ** of instructions may end up here, even though the recorder bypasses
77 ** FOLD in some cases. Thus all loads, stores and allocations must have
78 ** an any/any rule to avoid being passed on to CSE.
80 ** Carefully read the following requirements before adding or modifying
83 ** Requirement #1: All fold rules must preserve their destination type.
85 ** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result).
86 ** Never use lj_ir_knumint() which can have either a KINT or KNUM result.
88 ** Requirement #2: Fold rules should not create *new* instructions which
89 ** reference operands *across* PHIs.
91 ** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the
92 ** left operand is a PHI. Then fleft->op1 would point across the PHI
93 ** frontier to an invariant instruction. Adding a PHI for this instruction
94 ** would be counterproductive. The solution is to add a barrier which
95 ** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case.
96 ** The only exception is for recurrences with high latencies like
97 ** repeated int->num->int conversions.
99 ** One could relax this condition a bit if the referenced instruction is
100 ** a PHI, too. But this often leads to worse code due to excessive
101 ** register shuffling.
103 ** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though.
104 ** Even returning fleft->op1 would be ok, because a new PHI will be added,
105 ** if needed. But again, this leads to excessive register shuffling and
106 ** should be avoided.
108 ** Requirement #3: The set of all fold rules must be monotonic to guarantee
111 ** The goal is optimization, so one primarily wants to add strength-reducing
112 ** rules. This means eliminating an instruction or replacing an instruction
113 ** with one or more simpler instructions. Don't add fold rules which point
114 ** into the other direction.
116 ** Some rules (like commutativity) do not directly reduce the strength of
117 ** an instruction, but enable other fold rules (e.g. by moving constants
118 ** to the right operand). These rules must be made unidirectional to avoid
121 ** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it.
124 /* Some local macros to save typing. Undef'd at the end. */
/* Index the current trace's IR buffer by instruction reference. */
125 #define IR(ref) (&J->cur.ir[(ref)])
/* The instruction being folded, plus buffered copies of its two operand
** instructions (filled in by lj_opt_fold before rule matching).
*/
126 #define fins (&J->fold.ins)
127 #define fleft (&J->fold.left)
128 #define fright (&J->fold.right)
/* Numeric constant value of the left/right operand.
** Only valid when the corresponding operand is a KNUM constant.
*/
129 #define knumleft (ir_knum(fleft)->n)
130 #define knumright (ir_knum(fright)->n)
132 /* Pass IR on to next optimization in chain (FOLD). */
/* Note: emitir() re-enters lj_opt_fold(), so it clobbers fins/fleft/fright
** (see the "Clobbers fins!" caveats at the call sites below).
*/
133 #define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
135 /* Fold function type. Fastcall on x86 significantly reduces their size. */
136 typedef IRRef (LJ_FASTCALL
*FoldFunc
)(jit_State
*J
);
138 /* Macros for the fold specs, so buildvm can recognize them. */
/* NOTE(review): the LJFOLD()/LJFOLDX() macro definitions are not visible in
** this excerpt (original lines 139-140 appear to be missing) -- confirm
** against the full file before editing this region.
*/
141 #define LJFOLDF(name) static TRef LJ_FASTCALL fold_##name(jit_State *J)
142 /* Note: They must be at the start of a line or buildvm ignores them! */
144 /* Barrier to prevent using operands across PHIs. */
/* Declines the fold (returns NEXTFOLD) if the given instruction is a PHI,
** so new instructions never reference operands across the PHI frontier
** (see Requirement #2 in the header comment).
*/
145 #define PHIBARRIER(ir) if (irt_isphi((ir)->t)) return NEXTFOLD
147 /* Barrier to prevent folding across a GC step.
148 ** GC steps can only happen at the head of a trace and at LOOP.
149 ** And the GC is only driven forward if there is at least one allocation.
*/
/* True iff ref precedes the LOOP instruction and the trace contains at least
** one allocating instruction (TNEW/TDUP/SNEW/TOSTR), i.e. a GC step may
** separate ref from the current fold point.
*/
151 #define gcstep_barrier(J, ref) \
152 ((ref) < J->chain[IR_LOOP] && \
153 (J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
154 J->chain[IR_SNEW] || J->chain[IR_TOSTR]))
156 /* -- Constant folding ---------------------------------------------------- */
158 LJFOLD(ADD KNUM KNUM
)
159 LJFOLD(SUB KNUM KNUM
)
160 LJFOLD(MUL KNUM KNUM
)
161 LJFOLD(DIV KNUM KNUM
)
162 LJFOLD(NEG KNUM KNUM
)
163 LJFOLD(ABS KNUM KNUM
)
164 LJFOLD(ATAN2 KNUM KNUM
)
165 LJFOLD(LDEXP KNUM KNUM
)
166 LJFOLD(MIN KNUM KNUM
)
167 LJFOLD(MAX KNUM KNUM
)
168 LJFOLDF(kfold_numarith
)
170 lua_Number a
= knumleft
;
171 lua_Number b
= knumright
;
172 lua_Number y
= lj_vm_foldarith(a
, b
, fins
->o
- IR_ADD
);
173 return lj_ir_knum(J
, y
);
176 LJFOLD(FPMATH KNUM any
)
177 LJFOLDF(kfold_fpmath
)
179 lua_Number a
= knumleft
;
180 lua_Number y
= lj_vm_foldfpm(a
, fins
->op2
);
181 return lj_ir_knum(J
, y
);
184 LJFOLD(POWI KNUM KINT
)
187 lua_Number a
= knumleft
;
188 lua_Number b
= cast_num(fright
->i
);
189 lua_Number y
= lj_vm_foldarith(a
, b
, IR_POWI
- IR_ADD
);
190 return lj_ir_knum(J
, y
);
193 static int32_t kfold_intop(int32_t k1
, int32_t k2
, IROp op
)
196 case IR_ADD
: k1
+= k2
; break;
197 case IR_SUB
: k1
-= k2
; break;
198 case IR_BAND
: k1
&= k2
; break;
199 case IR_BOR
: k1
|= k2
; break;
200 case IR_BXOR
: k1
^= k2
; break;
201 case IR_BSHL
: k1
<<= (k2
& 31); break;
202 case IR_BSHR
: k1
= (int32_t)((uint32_t)k1
>> (k2
& 31)); break;
203 case IR_BSAR
: k1
>>= (k2
& 31); break;
204 case IR_BROL
: k1
= (int32_t)lj_rol((uint32_t)k1
, (k2
& 31)); break;
205 case IR_BROR
: k1
= (int32_t)lj_ror((uint32_t)k1
, (k2
& 31)); break;
206 default: lua_assert(0); break;
211 LJFOLD(ADD KINT KINT
)
212 LJFOLD(SUB KINT KINT
)
213 LJFOLD(BAND KINT KINT
)
214 LJFOLD(BOR KINT KINT
)
215 LJFOLD(BXOR KINT KINT
)
216 LJFOLD(BSHL KINT KINT
)
217 LJFOLD(BSHR KINT KINT
)
218 LJFOLD(BSAR KINT KINT
)
219 LJFOLD(BROL KINT KINT
)
220 LJFOLD(BROR KINT KINT
)
221 LJFOLDF(kfold_intarith
)
223 return INTFOLD(kfold_intop(fleft
->i
, fright
->i
, (IROp
)fins
->o
));
229 return INTFOLD(~fleft
->i
);
235 return INTFOLD((int32_t)lj_bswap((uint32_t)fleft
->i
));
241 return lj_ir_knum(J
, cast_num(fleft
->i
));
244 LJFOLD(TOBIT KNUM KNUM
)
248 tv
.n
= knumleft
+ knumright
;
249 return INTFOLD((int32_t)tv
.u32
.lo
);
252 LJFOLD(TOINT KNUM any
)
255 lua_Number n
= knumleft
;
256 int32_t k
= lj_num2int(n
);
257 if (irt_isguard(fins
->t
) && n
!= cast_num(k
)) {
258 /* We're about to create a guard which always fails, like TOINT +1.5.
259 ** Some pathological loops cause this during LICM, e.g.:
260 ** local x,k,t = 0,1.5,{1,[1.5]=2}
261 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
270 LJFOLDF(kfold_tostr_knum
)
272 return lj_ir_kstr(J
, lj_str_fromnum(J
->L
, &knumleft
));
276 LJFOLDF(kfold_tostr_kint
)
278 return lj_ir_kstr(J
, lj_str_fromint(J
->L
, fleft
->i
));
285 if (lj_str_tonum(ir_kstr(fleft
), &n
))
286 return lj_ir_knum(J
, numV(&n
));
290 LJFOLD(SNEW KPTR KINT
)
291 LJFOLDF(kfold_snew_kptr
)
293 GCstr
*s
= lj_str_new(J
->L
, (const char *)ir_kptr(fleft
), (size_t)fright
->i
);
294 return lj_ir_kstr(J
, s
);
297 LJFOLD(SNEW any KINT
)
298 LJFOLDF(kfold_snew_empty
)
301 return lj_ir_kstr(J
, lj_str_new(J
->L
, "", 0));
305 LJFOLD(STRREF KGC KINT
)
306 LJFOLDF(kfold_strref
)
308 GCstr
*str
= ir_kstr(fleft
);
309 lua_assert((MSize
)fright
->i
< str
->len
);
310 return lj_ir_kptr(J
, (char *)strdata(str
) + fright
->i
);
313 LJFOLD(STRREF SNEW any
)
314 LJFOLDF(kfold_strref_snew
)
317 if (irref_isk(fins
->op2
) && fright
->i
== 0) {
318 return fleft
->op1
; /* strref(snew(ptr, len), 0) ==> ptr */
320 /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
321 IRIns
*ir
= IR(fleft
->op1
);
322 IRRef1 str
= ir
->op1
; /* IRIns * is not valid across emitir. */
323 lua_assert(ir
->o
== IR_STRREF
);
325 fins
->op2
= emitir(IRTI(IR_ADD
), ir
->op2
, fins
->op2
); /* Clobbers fins! */
327 fins
->ot
= IRT(IR_STRREF
, IRT_PTR
);
333 /* Must not use kfold_kref for numbers (could be NaN). */
340 LJFOLD(ULT KNUM KNUM
)
341 LJFOLD(UGE KNUM KNUM
)
342 LJFOLD(ULE KNUM KNUM
)
343 LJFOLD(UGT KNUM KNUM
)
344 LJFOLDF(kfold_numcomp
)
346 return CONDFOLD(lj_ir_numcmp(knumleft
, knumright
, (IROp
)fins
->o
));
353 LJFOLD(ULT KINT KINT
)
354 LJFOLD(UGE KINT KINT
)
355 LJFOLD(ULE KINT KINT
)
356 LJFOLD(UGT KINT KINT
)
357 LJFOLD(ABC KINT KINT
)
358 LJFOLDF(kfold_intcomp
)
360 int32_t a
= fleft
->i
, b
= fright
->i
;
361 switch ((IROp
)fins
->o
) {
362 case IR_LT
: return CONDFOLD(a
< b
);
363 case IR_GE
: return CONDFOLD(a
>= b
);
364 case IR_LE
: return CONDFOLD(a
<= b
);
365 case IR_GT
: return CONDFOLD(a
> b
);
366 case IR_ULT
: return CONDFOLD((uint32_t)a
< (uint32_t)b
);
367 case IR_UGE
: return CONDFOLD((uint32_t)a
>= (uint32_t)b
);
368 case IR_ULE
: return CONDFOLD((uint32_t)a
<= (uint32_t)b
);
370 case IR_UGT
: return CONDFOLD((uint32_t)a
> (uint32_t)b
);
371 default: lua_assert(0); return FAILFOLD
;
375 LJFOLD(CALLN CARG IRCALL_lj_str_cmp
)
376 LJFOLDF(kfold_strcmp
)
378 if (irref_isk(fleft
->op1
) && irref_isk(fleft
->op2
)) {
379 GCstr
*a
= ir_kstr(IR(fleft
->op1
));
380 GCstr
*b
= ir_kstr(IR(fleft
->op2
));
381 return INTFOLD(lj_str_cmp(a
, b
));
386 /* Don't constant-fold away FLOAD checks against KNULL. */
387 LJFOLD(EQ FLOAD KNULL
)
388 LJFOLD(NE FLOAD KNULL
)
391 /* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
396 LJFOLD(EQ KINT KINT
) /* Constants are unique, so same refs <==> same value. */
402 return CONDFOLD((fins
->op1
== fins
->op2
) ^ (fins
->o
== IR_NE
));
405 /* -- Algebraic shortcuts ------------------------------------------------- */
407 LJFOLD(FPMATH FPMATH IRFPM_FLOOR
)
408 LJFOLD(FPMATH FPMATH IRFPM_CEIL
)
409 LJFOLD(FPMATH FPMATH IRFPM_TRUNC
)
410 LJFOLDF(shortcut_round
)
412 IRFPMathOp op
= (IRFPMathOp
)fleft
->op2
;
413 if (op
== IRFPM_FLOOR
|| op
== IRFPM_CEIL
|| op
== IRFPM_TRUNC
)
414 return LEFTFOLD
; /* round(round_left(x)) = round_left(x) */
418 LJFOLD(FPMATH TONUM IRFPM_FLOOR
)
419 LJFOLD(FPMATH TONUM IRFPM_CEIL
)
420 LJFOLD(FPMATH TONUM IRFPM_TRUNC
)
422 LJFOLDF(shortcut_left
)
424 return LEFTFOLD
; /* f(g(x)) ==> g(x) */
428 LJFOLDF(shortcut_dropleft
)
431 fins
->op1
= fleft
->op1
; /* abs(neg(x)) ==> abs(x) */
435 /* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
439 LJFOLDF(shortcut_leftleft
)
441 PHIBARRIER(fleft
); /* See above. Fold would be ok, but not beneficial. */
442 return fleft
->op1
; /* f(g(x)) ==> x */
446 LJFOLDF(shortcut_leftleft_toint
)
449 if (irt_isguard(fleft
->t
)) /* Only safe with a guarded TOINT. */
450 return fleft
->op1
; /* f(g(x)) ==> x */
454 LJFOLD(TOINT TONUM any
)
455 LJFOLD(TOBIT TONUM KNUM
) /* The inverse must NOT be shortcut! */
456 LJFOLDF(shortcut_leftleft_across_phi
)
458 /* Fold even across PHI to avoid expensive int->num->int conversions. */
459 return fleft
->op1
; /* f(g(x)) ==> x */
462 /* -- FP algebraic simplifications ---------------------------------------- */
464 /* FP arithmetic is tricky -- there's not much to simplify.
465 ** Please note the following common pitfalls before sending "improvements":
466 ** x+0 ==> x is INVALID for x=-0
467 ** 0-x ==> -x is INVALID for x=+0
468 ** x*0 ==> 0 is INVALID for x=-0, x=+-Inf or x=NaN
472 LJFOLDF(simplify_numadd_negx
)
475 fins
->o
= IR_SUB
; /* (-a) + b ==> b - a */
476 fins
->op1
= fins
->op2
;
477 fins
->op2
= fleft
->op1
;
482 LJFOLDF(simplify_numadd_xneg
)
485 fins
->o
= IR_SUB
; /* a + (-b) ==> a - b */
486 fins
->op2
= fright
->op1
;
491 LJFOLDF(simplify_numsub_k
)
493 lua_Number n
= knumright
;
494 if (n
== 0.0) /* x - (+-0) ==> x */
500 LJFOLDF(simplify_numsub_negk
)
503 fins
->op2
= fleft
->op1
; /* (-x) - k ==> (-k) - x */
504 fins
->op1
= (IRRef1
)lj_ir_knum(J
, -knumright
);
509 LJFOLDF(simplify_numsub_xneg
)
512 fins
->o
= IR_ADD
; /* a - (-b) ==> a + b */
513 fins
->op2
= fright
->op1
;
519 LJFOLDF(simplify_nummuldiv_k
)
521 lua_Number n
= knumright
;
522 if (n
== 1.0) { /* x o 1 ==> x */
524 } else if (n
== -1.0) { /* x o -1 ==> -x */
526 fins
->op2
= (IRRef1
)lj_ir_knum_neg(J
);
528 } else if (fins
->o
== IR_MUL
&& n
== 2.0) { /* x * 2 ==> x + x */
530 fins
->op2
= fins
->op1
;
538 LJFOLDF(simplify_nummuldiv_negk
)
541 fins
->op1
= fleft
->op1
; /* (-a) o k ==> a o (-k) */
542 fins
->op2
= (IRRef1
)lj_ir_knum(J
, -knumright
);
548 LJFOLDF(simplify_nummuldiv_negneg
)
552 fins
->op1
= fleft
->op1
; /* (-a) o (-b) ==> a o b */
553 fins
->op2
= fright
->op1
;
557 LJFOLD(POWI any KINT
)
558 LJFOLDF(simplify_powi_xk
)
560 int32_t k
= fright
->i
;
561 TRef ref
= fins
->op1
;
562 if (k
== 0) /* x ^ 0 ==> 1 */
563 return lj_ir_knum_one(J
); /* Result must be a number, not an int. */
564 if (k
== 1) /* x ^ 1 ==> x */
566 if ((uint32_t)(k
+65536) > 2*65536u) /* Limit code explosion. */
568 if (k
< 0) { /* x ^ (-k) ==> (1/x) ^ k. */
569 ref
= emitir(IRTN(IR_DIV
), lj_ir_knum_one(J
), ref
);
572 /* Unroll x^k for 1 <= k <= 65536. */
573 for (; (k
& 1) == 0; k
>>= 1) /* Handle leading zeros. */
574 ref
= emitir(IRTN(IR_MUL
), ref
, ref
);
575 if ((k
>>= 1) != 0) { /* Handle trailing bits. */
576 TRef tmp
= emitir(IRTN(IR_MUL
), ref
, ref
);
577 for (; k
!= 1; k
>>= 1) {
579 ref
= emitir(IRTN(IR_MUL
), ref
, tmp
);
580 tmp
= emitir(IRTN(IR_MUL
), tmp
, tmp
);
582 ref
= emitir(IRTN(IR_MUL
), ref
, tmp
);
587 LJFOLD(POWI KNUM any
)
588 LJFOLDF(simplify_powi_kx
)
590 lua_Number n
= knumleft
;
591 if (n
== 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
593 fins
->op1
= fins
->op2
;
595 fins
->op2
= (IRRef1
)lj_opt_fold(J
);
596 fins
->op1
= (IRRef1
)lj_ir_knum_one(J
);
603 /* -- FP conversion narrowing --------------------------------------------- */
605 LJFOLD(TOINT ADD any
)
606 LJFOLD(TOINT SUB any
)
607 LJFOLD(TOBIT ADD KNUM
)
608 LJFOLD(TOBIT SUB KNUM
)
609 LJFOLDF(narrow_convert
)
612 /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
613 if (J
->chain
[IR_LOOP
])
615 return lj_opt_narrow_convert(J
);
618 /* Relaxed CSE rule for TOINT allows commoning with stronger checks, too. */
619 LJFOLD(TOINT any any
)
622 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_CSE
)) {
623 IRRef ref
, op1
= fins
->op1
;
624 uint8_t guard
= irt_isguard(fins
->t
);
625 for (ref
= J
->chain
[IR_TOINT
]; ref
> op1
; ref
= IR(ref
)->prev
)
626 if (IR(ref
)->op1
== op1
&& irt_isguard(IR(ref
)->t
) >= guard
)
629 return EMITFOLD
; /* No fallthrough to regular CSE. */
632 /* -- Integer algebraic simplifications ----------------------------------- */
635 LJFOLD(ADDOV any KINT
)
636 LJFOLD(SUBOV any KINT
)
637 LJFOLDF(simplify_intadd_k
)
639 if (fright
->i
== 0) /* i o 0 ==> i */
645 LJFOLDF(simplify_intsub_k
)
647 if (fright
->i
== 0) /* i - 0 ==> i */
649 fins
->o
= IR_ADD
; /* i - k ==> i + (-k) */
650 fins
->op2
= (IRRef1
)lj_ir_kint(J
, -fright
->i
); /* Overflow for -2^31 ok. */
655 LJFOLD(SUBOV any any
)
656 LJFOLDF(simplify_intsub
)
658 if (fins
->op1
== fins
->op2
&& !irt_isnum(fins
->t
)) /* i - i ==> 0 */
664 LJFOLDF(simplify_intsubadd_leftcancel
)
666 if (!irt_isnum(fins
->t
)) {
668 if (fins
->op2
== fleft
->op1
) /* (i + j) - i ==> j */
670 if (fins
->op2
== fleft
->op2
) /* (i + j) - j ==> i */
677 LJFOLDF(simplify_intsubsub_leftcancel
)
679 if (!irt_isnum(fins
->t
)) {
681 if (fins
->op1
== fleft
->op1
) { /* (i - j) - i ==> 0 - j */
682 fins
->op1
= (IRRef1
)lj_ir_kint(J
, 0);
683 fins
->op2
= fleft
->op2
;
691 LJFOLDF(simplify_intsubsub_rightcancel
)
693 if (!irt_isnum(fins
->t
)) {
695 if (fins
->op1
== fright
->op1
) /* i - (i - j) ==> j */
702 LJFOLDF(simplify_intsubadd_rightcancel
)
704 if (!irt_isnum(fins
->t
)) {
706 if (fins
->op1
== fright
->op1
) { /* i - (i + j) ==> 0 - j */
707 fins
->op2
= fright
->op2
;
708 fins
->op1
= (IRRef1
)lj_ir_kint(J
, 0);
711 if (fins
->op1
== fright
->op2
) { /* i - (j + i) ==> 0 - j */
712 fins
->op2
= fright
->op1
;
713 fins
->op1
= (IRRef1
)lj_ir_kint(J
, 0);
721 LJFOLDF(simplify_intsubaddadd_cancel
)
723 if (!irt_isnum(fins
->t
)) {
726 if (fleft
->op1
== fright
->op1
) { /* (i + j1) - (i + j2) ==> j1 - j2 */
727 fins
->op1
= fleft
->op2
;
728 fins
->op2
= fright
->op2
;
731 if (fleft
->op1
== fright
->op2
) { /* (i + j1) - (j2 + i) ==> j1 - j2 */
732 fins
->op1
= fleft
->op2
;
733 fins
->op2
= fright
->op1
;
736 if (fleft
->op2
== fright
->op1
) { /* (j1 + i) - (i + j2) ==> j1 - j2 */
737 fins
->op1
= fleft
->op1
;
738 fins
->op2
= fright
->op2
;
741 if (fleft
->op2
== fright
->op2
) { /* (j1 + i) - (j2 + i) ==> j1 - j2 */
742 fins
->op1
= fleft
->op1
;
743 fins
->op2
= fright
->op1
;
750 LJFOLD(BAND any KINT
)
751 LJFOLDF(simplify_band_k
)
753 if (fright
->i
== 0) /* i & 0 ==> 0 */
755 if (fright
->i
== -1) /* i & -1 ==> i */
761 LJFOLDF(simplify_bor_k
)
763 if (fright
->i
== 0) /* i | 0 ==> i */
765 if (fright
->i
== -1) /* i | -1 ==> -1 */
770 LJFOLD(BXOR any KINT
)
771 LJFOLDF(simplify_bxor_k
)
773 if (fright
->i
== 0) /* i xor 0 ==> i */
775 if (fright
->i
== -1) { /* i xor -1 ==> ~i */
783 LJFOLD(BSHL any KINT
)
784 LJFOLD(BSHR any KINT
)
785 LJFOLD(BSAR any KINT
)
786 LJFOLD(BROL any KINT
)
787 LJFOLD(BROR any KINT
)
788 LJFOLDF(simplify_shift_ik
)
790 int32_t k
= (fright
->i
& 31);
791 if (k
== 0) /* i o 0 ==> i */
793 if (k
!= fright
->i
) { /* i o k ==> i o (k & 31) */
794 fins
->op2
= (IRRef1
)lj_ir_kint(J
, k
);
797 if (fins
->o
== IR_BROR
) { /* bror(i, k) ==> brol(i, (-k)&31) */
799 fins
->op2
= (IRRef1
)lj_ir_kint(J
, (-k
)&31);
805 LJFOLD(BSHL any BAND
)
806 LJFOLD(BSHR any BAND
)
807 LJFOLD(BSAR any BAND
)
808 LJFOLD(BROL any BAND
)
809 LJFOLD(BROR any BAND
)
810 LJFOLDF(simplify_shift_andk
)
812 #if LJ_TARGET_MASKEDSHIFT
813 IRIns
*irk
= IR(fright
->op2
);
815 if (irk
->o
== IR_KINT
) { /* i o (j & 31) ==> i o j */
816 int32_t k
= irk
->i
& 31;
818 fins
->op2
= fright
->op1
;
826 LJFOLD(BSHL KINT any
)
827 LJFOLD(BSHR KINT any
)
828 LJFOLDF(simplify_shift1_ki
)
830 if (fleft
->i
== 0) /* 0 o i ==> 0 */
835 LJFOLD(BSAR KINT any
)
836 LJFOLD(BROL KINT any
)
837 LJFOLD(BROR KINT any
)
838 LJFOLDF(simplify_shift2_ki
)
840 if (fleft
->i
== 0 || fleft
->i
== -1) /* 0 o i ==> 0; -1 o i ==> -1 */
845 /* -- Reassociation ------------------------------------------------------- */
848 LJFOLD(BAND BAND KINT
)
850 LJFOLD(BXOR BXOR KINT
)
851 LJFOLDF(reassoc_intarith_k
)
853 IRIns
*irk
= IR(fleft
->op2
);
854 if (irk
->o
== IR_KINT
) {
855 int32_t k
= kfold_intop(irk
->i
, fright
->i
, (IROp
)fins
->o
);
856 if (k
== irk
->i
) /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
859 fins
->op1
= fleft
->op1
;
860 fins
->op2
= (IRRef1
)lj_ir_kint(J
, k
);
861 return RETRYFOLD
; /* (i o k1) o k2 ==> i o (k1 o k2) */
868 LJFOLD(BAND BAND any
)
873 if (fins
->op2
== fleft
->op1
|| fins
->op2
== fleft
->op2
)
874 return LEFTFOLD
; /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */
878 LJFOLD(BXOR BXOR any
)
879 LJFOLDF(reassoc_bxor
)
882 if (fins
->op2
== fleft
->op1
) /* (a xor b) xor a ==> b */
884 if (fins
->op2
== fleft
->op2
) /* (a xor b) xor b ==> a */
889 LJFOLD(BSHL BSHL KINT
)
890 LJFOLD(BSHR BSHR KINT
)
891 LJFOLD(BSAR BSAR KINT
)
892 LJFOLD(BROL BROL KINT
)
893 LJFOLD(BROR BROR KINT
)
894 LJFOLDF(reassoc_shift
)
896 IRIns
*irk
= IR(fleft
->op2
);
897 PHIBARRIER(fleft
); /* The (shift any KINT) rule covers k2 == 0 and more. */
898 if (irk
->o
== IR_KINT
) { /* (i o k1) o k2 ==> i o (k1 + k2) */
899 int32_t k
= (irk
->i
& 31) + (fright
->i
& 31);
900 if (k
> 31) { /* Combined shift too wide? */
901 if (fins
->o
== IR_BSHL
|| fins
->o
== IR_BSHR
)
903 else if (fins
->o
== IR_BSAR
)
908 fins
->op1
= fleft
->op1
;
909 fins
->op2
= (IRRef1
)lj_ir_kint(J
, k
);
917 LJFOLDF(reassoc_minmax_k
)
919 IRIns
*irk
= IR(fleft
->op2
);
920 if (irk
->o
== IR_KNUM
) {
921 lua_Number a
= ir_knum(irk
)->n
;
922 lua_Number b
= knumright
;
923 lua_Number y
= lj_vm_foldarith(a
, b
, fins
->o
- IR_ADD
);
924 if (a
== y
) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
927 fins
->op1
= fleft
->op1
;
928 fins
->op2
= (IRRef1
)lj_ir_knum(J
, y
);
929 return RETRYFOLD
; /* (x o k1) o k2 ==> x o (k1 o k2) */
936 LJFOLDF(reassoc_minmax_left
)
938 if (fins
->op2
== fleft
->op1
|| fins
->op2
== fleft
->op2
)
939 return RIGHTFOLD
; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */
945 LJFOLDF(reassoc_minmax_right
)
947 if (fins
->op1
== fright
->op1
|| fins
->op1
== fright
->op2
)
948 return LEFTFOLD
; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */
952 /* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
953 ** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
954 ** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
959 if (irref_isk(fright
->op2
)) {
960 IRIns
*add2
= IR(fright
->op1
);
961 if (add2
->o
== IR_ADD
&& irref_isk(add2
->op2
) &&
962 IR(fright
->op2
)->i
== -IR(add2
->op2
)->i
) {
963 IRRef ref
= J
->chain
[IR_ABC
];
964 IRRef lim
= add2
->op1
;
965 if (fins
->op1
> lim
) lim
= fins
->op1
;
968 if (ir
->op1
== fins
->op1
&& ir
->op2
== add2
->op1
)
977 /* -- Commutativity ------------------------------------------------------- */
979 /* The refs of commutative ops are canonicalized. Lower refs go to the right.
980 ** Rationale behind this:
981 ** - It (also) moves constants to the right.
982 ** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices).
983 ** - It helps CSE to find more matches.
984 ** - The assembler generates better code with constants at the right.
989 LJFOLD(ADDOV any any
)
992 if (fins
->op1
< fins
->op2
) { /* Move lower ref to the right. */
993 IRRef1 tmp
= fins
->op1
;
994 fins
->op1
= fins
->op2
;
1005 /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
1006 if (fins
->op1
== fins
->op2
&& !irt_isnum(fins
->t
))
1007 return CONDFOLD(fins
->o
== IR_EQ
);
1008 return fold_comm_swap(J
);
1021 /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
1022 if (fins
->op1
== fins
->op2
&& !irt_isnum(fins
->t
))
1023 return CONDFOLD(fins
->o
& 1);
1024 if (fins
->op1
< fins
->op2
) { /* Move lower ref to the right. */
1025 IRRef1 tmp
= fins
->op1
;
1026 fins
->op1
= fins
->op2
;
1028 fins
->o
^= 3; /* GT <-> LT, GE <-> LE, does not affect U */
1034 LJFOLD(BAND any any
)
1040 if (fins
->op1
== fins
->op2
) /* x o x ==> x */
1042 return fold_comm_swap(J
);
1045 LJFOLD(BXOR any any
)
1048 if (fins
->op1
== fins
->op2
) /* i xor i ==> 0 */
1050 return fold_comm_swap(J
);
1053 /* -- Simplification of compound expressions ------------------------------ */
1055 static int32_t kfold_xload(IRIns
*ir
, const void *p
)
1057 #if !LJ_TARGET_X86ORX64
1058 #error "Missing support for unaligned loads"
1060 switch (irt_type(ir
->t
)) {
1061 case IRT_I8
: return (int32_t)*(int8_t *)p
;
1062 case IRT_U8
: return (int32_t)*(uint8_t *)p
;
1063 case IRT_I16
: return (int32_t)*(int16_t *)p
;
1064 case IRT_U16
: return (int32_t)*(uint16_t *)p
;
1065 default: lua_assert(irt_isint(ir
->t
)); return (int32_t)*(int32_t *)p
;
1069 /* Turn: string.sub(str, a, b) == kstr
1070 ** into: string.byte(str, a) == string.byte(kstr, 1) etc.
1071 ** Note: this creates unaligned XLOADs!
1075 LJFOLDF(merge_eqne_snew_kgc
)
1077 GCstr
*kstr
= ir_kstr(fright
);
1078 int32_t len
= (int32_t)kstr
->len
;
1079 lua_assert(irt_isstr(fins
->t
));
1080 if (len
<= 4) { /* Handle string lengths 0, 1, 2, 3, 4. */
1081 IROp op
= (IROp
)fins
->o
;
1082 IRRef strref
= fleft
->op1
;
1083 lua_assert(IR(strref
)->o
== IR_STRREF
);
1085 emitir(IRTGI(IR_EQ
), fleft
->op2
, lj_ir_kint(J
, len
));
1086 /* Caveat: fins/fleft/fright is no longer valid after emitir. */
1088 /* NE is not expanded since this would need an OR of two conds. */
1089 if (!irref_isk(fleft
->op2
)) /* Only handle the constant length case. */
1091 if (IR(fleft
->op2
)->i
!= len
)
1095 /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
1096 uint16_t ot
= (uint16_t)(len
== 1 ? IRT(IR_XLOAD
, IRT_I8
) :
1097 len
== 2 ? IRT(IR_XLOAD
, IRT_U16
) :
1099 TRef tmp
= emitir(ot
, strref
,
1100 IRXLOAD_READONLY
| (len
> 1 ? IRXLOAD_UNALIGNED
: 0));
1101 TRef val
= lj_ir_kint(J
, kfold_xload(IR(tref_ref(tmp
)), strdata(kstr
)));
1103 tmp
= emitir(IRTI(IR_BAND
), tmp
,
1104 lj_ir_kint(J
, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
1105 fins
->op1
= (IRRef1
)tmp
;
1106 fins
->op2
= (IRRef1
)val
;
1107 fins
->ot
= (IROpT
)IRTGI(op
);
1116 /* -- Loads --------------------------------------------------------------- */
1118 /* Loads cannot be folded or passed on to CSE in general.
1119 ** Alias analysis is needed to check for forwarding opportunities.
1121 ** Caveat: *all* loads must be listed here or they end up at CSE!
1125 LJFOLDX(lj_opt_fwd_aload
)
1128 LJFOLDX(lj_opt_fwd_hload
)
1131 LJFOLDX(lj_opt_fwd_uload
)
1133 LJFOLD(CALLL any IRCALL_lj_tab_len
)
1134 LJFOLDX(lj_opt_fwd_tab_len
)
1136 /* Upvalue refs are really loads, but there are no corresponding stores.
1137 ** So CSE is ok for them, except for UREFO across a GC step (see below).
1138 ** If the referenced function is const, its upvalue addresses are const, too.
1139 ** This can be used to improve CSE by looking for the same address,
1140 ** even if the upvalues originate from a different function.
1142 LJFOLD(UREFO KGC any
)
1143 LJFOLD(UREFC KGC any
)
1146 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_CSE
)) {
1147 IRRef ref
= J
->chain
[fins
->o
];
1148 GCfunc
*fn
= ir_kfunc(fleft
);
1149 GCupval
*uv
= gco2uv(gcref(fn
->l
.uvptr
[fins
->op2
]));
1151 IRIns
*ir
= IR(ref
);
1152 if (irref_isk(ir
->op1
)) {
1153 GCfunc
*fn2
= ir_kfunc(IR(ir
->op1
));
1154 if (gco2uv(gcref(fn2
->l
.uvptr
[ir
->op2
])) == uv
) {
1155 if (fins
->o
== IR_UREFO
&& gcstep_barrier(J
, ref
))
1166 /* We can safely FOLD/CSE array/hash refs and field loads, since there
1167 ** are no corresponding stores. But NEWREF may invalidate all of them.
1168 ** Lacking better disambiguation for table references, these optimizations
1169 ** are simply disabled across any NEWREF.
1170 ** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
1171 ** FLOADs. And NEWREF itself is treated like a store (see below).
1173 LJFOLD(HREF any any
)
1176 TRef tr
= lj_opt_cse(J
);
1177 return tref_ref(tr
) < J
->chain
[IR_NEWREF
] ? EMITFOLD
: tr
;
1180 LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE
)
1181 LJFOLDF(fload_tab_tnew_asize
)
1183 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_FOLD
) && fins
->op1
> J
->chain
[IR_NEWREF
])
1184 return INTFOLD(fleft
->op1
);
1188 LJFOLD(FLOAD TNEW IRFL_TAB_HMASK
)
1189 LJFOLDF(fload_tab_tnew_hmask
)
1191 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_FOLD
) && fins
->op1
> J
->chain
[IR_NEWREF
])
1192 return INTFOLD((1 << fleft
->op2
)-1);
1196 LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE
)
1197 LJFOLDF(fload_tab_tdup_asize
)
1199 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_FOLD
) && fins
->op1
> J
->chain
[IR_NEWREF
])
1200 return INTFOLD((int32_t)ir_ktab(IR(fleft
->op1
))->asize
);
1204 LJFOLD(FLOAD TDUP IRFL_TAB_HMASK
)
1205 LJFOLDF(fload_tab_tdup_hmask
)
1207 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_FOLD
) && fins
->op1
> J
->chain
[IR_NEWREF
])
1208 return INTFOLD((int32_t)ir_ktab(IR(fleft
->op1
))->hmask
);
1212 LJFOLD(FLOAD any IRFL_TAB_ARRAY
)
1213 LJFOLD(FLOAD any IRFL_TAB_NODE
)
1214 LJFOLD(FLOAD any IRFL_TAB_ASIZE
)
1215 LJFOLD(FLOAD any IRFL_TAB_HMASK
)
1216 LJFOLDF(fload_tab_ah
)
1218 TRef tr
= lj_opt_cse(J
);
1219 return tref_ref(tr
) < J
->chain
[IR_NEWREF
] ? EMITFOLD
: tr
;
1222 /* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
1223 LJFOLD(FLOAD KGC IRFL_STR_LEN
)
1224 LJFOLDF(fload_str_len_kgc
)
1226 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_FOLD
))
1227 return INTFOLD((int32_t)ir_kstr(fleft
)->len
);
1231 LJFOLD(FLOAD SNEW IRFL_STR_LEN
)
1232 LJFOLDF(fload_str_len_snew
)
1234 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_FOLD
)) {
1241 LJFOLD(FLOAD any IRFL_STR_LEN
)
1244 /* All other field loads need alias analysis. */
1245 LJFOLD(FLOAD any any
)
1246 LJFOLDX(lj_opt_fwd_fload
)
1248 /* This is for LOOP only. Recording handles SLOADs internally. */
1249 LJFOLD(SLOAD any any
)
1252 lua_assert(J
->slot
[fins
->op1
] != 0);
1253 return J
->slot
[fins
->op1
];
1256 LJFOLD(XLOAD KPTR any
)
1259 /* Only fold read-only integer loads for now. */
1260 if ((fins
->op2
& IRXLOAD_READONLY
) && irt_isinteger(fins
->t
))
1261 return INTFOLD(kfold_xload(fins
, ir_kptr(fleft
)));
1265 /* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */
1266 LJFOLD(XLOAD any any
)
1269 IRRef ref
= J
->chain
[IR_XLOAD
];
1270 IRRef op1
= fins
->op1
;
1272 if (IR(ref
)->op1
== op1
&& irt_sametype(IR(ref
)->t
, fins
->t
))
1274 ref
= IR(ref
)->prev
;
1279 /* -- Write barriers ------------------------------------------------------ */
1281 /* Write barriers are amenable to CSE, but not across any incremental
1284 ** The same logic applies to open upvalue references, because the stack
1285 ** may be resized during a GC step.
1288 LJFOLD(OBAR any any
)
1289 LJFOLD(UREFO any any
)
1290 LJFOLDF(barrier_tab
)
1292 TRef tr
= lj_opt_cse(J
);
1293 if (gcstep_barrier(J
, tref_ref(tr
))) /* CSE across GC step? */
1294 return EMITFOLD
; /* Raw emit. Assumes fins is left intact by CSE. */
1300 LJFOLDF(barrier_tnew_tdup
)
1302 /* New tables are always white and never need a barrier. */
1303 if (fins
->op1
< J
->chain
[IR_LOOP
]) /* Except across a GC step. */
1308 /* -- Stores and allocations ---------------------------------------------- */
1310 /* Stores and allocations cannot be folded or passed on to CSE in general.
1311 ** But some stores can be eliminated with dead-store elimination (DSE).
1313 ** Caveat: *all* stores and allocs must be listed here or they end up at CSE!
1316 LJFOLD(ASTORE any any
)
1317 LJFOLD(HSTORE any any
)
1318 LJFOLDX(lj_opt_dse_ahstore
)
1320 LJFOLD(USTORE any any
)
1321 LJFOLDX(lj_opt_dse_ustore
)
1323 LJFOLD(FSTORE any any
)
1324 LJFOLDX(lj_opt_dse_fstore
)
1326 LJFOLD(NEWREF any any
) /* Treated like a store. */
1327 LJFOLD(CALLS any any
)
1328 LJFOLD(CALLL any any
) /* Safeguard fallback. */
1329 LJFOLD(TNEW any any
)
1333 /* ------------------------------------------------------------------------ */
1335 /* Every entry in the generated hash table is a 32 bit pattern:
1337 ** xxxxxxxx iiiiiiii llllllll rrrrrrrr
1339 ** xxxxxxxx = 8 bit index into fold function table
1340 ** iiiiiiii = 8 bit folded instruction opcode
1341 ** llllllll = 8 bit left instruction opcode
1342 ** rrrrrrrr = 8 bit right instruction opcode or 8 bits from literal field
1345 #include "lj_folddef.h"
1347 /* ------------------------------------------------------------------------ */
1349 /* Fold IR instruction. */
/* FOLD engine entry point (see the description at the top of this file):
** the instruction in fins (J->fold.ins) is matched against the fold rules
** and is folded, passed on to CSE, or emitted, depending on the matched
** rule's return value.
** NOTE(review): several lines of this function (local declarations, the
** retry label, the matching-loop header/initialization and the final
** return) are not visible in this fragment; the comments below cover only
** the visible code.
*/
1350 TRef LJ_FASTCALL
lj_opt_fold(jit_State
*J
)
/* Slow path: only entered when the JIT optimization flags differ from the
** default set, to honor individually disabled optimizations.
*/
1355 if (LJ_UNLIKELY((J
->flags
& JIT_F_OPT_MASK
) != JIT_F_OPT_DEFAULT
)) {
/* Sanity check: FOLD/FWD/CSE/DSE must all be part of the default set. */
1356 lua_assert(((JIT_F_OPT_FOLD
|JIT_F_OPT_FWD
|JIT_F_OPT_CSE
|JIT_F_OPT_DSE
) |
1357 JIT_F_OPT_DEFAULT
) == JIT_F_OPT_DEFAULT
);
1358 /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
1359 if (!(J
->flags
& JIT_F_OPT_FOLD
) && irm_kind(lj_ir_mode
[fins
->o
]) == IRM_N
)
1360 return lj_opt_cse(J
);
1362 /* Forwarding or CSE disabled? Emit raw IR for loads, except for SLOAD. */
1363 if ((J
->flags
& (JIT_F_OPT_FWD
|JIT_F_OPT_CSE
)) !=
1364 (JIT_F_OPT_FWD
|JIT_F_OPT_CSE
) &&
1365 irm_kind(lj_ir_mode
[fins
->o
]) == IRM_L
&& fins
->o
!= IR_SLOAD
)
1366 return lj_ir_emit(J
);
1368 /* DSE disabled? Emit raw IR for stores. */
1369 if (!(J
->flags
& JIT_F_OPT_DSE
) && irm_kind(lj_ir_mode
[fins
->o
]) == IRM_S
)
1370 return lj_ir_emit(J
);
1373 /* Fold engine start/retry point. */
1375 /* Construct key from opcode and operand opcodes (unless literal/none). */
/* 24 bit lookup key 'ins left right' (see the file header): instruction
** opcode in bits 16-23, left operand opcode in bits 8-15, right operand
** opcode (or the low 8 bits of a literal) in bits 0-7.
*/
1376 key
= ((uint32_t)fins
->o
<< 16);
/* Left operand is a real IR reference (>= first non-constant slot): add
** its opcode to the key and snapshot the instruction into fleft for the
** fold functions.
*/
1377 if (fins
->op1
>= J
->cur
.nk
) {
1378 key
+= (uint32_t)IR(fins
->op1
)->o
<< 8;
1379 *fleft
= *IR(fins
->op1
);
/* Same for the right operand, snapshotted into fright. */
1381 if (fins
->op2
>= J
->cur
.nk
) {
1382 key
+= (uint32_t)IR(fins
->op2
)->o
;
1383 *fright
= *IR(fins
->op2
);
/* Otherwise op2 is a literal: only its low 8 bits enter the key. */
1385 key
+= (fins
->op2
& 0xffu
); /* For IRFPM_* and IRFL_*. */
1388 /* Check for a match in order from most specific to least specific. */
/* Each iteration widens the wildcard mask 'any' over the left/right
** operand fields of the key. NOTE(review): the loop header and the
** initialization of 'any' are not visible in this fragment.
*/
1391 uint32_t k
= key
| any
;
1392 uint32_t h
= fold_hashkey(k
);
1393 uint32_t fh
= fold_hash
[h
]; /* Lookup key in semi-perfect hash table. */
/* Low 24 bits of a hash entry hold the key; try both slots h and h+1. */
1394 if ((fh
& 0xffffff) == k
|| (fh
= fold_hash
[h
+1], (fh
& 0xffffff) == k
)) {
/* Top 8 bits index the fold function table; run the matched rule. */
1395 ref
= (IRRef
)tref_ref(fold_func
[fh
>> 24](J
));
1396 if (ref
!= NEXTFOLD
)
1399 if (any
== 0xffff) /* Exhausted folding. Pass on to CSE. */
1400 return lj_opt_cse(J
);
/* Step the wildcard mask (assuming 'any' starts at 0, the progression is
** 0 -> 0xff00 -> 0x00ff -> 0xffff -- TODO confirm against missing init).
*/
1401 any
= (any
| (any
>> 8)) ^ 0xff00;
1404 /* Return value processing, ordered by frequency. */
/* Ordinary IR reference: return it tagged with its result type. */
1405 if (LJ_LIKELY(ref
>= MAX_FOLD
))
1406 return TREF(ref
, irt_t(IR(ref
)->t
));
/* RETRYFOLD: the rule modified fins in place; matching restarts. */
1407 if (ref
== RETRYFOLD
)
/* KINTFOLD: intern the integer constant left in fins->i. */
1409 if (ref
== KINTFOLD
)
1410 return lj_ir_kint(J
, fins
->i
);
/* FAILFOLD: abort trace recording with a guard-fail trace error. */
1411 if (ref
== FAILFOLD
)
1412 lj_trace_err(J
, LJ_TRERR_GFAIL
);
/* Only DROPFOLD remains. NOTE(review): its return is not visible here. */
1413 lua_assert(ref
== DROPFOLD
);
1417 /* -- Common-Subexpression Elimination ------------------------------------ */
1419 /* CSE an IR instruction. This is very fast due to the skip-list chains. */
/* Looks up fins in the per-opcode skip-list chain and returns an existing
** identical instruction if found; otherwise emits fins as a new IR
** instruction (emission inlined for speed).
** NOTE(review): the chain-walking loop header, the declaration/assignment
** of 'op' and parts of the emission path are not visible in this fragment.
*/
1420 TRef LJ_FASTCALL
lj_opt_cse(jit_State
*J
)
1422 /* Avoid narrow to wide store-to-load forwarding stall */
/* Pack both operands into one 32 bit value so a candidate can be matched
** against IR(ref)->op12 with a single compare.
*/
1423 IRRef2 op12
= (IRRef2
)fins
->op1
+ ((IRRef2
)fins
->op2
<< 16);
/* Only search for an existing instruction when CSE is enabled. */
1425 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_CSE
)) {
1426 /* Limited search for same operands in per-opcode chain. */
1427 IRRef ref
= J
->chain
[op
];
/* Search limit is the highest-numbered operand -- presumably because an
** identical instruction cannot precede its own operands; TODO confirm.
*/
1428 IRRef lim
= fins
->op1
;
1429 if (fins
->op2
> lim
) lim
= fins
->op2
; /* Relies on lit < REF_BIAS. */
1431 if (IR(ref
)->op12
== op12
)
1432 return TREF(ref
, irt_t(IR(ref
)->t
)); /* Common subexpression found. */
1433 ref
= IR(ref
)->prev
;
1436 /* Otherwise emit IR (inlined for speed). */
/* Allocate the next IR slot and link it in as the new head of the
** per-opcode skip-list chain.
*/
1438 IRRef ref
= lj_ir_nextins(J
);
1439 IRIns
*ir
= IR(ref
);
1440 ir
->prev
= J
->chain
[op
];
1442 J
->chain
[op
] = (IRRef1
)ref
;
/* Accumulate the result type bits into J->guardemit -- NOTE(review):
** presumably a summary of emitted instruction types; confirm elsewhere.
*/
1444 J
->guardemit
.irt
|= fins
->t
.irt
;
/* Copy the type over and return a typed reference to the new instruction. */
1445 return TREF(ref
, irt_t((ir
->t
= fins
->t
)));
1449 /* CSE with explicit search limit. */
/* Variant of lj_opt_cse: searches the per-opcode chain for an instruction
** with identical packed operands, but only down to the caller-supplied
** reference limit 'lim'; emits fins when no match is found.
** NOTE(review): the loop header, the match-path return and the closing
** brace are not visible in this fragment.
*/
1450 TRef LJ_FASTCALL
lj_opt_cselim(jit_State
*J
, IRRef lim
)
/* Start at the newest instruction with the same opcode as fins. */
1452 IRRef ref
= J
->chain
[fins
->o
];
/* Pack both operands for single-compare matching (same as lj_opt_cse). */
1453 IRRef2 op12
= (IRRef2
)fins
->op1
+ ((IRRef2
)fins
->op2
<< 16);
1455 if (IR(ref
)->op12
== op12
)
/* No match at this entry: follow the skip-list chain backwards. */
1457 ref
= IR(ref
)->prev
;
/* No common subexpression above the limit: emit the instruction. */
1459 return lj_ir_emit(J
);
1462 /* ------------------------------------------------------------------------ */