2 ** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
3 ** ABCelim: Array Bounds Check Elimination.
4 ** CSE: Common-Subexpression Elimination.
5 ** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h
23 #include "lj_carith.h"
26 /* Here's a short description how the FOLD engine processes instructions:
28 ** The FOLD engine receives a single instruction stored in fins (J->fold.ins).
29 ** The instruction and its operands are used to select matching fold rules.
30 ** These are applied iteratively until a fixed point is reached.
32 ** The 8 bit opcode of the instruction itself plus the opcodes of the
33 ** two instructions referenced by its operands form a 24 bit key
34 ** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits).
36 ** This key is used for partial matching against the fold rules. The
37 ** left/right operand fields of the key are successively masked with
38 ** the 'any' wildcard, from most specific to least specific:
45 ** The masked key is used to lookup a matching fold rule in a semi-perfect
46 ** hash table. If a matching rule is found, the related fold function is run.
47 ** Multiple rules can share the same fold function. A fold rule may return
48 ** one of several special values:
50 ** - NEXTFOLD means no folding was applied, because an additional test
51 ** inside the fold function failed. Matching continues against less
52 ** specific fold rules. Finally the instruction is passed on to CSE.
54 ** - RETRYFOLD means the instruction was modified in-place. Folding is
55 ** retried as if this instruction had just been received.
57 ** All other return values are terminal actions -- no further folding is
60 ** - INTFOLD(i) returns a reference to the integer constant i.
62 ** - LEFTFOLD and RIGHTFOLD return the left/right operand reference
63 ** without emitting an instruction.
65 ** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit
66 ** it without passing through any further optimizations.
68 ** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have
69 ** no result (e.g. guarded assertions): FAILFOLD means the guard would
70 ** always fail, i.e. the current trace is pointless. DROPFOLD means
71 ** the guard is always true and has been eliminated. CONDFOLD is a
72 ** shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail).
74 ** - Any other return value is interpreted as an IRRef or TRef. This
75 ** can be a reference to an existing or a newly created instruction.
76 ** Only the least-significant 16 bits (IRRef1) are used to form a TRef
77 ** which is finally returned to the caller.
79 ** The FOLD engine receives instructions both from the trace recorder and
80 ** substituted instructions from LOOP unrolling. This means all types
81 ** of instructions may end up here, even though the recorder bypasses
82 ** FOLD in some cases. Thus all loads, stores and allocations must have
83 ** an any/any rule to avoid being passed on to CSE.
85 ** Carefully read the following requirements before adding or modifying
88 ** Requirement #1: All fold rules must preserve their destination type.
90 ** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result).
91 ** Never use lj_ir_knumint() which can have either a KINT or KNUM result.
93 ** Requirement #2: Fold rules should not create *new* instructions which
94 ** reference operands *across* PHIs.
96 ** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the
97 ** left operand is a PHI. Then fleft->op1 would point across the PHI
98 ** frontier to an invariant instruction. Adding a PHI for this instruction
99 ** would be counterproductive. The solution is to add a barrier which
100 ** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case.
101 ** The only exception is for recurrences with high latencies like
102 ** repeated int->num->int conversions.
104 ** One could relax this condition a bit if the referenced instruction is
105 ** a PHI, too. But this often leads to worse code due to excessive
106 ** register shuffling.
108 ** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though.
109 ** Even returning fleft->op1 would be ok, because a new PHI will added,
110 ** if needed. But again, this leads to excessive register shuffling and
111 ** should be avoided.
113 ** Requirement #3: The set of all fold rules must be monotonic to guarantee
116 ** The goal is optimization, so one primarily wants to add strength-reducing
117 ** rules. This means eliminating an instruction or replacing an instruction
118 ** with one or more simpler instructions. Don't add fold rules which point
119 ** into the other direction.
121 ** Some rules (like commutativity) do not directly reduce the strength of
122 ** an instruction, but enable other fold rules (e.g. by moving constants
123 ** to the right operand). These rules must be made unidirectional to avoid
126 ** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it.
129 /* Some local macros to save typing. Undef'd at the end. */
130 #define IR(ref) (&J->cur.ir[(ref)])
131 #define fins (&J->fold.ins)
132 #define fleft (&J->fold.left)
133 #define fright (&J->fold.right)
134 #define knumleft (ir_knum(fleft)->n)
135 #define knumright (ir_knum(fright)->n)
137 /* Pass IR on to next optimization in chain (FOLD). */
138 #define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
140 /* Fold function type. Fastcall on x86 significantly reduces their size. */
141 typedef IRRef (LJ_FASTCALL
*FoldFunc
)(jit_State
*J
);
143 /* Macros for the fold specs, so buildvm can recognize them. */
146 #define LJFOLDF(name) static TRef LJ_FASTCALL fold_##name(jit_State *J)
147 /* Note: They must be at the start of a line or buildvm ignores them! */
149 /* Barrier to prevent using operands across PHIs. */
150 #define PHIBARRIER(ir) if (irt_isphi((ir)->t)) return NEXTFOLD
152 /* Barrier to prevent folding across a GC step.
153 ** GC steps can only happen at the head of a trace and at LOOP.
154 ** And the GC is only driven forward if there is at least one allocation.
156 #define gcstep_barrier(J, ref) \
157 ((ref) < J->chain[IR_LOOP] && \
158 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
159 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
160 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR]))
162 /* -- Constant folding for FP numbers ------------------------------------- */
164 LJFOLD(ADD KNUM KNUM
)
165 LJFOLD(SUB KNUM KNUM
)
166 LJFOLD(MUL KNUM KNUM
)
167 LJFOLD(DIV KNUM KNUM
)
168 LJFOLD(NEG KNUM KNUM
)
169 LJFOLD(ABS KNUM KNUM
)
170 LJFOLD(ATAN2 KNUM KNUM
)
171 LJFOLD(LDEXP KNUM KNUM
)
172 LJFOLD(MIN KNUM KNUM
)
173 LJFOLD(MAX KNUM KNUM
)
174 LJFOLDF(kfold_numarith
)
176 lua_Number a
= knumleft
;
177 lua_Number b
= knumright
;
178 lua_Number y
= lj_vm_foldarith(a
, b
, fins
->o
- IR_ADD
);
179 return lj_ir_knum(J
, y
);
182 LJFOLD(LDEXP KNUM KINT
)
185 #if LJ_TARGET_X86ORX64
189 return lj_ir_knum(J
, ldexp(knumleft
, fright
->i
));
193 LJFOLD(FPMATH KNUM any
)
194 LJFOLDF(kfold_fpmath
)
196 lua_Number a
= knumleft
;
197 lua_Number y
= lj_vm_foldfpm(a
, fins
->op2
);
198 return lj_ir_knum(J
, y
);
201 LJFOLD(POW KNUM KINT
)
202 LJFOLDF(kfold_numpow
)
204 lua_Number a
= knumleft
;
205 lua_Number b
= (lua_Number
)fright
->i
;
206 lua_Number y
= lj_vm_foldarith(a
, b
, IR_POW
- IR_ADD
);
207 return lj_ir_knum(J
, y
);
210 /* Must not use kfold_kref for numbers (could be NaN). */
217 LJFOLD(ULT KNUM KNUM
)
218 LJFOLD(UGE KNUM KNUM
)
219 LJFOLD(ULE KNUM KNUM
)
220 LJFOLD(UGT KNUM KNUM
)
221 LJFOLDF(kfold_numcomp
)
223 return CONDFOLD(lj_ir_numcmp(knumleft
, knumright
, (IROp
)fins
->o
));
226 /* -- Constant folding for 32 bit integers -------------------------------- */
228 static int32_t kfold_intop(int32_t k1
, int32_t k2
, IROp op
)
231 case IR_ADD
: k1
+= k2
; break;
232 case IR_SUB
: k1
-= k2
; break;
233 case IR_MUL
: k1
*= k2
; break;
234 case IR_MOD
: k1
= lj_vm_modi(k1
, k2
); break;
235 case IR_NEG
: k1
= -k1
; break;
236 case IR_BAND
: k1
&= k2
; break;
237 case IR_BOR
: k1
|= k2
; break;
238 case IR_BXOR
: k1
^= k2
; break;
239 case IR_BSHL
: k1
<<= (k2
& 31); break;
240 case IR_BSHR
: k1
= (int32_t)((uint32_t)k1
>> (k2
& 31)); break;
241 case IR_BSAR
: k1
>>= (k2
& 31); break;
242 case IR_BROL
: k1
= (int32_t)lj_rol((uint32_t)k1
, (k2
& 31)); break;
243 case IR_BROR
: k1
= (int32_t)lj_ror((uint32_t)k1
, (k2
& 31)); break;
244 case IR_MIN
: k1
= k1
< k2
? k1
: k2
; break;
245 case IR_MAX
: k1
= k1
> k2
? k1
: k2
; break;
246 default: lua_assert(0); break;
251 LJFOLD(ADD KINT KINT
)
252 LJFOLD(SUB KINT KINT
)
253 LJFOLD(MUL KINT KINT
)
254 LJFOLD(MOD KINT KINT
)
255 LJFOLD(NEG KINT KINT
)
256 LJFOLD(BAND KINT KINT
)
257 LJFOLD(BOR KINT KINT
)
258 LJFOLD(BXOR KINT KINT
)
259 LJFOLD(BSHL KINT KINT
)
260 LJFOLD(BSHR KINT KINT
)
261 LJFOLD(BSAR KINT KINT
)
262 LJFOLD(BROL KINT KINT
)
263 LJFOLD(BROR KINT KINT
)
264 LJFOLD(MIN KINT KINT
)
265 LJFOLD(MAX KINT KINT
)
266 LJFOLDF(kfold_intarith
)
268 return INTFOLD(kfold_intop(fleft
->i
, fright
->i
, (IROp
)fins
->o
));
271 LJFOLD(ADDOV KINT KINT
)
272 LJFOLD(SUBOV KINT KINT
)
273 LJFOLD(MULOV KINT KINT
)
274 LJFOLDF(kfold_intovarith
)
276 lua_Number n
= lj_vm_foldarith((lua_Number
)fleft
->i
, (lua_Number
)fright
->i
,
278 int32_t k
= lj_num2int(n
);
279 if (n
!= (lua_Number
)k
)
287 return INTFOLD(~fleft
->i
);
293 return INTFOLD((int32_t)lj_bswap((uint32_t)fleft
->i
));
300 LJFOLD(ULT KINT KINT
)
301 LJFOLD(UGE KINT KINT
)
302 LJFOLD(ULE KINT KINT
)
303 LJFOLD(UGT KINT KINT
)
304 LJFOLD(ABC KINT KINT
)
305 LJFOLDF(kfold_intcomp
)
307 int32_t a
= fleft
->i
, b
= fright
->i
;
308 switch ((IROp
)fins
->o
) {
309 case IR_LT
: return CONDFOLD(a
< b
);
310 case IR_GE
: return CONDFOLD(a
>= b
);
311 case IR_LE
: return CONDFOLD(a
<= b
);
312 case IR_GT
: return CONDFOLD(a
> b
);
313 case IR_ULT
: return CONDFOLD((uint32_t)a
< (uint32_t)b
);
314 case IR_UGE
: return CONDFOLD((uint32_t)a
>= (uint32_t)b
);
315 case IR_ULE
: return CONDFOLD((uint32_t)a
<= (uint32_t)b
);
317 case IR_UGT
: return CONDFOLD((uint32_t)a
> (uint32_t)b
);
318 default: lua_assert(0); return FAILFOLD
;
323 LJFOLDF(kfold_intcomp0
)
330 /* -- Constant folding for 64 bit integers -------------------------------- */
332 static uint64_t kfold_int64arith(uint64_t k1
, uint64_t k2
, IROp op
)
335 #if LJ_64 || LJ_HASFFI
336 case IR_ADD
: k1
+= k2
; break;
337 case IR_SUB
: k1
-= k2
; break;
340 case IR_MUL
: k1
*= k2
; break;
341 case IR_BAND
: k1
&= k2
; break;
342 case IR_BOR
: k1
|= k2
; break;
343 case IR_BXOR
: k1
^= k2
; break;
345 default: UNUSED(k2
); lua_assert(0); break;
350 LJFOLD(ADD KINT64 KINT64
)
351 LJFOLD(SUB KINT64 KINT64
)
352 LJFOLD(MUL KINT64 KINT64
)
353 LJFOLD(BAND KINT64 KINT64
)
354 LJFOLD(BOR KINT64 KINT64
)
355 LJFOLD(BXOR KINT64 KINT64
)
356 LJFOLDF(kfold_int64arith
)
358 return INT64FOLD(kfold_int64arith(ir_k64(fleft
)->u64
,
359 ir_k64(fright
)->u64
, (IROp
)fins
->o
));
362 LJFOLD(DIV KINT64 KINT64
)
363 LJFOLD(MOD KINT64 KINT64
)
364 LJFOLD(POW KINT64 KINT64
)
365 LJFOLDF(kfold_int64arith2
)
368 uint64_t k1
= ir_k64(fleft
)->u64
, k2
= ir_k64(fright
)->u64
;
369 if (irt_isi64(fins
->t
)) {
370 k1
= fins
->o
== IR_DIV
? lj_carith_divi64((int64_t)k1
, (int64_t)k2
) :
371 fins
->o
== IR_MOD
? lj_carith_modi64((int64_t)k1
, (int64_t)k2
) :
372 lj_carith_powi64((int64_t)k1
, (int64_t)k2
);
374 k1
= fins
->o
== IR_DIV
? lj_carith_divu64(k1
, k2
) :
375 fins
->o
== IR_MOD
? lj_carith_modu64(k1
, k2
) :
376 lj_carith_powu64(k1
, k2
);
378 return INT64FOLD(k1
);
380 UNUSED(J
); lua_assert(0); return FAILFOLD
;
384 LJFOLD(BSHL KINT64 KINT
)
385 LJFOLD(BSHR KINT64 KINT
)
386 LJFOLD(BSAR KINT64 KINT
)
387 LJFOLD(BROL KINT64 KINT
)
388 LJFOLD(BROR KINT64 KINT
)
389 LJFOLDF(kfold_int64shift
)
391 #if LJ_HASFFI || LJ_64
392 uint64_t k
= ir_k64(fleft
)->u64
;
393 int32_t sh
= (fright
->i
& 63);
394 switch ((IROp
)fins
->o
) {
395 case IR_BSHL
: k
<<= sh
; break;
397 case IR_BSHR
: k
>>= sh
; break;
398 case IR_BSAR
: k
= (uint64_t)((int64_t)k
>> sh
); break;
399 case IR_BROL
: k
= lj_rol(k
, sh
); break;
400 case IR_BROR
: k
= lj_ror(k
, sh
); break;
402 default: lua_assert(0); break;
406 UNUSED(J
); lua_assert(0); return FAILFOLD
;
411 LJFOLDF(kfold_bnot64
)
414 return INT64FOLD(~ir_k64(fleft
)->u64
);
416 UNUSED(J
); lua_assert(0); return FAILFOLD
;
421 LJFOLDF(kfold_bswap64
)
424 return INT64FOLD(lj_bswap64(ir_k64(fleft
)->u64
));
426 UNUSED(J
); lua_assert(0); return FAILFOLD
;
430 LJFOLD(LT KINT64 KINT
)
431 LJFOLD(GE KINT64 KINT
)
432 LJFOLD(LE KINT64 KINT
)
433 LJFOLD(GT KINT64 KINT
)
434 LJFOLD(ULT KINT64 KINT
)
435 LJFOLD(UGE KINT64 KINT
)
436 LJFOLD(ULE KINT64 KINT
)
437 LJFOLD(UGT KINT64 KINT
)
438 LJFOLDF(kfold_int64comp
)
441 uint64_t a
= ir_k64(fleft
)->u64
, b
= ir_k64(fright
)->u64
;
442 switch ((IROp
)fins
->o
) {
443 case IR_LT
: return CONDFOLD(a
< b
);
444 case IR_GE
: return CONDFOLD(a
>= b
);
445 case IR_LE
: return CONDFOLD(a
<= b
);
446 case IR_GT
: return CONDFOLD(a
> b
);
447 case IR_ULT
: return CONDFOLD((uint64_t)a
< (uint64_t)b
);
448 case IR_UGE
: return CONDFOLD((uint64_t)a
>= (uint64_t)b
);
449 case IR_ULE
: return CONDFOLD((uint64_t)a
<= (uint64_t)b
);
450 case IR_UGT
: return CONDFOLD((uint64_t)a
> (uint64_t)b
);
451 default: lua_assert(0); return FAILFOLD
;
454 UNUSED(J
); lua_assert(0); return FAILFOLD
;
458 LJFOLD(UGE any KINT64
)
459 LJFOLDF(kfold_int64comp0
)
462 if (ir_k64(fright
)->u64
== 0)
466 UNUSED(J
); lua_assert(0); return FAILFOLD
;
470 /* -- Constant folding for strings ---------------------------------------- */
472 LJFOLD(SNEW KKPTR KINT
)
473 LJFOLDF(kfold_snew_kptr
)
475 GCstr
*s
= lj_str_new(J
->L
, (const char *)ir_kptr(fleft
), (size_t)fright
->i
);
476 return lj_ir_kstr(J
, s
);
479 LJFOLD(SNEW any KINT
)
480 LJFOLDF(kfold_snew_empty
)
483 return lj_ir_kstr(J
, &J2G(J
)->strempty
);
487 LJFOLD(STRREF KGC KINT
)
488 LJFOLDF(kfold_strref
)
490 GCstr
*str
= ir_kstr(fleft
);
491 lua_assert((MSize
)fright
->i
< str
->len
);
492 return lj_ir_kkptr(J
, (char *)strdata(str
) + fright
->i
);
495 LJFOLD(STRREF SNEW any
)
496 LJFOLDF(kfold_strref_snew
)
499 if (irref_isk(fins
->op2
) && fright
->i
== 0) {
500 return fleft
->op1
; /* strref(snew(ptr, len), 0) ==> ptr */
502 /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
503 IRIns
*ir
= IR(fleft
->op1
);
504 IRRef1 str
= ir
->op1
; /* IRIns * is not valid across emitir. */
505 lua_assert(ir
->o
== IR_STRREF
);
507 fins
->op2
= emitir(IRTI(IR_ADD
), ir
->op2
, fins
->op2
); /* Clobbers fins! */
509 fins
->ot
= IRT(IR_STRREF
, IRT_P32
);
515 LJFOLD(CALLN CARG IRCALL_lj_str_cmp
)
516 LJFOLDF(kfold_strcmp
)
518 if (irref_isk(fleft
->op1
) && irref_isk(fleft
->op2
)) {
519 GCstr
*a
= ir_kstr(IR(fleft
->op1
));
520 GCstr
*b
= ir_kstr(IR(fleft
->op2
));
521 return INTFOLD(lj_str_cmp(a
, b
));
526 /* -- Constant folding of pointer arithmetic ------------------------------ */
529 LJFOLD(ADD KGC KINT64
)
530 LJFOLDF(kfold_add_kgc
)
532 GCobj
*o
= ir_kgc(fleft
);
534 ptrdiff_t ofs
= (ptrdiff_t)ir_kint64(fright
)->u64
;
536 ptrdiff_t ofs
= fright
->i
;
538 return lj_ir_kkptr(J
, (char *)o
+ ofs
);
541 LJFOLD(ADD KPTR KINT
)
542 LJFOLD(ADD KPTR KINT64
)
543 LJFOLD(ADD KKPTR KINT
)
544 LJFOLD(ADD KKPTR KINT64
)
545 LJFOLDF(kfold_add_kptr
)
547 void *p
= ir_kptr(fleft
);
549 ptrdiff_t ofs
= (ptrdiff_t)ir_kint64(fright
)->u64
;
551 ptrdiff_t ofs
= fright
->i
;
553 return lj_ir_kptr_(J
, fleft
->o
, (char *)p
+ ofs
);
556 /* -- Constant folding of conversions ------------------------------------- */
558 LJFOLD(TOBIT KNUM KNUM
)
561 return INTFOLD(lj_num2bit(knumleft
));
564 LJFOLD(CONV KINT IRCONV_NUM_INT
)
565 LJFOLDF(kfold_conv_kint_num
)
567 return lj_ir_knum(J
, (lua_Number
)fleft
->i
);
570 LJFOLD(CONV KINT IRCONV_NUM_U32
)
571 LJFOLDF(kfold_conv_kintu32_num
)
573 return lj_ir_knum(J
, (lua_Number
)(uint32_t)fleft
->i
);
576 LJFOLD(CONV KINT IRCONV_I64_INT
)
577 LJFOLD(CONV KINT IRCONV_U64_INT
)
578 LJFOLDF(kfold_conv_kint_i64
)
580 if ((fins
->op2
& IRCONV_SEXT
))
581 return INT64FOLD((uint64_t)(int64_t)fleft
->i
);
583 return INT64FOLD((uint64_t)(int64_t)(uint32_t)fleft
->i
);
586 LJFOLD(CONV KINT64 IRCONV_NUM_I64
)
587 LJFOLDF(kfold_conv_kint64_num_i64
)
589 return lj_ir_knum(J
, (lua_Number
)(int64_t)ir_kint64(fleft
)->u64
);
592 LJFOLD(CONV KINT64 IRCONV_NUM_U64
)
593 LJFOLDF(kfold_conv_kint64_num_u64
)
595 return lj_ir_knum(J
, (lua_Number
)ir_kint64(fleft
)->u64
);
598 LJFOLD(CONV KINT64 IRCONV_INT_I64
)
599 LJFOLD(CONV KINT64 IRCONV_U32_I64
)
600 LJFOLDF(kfold_conv_kint64_int_i64
)
602 return INTFOLD((int32_t)ir_kint64(fleft
)->u64
);
605 LJFOLD(CONV KNUM IRCONV_INT_NUM
)
606 LJFOLDF(kfold_conv_knum_int_num
)
608 lua_Number n
= knumleft
;
609 if (!(fins
->op2
& IRCONV_TRUNC
)) {
610 int32_t k
= lj_num2int(n
);
611 if (irt_isguard(fins
->t
) && n
!= (lua_Number
)k
) {
612 /* We're about to create a guard which always fails, like CONV +1.5.
613 ** Some pathological loops cause this during LICM, e.g.:
614 ** local x,k,t = 0,1.5,{1,[1.5]=2}
615 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
622 return INTFOLD((int32_t)n
);
626 LJFOLD(CONV KNUM IRCONV_U32_NUM
)
627 LJFOLDF(kfold_conv_knum_u32_num
)
629 lua_assert((fins
->op2
& IRCONV_TRUNC
));
630 return INTFOLD((int32_t)(uint32_t)knumleft
);
633 LJFOLD(CONV KNUM IRCONV_I64_NUM
)
634 LJFOLDF(kfold_conv_knum_i64_num
)
636 lua_assert((fins
->op2
& IRCONV_TRUNC
));
637 return INT64FOLD((uint64_t)(int64_t)knumleft
);
640 LJFOLD(CONV KNUM IRCONV_U64_NUM
)
641 LJFOLDF(kfold_conv_knum_u64_num
)
643 lua_assert((fins
->op2
& IRCONV_TRUNC
));
644 return INT64FOLD(lj_num2u64(knumleft
));
648 LJFOLDF(kfold_tostr_knum
)
650 return lj_ir_kstr(J
, lj_str_fromnum(J
->L
, &knumleft
));
654 LJFOLDF(kfold_tostr_kint
)
656 return lj_ir_kstr(J
, lj_str_fromint(J
->L
, fleft
->i
));
663 if (lj_str_tonum(ir_kstr(fleft
), &n
))
664 return lj_ir_knum(J
, numV(&n
));
668 /* -- Constant folding of equality checks --------------------------------- */
670 /* Don't constant-fold away FLOAD checks against KNULL. */
671 LJFOLD(EQ FLOAD KNULL
)
672 LJFOLD(NE FLOAD KNULL
)
675 /* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
680 LJFOLD(EQ KINT KINT
) /* Constants are unique, so same refs <==> same value. */
682 LJFOLD(EQ KINT64 KINT64
)
683 LJFOLD(NE KINT64 KINT64
)
688 return CONDFOLD((fins
->op1
== fins
->op2
) ^ (fins
->o
== IR_NE
));
691 /* -- Algebraic shortcuts ------------------------------------------------- */
693 LJFOLD(FPMATH FPMATH IRFPM_FLOOR
)
694 LJFOLD(FPMATH FPMATH IRFPM_CEIL
)
695 LJFOLD(FPMATH FPMATH IRFPM_TRUNC
)
696 LJFOLDF(shortcut_round
)
698 IRFPMathOp op
= (IRFPMathOp
)fleft
->op2
;
699 if (op
== IRFPM_FLOOR
|| op
== IRFPM_CEIL
|| op
== IRFPM_TRUNC
)
700 return LEFTFOLD
; /* round(round_left(x)) = round_left(x) */
705 LJFOLDF(shortcut_left
)
707 return LEFTFOLD
; /* f(g(x)) ==> g(x) */
711 LJFOLDF(shortcut_dropleft
)
714 fins
->op1
= fleft
->op1
; /* abs(neg(x)) ==> abs(x) */
718 /* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
722 LJFOLDF(shortcut_leftleft
)
724 PHIBARRIER(fleft
); /* See above. Fold would be ok, but not beneficial. */
725 return fleft
->op1
; /* f(g(x)) ==> x */
728 /* -- FP algebraic simplifications ---------------------------------------- */
730 /* FP arithmetic is tricky -- there's not much to simplify.
731 ** Please note the following common pitfalls before sending "improvements":
732 ** x+0 ==> x is INVALID for x=-0
733 ** 0-x ==> -x is INVALID for x=+0
734 ** x*0 ==> 0 is INVALID for x=-0, x=+-Inf or x=NaN
738 LJFOLDF(simplify_numadd_negx
)
741 fins
->o
= IR_SUB
; /* (-a) + b ==> b - a */
742 fins
->op1
= fins
->op2
;
743 fins
->op2
= fleft
->op1
;
748 LJFOLDF(simplify_numadd_xneg
)
751 fins
->o
= IR_SUB
; /* a + (-b) ==> a - b */
752 fins
->op2
= fright
->op1
;
757 LJFOLDF(simplify_numsub_k
)
759 lua_Number n
= knumright
;
760 if (n
== 0.0) /* x - (+-0) ==> x */
766 LJFOLDF(simplify_numsub_negk
)
769 fins
->op2
= fleft
->op1
; /* (-x) - k ==> (-k) - x */
770 fins
->op1
= (IRRef1
)lj_ir_knum(J
, -knumright
);
775 LJFOLDF(simplify_numsub_xneg
)
778 fins
->o
= IR_ADD
; /* a - (-b) ==> a + b */
779 fins
->op2
= fright
->op1
;
785 LJFOLDF(simplify_nummuldiv_k
)
787 lua_Number n
= knumright
;
788 if (n
== 1.0) { /* x o 1 ==> x */
790 } else if (n
== -1.0) { /* x o -1 ==> -x */
792 fins
->op2
= (IRRef1
)lj_ir_knum_neg(J
);
794 } else if (fins
->o
== IR_MUL
&& n
== 2.0) { /* x * 2 ==> x + x */
796 fins
->op2
= fins
->op1
;
804 LJFOLDF(simplify_nummuldiv_negk
)
807 fins
->op1
= fleft
->op1
; /* (-a) o k ==> a o (-k) */
808 fins
->op2
= (IRRef1
)lj_ir_knum(J
, -knumright
);
814 LJFOLDF(simplify_nummuldiv_negneg
)
818 fins
->op1
= fleft
->op1
; /* (-a) o (-b) ==> a o b */
819 fins
->op2
= fright
->op1
;
824 LJFOLDF(simplify_numpow_xk
)
826 int32_t k
= fright
->i
;
827 TRef ref
= fins
->op1
;
828 if (k
== 0) /* x ^ 0 ==> 1 */
829 return lj_ir_knum_one(J
); /* Result must be a number, not an int. */
830 if (k
== 1) /* x ^ 1 ==> x */
832 if ((uint32_t)(k
+65536) > 2*65536u) /* Limit code explosion. */
834 if (k
< 0) { /* x ^ (-k) ==> (1/x) ^ k. */
835 ref
= emitir(IRTN(IR_DIV
), lj_ir_knum_one(J
), ref
);
838 /* Unroll x^k for 1 <= k <= 65536. */
839 for (; (k
& 1) == 0; k
>>= 1) /* Handle leading zeros. */
840 ref
= emitir(IRTN(IR_MUL
), ref
, ref
);
841 if ((k
>>= 1) != 0) { /* Handle trailing bits. */
842 TRef tmp
= emitir(IRTN(IR_MUL
), ref
, ref
);
843 for (; k
!= 1; k
>>= 1) {
845 ref
= emitir(IRTN(IR_MUL
), ref
, tmp
);
846 tmp
= emitir(IRTN(IR_MUL
), tmp
, tmp
);
848 ref
= emitir(IRTN(IR_MUL
), ref
, tmp
);
854 LJFOLDF(simplify_numpow_kx
)
856 lua_Number n
= knumleft
;
857 if (n
== 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
859 #if LJ_TARGET_X86ORX64
860 fins
->op1
= fins
->op2
;
861 fins
->op2
= IRCONV_NUM_INT
;
862 fins
->op2
= (IRRef1
)lj_opt_fold(J
);
864 fins
->op1
= (IRRef1
)lj_ir_knum_one(J
);
871 /* -- Simplify conversions ------------------------------------------------ */
873 LJFOLD(CONV CONV IRCONV_NUM_INT
) /* _NUM */
874 LJFOLDF(shortcut_conv_num_int
)
877 /* Only safe with a guarded conversion to int. */
878 if ((fleft
->op2
& IRCONV_SRCMASK
) == IRT_NUM
&& irt_isguard(fleft
->t
))
879 return fleft
->op1
; /* f(g(x)) ==> x */
883 LJFOLD(CONV CONV IRCONV_INT_NUM
) /* _INT */
884 LJFOLDF(simplify_conv_int_num
)
886 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
887 if ((fleft
->op2
& IRCONV_SRCMASK
) == IRT_INT
)
892 LJFOLD(CONV CONV IRCONV_U32_NUM
) /* _U32*/
893 LJFOLDF(simplify_conv_u32_num
)
895 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
896 if ((fleft
->op2
& IRCONV_SRCMASK
) == IRT_U32
)
901 LJFOLD(CONV CONV IRCONV_I64_NUM
) /* _INT or _U32*/
902 LJFOLD(CONV CONV IRCONV_U64_NUM
) /* _INT or _U32*/
903 LJFOLDF(simplify_conv_i64_num
)
906 if ((fleft
->op2
& IRCONV_SRCMASK
) == IRT_INT
) {
907 /* Reduce to a sign-extension. */
908 fins
->op1
= fleft
->op1
;
909 fins
->op2
= ((IRT_I64
<<5)|IRT_INT
|IRCONV_SEXT
);
911 } else if ((fleft
->op2
& IRCONV_SRCMASK
) == IRT_U32
) {
915 /* Reduce to a zero-extension. */
916 fins
->op1
= fleft
->op1
;
917 fins
->op2
= (IRT_I64
<<5)|IRT_U32
;
924 LJFOLD(CONV CONV IRCONV_INT_I64
) /* _INT */
925 LJFOLD(CONV CONV IRCONV_INT_U64
) /* _INT */
926 LJFOLDF(simplify_conv_int_i64
)
929 if ((fleft
->op2
& IRCONV_SRCMASK
) == IRT_INT
)
934 LJFOLD(CONV CONV IRCONV_FLOAT_NUM
) /* _FLOAT */
935 LJFOLDF(simplify_conv_flt_num
)
938 if ((fleft
->op2
& IRCONV_SRCMASK
) == IRT_FLOAT
)
943 /* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
944 LJFOLD(TOBIT CONV KNUM
)
945 LJFOLDF(simplify_tobit_conv
)
947 if ((fleft
->op2
& IRCONV_SRCMASK
) == IRT_INT
||
948 (fleft
->op2
& IRCONV_SRCMASK
) == IRT_U32
) {
949 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
950 lua_assert(irt_isnum(fleft
->t
));
956 /* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
957 LJFOLD(FPMATH CONV IRFPM_FLOOR
)
958 LJFOLD(FPMATH CONV IRFPM_CEIL
)
959 LJFOLD(FPMATH CONV IRFPM_TRUNC
)
960 LJFOLDF(simplify_floor_conv
)
962 if ((fleft
->op2
& IRCONV_SRCMASK
) == IRT_INT
||
963 (fleft
->op2
& IRCONV_SRCMASK
) == IRT_U32
)
968 /* Strength reduction of widening. */
969 LJFOLD(CONV any IRCONV_I64_INT
)
970 LJFOLDF(simplify_conv_sext
)
972 IRRef ref
= fins
->op1
;
974 if (!(fins
->op2
& IRCONV_SEXT
))
977 if (fleft
->o
== IR_XLOAD
&& (irt_isu8(fleft
->t
) || irt_isu16(fleft
->t
)))
979 if (fleft
->o
== IR_ADD
&& irref_isk(fleft
->op2
)) {
980 ofs
= (int64_t)IR(fleft
->op2
)->i
;
983 /* Use scalar evolution analysis results to strength-reduce sign-extension. */
984 if (ref
== J
->scev
.idx
) {
985 IRRef lo
= J
->scev
.dir
? J
->scev
.start
: J
->scev
.stop
;
986 lua_assert(irt_isint(J
->scev
.t
));
987 if (lo
&& IR(lo
)->i
+ ofs
>= 0) {
990 /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
993 /* Reduce to a (cheaper) zero-extension. */
994 fins
->op2
&= ~IRCONV_SEXT
;
1002 /* Strength reduction of narrowing. */
1003 LJFOLD(CONV ADD IRCONV_INT_I64
)
1004 LJFOLD(CONV SUB IRCONV_INT_I64
)
1005 LJFOLD(CONV MUL IRCONV_INT_I64
)
1006 LJFOLD(CONV ADD IRCONV_INT_U64
)
1007 LJFOLD(CONV SUB IRCONV_INT_U64
)
1008 LJFOLD(CONV MUL IRCONV_INT_U64
)
1009 LJFOLDF(simplify_conv_narrow
)
1011 IROp op
= (IROp
)fleft
->o
;
1012 IRRef op1
= fleft
->op1
, op2
= fleft
->op2
, mode
= fins
->op2
;
1014 op1
= emitir(IRTI(IR_CONV
), op1
, mode
);
1015 op2
= emitir(IRTI(IR_CONV
), op2
, mode
);
1016 fins
->ot
= IRTI(op
);
1022 /* Special CSE rule for CONV. */
1023 LJFOLD(CONV any any
)
1026 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_CSE
)) {
1027 IRRef op1
= fins
->op1
, op2
= (fins
->op2
& IRCONV_MODEMASK
);
1028 uint8_t guard
= irt_isguard(fins
->t
);
1029 IRRef ref
= J
->chain
[IR_CONV
];
1031 IRIns
*ir
= IR(ref
);
1032 /* Commoning with stronger checks is ok. */
1033 if (ir
->op1
== op1
&& (ir
->op2
& IRCONV_MODEMASK
) == op2
&&
1034 irt_isguard(ir
->t
) >= guard
)
1039 return EMITFOLD
; /* No fallthrough to regular CSE. */
1042 /* FP conversion narrowing. */
1043 LJFOLD(TOBIT ADD KNUM
)
1044 LJFOLD(TOBIT SUB KNUM
)
1045 LJFOLD(CONV ADD IRCONV_INT_NUM
)
1046 LJFOLD(CONV SUB IRCONV_INT_NUM
)
1047 LJFOLD(CONV ADD IRCONV_I64_NUM
)
1048 LJFOLD(CONV SUB IRCONV_I64_NUM
)
1049 LJFOLDF(narrow_convert
)
1052 /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
1053 if (J
->chain
[IR_LOOP
])
1055 lua_assert(fins
->o
!= IR_CONV
|| (fins
->op2
&IRCONV_CONVMASK
) != IRCONV_TOBIT
);
1056 return lj_opt_narrow_convert(J
);
1059 /* -- Integer algebraic simplifications ----------------------------------- */
1061 LJFOLD(ADD any KINT
)
1062 LJFOLD(ADDOV any KINT
)
1063 LJFOLD(SUBOV any KINT
)
1064 LJFOLDF(simplify_intadd_k
)
1066 if (fright
->i
== 0) /* i o 0 ==> i */
1071 LJFOLD(MULOV any KINT
)
1072 LJFOLDF(simplify_intmul_k
)
1074 if (fright
->i
== 0) /* i * 0 ==> 0 */
1076 if (fright
->i
== 1) /* i * 1 ==> i */
1078 if (fright
->i
== 2) { /* i * 2 ==> i + i */
1080 fins
->op2
= fins
->op1
;
1086 LJFOLD(SUB any KINT
)
1087 LJFOLDF(simplify_intsub_k
)
1089 if (fright
->i
== 0) /* i - 0 ==> i */
1091 fins
->o
= IR_ADD
; /* i - k ==> i + (-k) */
1092 fins
->op2
= (IRRef1
)lj_ir_kint(J
, -fright
->i
); /* Overflow for -2^31 ok. */
1096 LJFOLD(SUB KINT any
)
1097 LJFOLD(SUB KINT64 any
)
1098 LJFOLDF(simplify_intsub_kleft
)
1100 if (fleft
->o
== IR_KINT
? (fleft
->i
== 0) : (ir_kint64(fleft
)->u64
== 0)) {
1101 fins
->o
= IR_NEG
; /* 0 - i ==> -i */
1102 fins
->op1
= fins
->op2
;
1108 LJFOLD(ADD any KINT64
)
1109 LJFOLDF(simplify_intadd_k64
)
1111 if (ir_kint64(fright
)->u64
== 0) /* i + 0 ==> i */
1116 LJFOLD(SUB any KINT64
)
1117 LJFOLDF(simplify_intsub_k64
)
1119 uint64_t k
= ir_kint64(fright
)->u64
;
1120 if (k
== 0) /* i - 0 ==> i */
1122 fins
->o
= IR_ADD
; /* i - k ==> i + (-k) */
1123 fins
->op2
= (IRRef1
)lj_ir_kint64(J
, (uint64_t)-(int64_t)k
);
1127 static TRef
simplify_intmul_k(jit_State
*J
, int32_t k
)
1129 /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2.
1130 ** But this is mainly intended for simple address arithmetic.
1131 ** Also it's easier for the backend to optimize the original multiplies.
1133 if (k
== 1) { /* i * 1 ==> i */
1135 } else if ((k
& (k
-1)) == 0) { /* i * 2^k ==> i << k */
1137 fins
->op2
= lj_ir_kint(J
, lj_fls((uint32_t)k
));
1143 LJFOLD(MUL any KINT
)
1144 LJFOLDF(simplify_intmul_k32
)
1146 if (fright
->i
== 0) /* i * 0 ==> 0 */
1148 else if (fright
->i
> 0)
1149 return simplify_intmul_k(J
, fright
->i
);
1153 LJFOLD(MUL any KINT64
)
1154 LJFOLDF(simplify_intmul_k64
)
1156 if (ir_kint64(fright
)->u64
== 0) /* i * 0 ==> 0 */
1157 return INT64FOLD(0);
1159 /* NYI: SPLIT for BSHL and 32 bit backend support. */
1160 else if (ir_kint64(fright
)->u64
< 0x80000000u
)
1161 return simplify_intmul_k(J
, (int32_t)ir_kint64(fright
)->u64
);
1166 LJFOLD(MOD any KINT
)
1167 LJFOLDF(simplify_intmod_k
)
1169 int32_t k
= fright
->i
;
1171 if (k
> 0 && (k
& (k
-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */
1173 fins
->op2
= lj_ir_kint(J
, k
-1);
1179 LJFOLD(MOD KINT any
)
1180 LJFOLDF(simplify_intmod_kleft
)
1188 LJFOLD(SUBOV any any
)
1189 LJFOLDF(simplify_intsub
)
1191 if (fins
->op1
== fins
->op2
&& !irt_isnum(fins
->t
)) /* i - i ==> 0 */
1192 return irt_is64(fins
->t
) ? INT64FOLD(0) : INTFOLD(0);
1197 LJFOLDF(simplify_intsubadd_leftcancel
)
1199 if (!irt_isnum(fins
->t
)) {
1201 if (fins
->op2
== fleft
->op1
) /* (i + j) - i ==> j */
1203 if (fins
->op2
== fleft
->op2
) /* (i + j) - j ==> i */
1210 LJFOLDF(simplify_intsubsub_leftcancel
)
1212 if (!irt_isnum(fins
->t
)) {
1214 if (fins
->op1
== fleft
->op1
) { /* (i - j) - i ==> 0 - j */
1215 fins
->op1
= (IRRef1
)lj_ir_kint(J
, 0);
1216 fins
->op2
= fleft
->op2
;
1224 LJFOLDF(simplify_intsubsub_rightcancel
)
1226 if (!irt_isnum(fins
->t
)) {
1228 if (fins
->op1
== fright
->op1
) /* i - (i - j) ==> j */
1235 LJFOLDF(simplify_intsubadd_rightcancel
)
1237 if (!irt_isnum(fins
->t
)) {
1239 if (fins
->op1
== fright
->op1
) { /* i - (i + j) ==> 0 - j */
1240 fins
->op2
= fright
->op2
;
1241 fins
->op1
= (IRRef1
)lj_ir_kint(J
, 0);
1244 if (fins
->op1
== fright
->op2
) { /* i - (j + i) ==> 0 - j */
1245 fins
->op2
= fright
->op1
;
1246 fins
->op1
= (IRRef1
)lj_ir_kint(J
, 0);
1254 LJFOLDF(simplify_intsubaddadd_cancel
)
1256 if (!irt_isnum(fins
->t
)) {
1259 if (fleft
->op1
== fright
->op1
) { /* (i + j1) - (i + j2) ==> j1 - j2 */
1260 fins
->op1
= fleft
->op2
;
1261 fins
->op2
= fright
->op2
;
1264 if (fleft
->op1
== fright
->op2
) { /* (i + j1) - (j2 + i) ==> j1 - j2 */
1265 fins
->op1
= fleft
->op2
;
1266 fins
->op2
= fright
->op1
;
1269 if (fleft
->op2
== fright
->op1
) { /* (j1 + i) - (i + j2) ==> j1 - j2 */
1270 fins
->op1
= fleft
->op1
;
1271 fins
->op2
= fright
->op2
;
1274 if (fleft
->op2
== fright
->op2
) { /* (j1 + i) - (j2 + i) ==> j1 - j2 */
1275 fins
->op1
= fleft
->op1
;
1276 fins
->op2
= fright
->op1
;
1283 LJFOLD(BAND any KINT
)
1284 LJFOLD(BAND any KINT64
)
1285 LJFOLDF(simplify_band_k
)
1287 int64_t k
= fright
->o
== IR_KINT
? (int64_t)fright
->i
:
1288 (int64_t)ir_k64(fright
)->u64
;
1289 if (k
== 0) /* i & 0 ==> 0 */
1291 if (k
== -1) /* i & -1 ==> i */
1296 LJFOLD(BOR any KINT
)
1297 LJFOLD(BOR any KINT64
)
1298 LJFOLDF(simplify_bor_k
)
1300 int64_t k
= fright
->o
== IR_KINT
? (int64_t)fright
->i
:
1301 (int64_t)ir_k64(fright
)->u64
;
1302 if (k
== 0) /* i | 0 ==> i */
1304 if (k
== -1) /* i | -1 ==> -1 */
1309 LJFOLD(BXOR any KINT
)
1310 LJFOLD(BXOR any KINT64
)
1311 LJFOLDF(simplify_bxor_k
)
1313 int64_t k
= fright
->o
== IR_KINT
? (int64_t)fright
->i
:
1314 (int64_t)ir_k64(fright
)->u64
;
1315 if (k
== 0) /* i xor 0 ==> i */
1317 if (k
== -1) { /* i xor -1 ==> ~i */
1325 LJFOLD(BSHL any KINT
)
1326 LJFOLD(BSHR any KINT
)
1327 LJFOLD(BSAR any KINT
)
1328 LJFOLD(BROL any KINT
)
1329 LJFOLD(BROR any KINT
)
1330 LJFOLDF(simplify_shift_ik
)
1332 int32_t mask
= irt_is64(fins
->t
) ? 63 : 31;
1333 int32_t k
= (fright
->i
& mask
);
1334 if (k
== 0) /* i o 0 ==> i */
1336 if (k
== 1 && fins
->o
== IR_BSHL
) { /* i << 1 ==> i + i */
1338 fins
->op2
= fins
->op1
;
1341 if (k
!= fright
->i
) { /* i o k ==> i o (k & mask) */
1342 fins
->op2
= (IRRef1
)lj_ir_kint(J
, k
);
1345 #ifndef LJ_TARGET_UNIFYROT
1346 if (fins
->o
== IR_BROR
) { /* bror(i, k) ==> brol(i, (-k)&mask) */
1348 fins
->op2
= (IRRef1
)lj_ir_kint(J
, (-k
)&mask
);
1355 LJFOLD(BSHL any BAND
)
1356 LJFOLD(BSHR any BAND
)
1357 LJFOLD(BSAR any BAND
)
1358 LJFOLD(BROL any BAND
)
1359 LJFOLD(BROR any BAND
)
1360 LJFOLDF(simplify_shift_andk
)
1362 IRIns
*irk
= IR(fright
->op2
);
1364 if ((fins
->o
< IR_BROL
? LJ_TARGET_MASKSHIFT
: LJ_TARGET_MASKROT
) &&
1365 irk
->o
== IR_KINT
) { /* i o (j & mask) ==> i o j */
1366 int32_t mask
= irt_is64(fins
->t
) ? 63 : 31;
1367 int32_t k
= irk
->i
& mask
;
1369 fins
->op2
= fright
->op1
;
1376 LJFOLD(BSHL KINT any
)
1377 LJFOLD(BSHR KINT any
)
1378 LJFOLD(BSHL KINT64 any
)
1379 LJFOLD(BSHR KINT64 any
)
1380 LJFOLDF(simplify_shift1_ki
)
1382 int64_t k
= fleft
->o
== IR_KINT
? (int64_t)fleft
->i
:
1383 (int64_t)ir_k64(fleft
)->u64
;
1384 if (k
== 0) /* 0 o i ==> 0 */
1389 LJFOLD(BSAR KINT any
)
1390 LJFOLD(BROL KINT any
)
1391 LJFOLD(BROR KINT any
)
1392 LJFOLD(BSAR KINT64 any
)
1393 LJFOLD(BROL KINT64 any
)
1394 LJFOLD(BROR KINT64 any
)
1395 LJFOLDF(simplify_shift2_ki
)
1397 int64_t k
= fleft
->o
== IR_KINT
? (int64_t)fleft
->i
:
1398 (int64_t)ir_k64(fleft
)->u64
;
1399 if (k
== 0 || k
== -1) /* 0 o i ==> 0; -1 o i ==> -1 */
1404 /* -- Reassociation ------------------------------------------------------- */
1406 LJFOLD(ADD ADD KINT
)
1407 LJFOLD(MUL MUL KINT
)
1408 LJFOLD(BAND BAND KINT
)
1409 LJFOLD(BOR BOR KINT
)
1410 LJFOLD(BXOR BXOR KINT
)
1411 LJFOLDF(reassoc_intarith_k
)
1413 IRIns
*irk
= IR(fleft
->op2
);
1414 if (irk
->o
== IR_KINT
) {
1415 int32_t k
= kfold_intop(irk
->i
, fright
->i
, (IROp
)fins
->o
);
1416 if (k
== irk
->i
) /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
1419 fins
->op1
= fleft
->op1
;
1420 fins
->op2
= (IRRef1
)lj_ir_kint(J
, k
);
1421 return RETRYFOLD
; /* (i o k1) o k2 ==> i o (k1 o k2) */
1426 LJFOLD(ADD ADD KINT64
)
1427 LJFOLD(MUL MUL KINT64
)
1428 LJFOLD(BAND BAND KINT64
)
1429 LJFOLD(BOR BOR KINT64
)
1430 LJFOLD(BXOR BXOR KINT64
)
1431 LJFOLDF(reassoc_intarith_k64
)
1433 #if LJ_HASFFI || LJ_64
1434 IRIns
*irk
= IR(fleft
->op2
);
1435 if (irk
->o
== IR_KINT64
) {
1436 uint64_t k
= kfold_int64arith(ir_k64(irk
)->u64
,
1437 ir_k64(fright
)->u64
, (IROp
)fins
->o
);
1439 fins
->op1
= fleft
->op1
;
1440 fins
->op2
= (IRRef1
)lj_ir_kint64(J
, k
);
1441 return RETRYFOLD
; /* (i o k1) o k2 ==> i o (k1 o k2) */
1445 UNUSED(J
); lua_assert(0); return FAILFOLD
;
1451 LJFOLD(BAND BAND any
)
1453 LJFOLDF(reassoc_dup
)
1455 if (fins
->op2
== fleft
->op1
|| fins
->op2
== fleft
->op2
)
1456 return LEFTFOLD
; /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */
1460 LJFOLD(BXOR BXOR any
)
1461 LJFOLDF(reassoc_bxor
)
1464 if (fins
->op2
== fleft
->op1
) /* (a xor b) xor a ==> b */
1466 if (fins
->op2
== fleft
->op2
) /* (a xor b) xor b ==> a */
1471 LJFOLD(BSHL BSHL KINT
)
1472 LJFOLD(BSHR BSHR KINT
)
1473 LJFOLD(BSAR BSAR KINT
)
1474 LJFOLD(BROL BROL KINT
)
1475 LJFOLD(BROR BROR KINT
)
1476 LJFOLDF(reassoc_shift
)
1478 IRIns
*irk
= IR(fleft
->op2
);
1479 PHIBARRIER(fleft
); /* The (shift any KINT) rule covers k2 == 0 and more. */
1480 if (irk
->o
== IR_KINT
) { /* (i o k1) o k2 ==> i o (k1 + k2) */
1481 int32_t mask
= irt_is64(fins
->t
) ? 63 : 31;
1482 int32_t k
= (irk
->i
& mask
) + (fright
->i
& mask
);
1483 if (k
> mask
) { /* Combined shift too wide? */
1484 if (fins
->o
== IR_BSHL
|| fins
->o
== IR_BSHR
)
1485 return mask
== 31 ? INTFOLD(0) : INT64FOLD(0);
1486 else if (fins
->o
== IR_BSAR
)
1491 fins
->op1
= fleft
->op1
;
1492 fins
->op2
= (IRRef1
)lj_ir_kint(J
, k
);
1498 LJFOLD(MIN MIN KNUM
)
1499 LJFOLD(MAX MAX KNUM
)
1500 LJFOLD(MIN MIN KINT
)
1501 LJFOLD(MAX MAX KINT
)
1502 LJFOLDF(reassoc_minmax_k
)
1504 IRIns
*irk
= IR(fleft
->op2
);
1505 if (irk
->o
== IR_KNUM
) {
1506 lua_Number a
= ir_knum(irk
)->n
;
1507 lua_Number y
= lj_vm_foldarith(a
, knumright
, fins
->o
- IR_ADD
);
1508 if (a
== y
) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
1511 fins
->op1
= fleft
->op1
;
1512 fins
->op2
= (IRRef1
)lj_ir_knum(J
, y
);
1513 return RETRYFOLD
; /* (x o k1) o k2 ==> x o (k1 o k2) */
1514 } else if (irk
->o
== IR_KINT
) {
1516 int32_t y
= kfold_intop(a
, fright
->i
, fins
->o
);
1517 if (a
== y
) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
1520 fins
->op1
= fleft
->op1
;
1521 fins
->op2
= (IRRef1
)lj_ir_kint(J
, y
);
1522 return RETRYFOLD
; /* (x o k1) o k2 ==> x o (k1 o k2) */
1529 LJFOLDF(reassoc_minmax_left
)
1531 if (fins
->op2
== fleft
->op1
|| fins
->op2
== fleft
->op2
)
1532 return RIGHTFOLD
; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */
1538 LJFOLDF(reassoc_minmax_right
)
1540 if (fins
->op1
== fright
->op1
|| fins
->op1
== fright
->op2
)
1541 return LEFTFOLD
; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */
1545 /* -- Array bounds check elimination -------------------------------------- */
1547 /* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
1548 ** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
1549 ** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
1554 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_ABC
)) {
1555 if (irref_isk(fright
->op2
)) {
1556 IRIns
*add2
= IR(fright
->op1
);
1557 if (add2
->o
== IR_ADD
&& irref_isk(add2
->op2
) &&
1558 IR(fright
->op2
)->i
== -IR(add2
->op2
)->i
) {
1559 IRRef ref
= J
->chain
[IR_ABC
];
1560 IRRef lim
= add2
->op1
;
1561 if (fins
->op1
> lim
) lim
= fins
->op1
;
1563 IRIns
*ir
= IR(ref
);
1564 if (ir
->op1
== fins
->op1
&& ir
->op2
== add2
->op1
)
1574 /* Eliminate ABC for constants.
1575 ** ABC(asize, k1), ABC(asize k2) ==> ABC(asize, max(k1, k2))
1576 ** Drop second ABC if k2 is lower. Otherwise patch first ABC with k2.
1578 LJFOLD(ABC any KINT
)
1581 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_ABC
)) {
1582 IRRef ref
= J
->chain
[IR_ABC
];
1583 IRRef asize
= fins
->op1
;
1584 while (ref
> asize
) {
1585 IRIns
*ir
= IR(ref
);
1586 if (ir
->op1
== asize
&& irref_isk(ir
->op2
)) {
1587 int32_t k
= IR(ir
->op2
)->i
;
1589 ir
->op2
= fins
->op2
;
1594 return EMITFOLD
; /* Already performed CSE. */
1599 /* Eliminate invariant ABC inside loop. */
1603 if (!irt_isint(fins
->t
) && J
->chain
[IR_LOOP
]) /* Currently marked as PTR. */
1608 /* -- Commutativity ------------------------------------------------------- */
1610 /* The refs of commutative ops are canonicalized. Lower refs go to the right.
1611 ** Rationale behind this:
1612 ** - It (also) moves constants to the right.
1613 ** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices).
1614 ** - It helps CSE to find more matches.
1615 ** - The assembler generates better code with constants at the right.
1620 LJFOLD(ADDOV any any
)
1621 LJFOLD(MULOV any any
)
1624 if (fins
->op1
< fins
->op2
) { /* Move lower ref to the right. */
1625 IRRef1 tmp
= fins
->op1
;
1626 fins
->op1
= fins
->op2
;
1637 /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
1638 if (fins
->op1
== fins
->op2
&& !irt_isnum(fins
->t
))
1639 return CONDFOLD(fins
->o
== IR_EQ
);
1640 return fold_comm_swap(J
);
1653 /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
1654 if (fins
->op1
== fins
->op2
&& !irt_isnum(fins
->t
))
1655 return CONDFOLD((fins
->o
^ (fins
->o
>> 1)) & 1);
1656 if (fins
->op1
< fins
->op2
) { /* Move lower ref to the right. */
1657 IRRef1 tmp
= fins
->op1
;
1658 fins
->op1
= fins
->op2
;
1660 fins
->o
^= 3; /* GT <-> LT, GE <-> LE, does not affect U */
1666 LJFOLD(BAND any any
)
1672 if (fins
->op1
== fins
->op2
) /* x o x ==> x */
1674 return fold_comm_swap(J
);
1677 LJFOLD(BXOR any any
)
1680 if (fins
->op1
== fins
->op2
) /* i xor i ==> 0 */
1681 return irt_is64(fins
->t
) ? INT64FOLD(0) : INTFOLD(0);
1682 return fold_comm_swap(J
);
1685 /* -- Simplification of compound expressions ------------------------------ */
1687 static TRef
kfold_xload(jit_State
*J
, IRIns
*ir
, const void *p
)
1690 switch (irt_type(ir
->t
)) {
1691 case IRT_NUM
: return lj_ir_knum_u64(J
, *(uint64_t *)p
);
1692 case IRT_I8
: k
= (int32_t)*(int8_t *)p
; break;
1693 case IRT_U8
: k
= (int32_t)*(uint8_t *)p
; break;
1694 case IRT_I16
: k
= (int32_t)(int16_t)lj_getu16(p
); break;
1695 case IRT_U16
: k
= (int32_t)(uint16_t)lj_getu16(p
); break;
1696 case IRT_INT
: case IRT_U32
: k
= (int32_t)lj_getu32(p
); break;
1697 case IRT_I64
: case IRT_U64
: return lj_ir_kint64(J
, *(uint64_t *)p
);
1700 return lj_ir_kint(J
, k
);
1703 /* Turn: string.sub(str, a, b) == kstr
1704 ** into: string.byte(str, a) == string.byte(kstr, 1) etc.
1705 ** Note: this creates unaligned XLOADs on x86/x64.
1709 LJFOLDF(merge_eqne_snew_kgc
)
1711 GCstr
*kstr
= ir_kstr(fright
);
1712 int32_t len
= (int32_t)kstr
->len
;
1713 lua_assert(irt_isstr(fins
->t
));
1715 #if LJ_TARGET_X86ORX64
1716 #define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */
1717 #define FOLD_SNEW_TYPE8 IRT_I8 /* Creates shorter immediates. */
1719 #define FOLD_SNEW_MAX_LEN 1 /* Handle string lengths 0 or 1. */
1720 #define FOLD_SNEW_TYPE8 IRT_U8 /* Prefer unsigned loads. */
1723 if (len
<= FOLD_SNEW_MAX_LEN
) {
1724 IROp op
= (IROp
)fins
->o
;
1725 IRRef strref
= fleft
->op1
;
1726 lua_assert(IR(strref
)->o
== IR_STRREF
);
1728 emitir(IRTGI(IR_EQ
), fleft
->op2
, lj_ir_kint(J
, len
));
1729 /* Caveat: fins/fleft/fright is no longer valid after emitir. */
1731 /* NE is not expanded since this would need an OR of two conds. */
1732 if (!irref_isk(fleft
->op2
)) /* Only handle the constant length case. */
1734 if (IR(fleft
->op2
)->i
!= len
)
1738 /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
1739 uint16_t ot
= (uint16_t)(len
== 1 ? IRT(IR_XLOAD
, FOLD_SNEW_TYPE8
) :
1740 len
== 2 ? IRT(IR_XLOAD
, IRT_U16
) :
1742 TRef tmp
= emitir(ot
, strref
,
1743 IRXLOAD_READONLY
| (len
> 1 ? IRXLOAD_UNALIGNED
: 0));
1744 TRef val
= kfold_xload(J
, IR(tref_ref(tmp
)), strdata(kstr
));
1746 tmp
= emitir(IRTI(IR_BAND
), tmp
,
1747 lj_ir_kint(J
, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
1748 fins
->op1
= (IRRef1
)tmp
;
1749 fins
->op2
= (IRRef1
)val
;
1750 fins
->ot
= (IROpT
)IRTGI(op
);
1759 /* -- Loads --------------------------------------------------------------- */
1761 /* Loads cannot be folded or passed on to CSE in general.
1762 ** Alias analysis is needed to check for forwarding opportunities.
1764 ** Caveat: *all* loads must be listed here or they end up at CSE!
1768 LJFOLDX(lj_opt_fwd_aload
)
1770 /* From HREF fwd (see below). Must eliminate, not supported by fwd/backend. */
1772 LJFOLDF(kfold_hload_kkptr
)
1775 lua_assert(ir_kptr(fleft
) == niltvg(J2G(J
)));
1780 LJFOLDX(lj_opt_fwd_hload
)
1783 LJFOLDX(lj_opt_fwd_uload
)
1785 LJFOLD(CALLL any IRCALL_lj_tab_len
)
1786 LJFOLDX(lj_opt_fwd_tab_len
)
1788 /* Upvalue refs are really loads, but there are no corresponding stores.
1789 ** So CSE is ok for them, except for UREFO across a GC step (see below).
1790 ** If the referenced function is const, its upvalue addresses are const, too.
1791 ** This can be used to improve CSE by looking for the same address,
1792 ** even if the upvalues originate from a different function.
1794 LJFOLD(UREFO KGC any
)
1795 LJFOLD(UREFC KGC any
)
1798 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_CSE
)) {
1799 IRRef ref
= J
->chain
[fins
->o
];
1800 GCfunc
*fn
= ir_kfunc(fleft
);
1801 GCupval
*uv
= gco2uv(gcref(fn
->l
.uvptr
[(fins
->op2
>> 8)]));
1803 IRIns
*ir
= IR(ref
);
1804 if (irref_isk(ir
->op1
)) {
1805 GCfunc
*fn2
= ir_kfunc(IR(ir
->op1
));
1806 if (gco2uv(gcref(fn2
->l
.uvptr
[(ir
->op2
>> 8)])) == uv
) {
1807 if (fins
->o
== IR_UREFO
&& gcstep_barrier(J
, ref
))
1818 LJFOLD(HREF TNEW any
)
1819 LJFOLDF(fwd_href_tnew
)
1821 if (lj_opt_fwd_href_nokey(J
))
1822 return lj_ir_kkptr(J
, niltvg(J2G(J
)));
1826 LJFOLD(HREF TDUP KPRI
)
1827 LJFOLD(HREF TDUP KGC
)
1828 LJFOLD(HREF TDUP KNUM
)
1829 LJFOLDF(fwd_href_tdup
)
1832 lj_ir_kvalue(J
->L
, &keyv
, fright
);
1833 if (lj_tab_get(J
->L
, ir_ktab(IR(fleft
->op1
)), &keyv
) == niltvg(J2G(J
)) &&
1834 lj_opt_fwd_href_nokey(J
))
1835 return lj_ir_kkptr(J
, niltvg(J2G(J
)));
1839 /* We can safely FOLD/CSE array/hash refs and field loads, since there
1840 ** are no corresponding stores. But we need to check for any NEWREF with
1841 ** an aliased table, as it may invalidate all of the pointers and fields.
1842 ** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
1843 ** FLOADs. And NEWREF itself is treated like a store (see below).
1845 LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE
)
1846 LJFOLDF(fload_tab_tnew_asize
)
1848 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_FOLD
) && lj_opt_fwd_tptr(J
, fins
->op1
))
1849 return INTFOLD(fleft
->op1
);
1853 LJFOLD(FLOAD TNEW IRFL_TAB_HMASK
)
1854 LJFOLDF(fload_tab_tnew_hmask
)
1856 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_FOLD
) && lj_opt_fwd_tptr(J
, fins
->op1
))
1857 return INTFOLD((1 << fleft
->op2
)-1);
1861 LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE
)
1862 LJFOLDF(fload_tab_tdup_asize
)
1864 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_FOLD
) && lj_opt_fwd_tptr(J
, fins
->op1
))
1865 return INTFOLD((int32_t)ir_ktab(IR(fleft
->op1
))->asize
);
1869 LJFOLD(FLOAD TDUP IRFL_TAB_HMASK
)
1870 LJFOLDF(fload_tab_tdup_hmask
)
1872 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_FOLD
) && lj_opt_fwd_tptr(J
, fins
->op1
))
1873 return INTFOLD((int32_t)ir_ktab(IR(fleft
->op1
))->hmask
);
1877 LJFOLD(HREF any any
)
1878 LJFOLD(FLOAD any IRFL_TAB_ARRAY
)
1879 LJFOLD(FLOAD any IRFL_TAB_NODE
)
1880 LJFOLD(FLOAD any IRFL_TAB_ASIZE
)
1881 LJFOLD(FLOAD any IRFL_TAB_HMASK
)
1882 LJFOLDF(fload_tab_ah
)
1884 TRef tr
= lj_opt_cse(J
);
1885 return lj_opt_fwd_tptr(J
, tref_ref(tr
)) ? tr
: EMITFOLD
;
1888 /* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
1889 LJFOLD(FLOAD KGC IRFL_STR_LEN
)
1890 LJFOLDF(fload_str_len_kgc
)
1892 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_FOLD
))
1893 return INTFOLD((int32_t)ir_kstr(fleft
)->len
);
1897 LJFOLD(FLOAD SNEW IRFL_STR_LEN
)
1898 LJFOLDF(fload_str_len_snew
)
1900 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_FOLD
)) {
1907 /* The C type ID of cdata objects is immutable. */
1908 LJFOLD(FLOAD KGC IRFL_CDATA_TYPEID
)
1909 LJFOLDF(fload_cdata_typeid_kgc
)
1911 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_FOLD
))
1912 return INTFOLD((int32_t)ir_kcdata(fleft
)->typeid);
1916 /* Get the contents of immutable cdata objects. */
1917 LJFOLD(FLOAD KGC IRFL_CDATA_PTR
)
1918 LJFOLD(FLOAD KGC IRFL_CDATA_INT64
)
1919 LJFOLDF(fload_cdata_int64_kgc
)
1921 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_FOLD
)) {
1922 void *p
= cdataptr(ir_kcdata(fleft
));
1923 if (irt_is64(fins
->t
))
1924 return INT64FOLD(*(uint64_t *)p
);
1926 return INTFOLD(*(int32_t *)p
);
1931 LJFOLD(FLOAD CNEW IRFL_CDATA_TYPEID
)
1932 LJFOLD(FLOAD CNEWI IRFL_CDATA_TYPEID
)
1933 LJFOLDF(fload_cdata_typeid_cnew
)
1935 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_FOLD
))
1936 return fleft
->op1
; /* No PHI barrier needed. CNEW/CNEWI op1 is const. */
1940 /* Pointer and int64 cdata objects are immutable. */
1941 LJFOLD(FLOAD CNEWI IRFL_CDATA_PTR
)
1942 LJFOLD(FLOAD CNEWI IRFL_CDATA_INT64
)
1943 LJFOLDF(fload_cdata_ptr_int64_cnew
)
1945 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_FOLD
))
1946 return fleft
->op2
; /* Fold even across PHI to avoid allocations. */
1950 LJFOLD(FLOAD any IRFL_STR_LEN
)
1951 LJFOLD(FLOAD any IRFL_CDATA_TYPEID
)
1952 LJFOLD(FLOAD any IRFL_CDATA_PTR
)
1953 LJFOLD(FLOAD any IRFL_CDATA_INT64
)
1954 LJFOLD(VLOAD any any
) /* Vararg loads have no corresponding stores. */
1957 /* All other field loads need alias analysis. */
1958 LJFOLD(FLOAD any any
)
1959 LJFOLDX(lj_opt_fwd_fload
)
1961 /* This is for LOOP only. Recording handles SLOADs internally. */
1962 LJFOLD(SLOAD any any
)
1965 if ((fins
->op2
& IRSLOAD_FRAME
)) {
1966 TRef tr
= lj_opt_cse(J
);
1967 return tref_ref(tr
) < J
->chain
[IR_RETF
] ? EMITFOLD
: tr
;
1969 lua_assert(J
->slot
[fins
->op1
] != 0);
1970 return J
->slot
[fins
->op1
];
1974 /* Only fold for KKPTR. The pointer _and_ the contents must be const. */
1975 LJFOLD(XLOAD KKPTR any
)
1978 TRef tr
= kfold_xload(J
, fins
, ir_kptr(fleft
));
1979 return tr
? tr
: NEXTFOLD
;
1982 LJFOLD(XLOAD any any
)
1983 LJFOLDX(lj_opt_fwd_xload
)
1985 /* -- Write barriers ------------------------------------------------------ */
1987 /* Write barriers are amenable to CSE, but not across any incremental
1990 ** The same logic applies to open upvalue references, because a stack
1991 ** may be resized during a GC step (not the current stack, but maybe that
1995 LJFOLD(OBAR any any
)
1996 LJFOLD(UREFO any any
)
1997 LJFOLDF(barrier_tab
)
1999 TRef tr
= lj_opt_cse(J
);
2000 if (gcstep_barrier(J
, tref_ref(tr
))) /* CSE across GC step? */
2001 return EMITFOLD
; /* Raw emit. Assumes fins is left intact by CSE. */
2007 LJFOLDF(barrier_tnew_tdup
)
2009 /* New tables are always white and never need a barrier. */
2010 if (fins
->op1
< J
->chain
[IR_LOOP
]) /* Except across a GC step. */
2015 /* -- Stores and allocations ---------------------------------------------- */
2017 /* Stores and allocations cannot be folded or passed on to CSE in general.
2018 ** But some stores can be eliminated with dead-store elimination (DSE).
2020 ** Caveat: *all* stores and allocs must be listed here or they end up at CSE!
2023 LJFOLD(ASTORE any any
)
2024 LJFOLD(HSTORE any any
)
2025 LJFOLDX(lj_opt_dse_ahstore
)
2027 LJFOLD(USTORE any any
)
2028 LJFOLDX(lj_opt_dse_ustore
)
2030 LJFOLD(FSTORE any any
)
2031 LJFOLDX(lj_opt_dse_fstore
)
2033 LJFOLD(XSTORE any any
)
2034 LJFOLDX(lj_opt_dse_xstore
)
2036 LJFOLD(NEWREF any any
) /* Treated like a store. */
2037 LJFOLD(CALLS any any
)
2038 LJFOLD(CALLL any any
) /* Safeguard fallback. */
2039 LJFOLD(CALLXS any any
)
2041 LJFOLD(RETF any any
) /* Modifies BASE. */
2042 LJFOLD(TNEW any any
)
2044 LJFOLD(CNEW any any
)
2045 LJFOLD(XSNEW any any
)
2048 /* ------------------------------------------------------------------------ */
2050 /* Every entry in the generated hash table is a 32 bit pattern:
2052 ** xxxxxxxx iiiiiii lllllll rrrrrrrrrr
2054 ** xxxxxxxx = 8 bit index into fold function table
2055 ** iiiiiii = 7 bit folded instruction opcode
2056 ** lllllll = 7 bit left instruction opcode
2057 ** rrrrrrrrrr = 8 bit right instruction opcode or 10 bits from literal field
2060 #include "lj_folddef.h"
2062 /* ------------------------------------------------------------------------ */
2064 /* Fold IR instruction. */
2065 TRef LJ_FASTCALL
lj_opt_fold(jit_State
*J
)
2070 if (LJ_UNLIKELY((J
->flags
& JIT_F_OPT_MASK
) != JIT_F_OPT_DEFAULT
)) {
2071 lua_assert(((JIT_F_OPT_FOLD
|JIT_F_OPT_FWD
|JIT_F_OPT_CSE
|JIT_F_OPT_DSE
) |
2072 JIT_F_OPT_DEFAULT
) == JIT_F_OPT_DEFAULT
);
2073 /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
2074 if (!(J
->flags
& JIT_F_OPT_FOLD
) && irm_kind(lj_ir_mode
[fins
->o
]) == IRM_N
)
2075 return lj_opt_cse(J
);
2077 /* Forwarding or CSE disabled? Emit raw IR for loads, except for SLOAD. */
2078 if ((J
->flags
& (JIT_F_OPT_FWD
|JIT_F_OPT_CSE
)) !=
2079 (JIT_F_OPT_FWD
|JIT_F_OPT_CSE
) &&
2080 irm_kind(lj_ir_mode
[fins
->o
]) == IRM_L
&& fins
->o
!= IR_SLOAD
)
2081 return lj_ir_emit(J
);
2083 /* DSE disabled? Emit raw IR for stores. */
2084 if (!(J
->flags
& JIT_F_OPT_DSE
) && irm_kind(lj_ir_mode
[fins
->o
]) == IRM_S
)
2085 return lj_ir_emit(J
);
2088 /* Fold engine start/retry point. */
2090 /* Construct key from opcode and operand opcodes (unless literal/none). */
2091 key
= ((uint32_t)fins
->o
<< 17);
2092 if (fins
->op1
>= J
->cur
.nk
) {
2093 key
+= (uint32_t)IR(fins
->op1
)->o
<< 10;
2094 *fleft
= *IR(fins
->op1
);
2096 if (fins
->op2
>= J
->cur
.nk
) {
2097 key
+= (uint32_t)IR(fins
->op2
)->o
;
2098 *fright
= *IR(fins
->op2
);
2100 key
+= (fins
->op2
& 0x3ffu
); /* Literal mask. Must include IRCONV_*MASK. */
2103 /* Check for a match in order from most specific to least specific. */
2106 uint32_t k
= key
| (any
& 0x1ffff);
2107 uint32_t h
= fold_hashkey(k
);
2108 uint32_t fh
= fold_hash
[h
]; /* Lookup key in semi-perfect hash table. */
2109 if ((fh
& 0xffffff) == k
|| (fh
= fold_hash
[h
+1], (fh
& 0xffffff) == k
)) {
2110 ref
= (IRRef
)tref_ref(fold_func
[fh
>> 24](J
));
2111 if (ref
!= NEXTFOLD
)
2114 if (any
== 0xfffff) /* Exhausted folding. Pass on to CSE. */
2115 return lj_opt_cse(J
);
2116 any
= (any
| (any
>> 10)) ^ 0xffc00;
2119 /* Return value processing, ordered by frequency. */
2120 if (LJ_LIKELY(ref
>= MAX_FOLD
))
2121 return TREF(ref
, irt_t(IR(ref
)->t
));
2122 if (ref
== RETRYFOLD
)
2124 if (ref
== KINTFOLD
)
2125 return lj_ir_kint(J
, fins
->i
);
2126 if (ref
== FAILFOLD
)
2127 lj_trace_err(J
, LJ_TRERR_GFAIL
);
2128 lua_assert(ref
== DROPFOLD
);
2132 /* -- Common-Subexpression Elimination ------------------------------------ */
2134 /* CSE an IR instruction. This is very fast due to the skip-list chains. */
2135 TRef LJ_FASTCALL
lj_opt_cse(jit_State
*J
)
2137 /* Avoid narrow to wide store-to-load forwarding stall */
2138 IRRef2 op12
= (IRRef2
)fins
->op1
+ ((IRRef2
)fins
->op2
<< 16);
2140 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_CSE
)) {
2141 /* Limited search for same operands in per-opcode chain. */
2142 IRRef ref
= J
->chain
[op
];
2143 IRRef lim
= fins
->op1
;
2144 if (fins
->op2
> lim
) lim
= fins
->op2
; /* Relies on lit < REF_BIAS. */
2146 if (IR(ref
)->op12
== op12
)
2147 return TREF(ref
, irt_t(IR(ref
)->t
)); /* Common subexpression found. */
2148 ref
= IR(ref
)->prev
;
2151 /* Otherwise emit IR (inlined for speed). */
2153 IRRef ref
= lj_ir_nextins(J
);
2154 IRIns
*ir
= IR(ref
);
2155 ir
->prev
= J
->chain
[op
];
2157 J
->chain
[op
] = (IRRef1
)ref
;
2159 J
->guardemit
.irt
|= fins
->t
.irt
;
2160 return TREF(ref
, irt_t((ir
->t
= fins
->t
)));
2164 /* CSE with explicit search limit. */
2165 TRef LJ_FASTCALL
lj_opt_cselim(jit_State
*J
, IRRef lim
)
2167 IRRef ref
= J
->chain
[fins
->o
];
2168 IRRef2 op12
= (IRRef2
)fins
->op1
+ ((IRRef2
)fins
->op2
<< 16);
2170 if (IR(ref
)->op12
== op12
)
2172 ref
= IR(ref
)->prev
;
2174 return lj_ir_emit(J
);
2177 /* ------------------------------------------------------------------------ */