/*
** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
** CSE: Common-Subexpression Elimination.
** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_fold_c
#define LUA_CORE

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_str.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#include "lj_vm.h"
/* Here's a short description of how the FOLD engine processes instructions:
**
** The FOLD engine receives a single instruction stored in fins (J->fold.ins).
** The instruction and its operands are used to select matching fold rules.
** These are applied iteratively until a fixed point is reached.
**
** The 8 bit opcode of the instruction itself plus the opcodes of the
** two instructions referenced by its operands form a 24 bit key
** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits).
**
** This key is used for partial matching against the fold rules. The
** left/right operand fields of the key are successively masked with
** the 'any' wildcard, from most specific to least specific:
**
**   ins left right
**   ins any  right
**   ins left any
**   ins any  any
**
** The masked key is used to look up a matching fold rule in a semi-perfect
** hash table. If a matching rule is found, the related fold function is run.
** Multiple rules can share the same fold function. A fold rule may return
** one of several special values:
**
** - NEXTFOLD means no folding was applied, because an additional test
**   inside the fold function failed. Matching continues against less
**   specific fold rules. Finally the instruction is passed on to CSE.
**
** - RETRYFOLD means the instruction was modified in-place. Folding is
**   retried as if this instruction had just been received.
**
** All other return values are terminal actions -- no further folding is
** applied:
**
** - INTFOLD(i) returns a reference to the integer constant i.
**
** - LEFTFOLD and RIGHTFOLD return the left/right operand reference
**   without emitting an instruction.
**
** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit
**   it without passing through any further optimizations.
**
** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have
**   no result (e.g. guarded assertions): FAILFOLD means the guard would
**   always fail, i.e. the current trace is pointless. DROPFOLD means
**   the guard is always true and has been eliminated. CONDFOLD is a
**   shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail).
**
** - Any other return value is interpreted as an IRRef or TRef. This
**   can be a reference to an existing or a newly created instruction.
**   Only the least-significant 16 bits (IRRef1) are used to form a TRef
**   which is finally returned to the caller.
**
** The FOLD engine receives instructions both from the trace recorder and
** substituted instructions from LOOP unrolling. This means all types
** of instructions may end up here, even though the recorder bypasses
** FOLD in some cases. Thus all loads, stores and allocations must have
** an any/any rule to avoid being passed on to CSE.
**
** Carefully read the following requirements before adding or modifying
** any fold rules:
**
** Requirement #1: All fold rules must preserve their destination type.
**
** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result).
** Never use lj_ir_knumint() which can have either a KINT or KNUM result.
**
** Requirement #2: Fold rules should not create *new* instructions which
** reference operands *across* PHIs.
**
** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the
** left operand is a PHI. Then fleft->op1 would point across the PHI
** frontier to an invariant instruction. Adding a PHI for this instruction
** would be counterproductive. The solution is to add a barrier which
** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case.
** The only exception is for recurrences with high latencies like
** repeated int->num->int conversions.
**
** One could relax this condition a bit if the referenced instruction is
** a PHI, too. But this often leads to worse code due to excessive
** register shuffling.
**
** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though.
** Even returning fleft->op1 would be ok, because a new PHI will be added,
** if needed. But again, this leads to excessive register shuffling and
** should be avoided.
**
** Requirement #3: The set of all fold rules must be monotonic to guarantee
** termination.
**
** The goal is optimization, so one primarily wants to add strength-reducing
** rules. This means eliminating an instruction or replacing an instruction
** with one or more simpler instructions. Don't add fold rules which point
** in the other direction.
**
** Some rules (like commutativity) do not directly reduce the strength of
** an instruction, but enable other fold rules (e.g. by moving constants
** to the right operand). These rules must be made unidirectional to avoid
** cycles.
**
** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it.
*/
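
/* Illustrative example (comment only, not part of the build): suppose fins
** is an ADD whose operands are both KINT constants. The initial 24 bit key
** is 'ADD KINT KINT'. If no rule matches, the key is relaxed to
** 'ADD any KINT', then 'ADD KINT any', then 'ADD any any', before the
** instruction is finally handed to CSE. The (ADD KINT KINT) rule below
** matches on the first, most specific step and folds to a constant.
*/
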
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)		(&J->cur.ir[(ref)])
#define fins		(&J->fold.ins)
#define fleft		(&J->fold.left)
#define fright		(&J->fold.right)
#define knumleft	(ir_knum(fleft)->n)
#define knumright	(ir_knum(fright)->n)

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Fold function type. Fastcall on x86 significantly reduces their size. */
typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);

/* Macros for the fold specs, so buildvm can recognize them. */
#define LJFOLD(x)
#define LJFOLDX(x)
#define LJFOLDF(name)	static TRef LJ_FASTCALL fold_##name(jit_State *J)
/* Note: They must be at the start of a line or buildvm ignores them! */

/* Barrier to prevent using operands across PHIs. */
#define PHIBARRIER(ir)	if (irt_isphi((ir)->t)) return NEXTFOLD

/* Barrier to prevent folding across a GC step.
** GC steps can only happen at the head of a trace and at LOOP.
** And the GC is only driven forward if there is at least one allocation.
*/
#define gcstep_barrier(J, ref) \
  ((ref) < J->chain[IR_LOOP] && \
   (J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
    J->chain[IR_SNEW] || J->chain[IR_TOSTR]))
/* -- Constant folding ---------------------------------------------------- */

LJFOLD(ADD KNUM KNUM)
LJFOLD(SUB KNUM KNUM)
LJFOLD(MUL KNUM KNUM)
LJFOLD(DIV KNUM KNUM)
LJFOLD(NEG KNUM KNUM)
LJFOLD(ABS KNUM KNUM)
LJFOLD(ATAN2 KNUM KNUM)
LJFOLD(LDEXP KNUM KNUM)
LJFOLD(MIN KNUM KNUM)
LJFOLD(MAX KNUM KNUM)
LJFOLDF(kfold_numarith)
{
  lua_Number a = knumleft;
  lua_Number b = knumright;
  lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
  return lj_ir_knum(J, y);
}

LJFOLD(FPMATH KNUM any)
LJFOLDF(kfold_fpmath)
{
  lua_Number a = knumleft;
  lua_Number y = lj_vm_foldfpm(a, fins->op2);
  return lj_ir_knum(J, y);
}

LJFOLD(POWI KNUM KINT)
LJFOLDF(kfold_powi)
{
  lua_Number a = knumleft;
  lua_Number b = cast_num(fright->i);
  lua_Number y = lj_vm_foldarith(a, b, IR_POWI - IR_ADD);
  return lj_ir_knum(J, y);
}

static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
{
  switch (op) {
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
  case IR_BSHL: k1 <<= (k2 & 31); break;
  case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break;
  case IR_BSAR: k1 >>= (k2 & 31); break;
  case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break;
  case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
  default: lua_assert(0); break;
  }
  return k1;
}
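
/* Worked example (comment only): kfold_intop(6, 3, IR_BSHL) computes
** 6 << 3 = 48, so the (BSHL KINT KINT) rule below folds the shift to the
** constant 48. Shift counts are masked with 31 first, matching the 32 bit
** shift semantics the backends rely on.
*/
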
LJFOLD(ADD KINT KINT)
LJFOLD(SUB KINT KINT)
LJFOLD(BAND KINT KINT)
LJFOLD(BOR KINT KINT)
LJFOLD(BXOR KINT KINT)
LJFOLD(BSHL KINT KINT)
LJFOLD(BSHR KINT KINT)
LJFOLD(BSAR KINT KINT)
LJFOLD(BROL KINT KINT)
LJFOLD(BROR KINT KINT)
LJFOLDF(kfold_intarith)
{
  return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
}

LJFOLD(BNOT KINT)
LJFOLDF(kfold_bnot)
{
  return INTFOLD(~fleft->i);
}

LJFOLD(BSWAP KINT)
LJFOLDF(kfold_bswap)
{
  return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i));
}

LJFOLD(TONUM KINT)
LJFOLDF(kfold_tonum)
{
  return lj_ir_knum(J, cast_num(fleft->i));
}

LJFOLD(TOBIT KNUM KNUM)
LJFOLDF(kfold_tobit)
{
  TValue tv;
  tv.n = knumleft + knumright;
  return INTFOLD((int32_t)tv.u32.lo);
}
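
/* Note (an assumption, for illustration): the second operand of TOBIT is
** the bias constant 2^52 + 2^51. Adding it to a number in int32 range
** shifts the integer value into the low mantissa word of the double, so
** tv.u32.lo is the wrapped 32 bit result of the conversion.
*/
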
LJFOLD(TOINT KNUM any)
LJFOLDF(kfold_toint)
{
  lua_Number n = knumleft;
  int32_t k = lj_num2int(n);
  if (irt_isguard(fins->t) && n != cast_num(k)) {
    /* We're about to create a guard which always fails, like TOINT +1.5.
    ** Some pathological loops cause this during LICM, e.g.:
    **   local x,k,t = 0,1.5,{1,[1.5]=2}
    **   for i=1,200 do x = x + t[k]; k = k == 1 and 1.5 or 1 end
    **   assert(x == 300)
    */
    return FAILFOLD;
  }
  return INTFOLD(k);
}

LJFOLD(TOSTR KNUM)
LJFOLDF(kfold_tostr_knum)
{
  return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft));
}

LJFOLD(TOSTR KINT)
LJFOLDF(kfold_tostr_kint)
{
  return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i));
}

LJFOLD(STRTO KGC)
LJFOLDF(kfold_strto)
{
  TValue n;
  if (lj_str_tonum(ir_kstr(fleft), &n))
    return lj_ir_knum(J, numV(&n));
  return FAILFOLD;
}

LJFOLD(SNEW KPTR KINT)
LJFOLDF(kfold_snew_kptr)
{
  GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i);
  return lj_ir_kstr(J, s);
}

LJFOLD(SNEW any KINT)
LJFOLDF(kfold_snew_empty)
{
  if (fright->i == 0)
    return lj_ir_kstr(J, lj_str_new(J->L, "", 0));
  return NEXTFOLD;
}

LJFOLD(STRREF KGC KINT)
LJFOLDF(kfold_strref)
{
  GCstr *str = ir_kstr(fleft);
  lua_assert((MSize)fright->i < str->len);
  return lj_ir_kptr(J, (char *)strdata(str) + fright->i);
}

LJFOLD(STRREF SNEW any)
LJFOLDF(kfold_strref_snew)
{
  PHIBARRIER(fleft);
  if (irref_isk(fins->op2) && fright->i == 0) {
    return fleft->op1;  /* strref(snew(ptr, len), 0) ==> ptr */
  } else {
    /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
    IRIns *ir = IR(fleft->op1);
    IRRef1 str = ir->op1;  /* IRIns * is not valid across emitir. */
    lua_assert(ir->o == IR_STRREF);
    PHIBARRIER(ir);
    fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2);  /* Clobbers fins! */
    fins->op1 = str;
    fins->ot = IRT(IR_STRREF, IRT_PTR);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
/* Must not use kfold_kref for numbers (could be NaN). */
LJFOLD(EQ KNUM KNUM)
LJFOLD(NE KNUM KNUM)
LJFOLD(LT KNUM KNUM)
LJFOLD(GE KNUM KNUM)
LJFOLD(LE KNUM KNUM)
LJFOLD(GT KNUM KNUM)
LJFOLD(ULT KNUM KNUM)
LJFOLD(UGE KNUM KNUM)
LJFOLD(ULE KNUM KNUM)
LJFOLD(UGT KNUM KNUM)
LJFOLDF(kfold_numcomp)
{
  return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o));
}

LJFOLD(LT KINT KINT)
LJFOLD(GE KINT KINT)
LJFOLD(LE KINT KINT)
LJFOLD(GT KINT KINT)
LJFOLD(ULT KINT KINT)
LJFOLD(UGE KINT KINT)
LJFOLD(ULE KINT KINT)
LJFOLD(UGT KINT KINT)
LJFOLD(ABC KINT KINT)
LJFOLDF(kfold_intcomp)
{
  int32_t a = fleft->i, b = fright->i;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD(a < b);
  case IR_GE: return CONDFOLD(a >= b);
  case IR_LE: return CONDFOLD(a <= b);
  case IR_GT: return CONDFOLD(a > b);
  case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b);
  case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b);
  case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
  case IR_ABC:
  case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
  default: lua_assert(0); return FAILFOLD;
  }
}

LJFOLD(CALLN CARG IRCALL_lj_str_cmp)
LJFOLDF(kfold_strcmp)
{
  if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
    GCstr *a = ir_kstr(IR(fleft->op1));
    GCstr *b = ir_kstr(IR(fleft->op2));
    return INTFOLD(lj_str_cmp(a, b));
  }
  return NEXTFOLD;
}

/* Don't constant-fold away FLOAD checks against KNULL. */
LJFOLD(EQ FLOAD KNULL)
LJFOLD(NE FLOAD KNULL)
LJFOLDX(lj_opt_cse)

/* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
LJFOLD(EQ any KNULL)
LJFOLD(NE any KNULL)
LJFOLD(EQ KNULL any)
LJFOLD(NE KNULL any)
LJFOLD(EQ KINT KINT)  /* Constants are unique, so same refs <==> same value. */
LJFOLD(NE KINT KINT)
LJFOLD(EQ KGC KGC)
LJFOLD(NE KGC KGC)
LJFOLDF(kfold_kref)
{
  return CONDFOLD((fins->op1 == fins->op2) ^ (fins->o == IR_NE));
}
/* -- Algebraic shortcuts ------------------------------------------------- */

LJFOLD(FPMATH FPMATH IRFPM_FLOOR)
LJFOLD(FPMATH FPMATH IRFPM_CEIL)
LJFOLD(FPMATH FPMATH IRFPM_TRUNC)
LJFOLDF(shortcut_round)
{
  IRFPMathOp op = (IRFPMathOp)fleft->op2;
  if (op == IRFPM_FLOOR || op == IRFPM_CEIL || op == IRFPM_TRUNC)
    return LEFTFOLD;  /* round(round_left(x)) = round_left(x) */
  return NEXTFOLD;
}

LJFOLD(FPMATH TONUM IRFPM_FLOOR)
LJFOLD(FPMATH TONUM IRFPM_CEIL)
LJFOLD(FPMATH TONUM IRFPM_TRUNC)
LJFOLD(ABS ABS KNUM)
LJFOLDF(shortcut_left)
{
  return LEFTFOLD;  /* f(g(x)) ==> g(x) */
}

LJFOLD(ABS NEG KNUM)
LJFOLDF(shortcut_dropleft)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* abs(neg(x)) ==> abs(x) */
  return RETRYFOLD;
}

/* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
LJFOLD(NEG NEG KNUM)
LJFOLD(BNOT BNOT)
LJFOLD(BSWAP BSWAP)
LJFOLDF(shortcut_leftleft)
{
  PHIBARRIER(fleft);  /* See above. Fold would be ok, but not beneficial. */
  return fleft->op1;  /* f(g(x)) ==> x */
}

LJFOLD(TONUM TOINT)
LJFOLDF(shortcut_leftleft_toint)
{
  PHIBARRIER(fleft);
  if (irt_isguard(fleft->t))  /* Only safe with a guarded TOINT. */
    return fleft->op1;  /* f(g(x)) ==> x */
  return NEXTFOLD;
}

LJFOLD(TOINT TONUM any)
LJFOLD(TOBIT TONUM KNUM)  /* The inverse must NOT be shortcut! */
LJFOLDF(shortcut_leftleft_across_phi)
{
  /* Fold even across PHI to avoid expensive int->num->int conversions. */
  return fleft->op1;  /* f(g(x)) ==> x */
}
/* -- FP algebraic simplifications ---------------------------------------- */

/* FP arithmetic is tricky -- there's not much to simplify.
** Please note the following common pitfalls before sending "improvements":
**   x+0 ==> x  is INVALID for x=-0
**   0-x ==> -x is INVALID for x=+0
**   x*0 ==> 0  is INVALID for x=-0, x=+-Inf or x=NaN
*/

LJFOLD(ADD NEG any)
LJFOLDF(simplify_numadd_negx)
{
  PHIBARRIER(fleft);
  fins->o = IR_SUB;  /* (-a) + b ==> b - a */
  fins->op1 = fins->op2;
  fins->op2 = fleft->op1;
  return RETRYFOLD;
}

LJFOLD(ADD any NEG)
LJFOLDF(simplify_numadd_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_SUB;  /* a + (-b) ==> a - b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(SUB any KNUM)
LJFOLDF(simplify_numsub_k)
{
  lua_Number n = knumright;
  if (n == 0.0)  /* x - (+-0) ==> x */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB NEG KNUM)
LJFOLDF(simplify_numsub_negk)
{
  PHIBARRIER(fleft);
  fins->op2 = fleft->op1;  /* (-x) - k ==> (-k) - x */
  fins->op1 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(SUB any NEG)
LJFOLDF(simplify_numsub_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_ADD;  /* a - (-b) ==> a + b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(MUL any KNUM)
LJFOLD(DIV any KNUM)
LJFOLDF(simplify_nummuldiv_k)
{
  lua_Number n = knumright;
  if (n == 1.0) {  /* x o 1 ==> x */
    return LEFTFOLD;
  } else if (n == -1.0) {  /* x o -1 ==> -x */
    fins->o = IR_NEG;
    fins->op2 = (IRRef1)lj_ir_knum_neg(J);
    return RETRYFOLD;
  } else if (fins->o == IR_MUL && n == 2.0) {  /* x * 2 ==> x + x */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(MUL NEG KNUM)
LJFOLD(DIV NEG KNUM)
LJFOLDF(simplify_nummuldiv_negk)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* (-a) o k ==> a o (-k) */
  fins->op2 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(MUL NEG NEG)
LJFOLD(DIV NEG NEG)
LJFOLDF(simplify_nummuldiv_negneg)
{
  PHIBARRIER(fleft);
  PHIBARRIER(fright);
  fins->op1 = fleft->op1;  /* (-a) o (-b) ==> a o b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}
LJFOLD(POWI any KINT)
LJFOLDF(simplify_powi_xk)
{
  int32_t k = fright->i;
  TRef ref = fins->op1;
  if (k == 0)  /* x ^ 0 ==> 1 */
    return lj_ir_knum_one(J);  /* Result must be a number, not an int. */
  if (k == 1)  /* x ^ 1 ==> x */
    return LEFTFOLD;
  if ((uint32_t)(k+65536) > 2*65536u)  /* Limit code explosion. */
    return NEXTFOLD;
  if (k < 0) {  /* x ^ (-k) ==> (1/x) ^ k. */
    ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
    k = -k;
  }
  /* Unroll x^k for 1 <= k <= 65536. */
  for (; (k & 1) == 0; k >>= 1)  /* Handle leading zeros. */
    ref = emitir(IRTN(IR_MUL), ref, ref);
  if ((k >>= 1) != 0) {  /* Handle trailing bits. */
    TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
    for (; k != 1; k >>= 1) {
      if (k & 1)
        ref = emitir(IRTN(IR_MUL), ref, tmp);
      tmp = emitir(IRTN(IR_MUL), tmp, tmp);
    }
    ref = emitir(IRTN(IR_MUL), ref, tmp);
  }
  return ref;
}
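
/* Worked example (comment only): for x^13 (k = 13 = 0b1101) the unrolling
** above emits tmp = x^2, then tmp = x^4, ref = x*x^4 = x^5, tmp = x^8 and
** finally ref = x^5 * x^8 = x^13 -- five MULs instead of a call, which is
** why k is capped at 65536 to limit code growth.
*/
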
LJFOLD(POWI KNUM any)
LJFOLDF(simplify_powi_kx)
{
  lua_Number n = knumleft;
  if (n == 2.0) {  /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
    fins->o = IR_TONUM;
    fins->op1 = fins->op2;
    fins->op2 = 0;
    fins->op2 = (IRRef1)lj_opt_fold(J);
    fins->op1 = (IRRef1)lj_ir_knum_one(J);
    fins->o = IR_LDEXP;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* -- FP conversion narrowing --------------------------------------------- */

LJFOLD(TOINT ADD any)
LJFOLD(TOINT SUB any)
LJFOLD(TOBIT ADD KNUM)
LJFOLD(TOBIT SUB KNUM)
LJFOLDF(narrow_convert)
{
  PHIBARRIER(fleft);
  /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
  if (J->chain[IR_LOOP])
    return NEXTFOLD;
  return lj_opt_narrow_convert(J);
}

/* Relaxed CSE rule for TOINT allows commoning with stronger checks, too. */
LJFOLD(TOINT any any)
LJFOLDF(cse_toint)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref, op1 = fins->op1;
    uint8_t guard = irt_isguard(fins->t);
    for (ref = J->chain[IR_TOINT]; ref > op1; ref = IR(ref)->prev)
      if (IR(ref)->op1 == op1 && irt_isguard(IR(ref)->t) >= guard)
        return ref;
  }
  return EMITFOLD;  /* No fallthrough to regular CSE. */
}
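
/* Note (comment only): the relaxed rule above lets an unguarded TOINT
** reuse an existing guarded TOINT of the same operand, since the guarded
** variant performs a strictly stronger check. The reverse substitution
** would silently drop a check, hence the >= comparison on the guard flags.
*/
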
/* -- Integer algebraic simplifications ----------------------------------- */

LJFOLD(ADD any KINT)
LJFOLD(ADDOV any KINT)
LJFOLD(SUBOV any KINT)
LJFOLDF(simplify_intadd_k)
{
  if (fright->i == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB any KINT)
LJFOLDF(simplify_intsub_k)
{
  if (fright->i == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i);  /* Overflow for -2^31 ok. */
  return RETRYFOLD;
}

LJFOLD(SUB any any)
LJFOLD(SUBOV any any)
LJFOLDF(simplify_intsub)
{
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))  /* i - i ==> 0 */
    return INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB ADD any)
LJFOLDF(simplify_intsubadd_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1)  /* (i + j) - i ==> j */
      return fleft->op2;
    if (fins->op2 == fleft->op2)  /* (i + j) - j ==> i */
      return fleft->op1;
  }
  return NEXTFOLD;
}

LJFOLD(SUB SUB any)
LJFOLDF(simplify_intsubsub_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op1 == fleft->op1) {  /* (i - j) - i ==> 0 - j */
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      fins->op2 = fleft->op2;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB any SUB)
LJFOLDF(simplify_intsubsub_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1)  /* i - (i - j) ==> j */
      return fright->op2;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any ADD)
LJFOLDF(simplify_intsubadd_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1) {  /* i - (i + j) ==> 0 - j */
      fins->op2 = fright->op2;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
    if (fins->op1 == fright->op2) {  /* i - (j + i) ==> 0 - j */
      fins->op2 = fright->op1;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB ADD ADD)
LJFOLDF(simplify_intsubaddadd_cancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    PHIBARRIER(fright);
    if (fleft->op1 == fright->op1) {  /* (i + j1) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op1 == fright->op2) {  /* (i + j1) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op1) {  /* (j1 + i) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op2) {  /* (j1 + i) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}
LJFOLD(BAND any KINT)
LJFOLDF(simplify_band_k)
{
  if (fright->i == 0)  /* i & 0 ==> 0 */
    return RIGHTFOLD;
  if (fright->i == -1)  /* i & -1 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BOR any KINT)
LJFOLDF(simplify_bor_k)
{
  if (fright->i == 0)  /* i | 0 ==> i */
    return LEFTFOLD;
  if (fright->i == -1)  /* i | -1 ==> -1 */
    return RIGHTFOLD;
  return NEXTFOLD;
}

LJFOLD(BXOR any KINT)
LJFOLDF(simplify_bxor_k)
{
  if (fright->i == 0)  /* i xor 0 ==> i */
    return LEFTFOLD;
  if (fright->i == -1) {  /* i xor -1 ==> ~i */
    fins->o = IR_BNOT;
    fins->op2 = 0;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BSHL any KINT)
LJFOLD(BSHR any KINT)
LJFOLD(BSAR any KINT)
LJFOLD(BROL any KINT)
LJFOLD(BROR any KINT)
LJFOLDF(simplify_shift_ik)
{
  int32_t k = (fright->i & 31);
  if (k == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  if (k != fright->i) {  /* i o k ==> i o (k & 31) */
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
  if (fins->o == IR_BROR) {  /* bror(i, k) ==> brol(i, (-k)&31) */
    fins->o = IR_BROL;
    fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&31);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BSHL any BAND)
LJFOLD(BSHR any BAND)
LJFOLD(BSAR any BAND)
LJFOLD(BROL any BAND)
LJFOLD(BROR any BAND)
LJFOLDF(simplify_shift_andk)
{
#if LJ_TARGET_MASKEDSHIFT
  IRIns *irk = IR(fright->op2);
  PHIBARRIER(fright);
  if (irk->o == IR_KINT) {  /* i o (j & 31) ==> i o j */
    int32_t k = irk->i & 31;
    if (k == 31) {
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
#endif
  return NEXTFOLD;
}

LJFOLD(BSHL KINT any)
LJFOLD(BSHR KINT any)
LJFOLDF(simplify_shift1_ki)
{
  if (fleft->i == 0)  /* 0 o i ==> 0 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSAR KINT any)
LJFOLD(BROL KINT any)
LJFOLD(BROR KINT any)
LJFOLDF(simplify_shift2_ki)
{
  if (fleft->i == 0 || fleft->i == -1)  /* 0 o i ==> 0; -1 o i ==> -1 */
    return LEFTFOLD;
  return NEXTFOLD;
}
/* -- Reassociation ------------------------------------------------------- */

LJFOLD(ADD ADD KINT)
LJFOLD(BAND BAND KINT)
LJFOLD(BOR BOR KINT)
LJFOLD(BXOR BXOR KINT)
LJFOLDF(reassoc_intarith_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT) {
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    if (k == irk->i)  /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
}

LJFOLD(MIN MIN any)
LJFOLD(MAX MAX any)
LJFOLD(BAND BAND any)
LJFOLD(BOR BOR any)
LJFOLDF(reassoc_dup)
{
  PHIBARRIER(fleft);
  if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
    return LEFTFOLD;  /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */
  return NEXTFOLD;
}

LJFOLD(BXOR BXOR any)
LJFOLDF(reassoc_bxor)
{
  PHIBARRIER(fleft);
  if (fins->op2 == fleft->op1)  /* (a xor b) xor a ==> b */
    return fleft->op2;
  if (fins->op2 == fleft->op2)  /* (a xor b) xor b ==> a */
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(BSHL BSHL KINT)
LJFOLD(BSHR BSHR KINT)
LJFOLD(BSAR BSAR KINT)
LJFOLD(BROL BROL KINT)
LJFOLD(BROR BROR KINT)
LJFOLDF(reassoc_shift)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);  /* The (shift any KINT) rule covers k2 == 0 and more. */
  if (irk->o == IR_KINT) {  /* (i o k1) o k2 ==> i o (k1 + k2) */
    int32_t k = (irk->i & 31) + (fright->i & 31);
    if (k > 31) {  /* Combined shift too wide? */
      if (fins->o == IR_BSHL || fins->o == IR_BSHR)
        return INTFOLD(0);
      else if (fins->o == IR_BSAR)
        k = 31;
      else
        k &= 31;
    }
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
LJFOLD(MIN MIN KNUM)
LJFOLD(MAX MAX KNUM)
LJFOLDF(reassoc_minmax_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KNUM) {
    lua_Number a = ir_knum(irk)->n;
    lua_Number b = knumright;
    lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
    if (a == y)  /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_knum(J, y);
    return RETRYFOLD;  /* (x o k1) o k2 ==> x o (k1 o k2) */
  }
  return NEXTFOLD;
}

LJFOLD(MIN MAX any)
LJFOLD(MAX MIN any)
LJFOLDF(reassoc_minmax_left)
{
  if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
    return RIGHTFOLD;  /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */
  return NEXTFOLD;
}

LJFOLD(MIN any MAX)
LJFOLD(MAX any MIN)
LJFOLDF(reassoc_minmax_right)
{
  if (fins->op1 == fright->op1 || fins->op1 == fright->op2)
    return LEFTFOLD;  /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */
  return NEXTFOLD;
}

/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
*/
LJFOLD(ABC any ADD)
LJFOLDF(reassoc_abc)
{
  if (irref_isk(fright->op2)) {
    IRIns *add2 = IR(fright->op1);
    if (add2->o == IR_ADD && irref_isk(add2->op2) &&
        IR(fright->op2)->i == -IR(add2->op2)->i) {
      IRRef ref = J->chain[IR_ABC];
      IRRef lim = add2->op1;
      if (fins->op1 > lim) lim = fins->op1;
      while (ref > lim) {
        IRIns *ir = IR(ref);
        if (ir->op1 == fins->op1 && ir->op2 == add2->op1)
          return DROPFOLD;
        ref = ir->prev;
      }
    }
  }
  return NEXTFOLD;
}
/* -- Commutativity ------------------------------------------------------- */

/* The refs of commutative ops are canonicalized. Lower refs go to the right.
** Rationale behind this:
** - It (also) moves constants to the right.
** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices).
** - It helps CSE to find more matches.
** - The assembler generates better code with constants at the right.
*/

LJFOLD(ADD any any)
LJFOLD(MUL any any)
LJFOLD(ADDOV any any)
LJFOLDF(comm_swap)
{
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(EQ any any)
LJFOLD(NE any any)
LJFOLDF(comm_equal)
{
  /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD(fins->o == IR_EQ);
  return fold_comm_swap(J);
}
LJFOLD(LT any any)
LJFOLD(GE any any)
LJFOLD(LE any any)
LJFOLD(GT any any)
LJFOLD(ULT any any)
LJFOLD(UGE any any)
LJFOLD(ULE any any)
LJFOLD(UGT any any)
LJFOLDF(comm_comp)
{
  /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD((fins->o ^ (fins->o >> 1)) & 1);
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    fins->o ^= 3;  /* GT <-> LT, GE <-> LE, does not affect U */
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
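
/* Note (an assumption, for illustration): both tricks above rely on the
** opcode layout in lj_ir.h, where LT, GE, LE, GT (and ULT..UGT) occupy
** consecutive, aligned slots. On the low two bits, (o ^ (o >> 1)) & 1 maps
** 00/01/10/11 to 0/1/1/0, so exactly the reflexive comparisons GE, LE,
** UGE and ULE fold to true. Likewise o ^= 3 swaps LT <-> GT and
** GE <-> LE, which is the comparison with its operands exchanged.
*/
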
LJFOLD(BAND any any)
LJFOLD(BOR any any)
LJFOLD(MIN any any)
LJFOLD(MAX any any)
LJFOLDF(comm_dup)
{
  if (fins->op1 == fins->op2)  /* x o x ==> x */
    return LEFTFOLD;
  return fold_comm_swap(J);
}

LJFOLD(BXOR any any)
LJFOLDF(comm_bxor)
{
  if (fins->op1 == fins->op2)  /* i xor i ==> 0 */
    return INTFOLD(0);
  return fold_comm_swap(J);
}
/* -- Simplification of compound expressions ------------------------------ */

static int32_t kfold_xload(IRIns *ir, const void *p)
{
#if !LJ_TARGET_X86ORX64
#error "Missing support for unaligned loads"
#endif
  switch (irt_type(ir->t)) {
  case IRT_I8: return (int32_t)*(int8_t *)p;
  case IRT_U8: return (int32_t)*(uint8_t *)p;
  case IRT_I16: return (int32_t)*(int16_t *)p;
  case IRT_U16: return (int32_t)*(uint16_t *)p;
  default: lua_assert(irt_isint(ir->t)); return (int32_t)*(int32_t *)p;
  }
}

/* Turn: string.sub(str, a, b) == kstr
** into: string.byte(str, a) == string.byte(kstr, 1) etc.
** Note: this creates unaligned XLOADs!
*/
LJFOLD(EQ SNEW KGC)
LJFOLD(NE SNEW KGC)
LJFOLDF(merge_eqne_snew_kgc)
{
  GCstr *kstr = ir_kstr(fright);
  int32_t len = (int32_t)kstr->len;
  lua_assert(irt_isstr(fins->t));
  if (len <= 4) {  /* Handle string lengths 0, 1, 2, 3, 4. */
    IROp op = (IROp)fins->o;
    IRRef strref = fleft->op1;
    lua_assert(IR(strref)->o == IR_STRREF);
    if (op == IR_EQ) {
      emitir(IRTGI(IR_EQ), fleft->op2, lj_ir_kint(J, len));
      /* Caveat: fins/fleft/fright is no longer valid after emitir. */
    } else {
      /* NE is not expanded since this would need an OR of two conds. */
      if (!irref_isk(fleft->op2))  /* Only handle the constant length case. */
        return NEXTFOLD;
      if (IR(fleft->op2)->i != len)
        return DROPFOLD;
    }
    if (len > 0) {
      /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
      uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, IRT_I8) :
                               len == 2 ? IRT(IR_XLOAD, IRT_U16) :
                               IRTI(IR_XLOAD));
      TRef tmp = emitir(ot, strref,
                        IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0));
      TRef val = lj_ir_kint(J, kfold_xload(IR(tref_ref(tmp)), strdata(kstr)));
      if (len == 3)
        tmp = emitir(IRTI(IR_BAND), tmp,
                     lj_ir_kint(J, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
      fins->op1 = (IRRef1)tmp;
      fins->op2 = (IRRef1)val;
      fins->ot = (IROpT)IRTGI(op);
      return RETRYFOLD;
    } else {
      return DROPFOLD;
    }
  }
  return NEXTFOLD;
}
/* -- Loads --------------------------------------------------------------- */

/* Loads cannot be folded or passed on to CSE in general.
** Alias analysis is needed to check for forwarding opportunities.
**
** Caveat: *all* loads must be listed here or they end up at CSE!
*/

LJFOLD(ALOAD any)
LJFOLDX(lj_opt_fwd_aload)

LJFOLD(HLOAD any)
LJFOLDX(lj_opt_fwd_hload)

LJFOLD(ULOAD any)
LJFOLDX(lj_opt_fwd_uload)

LJFOLD(CALLL any IRCALL_lj_tab_len)
LJFOLDX(lj_opt_fwd_tab_len)

/* Upvalue refs are really loads, but there are no corresponding stores.
** So CSE is ok for them, except for UREFO across a GC step (see below).
** If the referenced function is const, its upvalue addresses are const, too.
** This can be used to improve CSE by looking for the same address,
** even if the upvalues originate from a different function.
*/
LJFOLD(UREFO KGC any)
LJFOLD(UREFC KGC any)
LJFOLDF(cse_uref)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref = J->chain[fins->o];
    GCfunc *fn = ir_kfunc(fleft);
    GCupval *uv = gco2uv(gcref(fn->l.uvptr[fins->op2]));
    while (ref > 0) {
      IRIns *ir = IR(ref);
      if (irref_isk(ir->op1)) {
        GCfunc *fn2 = ir_kfunc(IR(ir->op1));
        if (gco2uv(gcref(fn2->l.uvptr[ir->op2])) == uv) {
          if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
            break;
          return ref;
        }
      }
      ref = ir->prev;
    }
  }
  return EMITFOLD;
}

/* We can safely FOLD/CSE array/hash refs and field loads, since there
** are no corresponding stores. But NEWREF may invalidate all of them.
** Lacking better disambiguation for table references, these optimizations
** are simply disabled across any NEWREF.
** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
** FLOADs. And NEWREF itself is treated like a store (see below).
*/
LJFOLD(HREF any any)
LJFOLDF(cse_href)
{
  TRef tr = lj_opt_cse(J);
  return tref_ref(tr) < J->chain[IR_NEWREF] ? EMITFOLD : tr;
}
LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tnew_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF])
    return INTFOLD(fleft->op1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TNEW IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tnew_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF])
    return INTFOLD((1 << fleft->op2)-1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tdup_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF])
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tdup_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF])
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask);
  return NEXTFOLD;
}

LJFOLD(FLOAD any IRFL_TAB_ARRAY)
LJFOLD(FLOAD any IRFL_TAB_NODE)
LJFOLD(FLOAD any IRFL_TAB_ASIZE)
LJFOLD(FLOAD any IRFL_TAB_HMASK)
LJFOLDF(fload_tab_ah)
{
  TRef tr = lj_opt_cse(J);
  return tref_ref(tr) < J->chain[IR_NEWREF] ? EMITFOLD : tr;
}

/* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
LJFOLD(FLOAD KGC IRFL_STR_LEN)
LJFOLDF(fload_str_len_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kstr(fleft)->len);
  return NEXTFOLD;
}

LJFOLD(FLOAD SNEW IRFL_STR_LEN)
LJFOLDF(fload_str_len_snew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    PHIBARRIER(fleft);
    return fleft->op2;
  }
  return NEXTFOLD;
}

LJFOLD(FLOAD any IRFL_STR_LEN)
LJFOLDX(lj_opt_cse)

/* All other field loads need alias analysis. */
LJFOLD(FLOAD any any)
LJFOLDX(lj_opt_fwd_fload)

/* This is for LOOP only. Recording handles SLOADs internally. */
LJFOLD(SLOAD any any)
LJFOLDF(fwd_sload)
{
  lua_assert(J->slot[fins->op1] != 0);
  return J->slot[fins->op1];
}

LJFOLD(XLOAD KPTR any)
LJFOLDF(xload_kptr)
{
  /* Only fold read-only integer loads for now. */
  if ((fins->op2 & IRXLOAD_READONLY) && irt_isinteger(fins->t))
    return INTFOLD(kfold_xload(fins, ir_kptr(fleft)));
  return NEXTFOLD;
}

/* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */
LJFOLD(XLOAD any any)
LJFOLDF(fwd_xload)
{
  IRRef ref = J->chain[IR_XLOAD];
  IRRef op1 = fins->op1;
  while (ref > op1) {
    if (IR(ref)->op1 == op1 && irt_sametype(IR(ref)->t, fins->t))
      return ref;
    ref = IR(ref)->prev;
  }
  return EMITFOLD;
}
/* -- Write barriers ------------------------------------------------------ */

/* Write barriers are amenable to CSE, but not across any incremental
** GC steps.
**
** The same logic applies to open upvalue references, because the stack
** may be resized during a GC step.
*/
LJFOLD(TBAR any)
LJFOLD(OBAR any any)
LJFOLD(UREFO any any)
LJFOLDF(barrier_tab)
{
  TRef tr = lj_opt_cse(J);
  if (gcstep_barrier(J, tref_ref(tr)))  /* CSE across GC step? */
    return EMITFOLD;  /* Raw emit. Assumes fins is left intact by CSE. */
  return tr;
}

LJFOLD(TBAR TNEW)
LJFOLD(TBAR TDUP)
LJFOLDF(barrier_tnew_tdup)
{
  /* New tables are always white and never need a barrier. */
  if (fins->op1 < J->chain[IR_LOOP])  /* Except across a GC step. */
    return NEXTFOLD;
  return DROPFOLD;
}

/* -- Stores and allocations ---------------------------------------------- */

/* Stores and allocations cannot be folded or passed on to CSE in general.
** But some stores can be eliminated with dead-store elimination (DSE).
**
** Caveat: *all* stores and allocs must be listed here or they end up at CSE!
*/

LJFOLD(ASTORE any any)
LJFOLD(HSTORE any any)
LJFOLDX(lj_opt_dse_ahstore)

LJFOLD(USTORE any any)
LJFOLDX(lj_opt_dse_ustore)

LJFOLD(FSTORE any any)
LJFOLDX(lj_opt_dse_fstore)

LJFOLD(NEWREF any any)  /* Treated like a store. */
LJFOLD(CALLS any any)
LJFOLD(CALLL any any)  /* Safeguard fallback. */
LJFOLD(TNEW any any)
LJFOLD(TDUP any)
LJFOLDX(lj_ir_emit)

/* ------------------------------------------------------------------------ */
/* Every entry in the generated hash table is a 32 bit pattern:
**
**   xxxxxxxx iiiiiiii llllllll rrrrrrrr
**
**   xxxxxxxx = 8 bit index into fold function table
**   iiiiiiii = 8 bit folded instruction opcode
**   llllllll = 8 bit left instruction opcode
**   rrrrrrrr = 8 bit right instruction opcode or 8 bits from literal field
*/
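
/* Illustrative decoding (hypothetical values -- the real numbers depend on
** the IR opcode enum and the buildvm output): an entry 0x05121111 would
** select fold function #5 for instruction opcode 0x12 with left/right
** operand opcodes 0x11/0x11, i.e. a rule of the shape (ADD KINT KINT).
*/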

#include "lj_folddef.h"

/* ------------------------------------------------------------------------ */

/* Fold IR instruction. */
TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
{
  uint32_t key, any;
  IRRef ref;

  if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
    lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
                JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT);
    /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
    if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
      return lj_opt_cse(J);

    /* Forwarding or CSE disabled? Emit raw IR for loads, except for SLOAD. */
    if ((J->flags & (JIT_F_OPT_FWD|JIT_F_OPT_CSE)) !=
        (JIT_F_OPT_FWD|JIT_F_OPT_CSE) &&
        irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD)
      return lj_ir_emit(J);

    /* DSE disabled? Emit raw IR for stores. */
    if (!(J->flags & JIT_F_OPT_DSE) && irm_kind(lj_ir_mode[fins->o]) == IRM_S)
      return lj_ir_emit(J);
  }

  /* Fold engine start/retry point. */
retry:
  /* Construct key from opcode and operand opcodes (unless literal/none). */
  key = ((uint32_t)fins->o << 16);
  if (fins->op1 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op1)->o << 8;
    *fleft = *IR(fins->op1);
  }
  if (fins->op2 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op2)->o;
    *fright = *IR(fins->op2);
  } else {
    key += (fins->op2 & 0xffu);  /* For IRFPM_* and IRFL_*. */
  }

  /* Check for a match in order from most specific to least specific. */
  any = 0;
  for (;;) {
    uint32_t k = key | any;
    uint32_t h = fold_hashkey(k);
    uint32_t fh = fold_hash[h];  /* Lookup key in semi-perfect hash table. */
    if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
      ref = (IRRef)tref_ref(fold_func[fh >> 24](J));
      if (ref != NEXTFOLD)
        break;
    }
    if (any == 0xffff)  /* Exhausted folding. Pass on to CSE. */
      return lj_opt_cse(J);
    any = (any | (any >> 8)) ^ 0xff00;
  }
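
  /* Note (comment only): 'any' steps through 0x0000 -> 0xff00 -> 0x00ff ->
  ** 0xffff, i.e. exact match first, then 'ins any right', 'ins left any'
  ** and 'ins any any', mirroring the search order described at the top.
  */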

  /* Return value processing, ordered by frequency. */
  if (LJ_LIKELY(ref >= MAX_FOLD))
    return TREF(ref, irt_t(IR(ref)->t));
  if (ref == RETRYFOLD)
    goto retry;
  if (ref == KINTFOLD)
    return lj_ir_kint(J, fins->i);
  if (ref == FAILFOLD)
    lj_trace_err(J, LJ_TRERR_GFAIL);
  lua_assert(ref == DROPFOLD);
  return REF_DROP;
}

/* -- Common-Subexpression Elimination ------------------------------------ */

/* CSE an IR instruction. This is very fast due to the skip-list chains. */
TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
{
  /* Avoid narrow-to-wide store-to-load forwarding stall. */
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  IROp op = fins->o;
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    /* Limited search for same operands in per-opcode chain. */
    IRRef ref = J->chain[op];
    IRRef lim = fins->op1;
    if (fins->op2 > lim) lim = fins->op2;  /* Relies on lit < REF_BIAS. */
    while (ref > lim) {
      if (IR(ref)->op12 == op12)
        return TREF(ref, irt_t(IR(ref)->t));  /* Common subexpression found. */
      ref = IR(ref)->prev;
    }
  }
  /* Otherwise emit IR (inlined for speed). */
  {
    IRRef ref = lj_ir_nextins(J);
    IRIns *ir = IR(ref);
    ir->prev = J->chain[op];
    ir->op12 = op12;
    J->chain[op] = (IRRef1)ref;
    ir->o = fins->o;
    J->guardemit.irt |= fins->t.irt;
    return TREF(ref, irt_t((ir->t = fins->t)));
  }
}
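
/* Note (comment only): the search starts at J->chain[op], the most recent
** instruction with this opcode, and follows the ->prev skip links. It can
** stop at lim = max(op1, op2) because the IR is in SSA order: any
** instruction using an operand must have been emitted after that operand.
*/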

/* CSE with explicit search limit. */
TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim)
{
  IRRef ref = J->chain[fins->o];
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  while (ref > lim) {
    if (IR(ref)->op12 == op12)
      return ref;
    ref = IR(ref)->prev;
  }
  return lj_ir_emit(J);
}

/* ------------------------------------------------------------------------ */

#undef IR
#undef fins
#undef fleft
#undef fright
#undef knumleft
#undef knumright
#undef emitir

#endif