/*
** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
** ABCelim: Array Bounds Check Elimination.
** CSE: Common-Subexpression Elimination.
** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_fold_c
#define LUA_CORE

#include <math.h>

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_str.h"
#include "lj_tab.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#if LJ_HASFFI
#include "lj_ctype.h"
#endif
#include "lj_carith.h"
#include "lj_vm.h"
#include "lj_strscan.h"

/* Here's a short description of how the FOLD engine processes instructions:
**
** The FOLD engine receives a single instruction stored in fins (J->fold.ins).
** The instruction and its operands are used to select matching fold rules.
** These are applied iteratively until a fixed point is reached.
**
** The 8 bit opcode of the instruction itself plus the opcodes of the
** two instructions referenced by its operands form a 24 bit key
** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits).
**
** This key is used for partial matching against the fold rules. The
** left/right operand fields of the key are successively masked with
** the 'any' wildcard, from most specific to least specific:
**
**   ins left right
**   ins any  right
**   ins left any
**   ins any  any
**
** The masked key is used to look up a matching fold rule in a semi-perfect
** hash table. If a matching rule is found, the related fold function is run.
** Multiple rules can share the same fold function. A fold rule may return
** one of several special values:
**
** - NEXTFOLD means no folding was applied, because an additional test
**   inside the fold function failed. Matching continues against less
**   specific fold rules. Finally the instruction is passed on to CSE.
**
** - RETRYFOLD means the instruction was modified in-place. Folding is
**   retried as if this instruction had just been received.
**
** All other return values are terminal actions -- no further folding is
** applied:
**
** - INTFOLD(i) returns a reference to the integer constant i.
**
** - LEFTFOLD and RIGHTFOLD return the left/right operand reference
**   without emitting an instruction.
**
** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit
**   it without passing through any further optimizations.
**
** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have
**   no result (e.g. guarded assertions): FAILFOLD means the guard would
**   always fail, i.e. the current trace is pointless. DROPFOLD means
**   the guard is always true and has been eliminated. CONDFOLD is a
**   shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail).
**
** - Any other return value is interpreted as an IRRef or TRef. This
**   can be a reference to an existing or a newly created instruction.
**   Only the least-significant 16 bits (IRRef1) are used to form a TRef
**   which is finally returned to the caller.
**
** The FOLD engine receives instructions both from the trace recorder and
** substituted instructions from LOOP unrolling. This means all types
** of instructions may end up here, even though the recorder bypasses
** FOLD in some cases. Thus all loads, stores and allocations must have
** an any/any rule to avoid being passed on to CSE.
**
** Carefully read the following requirements before adding or modifying
** any fold rules:
**
** Requirement #1: All fold rules must preserve their destination type.
**
** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result).
** Never use lj_ir_knumint() which can have either a KINT or KNUM result.
**
** Requirement #2: Fold rules should not create *new* instructions which
** reference operands *across* PHIs.
**
** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the
** left operand is a PHI. Then fleft->op1 would point across the PHI
** frontier to an invariant instruction. Adding a PHI for this instruction
** would be counterproductive. The solution is to add a barrier which
** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case.
** The only exception is for recurrences with high latencies like
** repeated int->num->int conversions.
**
** One could relax this condition a bit if the referenced instruction is
** a PHI, too. But this often leads to worse code due to excessive
** register shuffling.
**
** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though.
** Even returning fleft->op1 would be ok, because a new PHI will be added,
** if needed. But again, this leads to excessive register shuffling and
** should be avoided.
**
** Requirement #3: The set of all fold rules must be monotonic to guarantee
** termination.
**
** The goal is optimization, so one primarily wants to add strength-reducing
** rules. This means eliminating an instruction or replacing an instruction
** with one or more simpler instructions. Don't add fold rules which point
** into the other direction.
**
** Some rules (like commutativity) do not directly reduce the strength of
** an instruction, but enable other fold rules (e.g. by moving constants
** to the right operand). These rules must be made unidirectional to avoid
** cycles.
**
** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it.
*/
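
/* Worked example for the key matching above: an ADD whose operands are both
** KINT constants first tries the exact key 'ADD KINT KINT' (kfold_intarith
** below), then 'ADD any KINT' (simplify_intadd_k), then 'ADD KINT any' and
** finally 'ADD any any' (comm_swap), stopping at the first rule that does
** not return NEXTFOLD.
*/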

/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)		(&J->cur.ir[(ref)])
#define fins		(&J->fold.ins)
#define fleft		(&J->fold.left)
#define fright		(&J->fold.right)
#define knumleft	(ir_knum(fleft)->n)
#define knumright	(ir_knum(fright)->n)

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Fold function type. Fastcall on x86 significantly reduces their size. */
typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);

/* Macros for the fold specs, so buildvm can recognize them. */
#define LJFOLD(x)
#define LJFOLDX(x)
#define LJFOLDF(name)	static TRef LJ_FASTCALL fold_##name(jit_State *J)
/* Note: They must be at the start of a line or buildvm ignores them! */

/* Barrier to prevent using operands across PHIs. */
#define PHIBARRIER(ir)	if (irt_isphi((ir)->t)) return NEXTFOLD

/* Barrier to prevent folding across a GC step.
** GC steps can only happen at the head of a trace and at LOOP.
** And the GC is only driven forward if there is at least one allocation.
*/
#define gcstep_barrier(J, ref) \
  ((ref) < J->chain[IR_LOOP] && \
   (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
    J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
    J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR]))

/* -- Constant folding for FP numbers ------------------------------------- */

LJFOLD(ADD KNUM KNUM)
LJFOLD(SUB KNUM KNUM)
LJFOLD(MUL KNUM KNUM)
LJFOLD(DIV KNUM KNUM)
LJFOLD(NEG KNUM KNUM)
LJFOLD(ABS KNUM KNUM)
LJFOLD(ATAN2 KNUM KNUM)
LJFOLD(LDEXP KNUM KNUM)
LJFOLD(MIN KNUM KNUM)
LJFOLD(MAX KNUM KNUM)
LJFOLDF(kfold_numarith)
{
  lua_Number a = knumleft;
  lua_Number b = knumright;
  lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
  return lj_ir_knum(J, y);
}

LJFOLD(LDEXP KNUM KINT)
LJFOLDF(kfold_ldexp)
{
#if LJ_TARGET_X86ORX64
  UNUSED(J);
  return NEXTFOLD;
#else
  return lj_ir_knum(J, ldexp(knumleft, fright->i));
#endif
}

LJFOLD(FPMATH KNUM any)
LJFOLDF(kfold_fpmath)
{
  lua_Number a = knumleft;
  lua_Number y = lj_vm_foldfpm(a, fins->op2);
  return lj_ir_knum(J, y);
}

LJFOLD(POW KNUM KINT)
LJFOLDF(kfold_numpow)
{
  lua_Number a = knumleft;
  lua_Number b = (lua_Number)fright->i;
  lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
  return lj_ir_knum(J, y);
}

/* Must not use kfold_kref for numbers (could be NaN). */
LJFOLD(EQ KNUM KNUM)
LJFOLD(NE KNUM KNUM)
LJFOLD(LT KNUM KNUM)
LJFOLD(GE KNUM KNUM)
LJFOLD(LE KNUM KNUM)
LJFOLD(GT KNUM KNUM)
LJFOLD(ULT KNUM KNUM)
LJFOLD(UGE KNUM KNUM)
LJFOLD(ULE KNUM KNUM)
LJFOLD(UGT KNUM KNUM)
LJFOLDF(kfold_numcomp)
{
  return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o));
}

/* -- Constant folding for 32 bit integers -------------------------------- */
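
/* Shared helper for constant-folding 32 bit integer ops. Note: this is also
** reused by the shift, reassociation and min/max rules further below.
*/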
static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
{
  switch (op) {
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
  case IR_MUL: k1 *= k2; break;
  case IR_MOD: k1 = lj_vm_modi(k1, k2); break;
  case IR_NEG: k1 = -k1; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
  case IR_BSHL: k1 <<= (k2 & 31); break;
  case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break;
  case IR_BSAR: k1 >>= (k2 & 31); break;
  case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break;
  case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
  case IR_MIN: k1 = k1 < k2 ? k1 : k2; break;
  case IR_MAX: k1 = k1 > k2 ? k1 : k2; break;
  default: lua_assert(0); break;
  }
  return k1;
}

LJFOLD(ADD KINT KINT)
LJFOLD(SUB KINT KINT)
LJFOLD(MUL KINT KINT)
LJFOLD(MOD KINT KINT)
LJFOLD(NEG KINT KINT)
LJFOLD(BAND KINT KINT)
LJFOLD(BOR KINT KINT)
LJFOLD(BXOR KINT KINT)
LJFOLD(BSHL KINT KINT)
LJFOLD(BSHR KINT KINT)
LJFOLD(BSAR KINT KINT)
LJFOLD(BROL KINT KINT)
LJFOLD(BROR KINT KINT)
LJFOLD(MIN KINT KINT)
LJFOLD(MAX KINT KINT)
LJFOLDF(kfold_intarith)
{
  return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
}

LJFOLD(ADDOV KINT KINT)
LJFOLD(SUBOV KINT KINT)
LJFOLD(MULOV KINT KINT)
LJFOLDF(kfold_intovarith)
{
  lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i,
                                 fins->o - IR_ADDOV);
  int32_t k = lj_num2int(n);
  if (n != (lua_Number)k)
    return FAILFOLD;
  return INTFOLD(k);
}

LJFOLD(BNOT KINT)
LJFOLDF(kfold_bnot)
{
  return INTFOLD(~fleft->i);
}

LJFOLD(BSWAP KINT)
LJFOLDF(kfold_bswap)
{
  return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i));
}

LJFOLD(LT KINT KINT)
LJFOLD(GE KINT KINT)
LJFOLD(LE KINT KINT)
LJFOLD(GT KINT KINT)
LJFOLD(ULT KINT KINT)
LJFOLD(UGE KINT KINT)
LJFOLD(ULE KINT KINT)
LJFOLD(UGT KINT KINT)
LJFOLD(ABC KINT KINT)
LJFOLDF(kfold_intcomp)
{
  int32_t a = fleft->i, b = fright->i;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD(a < b);
  case IR_GE: return CONDFOLD(a >= b);
  case IR_LE: return CONDFOLD(a <= b);
  case IR_GT: return CONDFOLD(a > b);
  case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b);
  case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b);
  case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
  case IR_ABC:
  case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
  default: lua_assert(0); return FAILFOLD;
  }
}

LJFOLD(UGE any KINT)
LJFOLDF(kfold_intcomp0)
{
  if (fright->i == 0)
    return DROPFOLD;
  return NEXTFOLD;
}

/* -- Constant folding for 64 bit integers -------------------------------- */

static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
{
  switch (op) {
#if LJ_64 || LJ_HASFFI
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
#endif
#if LJ_HASFFI
  case IR_MUL: k1 *= k2; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
#endif
  default: UNUSED(k2); lua_assert(0); break;
  }
  return k1;
}

LJFOLD(ADD KINT64 KINT64)
LJFOLD(SUB KINT64 KINT64)
LJFOLD(MUL KINT64 KINT64)
LJFOLD(BAND KINT64 KINT64)
LJFOLD(BOR KINT64 KINT64)
LJFOLD(BXOR KINT64 KINT64)
LJFOLDF(kfold_int64arith)
{
  return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64,
                                    ir_k64(fright)->u64, (IROp)fins->o));
}

LJFOLD(DIV KINT64 KINT64)
LJFOLD(MOD KINT64 KINT64)
LJFOLD(POW KINT64 KINT64)
LJFOLDF(kfold_int64arith2)
{
#if LJ_HASFFI
  uint64_t k1 = ir_k64(fleft)->u64, k2 = ir_k64(fright)->u64;
  if (irt_isi64(fins->t)) {
    k1 = fins->o == IR_DIV ? lj_carith_divi64((int64_t)k1, (int64_t)k2) :
         fins->o == IR_MOD ? lj_carith_modi64((int64_t)k1, (int64_t)k2) :
                             lj_carith_powi64((int64_t)k1, (int64_t)k2);
  } else {
    k1 = fins->o == IR_DIV ? lj_carith_divu64(k1, k2) :
         fins->o == IR_MOD ? lj_carith_modu64(k1, k2) :
                             lj_carith_powu64(k1, k2);
  }
  return INT64FOLD(k1);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(BSHL KINT64 KINT)
LJFOLD(BSHR KINT64 KINT)
LJFOLD(BSAR KINT64 KINT)
LJFOLD(BROL KINT64 KINT)
LJFOLD(BROR KINT64 KINT)
LJFOLDF(kfold_int64shift)
{
#if LJ_HASFFI || LJ_64
  uint64_t k = ir_k64(fleft)->u64;
  int32_t sh = (fright->i & 63);
  switch ((IROp)fins->o) {
  case IR_BSHL: k <<= sh; break;
#if LJ_HASFFI
  case IR_BSHR: k >>= sh; break;
  case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
  case IR_BROL: k = lj_rol(k, sh); break;
  case IR_BROR: k = lj_ror(k, sh); break;
#endif
  default: lua_assert(0); break;
  }
  return INT64FOLD(k);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(BNOT KINT64)
LJFOLDF(kfold_bnot64)
{
#if LJ_HASFFI
  return INT64FOLD(~ir_k64(fleft)->u64);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(BSWAP KINT64)
LJFOLDF(kfold_bswap64)
{
#if LJ_HASFFI
  return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64));
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(LT KINT64 KINT64)
LJFOLD(GE KINT64 KINT64)
LJFOLD(LE KINT64 KINT64)
LJFOLD(GT KINT64 KINT64)
LJFOLD(ULT KINT64 KINT64)
LJFOLD(UGE KINT64 KINT64)
LJFOLD(ULE KINT64 KINT64)
LJFOLD(UGT KINT64 KINT64)
LJFOLDF(kfold_int64comp)
{
#if LJ_HASFFI
  uint64_t a = ir_k64(fleft)->u64, b = ir_k64(fright)->u64;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD(a < b);
  case IR_GE: return CONDFOLD(a >= b);
  case IR_LE: return CONDFOLD(a <= b);
  case IR_GT: return CONDFOLD(a > b);
  case IR_ULT: return CONDFOLD((uint64_t)a < (uint64_t)b);
  case IR_UGE: return CONDFOLD((uint64_t)a >= (uint64_t)b);
  case IR_ULE: return CONDFOLD((uint64_t)a <= (uint64_t)b);
  case IR_UGT: return CONDFOLD((uint64_t)a > (uint64_t)b);
  default: lua_assert(0); return FAILFOLD;
  }
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(UGE any KINT64)
LJFOLDF(kfold_int64comp0)
{
#if LJ_HASFFI
  if (ir_k64(fright)->u64 == 0)
    return DROPFOLD;
  return NEXTFOLD;
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

/* -- Constant folding for strings ---------------------------------------- */

LJFOLD(SNEW KKPTR KINT)
LJFOLDF(kfold_snew_kptr)
{
  GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i);
  return lj_ir_kstr(J, s);
}

LJFOLD(SNEW any KINT)
LJFOLDF(kfold_snew_empty)
{
  if (fright->i == 0)
    return lj_ir_kstr(J, &J2G(J)->strempty);
  return NEXTFOLD;
}

LJFOLD(STRREF KGC KINT)
LJFOLDF(kfold_strref)
{
  GCstr *str = ir_kstr(fleft);
  lua_assert((MSize)fright->i <= str->len);
  return lj_ir_kkptr(J, (char *)strdata(str) + fright->i);
}

LJFOLD(STRREF SNEW any)
LJFOLDF(kfold_strref_snew)
{
  PHIBARRIER(fleft);
  if (irref_isk(fins->op2) && fright->i == 0) {
    return fleft->op1;  /* strref(snew(ptr, len), 0) ==> ptr */
  } else {
    /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
    IRIns *ir = IR(fleft->op1);
    IRRef1 str = ir->op1;  /* IRIns * is not valid across emitir. */
    lua_assert(ir->o == IR_STRREF);
    PHIBARRIER(ir);
    fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2);  /* Clobbers fins! */
    fins->op1 = str;
    fins->ot = IRT(IR_STRREF, IRT_P32);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(CALLN CARG IRCALL_lj_str_cmp)
LJFOLDF(kfold_strcmp)
{
  if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
    GCstr *a = ir_kstr(IR(fleft->op1));
    GCstr *b = ir_kstr(IR(fleft->op2));
    return INTFOLD(lj_str_cmp(a, b));
  }
  return NEXTFOLD;
}

/* -- Constant folding of pointer arithmetic ------------------------------ */

LJFOLD(ADD KGC KINT)
LJFOLD(ADD KGC KINT64)
LJFOLDF(kfold_add_kgc)
{
  GCobj *o = ir_kgc(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
#if LJ_HASFFI
  if (irt_iscdata(fleft->t)) {
    CType *ct = ctype_raw(ctype_ctsG(J2G(J)), gco2cd(o)->ctypeid);
    if (ctype_isnum(ct->info) || ctype_isenum(ct->info) ||
        ctype_isptr(ct->info) || ctype_isfunc(ct->info) ||
        ctype_iscomplex(ct->info) || ctype_isvector(ct->info))
      return lj_ir_kkptr(J, (char *)o + ofs);
  }
#endif
  return lj_ir_kptr(J, (char *)o + ofs);
}

LJFOLD(ADD KPTR KINT)
LJFOLD(ADD KPTR KINT64)
LJFOLD(ADD KKPTR KINT)
LJFOLD(ADD KKPTR KINT64)
LJFOLDF(kfold_add_kptr)
{
  void *p = ir_kptr(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
  return lj_ir_kptr_(J, fleft->o, (char *)p + ofs);
}

LJFOLD(ADD any KGC)
LJFOLD(ADD any KPTR)
LJFOLD(ADD any KKPTR)
LJFOLDF(kfold_add_kright)
{
  if (fleft->o == IR_KINT || fleft->o == IR_KINT64) {
    IRRef1 tmp = fins->op1; fins->op1 = fins->op2; fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* -- Constant folding of conversions ------------------------------------- */

LJFOLD(TOBIT KNUM KNUM)
LJFOLDF(kfold_tobit)
{
  return INTFOLD(lj_num2bit(knumleft));
}

LJFOLD(CONV KINT IRCONV_NUM_INT)
LJFOLDF(kfold_conv_kint_num)
{
  return lj_ir_knum(J, (lua_Number)fleft->i);
}

LJFOLD(CONV KINT IRCONV_NUM_U32)
LJFOLDF(kfold_conv_kintu32_num)
{
  return lj_ir_knum(J, (lua_Number)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT IRCONV_INT_I8)
LJFOLD(CONV KINT IRCONV_INT_U8)
LJFOLD(CONV KINT IRCONV_INT_I16)
LJFOLD(CONV KINT IRCONV_INT_U16)
LJFOLDF(kfold_conv_kint_ext)
{
  int32_t k = fleft->i;
  if ((fins->op2 & IRCONV_SRCMASK) == IRT_I8) k = (int8_t)k;
  else if ((fins->op2 & IRCONV_SRCMASK) == IRT_U8) k = (uint8_t)k;
  else if ((fins->op2 & IRCONV_SRCMASK) == IRT_I16) k = (int16_t)k;
  else k = (uint16_t)k;
  return INTFOLD(k);
}

LJFOLD(CONV KINT IRCONV_I64_INT)
LJFOLD(CONV KINT IRCONV_U64_INT)
LJFOLD(CONV KINT IRCONV_I64_U32)
LJFOLD(CONV KINT IRCONV_U64_U32)
LJFOLDF(kfold_conv_kint_i64)
{
  if ((fins->op2 & IRCONV_SEXT))
    return INT64FOLD((uint64_t)(int64_t)fleft->i);
  else
    return INT64FOLD((uint64_t)(int64_t)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT64 IRCONV_NUM_I64)
LJFOLDF(kfold_conv_kint64_num_i64)
{
  return lj_ir_knum(J, (lua_Number)(int64_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_NUM_U64)
LJFOLDF(kfold_conv_kint64_num_u64)
{
  return lj_ir_knum(J, (lua_Number)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_INT_I64)
LJFOLD(CONV KINT64 IRCONV_U32_I64)
LJFOLDF(kfold_conv_kint64_int_i64)
{
  return INTFOLD((int32_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KNUM IRCONV_INT_NUM)
LJFOLDF(kfold_conv_knum_int_num)
{
  lua_Number n = knumleft;
  if (!(fins->op2 & IRCONV_TRUNC)) {
    int32_t k = lj_num2int(n);
    if (irt_isguard(fins->t) && n != (lua_Number)k) {
      /* We're about to create a guard which always fails, like CONV +1.5.
      ** Some pathological loops cause this during LICM, e.g.:
      **   local x,k,t = 0,1.5,{1,[1.5]=2}
      **   for i=1,200 do x = x + t[k]; k = k == 1 and 1.5 or 1 end
      **   assert(x == 300)
      */
      return FAILFOLD;
    }
    return INTFOLD(k);
  } else {
    return INTFOLD((int32_t)n);
  }
}

LJFOLD(CONV KNUM IRCONV_U32_NUM)
LJFOLDF(kfold_conv_knum_u32_num)
{
  lua_assert((fins->op2 & IRCONV_TRUNC));
#ifdef _MSC_VER
  {  /* Workaround for MSVC bug. */
    volatile uint32_t u = (uint32_t)knumleft;
    return INTFOLD((int32_t)u);
  }
#else
  return INTFOLD((int32_t)(uint32_t)knumleft);
#endif
}

LJFOLD(CONV KNUM IRCONV_I64_NUM)
LJFOLDF(kfold_conv_knum_i64_num)
{
  lua_assert((fins->op2 & IRCONV_TRUNC));
  return INT64FOLD((uint64_t)(int64_t)knumleft);
}

LJFOLD(CONV KNUM IRCONV_U64_NUM)
LJFOLDF(kfold_conv_knum_u64_num)
{
  lua_assert((fins->op2 & IRCONV_TRUNC));
  return INT64FOLD(lj_num2u64(knumleft));
}

LJFOLD(TOSTR KNUM)
LJFOLDF(kfold_tostr_knum)
{
  return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft));
}

LJFOLD(TOSTR KINT)
LJFOLDF(kfold_tostr_kint)
{
  return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i));
}

LJFOLD(STRTO KGC)
LJFOLDF(kfold_strto)
{
  TValue n;
  if (lj_strscan_num(ir_kstr(fleft), &n))
    return lj_ir_knum(J, numV(&n));
  return FAILFOLD;
}

/* -- Constant folding of equality checks --------------------------------- */

/* Don't constant-fold away FLOAD checks against KNULL. */
LJFOLD(EQ FLOAD KNULL)
LJFOLD(NE FLOAD KNULL)
LJFOLDX(lj_opt_cse)

/* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
LJFOLD(EQ any KNULL)
LJFOLD(NE any KNULL)
LJFOLD(EQ KNULL any)
LJFOLD(NE KNULL any)
LJFOLD(EQ KINT KINT)  /* Constants are unique, so same refs <==> same value. */
LJFOLD(NE KINT KINT)
LJFOLD(EQ KINT64 KINT64)
LJFOLD(NE KINT64 KINT64)
LJFOLD(EQ KGC KGC)
LJFOLD(NE KGC KGC)
LJFOLDF(kfold_kref)
{
  return CONDFOLD((fins->op1 == fins->op2) ^ (fins->o == IR_NE));
}

/* -- Algebraic shortcuts ------------------------------------------------- */

LJFOLD(FPMATH FPMATH IRFPM_FLOOR)
LJFOLD(FPMATH FPMATH IRFPM_CEIL)
LJFOLD(FPMATH FPMATH IRFPM_TRUNC)
LJFOLDF(shortcut_round)
{
  IRFPMathOp op = (IRFPMathOp)fleft->op2;
  if (op == IRFPM_FLOOR || op == IRFPM_CEIL || op == IRFPM_TRUNC)
    return LEFTFOLD;  /* round(round_left(x)) = round_left(x) */
  return NEXTFOLD;
}

LJFOLD(ABS ABS KNUM)
LJFOLDF(shortcut_left)
{
  return LEFTFOLD;  /* f(g(x)) ==> g(x) */
}

LJFOLD(ABS NEG KNUM)
LJFOLDF(shortcut_dropleft)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* abs(neg(x)) ==> abs(x) */
  return RETRYFOLD;
}

/* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
LJFOLD(NEG NEG any)
LJFOLD(BNOT BNOT)
LJFOLD(BSWAP BSWAP)
LJFOLDF(shortcut_leftleft)
{
  PHIBARRIER(fleft);  /* See above. Fold would be ok, but not beneficial. */
  return fleft->op1;  /* f(g(x)) ==> x */
}

/* -- FP algebraic simplifications ---------------------------------------- */

/* FP arithmetic is tricky -- there's not much to simplify.
** Please note the following common pitfalls before sending "improvements":
**   x+0 ==> x  is INVALID for x=-0
**   0-x ==> -x is INVALID for x=+0
**   x*0 ==> 0  is INVALID for x=-0, x=+-Inf or x=NaN
*/

LJFOLD(ADD NEG any)
LJFOLDF(simplify_numadd_negx)
{
  PHIBARRIER(fleft);
  fins->o = IR_SUB;  /* (-a) + b ==> b - a */
  fins->op1 = fins->op2;
  fins->op2 = fleft->op1;
  return RETRYFOLD;
}

LJFOLD(ADD any NEG)
LJFOLDF(simplify_numadd_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_SUB;  /* a + (-b) ==> a - b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(SUB any KNUM)
LJFOLDF(simplify_numsub_k)
{
  lua_Number n = knumright;
  if (n == 0.0)  /* x - (+-0) ==> x */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB NEG KNUM)
LJFOLDF(simplify_numsub_negk)
{
  PHIBARRIER(fleft);
  fins->op2 = fleft->op1;  /* (-x) - k ==> (-k) - x */
  fins->op1 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(SUB any NEG)
LJFOLDF(simplify_numsub_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_ADD;  /* a - (-b) ==> a + b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(MUL any KNUM)
LJFOLD(DIV any KNUM)
LJFOLDF(simplify_nummuldiv_k)
{
  lua_Number n = knumright;
  if (n == 1.0) {  /* x o 1 ==> x */
    return LEFTFOLD;
  } else if (n == -1.0) {  /* x o -1 ==> -x */
    fins->o = IR_NEG;
    fins->op2 = (IRRef1)lj_ir_knum_neg(J);
    return RETRYFOLD;
  } else if (fins->o == IR_MUL && n == 2.0) {  /* x * 2 ==> x + x */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  } else if (fins->o == IR_DIV) {  /* x / 2^k ==> x * 2^-k */
    uint64_t u = ir_knum(fright)->u64;
    uint32_t ex = ((uint32_t)(u >> 52) & 0x7ff);
    if ((u & U64x(000fffff,ffffffff)) == 0 && ex - 1 < 0x7fd) {
      u = (u & ((uint64_t)1 << 63)) | ((uint64_t)(0x7fe - ex) << 52);
      fins->o = IR_MUL;  /* Multiply by exact reciprocal. */
      fins->op2 = lj_ir_knum_u64(J, u);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}
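
/* Example for the reciprocal fold above: the constant 4.0 has biased
** exponent 0x401 and an all-zero mantissa; 0x7fe - 0x401 = 0x3fd is the
** biased exponent of the exact reciprocal 0.25, so x/4.0 becomes x*0.25.
*/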

LJFOLD(MUL NEG KNUM)
LJFOLD(DIV NEG KNUM)
LJFOLDF(simplify_nummuldiv_negk)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* (-a) o k ==> a o (-k) */
  fins->op2 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(MUL NEG NEG)
LJFOLD(DIV NEG NEG)
LJFOLDF(simplify_nummuldiv_negneg)
{
  PHIBARRIER(fleft);
  PHIBARRIER(fright);
  fins->op1 = fleft->op1;  /* (-a) o (-b) ==> a o b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(POW any KINT)
LJFOLDF(simplify_numpow_xk)
{
  int32_t k = fright->i;
  TRef ref = fins->op1;
  if (k == 0)  /* x ^ 0 ==> 1 */
    return lj_ir_knum_one(J);  /* Result must be a number, not an int. */
  if (k == 1)  /* x ^ 1 ==> x */
    return LEFTFOLD;
  if ((uint32_t)(k+65536) > 2*65536u)  /* Limit code explosion. */
    return NEXTFOLD;
  if (k < 0) {  /* x ^ (-k) ==> (1/x) ^ k. */
    ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
    k = -k;
  }
  /* Unroll x^k for 1 <= k <= 65536. */
  for (; (k & 1) == 0; k >>= 1)  /* Handle leading zeros. */
    ref = emitir(IRTN(IR_MUL), ref, ref);
  if ((k >>= 1) != 0) {  /* Handle trailing bits. */
    TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
    for (; k != 1; k >>= 1) {
      if (k & 1)
        ref = emitir(IRTN(IR_MUL), ref, tmp);
      tmp = emitir(IRTN(IR_MUL), tmp, tmp);
    }
    ref = emitir(IRTN(IR_MUL), ref, tmp);
  }
  return ref;
}
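
/* Example for the unrolling above: x^5 (binary 101) emits tmp = x*x,
** tmp = tmp*tmp and finally ref = x*tmp, i.e. three multiplies for x*x^4.
*/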

LJFOLD(POW KNUM any)
LJFOLDF(simplify_numpow_kx)
{
  lua_Number n = knumleft;
  if (n == 2.0) {  /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
    fins->o = IR_CONV;
#if LJ_TARGET_X86ORX64
    fins->op1 = fins->op2;
    fins->op2 = IRCONV_NUM_INT;
    fins->op2 = (IRRef1)lj_opt_fold(J);
#endif
    fins->op1 = (IRRef1)lj_ir_knum_one(J);
    fins->o = IR_LDEXP;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* -- Simplify conversions ------------------------------------------------ */

LJFOLD(CONV CONV IRCONV_NUM_INT)  /* _NUM */
LJFOLDF(shortcut_conv_num_int)
{
  PHIBARRIER(fleft);
  /* Only safe with a guarded conversion to int. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM && irt_isguard(fleft->t))
    return fleft->op1;  /* f(g(x)) ==> x */
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_NUM)  /* _INT */
LJFOLD(CONV CONV IRCONV_U32_NUM)  /* _U32 */
LJFOLDF(simplify_conv_int_num)
{
  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
  if ((fleft->op2 & IRCONV_SRCMASK) ==
      ((fins->op2 & IRCONV_DSTMASK) >> IRCONV_DSH))
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_I64_NUM)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U64_NUM)  /* _INT or _U32 */
LJFOLDF(simplify_conv_i64_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
    /* Reduce to a sign-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
    return RETRYFOLD;
  } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
#if LJ_TARGET_X64
    return fleft->op1;
#else
    /* Reduce to a zero-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = (IRT_I64<<5)|IRT_U32;
    return RETRYFOLD;
#endif
  }
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_I64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_INT_U64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U32_I64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U32_U64)  /* _INT or _U32 */
LJFOLDF(simplify_conv_int_i64)
{
  int src;
  PHIBARRIER(fleft);
  src = (fleft->op2 & IRCONV_SRCMASK);
  if (src == IRT_INT || src == IRT_U32) {
    if (src == ((fins->op2 & IRCONV_DSTMASK) >> IRCONV_DSH)) {
      return fleft->op1;
    } else {
      fins->op2 = ((fins->op2 & IRCONV_DSTMASK) | src);
      fins->op1 = fleft->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_FLOAT_NUM)  /* _FLOAT */
LJFOLDF(simplify_conv_flt_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_FLOAT)
    return fleft->op1;
  return NEXTFOLD;
}

/* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(TOBIT CONV KNUM)
LJFOLDF(simplify_tobit_conv)
{
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
      (fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
    /* Fold even across PHI to avoid expensive num->int conversions in loop. */
    lua_assert(irt_isnum(fleft->t));
    return fleft->op1;
  }
  return NEXTFOLD;
}

/* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(FPMATH CONV IRFPM_FLOOR)
LJFOLD(FPMATH CONV IRFPM_CEIL)
LJFOLD(FPMATH CONV IRFPM_TRUNC)
LJFOLDF(simplify_floor_conv)
{
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
      (fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
    return LEFTFOLD;
  return NEXTFOLD;
}

/* Strength reduction of widening. */
LJFOLD(CONV any IRCONV_I64_INT)
LJFOLD(CONV any IRCONV_U64_INT)
LJFOLDF(simplify_conv_sext)
{
  IRRef ref = fins->op1;
  int64_t ofs = 0;
  if (!(fins->op2 & IRCONV_SEXT))
    return NEXTFOLD;
  PHIBARRIER(fleft);
  if (fleft->o == IR_XLOAD && (irt_isu8(fleft->t) || irt_isu16(fleft->t)))
    goto ok_reduce;
  if (fleft->o == IR_ADD && irref_isk(fleft->op2)) {
    ofs = (int64_t)IR(fleft->op2)->i;
    ref = fleft->op1;
  }
  /* Use scalar evolution analysis results to strength-reduce sign-extension. */
  if (ref == J->scev.idx) {
    IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
    lua_assert(irt_isint(J->scev.t));
    if (lo && IR(lo)->i + ofs >= 0) {
    ok_reduce:
#if LJ_TARGET_X64
      /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
      return LEFTFOLD;
#else
      /* Reduce to a (cheaper) zero-extension. */
      fins->op2 &= ~IRCONV_SEXT;
      return RETRYFOLD;
#endif
    }
  }
  return NEXTFOLD;
}
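
/* Example: for a 'for i=1,n do ... end' loop the scalar evolution info in
** J->scev proves the index never becomes negative, so sign-extension and
** zero-extension to 64 bits agree and the cheaper form can be used.
*/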

/* Strength reduction of narrowing. */
LJFOLD(CONV ADD IRCONV_INT_I64)
LJFOLD(CONV SUB IRCONV_INT_I64)
LJFOLD(CONV MUL IRCONV_INT_I64)
LJFOLD(CONV ADD IRCONV_INT_U64)
LJFOLD(CONV SUB IRCONV_INT_U64)
LJFOLD(CONV MUL IRCONV_INT_U64)
LJFOLD(CONV ADD IRCONV_U32_I64)
LJFOLD(CONV SUB IRCONV_U32_I64)
LJFOLD(CONV MUL IRCONV_U32_I64)
LJFOLD(CONV ADD IRCONV_U32_U64)
LJFOLD(CONV SUB IRCONV_U32_U64)
LJFOLD(CONV MUL IRCONV_U32_U64)
LJFOLDF(simplify_conv_narrow)
{
  IROp op = (IROp)fleft->o;
  IRType t = irt_type(fins->t);
  IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
  PHIBARRIER(fleft);
  op1 = emitir(IRTI(IR_CONV), op1, mode);
  op2 = emitir(IRTI(IR_CONV), op2, mode);
  fins->ot = IRT(op, t);
  fins->op1 = op1;
  fins->op2 = op2;
  return RETRYFOLD;
}
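
/* This is safe for ADD/SUB/MUL because truncation to 32 bits commutes with
** two's-complement wrap-around arithmetic: the low 32 bits of the 64 bit
** result depend only on the low 32 bits of the operands.
*/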

/* Special CSE rule for CONV. */
LJFOLD(CONV any any)
LJFOLDF(cse_conv)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
    uint8_t guard = irt_isguard(fins->t);
    IRRef ref = J->chain[IR_CONV];
    while (ref > op1) {
      IRIns *ir = IR(ref);
      /* Commoning with stronger checks is ok. */
      if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
          irt_isguard(ir->t) >= guard)
        return ref;
      ref = ir->prev;
    }
  }
  return EMITFOLD;  /* No fallthrough to regular CSE. */
}

/* FP conversion narrowing. */
LJFOLD(TOBIT ADD KNUM)
LJFOLD(TOBIT SUB KNUM)
LJFOLD(CONV ADD IRCONV_INT_NUM)
LJFOLD(CONV SUB IRCONV_INT_NUM)
LJFOLD(CONV ADD IRCONV_I64_NUM)
LJFOLD(CONV SUB IRCONV_I64_NUM)
LJFOLDF(narrow_convert)
{
  PHIBARRIER(fleft);
  /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
  if (J->chain[IR_LOOP])
    return NEXTFOLD;
  lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT);
  return lj_opt_narrow_convert(J);
}

/* -- Integer algebraic simplifications ----------------------------------- */

LJFOLD(ADD any KINT)
LJFOLD(ADDOV any KINT)
LJFOLD(SUBOV any KINT)
LJFOLDF(simplify_intadd_k)
{
  if (fright->i == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(MULOV any KINT)
LJFOLDF(simplify_intmul_k)
{
  if (fright->i == 0)  /* i * 0 ==> 0 */
    return RIGHTFOLD;
  if (fright->i == 1)  /* i * 1 ==> i */
    return LEFTFOLD;
  if (fright->i == 2) {  /* i * 2 ==> i + i */
    fins->o = IR_ADDOV;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any KINT)
LJFOLDF(simplify_intsub_k)
{
  if (fright->i == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i);  /* Overflow for -2^31 ok. */
  return RETRYFOLD;
}
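
/* Rewriting SUB of a constant into ADD canonicalizes the constant to the
** right operand, so the ADD reassociation and commutativity rules below
** can pick it up.
*/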

LJFOLD(SUB KINT any)
LJFOLD(SUB KINT64 any)
LJFOLDF(simplify_intsub_kleft)
{
  if (fleft->o == IR_KINT ? (fleft->i == 0) : (ir_kint64(fleft)->u64 == 0)) {
    fins->o = IR_NEG;  /* 0 - i ==> -i */
    fins->op1 = fins->op2;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(ADD any KINT64)
LJFOLDF(simplify_intadd_k64)
{
  if (ir_kint64(fright)->u64 == 0)  /* i + 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB any KINT64)
LJFOLDF(simplify_intsub_k64)
{
  uint64_t k = ir_kint64(fright)->u64;
  if (k == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint64(J, (uint64_t)-(int64_t)k);
  return RETRYFOLD;
}

static TRef simplify_intmul_k(jit_State *J, int32_t k)
{
  /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2.
  ** But this is mainly intended for simple address arithmetic.
  ** Also it's easier for the backend to optimize the original multiplies.
  */
  if (k == 1) {  /* i * 1 ==> i */
    return LEFTFOLD;
  } else if ((k & (k-1)) == 0) {  /* i * 2^k ==> i << k */
    fins->o = IR_BSHL;
    fins->op2 = lj_ir_kint(J, lj_fls((uint32_t)k));
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(MUL any KINT)
LJFOLDF(simplify_intmul_k32)
{
  if (fright->i == 0)  /* i * 0 ==> 0 */
    return INTFOLD(0);
  else if (fright->i > 0)
    return simplify_intmul_k(J, fright->i);
  return NEXTFOLD;
}

LJFOLD(MUL any KINT64)
LJFOLDF(simplify_intmul_k64)
{
  if (ir_kint64(fright)->u64 == 0)  /* i * 0 ==> 0 */
    return INT64FOLD(0);
#if LJ_64
  /* NYI: SPLIT for BSHL and 32 bit backend support. */
  else if (ir_kint64(fright)->u64 < 0x80000000u)
    return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
#endif
  return NEXTFOLD;
}

LJFOLD(MOD any KINT)
LJFOLDF(simplify_intmod_k)
{
  int32_t k = fright->i;
  lua_assert(k != 0);
  if (k > 0 && (k & (k-1)) == 0) {  /* i % (2^k) ==> i & (2^k-1) */
    fins->o = IR_BAND;
    fins->op2 = lj_ir_kint(J, k-1);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(MOD KINT any)
LJFOLDF(simplify_intmod_kleft)
{
  if (fleft->i == 0)
    return INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB any any)
LJFOLD(SUBOV any any)
LJFOLDF(simplify_intsub)
{
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))  /* i - i ==> 0 */
    return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB ADD any)
LJFOLDF(simplify_intsubadd_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1)  /* (i + j) - i ==> j */
      return fleft->op2;
    if (fins->op2 == fleft->op2)  /* (i + j) - j ==> i */
      return fleft->op1;
  }
  return NEXTFOLD;
}

LJFOLD(SUB SUB any)
LJFOLDF(simplify_intsubsub_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1) {  /* (i - j) - i ==> 0 - j */
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      fins->op2 = fleft->op2;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB any SUB)
LJFOLDF(simplify_intsubsub_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1)  /* i - (i - j) ==> j */
      return fright->op2;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any ADD)
LJFOLDF(simplify_intsubadd_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1) {  /* i - (i + j) ==> 0 - j */
      fins->op2 = fright->op2;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
    if (fins->op1 == fright->op2) {  /* i - (j + i) ==> 0 - j */
      fins->op2 = fright->op1;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB ADD ADD)
LJFOLDF(simplify_intsubaddadd_cancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    PHIBARRIER(fright);
    if (fleft->op1 == fright->op1) {  /* (i + j1) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op1 == fright->op2) {  /* (i + j1) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op1) {  /* (j1 + i) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op2) {  /* (j1 + i) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BAND any KINT)
LJFOLD(BAND any KINT64)
LJFOLDF(simplify_band_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
                                     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i & 0 ==> 0 */
    return RIGHTFOLD;
  if (k == -1)  /* i & -1 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BOR any KINT)
LJFOLD(BOR any KINT64)
LJFOLDF(simplify_bor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
                                     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i | 0 ==> i */
    return LEFTFOLD;
  if (k == -1)  /* i | -1 ==> -1 */
    return RIGHTFOLD;
  return NEXTFOLD;
}

LJFOLD(BXOR any KINT)
LJFOLD(BXOR any KINT64)
LJFOLDF(simplify_bxor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
                                     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i xor 0 ==> i */
    return LEFTFOLD;
  if (k == -1) {  /* i xor -1 ==> ~i */
    fins->o = IR_BNOT;
    fins->op2 = 0;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BSHL any KINT)
LJFOLD(BSHR any KINT)
LJFOLD(BSAR any KINT)
LJFOLD(BROL any KINT)
LJFOLD(BROR any KINT)
LJFOLDF(simplify_shift_ik)
{
  int32_t mask = irt_is64(fins->t) ? 63 : 31;
  int32_t k = (fright->i & mask);
  if (k == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  if (k == 1 && fins->o == IR_BSHL) {  /* i << 1 ==> i + i */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  if (k != fright->i) {  /* i o k ==> i o (k & mask) */
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
#ifndef LJ_TARGET_UNIFYROT
  if (fins->o == IR_BROR) {  /* bror(i, k) ==> brol(i, (-k)&mask) */
    fins->o = IR_BROL;
    fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&mask);
    return RETRYFOLD;
  }
#endif
  return NEXTFOLD;
}

LJFOLD(BSHL any BAND)
LJFOLD(BSHR any BAND)
LJFOLD(BSAR any BAND)
LJFOLD(BROL any BAND)
LJFOLD(BROR any BAND)
LJFOLDF(simplify_shift_andk)
{
  IRIns *irk = IR(fright->op2);
  PHIBARRIER(fright);
  if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
      irk->o == IR_KINT) {  /* i o (j & mask) ==> i o j */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = irk->i & mask;
    if (k == mask) {
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BSHL KINT any)
LJFOLD(BSHR KINT any)
LJFOLD(BSHL KINT64 any)
LJFOLD(BSHR KINT64 any)
LJFOLDF(simplify_shift1_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
                                    (int64_t)ir_k64(fleft)->u64;
  if (k == 0)  /* 0 o i ==> 0 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSAR KINT any)
LJFOLD(BROL KINT any)
LJFOLD(BROR KINT any)
LJFOLD(BSAR KINT64 any)
LJFOLD(BROL KINT64 any)
LJFOLD(BROR KINT64 any)
LJFOLDF(simplify_shift2_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
                                    (int64_t)ir_k64(fleft)->u64;
  if (k == 0 || k == -1)  /* 0 o i ==> 0; -1 o i ==> -1 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSHL BAND KINT)
LJFOLD(BSHR BAND KINT)
LJFOLD(BROL BAND KINT)
LJFOLD(BROR BAND KINT)
LJFOLDF(simplify_shiftk_andk)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);
  if (irk->o == IR_KINT) {  /* (i & k1) o k2 ==> (i o k2) & (k1 o k2) */
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    fins->op1 = fleft->op1;
    fins->op1 = (IRRef1)lj_opt_fold(J);
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    fins->ot = IRTI(IR_BAND);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BAND BSHL KINT)
LJFOLD(BAND BSHR KINT)
LJFOLDF(simplify_andk_shiftk)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT &&
      kfold_intop(-1, irk->i, (IROp)fleft->o) == fright->i)
    return LEFTFOLD;  /* (i o k1) & k2 ==> i, if (-1 o k1) == k2 */
  return NEXTFOLD;
}

/* -- Reassociation ------------------------------------------------------- */

LJFOLD(ADD ADD KINT)
LJFOLD(MUL MUL KINT)
LJFOLD(BAND BAND KINT)
LJFOLD(BOR BOR KINT)
LJFOLD(BXOR BXOR KINT)
LJFOLDF(reassoc_intarith_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT) {
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    if (k == irk->i)  /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
}

LJFOLD(ADD ADD KINT64)
LJFOLD(MUL MUL KINT64)
LJFOLD(BAND BAND KINT64)
LJFOLD(BOR BOR KINT64)
LJFOLD(BXOR BXOR KINT64)
LJFOLDF(reassoc_intarith_k64)
{
#if LJ_HASFFI || LJ_64
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT64) {
    uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
                                  ir_k64(fright)->u64, (IROp)fins->o);
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint64(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(MIN MIN any)
LJFOLD(MAX MAX any)
LJFOLD(BAND BAND any)
LJFOLD(BOR BOR any)
LJFOLDF(reassoc_dup)
{
  if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
    return LEFTFOLD;  /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */
  return NEXTFOLD;
}

LJFOLD(BXOR BXOR any)
LJFOLDF(reassoc_bxor)
{
  PHIBARRIER(fleft);
  if (fins->op2 == fleft->op1)  /* (a xor b) xor a ==> b */
    return fleft->op2;
  if (fins->op2 == fleft->op2)  /* (a xor b) xor b ==> a */
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(BSHL BSHL KINT)
LJFOLD(BSHR BSHR KINT)
LJFOLD(BSAR BSAR KINT)
LJFOLD(BROL BROL KINT)
LJFOLD(BROR BROR KINT)
LJFOLDF(reassoc_shift)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);  /* The (shift any KINT) rule covers k2 == 0 and more. */
  if (irk->o == IR_KINT) {  /* (i o k1) o k2 ==> i o (k1 + k2) */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = (irk->i & mask) + (fright->i & mask);
    if (k > mask) {  /* Combined shift too wide? */
      if (fins->o == IR_BSHL || fins->o == IR_BSHR)
        return mask == 31 ? INTFOLD(0) : INT64FOLD(0);
      else if (fins->o == IR_BSAR)
        k = mask;
      else
        k &= mask;
    }
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
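
/* Example: (i << 4) << 5 is combined into i << 9. (i >> 20) >> 20 exceeds
** the 31 bit mask and folds to the constant 0, whereas the same double
** arithmetic shift (BSAR) is clamped to a single shift by 31.
*/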

LJFOLD(MIN MIN KNUM)
LJFOLD(MAX MAX KNUM)
LJFOLD(MIN MIN KINT)
LJFOLD(MAX MAX KINT)
LJFOLDF(reassoc_minmax_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KNUM) {
    lua_Number a = ir_knum(irk)->n;
    lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD);
    if (a == y)  /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_knum(J, y);
    return RETRYFOLD;  /* (x o k1) o k2 ==> x o (k1 o k2) */
  } else if (irk->o == IR_KINT) {
    int32_t a = irk->i;
    int32_t y = kfold_intop(a, fright->i, fins->o);
    if (a == y)  /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, y);
    return RETRYFOLD;  /* (x o k1) o k2 ==> x o (k1 o k2) */
  }
  return NEXTFOLD;
}

LJFOLD(MIN MAX any)
LJFOLD(MAX MIN any)
LJFOLDF(reassoc_minmax_left)
{
  if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
    return RIGHTFOLD;  /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */
  return NEXTFOLD;
}

LJFOLD(MIN any MAX)
LJFOLD(MAX any MIN)
LJFOLDF(reassoc_minmax_right)
{
  if (fins->op1 == fright->op1 || fins->op1 == fright->op2)
    return LEFTFOLD;  /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */
  return NEXTFOLD;
}

/* -- Array bounds check elimination -------------------------------------- */

/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
*/
LJFOLD(ABC any ADD)
LJFOLDF(abc_fwd)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    if (irref_isk(fright->op2)) {
      IRIns *add2 = IR(fright->op1);
      if (add2->o == IR_ADD && irref_isk(add2->op2) &&
          IR(fright->op2)->i == -IR(add2->op2)->i) {
        IRRef ref = J->chain[IR_ABC];
        IRRef lim = add2->op1;
        if (fins->op1 > lim) lim = fins->op1;
        while (ref > lim) {
          IRIns *ir = IR(ref);
          if (ir->op1 == fins->op1 && ir->op2 == add2->op1)
            return DROPFOLD;
          ref = ir->prev;
        }
      }
    }
  }
  return NEXTFOLD;
}

/* Eliminate ABC for constants.
** ABC(asize, k1), ABC(asize, k2) ==> ABC(asize, max(k1, k2))
** Drop second ABC if k2 is lower. Otherwise patch first ABC with k2.
*/
LJFOLD(ABC any KINT)
LJFOLDF(abc_k)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    IRRef ref = J->chain[IR_ABC];
    IRRef asize = fins->op1;
    while (ref > asize) {
      IRIns *ir = IR(ref);
      if (ir->op1 == asize && irref_isk(ir->op2)) {
        int32_t k = IR(ir->op2)->i;
        if (fright->i > k)
          ir->op2 = fins->op2;
        return DROPFOLD;
      }
      ref = ir->prev;
    }
    return EMITFOLD;  /* Already performed CSE. */
  }
  return NEXTFOLD;
}
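
/* Example: after ABC(asize, 3), a later ABC(asize, 7) patches the existing
** check to use 7 and is dropped itself; a later ABC(asize, 2) is dropped
** outright, since the existing check already covers it.
*/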

/* Eliminate invariant ABC inside loop. */
LJFOLD(ABC any any)
LJFOLDF(abc_invar)
{
  if (!irt_isint(fins->t) && J->chain[IR_LOOP])  /* Currently marked as PTR. */
    return DROPFOLD;
  return NEXTFOLD;
}

/* -- Commutativity ------------------------------------------------------- */

/* The refs of commutative ops are canonicalized. Lower refs go to the right.
** Rationale behind this:
** - It (also) moves constants to the right.
** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices).
** - It helps CSE to find more matches.
** - The assembler generates better code with constants at the right.
*/

LJFOLD(ADD any any)
LJFOLD(MUL any any)
LJFOLD(ADDOV any any)
LJFOLD(MULOV any any)
LJFOLDF(comm_swap)
{
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(EQ any any)
LJFOLD(NE any any)
LJFOLDF(comm_equal)
{
  /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD(fins->o == IR_EQ);
  return fold_comm_swap(J);
}

LJFOLD(LT any any)
LJFOLD(GE any any)
LJFOLD(LE any any)
LJFOLD(GT any any)
LJFOLD(ULT any any)
LJFOLD(UGE any any)
LJFOLD(ULE any any)
LJFOLD(UGT any any)
LJFOLDF(comm_comp)
{
  /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD((fins->o ^ (fins->o >> 1)) & 1);
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    fins->o ^= 3;  /* GT <-> LT, GE <-> LE, does not affect U */
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
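
/* The XOR with 3 relies on the consecutive IR opcode layout LT, GE, LE, GT
** (and likewise for the unsigned variants): swapping the operands maps each
** comparison to its mirror image while leaving the unsignedness untouched.
*/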

LJFOLD(BAND any any)
LJFOLD(BOR any any)
LJFOLD(MIN any any)
LJFOLD(MAX any any)
LJFOLDF(comm_dup)
{
  if (fins->op1 == fins->op2)  /* x o x ==> x */
    return LEFTFOLD;
  return fold_comm_swap(J);
}

LJFOLD(BXOR any any)
LJFOLDF(comm_bxor)
{
  if (fins->op1 == fins->op2)  /* i xor i ==> 0 */
    return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
  return fold_comm_swap(J);
}

/* -- Simplification of compound expressions ------------------------------ */

static TRef kfold_xload(jit_State *J, IRIns *ir, const void *p)
{
  int32_t k;
  switch (irt_type(ir->t)) {
  case IRT_NUM: return lj_ir_knum_u64(J, *(uint64_t *)p);
  case IRT_I8: k = (int32_t)*(int8_t *)p; break;
  case IRT_U8: k = (int32_t)*(uint8_t *)p; break;
  case IRT_I16: k = (int32_t)(int16_t)lj_getu16(p); break;
  case IRT_U16: k = (int32_t)(uint16_t)lj_getu16(p); break;
  case IRT_INT: case IRT_U32: k = (int32_t)lj_getu32(p); break;
  case IRT_I64: case IRT_U64: return lj_ir_kint64(J, *(uint64_t *)p);
  default: return 0;
  }
  return lj_ir_kint(J, k);
}

/* Turn: string.sub(str, a, b) == kstr
** into: string.byte(str, a) == string.byte(kstr, 1) etc.
** Note: this creates unaligned XLOADs on x86/x64.
*/
LJFOLD(EQ SNEW KGC)
LJFOLD(NE SNEW KGC)
LJFOLDF(merge_eqne_snew_kgc)
{
  GCstr *kstr = ir_kstr(fright);
  int32_t len = (int32_t)kstr->len;
  lua_assert(irt_isstr(fins->t));

#if LJ_TARGET_UNALIGNED
#define FOLD_SNEW_MAX_LEN	4  /* Handle string lengths 0, 1, 2, 3, 4. */
#define FOLD_SNEW_TYPE8		IRT_I8  /* Creates shorter immediates. */
#else
#define FOLD_SNEW_MAX_LEN	1  /* Handle string lengths 0 or 1. */
#define FOLD_SNEW_TYPE8		IRT_U8  /* Prefer unsigned loads. */
#endif

  if (len <= FOLD_SNEW_MAX_LEN) {
    IROp op = (IROp)fins->o;
    IRRef strref = fleft->op1;
    lua_assert(IR(strref)->o == IR_STRREF);
    if (op == IR_EQ) {
      emitir(IRTGI(IR_EQ), fleft->op2, lj_ir_kint(J, len));
      /* Caveat: fins/fleft/fright is no longer valid after emitir. */
    } else {
      /* NE is not expanded since this would need an OR of two conds. */
      if (!irref_isk(fleft->op2))  /* Only handle the constant length case. */
        return NEXTFOLD;
      if (IR(fleft->op2)->i != len)
        return DROPFOLD;
    }
    if (len > 0) {
      /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
      uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, FOLD_SNEW_TYPE8) :
                               len == 2 ? IRT(IR_XLOAD, IRT_U16) :
                                          IRTI(IR_XLOAD));
      TRef tmp = emitir(ot, strref,
                        IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0));
      TRef val = kfold_xload(J, IR(tref_ref(tmp)), strdata(kstr));
      if (len == 3)
        tmp = emitir(IRTI(IR_BAND), tmp,
                     lj_ir_kint(J, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
      fins->op1 = (IRRef1)tmp;
      fins->op2 = (IRRef1)val;
      fins->ot = (IROpT)IRTGI(op);
      return RETRYFOLD;
    } else {
      return DROPFOLD;
    }
  }
  return NEXTFOLD;
}
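
/* Example: on targets allowing unaligned access, 'string.sub(s, 1, 2) == "ab"'
** becomes a guard that the substring length equals 2, followed by an
** unaligned U16 XLOAD of the string data which is compared against the
** 16 bit constant loaded from "ab".
*/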

/* -- Loads --------------------------------------------------------------- */

/* Loads cannot be folded or passed on to CSE in general.
** Alias analysis is needed to check for forwarding opportunities.
**
** Caveat: *all* loads must be listed here or they end up at CSE!
*/

LJFOLD(ALOAD any)
LJFOLDX(lj_opt_fwd_aload)

/* From HREF fwd (see below). Must eliminate, not supported by fwd/backend. */
LJFOLD(HLOAD KKPTR)
LJFOLDF(kfold_hload_kkptr)
{
  UNUSED(J);
  lua_assert(ir_kptr(fleft) == niltvg(J2G(J)));
  return TREF_NIL;
}

LJFOLD(HLOAD any)
LJFOLDX(lj_opt_fwd_hload)

LJFOLD(ULOAD any)
LJFOLDX(lj_opt_fwd_uload)

LJFOLD(CALLL any IRCALL_lj_tab_len)
LJFOLDX(lj_opt_fwd_tab_len)

/* Upvalue refs are really loads, but there are no corresponding stores.
** So CSE is ok for them, except for UREFO across a GC step (see below).
** If the referenced function is const, its upvalue addresses are const, too.
** This can be used to improve CSE by looking for the same address,
** even if the upvalues originate from a different function.
*/
LJFOLD(UREFO KGC any)
LJFOLD(UREFC KGC any)
LJFOLDF(cse_uref)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref = J->chain[fins->o];
    GCfunc *fn = ir_kfunc(fleft);
    GCupval *uv = gco2uv(gcref(fn->l.uvptr[(fins->op2 >> 8)]));
    while (ref > 0) {
      IRIns *ir = IR(ref);
      if (irref_isk(ir->op1)) {
        GCfunc *fn2 = ir_kfunc(IR(ir->op1));
        if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
          if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
            break;
          return ref;
        }
      }
      ref = ir->prev;
    }
  }
  return EMITFOLD;
}

LJFOLD(HREFK any any)
LJFOLDX(lj_opt_fwd_hrefk)

LJFOLD(HREF TNEW any)
LJFOLDF(fwd_href_tnew)
{
  if (lj_opt_fwd_href_nokey(J))
    return lj_ir_kkptr(J, niltvg(J2G(J)));
  return NEXTFOLD;
}

LJFOLD(HREF TDUP KPRI)
LJFOLD(HREF TDUP KGC)
LJFOLD(HREF TDUP KNUM)
LJFOLDF(fwd_href_tdup)
{
  TValue keyv;
  lj_ir_kvalue(J->L, &keyv, fright);
  if (lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv) == niltvg(J2G(J)) &&
      lj_opt_fwd_href_nokey(J))
    return lj_ir_kkptr(J, niltvg(J2G(J)));
  return NEXTFOLD;
}

/* We can safely FOLD/CSE array/hash refs and field loads, since there
** are no corresponding stores. But we need to check for any NEWREF with
** an aliased table, as it may invalidate all of the pointers and fields.
** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
** FLOADs. And NEWREF itself is treated like a store (see below).
*/
LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tnew_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD(fleft->op1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TNEW IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tnew_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((1 << fleft->op2)-1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tdup_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tdup_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask);
  return NEXTFOLD;
}
1979 LJFOLD(HREF any any)
1980 LJFOLD(FLOAD any IRFL_TAB_ARRAY)
1981 LJFOLD(FLOAD any IRFL_TAB_NODE)
1982 LJFOLD(FLOAD any IRFL_TAB_ASIZE)
1983 LJFOLD(FLOAD any IRFL_TAB_HMASK)
1984 LJFOLDF(fload_tab_ah)
1986 TRef tr = lj_opt_cse(J);
1987 return lj_opt_fwd_tptr(J, tref_ref(tr)) ? tr : EMITFOLD;
1990 /* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
1991 LJFOLD(FLOAD KGC IRFL_STR_LEN)
1992 LJFOLDF(fload_str_len_kgc)
1994 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1995 return INTFOLD((int32_t)ir_kstr(fleft)->len);
1996 return NEXTFOLD;
LJFOLD(FLOAD SNEW IRFL_STR_LEN)
LJFOLDF(fload_str_len_snew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    PHIBARRIER(fleft);
    return fleft->op2;
  }
  return NEXTFOLD;
}
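/* Note on fload_str_len_snew: SNEW's op2 is its length operand, so the
** length load folds straight to that ref. PHIBARRIER punts (NEXTFOLD) when
** the SNEW is a PHI, since the forwarded ref may hold a different value on
** later loop iterations.
*/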
/* The C type ID of cdata objects is immutable. */
LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
LJFOLDF(fload_cdata_typeid_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kcdata(fleft)->ctypeid);
  return NEXTFOLD;
}

/* Get the contents of immutable cdata objects. */
LJFOLD(FLOAD KGC IRFL_CDATA_PTR)
LJFOLD(FLOAD KGC IRFL_CDATA_INT)
LJFOLD(FLOAD KGC IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_int64_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    void *p = cdataptr(ir_kcdata(fleft));
    if (irt_is64(fins->t))
      return INT64FOLD(*(uint64_t *)p);
    else
      return INTFOLD(*(int32_t *)p);
  }
  return NEXTFOLD;
}

LJFOLD(FLOAD CNEW IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD CNEWI IRFL_CDATA_CTYPEID)
LJFOLDF(fload_cdata_typeid_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op1; /* No PHI barrier needed. CNEW/CNEWI op1 is const. */
  return NEXTFOLD;
}

/* Pointer, int and int64 cdata objects are immutable. */
LJFOLD(FLOAD CNEWI IRFL_CDATA_PTR)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_ptr_int64_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op2; /* Fold even across PHI to avoid allocations. */
  return NEXTFOLD;
}
LJFOLD(FLOAD any IRFL_STR_LEN)
LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD any IRFL_CDATA_PTR)
LJFOLD(FLOAD any IRFL_CDATA_INT)
LJFOLD(FLOAD any IRFL_CDATA_INT64)
LJFOLD(VLOAD any any) /* Vararg loads have no corresponding stores. */
LJFOLDX(lj_opt_cse)

/* All other field loads need alias analysis. */
LJFOLD(FLOAD any any)
LJFOLDX(lj_opt_fwd_fload)
/* This is for LOOP only. Recording handles SLOADs internally. */
LJFOLD(SLOAD any any)
LJFOLDF(fwd_sload)
{
  if ((fins->op2 & IRSLOAD_FRAME)) {
    TRef tr = lj_opt_cse(J);
    return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
  } else {
    lua_assert(J->slot[fins->op1] != 0);
    return J->slot[fins->op1];
  }
}
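/* Note on fwd_sload: a CSE match for a frame slot that predates the latest
** RETF is stale, because RETF modifies BASE (see the RETF rule below), so
** such a load is re-emitted instead of reused.
*/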
/* Only fold for KKPTR. The pointer _and_ the contents must be const. */
LJFOLD(XLOAD KKPTR any)
LJFOLDF(xload_kptr)
{
  TRef tr = kfold_xload(J, fins, ir_kptr(fleft));
  return tr ? tr : NEXTFOLD;
}

LJFOLD(XLOAD any any)
LJFOLDX(lj_opt_fwd_xload)
/* -- Write barriers ------------------------------------------------------ */

/* Write barriers are amenable to CSE, but not across any incremental
** GC steps.
**
** The same logic applies to open upvalue references, because a stack
** may be resized during a GC step (not the current stack, but maybe that
** of a coroutine).
*/
LJFOLD(TBAR any)
LJFOLD(OBAR any any)
LJFOLD(UREFO any any)
LJFOLDF(barrier_tab)
{
  TRef tr = lj_opt_cse(J);
  if (gcstep_barrier(J, tref_ref(tr))) /* CSE across GC step? */
    return EMITFOLD; /* Raw emit. Assumes fins is left intact by CSE. */
  return tr;
}
LJFOLD(TBAR TNEW)
LJFOLD(TBAR TDUP)
LJFOLDF(barrier_tnew_tdup)
{
  /* New tables are always white and never need a barrier. */
  if (fins->op1 < J->chain[IR_LOOP]) /* Except across a GC step. */
    return NEXTFOLD;
  return DROPFOLD;
}
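/* Note on barrier_tnew_tdup: a table allocated before the LOOP instruction
** may have been turned non-white by a GC step on an earlier iteration, so
** its barrier is kept; NEXTFOLD falls through to the generic TBAR rule
** above.
*/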
/* -- Stores and allocations ---------------------------------------------- */

/* Stores and allocations cannot be folded or passed on to CSE in general.
** But some stores can be eliminated with dead-store elimination (DSE).
**
** Caveat: *all* stores and allocs must be listed here or they end up at CSE!
*/

LJFOLD(ASTORE any any)
LJFOLD(HSTORE any any)
LJFOLDX(lj_opt_dse_ahstore)

LJFOLD(USTORE any any)
LJFOLDX(lj_opt_dse_ustore)

LJFOLD(FSTORE any any)
LJFOLDX(lj_opt_dse_fstore)

LJFOLD(XSTORE any any)
LJFOLDX(lj_opt_dse_xstore)

LJFOLD(NEWREF any any) /* Treated like a store. */
LJFOLD(CALLS any any)
LJFOLD(CALLL any any) /* Safeguard fallback. */
LJFOLD(CALLXS any any)
LJFOLD(XBAR)
LJFOLD(RETF any any) /* Modifies BASE. */
LJFOLD(TNEW any any)
LJFOLD(TDUP any)
LJFOLD(CNEW any any)
LJFOLD(XSNEW any any)
LJFOLDX(lj_ir_emit)
/* ------------------------------------------------------------------------ */

/* Every entry in the generated hash table is a 32 bit pattern:
**
** xxxxxxxx iiiiiii lllllll rrrrrrrrrr
**
**   xxxxxxxx = 8 bit index into fold function table
**    iiiiiii = 7 bit folded instruction opcode
**    lllllll = 7 bit left instruction opcode
** rrrrrrrrrr = 10 bit field: 8 bit right instruction opcode or
**              10 bits from the literal field
*/
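/* A worked example of the lookup key built by lj_opt_fold() below, with
** purely illustrative opcode values (not the actual IR opcode numbers):
** for ins == 0x29, left == 0x32 and right == 0x1d, the 24 bit key is
**
**   (0x29 << 17) + (0x32 << 10) + 0x1d == 0x52c81d
**
** The matching hash table entry carries the fold function index in its
** top 8 bits.
*/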
#include "lj_folddef.h"
/* ------------------------------------------------------------------------ */

/* Fold IR instruction. */
TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
{
  uint32_t key, any;
  IRRef ref;

  if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
    lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
                JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT);
    /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
    if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
      return lj_opt_cse(J);

    /* Forwarding or CSE disabled? Emit raw IR for loads, except for SLOAD. */
    if ((J->flags & (JIT_F_OPT_FWD|JIT_F_OPT_CSE)) !=
        (JIT_F_OPT_FWD|JIT_F_OPT_CSE) &&
        irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD)
      return lj_ir_emit(J);

    /* DSE disabled? Emit raw IR for stores. */
    if (!(J->flags & JIT_F_OPT_DSE) && irm_kind(lj_ir_mode[fins->o]) == IRM_S)
      return lj_ir_emit(J);
  }

  /* Fold engine start/retry point. */
retry:
  /* Construct key from opcode and operand opcodes (unless literal/none). */
  key = ((uint32_t)fins->o << 17);
  if (fins->op1 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op1)->o << 10;
    *fleft = *IR(fins->op1);
  }
  if (fins->op2 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op2)->o;
    *fright = *IR(fins->op2);
  } else {
    key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */
  }

  /* Check for a match in order from most specific to least specific. */
  any = 0;
  for (;;) {
    uint32_t k = key | (any & 0x1ffff);
    uint32_t h = fold_hashkey(k);
    uint32_t fh = fold_hash[h]; /* Lookup key in semi-perfect hash table. */
    if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
      ref = (IRRef)tref_ref(fold_func[fh >> 24](J));
      if (ref != NEXTFOLD)
        break;
    }
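    /* The update below steps 'any' through 0xffc00, 0x3ff and 0xfffff,
    ** successively widening the lookup to left-any, right-any and both-any
    ** before folding is exhausted.
    */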
    if (any == 0xfffff) /* Exhausted folding. Pass on to CSE. */
      return lj_opt_cse(J);
    any = (any | (any >> 10)) ^ 0xffc00;
  }

  /* Return value processing, ordered by frequency. */
  if (LJ_LIKELY(ref >= MAX_FOLD))
    return TREF(ref, irt_t(IR(ref)->t));
  if (ref == RETRYFOLD)
    goto retry;
  if (ref == KINTFOLD)
    return lj_ir_kint(J, fins->i);
  if (ref == FAILFOLD)
    lj_trace_err(J, LJ_TRERR_GFAIL);
  lua_assert(ref == DROPFOLD);
  return REF_DROP;
}
/* -- Common-Subexpression Elimination ------------------------------------ */

/* CSE an IR instruction. This is very fast due to the skip-list chains. */
TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
{
  /* Avoid narrow-to-wide store-to-load forwarding stall. */
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  IROp op = fins->o;
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    /* Limited search for same operands in per-opcode chain. */
    IRRef ref = J->chain[op];
    IRRef lim = fins->op1;
    if (fins->op2 > lim) lim = fins->op2; /* Relies on lit < REF_BIAS. */
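    /* No instruction can precede its own operands, so the chain walk may
    ** safely stop once refs drop below the higher of the two operands.
    */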
    while (ref > lim) {
      if (IR(ref)->op12 == op12)
        return TREF(ref, irt_t(IR(ref)->t)); /* Common subexpression found. */
      ref = IR(ref)->prev;
    }
  }
  /* Otherwise emit IR (inlined for speed). */
  {
    IRRef ref = lj_ir_nextins(J);
    IRIns *ir = IR(ref);
    ir->prev = J->chain[op];
    ir->op12 = op12;
    J->chain[op] = (IRRef1)ref;
    ir->o = fins->o;
    J->guardemit.irt |= fins->t.irt;
    return TREF(ref, irt_t((ir->t = fins->t)));
  }
}
/* CSE with explicit search limit. */
TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim)
{
  IRRef ref = J->chain[fins->o];
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  while (ref > lim) {
    if (IR(ref)->op12 == op12)
      return ref;
    ref = IR(ref)->prev;
  }
  return lj_ir_emit(J);
}
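/* A sketch of the intended use of lj_opt_cselim() (illustrative only, not
** an actual call site): a load forwarder that must not match past the
** latest conflicting store could pass that store as the limit:
**
**   IRRef lim = J->chain[IR_ASTORE];
**   return lj_opt_cselim(J, lim);
**
** The real search limits come from the alias analysis in lj_opt_mem.c.
*/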
/* ------------------------------------------------------------------------ */

#undef IR
#undef fins
#undef fleft
#undef fright
#undef knumleft
#undef knumright
#undef emitir

#endif