/*
** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
** ABCelim: Array Bounds Check Elimination.
** CSE: Common-Subexpression Elimination.
** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_fold_c
#define LUA_CORE

#include <math.h>

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_str.h"
#include "lj_tab.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#if LJ_HASFFI
#include "lj_ctype.h"
#endif
#include "lj_carith.h"
#include "lj_vm.h"
#include "lj_strscan.h"
/* Here's a short description of how the FOLD engine processes instructions:
**
** The FOLD engine receives a single instruction stored in fins (J->fold.ins).
** The instruction and its operands are used to select matching fold rules.
** These are applied iteratively until a fixed point is reached.
**
** The 8 bit opcode of the instruction itself plus the opcodes of the
** two instructions referenced by its operands form a 24 bit key
** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits).
**
** This key is used for partial matching against the fold rules. The
** left/right operand fields of the key are successively masked with
** the 'any' wildcard, from most specific to least specific:
**
**   ins left right
**   ins any  right
**   ins left any
**   ins any  any
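**
** For example (opcodes chosen for illustration only): an ADD whose left
** operand is a KINT constant and whose right operand is a BSHL would be
** looked up as 'ADD KINT BSHL', then 'ADD any BSHL', then 'ADD KINT any'
** and finally 'ADD any any', stopping at the first rule that matches.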
**
** The masked key is used to lookup a matching fold rule in a semi-perfect
** hash table. If a matching rule is found, the related fold function is run.
** Multiple rules can share the same fold function. A fold rule may return
** one of several special values:
**
** - NEXTFOLD means no folding was applied, because an additional test
**   inside the fold function failed. Matching continues against less
**   specific fold rules. Finally the instruction is passed on to CSE.
**
** - RETRYFOLD means the instruction was modified in-place. Folding is
**   retried as if this instruction had just been received.
**
** All other return values are terminal actions -- no further folding is
** applied:
**
** - INTFOLD(i) returns a reference to the integer constant i.
**
** - LEFTFOLD and RIGHTFOLD return the left/right operand reference
**   without emitting an instruction.
**
** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit
**   it without passing through any further optimizations.
**
** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have
**   no result (e.g. guarded assertions): FAILFOLD means the guard would
**   always fail, i.e. the current trace is pointless. DROPFOLD means
**   the guard is always true and has been eliminated. CONDFOLD is a
**   shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail).
**
** - Any other return value is interpreted as an IRRef or TRef. This
**   can be a reference to an existing or a newly created instruction.
**   Only the least-significant 16 bits (IRRef1) are used to form a TRef
**   which is finally returned to the caller.
**
** The FOLD engine receives instructions both from the trace recorder and
** substituted instructions from LOOP unrolling. This means all types
** of instructions may end up here, even though the recorder bypasses
** FOLD in some cases. Thus all loads, stores and allocations must have
** an any/any rule to avoid being passed on to CSE.
**
** Carefully read the following requirements before adding or modifying
** any fold rules:
**
** Requirement #1: All fold rules must preserve their destination type.
**
** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result).
** Never use lj_ir_knumint() which can have either a KINT or KNUM result.
**
** Requirement #2: Fold rules should not create *new* instructions which
** reference operands *across* PHIs.
**
** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the
** left operand is a PHI. Then fleft->op1 would point across the PHI
** frontier to an invariant instruction. Adding a PHI for this instruction
** would be counterproductive. The solution is to add a barrier which
** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case.
** The only exception is for recurrences with high latencies like
** repeated int->num->int conversions.
**
** One could relax this condition a bit if the referenced instruction is
** a PHI, too. But this often leads to worse code due to excessive
** register shuffling.
**
** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though.
** Even returning fleft->op1 would be ok, because a new PHI will be added
** if needed. But again, this leads to excessive register shuffling and
** should be avoided.
**
** Requirement #3: The set of all fold rules must be monotonic to guarantee
** termination.
**
** The goal is optimization, so one primarily wants to add strength-reducing
** rules. This means eliminating an instruction or replacing an instruction
** with one or more simpler instructions. Don't add fold rules which point
** into the other direction.
**
** Some rules (like commutativity) do not directly reduce the strength of
** an instruction, but enable other fold rules (e.g. by moving constants
** to the right operand). These rules must be made unidirectional to avoid
** cycles.
**
** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it.
*/
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)		(&J->cur.ir[(ref)])
#define fins		(&J->fold.ins)
#define fleft		(&J->fold.left)
#define fright		(&J->fold.right)
#define knumleft	(ir_knum(fleft)->n)
#define knumright	(ir_knum(fright)->n)

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Fold function type. Fastcall on x86 significantly reduces their size. */
typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);

/* Macros for the fold specs, so buildvm can recognize them. */
#define LJFOLD(x)
#define LJFOLDX(x)
#define LJFOLDF(name)	static TRef LJ_FASTCALL fold_##name(jit_State *J)
/* Note: They must be at the start of a line or buildvm ignores them! */

/* Barrier to prevent using operands across PHIs. */
#define PHIBARRIER(ir)	if (irt_isphi((ir)->t)) return NEXTFOLD

/* Barrier to prevent folding across a GC step.
** GC steps can only happen at the head of a trace and at LOOP.
** And the GC is only driven forward if there is at least one allocation.
*/
#define gcstep_barrier(J, ref) \
  ((ref) < J->chain[IR_LOOP] && \
   (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
    J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
    J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR]))
/* -- Constant folding for FP numbers ------------------------------------- */

LJFOLD(ADD KNUM KNUM)
LJFOLD(SUB KNUM KNUM)
LJFOLD(MUL KNUM KNUM)
LJFOLD(DIV KNUM KNUM)
LJFOLD(NEG KNUM KNUM)
LJFOLD(ABS KNUM KNUM)
LJFOLD(ATAN2 KNUM KNUM)
LJFOLD(LDEXP KNUM KNUM)
LJFOLD(MIN KNUM KNUM)
LJFOLD(MAX KNUM KNUM)
LJFOLDF(kfold_numarith)
{
  lua_Number a = knumleft;
  lua_Number b = knumright;
  lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
  return lj_ir_knum(J, y);
}

LJFOLD(LDEXP KNUM KINT)
LJFOLDF(kfold_ldexp)
{
#if LJ_TARGET_X86ORX64
  UNUSED(J);
  return NEXTFOLD;
#else
  return lj_ir_knum(J, ldexp(knumleft, fright->i));
#endif
}

LJFOLD(FPMATH KNUM any)
LJFOLDF(kfold_fpmath)
{
  lua_Number a = knumleft;
  lua_Number y = lj_vm_foldfpm(a, fins->op2);
  return lj_ir_knum(J, y);
}

LJFOLD(POW KNUM KINT)
LJFOLDF(kfold_numpow)
{
  lua_Number a = knumleft;
  lua_Number b = (lua_Number)fright->i;
  lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
  return lj_ir_knum(J, y);
}

/* Must not use kfold_kref for numbers (could be NaN). */
LJFOLD(EQ KNUM KNUM)
LJFOLD(NE KNUM KNUM)
LJFOLD(LT KNUM KNUM)
LJFOLD(GE KNUM KNUM)
LJFOLD(LE KNUM KNUM)
LJFOLD(GT KNUM KNUM)
LJFOLD(ULT KNUM KNUM)
LJFOLD(UGE KNUM KNUM)
LJFOLD(ULE KNUM KNUM)
LJFOLD(UGT KNUM KNUM)
LJFOLDF(kfold_numcomp)
{
  return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o));
}

/* -- Constant folding for 32 bit integers -------------------------------- */

static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
{
  switch (op) {
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
  case IR_MUL: k1 *= k2; break;
  case IR_MOD: k1 = lj_vm_modi(k1, k2); break;
  case IR_NEG: k1 = -k1; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
  case IR_BSHL: k1 <<= (k2 & 31); break;
  case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break;
  case IR_BSAR: k1 >>= (k2 & 31); break;
  case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break;
  case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
  case IR_MIN: k1 = k1 < k2 ? k1 : k2; break;
  case IR_MAX: k1 = k1 > k2 ? k1 : k2; break;
  default: lua_assert(0); break;
  }
  return k1;
}
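/* Illustrative examples (values are hypothetical): kfold_intop(6, 4, IR_BSHL)
** yields 6 << 4 == 96, and kfold_intop(7, 3, IR_MIN) yields 3. Shift and
** rotate counts are masked with 31 first, matching the IR semantics.
*/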
LJFOLD(ADD KINT KINT)
LJFOLD(SUB KINT KINT)
LJFOLD(MUL KINT KINT)
LJFOLD(MOD KINT KINT)
LJFOLD(NEG KINT KINT)
LJFOLD(BAND KINT KINT)
LJFOLD(BOR KINT KINT)
LJFOLD(BXOR KINT KINT)
LJFOLD(BSHL KINT KINT)
LJFOLD(BSHR KINT KINT)
LJFOLD(BSAR KINT KINT)
LJFOLD(BROL KINT KINT)
LJFOLD(BROR KINT KINT)
LJFOLD(MIN KINT KINT)
LJFOLD(MAX KINT KINT)
LJFOLDF(kfold_intarith)
{
  return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
}

LJFOLD(ADDOV KINT KINT)
LJFOLD(SUBOV KINT KINT)
LJFOLD(MULOV KINT KINT)
LJFOLDF(kfold_intovarith)
{
  lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i,
				 fins->o - IR_ADDOV);
  int32_t k = lj_num2int(n);
  if (n != (lua_Number)k)
    return FAILFOLD;
  return INTFOLD(k);
}

LJFOLD(BNOT KINT)
LJFOLDF(kfold_bnot)
{
  return INTFOLD(~fleft->i);
}

LJFOLD(BSWAP KINT)
LJFOLDF(kfold_bswap)
{
  return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i));
}

LJFOLD(LT KINT KINT)
LJFOLD(GE KINT KINT)
LJFOLD(LE KINT KINT)
LJFOLD(GT KINT KINT)
LJFOLD(ULT KINT KINT)
LJFOLD(UGE KINT KINT)
LJFOLD(ULE KINT KINT)
LJFOLD(UGT KINT KINT)
LJFOLD(ABC KINT KINT)
LJFOLDF(kfold_intcomp)
{
  int32_t a = fleft->i, b = fright->i;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD(a < b);
  case IR_GE: return CONDFOLD(a >= b);
  case IR_LE: return CONDFOLD(a <= b);
  case IR_GT: return CONDFOLD(a > b);
  case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b);
  case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b);
  case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
  case IR_ABC:
  case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
  default: lua_assert(0); return FAILFOLD;
  }
}

LJFOLD(UGE any KINT)
LJFOLDF(kfold_intcomp0)
{
  if (fright->i == 0)
    return DROPFOLD;
  return NEXTFOLD;
}

/* -- Constant folding for 64 bit integers -------------------------------- */

static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
{
  switch (op) {
#if LJ_64 || LJ_HASFFI
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
#endif
#if LJ_HASFFI
  case IR_MUL: k1 *= k2; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
#endif
  default: UNUSED(k2); lua_assert(0); break;
  }
  return k1;
}

LJFOLD(ADD KINT64 KINT64)
LJFOLD(SUB KINT64 KINT64)
LJFOLD(MUL KINT64 KINT64)
LJFOLD(BAND KINT64 KINT64)
LJFOLD(BOR KINT64 KINT64)
LJFOLD(BXOR KINT64 KINT64)
LJFOLDF(kfold_int64arith)
{
  return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64,
				    ir_k64(fright)->u64, (IROp)fins->o));
}

LJFOLD(DIV KINT64 KINT64)
LJFOLD(MOD KINT64 KINT64)
LJFOLD(POW KINT64 KINT64)
LJFOLDF(kfold_int64arith2)
{
#if LJ_HASFFI
  uint64_t k1 = ir_k64(fleft)->u64, k2 = ir_k64(fright)->u64;
  if (irt_isi64(fins->t)) {
    k1 = fins->o == IR_DIV ? lj_carith_divi64((int64_t)k1, (int64_t)k2) :
	 fins->o == IR_MOD ? lj_carith_modi64((int64_t)k1, (int64_t)k2) :
			     lj_carith_powi64((int64_t)k1, (int64_t)k2);
  } else {
    k1 = fins->o == IR_DIV ? lj_carith_divu64(k1, k2) :
	 fins->o == IR_MOD ? lj_carith_modu64(k1, k2) :
			     lj_carith_powu64(k1, k2);
  }
  return INT64FOLD(k1);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(BSHL KINT64 KINT)
LJFOLD(BSHR KINT64 KINT)
LJFOLD(BSAR KINT64 KINT)
LJFOLD(BROL KINT64 KINT)
LJFOLD(BROR KINT64 KINT)
LJFOLDF(kfold_int64shift)
{
#if LJ_HASFFI || LJ_64
  uint64_t k = ir_k64(fleft)->u64;
  int32_t sh = (fright->i & 63);
  switch ((IROp)fins->o) {
  case IR_BSHL: k <<= sh; break;
#if LJ_HASFFI
  case IR_BSHR: k >>= sh; break;
  case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
  case IR_BROL: k = lj_rol(k, sh); break;
  case IR_BROR: k = lj_ror(k, sh); break;
#endif
  default: lua_assert(0); break;
  }
  return INT64FOLD(k);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(BNOT KINT64)
LJFOLDF(kfold_bnot64)
{
#if LJ_HASFFI
  return INT64FOLD(~ir_k64(fleft)->u64);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(BSWAP KINT64)
LJFOLDF(kfold_bswap64)
{
#if LJ_HASFFI
  return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64));
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(LT KINT64 KINT64)
LJFOLD(GE KINT64 KINT64)
LJFOLD(LE KINT64 KINT64)
LJFOLD(GT KINT64 KINT64)
LJFOLD(ULT KINT64 KINT64)
LJFOLD(UGE KINT64 KINT64)
LJFOLD(ULE KINT64 KINT64)
LJFOLD(UGT KINT64 KINT64)
LJFOLDF(kfold_int64comp)
{
#if LJ_HASFFI
  int64_t a = (int64_t)ir_k64(fleft)->u64, b = (int64_t)ir_k64(fright)->u64;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD(a < b);
  case IR_GE: return CONDFOLD(a >= b);
  case IR_LE: return CONDFOLD(a <= b);
  case IR_GT: return CONDFOLD(a > b);
  case IR_ULT: return CONDFOLD((uint64_t)a < (uint64_t)b);
  case IR_UGE: return CONDFOLD((uint64_t)a >= (uint64_t)b);
  case IR_ULE: return CONDFOLD((uint64_t)a <= (uint64_t)b);
  case IR_UGT: return CONDFOLD((uint64_t)a > (uint64_t)b);
  default: lua_assert(0); return FAILFOLD;
  }
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}
LJFOLD(UGE any KINT64)
LJFOLDF(kfold_int64comp0)
{
#if LJ_HASFFI
  if (ir_k64(fright)->u64 == 0)
    return DROPFOLD;
  return NEXTFOLD;
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

/* -- Constant folding for strings ---------------------------------------- */

LJFOLD(SNEW KKPTR KINT)
LJFOLDF(kfold_snew_kptr)
{
  GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i);
  return lj_ir_kstr(J, s);
}

LJFOLD(SNEW any KINT)
LJFOLDF(kfold_snew_empty)
{
  if (fright->i == 0)
    return lj_ir_kstr(J, &J2G(J)->strempty);
  return NEXTFOLD;
}

LJFOLD(STRREF KGC KINT)
LJFOLDF(kfold_strref)
{
  GCstr *str = ir_kstr(fleft);
  lua_assert((MSize)fright->i <= str->len);
  return lj_ir_kkptr(J, (char *)strdata(str) + fright->i);
}

LJFOLD(STRREF SNEW any)
LJFOLDF(kfold_strref_snew)
{
  PHIBARRIER(fleft);
  if (irref_isk(fins->op2) && fright->i == 0) {
    return fleft->op1;  /* strref(snew(ptr, len), 0) ==> ptr */
  } else {
    /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
    IRIns *ir = IR(fleft->op1);
    IRRef1 str = ir->op1;  /* IRIns * is not valid across emitir. */
    lua_assert(ir->o == IR_STRREF);
    PHIBARRIER(ir);
    fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2);  /* Clobbers fins! */
    fins->op1 = str;
    fins->ot = IRT(IR_STRREF, IRT_P32);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
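/* Illustrative example (offsets are hypothetical): a nested reference like
** strref(snew(strref(str, 4), len), 2) is rewritten by the rule above to
** strref(str, 4+2), i.e. the constant offsets are combined by an emitted
** ADD and a single STRREF on the original string remains.
*/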
LJFOLD(CALLN CARG IRCALL_lj_str_cmp)
LJFOLDF(kfold_strcmp)
{
  if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
    GCstr *a = ir_kstr(IR(fleft->op1));
    GCstr *b = ir_kstr(IR(fleft->op2));
    return INTFOLD(lj_str_cmp(a, b));
  }
  return NEXTFOLD;
}

/* -- Constant folding of pointer arithmetic ------------------------------ */

LJFOLD(ADD KGC KINT)
LJFOLD(ADD KGC KINT64)
LJFOLDF(kfold_add_kgc)
{
  GCobj *o = ir_kgc(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
#if LJ_HASFFI
  if (irt_iscdata(fleft->t)) {
    CType *ct = ctype_raw(ctype_ctsG(J2G(J)), gco2cd(o)->ctypeid);
    if (ctype_isnum(ct->info) || ctype_isenum(ct->info) ||
	ctype_isptr(ct->info) || ctype_isfunc(ct->info) ||
	ctype_iscomplex(ct->info) || ctype_isvector(ct->info))
      return lj_ir_kkptr(J, (char *)o + ofs);
  }
#endif
  return lj_ir_kptr(J, (char *)o + ofs);
}

LJFOLD(ADD KPTR KINT)
LJFOLD(ADD KPTR KINT64)
LJFOLD(ADD KKPTR KINT)
LJFOLD(ADD KKPTR KINT64)
LJFOLDF(kfold_add_kptr)
{
  void *p = ir_kptr(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
  return lj_ir_kptr_(J, fleft->o, (char *)p + ofs);
}

LJFOLD(ADD any KGC)
LJFOLD(ADD any KPTR)
LJFOLD(ADD any KKPTR)
LJFOLDF(kfold_add_kright)
{
  if (fleft->o == IR_KINT || fleft->o == IR_KINT64) {
    IRRef1 tmp = fins->op1; fins->op1 = fins->op2; fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* -- Constant folding of conversions ------------------------------------- */

LJFOLD(TOBIT KNUM KNUM)
LJFOLDF(kfold_tobit)
{
  return INTFOLD(lj_num2bit(knumleft));
}

LJFOLD(CONV KINT IRCONV_NUM_INT)
LJFOLDF(kfold_conv_kint_num)
{
  return lj_ir_knum(J, (lua_Number)fleft->i);
}

LJFOLD(CONV KINT IRCONV_NUM_U32)
LJFOLDF(kfold_conv_kintu32_num)
{
  return lj_ir_knum(J, (lua_Number)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT IRCONV_INT_I8)
LJFOLD(CONV KINT IRCONV_INT_U8)
LJFOLD(CONV KINT IRCONV_INT_I16)
LJFOLD(CONV KINT IRCONV_INT_U16)
LJFOLDF(kfold_conv_kint_ext)
{
  int32_t k = fleft->i;
  if ((fins->op2 & IRCONV_SRCMASK) == IRT_I8) k = (int8_t)k;
  else if ((fins->op2 & IRCONV_SRCMASK) == IRT_U8) k = (uint8_t)k;
  else if ((fins->op2 & IRCONV_SRCMASK) == IRT_I16) k = (int16_t)k;
  else k = (uint16_t)k;
  return INTFOLD(k);
}

LJFOLD(CONV KINT IRCONV_I64_INT)
LJFOLD(CONV KINT IRCONV_U64_INT)
LJFOLD(CONV KINT IRCONV_I64_U32)
LJFOLD(CONV KINT IRCONV_U64_U32)
LJFOLDF(kfold_conv_kint_i64)
{
  if ((fins->op2 & IRCONV_SEXT))
    return INT64FOLD((uint64_t)(int64_t)fleft->i);
  else
    return INT64FOLD((uint64_t)(int64_t)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT64 IRCONV_NUM_I64)
LJFOLDF(kfold_conv_kint64_num_i64)
{
  return lj_ir_knum(J, (lua_Number)(int64_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_NUM_U64)
LJFOLDF(kfold_conv_kint64_num_u64)
{
  return lj_ir_knum(J, (lua_Number)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_INT_I64)
LJFOLD(CONV KINT64 IRCONV_U32_I64)
LJFOLDF(kfold_conv_kint64_int_i64)
{
  return INTFOLD((int32_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KNUM IRCONV_INT_NUM)
LJFOLDF(kfold_conv_knum_int_num)
{
  lua_Number n = knumleft;
  if (!(fins->op2 & IRCONV_TRUNC)) {
    int32_t k = lj_num2int(n);
    if (irt_isguard(fins->t) && n != (lua_Number)k) {
      /* We're about to create a guard which always fails, like CONV +1.5.
      ** Some pathological loops cause this during LICM, e.g.:
      **   local x,k,t = 0,1.5,{1,[1.5]=2}
      **   for i=1,200 do x = x + t[k]; k = k == 1 and 1.5 or 1 end
      **   assert(x == 300)
      */
      return FAILFOLD;
    }
    return INTFOLD(k);
  } else {
    return INTFOLD((int32_t)n);
  }
}

LJFOLD(CONV KNUM IRCONV_U32_NUM)
LJFOLDF(kfold_conv_knum_u32_num)
{
  lua_assert((fins->op2 & IRCONV_TRUNC));
#ifdef _MSC_VER
  {  /* Workaround for MSVC bug. */
    volatile uint32_t u = (uint32_t)knumleft;
    return INTFOLD((int32_t)u);
  }
#else
  return INTFOLD((int32_t)(uint32_t)knumleft);
#endif
}

LJFOLD(CONV KNUM IRCONV_I64_NUM)
LJFOLDF(kfold_conv_knum_i64_num)
{
  lua_assert((fins->op2 & IRCONV_TRUNC));
  return INT64FOLD((uint64_t)(int64_t)knumleft);
}

LJFOLD(CONV KNUM IRCONV_U64_NUM)
LJFOLDF(kfold_conv_knum_u64_num)
{
  lua_assert((fins->op2 & IRCONV_TRUNC));
  return INT64FOLD(lj_num2u64(knumleft));
}

LJFOLD(TOSTR KNUM)
LJFOLDF(kfold_tostr_knum)
{
  return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft));
}

LJFOLD(TOSTR KINT)
LJFOLDF(kfold_tostr_kint)
{
  return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i));
}

LJFOLD(STRTO KGC)
LJFOLDF(kfold_strto)
{
  TValue n;
  if (lj_strscan_num(ir_kstr(fleft), &n))
    return lj_ir_knum(J, numV(&n));
  return FAILFOLD;
}

/* -- Constant folding of equality checks --------------------------------- */

/* Don't constant-fold away FLOAD checks against KNULL. */
LJFOLD(EQ FLOAD KNULL)
LJFOLD(NE FLOAD KNULL)
LJFOLDX(lj_opt_cse)

/* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
LJFOLD(EQ any KNULL)
LJFOLD(NE any KNULL)
LJFOLD(EQ KNULL any)
LJFOLD(NE KNULL any)
LJFOLD(EQ KINT KINT)  /* Constants are unique, so same refs <==> same value. */
LJFOLD(NE KINT KINT)
LJFOLD(EQ KINT64 KINT64)
LJFOLD(NE KINT64 KINT64)
LJFOLD(EQ KGC KGC)
LJFOLD(NE KGC KGC)
LJFOLDF(kfold_kref)
{
  return CONDFOLD((fins->op1 == fins->op2) ^ (fins->o == IR_NE));
}
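/* E.g. two KINT 42 operands always share one IR reference, since constants
** are interned; EQ on the same ref folds to true and NE folds to false,
** with no need to inspect the constant values themselves.
*/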
/* -- Algebraic shortcuts ------------------------------------------------- */

LJFOLD(FPMATH FPMATH IRFPM_FLOOR)
LJFOLD(FPMATH FPMATH IRFPM_CEIL)
LJFOLD(FPMATH FPMATH IRFPM_TRUNC)
LJFOLDF(shortcut_round)
{
  IRFPMathOp op = (IRFPMathOp)fleft->op2;
  if (op == IRFPM_FLOOR || op == IRFPM_CEIL || op == IRFPM_TRUNC)
    return LEFTFOLD;  /* round(round_left(x)) = round_left(x) */
  return NEXTFOLD;
}

LJFOLD(ABS ABS KNUM)
LJFOLDF(shortcut_left)
{
  return LEFTFOLD;  /* f(g(x)) ==> g(x) */
}

LJFOLD(ABS NEG KNUM)
LJFOLDF(shortcut_dropleft)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* abs(neg(x)) ==> abs(x) */
  return RETRYFOLD;
}

/* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
LJFOLD(NEG NEG any)
LJFOLD(BNOT BNOT)
LJFOLD(BSWAP BSWAP)
LJFOLDF(shortcut_leftleft)
{
  PHIBARRIER(fleft);  /* See above. Fold would be ok, but not beneficial. */
  return fleft->op1;  /* f(g(x)) ==> x */
}

/* -- FP algebraic simplifications ---------------------------------------- */

/* FP arithmetic is tricky -- there's not much to simplify.
** Please note the following common pitfalls before sending "improvements":
**   x+0 ==> x  is INVALID for x=-0
**   0-x ==> -x is INVALID for x=+0
**   x*0 ==> 0  is INVALID for x=-0, x=+-Inf or x=NaN
*/

LJFOLD(ADD NEG any)
LJFOLDF(simplify_numadd_negx)
{
  PHIBARRIER(fleft);
  fins->o = IR_SUB;  /* (-a) + b ==> b - a */
  fins->op1 = fins->op2;
  fins->op2 = fleft->op1;
  return RETRYFOLD;
}

LJFOLD(ADD any NEG)
LJFOLDF(simplify_numadd_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_SUB;  /* a + (-b) ==> a - b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(SUB any KNUM)
LJFOLDF(simplify_numsub_k)
{
  lua_Number n = knumright;
  if (n == 0.0)  /* x - (+-0) ==> x */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB NEG KNUM)
LJFOLDF(simplify_numsub_negk)
{
  PHIBARRIER(fleft);
  fins->op2 = fleft->op1;  /* (-x) - k ==> (-k) - x */
  fins->op1 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(SUB any NEG)
LJFOLDF(simplify_numsub_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_ADD;  /* a - (-b) ==> a + b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(MUL any KNUM)
LJFOLD(DIV any KNUM)
LJFOLDF(simplify_nummuldiv_k)
{
  lua_Number n = knumright;
  if (n == 1.0) {  /* x o 1 ==> x */
    return LEFTFOLD;
  } else if (n == -1.0) {  /* x o -1 ==> -x */
    fins->o = IR_NEG;
    fins->op2 = (IRRef1)lj_ir_knum_neg(J);
    return RETRYFOLD;
  } else if (fins->o == IR_MUL && n == 2.0) {  /* x * 2 ==> x + x */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  } else if (fins->o == IR_DIV) {  /* x / 2^k ==> x * 2^-k */
    uint64_t u = ir_knum(fright)->u64;
    uint32_t ex = ((uint32_t)(u >> 52) & 0x7ff);
    if ((u & U64x(000fffff,ffffffff)) == 0 && ex - 1 < 0x7fd) {
      u = (u & ((uint64_t)1 << 63)) | ((uint64_t)(0x7fe - ex) << 52);
      fins->o = IR_MUL;  /* Multiply by exact reciprocal. */
      fins->op2 = lj_ir_knum_u64(J, u);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}
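/* Worked example for the reciprocal case above: for x / 4.0 the constant
** has zero mantissa bits and biased exponent ex == 0x401, so the rewritten
** constant gets exponent 0x7fe - 0x401 == 0x3fd, which is exactly 0.25,
** and the DIV becomes a MUL. The check ex - 1 < 0x7fd rules out zeros,
** denormals, infinities, NaNs and the largest exponent, where the
** reciprocal is not exactly representable.
*/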
LJFOLD(MUL NEG KNUM)
LJFOLD(DIV NEG KNUM)
LJFOLDF(simplify_nummuldiv_negk)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* (-a) o k ==> a o (-k) */
  fins->op2 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(MUL NEG NEG)
LJFOLD(DIV NEG NEG)
LJFOLDF(simplify_nummuldiv_negneg)
{
  PHIBARRIER(fleft);
  PHIBARRIER(fright);
  fins->op1 = fleft->op1;  /* (-a) o (-b) ==> a o b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(POW any KINT)
LJFOLDF(simplify_numpow_xk)
{
  int32_t k = fright->i;
  TRef ref = fins->op1;
  if (k == 0)  /* x ^ 0 ==> 1 */
    return lj_ir_knum_one(J);  /* Result must be a number, not an int. */
  if (k == 1)  /* x ^ 1 ==> x */
    return LEFTFOLD;
  if ((uint32_t)(k+65536) > 2*65536u)  /* Limit code explosion. */
    return NEXTFOLD;
  if (k < 0) {  /* x ^ (-k) ==> (1/x) ^ k. */
    ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
    k = -k;
  }
  /* Unroll x^k for 1 <= k <= 65536. */
  for (; (k & 1) == 0; k >>= 1)  /* Handle leading zeros. */
    ref = emitir(IRTN(IR_MUL), ref, ref);
  if ((k >>= 1) != 0) {  /* Handle trailing bits. */
    TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
    for (; k != 1; k >>= 1) {
      if (k & 1)
	ref = emitir(IRTN(IR_MUL), ref, tmp);
      tmp = emitir(IRTN(IR_MUL), tmp, tmp);
    }
    ref = emitir(IRTN(IR_MUL), ref, tmp);
  }
  return ref;
}
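/* Example of the unrolling above (k == 5, binary 101): the code emits
** tmp = x*x, then tmp = tmp*tmp, then ref = x*tmp, computing x^5 with
** three multiplies instead of a runtime pow() call.
*/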
LJFOLD(POW KNUM any)
LJFOLDF(simplify_numpow_kx)
{
  lua_Number n = knumleft;
  if (n == 2.0) {  /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
    fins->o = IR_CONV;
#if LJ_TARGET_X86ORX64
    fins->op1 = fins->op2;
    fins->op2 = IRCONV_NUM_INT;
    fins->op2 = (IRRef1)lj_opt_fold(J);
#endif
    fins->op1 = (IRRef1)lj_ir_knum_one(J);
    fins->o = IR_LDEXP;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* -- Simplify conversions ------------------------------------------------ */

LJFOLD(CONV CONV IRCONV_NUM_INT)  /* _NUM */
LJFOLDF(shortcut_conv_num_int)
{
  PHIBARRIER(fleft);
  /* Only safe with a guarded conversion to int. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM && irt_isguard(fleft->t))
    return fleft->op1;  /* f(g(x)) ==> x */
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_NUM)  /* _INT */
LJFOLD(CONV CONV IRCONV_U32_NUM)  /* _U32 */
LJFOLDF(simplify_conv_int_num)
{
  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
  if ((fleft->op2 & IRCONV_SRCMASK) ==
      ((fins->op2 & IRCONV_DSTMASK) >> IRCONV_DSH))
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_I64_NUM)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U64_NUM)  /* _INT or _U32 */
LJFOLDF(simplify_conv_i64_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
    /* Reduce to a sign-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
    return RETRYFOLD;
  } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
#if LJ_TARGET_X64
    return fleft->op1;
#else
    /* Reduce to a zero-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = (IRT_I64<<5)|IRT_U32;
    return RETRYFOLD;
#endif
  }
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_I64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_INT_U64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U32_I64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U32_U64)  /* _INT or _U32 */
LJFOLDF(simplify_conv_int_i64)
{
  int src;
  PHIBARRIER(fleft);
  src = (fleft->op2 & IRCONV_SRCMASK);
  if (src == IRT_INT || src == IRT_U32) {
    if (src == ((fins->op2 & IRCONV_DSTMASK) >> IRCONV_DSH)) {
      return fleft->op1;
    } else {
      fins->op2 = ((fins->op2 & IRCONV_DSTMASK) | src);
      fins->op1 = fleft->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_FLOAT_NUM)  /* _FLOAT */
LJFOLDF(simplify_conv_flt_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_FLOAT)
    return fleft->op1;
  return NEXTFOLD;
}

/* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(TOBIT CONV KNUM)
LJFOLDF(simplify_tobit_conv)
{
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
      (fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
    /* Fold even across PHI to avoid expensive num->int conversions in loop. */
    lua_assert(irt_isnum(fleft->t));
    return fleft->op1;
  }
  return NEXTFOLD;
}

/* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(FPMATH CONV IRFPM_FLOOR)
LJFOLD(FPMATH CONV IRFPM_CEIL)
LJFOLD(FPMATH CONV IRFPM_TRUNC)
LJFOLDF(simplify_floor_conv)
{
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
      (fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
    return LEFTFOLD;
  return NEXTFOLD;
}

/* Strength reduction of widening. */
LJFOLD(CONV any IRCONV_I64_INT)
LJFOLD(CONV any IRCONV_U64_INT)
LJFOLDF(simplify_conv_sext)
{
  IRRef ref = fins->op1;
  int64_t ofs = 0;
  if (!(fins->op2 & IRCONV_SEXT))
    return NEXTFOLD;
  PHIBARRIER(fleft);
  if (fleft->o == IR_XLOAD && (irt_isu8(fleft->t) || irt_isu16(fleft->t)))
    goto ok_reduce;
  if (fleft->o == IR_ADD && irref_isk(fleft->op2)) {
    ofs = (int64_t)IR(fleft->op2)->i;
    ref = fleft->op1;
  }
  /* Use scalar evolution analysis results to strength-reduce sign-extension. */
  if (ref == J->scev.idx) {
    IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
    lua_assert(irt_isint(J->scev.t));
    if (lo && IR(lo)->i + ofs >= 0) {
    ok_reduce:
#if LJ_TARGET_X64
      /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
      return LEFTFOLD;
#else
      /* Reduce to a (cheaper) zero-extension. */
      fins->op2 &= ~IRCONV_SEXT;
      return RETRYFOLD;
#endif
    }
  }
  return NEXTFOLD;
}
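/* E.g. an array index widened for 64 bit addressing: if scalar evolution
** shows the loop index starts at a non-negative constant, CONV(i+ofs,
** I64<-INT, SEXT) can never see a negative value, so the sign-extension is
** dropped on x64 (implicit zero-extension) or reduced to a cheaper
** zero-extension elsewhere.
*/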
/* Strength reduction of narrowing. */
LJFOLD(CONV ADD IRCONV_INT_I64)
LJFOLD(CONV SUB IRCONV_INT_I64)
LJFOLD(CONV MUL IRCONV_INT_I64)
LJFOLD(CONV ADD IRCONV_INT_U64)
LJFOLD(CONV SUB IRCONV_INT_U64)
LJFOLD(CONV MUL IRCONV_INT_U64)
LJFOLD(CONV ADD IRCONV_U32_I64)
LJFOLD(CONV SUB IRCONV_U32_I64)
LJFOLD(CONV MUL IRCONV_U32_I64)
LJFOLD(CONV ADD IRCONV_U32_U64)
LJFOLD(CONV SUB IRCONV_U32_U64)
LJFOLD(CONV MUL IRCONV_U32_U64)
LJFOLDF(simplify_conv_narrow)
{
  IROp op = (IROp)fleft->o;
  IRType t = irt_type(fins->t);
  IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
  PHIBARRIER(fleft);
  op1 = emitir(IRTI(IR_CONV), op1, mode);
  op2 = emitir(IRTI(IR_CONV), op2, mode);
  fins->ot = IRT(op, t);
  fins->op1 = op1;
  fins->op2 = op2;
  return RETRYFOLD;
}

/* Special CSE rule for CONV. */
LJFOLD(CONV any any)
LJFOLDF(cse_conv)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
    uint8_t guard = irt_isguard(fins->t);
    IRRef ref = J->chain[IR_CONV];
    while (ref > op1) {
      IRIns *ir = IR(ref);
      /* Commoning with stronger checks is ok. */
      if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
	  irt_isguard(ir->t) >= guard)
	return ref;
      ref = ir->prev;
    }
  }
  return EMITFOLD;  /* No fallthrough to regular CSE. */
}

/* FP conversion narrowing. */
LJFOLD(TOBIT ADD KNUM)
LJFOLD(TOBIT SUB KNUM)
LJFOLD(CONV ADD IRCONV_INT_NUM)
LJFOLD(CONV SUB IRCONV_INT_NUM)
LJFOLD(CONV ADD IRCONV_I64_NUM)
LJFOLD(CONV SUB IRCONV_I64_NUM)
LJFOLDF(narrow_convert)
{
  PHIBARRIER(fleft);
  /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
  if (J->chain[IR_LOOP])
    return NEXTFOLD;
  lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT);
  return lj_opt_narrow_convert(J);
}

/* -- Integer algebraic simplifications ----------------------------------- */

LJFOLD(ADD any KINT)
LJFOLD(ADDOV any KINT)
LJFOLD(SUBOV any KINT)
LJFOLDF(simplify_intadd_k)
{
  if (fright->i == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(MULOV any KINT)
LJFOLDF(simplify_intmul_k)
{
  if (fright->i == 0)  /* i * 0 ==> 0 */
    return RIGHTFOLD;
  if (fright->i == 1)  /* i * 1 ==> i */
    return LEFTFOLD;
  if (fright->i == 2) {  /* i * 2 ==> i + i */
    fins->o = IR_ADDOV;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any KINT)
LJFOLDF(simplify_intsub_k)
{
  if (fright->i == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i);  /* Overflow for -2^31 ok. */
  return RETRYFOLD;
}

LJFOLD(SUB KINT any)
LJFOLD(SUB KINT64 any)
LJFOLDF(simplify_intsub_kleft)
{
  if (fleft->o == IR_KINT ? (fleft->i == 0) : (ir_kint64(fleft)->u64 == 0)) {
    fins->o = IR_NEG;  /* 0 - i ==> -i */
    fins->op1 = fins->op2;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(ADD any KINT64)
LJFOLDF(simplify_intadd_k64)
{
  if (ir_kint64(fright)->u64 == 0)  /* i + 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB any KINT64)
LJFOLDF(simplify_intsub_k64)
{
  uint64_t k = ir_kint64(fright)->u64;
  if (k == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint64(J, (uint64_t)-(int64_t)k);
  return RETRYFOLD;
}

static TRef simplify_intmul_k(jit_State *J, int32_t k)
{
  /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2.
  ** But this is mainly intended for simple address arithmetic.
  ** Also it's easier for the backend to optimize the original multiplies.
  */
  if (k == 1) {  /* i * 1 ==> i */
    return LEFTFOLD;
  } else if ((k & (k-1)) == 0) {  /* i * 2^k ==> i << k */
    fins->o = IR_BSHL;
    fins->op2 = lj_ir_kint(J, lj_fls((uint32_t)k));
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
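/* E.g. a call with k == 8 turns i * 8 into i << 3 (lj_fls(8) == 3); other
** constants fall through to NEXTFOLD and keep the multiply, which the
** backends often handle well anyway (e.g. via scaled addressing on x86/x64).
*/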
LJFOLD(MUL any KINT)
LJFOLDF(simplify_intmul_k32)
{
  if (fright->i == 0)  /* i * 0 ==> 0 */
    return INTFOLD(0);
  else if (fright->i > 0)
    return simplify_intmul_k(J, fright->i);
  return NEXTFOLD;
}

LJFOLD(MUL any KINT64)
LJFOLDF(simplify_intmul_k64)
{
  if (ir_kint64(fright)->u64 == 0)  /* i * 0 ==> 0 */
    return INT64FOLD(0);
#if LJ_64
  /* NYI: SPLIT for BSHL and 32 bit backend support. */
  else if (ir_kint64(fright)->u64 < 0x80000000u)
    return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
#endif
  return NEXTFOLD;
}

LJFOLD(MOD any KINT)
LJFOLDF(simplify_intmod_k)
{
  int32_t k = fright->i;
  lua_assert(k != 0);
  if (k > 0 && (k & (k-1)) == 0) {  /* i % (2^k) ==> i & (2^k-1) */
    fins->o = IR_BAND;
    fins->op2 = lj_ir_kint(J, k-1);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(MOD KINT any)
LJFOLDF(simplify_intmod_kleft)
{
  if (fleft->i == 0)
    return INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB any any)
LJFOLD(SUBOV any any)
LJFOLDF(simplify_intsub)
{
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))  /* i - i ==> 0 */
    return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB ADD any)
LJFOLDF(simplify_intsubadd_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1)  /* (i + j) - i ==> j */
      return fleft->op2;
    if (fins->op2 == fleft->op2)  /* (i + j) - j ==> i */
      return fleft->op1;
  }
  return NEXTFOLD;
}

LJFOLD(SUB SUB any)
LJFOLDF(simplify_intsubsub_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1) {  /* (i - j) - i ==> 0 - j */
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      fins->op2 = fleft->op2;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB any SUB)
LJFOLDF(simplify_intsubsub_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1)  /* i - (i - j) ==> j */
      return fright->op2;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any ADD)
LJFOLDF(simplify_intsubadd_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1) {  /* i - (i + j) ==> 0 - j */
      fins->op2 = fright->op2;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
    if (fins->op1 == fright->op2) {  /* i - (j + i) ==> 0 - j */
      fins->op2 = fright->op1;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB ADD ADD)
LJFOLDF(simplify_intsubaddadd_cancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    PHIBARRIER(fright);
    if (fleft->op1 == fright->op1) {  /* (i + j1) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op1 == fright->op2) {  /* (i + j1) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op1) {  /* (j1 + i) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op2) {  /* (j1 + i) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BAND any KINT)
LJFOLD(BAND any KINT64)
LJFOLDF(simplify_band_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
				     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i & 0 ==> 0 */
    return RIGHTFOLD;
  if (k == -1)  /* i & -1 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BOR any KINT)
LJFOLD(BOR any KINT64)
LJFOLDF(simplify_bor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
				     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i | 0 ==> i */
    return LEFTFOLD;
  if (k == -1)  /* i | -1 ==> -1 */
    return RIGHTFOLD;
  return NEXTFOLD;
}

LJFOLD(BXOR any KINT)
LJFOLD(BXOR any KINT64)
LJFOLDF(simplify_bxor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
				     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i xor 0 ==> i */
    return LEFTFOLD;
  if (k == -1) {  /* i xor -1 ==> ~i */
    fins->o = IR_BNOT;
    fins->op2 = 0;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BSHL any KINT)
LJFOLD(BSHR any KINT)
LJFOLD(BSAR any KINT)
LJFOLD(BROL any KINT)
LJFOLD(BROR any KINT)
LJFOLDF(simplify_shift_ik)
{
  int32_t mask = irt_is64(fins->t) ? 63 : 31;
  int32_t k = (fright->i & mask);
  if (k == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  if (k == 1 && fins->o == IR_BSHL) {  /* i << 1 ==> i + i */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  if (k != fright->i) {  /* i o k ==> i o (k & mask) */
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
#ifndef LJ_TARGET_UNIFYROT
  if (fins->o == IR_BROR) {  /* bror(i, k) ==> brol(i, (-k)&mask) */
    fins->o = IR_BROL;
    fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&mask);
    return RETRYFOLD;
  }
#endif
  return NEXTFOLD;
}
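/* E.g. for a 32 bit operand, i << 33 is first reduced to i << 1 (33 & 31)
** and then to i + i; without LJ_TARGET_UNIFYROT, bror(i, 5) becomes
** brol(i, 27), so only one rotate direction needs backend support.
*/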
LJFOLD(BSHL any BAND)
LJFOLD(BSHR any BAND)
LJFOLD(BSAR any BAND)
LJFOLD(BROL any BAND)
LJFOLD(BROR any BAND)
LJFOLDF(simplify_shift_andk)
{
  IRIns *irk = IR(fright->op2);
  PHIBARRIER(fright);
  if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
      irk->o == IR_KINT) {  /* i o (j & mask) ==> i o j */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = irk->i & mask;
    if (k == mask) {
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BSHL KINT any)
LJFOLD(BSHR KINT any)
LJFOLD(BSHL KINT64 any)
LJFOLD(BSHR KINT64 any)
LJFOLDF(simplify_shift1_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
				    (int64_t)ir_k64(fleft)->u64;
  if (k == 0)  /* 0 o i ==> 0 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSAR KINT any)
LJFOLD(BROL KINT any)
LJFOLD(BROR KINT any)
LJFOLD(BSAR KINT64 any)
LJFOLD(BROL KINT64 any)
LJFOLD(BROR KINT64 any)
LJFOLDF(simplify_shift2_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
				    (int64_t)ir_k64(fleft)->u64;
  if (k == 0 || k == -1)  /* 0 o i ==> 0; -1 o i ==> -1 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSHL BAND KINT)
LJFOLD(BSHR BAND KINT)
LJFOLD(BROL BAND KINT)
LJFOLD(BROR BAND KINT)
LJFOLDF(simplify_shiftk_andk)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);
  if (irk->o == IR_KINT) {  /* (i & k1) o k2 ==> (i o k2) & (k1 o k2) */
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    fins->op1 = fleft->op1;
    fins->op1 = (IRRef1)lj_opt_fold(J);
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    fins->ot = IRTI(IR_BAND);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BAND BSHL KINT)
LJFOLD(BAND BSHR KINT)
LJFOLDF(simplify_andk_shiftk)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT &&
      kfold_intop(-1, irk->i, (IROp)fleft->o) == fright->i)
    return LEFTFOLD;  /* (i o k1) & k2 ==> i, if (-1 o k1) == k2 */
  return NEXTFOLD;
}
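/* E.g. (i << 4) & 0xfffffff0 folds to just i << 4, because (-1 << 4) is
** 0xfffffff0: the BAND can only clear bits the shift already cleared.
*/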
/* -- Reassociation ------------------------------------------------------- */

LJFOLD(ADD ADD KINT)
LJFOLD(MUL MUL KINT)
LJFOLD(BAND BAND KINT)
LJFOLD(BOR BOR KINT)
LJFOLD(BXOR BXOR KINT)
LJFOLDF(reassoc_intarith_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT) {
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    if (k == irk->i)  /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
}
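/* E.g. (i + 1) + 2 is reassociated to i + 3, and (i | 0xff) | 0x0f folds
** straight to i | 0xff, since OR-ing 0x0f into the constant changes
** nothing.
*/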
LJFOLD(ADD ADD KINT64)
LJFOLD(MUL MUL KINT64)
LJFOLD(BAND BAND KINT64)
LJFOLD(BOR BOR KINT64)
LJFOLD(BXOR BXOR KINT64)
LJFOLDF(reassoc_intarith_k64)
{
#if LJ_HASFFI || LJ_64
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT64) {
    uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
				  ir_k64(fright)->u64, (IROp)fins->o);
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint64(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(MIN MIN any)
LJFOLD(MAX MAX any)
LJFOLD(BAND BAND any)
LJFOLD(BOR BOR any)
LJFOLDF(reassoc_dup)
{
  if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
    return LEFTFOLD;  /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */
  return NEXTFOLD;
}

LJFOLD(BXOR BXOR any)
LJFOLDF(reassoc_bxor)
{
  PHIBARRIER(fleft);
  if (fins->op2 == fleft->op1)  /* (a xor b) xor a ==> b */
    return fleft->op2;
  if (fins->op2 == fleft->op2)  /* (a xor b) xor b ==> a */
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(BSHL BSHL KINT)
LJFOLD(BSHR BSHR KINT)
LJFOLD(BSAR BSAR KINT)
LJFOLD(BROL BROL KINT)
LJFOLD(BROR BROR KINT)
LJFOLDF(reassoc_shift)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);  /* The (shift any KINT) rule covers k2 == 0 and more. */
  if (irk->o == IR_KINT) {  /* (i o k1) o k2 ==> i o (k1 + k2) */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = (irk->i & mask) + (fright->i & mask);
    if (k > mask) {  /* Combined shift too wide? */
      if (fins->o == IR_BSHL || fins->o == IR_BSHR)
	return mask == 31 ? INTFOLD(0) : INT64FOLD(0);
      else if (fins->o == IR_BSAR)
	k = mask;
      else
	k &= mask;
    }
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(MIN MIN KNUM)
LJFOLD(MAX MAX KNUM)
LJFOLD(MIN MIN KINT)
LJFOLD(MAX MAX KINT)
LJFOLDF(reassoc_minmax_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KNUM) {
    lua_Number a = ir_knum(irk)->n;
    lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD);
    if (a == y)  /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_knum(J, y);
    return RETRYFOLD;  /* (x o k1) o k2 ==> x o (k1 o k2) */
  } else if (irk->o == IR_KINT) {
    int32_t a = irk->i;
    int32_t y = kfold_intop(a, fright->i, fins->o);
    if (a == y)  /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, y);
    return RETRYFOLD;  /* (x o k1) o k2 ==> x o (k1 o k2) */
  }
  return NEXTFOLD;
}

LJFOLD(MIN MAX any)
LJFOLD(MAX MIN any)
LJFOLDF(reassoc_minmax_left)
{
  if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
    return RIGHTFOLD;  /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */
  return NEXTFOLD;
}

LJFOLD(MIN any MAX)
LJFOLD(MAX any MIN)
LJFOLDF(reassoc_minmax_right)
{
  if (fins->op1 == fright->op1 || fins->op1 == fright->op2)
    return LEFTFOLD;  /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */
  return NEXTFOLD;
}

/* -- Array bounds check elimination -------------------------------------- */

/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
*/
LJFOLD(ABC any ADD)
LJFOLDF(abc_fwd)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    if (irref_isk(fright->op2)) {
      IRIns *add2 = IR(fright->op1);
      if (add2->o == IR_ADD && irref_isk(add2->op2) &&
	  IR(fright->op2)->i == -IR(add2->op2)->i) {
	IRRef ref = J->chain[IR_ABC];
	IRRef lim = add2->op1;
	if (fins->op1 > lim) lim = fins->op1;
	while (ref > lim) {
	  IRIns *ir = IR(ref);
	  if (ir->op1 == fins->op1 && ir->op2 == add2->op1)
	    return DROPFOLD;
	  ref = ir->prev;
	}
      }
    }
  }
  return NEXTFOLD;
}
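/* E.g. when t[i] and t[i-1] are accessed in the same iteration, the second
** check can arrive as ABC(asize, (i + (-1)) + 1); the rule above recognizes
** that this index equals i and drops the check if an identical
** ABC(asize, i) is already on the trace.
*/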
/* Eliminate ABC for constants.
** ABC(asize, k1), ABC(asize, k2) ==> ABC(asize, max(k1, k2))
** Drop second ABC if k2 is lower. Otherwise patch first ABC with k2.
*/
LJFOLD(ABC any KINT)
LJFOLDF(abc_k)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    IRRef ref = J->chain[IR_ABC];
    IRRef asize = fins->op1;
    while (ref > asize) {
      IRIns *ir = IR(ref);
      if (ir->op1 == asize && irref_isk(ir->op2)) {
	int32_t k = IR(ir->op2)->i;
	if (fright->i > k)
	  ir->op2 = fins->op2;
	return DROPFOLD;
      }
      ref = ir->prev;
    }
    return EMITFOLD;  /* Already performed CSE. */
  }
  return NEXTFOLD;
}

/* Eliminate invariant ABC inside loop. */
LJFOLD(ABC any any)
LJFOLDF(abc_invar)
{
  /* Invariant ABC marked as PTR. Drop if op1 is invariant, too. */
  if (!irt_isint(fins->t) && fins->op1 < J->chain[IR_LOOP])
    return DROPFOLD;
  return NEXTFOLD;
}

/* -- Commutativity ------------------------------------------------------- */

/* The refs of commutative ops are canonicalized. Lower refs go to the right.
** Rationale behind this:
** - It (also) moves constants to the right.
** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices).
** - It helps CSE to find more matches.
** - The assembler generates better code with constants at the right.
*/

LJFOLD(ADD any any)
LJFOLD(MUL any any)
LJFOLD(ADDOV any any)
LJFOLD(MULOV any any)
LJFOLDF(comm_swap)
{
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
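/* Constants are allocated at the lowest refs in the IR, so this swap in
** particular moves constants to the right, e.g. ADD(KINT, i) becomes
** ADD(i, KINT), the form the (op any KINT) rules above expect.
*/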
LJFOLD(EQ any any)
LJFOLD(NE any any)
LJFOLDF(comm_equal)
{
  /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD(fins->o == IR_EQ);
  return fold_comm_swap(J);
}

LJFOLD(LT any any)
LJFOLD(GE any any)
LJFOLD(LE any any)
LJFOLD(GT any any)
LJFOLD(ULT any any)
LJFOLD(UGE any any)
LJFOLD(ULE any any)
LJFOLD(UGT any any)
LJFOLDF(comm_comp)
{
  /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD((fins->o ^ (fins->o >> 1)) & 1);
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    fins->o ^= 3;  /* GT <-> LT, GE <-> LE, does not affect U */
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BAND any any)
LJFOLD(BOR any any)
LJFOLD(MIN any any)
LJFOLD(MAX any any)
LJFOLDF(comm_dup)
{
  if (fins->op1 == fins->op2)  /* x o x ==> x */
    return LEFTFOLD;
  return fold_comm_swap(J);
}

LJFOLD(BXOR any any)
LJFOLDF(comm_bxor)
{
  if (fins->op1 == fins->op2)  /* i xor i ==> 0 */
    return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
  return fold_comm_swap(J);
}
/* -- Simplification of compound expressions ------------------------------ */

static TRef kfold_xload(jit_State *J, IRIns *ir, const void *p)
{
  int32_t k;
  switch (irt_type(ir->t)) {
  case IRT_NUM: return lj_ir_knum_u64(J, *(uint64_t *)p);
  case IRT_I8: k = (int32_t)*(int8_t *)p; break;
  case IRT_U8: k = (int32_t)*(uint8_t *)p; break;
  case IRT_I16: k = (int32_t)(int16_t)lj_getu16(p); break;
  case IRT_U16: k = (int32_t)(uint16_t)lj_getu16(p); break;
  case IRT_INT: case IRT_U32: k = (int32_t)lj_getu32(p); break;
  case IRT_I64: case IRT_U64: return lj_ir_kint64(J, *(uint64_t *)p);
  default: return 0;
  }
  return lj_ir_kint(J, k);
}

/* Turn: string.sub(str, a, b) == kstr
** into: string.byte(str, a) == string.byte(kstr, 1) etc.
** Note: this creates unaligned XLOADs on x86/x64.
*/
LJFOLD(EQ SNEW KGC)
LJFOLD(NE SNEW KGC)
LJFOLDF(merge_eqne_snew_kgc)
{
  GCstr *kstr = ir_kstr(fright);
  int32_t len = (int32_t)kstr->len;
  lua_assert(irt_isstr(fins->t));

#if LJ_TARGET_UNALIGNED
#define FOLD_SNEW_MAX_LEN	4  /* Handle string lengths 0, 1, 2, 3, 4. */
#define FOLD_SNEW_TYPE8		IRT_I8	/* Creates shorter immediates. */
#else
#define FOLD_SNEW_MAX_LEN	1  /* Handle string lengths 0 or 1. */
#define FOLD_SNEW_TYPE8		IRT_U8  /* Prefer unsigned loads. */
#endif

  PHIBARRIER(fleft);
  if (len <= FOLD_SNEW_MAX_LEN) {
    IROp op = (IROp)fins->o;
    IRRef strref = fleft->op1;
    lua_assert(IR(strref)->o == IR_STRREF);
    if (op == IR_EQ) {
      emitir(IRTGI(IR_EQ), fleft->op2, lj_ir_kint(J, len));
      /* Caveat: fins/fleft/fright is no longer valid after emitir. */
    } else {
      /* NE is not expanded since this would need an OR of two conds. */
      if (!irref_isk(fleft->op2))  /* Only handle the constant length case. */
	return NEXTFOLD;
      if (IR(fleft->op2)->i != len)
	return DROPFOLD;
    }
    if (len > 0) {
      /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
      uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, FOLD_SNEW_TYPE8) :
			       len == 2 ? IRT(IR_XLOAD, IRT_U16) :
			       IRTI(IR_XLOAD));
      TRef tmp = emitir(ot, strref,
		  IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0));
      TRef val = kfold_xload(J, IR(tref_ref(tmp)), strdata(kstr));
      if (len == 3)
	tmp = emitir(IRTI(IR_BAND), tmp,
		     lj_ir_kint(J, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
      fins->op1 = (IRRef1)tmp;
      fins->op2 = (IRRef1)val;
      fins->ot = (IROpT)IRTGI(op);
      return RETRYFOLD;
    } else {
      return DROPFOLD;
    }
  }
  return NEXTFOLD;
}
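/* Illustrative effect: string.sub(s, 3, 4) == "ab" is compiled to a guard
** on the substring length plus one unaligned 2 byte XLOAD compared against
** the constant 0x6261 (on a little-endian target), avoiding the creation
** of an interned 2 character string.
*/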
/* -- Loads --------------------------------------------------------------- */

/* Loads cannot be folded or passed on to CSE in general.
** Alias analysis is needed to check for forwarding opportunities.
**
** Caveat: *all* loads must be listed here or they end up at CSE!
*/

LJFOLD(ALOAD any)
LJFOLDX(lj_opt_fwd_aload)

/* From HREF fwd (see below). Must eliminate, not supported by fwd/backend. */
LJFOLD(HLOAD KKPTR)
LJFOLDF(kfold_hload_kkptr)
{
  UNUSED(J);
  lua_assert(ir_kptr(fleft) == niltvg(J2G(J)));
  return TREF_NIL;
}

LJFOLD(HLOAD any)
LJFOLDX(lj_opt_fwd_hload)

LJFOLD(ULOAD any)
LJFOLDX(lj_opt_fwd_uload)

LJFOLD(CALLL any IRCALL_lj_tab_len)
LJFOLDX(lj_opt_fwd_tab_len)

/* Upvalue refs are really loads, but there are no corresponding stores.
** So CSE is ok for them, except for UREFO across a GC step (see below).
** If the referenced function is const, its upvalue addresses are const, too.
** This can be used to improve CSE by looking for the same address,
** even if the upvalues originate from a different function.
*/
LJFOLD(UREFO KGC any)
LJFOLD(UREFC KGC any)
LJFOLDF(cse_uref)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref = J->chain[fins->o];
    GCfunc *fn = ir_kfunc(fleft);
    GCupval *uv = gco2uv(gcref(fn->l.uvptr[(fins->op2 >> 8)]));
    while (ref > 0) {
      IRIns *ir = IR(ref);
      if (irref_isk(ir->op1)) {
	GCfunc *fn2 = ir_kfunc(IR(ir->op1));
	if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
	  if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
	    break;
	  return ref;
	}
      }
      ref = ir->prev;
    }
  }
  return EMITFOLD;
}

LJFOLD(HREFK any any)
LJFOLDX(lj_opt_fwd_hrefk)

LJFOLD(HREF TNEW any)
LJFOLDF(fwd_href_tnew)
{
  if (lj_opt_fwd_href_nokey(J))
    return lj_ir_kkptr(J, niltvg(J2G(J)));
  return NEXTFOLD;
}

LJFOLD(HREF TDUP KPRI)
LJFOLD(HREF TDUP KGC)
LJFOLD(HREF TDUP KNUM)
LJFOLDF(fwd_href_tdup)
{
  TValue keyv;
  lj_ir_kvalue(J->L, &keyv, fright);
  if (lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv) == niltvg(J2G(J)) &&
      lj_opt_fwd_href_nokey(J))
    return lj_ir_kkptr(J, niltvg(J2G(J)));
  return NEXTFOLD;
}

/* We can safely FOLD/CSE array/hash refs and field loads, since there
** are no corresponding stores. But we need to check for any NEWREF with
** an aliased table, as it may invalidate all of the pointers and fields.
** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
** FLOADs. And NEWREF itself is treated like a store (see below).
*/
LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tnew_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD(fleft->op1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TNEW IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tnew_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((1 << fleft->op2)-1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tdup_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tdup_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask);
  return NEXTFOLD;
}

LJFOLD(HREF any any)
LJFOLD(FLOAD any IRFL_TAB_ARRAY)
LJFOLD(FLOAD any IRFL_TAB_NODE)
LJFOLD(FLOAD any IRFL_TAB_ASIZE)
LJFOLD(FLOAD any IRFL_TAB_HMASK)
LJFOLDF(fload_tab_ah)
{
  TRef tr = lj_opt_cse(J);
  return lj_opt_fwd_tptr(J, tref_ref(tr)) ? tr : EMITFOLD;
}
1992 /* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
1993 LJFOLD(FLOAD KGC IRFL_STR_LEN)
1994 LJFOLDF(fload_str_len_kgc)
1996 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1997 return INTFOLD((int32_t)ir_kstr(fleft)->len);
1998 return NEXTFOLD;
LJFOLD(FLOAD SNEW IRFL_STR_LEN)
LJFOLDF(fload_str_len_snew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    PHIBARRIER(fleft);
    return fleft->op2;
  }
  return NEXTFOLD;
}

/* The C type ID of cdata objects is immutable. */
LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
LJFOLDF(fload_cdata_typeid_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kcdata(fleft)->ctypeid);
  return NEXTFOLD;
}

/* Get the contents of immutable cdata objects. */
LJFOLD(FLOAD KGC IRFL_CDATA_PTR)
LJFOLD(FLOAD KGC IRFL_CDATA_INT)
LJFOLD(FLOAD KGC IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_int64_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    void *p = cdataptr(ir_kcdata(fleft));
    if (irt_is64(fins->t))
      return INT64FOLD(*(uint64_t *)p);
    else
      return INTFOLD(*(int32_t *)p);
  }
  return NEXTFOLD;
}

LJFOLD(FLOAD CNEW IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD CNEWI IRFL_CDATA_CTYPEID)
LJFOLDF(fload_cdata_typeid_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op1;  /* No PHI barrier needed. CNEW/CNEWI op1 is const. */
  return NEXTFOLD;
}

/* Pointer, int and int64 cdata objects are immutable. */
LJFOLD(FLOAD CNEWI IRFL_CDATA_PTR)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_ptr_int64_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op2;  /* Fold even across PHI to avoid allocations. */
  return NEXTFOLD;
}

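/* Example (illustrative, assuming recording emits these ops): boxing a
** 64 bit integer and reading it back, as in
**
**   local x = ffi.new("int64_t", 42)  --> CNEWI
**   local n = tonumber(x)             --> FLOAD x IRFL_CDATA_INT64
**
** folds the load to the CNEWI operand itself. Folding even across a PHI
** keeps loop-carried boxes from being re-read and helps eliminate the
** allocation when the box does not escape.
*/
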
LJFOLD(FLOAD any IRFL_STR_LEN)
LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD any IRFL_CDATA_PTR)
LJFOLD(FLOAD any IRFL_CDATA_INT)
LJFOLD(FLOAD any IRFL_CDATA_INT64)
LJFOLD(VLOAD any any)  /* Vararg loads have no corresponding stores. */
LJFOLDX(lj_opt_cse)

/* All other field loads need alias analysis. */
LJFOLD(FLOAD any any)
LJFOLDX(lj_opt_fwd_fload)

/* This is for LOOP only. Recording handles SLOADs internally. */
LJFOLD(SLOAD any any)
LJFOLDF(fwd_sload)
{
  if ((fins->op2 & IRSLOAD_FRAME)) {
    TRef tr = lj_opt_cse(J);
    return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
  } else {
    lua_assert(J->slot[fins->op1] != 0);
    return J->slot[fins->op1];
  }
}

/* Only fold for KKPTR. The pointer _and_ the contents must be const. */
LJFOLD(XLOAD KKPTR any)
LJFOLDF(xload_kptr)
{
  TRef tr = kfold_xload(J, fins, ir_kptr(fleft));
  return tr ? tr : NEXTFOLD;
}

LJFOLD(XLOAD any any)
LJFOLDX(lj_opt_fwd_xload)

/* -- Write barriers ------------------------------------------------------ */

/* Write barriers are amenable to CSE, but not across any incremental
** GC steps.
**
** The same logic applies to open upvalue references, because the stack
** may be resized during a GC step (not the current stack, but maybe that
** of a coroutine).
*/
LJFOLD(TBAR any)
LJFOLD(OBAR any any)
LJFOLD(UREFO any any)
LJFOLDF(barrier_tab)
{
  TRef tr = lj_opt_cse(J);
  if (gcstep_barrier(J, tref_ref(tr)))  /* CSE across GC step? */
    return EMITFOLD;  /* Raw emit. Assumes fins is left intact by CSE. */
  return tr;
}

LJFOLD(TBAR TNEW)
LJFOLD(TBAR TDUP)
LJFOLDF(barrier_tnew_tdup)
{
  /* New tables are always white and never need a barrier. */
  if (fins->op1 < J->chain[IR_LOOP])  /* Except across a GC step. */
    return NEXTFOLD;
  return DROPFOLD;
}

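/* Example (illustrative): for
**
**   local t = {}  --> TNEW
**   t[1] = x      --> store plus TBAR t
**
** the TBAR is dropped: the freshly allocated table is white, so it cannot
** be a black object pointing to a white one. The J->chain[IR_LOOP] check
** keeps the barrier when the allocation happened before a loop, since a
** GC step inside the loop may have turned the table black.
*/
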
/* -- Stores and allocations ---------------------------------------------- */

/* Stores and allocations cannot be folded or passed on to CSE in general.
** But some stores can be eliminated with dead-store elimination (DSE).
**
** Caveat: *all* stores and allocs must be listed here or they end up at CSE!
*/
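/* Example (illustrative): in
**
**   t[1] = x  --> ASTORE, dead
**   t[1] = y  --> ASTORE
**
** DSE removes the first store, provided no load or possibly aliasing
** access intervenes. The rules below only dispatch to the per-kind DSE
** handlers; the alias analysis itself lives in lj_opt_mem.c.
*/
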
LJFOLD(ASTORE any any)
LJFOLD(HSTORE any any)
LJFOLDX(lj_opt_dse_ahstore)

LJFOLD(USTORE any any)
LJFOLDX(lj_opt_dse_ustore)

LJFOLD(FSTORE any any)
LJFOLDX(lj_opt_dse_fstore)

LJFOLD(XSTORE any any)
LJFOLDX(lj_opt_dse_xstore)

LJFOLD(NEWREF any any)  /* Treated like a store. */
LJFOLD(CALLS any any)
LJFOLD(CALLL any any)  /* Safeguard fallback. */
LJFOLD(CALLXS any any)
LJFOLD(XBAR)
LJFOLD(RETF any any)  /* Modifies BASE. */
LJFOLD(TNEW any any)
LJFOLD(TDUP any)
LJFOLD(CNEW any any)
LJFOLD(XSNEW any any)
LJFOLDX(lj_ir_emit)

/* ------------------------------------------------------------------------ */

/* Every entry in the generated hash table is a 32 bit pattern:
**
** xxxxxxxx iiiiiii lllllll rrrrrrrrrr
**
**   xxxxxxxx = 8 bit index into fold function table
**    iiiiiii = 7 bit folded instruction opcode
**    lllllll = 7 bit left instruction opcode
** rrrrrrrrrr = 8 bit right instruction opcode or 10 bits from literal field
*/
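/* Illustrative decomposition of an entry fh (the concrete values are
** generated into lj_folddef.h):
**
**   index = fh >> 24         --> selects the fold function in fold_func[]
**   key   = fh & 0xffffff    --> compared against the masked lookup key:
**           ins   = key >> 17
**           left  = (key >> 10) & 0x7f
**           right = key & 0x3ff
*/
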
#include "lj_folddef.h"

/* ------------------------------------------------------------------------ */

/* Fold IR instruction. */
TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
{
  uint32_t key, any;
  IRRef ref;

  if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
    lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
                JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT);
    /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
    if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
      return lj_opt_cse(J);

    /* No FOLD, forwarding or CSE? Emit raw IR for loads, except for SLOAD. */
    if ((J->flags & (JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE)) !=
        (JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE) &&
        irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD)
      return lj_ir_emit(J);

    /* No FOLD or DSE? Emit raw IR for stores. */
    if ((J->flags & (JIT_F_OPT_FOLD|JIT_F_OPT_DSE)) !=
        (JIT_F_OPT_FOLD|JIT_F_OPT_DSE) &&
        irm_kind(lj_ir_mode[fins->o]) == IRM_S)
      return lj_ir_emit(J);
  }

  /* Fold engine start/retry point. */
retry:
  /* Construct key from opcode and operand opcodes (unless literal/none). */
  key = ((uint32_t)fins->o << 17);
  if (fins->op1 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op1)->o << 10;
    *fleft = *IR(fins->op1);
  }
  if (fins->op2 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op2)->o;
    *fright = *IR(fins->op2);
  } else {
    key += (fins->op2 & 0x3ffu);  /* Literal mask. Must include IRCONV_*MASK. */
  }

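  /* Illustrative example: for an FLOAD whose op1 is an instruction ref
  ** and whose op2 is the literal IRFL_TAB_ASIZE, the constructed key is
  **
  **   key = (IR_FLOAD << 17) | (IR(op1)->o << 10) | (IRFL_TAB_ASIZE & 0x3ff)
  **
  ** i.e. 'ins left literal', ready for the wildcard masking below.
  */
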
  /* Check for a match in order from most specific to least specific. */
  any = 0;
  for (;;) {
    uint32_t k = key | (any & 0x1ffff);
    uint32_t h = fold_hashkey(k);
    uint32_t fh = fold_hash[h];  /* Lookup key in semi-perfect hash table. */
    if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
      ref = (IRRef)tref_ref(fold_func[fh >> 24](J));
      if (ref != NEXTFOLD)
        break;
    }
    if (any == 0xfffff)  /* Exhausted folding. Pass on to CSE. */
      return lj_opt_cse(J);
    any = (any | (any >> 10)) ^ 0xffc00;
  }

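  /* Walkthrough of the wildcard masking: starting from 0, 'any' steps
  ** through 0x00000, 0xffc00, 0x003ff, 0xfffff, so the key k is tried as
  **
  **   ins left right   (any & 0x1ffff = 0x00000)
  **   ins any  right   (any & 0x1ffff = 0x1fc00, left field all-ones)
  **   ins left any     (any & 0x1ffff = 0x003ff, right field all-ones)
  **   ins any  any     (any & 0x1ffff = 0x1ffff)
  **
  ** from most to least specific. All-ones fields are how the generated
  ** rule table encodes the 'any' wildcard.
  */
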
  /* Return value processing, ordered by frequency. */
  if (LJ_LIKELY(ref >= MAX_FOLD))
    return TREF(ref, irt_t(IR(ref)->t));
  if (ref == RETRYFOLD)
    goto retry;
  if (ref == KINTFOLD)
    return lj_ir_kint(J, fins->i);
  if (ref == FAILFOLD)
    lj_trace_err(J, LJ_TRERR_GFAIL);
  lua_assert(ref == DROPFOLD);
  return REF_DROP;
}

/* -- Common-Subexpression Elimination ------------------------------------ */

/* CSE an IR instruction. This is very fast due to the skip-list chains. */
TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
{
  /* Avoid narrow to wide store-to-load forwarding stall */
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  IROp op = fins->o;
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    /* Limited search for same operands in per-opcode chain. */
    IRRef ref = J->chain[op];
    IRRef lim = fins->op1;
    if (fins->op2 > lim) lim = fins->op2;  /* Relies on lit < REF_BIAS. */
    while (ref > lim) {
      if (IR(ref)->op12 == op12)
        return TREF(ref, irt_t(IR(ref)->t));  /* Common subexpression found. */
      ref = IR(ref)->prev;
    }
  }
  /* Otherwise emit IR (inlined for speed). */
  {
    IRRef ref = lj_ir_nextins(J);
    IRIns *ir = IR(ref);
    ir->prev = J->chain[op];
    ir->op12 = op12;
    J->chain[op] = (IRRef1)ref;
    ir->o = fins->o;
    J->guardemit.irt |= fins->t.irt;
    return TREF(ref, irt_t((ir->t = fins->t)));
  }
}

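/* Note on the search limit above: an instruction always comes after both
** of its operands, so a chain entry with ref <= max(op1, op2) cannot have
** the same operands and the search may stop there. Illustrative chain:
**
**   0010 ADD 0005 0008   --> candidate, 0010 > max(0005, 0008)
**   0007 ADD 0003 0004   --> 0007 < 0008, search stops before comparing
**
** Literals sort below REF_BIAS, so a literal operand never raises the
** limit past a real instruction reference.
*/
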
/* CSE with explicit search limit. */
TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim)
{
  IRRef ref = J->chain[fins->o];
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  while (ref > lim) {
    if (IR(ref)->op12 == op12)
      return ref;
    ref = IR(ref)->prev;
  }
  return lj_ir_emit(J);
}

/* ------------------------------------------------------------------------ */

#undef IR
#undef fins
#undef fleft
#undef fright
#undef knumleft
#undef knumright
#undef emitir

#endif