/*
** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
** ABCelim: Array Bounds Check Elimination.
** CSE: Common-Subexpression Elimination.
** Copyright (C) 2005-2012 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_fold_c
#define LUA_CORE

#include <math.h>

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_str.h"
#include "lj_tab.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#if LJ_HASFFI
#include "lj_ctype.h"
#endif
#include "lj_carith.h"
#include "lj_vm.h"
#include "lj_strscan.h"

/* Here's a short description of how the FOLD engine processes instructions:
**
** The FOLD engine receives a single instruction stored in fins (J->fold.ins).
** The instruction and its operands are used to select matching fold rules.
** These are applied iteratively until a fixed point is reached.
**
** The 8 bit opcode of the instruction itself plus the opcodes of the
** two instructions referenced by its operands form a 24 bit key
** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits).
**
** This key is used for partial matching against the fold rules. The
** left/right operand fields of the key are successively masked with
** the 'any' wildcard, from most specific to least specific:
**
**   ins left right
**   ins any  right
**   ins left any
**   ins any  any
**
** The masked key is used to lookup a matching fold rule in a semi-perfect
** hash table. If a matching rule is found, the related fold function is run.
** Multiple rules can share the same fold function. A fold rule may return
** one of several special values:
**
** - NEXTFOLD means no folding was applied, because an additional test
**   inside the fold function failed. Matching continues against less
**   specific fold rules. Finally the instruction is passed on to CSE.
**
** - RETRYFOLD means the instruction was modified in-place. Folding is
**   retried as if this instruction had just been received.
**
** All other return values are terminal actions -- no further folding is
** applied:
**
** - INTFOLD(i) returns a reference to the integer constant i.
**
** - LEFTFOLD and RIGHTFOLD return the left/right operand reference
**   without emitting an instruction.
**
** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit
**   it without passing through any further optimizations.
**
** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have
**   no result (e.g. guarded assertions): FAILFOLD means the guard would
**   always fail, i.e. the current trace is pointless. DROPFOLD means
**   the guard is always true and has been eliminated. CONDFOLD is a
**   shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail).
**
** - Any other return value is interpreted as an IRRef or TRef. This
**   can be a reference to an existing or a newly created instruction.
**   Only the least-significant 16 bits (IRRef1) are used to form a TRef
**   which is finally returned to the caller.
**
** The FOLD engine receives instructions both from the trace recorder and
** substituted instructions from LOOP unrolling. This means all types
** of instructions may end up here, even though the recorder bypasses
** FOLD in some cases. Thus all loads, stores and allocations must have
** an any/any rule to avoid being passed on to CSE.
**
** Carefully read the following requirements before adding or modifying
** any fold rules:
**
** Requirement #1: All fold rules must preserve their destination type.
**
** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result).
** Never use lj_ir_knumint() which can have either a KINT or KNUM result.
**
** Requirement #2: Fold rules should not create *new* instructions which
** reference operands *across* PHIs.
**
** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the
** left operand is a PHI. Then fleft->op1 would point across the PHI
** frontier to an invariant instruction. Adding a PHI for this instruction
** would be counterproductive. The solution is to add a barrier which
** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case.
** The only exception is for recurrences with high latencies like
** repeated int->num->int conversions.
**
** One could relax this condition a bit if the referenced instruction is
** a PHI, too. But this often leads to worse code due to excessive
** register shuffling.
**
** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though.
** Even returning fleft->op1 would be ok, because a new PHI will be added,
** if needed. But again, this leads to excessive register shuffling and
** should be avoided.
**
** Requirement #3: The set of all fold rules must be monotonic to guarantee
** termination.
**
** The goal is optimization, so one primarily wants to add strength-reducing
** rules. This means eliminating an instruction or replacing an instruction
** with one or more simpler instructions. Don't add fold rules which point
** into the other direction.
**
** Some rules (like commutativity) do not directly reduce the strength of
** an instruction, but enable other fold rules (e.g. by moving constants
** to the right operand). These rules must be made unidirectional to avoid
** cycles.
**
** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it.
*/
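
/* Illustrative sketch (not an additional rule): the smallest useful fold
** pairs a spec line, matched via the 'ins left right' key, with a fold
** function that applies an extra test before picking a return action.
** This mirrors the real rule simplify_intadd_k defined further below:
**
**   LJFOLD(ADD any KINT)        spec: opcode ADD, any left op, KINT right op
**   LJFOLDF(simplify_intadd_k)  fold function, collected by buildvm
**   {
**     if (fright->i == 0)       i + 0 ==> i
**       return LEFTFOLD;        terminal: reuse the left operand reference
**     return NEXTFOLD;          keep matching less specific rules
**   }
*/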

/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)     (&J->cur.ir[(ref)])
#define fins        (&J->fold.ins)
#define fleft       (&J->fold.left)
#define fright      (&J->fold.right)
#define knumleft    (ir_knum(fleft)->n)
#define knumright   (ir_knum(fright)->n)

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b)  (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Fold function type. Fastcall on x86 significantly reduces their size. */
typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);

/* Macros for the fold specs, so buildvm can recognize them. */
#define LJFOLD(x)
#define LJFOLDX(x)
#define LJFOLDF(name)  static TRef LJ_FASTCALL fold_##name(jit_State *J)
/* Note: They must be at the start of a line or buildvm ignores them! */

/* Barrier to prevent using operands across PHIs. */
#define PHIBARRIER(ir)  if (irt_isphi((ir)->t)) return NEXTFOLD

/* Barrier to prevent folding across a GC step.
** GC steps can only happen at the head of a trace and at LOOP.
** And the GC is only driven forward if there is at least one allocation.
*/
#define gcstep_barrier(J, ref) \
  ((ref) < J->chain[IR_LOOP] && \
   (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
    J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
    J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR]))

/* -- Constant folding for FP numbers ------------------------------------- */

LJFOLD(ADD KNUM KNUM)
LJFOLD(SUB KNUM KNUM)
LJFOLD(MUL KNUM KNUM)
LJFOLD(DIV KNUM KNUM)
LJFOLD(NEG KNUM KNUM)
LJFOLD(ABS KNUM KNUM)
LJFOLD(ATAN2 KNUM KNUM)
LJFOLD(LDEXP KNUM KNUM)
LJFOLD(MIN KNUM KNUM)
LJFOLD(MAX KNUM KNUM)
LJFOLDF(kfold_numarith)
{
  lua_Number a = knumleft;
  lua_Number b = knumright;
  lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
  return lj_ir_knum(J, y);
}

LJFOLD(LDEXP KNUM KINT)
LJFOLDF(kfold_ldexp)
{
#if LJ_TARGET_X86ORX64
  UNUSED(J);
  return NEXTFOLD;
#else
  return lj_ir_knum(J, ldexp(knumleft, fright->i));
#endif
}

LJFOLD(FPMATH KNUM any)
LJFOLDF(kfold_fpmath)
{
  lua_Number a = knumleft;
  lua_Number y = lj_vm_foldfpm(a, fins->op2);
  return lj_ir_knum(J, y);
}

LJFOLD(POW KNUM KINT)
LJFOLDF(kfold_numpow)
{
  lua_Number a = knumleft;
  lua_Number b = (lua_Number)fright->i;
  lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
  return lj_ir_knum(J, y);
}

/* Must not use kfold_kref for numbers (could be NaN). */
LJFOLD(EQ KNUM KNUM)
LJFOLD(NE KNUM KNUM)
LJFOLD(LT KNUM KNUM)
LJFOLD(GE KNUM KNUM)
LJFOLD(LE KNUM KNUM)
LJFOLD(GT KNUM KNUM)
LJFOLD(ULT KNUM KNUM)
LJFOLD(UGE KNUM KNUM)
LJFOLD(ULE KNUM KNUM)
LJFOLD(UGT KNUM KNUM)
LJFOLDF(kfold_numcomp)
{
  return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o));
}

/* -- Constant folding for 32 bit integers -------------------------------- */

static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
{
  switch (op) {
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
  case IR_MUL: k1 *= k2; break;
  case IR_MOD: k1 = lj_vm_modi(k1, k2); break;
  case IR_NEG: k1 = -k1; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
  case IR_BSHL: k1 <<= (k2 & 31); break;
  case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break;
  case IR_BSAR: k1 >>= (k2 & 31); break;
  case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break;
  case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
  case IR_MIN: k1 = k1 < k2 ? k1 : k2; break;
  case IR_MAX: k1 = k1 > k2 ? k1 : k2; break;
  default: lua_assert(0); break;
  }
  return k1;
}
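
/* Usage sketch: kfold_intop(6, 7, IR_MUL) yields 42 and
** kfold_intop(-8, 1, IR_BSAR) yields -4. Shift and rotate counts are
** masked with 31 first, so the fold never performs an out-of-range
** C shift and a count of 32 behaves like a count of 0.
*/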

LJFOLD(ADD KINT KINT)
LJFOLD(SUB KINT KINT)
LJFOLD(MUL KINT KINT)
LJFOLD(MOD KINT KINT)
LJFOLD(NEG KINT KINT)
LJFOLD(BAND KINT KINT)
LJFOLD(BOR KINT KINT)
LJFOLD(BXOR KINT KINT)
LJFOLD(BSHL KINT KINT)
LJFOLD(BSHR KINT KINT)
LJFOLD(BSAR KINT KINT)
LJFOLD(BROL KINT KINT)
LJFOLD(BROR KINT KINT)
LJFOLD(MIN KINT KINT)
LJFOLD(MAX KINT KINT)
LJFOLDF(kfold_intarith)
{
  return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
}

LJFOLD(ADDOV KINT KINT)
LJFOLD(SUBOV KINT KINT)
LJFOLD(MULOV KINT KINT)
LJFOLDF(kfold_intovarith)
{
  lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i,
                                 fins->o - IR_ADDOV);
  int32_t k = lj_num2int(n);
  if (n != (lua_Number)k)
    return FAILFOLD;
  return INTFOLD(k);
}
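
/* This mirrors the runtime overflow check: the arithmetic is done on
** doubles and the result must round-trip through int32. E.g. for ADDOV
** with 0x7fffffff and 1 the sum 2147483648 has no int32 representation,
** so the fold returns FAILFOLD -- the guard would always fail and the
** current trace is pointless.
*/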

LJFOLD(BNOT KINT)
LJFOLDF(kfold_bnot)
{
  return INTFOLD(~fleft->i);
}

LJFOLD(BSWAP KINT)
LJFOLDF(kfold_bswap)
{
  return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i));
}

LJFOLD(LT KINT KINT)
LJFOLD(GE KINT KINT)
LJFOLD(LE KINT KINT)
LJFOLD(GT KINT KINT)
LJFOLD(ULT KINT KINT)
LJFOLD(UGE KINT KINT)
LJFOLD(ULE KINT KINT)
LJFOLD(UGT KINT KINT)
LJFOLD(ABC KINT KINT)
LJFOLDF(kfold_intcomp)
{
  int32_t a = fleft->i, b = fright->i;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD(a < b);
  case IR_GE: return CONDFOLD(a >= b);
  case IR_LE: return CONDFOLD(a <= b);
  case IR_GT: return CONDFOLD(a > b);
  case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b);
  case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b);
  case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
  case IR_ABC:
  case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
  default: lua_assert(0); return FAILFOLD;
  }
}

LJFOLD(UGE any KINT)
LJFOLDF(kfold_intcomp0)
{
  if (fright->i == 0)
    return DROPFOLD;
  return NEXTFOLD;
}

/* -- Constant folding for 64 bit integers -------------------------------- */

static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
{
  switch (op) {
#if LJ_64 || LJ_HASFFI
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
#endif
#if LJ_HASFFI
  case IR_MUL: k1 *= k2; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
#endif
  default: UNUSED(k2); lua_assert(0); break;
  }
  return k1;
}

LJFOLD(ADD KINT64 KINT64)
LJFOLD(SUB KINT64 KINT64)
LJFOLD(MUL KINT64 KINT64)
LJFOLD(BAND KINT64 KINT64)
LJFOLD(BOR KINT64 KINT64)
LJFOLD(BXOR KINT64 KINT64)
LJFOLDF(kfold_int64arith)
{
  return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64,
                                    ir_k64(fright)->u64, (IROp)fins->o));
}

LJFOLD(DIV KINT64 KINT64)
LJFOLD(MOD KINT64 KINT64)
LJFOLD(POW KINT64 KINT64)
LJFOLDF(kfold_int64arith2)
{
#if LJ_HASFFI
  uint64_t k1 = ir_k64(fleft)->u64, k2 = ir_k64(fright)->u64;
  if (irt_isi64(fins->t)) {
    k1 = fins->o == IR_DIV ? lj_carith_divi64((int64_t)k1, (int64_t)k2) :
         fins->o == IR_MOD ? lj_carith_modi64((int64_t)k1, (int64_t)k2) :
                             lj_carith_powi64((int64_t)k1, (int64_t)k2);
  } else {
    k1 = fins->o == IR_DIV ? lj_carith_divu64(k1, k2) :
         fins->o == IR_MOD ? lj_carith_modu64(k1, k2) :
                             lj_carith_powu64(k1, k2);
  }
  return INT64FOLD(k1);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(BSHL KINT64 KINT)
LJFOLD(BSHR KINT64 KINT)
LJFOLD(BSAR KINT64 KINT)
LJFOLD(BROL KINT64 KINT)
LJFOLD(BROR KINT64 KINT)
LJFOLDF(kfold_int64shift)
{
#if LJ_HASFFI || LJ_64
  uint64_t k = ir_k64(fleft)->u64;
  int32_t sh = (fright->i & 63);
  switch ((IROp)fins->o) {
  case IR_BSHL: k <<= sh; break;
#if LJ_HASFFI
  case IR_BSHR: k >>= sh; break;
  case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
  case IR_BROL: k = lj_rol(k, sh); break;
  case IR_BROR: k = lj_ror(k, sh); break;
#endif
  default: lua_assert(0); break;
  }
  return INT64FOLD(k);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(BNOT KINT64)
LJFOLDF(kfold_bnot64)
{
#if LJ_HASFFI
  return INT64FOLD(~ir_k64(fleft)->u64);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(BSWAP KINT64)
LJFOLDF(kfold_bswap64)
{
#if LJ_HASFFI
  return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64));
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(LT KINT64 KINT64)
LJFOLD(GE KINT64 KINT64)
LJFOLD(LE KINT64 KINT64)
LJFOLD(GT KINT64 KINT64)
LJFOLD(ULT KINT64 KINT64)
LJFOLD(UGE KINT64 KINT64)
LJFOLD(ULE KINT64 KINT64)
LJFOLD(UGT KINT64 KINT64)
LJFOLDF(kfold_int64comp)
{
#if LJ_HASFFI
  int64_t a = (int64_t)ir_k64(fleft)->u64, b = (int64_t)ir_k64(fright)->u64;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD(a < b);
  case IR_GE: return CONDFOLD(a >= b);
  case IR_LE: return CONDFOLD(a <= b);
  case IR_GT: return CONDFOLD(a > b);
  case IR_ULT: return CONDFOLD((uint64_t)a < (uint64_t)b);
  case IR_UGE: return CONDFOLD((uint64_t)a >= (uint64_t)b);
  case IR_ULE: return CONDFOLD((uint64_t)a <= (uint64_t)b);
  case IR_UGT: return CONDFOLD((uint64_t)a > (uint64_t)b);
  default: lua_assert(0); return FAILFOLD;
  }
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(UGE any KINT64)
LJFOLDF(kfold_int64comp0)
{
#if LJ_HASFFI
  if (ir_k64(fright)->u64 == 0)
    return DROPFOLD;
  return NEXTFOLD;
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

/* -- Constant folding for strings ---------------------------------------- */

LJFOLD(SNEW KKPTR KINT)
LJFOLDF(kfold_snew_kptr)
{
  GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i);
  return lj_ir_kstr(J, s);
}

LJFOLD(SNEW any KINT)
LJFOLDF(kfold_snew_empty)
{
  if (fright->i == 0)
    return lj_ir_kstr(J, &J2G(J)->strempty);
  return NEXTFOLD;
}

LJFOLD(STRREF KGC KINT)
LJFOLDF(kfold_strref)
{
  GCstr *str = ir_kstr(fleft);
  lua_assert((MSize)fright->i < str->len);
  return lj_ir_kkptr(J, (char *)strdata(str) + fright->i);
}

LJFOLD(STRREF SNEW any)
LJFOLDF(kfold_strref_snew)
{
  PHIBARRIER(fleft);
  if (irref_isk(fins->op2) && fright->i == 0) {
    return fleft->op1;  /* strref(snew(ptr, len), 0) ==> ptr */
  } else {
    /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
    IRIns *ir = IR(fleft->op1);
    IRRef1 str = ir->op1;  /* IRIns * is not valid across emitir. */
    lua_assert(ir->o == IR_STRREF);
    PHIBARRIER(ir);
    fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2);  /* Clobbers fins! */
    fins->op1 = str;
    fins->ot = IRT(IR_STRREF, IRT_P32);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(CALLN CARG IRCALL_lj_str_cmp)
LJFOLDF(kfold_strcmp)
{
  if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
    GCstr *a = ir_kstr(IR(fleft->op1));
    GCstr *b = ir_kstr(IR(fleft->op2));
    return INTFOLD(lj_str_cmp(a, b));
  }
  return NEXTFOLD;
}

/* -- Constant folding of pointer arithmetic ------------------------------ */

LJFOLD(ADD KGC KINT)
LJFOLD(ADD KGC KINT64)
LJFOLDF(kfold_add_kgc)
{
  GCobj *o = ir_kgc(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
#if LJ_HASFFI
  if (irt_iscdata(fleft->t)) {
    CType *ct = ctype_raw(ctype_ctsG(J2G(J)), gco2cd(o)->ctypeid);
    if (ctype_isnum(ct->info) || ctype_isenum(ct->info) ||
        ctype_isptr(ct->info) || ctype_isfunc(ct->info) ||
        ctype_iscomplex(ct->info) || ctype_isvector(ct->info))
      return lj_ir_kkptr(J, (char *)o + ofs);
  }
#endif
  return lj_ir_kptr(J, (char *)o + ofs);
}

LJFOLD(ADD KPTR KINT)
LJFOLD(ADD KPTR KINT64)
LJFOLD(ADD KKPTR KINT)
LJFOLD(ADD KKPTR KINT64)
LJFOLDF(kfold_add_kptr)
{
  void *p = ir_kptr(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
  return lj_ir_kptr_(J, fleft->o, (char *)p + ofs);
}

LJFOLD(ADD any KGC)
LJFOLD(ADD any KPTR)
LJFOLD(ADD any KKPTR)
LJFOLDF(kfold_add_kright)
{
  if (fleft->o == IR_KINT || fleft->o == IR_KINT64) {
    IRRef1 tmp = fins->op1; fins->op1 = fins->op2; fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* -- Constant folding of conversions ------------------------------------- */

LJFOLD(TOBIT KNUM KNUM)
LJFOLDF(kfold_tobit)
{
  return INTFOLD(lj_num2bit(knumleft));
}

LJFOLD(CONV KINT IRCONV_NUM_INT)
LJFOLDF(kfold_conv_kint_num)
{
  return lj_ir_knum(J, (lua_Number)fleft->i);
}

LJFOLD(CONV KINT IRCONV_NUM_U32)
LJFOLDF(kfold_conv_kintu32_num)
{
  return lj_ir_knum(J, (lua_Number)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT IRCONV_I64_INT)
LJFOLD(CONV KINT IRCONV_U64_INT)
LJFOLD(CONV KINT IRCONV_I64_U32)
LJFOLD(CONV KINT IRCONV_U64_U32)
LJFOLDF(kfold_conv_kint_i64)
{
  if ((fins->op2 & IRCONV_SEXT))
    return INT64FOLD((uint64_t)(int64_t)fleft->i);
  else
    return INT64FOLD((uint64_t)(int64_t)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT64 IRCONV_NUM_I64)
LJFOLDF(kfold_conv_kint64_num_i64)
{
  return lj_ir_knum(J, (lua_Number)(int64_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_NUM_U64)
LJFOLDF(kfold_conv_kint64_num_u64)
{
  return lj_ir_knum(J, (lua_Number)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_INT_I64)
LJFOLD(CONV KINT64 IRCONV_U32_I64)
LJFOLDF(kfold_conv_kint64_int_i64)
{
  return INTFOLD((int32_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KNUM IRCONV_INT_NUM)
LJFOLDF(kfold_conv_knum_int_num)
{
  lua_Number n = knumleft;
  if (!(fins->op2 & IRCONV_TRUNC)) {
    int32_t k = lj_num2int(n);
    if (irt_isguard(fins->t) && n != (lua_Number)k) {
      /* We're about to create a guard which always fails, like CONV +1.5.
      ** Some pathological loops cause this during LICM, e.g.:
      **   local x,k,t = 0,1.5,{1,[1.5]=2}
      **   for i=1,200 do x = x + t[k]; k = k == 1 and 1.5 or 1 end
      **   assert(x == 300)
      */
      return FAILFOLD;
    }
    return INTFOLD(k);
  } else {
    return INTFOLD((int32_t)n);
  }
}

LJFOLD(CONV KNUM IRCONV_U32_NUM)
LJFOLDF(kfold_conv_knum_u32_num)
{
  lua_assert((fins->op2 & IRCONV_TRUNC));
#ifdef _MSC_VER
  {  /* Workaround for MSVC bug. */
    volatile uint32_t u = (uint32_t)knumleft;
    return INTFOLD((int32_t)u);
  }
#else
  return INTFOLD((int32_t)(uint32_t)knumleft);
#endif
}

LJFOLD(CONV KNUM IRCONV_I64_NUM)
LJFOLDF(kfold_conv_knum_i64_num)
{
  lua_assert((fins->op2 & IRCONV_TRUNC));
  return INT64FOLD((uint64_t)(int64_t)knumleft);
}

LJFOLD(CONV KNUM IRCONV_U64_NUM)
LJFOLDF(kfold_conv_knum_u64_num)
{
  lua_assert((fins->op2 & IRCONV_TRUNC));
  return INT64FOLD(lj_num2u64(knumleft));
}

LJFOLD(TOSTR KNUM)
LJFOLDF(kfold_tostr_knum)
{
  return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft));
}

LJFOLD(TOSTR KINT)
LJFOLDF(kfold_tostr_kint)
{
  return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i));
}

LJFOLD(STRTO KGC)
LJFOLDF(kfold_strto)
{
  TValue n;
  if (lj_strscan_num(ir_kstr(fleft), &n))
    return lj_ir_knum(J, numV(&n));
  return FAILFOLD;
}

/* -- Constant folding of equality checks --------------------------------- */

/* Don't constant-fold away FLOAD checks against KNULL. */
LJFOLD(EQ FLOAD KNULL)
LJFOLD(NE FLOAD KNULL)
LJFOLDX(lj_opt_cse)

/* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
LJFOLD(EQ any KNULL)
LJFOLD(NE any KNULL)
LJFOLD(EQ KNULL any)
LJFOLD(NE KNULL any)
LJFOLD(EQ KINT KINT)  /* Constants are unique, so same refs <==> same value. */
LJFOLD(NE KINT KINT)
LJFOLD(EQ KINT64 KINT64)
LJFOLD(NE KINT64 KINT64)
LJFOLD(EQ KGC KGC)
LJFOLD(NE KGC KGC)
LJFOLDF(kfold_kref)
{
  return CONDFOLD((fins->op1 == fins->op2) ^ (fins->o == IR_NE));
}

/* -- Algebraic shortcuts ------------------------------------------------- */

LJFOLD(FPMATH FPMATH IRFPM_FLOOR)
LJFOLD(FPMATH FPMATH IRFPM_CEIL)
LJFOLD(FPMATH FPMATH IRFPM_TRUNC)
LJFOLDF(shortcut_round)
{
  IRFPMathOp op = (IRFPMathOp)fleft->op2;
  if (op == IRFPM_FLOOR || op == IRFPM_CEIL || op == IRFPM_TRUNC)
    return LEFTFOLD;  /* round(round_left(x)) = round_left(x) */
  return NEXTFOLD;
}

LJFOLD(ABS ABS KNUM)
LJFOLDF(shortcut_left)
{
  return LEFTFOLD;  /* f(g(x)) ==> g(x) */
}

LJFOLD(ABS NEG KNUM)
LJFOLDF(shortcut_dropleft)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* abs(neg(x)) ==> abs(x) */
  return RETRYFOLD;
}

/* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
LJFOLD(NEG NEG any)
LJFOLD(BNOT BNOT)
LJFOLD(BSWAP BSWAP)
LJFOLDF(shortcut_leftleft)
{
  PHIBARRIER(fleft);  /* See above. Fold would be ok, but not beneficial. */
  return fleft->op1;  /* f(g(x)) ==> x */
}

/* -- FP algebraic simplifications ---------------------------------------- */

/* FP arithmetic is tricky -- there's not much to simplify.
** Please note the following common pitfalls before sending "improvements":
**   x+0 ==> x  is INVALID for x=-0
**   0-x ==> -x is INVALID for x=+0
**   x*0 ==> 0  is INVALID for x=-0, x=+-Inf or x=NaN
*/

LJFOLD(ADD NEG any)
LJFOLDF(simplify_numadd_negx)
{
  PHIBARRIER(fleft);
  fins->o = IR_SUB;  /* (-a) + b ==> b - a */
  fins->op1 = fins->op2;
  fins->op2 = fleft->op1;
  return RETRYFOLD;
}

LJFOLD(ADD any NEG)
LJFOLDF(simplify_numadd_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_SUB;  /* a + (-b) ==> a - b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(SUB any KNUM)
LJFOLDF(simplify_numsub_k)
{
  lua_Number n = knumright;
  if (n == 0.0)  /* x - (+-0) ==> x */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB NEG KNUM)
LJFOLDF(simplify_numsub_negk)
{
  PHIBARRIER(fleft);
  fins->op2 = fleft->op1;  /* (-x) - k ==> (-k) - x */
  fins->op1 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(SUB any NEG)
LJFOLDF(simplify_numsub_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_ADD;  /* a - (-b) ==> a + b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(MUL any KNUM)
LJFOLD(DIV any KNUM)
LJFOLDF(simplify_nummuldiv_k)
{
  lua_Number n = knumright;
  if (n == 1.0) {  /* x o 1 ==> x */
    return LEFTFOLD;
  } else if (n == -1.0) {  /* x o -1 ==> -x */
    fins->o = IR_NEG;
    fins->op2 = (IRRef1)lj_ir_knum_neg(J);
    return RETRYFOLD;
  } else if (fins->o == IR_MUL && n == 2.0) {  /* x * 2 ==> x + x */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  } else if (fins->o == IR_DIV) {  /* x / 2^k ==> x * 2^-k */
    uint64_t u = ir_knum(fright)->u64;
    uint32_t ex = ((uint32_t)(u >> 52) & 0x7ff);
    if ((u & U64x(000fffff,ffffffff)) == 0 && ex - 1 < 0x7fd) {
      u = (u & ((uint64_t)1 << 63)) | ((uint64_t)(0x7fe - ex) << 52);
      fins->o = IR_MUL;  /* Multiply by exact reciprocal. */
      fins->op2 = lj_ir_knum_u64(J, u);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}
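
/* Sketch of the reciprocal trick above for x / 4.0: the constant 4.0 is
** 0x4010000000000000, i.e. zero mantissa and biased exponent ex = 0x401.
** Rebiasing to 0x7fe - 0x401 = 0x3fd gives 0x3fd0000000000000 = 0.25, so
** the division is retried as x * 0.25, which is exact for every x.
** The 'ex - 1 < 0x7fd' check excludes denormals, infinities and NaN,
** whose reciprocals are not representable as a normal power of two.
*/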

LJFOLD(MUL NEG KNUM)
LJFOLD(DIV NEG KNUM)
LJFOLDF(simplify_nummuldiv_negk)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* (-a) o k ==> a o (-k) */
  fins->op2 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(MUL NEG NEG)
LJFOLD(DIV NEG NEG)
LJFOLDF(simplify_nummuldiv_negneg)
{
  PHIBARRIER(fleft);
  PHIBARRIER(fright);
  fins->op1 = fleft->op1;  /* (-a) o (-b) ==> a o b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(POW any KINT)
LJFOLDF(simplify_numpow_xk)
{
  int32_t k = fright->i;
  TRef ref = fins->op1;
  if (k == 0)  /* x ^ 0 ==> 1 */
    return lj_ir_knum_one(J);  /* Result must be a number, not an int. */
  if (k == 1)  /* x ^ 1 ==> x */
    return LEFTFOLD;
  if ((uint32_t)(k+65536) > 2*65536u)  /* Limit code explosion. */
    return NEXTFOLD;
  if (k < 0) {  /* x ^ (-k) ==> (1/x) ^ k. */
    ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
    k = -k;
  }
  /* Unroll x^k for 1 <= k <= 65536. */
  for (; (k & 1) == 0; k >>= 1)  /* Handle leading zeros. */
    ref = emitir(IRTN(IR_MUL), ref, ref);
  if ((k >>= 1) != 0) {  /* Handle trailing bits. */
    TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
    for (; k != 1; k >>= 1) {
      if (k & 1)
        ref = emitir(IRTN(IR_MUL), ref, tmp);
      tmp = emitir(IRTN(IR_MUL), tmp, tmp);
    }
    ref = emitir(IRTN(IR_MUL), ref, tmp);
  }
  return ref;
}
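
/* Unrolling example for x ^ 5 (k = 5, binary 101): the leading-zeros loop
** is skipped since k is odd; k >>= 1 leaves 2 and tmp = x*x is emitted.
** In the loop k = 2 is even, so only tmp = tmp*tmp = x^4 is emitted;
** k >>= 1 reaches 1 and the loop exits. The final multiply yields
** ref = x * x^4 = x^5 -- three IR multiplies instead of a pow() call.
*/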

LJFOLD(POW KNUM any)
LJFOLDF(simplify_numpow_kx)
{
  lua_Number n = knumleft;
  if (n == 2.0) {  /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
    fins->o = IR_CONV;
#if LJ_TARGET_X86ORX64
    fins->op1 = fins->op2;
    fins->op2 = IRCONV_NUM_INT;
    fins->op2 = (IRRef1)lj_opt_fold(J);
#endif
    fins->op1 = (IRRef1)lj_ir_knum_one(J);
    fins->o = IR_LDEXP;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* -- Simplify conversions ------------------------------------------------ */

LJFOLD(CONV CONV IRCONV_NUM_INT)  /* _NUM */
LJFOLDF(shortcut_conv_num_int)
{
  PHIBARRIER(fleft);
  /* Only safe with a guarded conversion to int. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM && irt_isguard(fleft->t))
    return fleft->op1;  /* f(g(x)) ==> x */
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_NUM)  /* _INT */
LJFOLDF(simplify_conv_int_num)
{
  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT)
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_U32_NUM)  /* _U32 */
LJFOLDF(simplify_conv_u32_num)
{
  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_I64_NUM)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U64_NUM)  /* _INT or _U32 */
LJFOLDF(simplify_conv_i64_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
    /* Reduce to a sign-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
    return RETRYFOLD;
  } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
#if LJ_TARGET_X64
    return fleft->op1;
#else
    /* Reduce to a zero-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = (IRT_I64<<5)|IRT_U32;
    return RETRYFOLD;
#endif
  }
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_I64)  /* _INT */
LJFOLD(CONV CONV IRCONV_INT_U64)  /* _INT */
LJFOLDF(simplify_conv_int_i64)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT)
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_FLOAT_NUM)  /* _FLOAT */
LJFOLDF(simplify_conv_flt_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_FLOAT)
    return fleft->op1;
  return NEXTFOLD;
}

/* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(TOBIT CONV KNUM)
LJFOLDF(simplify_tobit_conv)
{
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
      (fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
    /* Fold even across PHI to avoid expensive num->int conversions in loop. */
    lua_assert(irt_isnum(fleft->t));
    return fleft->op1;
  }
  return NEXTFOLD;
}

/* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(FPMATH CONV IRFPM_FLOOR)
LJFOLD(FPMATH CONV IRFPM_CEIL)
LJFOLD(FPMATH CONV IRFPM_TRUNC)
LJFOLDF(simplify_floor_conv)
{
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
      (fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
    return LEFTFOLD;
  return NEXTFOLD;
}

/* Strength reduction of widening. */
LJFOLD(CONV any IRCONV_I64_INT)
LJFOLD(CONV any IRCONV_U64_INT)
LJFOLDF(simplify_conv_sext)
{
  IRRef ref = fins->op1;
  int64_t ofs = 0;
  if (!(fins->op2 & IRCONV_SEXT))
    return NEXTFOLD;
  PHIBARRIER(fleft);
  if (fleft->o == IR_XLOAD && (irt_isu8(fleft->t) || irt_isu16(fleft->t)))
    goto ok_reduce;
  if (fleft->o == IR_ADD && irref_isk(fleft->op2)) {
    ofs = (int64_t)IR(fleft->op2)->i;
    ref = fleft->op1;
  }
  /* Use scalar evolution analysis results to strength-reduce sign-extension. */
  if (ref == J->scev.idx) {
    IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
    lua_assert(irt_isint(J->scev.t));
    if (lo && IR(lo)->i + ofs >= 0) {
    ok_reduce:
#if LJ_TARGET_X64
      /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
      return LEFTFOLD;
#else
      /* Reduce to a (cheaper) zero-extension. */
      fins->op2 &= ~IRCONV_SEXT;
      return RETRYFOLD;
#endif
    }
  }
  return NEXTFOLD;
}

/* Strength reduction of narrowing. */
LJFOLD(CONV ADD IRCONV_INT_I64)
LJFOLD(CONV SUB IRCONV_INT_I64)
LJFOLD(CONV MUL IRCONV_INT_I64)
LJFOLD(CONV ADD IRCONV_INT_U64)
LJFOLD(CONV SUB IRCONV_INT_U64)
LJFOLD(CONV MUL IRCONV_INT_U64)
LJFOLDF(simplify_conv_narrow)
{
  IROp op = (IROp)fleft->o;
  IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
  PHIBARRIER(fleft);
  op1 = emitir(IRTI(IR_CONV), op1, mode);
  op2 = emitir(IRTI(IR_CONV), op2, mode);
  fins->ot = IRTI(op);
  fins->op1 = op1;
  fins->op2 = op2;
  return RETRYFOLD;
}

/* Special CSE rule for CONV. */
LJFOLD(CONV any any)
LJFOLDF(cse_conv)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
    uint8_t guard = irt_isguard(fins->t);
    IRRef ref = J->chain[IR_CONV];
    while (ref > op1) {
      IRIns *ir = IR(ref);
      /* Commoning with stronger checks is ok. */
      if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
          irt_isguard(ir->t) >= guard)
        return ref;
      ref = ir->prev;
    }
  }
  return EMITFOLD;  /* No fallthrough to regular CSE. */
}
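
/* Example of the guard test above: a guarded (checked) num->int CONV may
** stand in for a later unguarded request with the same operand and mode,
** because irt_isguard(ir->t) >= guard then holds. The reverse never
** matches -- an unguarded conversion is too weak to replace a guarded one.
*/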

/* FP conversion narrowing. */
LJFOLD(TOBIT ADD KNUM)
LJFOLD(TOBIT SUB KNUM)
LJFOLD(CONV ADD IRCONV_INT_NUM)
LJFOLD(CONV SUB IRCONV_INT_NUM)
LJFOLD(CONV ADD IRCONV_I64_NUM)
LJFOLD(CONV SUB IRCONV_I64_NUM)
LJFOLDF(narrow_convert)
{
  PHIBARRIER(fleft);
  /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
  if (J->chain[IR_LOOP])
    return NEXTFOLD;
  lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT);
  return lj_opt_narrow_convert(J);
}

/* -- Integer algebraic simplifications ----------------------------------- */

LJFOLD(ADD any KINT)
LJFOLD(ADDOV any KINT)
LJFOLD(SUBOV any KINT)
LJFOLDF(simplify_intadd_k)
{
  if (fright->i == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(MULOV any KINT)
LJFOLDF(simplify_intmul_k)
{
  if (fright->i == 0)  /* i * 0 ==> 0 */
    return RIGHTFOLD;
  if (fright->i == 1)  /* i * 1 ==> i */
    return LEFTFOLD;
  if (fright->i == 2) {  /* i * 2 ==> i + i */
    fins->o = IR_ADDOV;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any KINT)
LJFOLDF(simplify_intsub_k)
{
  if (fright->i == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i);  /* Overflow for -2^31 ok. */
  return RETRYFOLD;
}

LJFOLD(SUB KINT any)
LJFOLD(SUB KINT64 any)
LJFOLDF(simplify_intsub_kleft)
{
  if (fleft->o == IR_KINT ? (fleft->i == 0) : (ir_kint64(fleft)->u64 == 0)) {
    fins->o = IR_NEG;  /* 0 - i ==> -i */
    fins->op1 = fins->op2;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(ADD any KINT64)
LJFOLDF(simplify_intadd_k64)
{
  if (ir_kint64(fright)->u64 == 0)  /* i + 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB any KINT64)
LJFOLDF(simplify_intsub_k64)
{
  uint64_t k = ir_kint64(fright)->u64;
  if (k == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint64(J, (uint64_t)-(int64_t)k);
  return RETRYFOLD;
}

static TRef simplify_intmul_k(jit_State *J, int32_t k)
{
  /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2.
  ** But this is mainly intended for simple address arithmetic.
  ** Also it's easier for the backend to optimize the original multiplies.
  */
  if (k == 1) {  /* i * 1 ==> i */
    return LEFTFOLD;
  } else if ((k & (k-1)) == 0) {  /* i * 2^k ==> i << k */
    fins->o = IR_BSHL;
    fins->op2 = lj_ir_kint(J, lj_fls((uint32_t)k));
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
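
/* E.g. i * 8 reaches this helper with k = 8; since 8 & 7 == 0 the multiply
** is retried as i << lj_fls(8), i.e. i << 3. The callers below only pass
** positive constants, and k == 1 returns before the power-of-two test.
*/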

LJFOLD(MUL any KINT)
LJFOLDF(simplify_intmul_k32)
{
  if (fright->i == 0)  /* i * 0 ==> 0 */
    return INTFOLD(0);
  else if (fright->i > 0)
    return simplify_intmul_k(J, fright->i);
  return NEXTFOLD;
}

LJFOLD(MUL any KINT64)
LJFOLDF(simplify_intmul_k64)
{
  if (ir_kint64(fright)->u64 == 0)  /* i * 0 ==> 0 */
    return INT64FOLD(0);
#if LJ_64
  /* NYI: SPLIT for BSHL and 32 bit backend support. */
  else if (ir_kint64(fright)->u64 < 0x80000000u)
    return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
#endif
  return NEXTFOLD;
}

LJFOLD(MOD any KINT)
LJFOLDF(simplify_intmod_k)
{
  int32_t k = fright->i;
  lua_assert(k != 0);
  if (k > 0 && (k & (k-1)) == 0) {  /* i % (2^k) ==> i & (2^k-1) */
    fins->o = IR_BAND;
    fins->op2 = lj_ir_kint(J, k-1);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(MOD KINT any)
LJFOLDF(simplify_intmod_kleft)
{
  if (fleft->i == 0)
    return INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB any any)
LJFOLD(SUBOV any any)
LJFOLDF(simplify_intsub)
{
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))  /* i - i ==> 0 */
    return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB ADD any)
LJFOLDF(simplify_intsubadd_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1)  /* (i + j) - i ==> j */
      return fleft->op2;
    if (fins->op2 == fleft->op2)  /* (i + j) - j ==> i */
      return fleft->op1;
  }
  return NEXTFOLD;
}

LJFOLD(SUB SUB any)
LJFOLDF(simplify_intsubsub_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1) {  /* (i - j) - i ==> 0 - j */
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      fins->op2 = fleft->op2;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB any SUB)
LJFOLDF(simplify_intsubsub_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1)  /* i - (i - j) ==> j */
      return fright->op2;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any ADD)
LJFOLDF(simplify_intsubadd_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1) {  /* i - (i + j) ==> 0 - j */
      fins->op2 = fright->op2;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
    if (fins->op1 == fright->op2) {  /* i - (j + i) ==> 0 - j */
      fins->op2 = fright->op1;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB ADD ADD)
LJFOLDF(simplify_intsubaddadd_cancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    PHIBARRIER(fright);
    if (fleft->op1 == fright->op1) {  /* (i + j1) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op1 == fright->op2) {  /* (i + j1) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op1) {  /* (j1 + i) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op2) {  /* (j1 + i) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BAND any KINT)
LJFOLD(BAND any KINT64)
LJFOLDF(simplify_band_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
                                     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i & 0 ==> 0 */
    return RIGHTFOLD;
  if (k == -1)  /* i & -1 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BOR any KINT)
LJFOLD(BOR any KINT64)
LJFOLDF(simplify_bor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
                                     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i | 0 ==> i */
    return LEFTFOLD;
  if (k == -1)  /* i | -1 ==> -1 */
    return RIGHTFOLD;
  return NEXTFOLD;
}

LJFOLD(BXOR any KINT)
LJFOLD(BXOR any KINT64)
LJFOLDF(simplify_bxor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
                                     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i xor 0 ==> i */
    return LEFTFOLD;
  if (k == -1) {  /* i xor -1 ==> ~i */
    fins->o = IR_BNOT;
    fins->op2 = 0;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BSHL any KINT)
LJFOLD(BSHR any KINT)
LJFOLD(BSAR any KINT)
LJFOLD(BROL any KINT)
LJFOLD(BROR any KINT)
LJFOLDF(simplify_shift_ik)
{
  int32_t mask = irt_is64(fins->t) ? 63 : 31;
  int32_t k = (fright->i & mask);
  if (k == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  if (k == 1 && fins->o == IR_BSHL) {  /* i << 1 ==> i + i */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  if (k != fright->i) {  /* i o k ==> i o (k & mask) */
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
#ifndef LJ_TARGET_UNIFYROT
  if (fins->o == IR_BROR) {  /* bror(i, k) ==> brol(i, (-k)&mask) */
    fins->o = IR_BROL;
    fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&mask);
    return RETRYFOLD;
  }
#endif
  return NEXTFOLD;
}

LJFOLD(BSHL any BAND)
LJFOLD(BSHR any BAND)
LJFOLD(BSAR any BAND)
LJFOLD(BROL any BAND)
LJFOLD(BROR any BAND)
LJFOLDF(simplify_shift_andk)
{
  IRIns *irk = IR(fright->op2);
  PHIBARRIER(fright);
  if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
      irk->o == IR_KINT) {  /* i o (j & mask) ==> i o j */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = irk->i & mask;
    if (k == mask) {
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BSHL KINT any)
LJFOLD(BSHR KINT any)
LJFOLD(BSHL KINT64 any)
LJFOLD(BSHR KINT64 any)
LJFOLDF(simplify_shift1_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
                                    (int64_t)ir_k64(fleft)->u64;
  if (k == 0)  /* 0 o i ==> 0 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSAR KINT any)
LJFOLD(BROL KINT any)
LJFOLD(BROR KINT any)
LJFOLD(BSAR KINT64 any)
LJFOLD(BROL KINT64 any)
LJFOLD(BROR KINT64 any)
LJFOLDF(simplify_shift2_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
                                    (int64_t)ir_k64(fleft)->u64;
  if (k == 0 || k == -1)  /* 0 o i ==> 0; -1 o i ==> -1 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSHL BAND KINT)
LJFOLD(BSHR BAND KINT)
LJFOLD(BROL BAND KINT)
LJFOLD(BROR BAND KINT)
LJFOLDF(simplify_shiftk_andk)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);
  if (irk->o == IR_KINT) {  /* (i & k1) o k2 ==> (i o k2) & (k1 o k2) */
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    fins->op1 = fleft->op1;
    fins->op1 = (IRRef1)lj_opt_fold(J);
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    fins->ot = IRTI(IR_BAND);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BAND BSHL KINT)
LJFOLD(BAND BSHR KINT)
LJFOLDF(simplify_andk_shiftk)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT &&
      kfold_intop(-1, irk->i, (IROp)fleft->o) == fright->i)
    return LEFTFOLD;  /* (i o k1) & k2 ==> i, if (-1 o k1) == k2 */
  return NEXTFOLD;
}

/* -- Reassociation ------------------------------------------------------- */

LJFOLD(ADD ADD KINT)
LJFOLD(MUL MUL KINT)
LJFOLD(BAND BAND KINT)
LJFOLD(BOR BOR KINT)
LJFOLD(BXOR BXOR KINT)
LJFOLDF(reassoc_intarith_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT) {
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    if (k == irk->i)  /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
}
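
/* Example: ADD (ADD i 2) 3 folds the two constants via kfold_intop and is
** retried as ADD i 5 -- one instruction less. The early k == irk->i return
** (e.g. BOR (BOR i 1) 1) avoids a rewrite when the combined constant is
** unchanged, which keeps the rule set monotonic (see Requirement #3).
*/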

LJFOLD(ADD ADD KINT64)
LJFOLD(MUL MUL KINT64)
LJFOLD(BAND BAND KINT64)
LJFOLD(BOR BOR KINT64)
LJFOLD(BXOR BXOR KINT64)
LJFOLDF(reassoc_intarith_k64)
{
#if LJ_HASFFI || LJ_64
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT64) {
    uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
                                  ir_k64(fright)->u64, (IROp)fins->o);
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint64(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(MIN MIN any)
LJFOLD(MAX MAX any)
LJFOLD(BAND BAND any)
LJFOLD(BOR BOR any)
LJFOLDF(reassoc_dup)
{
  if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
    return LEFTFOLD;  /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */
  return NEXTFOLD;
}

LJFOLD(BXOR BXOR any)
LJFOLDF(reassoc_bxor)
{
  PHIBARRIER(fleft);
  if (fins->op2 == fleft->op1)  /* (a xor b) xor a ==> b */
    return fleft->op2;
  if (fins->op2 == fleft->op2)  /* (a xor b) xor b ==> a */
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(BSHL BSHL KINT)
LJFOLD(BSHR BSHR KINT)
LJFOLD(BSAR BSAR KINT)
LJFOLD(BROL BROL KINT)
LJFOLD(BROR BROR KINT)
LJFOLDF(reassoc_shift)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);  /* The (shift any KINT) rule covers k2 == 0 and more. */
  if (irk->o == IR_KINT) {  /* (i o k1) o k2 ==> i o (k1 + k2) */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = (irk->i & mask) + (fright->i & mask);
    if (k > mask) {  /* Combined shift too wide? */
      if (fins->o == IR_BSHL || fins->o == IR_BSHR)
        return mask == 31 ? INTFOLD(0) : INT64FOLD(0);
      else if (fins->o == IR_BSAR)
        k = mask;
      else
        k &= mask;
    }
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(MIN MIN KNUM)
LJFOLD(MAX MAX KNUM)
LJFOLD(MIN MIN KINT)
LJFOLD(MAX MAX KINT)
LJFOLDF(reassoc_minmax_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KNUM) {
    lua_Number a = ir_knum(irk)->n;
    lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD);
    if (a == y)  /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_knum(J, y);
    return RETRYFOLD;  /* (x o k1) o k2 ==> x o (k1 o k2) */
  } else if (irk->o == IR_KINT) {
    int32_t a = irk->i;
    int32_t y = kfold_intop(a, fright->i, fins->o);
    if (a == y)  /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, y);
    return RETRYFOLD;  /* (x o k1) o k2 ==> x o (k1 o k2) */
  }
  return NEXTFOLD;
}

LJFOLD(MIN MAX any)
LJFOLD(MAX MIN any)
LJFOLDF(reassoc_minmax_left)
{
  if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
    return RIGHTFOLD;  /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */
  return NEXTFOLD;
}

LJFOLD(MIN any MAX)
LJFOLD(MAX any MIN)
LJFOLDF(reassoc_minmax_right)
{
  if (fins->op1 == fright->op1 || fins->op1 == fright->op2)
    return LEFTFOLD;  /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */
  return NEXTFOLD;
}

/* -- Array bounds check elimination -------------------------------------- */

/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
*/
LJFOLD(ABC any ADD)
LJFOLDF(abc_fwd)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    if (irref_isk(fright->op2)) {
      IRIns *add2 = IR(fright->op1);
      if (add2->o == IR_ADD && irref_isk(add2->op2) &&
          IR(fright->op2)->i == -IR(add2->op2)->i) {
        IRRef ref = J->chain[IR_ABC];
        IRRef lim = add2->op1;
        if (fins->op1 > lim) lim = fins->op1;
        while (ref > lim) {
          IRIns *ir = IR(ref);
          if (ir->op1 == fins->op1 && ir->op2 == add2->op1)
            return DROPFOLD;
          ref = ir->prev;
        }
      }
    }
  }
  return NEXTFOLD;
}

/* Eliminate ABC for constants.
** ABC(asize, k1), ABC(asize, k2) ==> ABC(asize, max(k1, k2))
** Drop second ABC if k2 is lower. Otherwise patch first ABC with k2.
*/
LJFOLD(ABC any KINT)
LJFOLDF(abc_k)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    IRRef ref = J->chain[IR_ABC];
    IRRef asize = fins->op1;
    while (ref > asize) {
      IRIns *ir = IR(ref);
      if (ir->op1 == asize && irref_isk(ir->op2)) {
        int32_t k = IR(ir->op2)->i;
        if (fright->i > k)
          ir->op2 = fins->op2;
        return DROPFOLD;
      }
      ref = ir->prev;
    }
    return EMITFOLD;  /* Already performed CSE. */
  }
  return NEXTFOLD;
}
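
/* E.g. accessing t[3] and then t[7] inside a trace: the second check
** ABC(asize, 7) finds the earlier ABC(asize, 3), patches its operand to
** the larger constant and drops itself, so a single bounds check covers
** both accesses.
*/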

/* Eliminate invariant ABC inside loop. */
LJFOLD(ABC any any)
LJFOLDF(abc_invar)
{
  if (!irt_isint(fins->t) && J->chain[IR_LOOP])  /* Currently marked as PTR. */
    return DROPFOLD;
  return NEXTFOLD;
}

/* -- Commutativity ------------------------------------------------------- */

/* The refs of commutative ops are canonicalized. Lower refs go to the right.
** Rationale behind this:
** - It (also) moves constants to the right.
** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices).
** - It helps CSE to find more matches.
** - The assembler generates better code with constants at the right.
*/

LJFOLD(ADD any any)
LJFOLD(MUL any any)
LJFOLD(ADDOV any any)
LJFOLD(MULOV any any)
LJFOLDF(comm_swap)
{
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(EQ any any)
LJFOLD(NE any any)
LJFOLDF(comm_equal)
{
  /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD(fins->o == IR_EQ);
  return fold_comm_swap(J);
}

LJFOLD(LT any any)
LJFOLD(GE any any)
LJFOLD(LE any any)
LJFOLD(GT any any)
LJFOLD(ULT any any)
LJFOLD(UGE any any)
LJFOLD(ULE any any)
LJFOLD(UGT any any)
LJFOLDF(comm_comp)
{
  /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD((fins->o ^ (fins->o >> 1)) & 1);
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    fins->o ^= 3;  /* GT <-> LT, GE <-> LE, does not affect U */
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
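
/* Both tricks above rely on the comparison opcodes being laid out in two
** aligned quadruples in lj_ir.h (LT, GE, LE, GT and ULT, UGE, ULE, UGT):
** 'fins->o ^= 3' swaps a comparison for its operand-exchanged mirror
** (LT <-> GT, GE <-> LE), and '(fins->o ^ (fins->o >> 1)) & 1' is 1
** exactly for the reflexively true comparisons (GE, LE, UGE, ULE).
*/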

LJFOLD(BAND any any)
LJFOLD(BOR any any)
LJFOLD(MIN any any)
LJFOLD(MAX any any)
LJFOLDF(comm_dup)
{
  if (fins->op1 == fins->op2)  /* x o x ==> x */
    return LEFTFOLD;
  return fold_comm_swap(J);
}

LJFOLD(BXOR any any)
LJFOLDF(comm_bxor)
{
  if (fins->op1 == fins->op2)  /* i xor i ==> 0 */
    return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
  return fold_comm_swap(J);
}

/* -- Simplification of compound expressions ------------------------------ */

static TRef kfold_xload(jit_State *J, IRIns *ir, const void *p)
{
  int32_t k;
  switch (irt_type(ir->t)) {
  case IRT_NUM: return lj_ir_knum_u64(J, *(uint64_t *)p);
  case IRT_I8: k = (int32_t)*(int8_t *)p; break;
  case IRT_U8: k = (int32_t)*(uint8_t *)p; break;
  case IRT_I16: k = (int32_t)(int16_t)lj_getu16(p); break;
  case IRT_U16: k = (int32_t)(uint16_t)lj_getu16(p); break;
  case IRT_INT: case IRT_U32: k = (int32_t)lj_getu32(p); break;
  case IRT_I64: case IRT_U64: return lj_ir_kint64(J, *(uint64_t *)p);
  default: return 0;
  }
  return lj_ir_kint(J, k);
}

/* Turn: string.sub(str, a, b) == kstr
** into: string.byte(str, a) == string.byte(kstr, 1) etc.
** Note: this creates unaligned XLOADs on x86/x64.
*/
LJFOLD(EQ SNEW KGC)
LJFOLD(NE SNEW KGC)
LJFOLDF(merge_eqne_snew_kgc)
{
  GCstr *kstr = ir_kstr(fright);
  int32_t len = (int32_t)kstr->len;
  lua_assert(irt_isstr(fins->t));

#if LJ_TARGET_X86ORX64
#define FOLD_SNEW_MAX_LEN  4  /* Handle string lengths 0, 1, 2, 3, 4. */
#define FOLD_SNEW_TYPE8    IRT_I8  /* Creates shorter immediates. */
#else
#define FOLD_SNEW_MAX_LEN  1  /* Handle string lengths 0 or 1. */
#define FOLD_SNEW_TYPE8    IRT_U8  /* Prefer unsigned loads. */
#endif

  if (len <= FOLD_SNEW_MAX_LEN) {
    IROp op = (IROp)fins->o;
    IRRef strref = fleft->op1;
    lua_assert(IR(strref)->o == IR_STRREF);
    if (op == IR_EQ) {
      emitir(IRTGI(IR_EQ), fleft->op2, lj_ir_kint(J, len));
      /* Caveat: fins/fleft/fright is no longer valid after emitir. */
    } else {
      /* NE is not expanded since this would need an OR of two conds. */
      if (!irref_isk(fleft->op2))  /* Only handle the constant length case. */
        return NEXTFOLD;
      if (IR(fleft->op2)->i != len)
        return DROPFOLD;
    }
    if (len > 0) {
      /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
      uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, FOLD_SNEW_TYPE8) :
                               len == 2 ? IRT(IR_XLOAD, IRT_U16) :
                               IRTI(IR_XLOAD));
      TRef tmp = emitir(ot, strref,
                        IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0));
      TRef val = kfold_xload(J, IR(tref_ref(tmp)), strdata(kstr));
      if (len == 3)
        tmp = emitir(IRTI(IR_BAND), tmp,
                     lj_ir_kint(J, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
      fins->op1 = (IRRef1)tmp;
      fins->op2 = (IRRef1)val;
      fins->ot = (IROpT)IRTGI(op);
      return RETRYFOLD;
    } else {
      return DROPFOLD;
    }
  }
  return NEXTFOLD;
}
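
/* Lua-level sketch of this transform for s2 = string.sub(s, i, i+2):
** the test s2 == "abc" no longer needs the interned result string.
** A guard on the length (EQ len +3) is emitted, a 4 byte XLOAD reads
** the source bytes (safe, since all strings have a trailing NUL), the
** extra byte is masked off with BAND, and the compare is retried against
** the constant folded from "abc" by kfold_xload -- no allocation of s2.
*/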
1833 /* -- Loads --------------------------------------------------------------- */
1835 /* Loads cannot be folded or passed on to CSE in general.
1836 ** Alias analysis is needed to check for forwarding opportunities.
1838 ** Caveat: *all* loads must be listed here or they end up at CSE!
1841 LJFOLD(ALOAD any)
1842 LJFOLDX(lj_opt_fwd_aload)
1844 /* From HREF fwd (see below). Must eliminate, not supported by fwd/backend. */
1845 LJFOLD(HLOAD KKPTR)
1846 LJFOLDF(kfold_hload_kkptr)
1848 UNUSED(J);
1849 lua_assert(ir_kptr(fleft) == niltvg(J2G(J)));
1850 return TREF_NIL;
1853 LJFOLD(HLOAD any)
1854 LJFOLDX(lj_opt_fwd_hload)
1856 LJFOLD(ULOAD any)
1857 LJFOLDX(lj_opt_fwd_uload)
1859 LJFOLD(CALLL any IRCALL_lj_tab_len)
1860 LJFOLDX(lj_opt_fwd_tab_len)
1862 /* Upvalue refs are really loads, but there are no corresponding stores.
1863 ** So CSE is ok for them, except for UREFO across a GC step (see below).
1864 ** If the referenced function is const, its upvalue addresses are const, too.
1865 ** This can be used to improve CSE by looking for the same address,
1866 ** even if the upvalues originate from a different function.
1868 LJFOLD(UREFO KGC any)
1869 LJFOLD(UREFC KGC any)
1870 LJFOLDF(cse_uref)
1872 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
1873 IRRef ref = J->chain[fins->o];
1874 GCfunc *fn = ir_kfunc(fleft);
1875 GCupval *uv = gco2uv(gcref(fn->l.uvptr[(fins->op2 >> 8)]));
1876 while (ref > 0) {
1877 IRIns *ir = IR(ref);
1878 if (irref_isk(ir->op1)) {
1879 GCfunc *fn2 = ir_kfunc(IR(ir->op1));
1880 if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
1881 if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
1882 break;
1883 return ref;
1886 ref = ir->prev;
1889 return EMITFOLD;
1892 LJFOLD(HREFK any any)
1893 LJFOLDX(lj_opt_fwd_hrefk)
1895 LJFOLD(HREF TNEW any)
1896 LJFOLDF(fwd_href_tnew)
1898 if (lj_opt_fwd_href_nokey(J))
1899 return lj_ir_kkptr(J, niltvg(J2G(J)));
1900 return NEXTFOLD;
1903 LJFOLD(HREF TDUP KPRI)
1904 LJFOLD(HREF TDUP KGC)
1905 LJFOLD(HREF TDUP KNUM)
1906 LJFOLDF(fwd_href_tdup)
1908 TValue keyv;
1909 lj_ir_kvalue(J->L, &keyv, fright);
1910 if (lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv) == niltvg(J2G(J)) &&
1911 lj_opt_fwd_href_nokey(J))
1912 return lj_ir_kkptr(J, niltvg(J2G(J)));
1913 return NEXTFOLD;
1916 /* We can safely FOLD/CSE array/hash refs and field loads, since there
1917 ** are no corresponding stores. But we need to check for any NEWREF with
1918 ** an aliased table, as it may invalidate all of the pointers and fields.
1919 ** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
1920 ** FLOADs. And NEWREF itself is treated like a store (see below).
1922 LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
1923 LJFOLDF(fload_tab_tnew_asize)
1925 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
1926 return INTFOLD(fleft->op1);
1927 return NEXTFOLD;
1930 LJFOLD(FLOAD TNEW IRFL_TAB_HMASK)
1931 LJFOLDF(fload_tab_tnew_hmask)
1933 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
1934 return INTFOLD((1 << fleft->op2)-1);
1935 return NEXTFOLD;
1938 LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE)
1939 LJFOLDF(fload_tab_tdup_asize)
1941 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
1942 return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize);
1943 return NEXTFOLD;
1946 LJFOLD(FLOAD TDUP IRFL_TAB_HMASK)
1947 LJFOLDF(fload_tab_tdup_hmask)
1949 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
1950 return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask);
1951 return NEXTFOLD;
1954 LJFOLD(HREF any any)
1955 LJFOLD(FLOAD any IRFL_TAB_ARRAY)
1956 LJFOLD(FLOAD any IRFL_TAB_NODE)
1957 LJFOLD(FLOAD any IRFL_TAB_ASIZE)
1958 LJFOLD(FLOAD any IRFL_TAB_HMASK)
1959 LJFOLDF(fload_tab_ah)
1961 TRef tr = lj_opt_cse(J);
1962 return lj_opt_fwd_tptr(J, tref_ref(tr)) ? tr : EMITFOLD;
1965 /* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
1966 LJFOLD(FLOAD KGC IRFL_STR_LEN)
1967 LJFOLDF(fload_str_len_kgc)
1969 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1970 return INTFOLD((int32_t)ir_kstr(fleft)->len);
1971 return NEXTFOLD;
1974 LJFOLD(FLOAD SNEW IRFL_STR_LEN)
1975 LJFOLDF(fload_str_len_snew)
1977 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
1978 PHIBARRIER(fleft);
1979 return fleft->op2;
1981 return NEXTFOLD;
1984 /* The C type ID of cdata objects is immutable. */
1985 LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
1986 LJFOLDF(fload_cdata_typeid_kgc)
1988 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1989 return INTFOLD((int32_t)ir_kcdata(fleft)->ctypeid);
1990 return NEXTFOLD;
1993 /* Get the contents of immutable cdata objects. */
1994 LJFOLD(FLOAD KGC IRFL_CDATA_PTR)
1995 LJFOLD(FLOAD KGC IRFL_CDATA_INT)
1996 LJFOLD(FLOAD KGC IRFL_CDATA_INT64)
1997 LJFOLDF(fload_cdata_int64_kgc)
1999 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
2000 void *p = cdataptr(ir_kcdata(fleft));
2001 if (irt_is64(fins->t))
2002 return INT64FOLD(*(uint64_t *)p);
2003 else
2004 return INTFOLD(*(int32_t *)p);
2006 return NEXTFOLD;

LJFOLD(FLOAD CNEW IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD CNEWI IRFL_CDATA_CTYPEID)
LJFOLDF(fload_cdata_typeid_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op1;  /* No PHI barrier needed. CNEW/CNEWI op1 is const. */
  return NEXTFOLD;
}

/* Pointer, int and int64 cdata objects are immutable. */
LJFOLD(FLOAD CNEWI IRFL_CDATA_PTR)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_ptr_int64_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op2;  /* Fold even across PHI to avoid allocations. */
  return NEXTFOLD;
}

LJFOLD(FLOAD any IRFL_STR_LEN)
LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD any IRFL_CDATA_PTR)
LJFOLD(FLOAD any IRFL_CDATA_INT)
LJFOLD(FLOAD any IRFL_CDATA_INT64)
LJFOLD(VLOAD any any)  /* Vararg loads have no corresponding stores. */
LJFOLDX(lj_opt_cse)

/* All other field loads need alias analysis. */
LJFOLD(FLOAD any any)
LJFOLDX(lj_opt_fwd_fload)

/* This is for LOOP only. Recording handles SLOADs internally. */
LJFOLD(SLOAD any any)
LJFOLDF(fwd_sload)
{
  if ((fins->op2 & IRSLOAD_FRAME)) {
    TRef tr = lj_opt_cse(J);
    return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
  } else {
    lua_assert(J->slot[fins->op1] != 0);
    return J->slot[fins->op1];
  }
}
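
/* A note on the RETF check above (a reading of the code, not an
** authoritative statement): RETF modifies BASE, so a frame SLOAD found
** by CSE is only reusable if it was emitted after the most recent RETF;
** anything older must be re-emitted.
*/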

/* Only fold for KKPTR. The pointer _and_ the contents must be const. */
LJFOLD(XLOAD KKPTR any)
LJFOLDF(xload_kptr)
{
  TRef tr = kfold_xload(J, fins, ir_kptr(fleft));
  return tr ? tr : NEXTFOLD;
}

LJFOLD(XLOAD any any)
LJFOLDX(lj_opt_fwd_xload)

/* -- Write barriers ------------------------------------------------------ */

/* Write barriers are amenable to CSE, but not across any incremental
** GC steps.
**
** The same logic applies to open upvalue references, because a stack
** may be resized during a GC step (not the current stack, but maybe that
** of a coroutine).
*/
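
/* Hedged illustration: in a trace for
**
**   uv = x      -- UREFO + USTORE
**   t = {}      -- TNEW, may trigger an incremental GC step
**   uv = y      -- second UREFO
**
** the second UREFO must be re-emitted rather than CSEd, because the GC
** step may have resized the owning coroutine's stack and moved the
** TValue the upvalue points to. The Lua snippet is illustrative only.
*/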

LJFOLD(TBAR any)
LJFOLD(OBAR any any)
LJFOLD(UREFO any any)
LJFOLDF(barrier_tab)
{
  TRef tr = lj_opt_cse(J);
  if (gcstep_barrier(J, tref_ref(tr)))  /* CSE across GC step? */
    return EMITFOLD;  /* Raw emit. Assumes fins is left intact by CSE. */
  return tr;
}

LJFOLD(TBAR TNEW)
LJFOLD(TBAR TDUP)
LJFOLDF(barrier_tnew_tdup)
{
  /* New tables are always white and never need a barrier. */
  if (fins->op1 < J->chain[IR_LOOP])  /* Except across a GC step. */
    return NEXTFOLD;
  return DROPFOLD;
}
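
/* Example: in "local t = {}; t.f = x" the TBAR on t is dropped, since
** the freshly allocated table is still white and needs no barrier. If
** the TNEW precedes the current LOOP instruction, the table may have
** been marked black by a GC step on an earlier iteration, so the
** barrier is kept (NEXTFOLD falls through to the generic rule above).
*/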

/* -- Stores and allocations ---------------------------------------------- */

/* Stores and allocations cannot be folded or passed on to CSE in general.
** But some stores can be eliminated with dead-store elimination (DSE).
**
** Caveat: *all* stores and allocs must be listed here or they end up at CSE!
*/

LJFOLD(ASTORE any any)
LJFOLD(HSTORE any any)
LJFOLDX(lj_opt_dse_ahstore)

LJFOLD(USTORE any any)
LJFOLDX(lj_opt_dse_ustore)

LJFOLD(FSTORE any any)
LJFOLDX(lj_opt_dse_fstore)

LJFOLD(XSTORE any any)
LJFOLDX(lj_opt_dse_xstore)

LJFOLD(NEWREF any any)  /* Treated like a store. */
LJFOLD(CALLS any any)
LJFOLD(CALLL any any)  /* Safeguard fallback. */
LJFOLD(CALLXS any any)
LJFOLD(XBAR)
LJFOLD(RETF any any)  /* Modifies BASE. */
LJFOLD(TNEW any any)
LJFOLD(TDUP any)
LJFOLD(CNEW any any)
LJFOLD(XSNEW any any)
LJFOLDX(lj_ir_emit)

/* ------------------------------------------------------------------------ */

/* Every entry in the generated hash table is a 32 bit pattern:
**
**   xxxxxxxx iiiiiii lllllll rrrrrrrrrr
**
**   xxxxxxxx   = 8 bit index into fold function table
**   iiiiiii    = 7 bit folded instruction opcode
**   lllllll    = 7 bit left instruction opcode
**   rrrrrrrrrr = 8 bit right instruction opcode or 10 bits from literal field
*/
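
/* A worked example with symbolic opcodes (see lj_ir.h for the actual
** values): folding ADD(KINT, KINT) would build the 24 bit lookup key
**
**   key = (IR_ADD << 17) + (IR_KINT << 10) + IR_KINT
**
** in lj_opt_fold() below, and probe it as ADD KINT KINT, ADD any KINT,
** ADD KINT any and ADD any any, in that order.
*/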

#include "lj_folddef.h"

/* ------------------------------------------------------------------------ */

/* Fold IR instruction. */
TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
{
  uint32_t key, any;
  IRRef ref;

  if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
    lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
                JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT);
    /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
    if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
      return lj_opt_cse(J);

    /* Forwarding or CSE disabled? Emit raw IR for loads, except for SLOAD. */
    if ((J->flags & (JIT_F_OPT_FWD|JIT_F_OPT_CSE)) !=
        (JIT_F_OPT_FWD|JIT_F_OPT_CSE) &&
        irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD)
      return lj_ir_emit(J);

    /* DSE disabled? Emit raw IR for stores. */
    if (!(J->flags & JIT_F_OPT_DSE) && irm_kind(lj_ir_mode[fins->o]) == IRM_S)
      return lj_ir_emit(J);
  }

  /* Fold engine start/retry point. */
retry:
  /* Construct key from opcode and operand opcodes (unless literal/none). */
  key = ((uint32_t)fins->o << 17);
  if (fins->op1 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op1)->o << 10;
    *fleft = *IR(fins->op1);
  }
  if (fins->op2 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op2)->o;
    *fright = *IR(fins->op2);
  } else {
    key += (fins->op2 & 0x3ffu);  /* Literal mask. Must include IRCONV_*MASK. */
  }

  /* Check for a match in order from most specific to least specific. */
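  /* Hand-traced wildcard sequence (a sketch, not normative):
  **
  **   any = 0x00000  probes  ins left right
  **   any = 0xffc00  probes  ins any  right  (k |= 0x1fc00)
  **   any = 0x003ff  probes  ins left any    (k |= 0x003ff)
  **   any = 0xfffff  probes  ins any  any    (k |= 0x1ffff)
  **
  ** The & 0x1ffff below keeps the wildcard bits from spilling into the
  ** instruction opcode field (bits 17 and up).
  */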
  any = 0;
  for (;;) {
    uint32_t k = key | (any & 0x1ffff);
    uint32_t h = fold_hashkey(k);
    uint32_t fh = fold_hash[h];  /* Lookup key in semi-perfect hash table. */
    if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
      ref = (IRRef)tref_ref(fold_func[fh >> 24](J));
      if (ref != NEXTFOLD)
        break;
    }
    if (any == 0xfffff)  /* Exhausted folding. Pass on to CSE. */
      return lj_opt_cse(J);
    any = (any | (any >> 10)) ^ 0xffc00;
  }

  /* Return value processing, ordered by frequency. */
  if (LJ_LIKELY(ref >= MAX_FOLD))
    return TREF(ref, irt_t(IR(ref)->t));
  if (ref == RETRYFOLD)
    goto retry;
  if (ref == KINTFOLD)
    return lj_ir_kint(J, fins->i);
  if (ref == FAILFOLD)
    lj_trace_err(J, LJ_TRERR_GFAIL);
  lua_assert(ref == DROPFOLD);
  return REF_DROP;
}

/* -- Common-Subexpression Elimination ------------------------------------ */

/* CSE an IR instruction. This is very fast due to the skip-list chains. */
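/* Sketch of a per-opcode chain (reference numbers are made up):
**
**   J->chain[IR_ADD] -> 0014 -> 0011 -> 0009 -> 0
**
** J->chain[op] holds the latest instruction with opcode op and each
** prev field links to the previous one with the same opcode. The search
** below stops at lim = max(op1, op2): a matching instruction must have
** been emitted after both of its operands.
*/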
TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
{
  /* Avoid narrow to wide store-to-load forwarding stall */
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  IROp op = fins->o;
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    /* Limited search for same operands in per-opcode chain. */
    IRRef ref = J->chain[op];
    IRRef lim = fins->op1;
    if (fins->op2 > lim) lim = fins->op2;  /* Relies on lit < REF_BIAS. */
    while (ref > lim) {
      if (IR(ref)->op12 == op12)
        return TREF(ref, irt_t(IR(ref)->t));  /* Common subexpression found. */
      ref = IR(ref)->prev;
    }
  }
  /* Otherwise emit IR (inlined for speed). */
  {
    IRRef ref = lj_ir_nextins(J);
    IRIns *ir = IR(ref);
    ir->prev = J->chain[op];
    ir->op12 = op12;
    J->chain[op] = (IRRef1)ref;
    ir->o = fins->o;
    J->guardemit.irt |= fins->t.irt;
    return TREF(ref, irt_t((ir->t = fins->t)));
  }
}

/* CSE with explicit search limit. */
TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim)
{
  IRRef ref = J->chain[fins->o];
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  while (ref > lim) {
    if (IR(ref)->op12 == op12)
      return ref;
    ref = IR(ref)->prev;
  }
  return lj_ir_emit(J);
}

/* ------------------------------------------------------------------------ */

#undef IR
#undef fins
#undef fleft
#undef fright
#undef knumleft
#undef knumright
#undef emitir

#endif