/*
** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
** ABCelim: Array Bounds Check Elimination.
** CSE: Common-Subexpression Elimination.
** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_fold_c
#define LUA_CORE

#include <math.h>

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_str.h"
#include "lj_tab.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#if LJ_HASFFI
#include "lj_ctype.h"
#endif
#include "lj_carith.h"
#include "lj_vm.h"
#include "lj_strscan.h"
/* Here's a short description of how the FOLD engine processes instructions:
**
** The FOLD engine receives a single instruction stored in fins (J->fold.ins).
** The instruction and its operands are used to select matching fold rules.
** These are applied iteratively until a fixed point is reached.
**
** The 8 bit opcode of the instruction itself plus the opcodes of the
** two instructions referenced by its operands form a 24 bit key
** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits).
**
** This key is used for partial matching against the fold rules. The
** left/right operand fields of the key are successively masked with
** the 'any' wildcard, from most specific to least specific:
**
**   ins left right
**   ins any  right
**   ins left any
**   ins any  any
**
** The masked key is used to lookup a matching fold rule in a semi-perfect
** hash table. If a matching rule is found, the related fold function is run.
** Multiple rules can share the same fold function. A fold rule may return
** one of several special values:
**
** - NEXTFOLD means no folding was applied, because an additional test
**   inside the fold function failed. Matching continues against less
**   specific fold rules. Finally the instruction is passed on to CSE.
**
** - RETRYFOLD means the instruction was modified in-place. Folding is
**   retried as if this instruction had just been received.
**
** All other return values are terminal actions -- no further folding is
** applied:
**
** - INTFOLD(i) returns a reference to the integer constant i.
**
** - LEFTFOLD and RIGHTFOLD return the left/right operand reference
**   without emitting an instruction.
**
** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit
**   it without passing through any further optimizations.
**
** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have
**   no result (e.g. guarded assertions): FAILFOLD means the guard would
**   always fail, i.e. the current trace is pointless. DROPFOLD means
**   the guard is always true and has been eliminated. CONDFOLD is a
**   shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail).
**
** - Any other return value is interpreted as an IRRef or TRef. This
**   can be a reference to an existing or a newly created instruction.
**   Only the least-significant 16 bits (IRRef1) are used to form a TRef
**   which is finally returned to the caller.
**
** The FOLD engine receives instructions both from the trace recorder and
** substituted instructions from LOOP unrolling. This means all types
** of instructions may end up here, even though the recorder bypasses
** FOLD in some cases. Thus all loads, stores and allocations must have
** an any/any rule to avoid being passed on to CSE.
**
** Carefully read the following requirements before adding or modifying
** any fold rules:
**
** Requirement #1: All fold rules must preserve their destination type.
**
** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result).
** Never use lj_ir_knumint() which can have either a KINT or KNUM result.
**
** Requirement #2: Fold rules should not create *new* instructions which
** reference operands *across* PHIs.
**
** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the
** left operand is a PHI. Then fleft->op1 would point across the PHI
** frontier to an invariant instruction. Adding a PHI for this instruction
** would be counterproductive. The solution is to add a barrier which
** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case.
** The only exception is for recurrences with high latencies like
** repeated int->num->int conversions.
**
** One could relax this condition a bit if the referenced instruction is
** a PHI, too. But this often leads to worse code due to excessive
** register shuffling.
**
** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though.
** Even returning fleft->op1 would be ok, because a new PHI will be added,
** if needed. But again, this leads to excessive register shuffling and
** should be avoided.
**
** Requirement #3: The set of all fold rules must be monotonic to guarantee
** termination.
**
** The goal is optimization, so one primarily wants to add strength-reducing
** rules. This means eliminating an instruction or replacing an instruction
** with one or more simpler instructions. Don't add fold rules which point
** in the other direction.
**
** Some rules (like commutativity) do not directly reduce the strength of
** an instruction, but enable other fold rules (e.g. by moving constants
** to the right operand). These rules must be made unidirectional to avoid
** cycles.
**
** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it.
*/
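
/* Illustrative sketch (not from the original source): a minimal
** strength-reducing rule following the conventions above. The LJFOLD()
** line is the match key and LJFOLDF() defines the fold function; both
** must start at the beginning of a line for buildvm to pick them up,
** which is why this sketch is kept inside a comment:
**
**   LJFOLD(BOR any KINT)        // match key: 'ins left right'
**   LJFOLDF(example_bor_zero)
**   {
**     if (fright->i == 0)       // extra test; on failure fall through
**       return LEFTFOLD;        // i | 0 ==> i, no instruction emitted
**     return NEXTFOLD;          // try less specific rules, then CSE
**   }
*/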

/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)		(&J->cur.ir[(ref)])
#define fins		(&J->fold.ins)
#define fleft		(&J->fold.left)
#define fright		(&J->fold.right)
#define knumleft	(ir_knum(fleft)->n)
#define knumright	(ir_knum(fright)->n)

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Fold function type. Fastcall on x86 significantly reduces their size. */
typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);

/* Macros for the fold specs, so buildvm can recognize them. */
#define LJFOLD(x)
#define LJFOLDX(x)
#define LJFOLDF(name)	static TRef LJ_FASTCALL fold_##name(jit_State *J)
/* Note: They must be at the start of a line or buildvm ignores them! */

/* Barrier to prevent using operands across PHIs. */
#define PHIBARRIER(ir)	if (irt_isphi((ir)->t)) return NEXTFOLD

/* Barrier to prevent folding across a GC step.
** GC steps can only happen at the head of a trace and at LOOP.
** And the GC is only driven forward if there is at least one allocation.
*/
#define gcstep_barrier(J, ref) \
  ((ref) < J->chain[IR_LOOP] && \
   (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
    J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
    J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR]))
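
/* Illustrative note (not from the original source): CSE-style rules use
** this barrier before reusing a result that was computed before a possible
** GC step. A sketch mirroring cse_uref further below:
**
**   if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
**     break;  // an open upvalue ref must not be commoned across a GC step
*/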

/* -- Constant folding for FP numbers ------------------------------------- */

LJFOLD(ADD KNUM KNUM)
LJFOLD(SUB KNUM KNUM)
LJFOLD(MUL KNUM KNUM)
LJFOLD(DIV KNUM KNUM)
LJFOLD(NEG KNUM KNUM)
LJFOLD(ABS KNUM KNUM)
LJFOLD(ATAN2 KNUM KNUM)
LJFOLD(LDEXP KNUM KNUM)
LJFOLD(MIN KNUM KNUM)
LJFOLD(MAX KNUM KNUM)
LJFOLDF(kfold_numarith)
{
  lua_Number a = knumleft;
  lua_Number b = knumright;
  lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
  return lj_ir_knum(J, y);
}

LJFOLD(LDEXP KNUM KINT)
LJFOLDF(kfold_ldexp)
{
#if LJ_TARGET_X86ORX64
  UNUSED(J);
  return NEXTFOLD;
#else
  return lj_ir_knum(J, ldexp(knumleft, fright->i));
#endif
}

LJFOLD(FPMATH KNUM any)
LJFOLDF(kfold_fpmath)
{
  lua_Number a = knumleft;
  lua_Number y = lj_vm_foldfpm(a, fins->op2);
  return lj_ir_knum(J, y);
}

LJFOLD(POW KNUM KINT)
LJFOLDF(kfold_numpow)
{
  lua_Number a = knumleft;
  lua_Number b = (lua_Number)fright->i;
  lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
  return lj_ir_knum(J, y);
}

/* Must not use kfold_kref for numbers (could be NaN). */
LJFOLD(EQ KNUM KNUM)
LJFOLD(NE KNUM KNUM)
LJFOLD(LT KNUM KNUM)
LJFOLD(GE KNUM KNUM)
LJFOLD(LE KNUM KNUM)
LJFOLD(GT KNUM KNUM)
LJFOLD(ULT KNUM KNUM)
LJFOLD(UGE KNUM KNUM)
LJFOLD(ULE KNUM KNUM)
LJFOLD(UGT KNUM KNUM)
LJFOLDF(kfold_numcomp)
{
  return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o));
}
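
/* Illustrative note (not from the original source): the value-based
** comparison above matters because of NaN. With x = 0/0 the IEEE 754
** rules give x == x -> false and x ~= x -> true, so an EQ of two KNUM
** references must compare the *values* via lj_ir_numcmp() rather than
** folding on reference identity like kfold_kref does for other types.
*/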

/* -- Constant folding for 32 bit integers -------------------------------- */

static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
{
  switch (op) {
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
  case IR_MUL: k1 *= k2; break;
  case IR_MOD: k1 = lj_vm_modi(k1, k2); break;
  case IR_NEG: k1 = -k1; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
  case IR_BSHL: k1 <<= (k2 & 31); break;
  case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break;
  case IR_BSAR: k1 >>= (k2 & 31); break;
  case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break;
  case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
  case IR_MIN: k1 = k1 < k2 ? k1 : k2; break;
  case IR_MAX: k1 = k1 > k2 ? k1 : k2; break;
  default: lua_assert(0); break;
  }
  return k1;
}

LJFOLD(ADD KINT KINT)
LJFOLD(SUB KINT KINT)
LJFOLD(MUL KINT KINT)
LJFOLD(MOD KINT KINT)
LJFOLD(NEG KINT KINT)
LJFOLD(BAND KINT KINT)
LJFOLD(BOR KINT KINT)
LJFOLD(BXOR KINT KINT)
LJFOLD(BSHL KINT KINT)
LJFOLD(BSHR KINT KINT)
LJFOLD(BSAR KINT KINT)
LJFOLD(BROL KINT KINT)
LJFOLD(BROR KINT KINT)
LJFOLD(MIN KINT KINT)
LJFOLD(MAX KINT KINT)
LJFOLDF(kfold_intarith)
{
  return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
}

LJFOLD(ADDOV KINT KINT)
LJFOLD(SUBOV KINT KINT)
LJFOLD(MULOV KINT KINT)
LJFOLDF(kfold_intovarith)
{
  lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i,
				 fins->o - IR_ADDOV);
  int32_t k = lj_num2int(n);
  if (n != (lua_Number)k)
    return FAILFOLD;
  return INTFOLD(k);
}

LJFOLD(BNOT KINT)
LJFOLDF(kfold_bnot)
{
  return INTFOLD(~fleft->i);
}

LJFOLD(BSWAP KINT)
LJFOLDF(kfold_bswap)
{
  return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i));
}

LJFOLD(LT KINT KINT)
LJFOLD(GE KINT KINT)
LJFOLD(LE KINT KINT)
LJFOLD(GT KINT KINT)
LJFOLD(ULT KINT KINT)
LJFOLD(UGE KINT KINT)
LJFOLD(ULE KINT KINT)
LJFOLD(UGT KINT KINT)
LJFOLD(ABC KINT KINT)
LJFOLDF(kfold_intcomp)
{
  int32_t a = fleft->i, b = fright->i;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD(a < b);
  case IR_GE: return CONDFOLD(a >= b);
  case IR_LE: return CONDFOLD(a <= b);
  case IR_GT: return CONDFOLD(a > b);
  case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b);
  case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b);
  case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
  case IR_ABC:
  case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
  default: lua_assert(0); return FAILFOLD;
  }
}

LJFOLD(UGE any KINT)
LJFOLDF(kfold_intcomp0)
{
  if (fright->i == 0)
    return DROPFOLD;
  return NEXTFOLD;
}

/* -- Constant folding for 64 bit integers -------------------------------- */

static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
{
  switch (op) {
#if LJ_64 || LJ_HASFFI
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
#endif
#if LJ_HASFFI
  case IR_MUL: k1 *= k2; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
#endif
  default: UNUSED(k2); lua_assert(0); break;
  }
  return k1;
}

LJFOLD(ADD KINT64 KINT64)
LJFOLD(SUB KINT64 KINT64)
LJFOLD(MUL KINT64 KINT64)
LJFOLD(BAND KINT64 KINT64)
LJFOLD(BOR KINT64 KINT64)
LJFOLD(BXOR KINT64 KINT64)
LJFOLDF(kfold_int64arith)
{
  return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64,
				    ir_k64(fright)->u64, (IROp)fins->o));
}

LJFOLD(DIV KINT64 KINT64)
LJFOLD(MOD KINT64 KINT64)
LJFOLD(POW KINT64 KINT64)
LJFOLDF(kfold_int64arith2)
{
#if LJ_HASFFI
  uint64_t k1 = ir_k64(fleft)->u64, k2 = ir_k64(fright)->u64;
  if (irt_isi64(fins->t)) {
    k1 = fins->o == IR_DIV ? lj_carith_divi64((int64_t)k1, (int64_t)k2) :
	 fins->o == IR_MOD ? lj_carith_modi64((int64_t)k1, (int64_t)k2) :
			     lj_carith_powi64((int64_t)k1, (int64_t)k2);
  } else {
    k1 = fins->o == IR_DIV ? lj_carith_divu64(k1, k2) :
	 fins->o == IR_MOD ? lj_carith_modu64(k1, k2) :
			     lj_carith_powu64(k1, k2);
  }
  return INT64FOLD(k1);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(BSHL KINT64 KINT)
LJFOLD(BSHR KINT64 KINT)
LJFOLD(BSAR KINT64 KINT)
LJFOLD(BROL KINT64 KINT)
LJFOLD(BROR KINT64 KINT)
LJFOLDF(kfold_int64shift)
{
#if LJ_HASFFI || LJ_64
  uint64_t k = ir_k64(fleft)->u64;
  int32_t sh = (fright->i & 63);
  switch ((IROp)fins->o) {
  case IR_BSHL: k <<= sh; break;
#if LJ_HASFFI
  case IR_BSHR: k >>= sh; break;
  case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
  case IR_BROL: k = lj_rol(k, sh); break;
  case IR_BROR: k = lj_ror(k, sh); break;
#endif
  default: lua_assert(0); break;
  }
  return INT64FOLD(k);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(BNOT KINT64)
LJFOLDF(kfold_bnot64)
{
#if LJ_HASFFI
  return INT64FOLD(~ir_k64(fleft)->u64);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(BSWAP KINT64)
LJFOLDF(kfold_bswap64)
{
#if LJ_HASFFI
  return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64));
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(LT KINT64 KINT64)
LJFOLD(GE KINT64 KINT64)
LJFOLD(LE KINT64 KINT64)
LJFOLD(GT KINT64 KINT64)
LJFOLD(ULT KINT64 KINT64)
LJFOLD(UGE KINT64 KINT64)
LJFOLD(ULE KINT64 KINT64)
LJFOLD(UGT KINT64 KINT64)
LJFOLDF(kfold_int64comp)
{
#if LJ_HASFFI
  uint64_t a = ir_k64(fleft)->u64, b = ir_k64(fright)->u64;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD((int64_t)a < (int64_t)b);
  case IR_GE: return CONDFOLD((int64_t)a >= (int64_t)b);
  case IR_LE: return CONDFOLD((int64_t)a <= (int64_t)b);
  case IR_GT: return CONDFOLD((int64_t)a > (int64_t)b);
  case IR_ULT: return CONDFOLD(a < b);
  case IR_UGE: return CONDFOLD(a >= b);
  case IR_ULE: return CONDFOLD(a <= b);
  case IR_UGT: return CONDFOLD(a > b);
  default: lua_assert(0); return FAILFOLD;
  }
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(UGE any KINT64)
LJFOLDF(kfold_int64comp0)
{
#if LJ_HASFFI
  if (ir_k64(fright)->u64 == 0)
    return DROPFOLD;
  return NEXTFOLD;
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

/* -- Constant folding for strings ---------------------------------------- */

LJFOLD(SNEW KKPTR KINT)
LJFOLDF(kfold_snew_kptr)
{
  GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i);
  return lj_ir_kstr(J, s);
}

LJFOLD(SNEW any KINT)
LJFOLDF(kfold_snew_empty)
{
  if (fright->i == 0)
    return lj_ir_kstr(J, &J2G(J)->strempty);
  return NEXTFOLD;
}

LJFOLD(STRREF KGC KINT)
LJFOLDF(kfold_strref)
{
  GCstr *str = ir_kstr(fleft);
  lua_assert((MSize)fright->i <= str->len);
  return lj_ir_kkptr(J, (char *)strdata(str) + fright->i);
}

LJFOLD(STRREF SNEW any)
LJFOLDF(kfold_strref_snew)
{
  PHIBARRIER(fleft);
  if (irref_isk(fins->op2) && fright->i == 0) {
    return fleft->op1;  /* strref(snew(ptr, len), 0) ==> ptr */
  } else {
    /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
    IRIns *ir = IR(fleft->op1);
    if (ir->o == IR_STRREF) {
      IRRef1 str = ir->op1;  /* IRIns * is not valid across emitir. */
      PHIBARRIER(ir);
      fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2);  /* Clobbers fins! */
      fins->op1 = str;
      fins->ot = IRT(IR_STRREF, IRT_P32);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(CALLN CARG IRCALL_lj_str_cmp)
LJFOLDF(kfold_strcmp)
{
  if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
    GCstr *a = ir_kstr(IR(fleft->op1));
    GCstr *b = ir_kstr(IR(fleft->op2));
    return INTFOLD(lj_str_cmp(a, b));
  }
  return NEXTFOLD;
}

/* -- Constant folding of pointer arithmetic ------------------------------ */

LJFOLD(ADD KGC KINT)
LJFOLD(ADD KGC KINT64)
LJFOLDF(kfold_add_kgc)
{
  GCobj *o = ir_kgc(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
#if LJ_HASFFI
  if (irt_iscdata(fleft->t)) {
    CType *ct = ctype_raw(ctype_ctsG(J2G(J)), gco2cd(o)->ctypeid);
    if (ctype_isnum(ct->info) || ctype_isenum(ct->info) ||
	ctype_isptr(ct->info) || ctype_isfunc(ct->info) ||
	ctype_iscomplex(ct->info) || ctype_isvector(ct->info))
      return lj_ir_kkptr(J, (char *)o + ofs);
  }
#endif
  return lj_ir_kptr(J, (char *)o + ofs);
}

LJFOLD(ADD KPTR KINT)
LJFOLD(ADD KPTR KINT64)
LJFOLD(ADD KKPTR KINT)
LJFOLD(ADD KKPTR KINT64)
LJFOLDF(kfold_add_kptr)
{
  void *p = ir_kptr(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
  return lj_ir_kptr_(J, fleft->o, (char *)p + ofs);
}

LJFOLD(ADD any KGC)
LJFOLD(ADD any KPTR)
LJFOLD(ADD any KKPTR)
LJFOLDF(kfold_add_kright)
{
  if (fleft->o == IR_KINT || fleft->o == IR_KINT64) {
    IRRef1 tmp = fins->op1; fins->op1 = fins->op2; fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* -- Constant folding of conversions ------------------------------------- */

LJFOLD(TOBIT KNUM KNUM)
LJFOLDF(kfold_tobit)
{
  return INTFOLD(lj_num2bit(knumleft));
}

LJFOLD(CONV KINT IRCONV_NUM_INT)
LJFOLDF(kfold_conv_kint_num)
{
  return lj_ir_knum(J, (lua_Number)fleft->i);
}

LJFOLD(CONV KINT IRCONV_NUM_U32)
LJFOLDF(kfold_conv_kintu32_num)
{
  return lj_ir_knum(J, (lua_Number)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT IRCONV_INT_I8)
LJFOLD(CONV KINT IRCONV_INT_U8)
LJFOLD(CONV KINT IRCONV_INT_I16)
LJFOLD(CONV KINT IRCONV_INT_U16)
LJFOLDF(kfold_conv_kint_ext)
{
  int32_t k = fleft->i;
  if ((fins->op2 & IRCONV_SRCMASK) == IRT_I8) k = (int8_t)k;
  else if ((fins->op2 & IRCONV_SRCMASK) == IRT_U8) k = (uint8_t)k;
  else if ((fins->op2 & IRCONV_SRCMASK) == IRT_I16) k = (int16_t)k;
  else k = (uint16_t)k;
  return INTFOLD(k);
}

LJFOLD(CONV KINT IRCONV_I64_INT)
LJFOLD(CONV KINT IRCONV_U64_INT)
LJFOLD(CONV KINT IRCONV_I64_U32)
LJFOLD(CONV KINT IRCONV_U64_U32)
LJFOLDF(kfold_conv_kint_i64)
{
  if ((fins->op2 & IRCONV_SEXT))
    return INT64FOLD((uint64_t)(int64_t)fleft->i);
  else
    return INT64FOLD((uint64_t)(int64_t)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT64 IRCONV_NUM_I64)
LJFOLDF(kfold_conv_kint64_num_i64)
{
  return lj_ir_knum(J, (lua_Number)(int64_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_NUM_U64)
LJFOLDF(kfold_conv_kint64_num_u64)
{
  return lj_ir_knum(J, (lua_Number)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_INT_I64)
LJFOLD(CONV KINT64 IRCONV_U32_I64)
LJFOLDF(kfold_conv_kint64_int_i64)
{
  return INTFOLD((int32_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KNUM IRCONV_INT_NUM)
LJFOLDF(kfold_conv_knum_int_num)
{
  lua_Number n = knumleft;
  if (!(fins->op2 & IRCONV_TRUNC)) {
    int32_t k = lj_num2int(n);
    if (irt_isguard(fins->t) && n != (lua_Number)k) {
      /* We're about to create a guard which always fails, like CONV +1.5.
      ** Some pathological loops cause this during LICM, e.g.:
      **   local x,k,t = 0,1.5,{1,[1.5]=2}
      **   for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
      **   assert(x == 300)
      */
      return FAILFOLD;
    }
    return INTFOLD(k);
  } else {
    return INTFOLD((int32_t)n);
  }
}

LJFOLD(CONV KNUM IRCONV_U32_NUM)
LJFOLDF(kfold_conv_knum_u32_num)
{
  lua_assert((fins->op2 & IRCONV_TRUNC));
#ifdef _MSC_VER
  {  /* Workaround for MSVC bug. */
    volatile uint32_t u = (uint32_t)knumleft;
    return INTFOLD((int32_t)u);
  }
#else
  return INTFOLD((int32_t)(uint32_t)knumleft);
#endif
}

LJFOLD(CONV KNUM IRCONV_I64_NUM)
LJFOLDF(kfold_conv_knum_i64_num)
{
  lua_assert((fins->op2 & IRCONV_TRUNC));
  return INT64FOLD((uint64_t)(int64_t)knumleft);
}

LJFOLD(CONV KNUM IRCONV_U64_NUM)
LJFOLDF(kfold_conv_knum_u64_num)
{
  lua_assert((fins->op2 & IRCONV_TRUNC));
  return INT64FOLD(lj_num2u64(knumleft));
}

LJFOLD(TOSTR KNUM)
LJFOLDF(kfold_tostr_knum)
{
  return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft));
}

LJFOLD(TOSTR KINT)
LJFOLDF(kfold_tostr_kint)
{
  return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i));
}

LJFOLD(STRTO KGC)
LJFOLDF(kfold_strto)
{
  TValue n;
  if (lj_strscan_num(ir_kstr(fleft), &n))
    return lj_ir_knum(J, numV(&n));
  return FAILFOLD;
}

/* -- Constant folding of equality checks --------------------------------- */

/* Don't constant-fold away FLOAD checks against KNULL. */
LJFOLD(EQ FLOAD KNULL)
LJFOLD(NE FLOAD KNULL)
LJFOLDX(lj_opt_cse)

/* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
LJFOLD(EQ any KNULL)
LJFOLD(NE any KNULL)
LJFOLD(EQ KNULL any)
LJFOLD(NE KNULL any)
LJFOLD(EQ KINT KINT)  /* Constants are unique, so same refs <==> same value. */
LJFOLD(NE KINT KINT)
LJFOLD(EQ KINT64 KINT64)
LJFOLD(NE KINT64 KINT64)
LJFOLD(EQ KGC KGC)
LJFOLD(NE KGC KGC)
LJFOLDF(kfold_kref)
{
  return CONDFOLD((fins->op1 == fins->op2) ^ (fins->o == IR_NE));
}

/* -- Algebraic shortcuts ------------------------------------------------- */

LJFOLD(FPMATH FPMATH IRFPM_FLOOR)
LJFOLD(FPMATH FPMATH IRFPM_CEIL)
LJFOLD(FPMATH FPMATH IRFPM_TRUNC)
LJFOLDF(shortcut_round)
{
  IRFPMathOp op = (IRFPMathOp)fleft->op2;
  if (op == IRFPM_FLOOR || op == IRFPM_CEIL || op == IRFPM_TRUNC)
    return LEFTFOLD;  /* round(round_left(x)) = round_left(x) */
  return NEXTFOLD;
}

LJFOLD(ABS ABS KNUM)
LJFOLDF(shortcut_left)
{
  return LEFTFOLD;  /* f(g(x)) ==> g(x) */
}

LJFOLD(ABS NEG KNUM)
LJFOLDF(shortcut_dropleft)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* abs(neg(x)) ==> abs(x) */
  return RETRYFOLD;
}

/* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
LJFOLD(NEG NEG any)
LJFOLD(BNOT BNOT)
LJFOLD(BSWAP BSWAP)
LJFOLDF(shortcut_leftleft)
{
  PHIBARRIER(fleft);  /* See above. Fold would be ok, but not beneficial. */
  return fleft->op1;  /* f(g(x)) ==> x */
}

/* -- FP algebraic simplifications ---------------------------------------- */

/* FP arithmetic is tricky -- there's not much to simplify.
** Please note the following common pitfalls before sending "improvements":
**   x+0 ==> x  is INVALID for x=-0
**   0-x ==> -x is INVALID for x=+0
**   x*0 ==> 0  is INVALID for x=-0, x=+-Inf or x=NaN
*/
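
/* Illustrative note (not from the original source): with x = -0.0 the
** IEEE 754 sum x + 0 is +0.0, and 1/(x+0) = +Inf while 1/x = -Inf, so
** folding x+0 ==> x would be observable from Lua code. This is why only
** carefully chosen FP identities appear below.
*/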

LJFOLD(ADD NEG any)
LJFOLDF(simplify_numadd_negx)
{
  PHIBARRIER(fleft);
  fins->o = IR_SUB;  /* (-a) + b ==> b - a */
  fins->op1 = fins->op2;
  fins->op2 = fleft->op1;
  return RETRYFOLD;
}

LJFOLD(ADD any NEG)
LJFOLDF(simplify_numadd_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_SUB;  /* a + (-b) ==> a - b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(SUB any KNUM)
LJFOLDF(simplify_numsub_k)
{
  lua_Number n = knumright;
  if (n == 0.0)  /* x - (+-0) ==> x */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB NEG KNUM)
LJFOLDF(simplify_numsub_negk)
{
  PHIBARRIER(fleft);
  fins->op2 = fleft->op1;  /* (-x) - k ==> (-k) - x */
  fins->op1 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(SUB any NEG)
LJFOLDF(simplify_numsub_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_ADD;  /* a - (-b) ==> a + b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(MUL any KNUM)
LJFOLD(DIV any KNUM)
LJFOLDF(simplify_nummuldiv_k)
{
  lua_Number n = knumright;
  if (n == 1.0) {  /* x o 1 ==> x */
    return LEFTFOLD;
  } else if (n == -1.0) {  /* x o -1 ==> -x */
    fins->o = IR_NEG;
    fins->op2 = (IRRef1)lj_ir_knum_neg(J);
    return RETRYFOLD;
  } else if (fins->o == IR_MUL && n == 2.0) {  /* x * 2 ==> x + x */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  } else if (fins->o == IR_DIV) {  /* x / 2^k ==> x * 2^-k */
    uint64_t u = ir_knum(fright)->u64;
    uint32_t ex = ((uint32_t)(u >> 52) & 0x7ff);
    if ((u & U64x(000fffff,ffffffff)) == 0 && ex - 1 < 0x7fd) {
      u = (u & ((uint64_t)1 << 63)) | ((uint64_t)(0x7fe - ex) << 52);
      fins->o = IR_MUL;  /* Multiply by exact reciprocal. */
      fins->op2 = lj_ir_knum_u64(J, u);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(MUL NEG KNUM)
LJFOLD(DIV NEG KNUM)
LJFOLDF(simplify_nummuldiv_negk)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* (-a) o k ==> a o (-k) */
  fins->op2 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(MUL NEG NEG)
LJFOLD(DIV NEG NEG)
LJFOLDF(simplify_nummuldiv_negneg)
{
  PHIBARRIER(fleft);
  PHIBARRIER(fright);
  fins->op1 = fleft->op1;  /* (-a) o (-b) ==> a o b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(POW any KINT)
LJFOLDF(simplify_numpow_xk)
{
  int32_t k = fright->i;
  TRef ref = fins->op1;
  if (k == 0)  /* x ^ 0 ==> 1 */
    return lj_ir_knum_one(J);  /* Result must be a number, not an int. */
  if (k == 1)  /* x ^ 1 ==> x */
    return LEFTFOLD;
  if ((uint32_t)(k+65536) > 2*65536u)  /* Limit code explosion. */
    return NEXTFOLD;
  if (k < 0) {  /* x ^ (-k) ==> (1/x) ^ k. */
    ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
    k = -k;
  }
  /* Unroll x^k for 1 <= k <= 65536. */
  for (; (k & 1) == 0; k >>= 1)  /* Handle leading zeros. */
    ref = emitir(IRTN(IR_MUL), ref, ref);
  if ((k >>= 1) != 0) {  /* Handle trailing bits. */
    TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
    for (; k != 1; k >>= 1) {
      if (k & 1)
	ref = emitir(IRTN(IR_MUL), ref, tmp);
      tmp = emitir(IRTN(IR_MUL), tmp, tmp);
    }
    ref = emitir(IRTN(IR_MUL), ref, tmp);
  }
  return ref;
}
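
/* Illustrative note (not from the original source): for k = 5 the unroll
** above emits square-and-multiply code, i.e. x^5 = x * (x^2)^2:
**   tmp = x*x        -- k is odd, so k >>= 1 gives 2 and tmp is squared once
**   tmp = tmp*tmp    -- inner loop squares tmp while k > 1 (tmp is now x^4)
**   ref = x*tmp      -- final multiply: three MULs instead of a pow call
*/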

LJFOLD(POW KNUM any)
LJFOLDF(simplify_numpow_kx)
{
  lua_Number n = knumleft;
  if (n == 2.0) {  /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
    fins->o = IR_CONV;
#if LJ_TARGET_X86ORX64
    fins->op1 = fins->op2;
    fins->op2 = IRCONV_NUM_INT;
    fins->op2 = (IRRef1)lj_opt_fold(J);
#endif
    fins->op1 = (IRRef1)lj_ir_knum_one(J);
    fins->o = IR_LDEXP;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* -- Simplify conversions ------------------------------------------------ */

LJFOLD(CONV CONV IRCONV_NUM_INT)  /* _NUM */
LJFOLDF(shortcut_conv_num_int)
{
  PHIBARRIER(fleft);
  /* Only safe with a guarded conversion to int. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM && irt_isguard(fleft->t))
    return fleft->op1;  /* f(g(x)) ==> x */
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_NUM)  /* _INT */
LJFOLD(CONV CONV IRCONV_U32_NUM)  /* _U32 */
LJFOLDF(simplify_conv_int_num)
{
  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
  if ((fleft->op2 & IRCONV_SRCMASK) ==
      ((fins->op2 & IRCONV_DSTMASK) >> IRCONV_DSH))
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_I64_NUM)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U64_NUM)  /* _INT or _U32 */
LJFOLDF(simplify_conv_i64_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
    /* Reduce to a sign-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
    return RETRYFOLD;
  } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
#if LJ_TARGET_X64
    return fleft->op1;
#else
    /* Reduce to a zero-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = (IRT_I64<<5)|IRT_U32;
    return RETRYFOLD;
#endif
  }
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_I64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_INT_U64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U32_I64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U32_U64)  /* _INT or _U32 */
LJFOLDF(simplify_conv_int_i64)
{
  int src;
  PHIBARRIER(fleft);
  src = (fleft->op2 & IRCONV_SRCMASK);
  if (src == IRT_INT || src == IRT_U32) {
    if (src == ((fins->op2 & IRCONV_DSTMASK) >> IRCONV_DSH)) {
      return fleft->op1;
    } else {
      fins->op2 = ((fins->op2 & IRCONV_DSTMASK) | src);
      fins->op1 = fleft->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_FLOAT_NUM)  /* _FLOAT */
LJFOLDF(simplify_conv_flt_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_FLOAT)
    return fleft->op1;
  return NEXTFOLD;
}

/* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(TOBIT CONV KNUM)
LJFOLDF(simplify_tobit_conv)
{
  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
    lua_assert(irt_isnum(fleft->t));
    return fleft->op1;
  } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
    lua_assert(irt_isnum(fleft->t));
    fins->o = IR_CONV;
    fins->op1 = fleft->op1;
    fins->op2 = (IRT_INT<<5)|IRT_U32;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(FPMATH CONV IRFPM_FLOOR)
LJFOLD(FPMATH CONV IRFPM_CEIL)
LJFOLD(FPMATH CONV IRFPM_TRUNC)
LJFOLDF(simplify_floor_conv)
{
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
      (fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
    return LEFTFOLD;
  return NEXTFOLD;
}

/* Strength reduction of widening. */
LJFOLD(CONV any IRCONV_I64_INT)
LJFOLD(CONV any IRCONV_U64_INT)
LJFOLDF(simplify_conv_sext)
{
  IRRef ref = fins->op1;
  int64_t ofs = 0;
  if (!(fins->op2 & IRCONV_SEXT))
    return NEXTFOLD;
  PHIBARRIER(fleft);
  if (fleft->o == IR_XLOAD && (irt_isu8(fleft->t) || irt_isu16(fleft->t)))
    goto ok_reduce;
  if (fleft->o == IR_ADD && irref_isk(fleft->op2)) {
    ofs = (int64_t)IR(fleft->op2)->i;
    ref = fleft->op1;
  }
  /* Use scalar evolution analysis results to strength-reduce sign-extension. */
  if (ref == J->scev.idx) {
    IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
    lua_assert(irt_isint(J->scev.t));
    if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) {
    ok_reduce:
#if LJ_TARGET_X64
      /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
      return LEFTFOLD;
#else
      /* Reduce to a (cheaper) zero-extension. */
      fins->op2 &= ~IRCONV_SEXT;
      return RETRYFOLD;
#endif
    }
  }
  return NEXTFOLD;
}

/* Strength reduction of narrowing. */
LJFOLD(CONV ADD IRCONV_INT_I64)
LJFOLD(CONV SUB IRCONV_INT_I64)
LJFOLD(CONV MUL IRCONV_INT_I64)
LJFOLD(CONV ADD IRCONV_INT_U64)
LJFOLD(CONV SUB IRCONV_INT_U64)
LJFOLD(CONV MUL IRCONV_INT_U64)
LJFOLD(CONV ADD IRCONV_U32_I64)
LJFOLD(CONV SUB IRCONV_U32_I64)
LJFOLD(CONV MUL IRCONV_U32_I64)
LJFOLD(CONV ADD IRCONV_U32_U64)
LJFOLD(CONV SUB IRCONV_U32_U64)
LJFOLD(CONV MUL IRCONV_U32_U64)
LJFOLDF(simplify_conv_narrow)
{
  IROp op = (IROp)fleft->o;
  IRType t = irt_type(fins->t);
  IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
  PHIBARRIER(fleft);
  op1 = emitir(IRTI(IR_CONV), op1, mode);
  op2 = emitir(IRTI(IR_CONV), op2, mode);
  fins->ot = IRT(op, t);
  fins->op1 = op1;
  fins->op2 = op2;
  return RETRYFOLD;
}

/* Special CSE rule for CONV. */
LJFOLD(CONV any any)
LJFOLDF(cse_conv)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
    uint8_t guard = irt_isguard(fins->t);
    IRRef ref = J->chain[IR_CONV];
    while (ref > op1) {
      IRIns *ir = IR(ref);
      /* Commoning with stronger checks is ok. */
      if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
	  irt_isguard(ir->t) >= guard)
	return ref;
      ref = ir->prev;
    }
  }
  return EMITFOLD;  /* No fallthrough to regular CSE. */
}

/* FP conversion narrowing. */
LJFOLD(TOBIT ADD KNUM)
LJFOLD(TOBIT SUB KNUM)
LJFOLD(CONV ADD IRCONV_INT_NUM)
LJFOLD(CONV SUB IRCONV_INT_NUM)
LJFOLD(CONV ADD IRCONV_I64_NUM)
LJFOLD(CONV SUB IRCONV_I64_NUM)
LJFOLDF(narrow_convert)
{
  PHIBARRIER(fleft);
  /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
  if (J->chain[IR_LOOP])
    return NEXTFOLD;
  lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT);
  return lj_opt_narrow_convert(J);
}

/* -- Integer algebraic simplifications ----------------------------------- */

LJFOLD(ADD any KINT)
LJFOLD(ADDOV any KINT)
LJFOLD(SUBOV any KINT)
LJFOLDF(simplify_intadd_k)
{
  if (fright->i == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(MULOV any KINT)
LJFOLDF(simplify_intmul_k)
{
  if (fright->i == 0)  /* i * 0 ==> 0 */
    return RIGHTFOLD;
  if (fright->i == 1)  /* i * 1 ==> i */
    return LEFTFOLD;
  if (fright->i == 2) {  /* i * 2 ==> i + i */
    fins->o = IR_ADDOV;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any KINT)
LJFOLDF(simplify_intsub_k)
{
  if (fright->i == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i);  /* Overflow for -2^31 ok. */
  return RETRYFOLD;
}

LJFOLD(SUB KINT any)
LJFOLD(SUB KINT64 any)
LJFOLDF(simplify_intsub_kleft)
{
  if (fleft->o == IR_KINT ? (fleft->i == 0) : (ir_kint64(fleft)->u64 == 0)) {
    fins->o = IR_NEG;  /* 0 - i ==> -i */
    fins->op1 = fins->op2;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(ADD any KINT64)
LJFOLDF(simplify_intadd_k64)
{
  if (ir_kint64(fright)->u64 == 0)  /* i + 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB any KINT64)
LJFOLDF(simplify_intsub_k64)
{
  uint64_t k = ir_kint64(fright)->u64;
  if (k == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint64(J, (uint64_t)-(int64_t)k);
  return RETRYFOLD;
}

static TRef simplify_intmul_k(jit_State *J, int32_t k)
{
  /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2.
  ** But this is mainly intended for simple address arithmetic.
  ** Also it's easier for the backend to optimize the original multiplies.
  */
  if (k == 1) {  /* i * 1 ==> i */
    return LEFTFOLD;
  } else if ((k & (k-1)) == 0) {  /* i * 2^k ==> i << k */
    fins->o = IR_BSHL;
    fins->op2 = lj_ir_kint(J, lj_fls((uint32_t)k));
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
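
/* Illustrative note (not from the original source): for k = 8 the rule
** above rewrites MUL(i, 8) into BSHL(i, 3): 8 & 7 == 0 identifies a power
** of two and lj_fls(8) == 3 yields the shift amount.
*/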

LJFOLD(MUL any KINT)
LJFOLDF(simplify_intmul_k32)
{
  if (fright->i == 0)  /* i * 0 ==> 0 */
    return INTFOLD(0);
  else if (fright->i > 0)
    return simplify_intmul_k(J, fright->i);
  return NEXTFOLD;
}

LJFOLD(MUL any KINT64)
LJFOLDF(simplify_intmul_k64)
{
  if (ir_kint64(fright)->u64 == 0)  /* i * 0 ==> 0 */
    return INT64FOLD(0);
#if LJ_64
  /* NYI: SPLIT for BSHL and 32 bit backend support. */
  else if (ir_kint64(fright)->u64 < 0x80000000u)
    return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
#endif
  return NEXTFOLD;
}

LJFOLD(MOD any KINT)
LJFOLDF(simplify_intmod_k)
{
  int32_t k = fright->i;
  lua_assert(k != 0);
  if (k > 0 && (k & (k-1)) == 0) {  /* i % (2^k) ==> i & (2^k-1) */
    fins->o = IR_BAND;
    fins->op2 = lj_ir_kint(J, k-1);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(MOD KINT any)
LJFOLDF(simplify_intmod_kleft)
{
  if (fleft->i == 0)
    return INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB any any)
LJFOLD(SUBOV any any)
LJFOLDF(simplify_intsub)
{
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))  /* i - i ==> 0 */
    return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB ADD any)
LJFOLDF(simplify_intsubadd_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1)  /* (i + j) - i ==> j */
      return fleft->op2;
    if (fins->op2 == fleft->op2)  /* (i + j) - j ==> i */
      return fleft->op1;
  }
  return NEXTFOLD;
}

LJFOLD(SUB SUB any)
LJFOLDF(simplify_intsubsub_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1) {  /* (i - j) - i ==> 0 - j */
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      fins->op2 = fleft->op2;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB any SUB)
LJFOLDF(simplify_intsubsub_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1)  /* i - (i - j) ==> j */
      return fright->op2;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any ADD)
LJFOLDF(simplify_intsubadd_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1) {  /* i - (i + j) ==> 0 - j */
      fins->op2 = fright->op2;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
    if (fins->op1 == fright->op2) {  /* i - (j + i) ==> 0 - j */
      fins->op2 = fright->op1;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB ADD ADD)
LJFOLDF(simplify_intsubaddadd_cancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    PHIBARRIER(fright);
    if (fleft->op1 == fright->op1) {  /* (i + j1) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op1 == fright->op2) {  /* (i + j1) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op1) {  /* (j1 + i) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op2) {  /* (j1 + i) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BAND any KINT)
LJFOLD(BAND any KINT64)
LJFOLDF(simplify_band_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
				     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i & 0 ==> 0 */
    return RIGHTFOLD;
  if (k == -1)  /* i & -1 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BOR any KINT)
LJFOLD(BOR any KINT64)
LJFOLDF(simplify_bor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
				     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i | 0 ==> i */
    return LEFTFOLD;
  if (k == -1)  /* i | -1 ==> -1 */
    return RIGHTFOLD;
  return NEXTFOLD;
}

LJFOLD(BXOR any KINT)
LJFOLD(BXOR any KINT64)
LJFOLDF(simplify_bxor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
				     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i xor 0 ==> i */
    return LEFTFOLD;
  if (k == -1) {  /* i xor -1 ==> ~i */
    fins->o = IR_BNOT;
    fins->op2 = 0;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BSHL any KINT)
LJFOLD(BSHR any KINT)
LJFOLD(BSAR any KINT)
LJFOLD(BROL any KINT)
LJFOLD(BROR any KINT)
LJFOLDF(simplify_shift_ik)
{
  int32_t mask = irt_is64(fins->t) ? 63 : 31;
  int32_t k = (fright->i & mask);
  if (k == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  if (k == 1 && fins->o == IR_BSHL) {  /* i << 1 ==> i + i */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  if (k != fright->i) {  /* i o k ==> i o (k & mask) */
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
#ifndef LJ_TARGET_UNIFYROT
  if (fins->o == IR_BROR) {  /* bror(i, k) ==> brol(i, (-k)&mask) */
    fins->o = IR_BROL;
    fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&mask);
    return RETRYFOLD;
  }
#endif
  return NEXTFOLD;
}

LJFOLD(BSHL any BAND)
LJFOLD(BSHR any BAND)
LJFOLD(BSAR any BAND)
LJFOLD(BROL any BAND)
LJFOLD(BROR any BAND)
LJFOLDF(simplify_shift_andk)
{
  IRIns *irk = IR(fright->op2);
  PHIBARRIER(fright);
  if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
      irk->o == IR_KINT) {  /* i o (j & mask) ==> i o j */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = irk->i & mask;
    if (k == mask) {
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BSHL KINT any)
LJFOLD(BSHR KINT any)
LJFOLD(BSHL KINT64 any)
LJFOLD(BSHR KINT64 any)
LJFOLDF(simplify_shift1_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
				    (int64_t)ir_k64(fleft)->u64;
  if (k == 0)  /* 0 o i ==> 0 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSAR KINT any)
LJFOLD(BROL KINT any)
LJFOLD(BROR KINT any)
LJFOLD(BSAR KINT64 any)
LJFOLD(BROL KINT64 any)
LJFOLD(BROR KINT64 any)
LJFOLDF(simplify_shift2_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
				    (int64_t)ir_k64(fleft)->u64;
  if (k == 0 || k == -1)  /* 0 o i ==> 0; -1 o i ==> -1 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSHL BAND KINT)
LJFOLD(BSHR BAND KINT)
LJFOLD(BROL BAND KINT)
LJFOLD(BROR BAND KINT)
LJFOLDF(simplify_shiftk_andk)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);
  if (irk->o == IR_KINT) {  /* (i & k1) o k2 ==> (i o k2) & (k1 o k2) */
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    fins->op1 = fleft->op1;
    fins->op1 = (IRRef1)lj_opt_fold(J);
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    fins->ot = IRTI(IR_BAND);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BAND BSHL KINT)
LJFOLD(BAND BSHR KINT)
LJFOLDF(simplify_andk_shiftk)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT &&
      kfold_intop(-1, irk->i, (IROp)fleft->o) == fright->i)
    return LEFTFOLD;  /* (i o k1) & k2 ==> i, if (-1 o k1) == k2 */
  return NEXTFOLD;
}

/* -- Reassociation ------------------------------------------------------- */

LJFOLD(ADD ADD KINT)
LJFOLD(MUL MUL KINT)
LJFOLD(BAND BAND KINT)
LJFOLD(BOR BOR KINT)
LJFOLD(BXOR BXOR KINT)
LJFOLDF(reassoc_intarith_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT) {
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    if (k == irk->i)  /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
}

LJFOLD(ADD ADD KINT64)
LJFOLD(MUL MUL KINT64)
LJFOLD(BAND BAND KINT64)
LJFOLD(BOR BOR KINT64)
LJFOLD(BXOR BXOR KINT64)
LJFOLDF(reassoc_intarith_k64)
{
#if LJ_HASFFI || LJ_64
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT64) {
    uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
				  ir_k64(fright)->u64, (IROp)fins->o);
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint64(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(MIN MIN any)
LJFOLD(MAX MAX any)
LJFOLD(BAND BAND any)
LJFOLD(BOR BOR any)
LJFOLDF(reassoc_dup)
{
  if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
    return LEFTFOLD;  /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */
  return NEXTFOLD;
}

LJFOLD(BXOR BXOR any)
LJFOLDF(reassoc_bxor)
{
  PHIBARRIER(fleft);
  if (fins->op2 == fleft->op1)  /* (a xor b) xor a ==> b */
    return fleft->op2;
  if (fins->op2 == fleft->op2)  /* (a xor b) xor b ==> a */
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(BSHL BSHL KINT)
LJFOLD(BSHR BSHR KINT)
LJFOLD(BSAR BSAR KINT)
LJFOLD(BROL BROL KINT)
LJFOLD(BROR BROR KINT)
LJFOLDF(reassoc_shift)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);  /* The (shift any KINT) rule covers k2 == 0 and more. */
  if (irk->o == IR_KINT) {  /* (i o k1) o k2 ==> i o (k1 + k2) */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = (irk->i & mask) + (fright->i & mask);
    if (k > mask) {  /* Combined shift too wide? */
      if (fins->o == IR_BSHL || fins->o == IR_BSHR)
	return mask == 31 ? INTFOLD(0) : INT64FOLD(0);
      else if (fins->o == IR_BSAR)
	k = mask;
      else
	k &= mask;
    }
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(MIN MIN KNUM)
LJFOLD(MAX MAX KNUM)
LJFOLD(MIN MIN KINT)
LJFOLD(MAX MAX KINT)
LJFOLDF(reassoc_minmax_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KNUM) {
    lua_Number a = ir_knum(irk)->n;
    lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD);
    if (a == y)  /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_knum(J, y);
    return RETRYFOLD;  /* (x o k1) o k2 ==> x o (k1 o k2) */
  } else if (irk->o == IR_KINT) {
    int32_t a = irk->i;
    int32_t y = kfold_intop(a, fright->i, fins->o);
    if (a == y)  /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, y);
    return RETRYFOLD;  /* (x o k1) o k2 ==> x o (k1 o k2) */
  }
  return NEXTFOLD;
}

LJFOLD(MIN MAX any)
LJFOLD(MAX MIN any)
LJFOLDF(reassoc_minmax_left)
{
  if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
    return RIGHTFOLD;  /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */
  return NEXTFOLD;
}

LJFOLD(MIN any MAX)
LJFOLD(MAX any MIN)
LJFOLDF(reassoc_minmax_right)
{
  if (fins->op1 == fright->op1 || fins->op1 == fright->op2)
    return LEFTFOLD;  /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */
  return NEXTFOLD;
}

/* -- Array bounds check elimination -------------------------------------- */

/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
*/
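
/* Illustrative note (not from the original source): in a loop like
**   for i=2,n do t[i] = t[i-1] end
** the store t[i] already checks ABC(asize, i). The forwarded t[i-1]
** access can show up with the (i+k)+(-k) index shape described above;
** abc_fwd below searches the ABC chain for an existing check on the
** plain index and drops the redundant one.
*/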

LJFOLD(ABC any ADD)
LJFOLDF(abc_fwd)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    if (irref_isk(fright->op2)) {
      IRIns *add2 = IR(fright->op1);
      if (add2->o == IR_ADD && irref_isk(add2->op2) &&
	  IR(fright->op2)->i == -IR(add2->op2)->i) {
	IRRef ref = J->chain[IR_ABC];
	IRRef lim = add2->op1;
	if (fins->op1 > lim) lim = fins->op1;
	while (ref > lim) {
	  IRIns *ir = IR(ref);
	  if (ir->op1 == fins->op1 && ir->op2 == add2->op1)
	    return DROPFOLD;
	  ref = ir->prev;
	}
      }
    }
  }
  return NEXTFOLD;
}

/* Eliminate ABC for constants.
** ABC(asize, k1), ABC(asize, k2) ==> ABC(asize, max(k1, k2))
** Drop second ABC if k2 is lower. Otherwise patch first ABC with k2.
*/
LJFOLD(ABC any KINT)
LJFOLDF(abc_k)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    IRRef ref = J->chain[IR_ABC];
    IRRef asize = fins->op1;
    while (ref > asize) {
      IRIns *ir = IR(ref);
      if (ir->op1 == asize && irref_isk(ir->op2)) {
	int32_t k = IR(ir->op2)->i;
	if (fright->i > k)
	  ir->op2 = fins->op2;
	return DROPFOLD;
      }
      ref = ir->prev;
    }
    return EMITFOLD;  /* Already performed CSE. */
  }
  return NEXTFOLD;
}

/* Eliminate invariant ABC inside loop. */
LJFOLD(ABC any any)
LJFOLDF(abc_invar)
{
  /* Invariant ABC marked as PTR. Drop if op1 is invariant, too. */
  if (!irt_isint(fins->t) && fins->op1 < J->chain[IR_LOOP] &&
      !irt_isphi(IR(fins->op1)->t))
    return DROPFOLD;
  return NEXTFOLD;
}

/* -- Commutativity ------------------------------------------------------- */

/* The refs of commutative ops are canonicalized. Lower refs go to the right.
** Rationale behind this:
** - It (also) moves constants to the right.
** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices).
** - It helps CSE to find more matches.
** - The assembler generates better code with constants at the right.
*/
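
/* Illustrative note (not from the original source): constants are interned
** at the lowest IR refs, so ADD(KINT, x) always has op1 < op2 and comm_swap
** below rewrites it to ADD(x, KINT). Only then can right-constant rules
** such as (ADD any KINT) fire. The swap is deliberately unidirectional,
** per Requirement #3, so no other rule may undo it.
*/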

LJFOLD(ADD any any)
LJFOLD(MUL any any)
LJFOLD(ADDOV any any)
LJFOLD(MULOV any any)
LJFOLDF(comm_swap)
{
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(EQ any any)
LJFOLD(NE any any)
LJFOLDF(comm_equal)
{
  /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD(fins->o == IR_EQ);
  return fold_comm_swap(J);
}

LJFOLD(LT any any)
LJFOLD(GE any any)
LJFOLD(LE any any)
LJFOLD(GT any any)
LJFOLD(ULT any any)
LJFOLD(UGE any any)
LJFOLD(ULE any any)
LJFOLD(UGT any any)
LJFOLDF(comm_comp)
{
  /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD((fins->o ^ (fins->o >> 1)) & 1);
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    fins->o ^= 3;  /* GT <-> LT, GE <-> LE, does not affect U */
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BAND any any)
LJFOLD(BOR any any)
LJFOLD(MIN any any)
LJFOLD(MAX any any)
LJFOLDF(comm_dup)
{
  if (fins->op1 == fins->op2)  /* x o x ==> x */
    return LEFTFOLD;
  return fold_comm_swap(J);
}

LJFOLD(BXOR any any)
LJFOLDF(comm_bxor)
{
  if (fins->op1 == fins->op2)  /* i xor i ==> 0 */
    return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
  return fold_comm_swap(J);
}

/* -- Simplification of compound expressions ------------------------------ */

static TRef kfold_xload(jit_State *J, IRIns *ir, const void *p)
{
  int32_t k;
  switch (irt_type(ir->t)) {
  case IRT_NUM: return lj_ir_knum_u64(J, *(uint64_t *)p);
  case IRT_I8: k = (int32_t)*(int8_t *)p; break;
  case IRT_U8: k = (int32_t)*(uint8_t *)p; break;
  case IRT_I16: k = (int32_t)(int16_t)lj_getu16(p); break;
  case IRT_U16: k = (int32_t)(uint16_t)lj_getu16(p); break;
  case IRT_INT: case IRT_U32: k = (int32_t)lj_getu32(p); break;
  case IRT_I64: case IRT_U64: return lj_ir_kint64(J, *(uint64_t *)p);
  default: return 0;
  }
  return lj_ir_kint(J, k);
}

/* Turn: string.sub(str, a, b) == kstr
** into: string.byte(str, a) == string.byte(kstr, 1) etc.
** Note: this creates unaligned XLOADs on x86/x64.
*/
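
/* Illustrative note (not from the original source): for a comparison like
** string.sub(s, i, i+1) == "ab" the rule below emits a guard that the
** length equals 2, a 2 byte XLOAD of the string data at the STRREF and an
** integer compare against the constant 0x6261 ("ab" on a little-endian
** target, via kfold_xload). The SNEW result becomes dead, so the
** intermediate string allocation can be eliminated.
*/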

LJFOLD(EQ SNEW KGC)
LJFOLD(NE SNEW KGC)
LJFOLDF(merge_eqne_snew_kgc)
{
  GCstr *kstr = ir_kstr(fright);
  int32_t len = (int32_t)kstr->len;
  lua_assert(irt_isstr(fins->t));

#if LJ_TARGET_UNALIGNED
#define FOLD_SNEW_MAX_LEN	4  /* Handle string lengths 0, 1, 2, 3, 4. */
#define FOLD_SNEW_TYPE8		IRT_I8	/* Creates shorter immediates. */
#else
#define FOLD_SNEW_MAX_LEN	1  /* Handle string lengths 0 or 1. */
#define FOLD_SNEW_TYPE8		IRT_U8  /* Prefer unsigned loads. */
#endif

  PHIBARRIER(fleft);
  if (len <= FOLD_SNEW_MAX_LEN) {
    IROp op = (IROp)fins->o;
    IRRef strref = fleft->op1;
    if (IR(strref)->o != IR_STRREF)
      return NEXTFOLD;
    if (op == IR_EQ) {
      emitir(IRTGI(IR_EQ), fleft->op2, lj_ir_kint(J, len));
      /* Caveat: fins/fleft/fright is no longer valid after emitir. */
    } else {
      /* NE is not expanded since this would need an OR of two conds. */
      if (!irref_isk(fleft->op2))  /* Only handle the constant length case. */
	return NEXTFOLD;
      if (IR(fleft->op2)->i != len)
	return DROPFOLD;
    }
    if (len > 0) {
      /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
      uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, FOLD_SNEW_TYPE8) :
			       len == 2 ? IRT(IR_XLOAD, IRT_U16) :
			       IRTI(IR_XLOAD));
      TRef tmp = emitir(ot, strref,
			IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0));
      TRef val = kfold_xload(J, IR(tref_ref(tmp)), strdata(kstr));
      if (len == 3)
	tmp = emitir(IRTI(IR_BAND), tmp,
		     lj_ir_kint(J, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
      fins->op1 = (IRRef1)tmp;
      fins->op2 = (IRRef1)val;
      fins->ot = (IROpT)IRTGI(op);
      return RETRYFOLD;
    } else {
      return DROPFOLD;
    }
  }
  return NEXTFOLD;
}

/* -- Loads --------------------------------------------------------------- */

/* Loads cannot be folded or passed on to CSE in general.
** Alias analysis is needed to check for forwarding opportunities.
**
** Caveat: *all* loads must be listed here or they end up at CSE!
*/

LJFOLD(ALOAD any)
LJFOLDX(lj_opt_fwd_aload)

/* From HREF fwd (see below). Must eliminate, not supported by fwd/backend. */
LJFOLD(HLOAD KKPTR)
LJFOLDF(kfold_hload_kkptr)
{
  UNUSED(J);
  lua_assert(ir_kptr(fleft) == niltvg(J2G(J)));
  return TREF_NIL;
}

LJFOLD(HLOAD any)
LJFOLDX(lj_opt_fwd_hload)

LJFOLD(ULOAD any)
LJFOLDX(lj_opt_fwd_uload)

LJFOLD(CALLL any IRCALL_lj_tab_len)
LJFOLDX(lj_opt_fwd_tab_len)

/* Upvalue refs are really loads, but there are no corresponding stores.
** So CSE is ok for them, except for UREFO across a GC step (see below).
** If the referenced function is const, its upvalue addresses are const, too.
** This can be used to improve CSE by looking for the same address,
** even if the upvalues originate from a different function.
*/
LJFOLD(UREFO KGC any)
LJFOLD(UREFC KGC any)
LJFOLDF(cse_uref)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref = J->chain[fins->o];
    GCfunc *fn = ir_kfunc(fleft);
    GCupval *uv = gco2uv(gcref(fn->l.uvptr[(fins->op2 >> 8)]));
    while (ref > 0) {
      IRIns *ir = IR(ref);
      if (irref_isk(ir->op1)) {
	GCfunc *fn2 = ir_kfunc(IR(ir->op1));
	if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
	  if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
	    break;
	  return ref;
	}
      }
      ref = ir->prev;
    }
  }
  return EMITFOLD;
}

LJFOLD(HREFK any any)
LJFOLDX(lj_opt_fwd_hrefk)

LJFOLD(HREF TNEW any)
LJFOLDF(fwd_href_tnew)
{
  if (lj_opt_fwd_href_nokey(J))
    return lj_ir_kkptr(J, niltvg(J2G(J)));
  return NEXTFOLD;
}

LJFOLD(HREF TDUP KPRI)
LJFOLD(HREF TDUP KGC)
LJFOLD(HREF TDUP KNUM)
LJFOLDF(fwd_href_tdup)
{
  TValue keyv;
  lj_ir_kvalue(J->L, &keyv, fright);
  if (lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv) == niltvg(J2G(J)) &&
      lj_opt_fwd_href_nokey(J))
    return lj_ir_kkptr(J, niltvg(J2G(J)));
  return NEXTFOLD;
}
1951 /* We can safely FOLD/CSE array/hash refs and field loads, since there
1952 ** are no corresponding stores. But we need to check for any NEWREF with
1953 ** an aliased table, as it may invalidate all of the pointers and fields.
1954 ** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
1955 ** FLOADs. And NEWREF itself is treated like a store (see below).
1957 LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
1958 LJFOLDF(fload_tab_tnew_asize)
1960 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
1961 return INTFOLD(fleft->op1);
1962 return NEXTFOLD;
1965 LJFOLD(FLOAD TNEW IRFL_TAB_HMASK)
1966 LJFOLDF(fload_tab_tnew_hmask)
1968 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
1969 return INTFOLD((1 << fleft->op2)-1);
1970 return NEXTFOLD;
1973 LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE)
1974 LJFOLDF(fload_tab_tdup_asize)
1976 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
1977 return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize);
1978 return NEXTFOLD;
1981 LJFOLD(FLOAD TDUP IRFL_TAB_HMASK)
1982 LJFOLDF(fload_tab_tdup_hmask)
1984 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
1985 return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask);
1986 return NEXTFOLD;
1989 LJFOLD(HREF any any)
1990 LJFOLD(FLOAD any IRFL_TAB_ARRAY)
1991 LJFOLD(FLOAD any IRFL_TAB_NODE)
1992 LJFOLD(FLOAD any IRFL_TAB_ASIZE)
1993 LJFOLD(FLOAD any IRFL_TAB_HMASK)
1994 LJFOLDF(fload_tab_ah)
1996 TRef tr = lj_opt_cse(J);
1997 return lj_opt_fwd_tptr(J, tref_ref(tr)) ? tr : EMITFOLD;
/* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
LJFOLD(FLOAD KGC IRFL_STR_LEN)
LJFOLDF(fload_str_len_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kstr(fleft)->len);
  return NEXTFOLD;
}

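/* E.g. taking the length of a string constant: #"abc" records as
** FLOAD(KGC("abc"), IRFL_STR_LEN), which folds to the integer constant 3.
*/
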
LJFOLD(FLOAD SNEW IRFL_STR_LEN)
LJFOLDF(fload_str_len_snew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    PHIBARRIER(fleft);
    return fleft->op2;
  }
  return NEXTFOLD;
}

/* The C type ID of cdata objects is immutable. */
LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
LJFOLDF(fload_cdata_typeid_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kcdata(fleft)->ctypeid);
  return NEXTFOLD;
}

/* Get the contents of immutable cdata objects. */
LJFOLD(FLOAD KGC IRFL_CDATA_PTR)
LJFOLD(FLOAD KGC IRFL_CDATA_INT)
LJFOLD(FLOAD KGC IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_int64_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    void *p = cdataptr(ir_kcdata(fleft));
    if (irt_is64(fins->t))
      return INT64FOLD(*(uint64_t *)p);
    else
      return INTFOLD(*(int32_t *)p);
  }
  return NEXTFOLD;
}

LJFOLD(FLOAD CNEW IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD CNEWI IRFL_CDATA_CTYPEID)
LJFOLDF(fload_cdata_typeid_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op1;  /* No PHI barrier needed. CNEW/CNEWI op1 is const. */
  return NEXTFOLD;
}

/* Pointer, int and int64 cdata objects are immutable. */
LJFOLD(FLOAD CNEWI IRFL_CDATA_PTR)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_ptr_int64_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op2;  /* Fold even across PHI to avoid allocations. */
  return NEXTFOLD;
}

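/* This undoes the boxing of a 64 bit value: if only the payload of a CNEWI
** box is ever loaded, every such load folds back to the original operand,
** and the allocation itself may then be removed as dead code. Hence the
** deliberate folding across PHIs noted above.
*/
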
LJFOLD(FLOAD any IRFL_STR_LEN)
LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD any IRFL_CDATA_PTR)
LJFOLD(FLOAD any IRFL_CDATA_INT)
LJFOLD(FLOAD any IRFL_CDATA_INT64)
LJFOLD(VLOAD any any)  /* Vararg loads have no corresponding stores. */
LJFOLDX(lj_opt_cse)

/* All other field loads need alias analysis. */
LJFOLD(FLOAD any any)
LJFOLDX(lj_opt_fwd_fload)

/* This is for LOOP only. Recording handles SLOADs internally. */
LJFOLD(SLOAD any any)
LJFOLDF(fwd_sload)
{
  if ((fins->op2 & IRSLOAD_FRAME)) {
    TRef tr = lj_opt_cse(J);
    return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
  } else {
    lua_assert(J->slot[fins->op1] != 0);
    return J->slot[fins->op1];
  }
}

/* Only fold for KKPTR. The pointer _and_ the contents must be const. */
LJFOLD(XLOAD KKPTR any)
LJFOLDF(xload_kptr)
{
  TRef tr = kfold_xload(J, fins, ir_kptr(fleft));
  return tr ? tr : NEXTFOLD;
}

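/* E.g. an XLOAD from a KKPTR into immutable memory: kfold_xload() (defined
** earlier in this file) reads the contents at the constant address and, if
** possible, returns them as an IR constant of the load's result type.
*/
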
LJFOLD(XLOAD any any)
LJFOLDX(lj_opt_fwd_xload)

/* -- Write barriers ------------------------------------------------------ */

/* Write barriers are amenable to CSE, but not across any incremental
** GC steps.
**
** The same logic applies to open upvalue references, because a stack
** may be resized during a GC step (not the current stack, but maybe that
** of a coroutine).
*/
LJFOLD(TBAR any)
LJFOLD(OBAR any any)
LJFOLD(UREFO any any)
LJFOLDF(barrier_tab)
{
  TRef tr = lj_opt_cse(J);
  if (gcstep_barrier(J, tref_ref(tr)))  /* CSE across GC step? */
    return EMITFOLD;  /* Raw emit. Assumes fins is left intact by CSE. */
  return tr;
}

LJFOLD(TBAR TNEW)
LJFOLD(TBAR TDUP)
LJFOLDF(barrier_tnew_tdup)
{
  /* New tables are always white and never need a barrier. */
  if (fins->op1 < J->chain[IR_LOOP])  /* Except across a GC step. */
    return NEXTFOLD;
  return DROPFOLD;
}

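/* Illustrative: a TBAR on a table freshly allocated in the same straight
** part of the trace is dropped, since the table is still white. If the
** allocation precedes the LOOP instruction, a GC step in an earlier
** iteration may already have marked it, so the barrier must be kept.
*/
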
/* -- Stores and allocations ---------------------------------------------- */

/* Stores and allocations cannot be folded or passed on to CSE in general.
** But some stores can be eliminated with dead-store elimination (DSE).
**
** Caveat: *all* stores and allocs must be listed here or they end up at CSE!
*/

LJFOLD(ASTORE any any)
LJFOLD(HSTORE any any)
LJFOLDX(lj_opt_dse_ahstore)

LJFOLD(USTORE any any)
LJFOLDX(lj_opt_dse_ustore)

LJFOLD(FSTORE any any)
LJFOLDX(lj_opt_dse_fstore)

LJFOLD(XSTORE any any)
LJFOLDX(lj_opt_dse_xstore)

LJFOLD(NEWREF any any)  /* Treated like a store. */
LJFOLD(CALLS any any)
LJFOLD(CALLL any any)  /* Safeguard fallback. */
LJFOLD(CALLXS any any)
LJFOLD(XBAR)
LJFOLD(RETF any any)  /* Modifies BASE. */
LJFOLD(TNEW any any)
LJFOLD(TDUP any)
LJFOLD(CNEW any any)
LJFOLD(XSNEW any any)
LJFOLDX(lj_ir_emit)

/* ------------------------------------------------------------------------ */

/* Every entry in the generated hash table is a 32 bit pattern:
**
** xxxxxxxx iiiiiii lllllll rrrrrrrrrr
**
**   xxxxxxxx = 8 bit index into fold function table
**    iiiiiii = 7 bit folded instruction opcode
**    lllllll = 7 bit left instruction opcode
** rrrrrrrrrr = 8 bit right instruction opcode or 10 bits from literal field
*/

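/* Worked example of the key layout (a sketch only, compiled out): for a
** BAND instruction with a BOR as left operand and a KINT constant as right
** operand, the initial 24 bit lookup key is built as below. The 8 bit
** function index x is filled in by the generated table in lj_folddef.h.
*/
#if 0
static uint32_t fold_key_example(void)
{
  /* Mirrors the key construction in lj_opt_fold() below. */
  return ((uint32_t)IR_BAND << 17) + ((uint32_t)IR_BOR << 10) + IR_KINT;
}
#endif
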
#include "lj_folddef.h"

/* ------------------------------------------------------------------------ */

/* Fold IR instruction. */
TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
{
  uint32_t key, any;
  IRRef ref;

  if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
    lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
                JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT);
    /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
    if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
      return lj_opt_cse(J);

    /* No FOLD, forwarding or CSE? Emit raw IR for loads, except for SLOAD. */
    if ((J->flags & (JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE)) !=
        (JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE) &&
        irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD)
      return lj_ir_emit(J);

    /* No FOLD or DSE? Emit raw IR for stores. */
    if ((J->flags & (JIT_F_OPT_FOLD|JIT_F_OPT_DSE)) !=
        (JIT_F_OPT_FOLD|JIT_F_OPT_DSE) &&
        irm_kind(lj_ir_mode[fins->o]) == IRM_S)
      return lj_ir_emit(J);
  }

  /* Fold engine start/retry point. */
retry:
  /* Construct key from opcode and operand opcodes (unless literal/none). */
  key = ((uint32_t)fins->o << 17);
  if (fins->op1 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op1)->o << 10;
    *fleft = *IR(fins->op1);
  }
  if (fins->op2 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op2)->o;
    *fright = *IR(fins->op2);
  } else {
    key += (fins->op2 & 0x3ffu);  /* Literal mask. Must include IRCONV_*MASK. */
  }

  /* Check for a match in order from most specific to least specific. */
  any = 0;
  for (;;) {
    uint32_t k = key | (any & 0x1ffff);
    uint32_t h = fold_hashkey(k);
    uint32_t fh = fold_hash[h];  /* Lookup key in semi-perfect hash table. */
    if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
      ref = (IRRef)tref_ref(fold_func[fh >> 24](J));
      if (ref != NEXTFOLD)
        break;
    }
    if (any == 0xfffff)  /* Exhausted folding. Pass on to CSE. */
      return lj_opt_cse(J);
    any = (any | (any >> 10)) ^ 0xffc00;
  }

  /* Return value processing, ordered by frequency. */
  if (LJ_LIKELY(ref >= MAX_FOLD))
    return TREF(ref, irt_t(IR(ref)->t));
  if (ref == RETRYFOLD)
    goto retry;
  if (ref == KINTFOLD)
    return lj_ir_kint(J, fins->i);
  if (ref == FAILFOLD)
    lj_trace_err(J, LJ_TRERR_GFAIL);
  lua_assert(ref == DROPFOLD);
  return REF_DROP;
}

/* -- Common-Subexpression Elimination ------------------------------------ */

/* CSE an IR instruction. This is very fast due to the skip-list chains. */
TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
{
  /* Avoid narrow to wide store-to-load forwarding stall */
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  IROp op = fins->o;
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    /* Limited search for same operands in per-opcode chain. */
    IRRef ref = J->chain[op];
    IRRef lim = fins->op1;
    if (fins->op2 > lim) lim = fins->op2;  /* Relies on lit < REF_BIAS. */
    while (ref > lim) {
      if (IR(ref)->op12 == op12)
        return TREF(ref, irt_t(IR(ref)->t));  /* Common subexpression found. */
      ref = IR(ref)->prev;
    }
  }
  /* Otherwise emit IR (inlined for speed). */
  {
    IRRef ref = lj_ir_nextins(J);
    IRIns *ir = IR(ref);
    ir->prev = J->chain[op];
    ir->op12 = op12;
    J->chain[op] = (IRRef1)ref;
    ir->o = fins->o;
    J->guardemit.irt |= fins->t.irt;
    return TREF(ref, irt_t((ir->t = fins->t)));
  }
}

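/* The search bound exploits SSA ordering: an instruction always follows its
** operands, so any match must have a ref greater than max(op1, op2) and the
** chain walk may stop there. Illustrative chain for IR_ADD:
**
**   J->chain[IR_ADD] -> 0042 -> 0017 -> 0005 -> 0   (linked via ir->prev)
**
** A lookup for ADD(0030, 0011) only needs to inspect 0042.
*/
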
/* CSE with explicit search limit. */
TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim)
{
  IRRef ref = J->chain[fins->o];
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  while (ref > lim) {
    if (IR(ref)->op12 == op12)
      return ref;
    ref = IR(ref)->prev;
  }
  return lj_ir_emit(J);
}

/* ------------------------------------------------------------------------ */

#undef IR
#undef fins
#undef fleft
#undef fright
#undef knumleft
#undef knumright
#undef emitir

#endif