/*
** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
** ABCelim: Array Bounds Check Elimination.
** CSE: Common-Subexpression Elimination.
** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_fold_c
#define LUA_CORE

#include <math.h>

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_buf.h"
#include "lj_str.h"
#include "lj_tab.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_ircall.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#if LJ_HASFFI
#include "lj_ctype.h"
#include "lj_carith.h"
#endif
#include "lj_vm.h"
#include "lj_strscan.h"
#include "lj_strfmt.h"

/* Here's a short description of how the FOLD engine processes instructions:
**
** The FOLD engine receives a single instruction stored in fins (J->fold.ins).
** The instruction and its operands are used to select matching fold rules.
** These are applied iteratively until a fixed point is reached.
**
** The 8 bit opcode of the instruction itself plus the opcodes of the
** two instructions referenced by its operands form a 24 bit key
** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits).
**
** This key is used for partial matching against the fold rules. The
** left/right operand fields of the key are successively masked with
** the 'any' wildcard, from most specific to least specific:
**
**   ins left right
**   ins any  right
**   ins left any
**   ins any  any
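**
** For example (illustrative): an ADD whose operands both reference KINT
** instructions yields the key 'ADD KINT KINT'. A rule declared as
** LJFOLD(ADD KINT KINT) matches on the first, most specific probe; a
** rule declared as LJFOLD(ADD any KINT) would instead match on the
** second probe, after the left operand field has been masked.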
**
** The masked key is used to lookup a matching fold rule in a semi-perfect
** hash table. If a matching rule is found, the related fold function is run.
** Multiple rules can share the same fold function. A fold rule may return
** one of several special values:
**
** - NEXTFOLD means no folding was applied, because an additional test
**   inside the fold function failed. Matching continues against less
**   specific fold rules. Finally the instruction is passed on to CSE.
**
** - RETRYFOLD means the instruction was modified in-place. Folding is
**   retried as if this instruction had just been received.
**
** All other return values are terminal actions -- no further folding is
** applied:
**
** - INTFOLD(i) returns a reference to the integer constant i.
**
** - LEFTFOLD and RIGHTFOLD return the left/right operand reference
**   without emitting an instruction.
**
** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit
**   it without passing through any further optimizations.
**
** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have
**   no result (e.g. guarded assertions): FAILFOLD means the guard would
**   always fail, i.e. the current trace is pointless. DROPFOLD means
**   the guard is always true and has been eliminated. CONDFOLD is a
**   shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail).
**
** - Any other return value is interpreted as an IRRef or TRef. This
**   can be a reference to an existing or a newly created instruction.
**   Only the least-significant 16 bits (IRRef1) are used to form a TRef
**   which is finally returned to the caller.
**
** The FOLD engine receives instructions both from the trace recorder and
** substituted instructions from LOOP unrolling. This means all types
** of instructions may end up here, even though the recorder bypasses
** FOLD in some cases. Thus all loads, stores and allocations must have
** an any/any rule to avoid being passed on to CSE.
**
** Carefully read the following requirements before adding or modifying
** any fold rules:
**
** Requirement #1: All fold rules must preserve their destination type.
**
** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result).
** Never use lj_ir_knumint() which can have either a KINT or KNUM result.
**
** Requirement #2: Fold rules should not create *new* instructions which
** reference operands *across* PHIs.
**
** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the
** left operand is a PHI. Then fleft->op1 would point across the PHI
** frontier to an invariant instruction. Adding a PHI for this instruction
** would be counterproductive. The solution is to add a barrier which
** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case.
** The only exception is for recurrences with high latencies like
** repeated int->num->int conversions.
**
** One could relax this condition a bit if the referenced instruction is
** a PHI, too. But this often leads to worse code due to excessive
** register shuffling.
**
** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though.
** Even returning fleft->op1 would be ok, because a new PHI will be added,
** if needed. But again, this leads to excessive register shuffling and
** should be avoided.
**
** Requirement #3: The set of all fold rules must be monotonic to guarantee
** termination.
**
** The goal is optimization, so one primarily wants to add strength-reducing
** rules. This means eliminating an instruction or replacing an instruction
** with one or more simpler instructions. Don't add fold rules which point
** in the other direction.
**
** Some rules (like commutativity) do not directly reduce the strength of
** an instruction, but enable other fold rules (e.g. by moving constants
** to the right operand). These rules must be made unidirectional to avoid
** cycles.
**
** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it.
*/
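
/* Anatomy of a fold rule, shown as a simplified sketch of a rule that
** appears later in this file (simplify_intsub); the real rule also
** covers SUBOV and 64 bit results:
**
**   LJFOLD(SUB any any)
**   LJFOLDF(simplify_intsub)
**   {
**     if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
**       return INTFOLD(0);   <-- i - i ==> 0, a terminal action
**     return NEXTFOLD;       <-- defer to less specific rules, then CSE
**   }
**
** The LJFOLD() lines are parsed by buildvm to generate the rule table;
** the LJFOLDF() body receives the instruction in fins and copies of its
** operand instructions in fleft/fright.
*/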

/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)		(&J->cur.ir[(ref)])
#define fins		(&J->fold.ins)
#define fleft		(&J->fold.left)
#define fright		(&J->fold.right)
#define knumleft	(ir_knum(fleft)->n)
#define knumright	(ir_knum(fright)->n)

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Fold function type. Fastcall on x86 significantly reduces their size. */
typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);

/* Macros for the fold specs, so buildvm can recognize them. */
#define LJFOLD(x)
#define LJFOLDX(x)
#define LJFOLDF(name)	static TRef LJ_FASTCALL fold_##name(jit_State *J)
/* Note: They must be at the start of a line or buildvm ignores them! */

/* Barrier to prevent using operands across PHIs. */
#define PHIBARRIER(ir)	if (irt_isphi((ir)->t)) return NEXTFOLD

/* Barrier to prevent folding across a GC step.
** GC steps can only happen at the head of a trace and at LOOP.
** And the GC is only driven forward if there's at least one allocation.
*/
#define gcstep_barrier(J, ref) \
  ((ref) < J->chain[IR_LOOP] && \
   (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
    J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
    J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \
    J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA]))

/* -- Constant folding for FP numbers ------------------------------------- */

LJFOLD(ADD KNUM KNUM)
LJFOLD(SUB KNUM KNUM)
LJFOLD(MUL KNUM KNUM)
LJFOLD(DIV KNUM KNUM)
LJFOLD(NEG KNUM KNUM)
LJFOLD(ABS KNUM KNUM)
LJFOLD(ATAN2 KNUM KNUM)
LJFOLD(LDEXP KNUM KNUM)
LJFOLD(MIN KNUM KNUM)
LJFOLD(MAX KNUM KNUM)
LJFOLDF(kfold_numarith)
{
  lua_Number a = knumleft;
  lua_Number b = knumright;
  lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
  return lj_ir_knum(J, y);
}

LJFOLD(LDEXP KNUM KINT)
LJFOLDF(kfold_ldexp)
{
#if LJ_TARGET_X86ORX64
  UNUSED(J);
  return NEXTFOLD;
#else
  return lj_ir_knum(J, ldexp(knumleft, fright->i));
#endif
}

LJFOLD(FPMATH KNUM any)
LJFOLDF(kfold_fpmath)
{
  lua_Number a = knumleft;
  lua_Number y = lj_vm_foldfpm(a, fins->op2);
  return lj_ir_knum(J, y);
}

LJFOLD(POW KNUM KINT)
LJFOLDF(kfold_numpow)
{
  lua_Number a = knumleft;
  lua_Number b = (lua_Number)fright->i;
  lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
  return lj_ir_knum(J, y);
}

/* Must not use kfold_kref for numbers (could be NaN). */
LJFOLD(EQ KNUM KNUM)
LJFOLD(NE KNUM KNUM)
LJFOLD(LT KNUM KNUM)
LJFOLD(GE KNUM KNUM)
LJFOLD(LE KNUM KNUM)
LJFOLD(GT KNUM KNUM)
LJFOLD(ULT KNUM KNUM)
LJFOLD(UGE KNUM KNUM)
LJFOLD(ULE KNUM KNUM)
LJFOLD(UGT KNUM KNUM)
LJFOLDF(kfold_numcomp)
{
  return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o));
}

/* -- Constant folding for 32 bit integers -------------------------------- */

static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
{
  switch (op) {
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
  case IR_MUL: k1 *= k2; break;
  case IR_MOD: k1 = lj_vm_modi(k1, k2); break;
  case IR_NEG: k1 = -k1; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
  case IR_BSHL: k1 <<= (k2 & 31); break;
  case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break;
  case IR_BSAR: k1 >>= (k2 & 31); break;
  case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break;
  case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
  case IR_MIN: k1 = k1 < k2 ? k1 : k2; break;
  case IR_MAX: k1 = k1 > k2 ? k1 : k2; break;
  default: lua_assert(0); break;
  }
  return k1;
}

LJFOLD(ADD KINT KINT)
LJFOLD(SUB KINT KINT)
LJFOLD(MUL KINT KINT)
LJFOLD(MOD KINT KINT)
LJFOLD(NEG KINT KINT)
LJFOLD(BAND KINT KINT)
LJFOLD(BOR KINT KINT)
LJFOLD(BXOR KINT KINT)
LJFOLD(BSHL KINT KINT)
LJFOLD(BSHR KINT KINT)
LJFOLD(BSAR KINT KINT)
LJFOLD(BROL KINT KINT)
LJFOLD(BROR KINT KINT)
LJFOLD(MIN KINT KINT)
LJFOLD(MAX KINT KINT)
LJFOLDF(kfold_intarith)
{
  return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
}

LJFOLD(ADDOV KINT KINT)
LJFOLD(SUBOV KINT KINT)
LJFOLD(MULOV KINT KINT)
LJFOLDF(kfold_intovarith)
{
  lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i,
				 fins->o - IR_ADDOV);
  int32_t k = lj_num2int(n);
  if (n != (lua_Number)k)
    return FAILFOLD;
  return INTFOLD(k);
}

LJFOLD(BNOT KINT)
LJFOLDF(kfold_bnot)
{
  return INTFOLD(~fleft->i);
}

LJFOLD(BSWAP KINT)
LJFOLDF(kfold_bswap)
{
  return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i));
}

LJFOLD(LT KINT KINT)
LJFOLD(GE KINT KINT)
LJFOLD(LE KINT KINT)
LJFOLD(GT KINT KINT)
LJFOLD(ULT KINT KINT)
LJFOLD(UGE KINT KINT)
LJFOLD(ULE KINT KINT)
LJFOLD(UGT KINT KINT)
LJFOLD(ABC KINT KINT)
LJFOLDF(kfold_intcomp)
{
  int32_t a = fleft->i, b = fright->i;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD(a < b);
  case IR_GE: return CONDFOLD(a >= b);
  case IR_LE: return CONDFOLD(a <= b);
  case IR_GT: return CONDFOLD(a > b);
  case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b);
  case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b);
  case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
  case IR_ABC:
  case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
  default: lua_assert(0); return FAILFOLD;
  }
}

LJFOLD(UGE any KINT)
LJFOLDF(kfold_intcomp0)
{
  if (fright->i == 0)
    return DROPFOLD;
  return NEXTFOLD;
}

/* -- Constant folding for 64 bit integers -------------------------------- */

static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
{
  switch (op) {
#if LJ_HASFFI
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
  case IR_MUL: k1 *= k2; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
#endif
  default: UNUSED(k2); lua_assert(0); break;
  }
  return k1;
}

LJFOLD(ADD KINT64 KINT64)
LJFOLD(SUB KINT64 KINT64)
LJFOLD(MUL KINT64 KINT64)
LJFOLD(BAND KINT64 KINT64)
LJFOLD(BOR KINT64 KINT64)
LJFOLD(BXOR KINT64 KINT64)
LJFOLDF(kfold_int64arith)
{
  return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64,
				    ir_k64(fright)->u64, (IROp)fins->o));
}

LJFOLD(DIV KINT64 KINT64)
LJFOLD(MOD KINT64 KINT64)
LJFOLD(POW KINT64 KINT64)
LJFOLDF(kfold_int64arith2)
{
#if LJ_HASFFI
  uint64_t k1 = ir_k64(fleft)->u64, k2 = ir_k64(fright)->u64;
  if (irt_isi64(fins->t)) {
    k1 = fins->o == IR_DIV ? lj_carith_divi64((int64_t)k1, (int64_t)k2) :
	 fins->o == IR_MOD ? lj_carith_modi64((int64_t)k1, (int64_t)k2) :
			     lj_carith_powi64((int64_t)k1, (int64_t)k2);
  } else {
    k1 = fins->o == IR_DIV ? lj_carith_divu64(k1, k2) :
	 fins->o == IR_MOD ? lj_carith_modu64(k1, k2) :
			     lj_carith_powu64(k1, k2);
  }
  return INT64FOLD(k1);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(BSHL KINT64 KINT)
LJFOLD(BSHR KINT64 KINT)
LJFOLD(BSAR KINT64 KINT)
LJFOLD(BROL KINT64 KINT)
LJFOLD(BROR KINT64 KINT)
LJFOLDF(kfold_int64shift)
{
#if LJ_HASFFI
  uint64_t k = ir_k64(fleft)->u64;
  int32_t sh = (fright->i & 63);
  return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL));
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(BNOT KINT64)
LJFOLDF(kfold_bnot64)
{
#if LJ_HASFFI
  return INT64FOLD(~ir_k64(fleft)->u64);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(BSWAP KINT64)
LJFOLDF(kfold_bswap64)
{
#if LJ_HASFFI
  return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64));
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(LT KINT64 KINT64)
LJFOLD(GE KINT64 KINT64)
LJFOLD(LE KINT64 KINT64)
LJFOLD(GT KINT64 KINT64)
LJFOLD(ULT KINT64 KINT64)
LJFOLD(UGE KINT64 KINT64)
LJFOLD(ULE KINT64 KINT64)
LJFOLD(UGT KINT64 KINT64)
LJFOLDF(kfold_int64comp)
{
#if LJ_HASFFI
  uint64_t a = ir_k64(fleft)->u64, b = ir_k64(fright)->u64;
  switch ((IROp)fins->o) {
  /* Signed comparisons must cast, since a and b are unsigned. */
  case IR_LT: return CONDFOLD((int64_t)a < (int64_t)b);
  case IR_GE: return CONDFOLD((int64_t)a >= (int64_t)b);
  case IR_LE: return CONDFOLD((int64_t)a <= (int64_t)b);
  case IR_GT: return CONDFOLD((int64_t)a > (int64_t)b);
  case IR_ULT: return CONDFOLD(a < b);
  case IR_UGE: return CONDFOLD(a >= b);
  case IR_ULE: return CONDFOLD(a <= b);
  case IR_UGT: return CONDFOLD(a > b);
  default: lua_assert(0); return FAILFOLD;
  }
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(UGE any KINT64)
LJFOLDF(kfold_int64comp0)
{
#if LJ_HASFFI
  if (ir_k64(fright)->u64 == 0)
    return DROPFOLD;
  return NEXTFOLD;
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

/* -- Constant folding for strings ---------------------------------------- */

LJFOLD(SNEW KKPTR KINT)
LJFOLDF(kfold_snew_kptr)
{
  GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i);
  return lj_ir_kstr(J, s);
}

LJFOLD(SNEW any KINT)
LJFOLDF(kfold_snew_empty)
{
  if (fright->i == 0)
    return lj_ir_kstr(J, &J2G(J)->strempty);
  return NEXTFOLD;
}

LJFOLD(STRREF KGC KINT)
LJFOLDF(kfold_strref)
{
  GCstr *str = ir_kstr(fleft);
  lua_assert((MSize)fright->i <= str->len);
  return lj_ir_kkptr(J, (char *)strdata(str) + fright->i);
}

LJFOLD(STRREF SNEW any)
LJFOLDF(kfold_strref_snew)
{
  PHIBARRIER(fleft);
  if (irref_isk(fins->op2) && fright->i == 0) {
    return fleft->op1;  /* strref(snew(ptr, len), 0) ==> ptr */
  } else {
    /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
    IRIns *ir = IR(fleft->op1);
    if (ir->o == IR_STRREF) {
      IRRef1 str = ir->op1;  /* IRIns * is not valid across emitir. */
      PHIBARRIER(ir);
      fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2);  /* Clobbers fins! */
      fins->op1 = str;
      fins->ot = IRT(IR_STRREF, IRT_P32);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(CALLN CARG IRCALL_lj_str_cmp)
LJFOLDF(kfold_strcmp)
{
  if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
    GCstr *a = ir_kstr(IR(fleft->op1));
    GCstr *b = ir_kstr(IR(fleft->op2));
    return INTFOLD(lj_str_cmp(a, b));
  }
  return NEXTFOLD;
}

/* -- Constant folding and forwarding for buffers ------------------------- */

/*
** Buffer ops perform stores, but their effect is limited to the buffer
** itself. Also, buffer ops are chained: a use of an op implies a use of
** all other ops up the chain. Conversely, if an op is unused, all ops
** up the chain can go unused. This largely eliminates the need to treat
** them as stores.
**
** Alas, treating them as normal (IRM_N) ops doesn't work, because they
** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP
** or if FOLD is disabled.
**
** The compromise is to declare them as loads, emit them like stores and
** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
** fragments left over from CSE are eliminated by DCE.
*/
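
/* Illustrative chain shape (a sketch, not verbatim IR): concatenating
** two strings may record
**
**   t1  BUFHDR  buf
**   t2  BUFPUT  t1, s1
**   t3  BUFPUT  t2, s2
**   t4  BUFSTR  t3, t1
**
** A use of t4 implies uses of t3, t2 and t1, and CSE of the BUFSTR must
** compare the whole chain, which bufstr_kfold_cse below does manually.
*/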

/* BUFHDR is emitted like a store, see below. */

LJFOLD(BUFPUT BUFHDR BUFSTR)
LJFOLDF(bufput_append)
{
  /* New buffer, no other buffer op inbetween and same buffer? */
  if ((J->flags & JIT_F_OPT_FWD) &&
      !(fleft->op2 & IRBUFHDR_APPEND) &&
      fleft->prev == fright->op2 &&
      fleft->op1 == IR(fright->op2)->op1) {
    IRRef ref = fins->op1;
    IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND);  /* Modify BUFHDR. */
    IR(ref)->op1 = fright->op1;
    return ref;
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

LJFOLD(BUFPUT any any)
LJFOLDF(bufput_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) {
    GCstr *s2 = ir_kstr(fright);
    if (s2->len == 0) {  /* Empty string? */
      return LEFTFOLD;
    } else {
      if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) &&
	  !irt_isphi(fleft->t)) {  /* Join two constant string puts in a row. */
	GCstr *s1 = ir_kstr(IR(fleft->op2));
	IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2));
	/* lj_ir_kstr() may realloc the IR and invalidates any IRIns *. */
	IR(fins->op1)->op2 = kref;  /* Modify previous BUFPUT. */
	return fins->op1;
      }
    }
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

LJFOLD(BUFSTR any any)
LJFOLDF(bufstr_kfold_cse)
{
  lua_assert(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT ||
	     fleft->o == IR_CALLL);
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    if (fleft->o == IR_BUFHDR) {  /* No put operations? */
      if (!(fleft->op2 & IRBUFHDR_APPEND))  /* Empty buffer? */
	return lj_ir_kstr(J, &J2G(J)->strempty);
      fins->op1 = fleft->op1;
      fins->op2 = fleft->prev;  /* Relies on checks in bufput_append. */
      return CSEFOLD;
    } else if (fleft->o == IR_BUFPUT) {
      IRIns *irb = IR(fleft->op1);
      if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND))
	return fleft->op2;  /* Shortcut for a single put operation. */
    }
  }
  /* Try to CSE the whole chain. */
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref = J->chain[IR_BUFSTR];
    while (ref) {
      IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1);
      while (ira->o == irb->o && ira->op2 == irb->op2) {
	lua_assert(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
		   ira->o == IR_CALLL || ira->o == IR_CARG);
	if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND))
	  return ref;  /* CSE succeeded. */
	if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab)
	  break;
	ira = IR(ira->op1);
	irb = IR(irb->op1);
      }
      ref = irs->prev;
    }
  }
  return EMITFOLD;  /* No CSE possible. */
}

LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse)
LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper)
LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted)
LJFOLDF(bufput_kfold_op)
{
  if (irref_isk(fleft->op2)) {
    const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
    SBuf *sb = lj_buf_tmp_(J->L);
    sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb,
						       ir_kstr(IR(fleft->op2)));
    fins->o = IR_BUFPUT;
    fins->op1 = fleft->op1;
    fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
    return RETRYFOLD;
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep)
LJFOLDF(bufput_kfold_rep)
{
  if (irref_isk(fleft->op2)) {
    IRIns *irc = IR(fleft->op1);
    if (irref_isk(irc->op2)) {
      SBuf *sb = lj_buf_tmp_(J->L);
      sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i);
      fins->o = IR_BUFPUT;
      fins->op1 = irc->op1;
      fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
      return RETRYFOLD;
    }
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar)
LJFOLDF(bufput_kfold_fmt)
{
  IRIns *irc = IR(fleft->op1);
  lua_assert(irref_isk(irc->op2));  /* SFormat must be const. */
  if (irref_isk(fleft->op2)) {
    SFormat sf = (SFormat)IR(irc->op2)->i;
    IRIns *ira = IR(fleft->op2);
    SBuf *sb = lj_buf_tmp_(J->L);
    switch (fins->op2) {
    case IRCALL_lj_strfmt_putfxint:
      sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64);
      break;
    case IRCALL_lj_strfmt_putfstr:
      sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira));
      break;
    case IRCALL_lj_strfmt_putfchar:
      sb = lj_strfmt_putfchar(sb, sf, ira->i);
      break;
    case IRCALL_lj_strfmt_putfnum_int:
    case IRCALL_lj_strfmt_putfnum_uint:
    case IRCALL_lj_strfmt_putfnum:
    default: {
      const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
      sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf,
							       ir_knum(ira)->n);
      break;
      }
    }
    fins->o = IR_BUFPUT;
    fins->op1 = irc->op1;
    fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
    return RETRYFOLD;
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

/* -- Constant folding of pointer arithmetic ------------------------------ */

LJFOLD(ADD KGC KINT)
LJFOLD(ADD KGC KINT64)
LJFOLDF(kfold_add_kgc)
{
  GCobj *o = ir_kgc(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
#if LJ_HASFFI
  if (irt_iscdata(fleft->t)) {
    CType *ct = ctype_raw(ctype_ctsG(J2G(J)), gco2cd(o)->ctypeid);
    if (ctype_isnum(ct->info) || ctype_isenum(ct->info) ||
	ctype_isptr(ct->info) || ctype_isfunc(ct->info) ||
	ctype_iscomplex(ct->info) || ctype_isvector(ct->info))
      return lj_ir_kkptr(J, (char *)o + ofs);
  }
#endif
  return lj_ir_kptr(J, (char *)o + ofs);
}

LJFOLD(ADD KPTR KINT)
LJFOLD(ADD KPTR KINT64)
LJFOLD(ADD KKPTR KINT)
LJFOLD(ADD KKPTR KINT64)
LJFOLDF(kfold_add_kptr)
{
  void *p = ir_kptr(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
  return lj_ir_kptr_(J, fleft->o, (char *)p + ofs);
}

LJFOLD(ADD any KGC)
LJFOLD(ADD any KPTR)
LJFOLD(ADD any KKPTR)
LJFOLDF(kfold_add_kright)
{
  if (fleft->o == IR_KINT || fleft->o == IR_KINT64) {
    IRRef1 tmp = fins->op1; fins->op1 = fins->op2; fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* -- Constant folding of conversions ------------------------------------- */

LJFOLD(TOBIT KNUM KNUM)
LJFOLDF(kfold_tobit)
{
  return INTFOLD(lj_num2bit(knumleft));
}

LJFOLD(CONV KINT IRCONV_NUM_INT)
LJFOLDF(kfold_conv_kint_num)
{
  return lj_ir_knum(J, (lua_Number)fleft->i);
}

LJFOLD(CONV KINT IRCONV_NUM_U32)
LJFOLDF(kfold_conv_kintu32_num)
{
  return lj_ir_knum(J, (lua_Number)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT IRCONV_INT_I8)
LJFOLD(CONV KINT IRCONV_INT_U8)
LJFOLD(CONV KINT IRCONV_INT_I16)
LJFOLD(CONV KINT IRCONV_INT_U16)
LJFOLDF(kfold_conv_kint_ext)
{
  int32_t k = fleft->i;
  if ((fins->op2 & IRCONV_SRCMASK) == IRT_I8) k = (int8_t)k;
  else if ((fins->op2 & IRCONV_SRCMASK) == IRT_U8) k = (uint8_t)k;
  else if ((fins->op2 & IRCONV_SRCMASK) == IRT_I16) k = (int16_t)k;
  else k = (uint16_t)k;
  return INTFOLD(k);
}

LJFOLD(CONV KINT IRCONV_I64_INT)
LJFOLD(CONV KINT IRCONV_U64_INT)
LJFOLD(CONV KINT IRCONV_I64_U32)
LJFOLD(CONV KINT IRCONV_U64_U32)
LJFOLDF(kfold_conv_kint_i64)
{
  if ((fins->op2 & IRCONV_SEXT))
    return INT64FOLD((uint64_t)(int64_t)fleft->i);
  else
    return INT64FOLD((uint64_t)(int64_t)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT64 IRCONV_NUM_I64)
LJFOLDF(kfold_conv_kint64_num_i64)
{
  return lj_ir_knum(J, (lua_Number)(int64_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_NUM_U64)
LJFOLDF(kfold_conv_kint64_num_u64)
{
  return lj_ir_knum(J, (lua_Number)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_INT_I64)
LJFOLD(CONV KINT64 IRCONV_U32_I64)
LJFOLDF(kfold_conv_kint64_int_i64)
{
  return INTFOLD((int32_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KNUM IRCONV_INT_NUM)
LJFOLDF(kfold_conv_knum_int_num)
{
  lua_Number n = knumleft;
  int32_t k = lj_num2int(n);
  if (irt_isguard(fins->t) && n != (lua_Number)k) {
    /* We're about to create a guard which always fails, like CONV +1.5.
    ** Some pathological loops cause this during LICM, e.g.:
    **   local x,k,t = 0,1.5,{1,[1.5]=2}
    **   for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
    **   assert(x == 300)
    */
    return FAILFOLD;
  }
  return INTFOLD(k);
}

LJFOLD(CONV KNUM IRCONV_U32_NUM)
LJFOLDF(kfold_conv_knum_u32_num)
{
#ifdef _MSC_VER
  {  /* Workaround for MSVC bug. */
    volatile uint32_t u = (uint32_t)knumleft;
    return INTFOLD((int32_t)u);
  }
#else
  return INTFOLD((int32_t)(uint32_t)knumleft);
#endif
}

LJFOLD(CONV KNUM IRCONV_I64_NUM)
LJFOLDF(kfold_conv_knum_i64_num)
{
  return INT64FOLD((uint64_t)(int64_t)knumleft);
}

LJFOLD(CONV KNUM IRCONV_U64_NUM)
LJFOLDF(kfold_conv_knum_u64_num)
{
  return INT64FOLD(lj_num2u64(knumleft));
}

LJFOLD(TOSTR KNUM any)
LJFOLDF(kfold_tostr_knum)
{
  return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft)));
}

LJFOLD(TOSTR KINT any)
LJFOLDF(kfold_tostr_kint)
{
  return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ?
		       lj_strfmt_int(J->L, fleft->i) :
		       lj_strfmt_char(J->L, fleft->i));
}

LJFOLD(STRTO KGC)
LJFOLDF(kfold_strto)
{
  TValue n;
  if (lj_strscan_num(ir_kstr(fleft), &n))
    return lj_ir_knum(J, numV(&n));
  return FAILFOLD;
}

/* -- Constant folding of equality checks --------------------------------- */

/* Don't constant-fold away FLOAD checks against KNULL. */
LJFOLD(EQ FLOAD KNULL)
LJFOLD(NE FLOAD KNULL)
LJFOLDX(lj_opt_cse)

/* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
LJFOLD(EQ any KNULL)
LJFOLD(NE any KNULL)
LJFOLD(EQ KNULL any)
LJFOLD(NE KNULL any)
LJFOLD(EQ KINT KINT)  /* Constants are unique, so same refs <==> same value. */
LJFOLD(NE KINT KINT)
LJFOLD(EQ KINT64 KINT64)
LJFOLD(NE KINT64 KINT64)
LJFOLD(EQ KGC KGC)
LJFOLD(NE KGC KGC)
LJFOLDF(kfold_kref)
{
  return CONDFOLD((fins->op1 == fins->op2) ^ (fins->o == IR_NE));
}

/* -- Algebraic shortcuts ------------------------------------------------- */

LJFOLD(FPMATH FPMATH IRFPM_FLOOR)
LJFOLD(FPMATH FPMATH IRFPM_CEIL)
LJFOLD(FPMATH FPMATH IRFPM_TRUNC)
LJFOLDF(shortcut_round)
{
  IRFPMathOp op = (IRFPMathOp)fleft->op2;
  if (op == IRFPM_FLOOR || op == IRFPM_CEIL || op == IRFPM_TRUNC)
    return LEFTFOLD;  /* round(round_left(x)) = round_left(x) */
  return NEXTFOLD;
}

LJFOLD(ABS ABS KNUM)
LJFOLDF(shortcut_left)
{
  return LEFTFOLD;  /* f(g(x)) ==> g(x) */
}

LJFOLD(ABS NEG KNUM)
LJFOLDF(shortcut_dropleft)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* abs(neg(x)) ==> abs(x) */
  return RETRYFOLD;
}

/* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
LJFOLD(NEG NEG any)
LJFOLD(BNOT BNOT)
LJFOLD(BSWAP BSWAP)
LJFOLDF(shortcut_leftleft)
{
  PHIBARRIER(fleft);  /* See above. Fold would be ok, but not beneficial. */
  return fleft->op1;  /* f(g(x)) ==> x */
}

/* -- FP algebraic simplifications ---------------------------------------- */

/* FP arithmetic is tricky -- there's not much to simplify.
** Please note the following common pitfalls before sending "improvements":
**   x+0 ==> x  is INVALID for x=-0
**   0-x ==> -x is INVALID for x=+0
**   x*0 ==> 0  is INVALID for x=-0, x=+-Inf or x=NaN
*/
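
/* Concrete counterexample for the first pitfall: under IEEE 754
** round-to-nearest, (-0)+(+0) evaluates to +0, so folding x+0 to x
** would turn a +0 result into -0 -- observable e.g. via 1/x, which is
** +Inf for +0 but -Inf for -0.
*/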

LJFOLD(ADD NEG any)
LJFOLDF(simplify_numadd_negx)
{
  PHIBARRIER(fleft);
  fins->o = IR_SUB;  /* (-a) + b ==> b - a */
  fins->op1 = fins->op2;
  fins->op2 = fleft->op1;
  return RETRYFOLD;
}

LJFOLD(ADD any NEG)
LJFOLDF(simplify_numadd_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_SUB;  /* a + (-b) ==> a - b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(SUB any KNUM)
LJFOLDF(simplify_numsub_k)
{
  lua_Number n = knumright;
  if (n == 0.0)  /* x - (+-0) ==> x */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB NEG KNUM)
LJFOLDF(simplify_numsub_negk)
{
  PHIBARRIER(fleft);
  fins->op2 = fleft->op1;  /* (-x) - k ==> (-k) - x */
  fins->op1 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(SUB any NEG)
LJFOLDF(simplify_numsub_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_ADD;  /* a - (-b) ==> a + b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(MUL any KNUM)
LJFOLD(DIV any KNUM)
LJFOLDF(simplify_nummuldiv_k)
{
  lua_Number n = knumright;
  if (n == 1.0) {  /* x o 1 ==> x */
    return LEFTFOLD;
  } else if (n == -1.0) {  /* x o -1 ==> -x */
    fins->o = IR_NEG;
    fins->op2 = (IRRef1)lj_ir_knum_neg(J);
    return RETRYFOLD;
  } else if (fins->o == IR_MUL && n == 2.0) {  /* x * 2 ==> x + x */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  } else if (fins->o == IR_DIV) {  /* x / 2^k ==> x * 2^-k */
    uint64_t u = ir_knum(fright)->u64;
    uint32_t ex = ((uint32_t)(u >> 52) & 0x7ff);
    if ((u & U64x(000fffff,ffffffff)) == 0 && ex - 1 < 0x7fd) {
      u = (u & ((uint64_t)1 << 63)) | ((uint64_t)(0x7fe - ex) << 52);
      fins->o = IR_MUL;  /* Multiply by exact reciprocal. */
      fins->op2 = lj_ir_knum_u64(J, u);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}
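
/* Worked example for the reciprocal rewrite above: the rule fires only
** when the divisor is a power of two 2^e, i.e. all mantissa bits are
** zero and the biased exponent is ex = e + 1023. The rewritten constant
** keeps the sign bit and sets the biased exponent to 0x7fe - ex =
** (1023 - e) + 1023, which encodes exactly 2^-e. For x/4 (e=2, ex=0x401)
** this yields the constant 0.25, so x/4 becomes x*0.25 with bit-identical
** results. The unsigned 'ex - 1 < 0x7fd' test rejects ex=0 and ex>=0x7fe,
** i.e. denormal, infinite or NaN divisors and the cases where the
** reciprocal would not be a normal number.
*/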

LJFOLD(MUL NEG KNUM)
LJFOLD(DIV NEG KNUM)
LJFOLDF(simplify_nummuldiv_negk)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* (-a) o k ==> a o (-k) */
  fins->op2 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(MUL NEG NEG)
LJFOLD(DIV NEG NEG)
LJFOLDF(simplify_nummuldiv_negneg)
{
  PHIBARRIER(fleft);
  PHIBARRIER(fright);
  fins->op1 = fleft->op1;  /* (-a) o (-b) ==> a o b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(POW any KINT)
LJFOLDF(simplify_numpow_xk)
{
  int32_t k = fright->i;
  TRef ref = fins->op1;
  if (k == 0)  /* x ^ 0 ==> 1 */
    return lj_ir_knum_one(J);  /* Result must be a number, not an int. */
  if (k == 1)  /* x ^ 1 ==> x */
    return LEFTFOLD;
  if ((uint32_t)(k+65536) > 2*65536u)  /* Limit code explosion. */
    return NEXTFOLD;
  if (k < 0) {  /* x ^ (-k) ==> (1/x) ^ k. */
    ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
    k = -k;
  }
  /* Unroll x^k for 1 <= k <= 65536. */
  for (; (k & 1) == 0; k >>= 1)  /* Handle leading zeros. */
    ref = emitir(IRTN(IR_MUL), ref, ref);
  if ((k >>= 1) != 0) {  /* Handle trailing bits. */
    TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
    for (; k != 1; k >>= 1) {
      if (k & 1)
	ref = emitir(IRTN(IR_MUL), ref, tmp);
      tmp = emitir(IRTN(IR_MUL), tmp, tmp);
    }
    ref = emitir(IRTN(IR_MUL), ref, tmp);
  }
  return ref;
}
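
/* Worked example of the square-and-multiply unrolling above: for x^5,
** k=5 is odd, so the leading-zeros loop is skipped. After k >>= 1, k=2
** and tmp = x^2. The inner loop squares tmp to x^4 (k becomes 1), and
** the final multiply gives ref = x * x^4 = x^5 -- three IR_MUL
** instructions instead of a call to pow().
*/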

LJFOLD(POW KNUM any)
LJFOLDF(simplify_numpow_kx)
{
  lua_Number n = knumleft;
  if (n == 2.0) {  /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
    fins->o = IR_CONV;
#if LJ_TARGET_X86ORX64
    fins->op1 = fins->op2;
    fins->op2 = IRCONV_NUM_INT;
    fins->op2 = (IRRef1)lj_opt_fold(J);
#endif
    fins->op1 = (IRRef1)lj_ir_knum_one(J);
    fins->o = IR_LDEXP;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* -- Simplify conversions ------------------------------------------------ */

LJFOLD(CONV CONV IRCONV_NUM_INT)  /* _NUM */
LJFOLDF(shortcut_conv_num_int)
{
  PHIBARRIER(fleft);
  /* Only safe with a guarded conversion to int. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM && irt_isguard(fleft->t))
    return fleft->op1;  /* f(g(x)) ==> x */
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_NUM)  /* _INT */
LJFOLD(CONV CONV IRCONV_U32_NUM)  /* _U32 */
LJFOLDF(simplify_conv_int_num)
{
  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
  if ((fleft->op2 & IRCONV_SRCMASK) ==
      ((fins->op2 & IRCONV_DSTMASK) >> IRCONV_DSH))
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_I64_NUM)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U64_NUM)  /* _INT or _U32 */
LJFOLDF(simplify_conv_i64_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
    /* Reduce to a sign-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
    return RETRYFOLD;
  } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
#if LJ_TARGET_X64
    return fleft->op1;
#else
    /* Reduce to a zero-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = (IRT_I64<<5)|IRT_U32;
    return RETRYFOLD;
#endif
  }
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_I64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_INT_U64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U32_I64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U32_U64)  /* _INT or _U32 */
LJFOLDF(simplify_conv_int_i64)
{
  int src;
  PHIBARRIER(fleft);
  src = (fleft->op2 & IRCONV_SRCMASK);
  if (src == IRT_INT || src == IRT_U32) {
    if (src == ((fins->op2 & IRCONV_DSTMASK) >> IRCONV_DSH)) {
      return fleft->op1;
    } else {
      fins->op2 = ((fins->op2 & IRCONV_DSTMASK) | src);
      fins->op1 = fleft->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_FLOAT_NUM)  /* _FLOAT */
LJFOLDF(simplify_conv_flt_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_FLOAT)
    return fleft->op1;
  return NEXTFOLD;
}

/* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(TOBIT CONV KNUM)
LJFOLDF(simplify_tobit_conv)
{
  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
    lua_assert(irt_isnum(fleft->t));
    return fleft->op1;
  } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
    lua_assert(irt_isnum(fleft->t));
    fins->o = IR_CONV;
    fins->op1 = fleft->op1;
    fins->op2 = (IRT_INT<<5)|IRT_U32;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(FPMATH CONV IRFPM_FLOOR)
LJFOLD(FPMATH CONV IRFPM_CEIL)
LJFOLD(FPMATH CONV IRFPM_TRUNC)
LJFOLDF(simplify_floor_conv)
{
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
      (fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
    return LEFTFOLD;
  return NEXTFOLD;
}

/* Strength reduction of widening. */
LJFOLD(CONV any IRCONV_I64_INT)
LJFOLD(CONV any IRCONV_U64_INT)
LJFOLDF(simplify_conv_sext)
{
  IRRef ref = fins->op1;
  int64_t ofs = 0;
  if (!(fins->op2 & IRCONV_SEXT))
    return NEXTFOLD;
  PHIBARRIER(fleft);
  if (fleft->o == IR_XLOAD && (irt_isu8(fleft->t) || irt_isu16(fleft->t)))
    goto ok_reduce;
  if (fleft->o == IR_ADD && irref_isk(fleft->op2)) {
    ofs = (int64_t)IR(fleft->op2)->i;
    ref = fleft->op1;
  }
  /* Use scalar evolution analysis results to strength-reduce sign-extension. */
  if (ref == J->scev.idx) {
    IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
    lua_assert(irt_isint(J->scev.t));
    if (lo && IR(lo)->i + ofs >= 0) {
    ok_reduce:
#if LJ_TARGET_X64
      /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
      return LEFTFOLD;
#else
      /* Reduce to a (cheaper) zero-extension. */
      fins->op2 &= ~IRCONV_SEXT;
      return RETRYFOLD;
#endif
    }
  }
  return NEXTFOLD;
}
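
/* Illustrative case for the rule above (a sketch of the intended
** pattern): indexing memory with a loop variable on a 64 bit target,
** e.g. 'for i=0,n-1 do s = s + p[i] end' over an FFI array. The 32 bit
** index i is proven non-negative by scalar evolution, so sign-extending
** it (or i+k, when the constant offset keeps it non-negative) to 64 bits
** behaves like a zero-extension, which on x64 every 32 bit operation
** already performs implicitly.
*/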

/* Strength reduction of narrowing. */
LJFOLD(CONV ADD IRCONV_INT_I64)
LJFOLD(CONV SUB IRCONV_INT_I64)
LJFOLD(CONV MUL IRCONV_INT_I64)
LJFOLD(CONV ADD IRCONV_INT_U64)
LJFOLD(CONV SUB IRCONV_INT_U64)
LJFOLD(CONV MUL IRCONV_INT_U64)
LJFOLD(CONV ADD IRCONV_U32_I64)
LJFOLD(CONV SUB IRCONV_U32_I64)
LJFOLD(CONV MUL IRCONV_U32_I64)
LJFOLD(CONV ADD IRCONV_U32_U64)
LJFOLD(CONV SUB IRCONV_U32_U64)
LJFOLD(CONV MUL IRCONV_U32_U64)
LJFOLDF(simplify_conv_narrow)
{
  IROp op = (IROp)fleft->o;
  IRType t = irt_type(fins->t);
  IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
  PHIBARRIER(fleft);
  op1 = emitir(IRTI(IR_CONV), op1, mode);
  op2 = emitir(IRTI(IR_CONV), op2, mode);
  fins->ot = IRT(op, t);
  fins->op1 = op1;
  fins->op2 = op2;
  return RETRYFOLD;
}

/* Special CSE rule for CONV. */
LJFOLD(CONV any any)
LJFOLDF(cse_conv)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
    uint8_t guard = irt_isguard(fins->t);
    IRRef ref = J->chain[IR_CONV];
    while (ref > op1) {
      IRIns *ir = IR(ref);
      /* Commoning with stronger checks is ok. */
      if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
	  irt_isguard(ir->t) >= guard)
	return ref;
      ref = ir->prev;
    }
  }
  return EMITFOLD;  /* No fallthrough to regular CSE. */
}

/* FP conversion narrowing. */
LJFOLD(TOBIT ADD KNUM)
LJFOLD(TOBIT SUB KNUM)
LJFOLD(CONV ADD IRCONV_INT_NUM)
LJFOLD(CONV SUB IRCONV_INT_NUM)
LJFOLD(CONV ADD IRCONV_I64_NUM)
LJFOLD(CONV SUB IRCONV_I64_NUM)
LJFOLDF(narrow_convert)
{
  PHIBARRIER(fleft);
  /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
  if (J->chain[IR_LOOP])
    return NEXTFOLD;
  lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT);
  return lj_opt_narrow_convert(J);
}

/* -- Integer algebraic simplifications ----------------------------------- */

LJFOLD(ADD any KINT)
LJFOLD(ADDOV any KINT)
LJFOLD(SUBOV any KINT)
LJFOLDF(simplify_intadd_k)
{
  if (fright->i == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(MULOV any KINT)
LJFOLDF(simplify_intmul_k)
{
  if (fright->i == 0)  /* i * 0 ==> 0 */
    return RIGHTFOLD;
  if (fright->i == 1)  /* i * 1 ==> i */
    return LEFTFOLD;
  if (fright->i == 2) {  /* i * 2 ==> i + i */
    fins->o = IR_ADDOV;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any KINT)
LJFOLDF(simplify_intsub_k)
{
  if (fright->i == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i);  /* Overflow for -2^31 ok. */
  return RETRYFOLD;
}

LJFOLD(SUB KINT any)
LJFOLD(SUB KINT64 any)
LJFOLDF(simplify_intsub_kleft)
{
  if (fleft->o == IR_KINT ? (fleft->i == 0) : (ir_kint64(fleft)->u64 == 0)) {
    fins->o = IR_NEG;  /* 0 - i ==> -i */
    fins->op1 = fins->op2;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(ADD any KINT64)
LJFOLDF(simplify_intadd_k64)
{
  if (ir_kint64(fright)->u64 == 0)  /* i + 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB any KINT64)
LJFOLDF(simplify_intsub_k64)
{
  uint64_t k = ir_kint64(fright)->u64;
  if (k == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint64(J, (uint64_t)-(int64_t)k);
  return RETRYFOLD;
}

static TRef simplify_intmul_k(jit_State *J, int32_t k)
{
  /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2.
  ** But this is mainly intended for simple address arithmetic.
  ** Also it's easier for the backend to optimize the original multiplies.
  */
  if (k == 0) {  /* i * 0 ==> 0 */
    return RIGHTFOLD;
  } else if (k == 1) {  /* i * 1 ==> i */
    return LEFTFOLD;
  } else if ((k & (k-1)) == 0) {  /* i * 2^k ==> i << k */
    fins->o = IR_BSHL;
    fins->op2 = lj_ir_kint(J, lj_fls((uint32_t)k));
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(MUL any KINT)
LJFOLDF(simplify_intmul_k32)
{
  if (fright->i >= 0)
    return simplify_intmul_k(J, fright->i);
  return NEXTFOLD;
}

LJFOLD(MUL any KINT64)
LJFOLDF(simplify_intmul_k64)
{
#if LJ_HASFFI
  if (ir_kint64(fright)->u64 < 0x80000000u)
    return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
  return NEXTFOLD;
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(MOD any KINT)
LJFOLDF(simplify_intmod_k)
{
  int32_t k = fright->i;
  lua_assert(k != 0);
  if (k > 0 && (k & (k-1)) == 0) {  /* i % (2^k) ==> i & (2^k-1) */
    fins->o = IR_BAND;
    fins->op2 = lj_ir_kint(J, k-1);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(MOD KINT any)
LJFOLDF(simplify_intmod_kleft)
{
  if (fleft->i == 0)
    return INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB any any)
LJFOLD(SUBOV any any)
LJFOLDF(simplify_intsub)
{
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))  /* i - i ==> 0 */
    return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB ADD any)
LJFOLDF(simplify_intsubadd_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1)  /* (i + j) - i ==> j */
      return fleft->op2;
    if (fins->op2 == fleft->op2)  /* (i + j) - j ==> i */
      return fleft->op1;
  }
  return NEXTFOLD;
}

LJFOLD(SUB SUB any)
LJFOLDF(simplify_intsubsub_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1) {  /* (i - j) - i ==> 0 - j */
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      fins->op2 = fleft->op2;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB any SUB)
LJFOLDF(simplify_intsubsub_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1)  /* i - (i - j) ==> j */
      return fright->op2;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any ADD)
LJFOLDF(simplify_intsubadd_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1) {  /* i - (i + j) ==> 0 - j */
      fins->op2 = fright->op2;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
    if (fins->op1 == fright->op2) {  /* i - (j + i) ==> 0 - j */
      fins->op2 = fright->op1;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB ADD ADD)
LJFOLDF(simplify_intsubaddadd_cancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    PHIBARRIER(fright);
    if (fleft->op1 == fright->op1) {  /* (i + j1) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op1 == fright->op2) {  /* (i + j1) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op1) {  /* (j1 + i) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op2) {  /* (j1 + i) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BAND any KINT)
LJFOLD(BAND any KINT64)
LJFOLDF(simplify_band_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
				     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i & 0 ==> 0 */
    return RIGHTFOLD;
  if (k == -1)  /* i & -1 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BOR any KINT)
LJFOLD(BOR any KINT64)
LJFOLDF(simplify_bor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
				     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i | 0 ==> i */
    return LEFTFOLD;
  if (k == -1)  /* i | -1 ==> -1 */
    return RIGHTFOLD;
  return NEXTFOLD;
}

LJFOLD(BXOR any KINT)
LJFOLD(BXOR any KINT64)
LJFOLDF(simplify_bxor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
				     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i xor 0 ==> i */
    return LEFTFOLD;
  if (k == -1) {  /* i xor -1 ==> ~i */
    fins->o = IR_BNOT;
    fins->op2 = 0;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BSHL any KINT)
LJFOLD(BSHR any KINT)
LJFOLD(BSAR any KINT)
LJFOLD(BROL any KINT)
LJFOLD(BROR any KINT)
LJFOLDF(simplify_shift_ik)
{
  int32_t mask = irt_is64(fins->t) ? 63 : 31;
  int32_t k = (fright->i & mask);
  if (k == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  if (k == 1 && fins->o == IR_BSHL) {  /* i << 1 ==> i + i */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  if (k != fright->i) {  /* i o k ==> i o (k & mask) */
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
#ifndef LJ_TARGET_UNIFYROT
  if (fins->o == IR_BROR) {  /* bror(i, k) ==> brol(i, (-k)&mask) */
    fins->o = IR_BROL;
    fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&mask);
    return RETRYFOLD;
  }
#endif
  return NEXTFOLD;
}

LJFOLD(BSHL any BAND)
LJFOLD(BSHR any BAND)
LJFOLD(BSAR any BAND)
LJFOLD(BROL any BAND)
LJFOLD(BROR any BAND)
LJFOLDF(simplify_shift_andk)
{
  IRIns *irk = IR(fright->op2);
  PHIBARRIER(fright);
  if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
      irk->o == IR_KINT) {  /* i o (j & mask) ==> i o j */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = irk->i & mask;
    if (k == mask) {
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BSHL KINT any)
LJFOLD(BSHR KINT any)
LJFOLD(BSHL KINT64 any)
LJFOLD(BSHR KINT64 any)
LJFOLDF(simplify_shift1_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
				    (int64_t)ir_k64(fleft)->u64;
  if (k == 0)  /* 0 o i ==> 0 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSAR KINT any)
LJFOLD(BROL KINT any)
LJFOLD(BROR KINT any)
LJFOLD(BSAR KINT64 any)
LJFOLD(BROL KINT64 any)
LJFOLD(BROR KINT64 any)
LJFOLDF(simplify_shift2_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
				    (int64_t)ir_k64(fleft)->u64;
  if (k == 0 || k == -1)  /* 0 o i ==> 0; -1 o i ==> -1 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSHL BAND KINT)
LJFOLD(BSHR BAND KINT)
LJFOLD(BROL BAND KINT)
LJFOLD(BROR BAND KINT)
LJFOLDF(simplify_shiftk_andk)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);
  if (irk->o == IR_KINT) {  /* (i & k1) o k2 ==> (i o k2) & (k1 o k2) */
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    fins->op1 = fleft->op1;
    fins->op1 = (IRRef1)lj_opt_fold(J);
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    fins->ot = IRTI(IR_BAND);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BAND BSHL KINT)
LJFOLD(BAND BSHR KINT)
LJFOLDF(simplify_andk_shiftk)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT &&
      kfold_intop(-1, irk->i, (IROp)fleft->o) == fright->i)
    return LEFTFOLD;  /* (i o k1) & k2 ==> i, if (-1 o k1) == k2 */
  return NEXTFOLD;
}

/* -- Reassociation ------------------------------------------------------- */

LJFOLD(ADD ADD KINT)
LJFOLD(MUL MUL KINT)
LJFOLD(BAND BAND KINT)
LJFOLD(BOR BOR KINT)
LJFOLD(BXOR BXOR KINT)
LJFOLDF(reassoc_intarith_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT) {
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    if (k == irk->i)  /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
}

LJFOLD(ADD ADD KINT64)
LJFOLD(MUL MUL KINT64)
LJFOLD(BAND BAND KINT64)
LJFOLD(BOR BOR KINT64)
LJFOLD(BXOR BXOR KINT64)
LJFOLDF(reassoc_intarith_k64)
{
#if LJ_HASFFI
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT64) {
    uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
				  ir_k64(fright)->u64, (IROp)fins->o);
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint64(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(MIN MIN any)
LJFOLD(MAX MAX any)
LJFOLD(BAND BAND any)
LJFOLD(BOR BOR any)
LJFOLDF(reassoc_dup)
{
  if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
    return LEFTFOLD;  /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */
  return NEXTFOLD;
}

LJFOLD(BXOR BXOR any)
LJFOLDF(reassoc_bxor)
{
  PHIBARRIER(fleft);
  if (fins->op2 == fleft->op1)  /* (a xor b) xor a ==> b */
    return fleft->op2;
  if (fins->op2 == fleft->op2)  /* (a xor b) xor b ==> a */
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(BSHL BSHL KINT)
LJFOLD(BSHR BSHR KINT)
LJFOLD(BSAR BSAR KINT)
LJFOLD(BROL BROL KINT)
LJFOLD(BROR BROR KINT)
LJFOLDF(reassoc_shift)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);  /* The (shift any KINT) rule covers k2 == 0 and more. */
  if (irk->o == IR_KINT) {  /* (i o k1) o k2 ==> i o (k1 + k2) */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = (irk->i & mask) + (fright->i & mask);
    if (k > mask) {  /* Combined shift too wide? */
      if (fins->o == IR_BSHL || fins->o == IR_BSHR)
	return mask == 31 ? INTFOLD(0) : INT64FOLD(0);
      else if (fins->o == IR_BSAR)
	k = mask;
      else
	k &= mask;
    }
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(MIN MIN KNUM)
LJFOLD(MAX MAX KNUM)
LJFOLD(MIN MIN KINT)
LJFOLD(MAX MAX KINT)
LJFOLDF(reassoc_minmax_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KNUM) {
    lua_Number a = ir_knum(irk)->n;
    lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD);
    if (a == y)  /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_knum(J, y);
    return RETRYFOLD;  /* (x o k1) o k2 ==> x o (k1 o k2) */
  } else if (irk->o == IR_KINT) {
    int32_t a = irk->i;
    int32_t y = kfold_intop(a, fright->i, fins->o);
    if (a == y)  /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, y);
    return RETRYFOLD;  /* (x o k1) o k2 ==> x o (k1 o k2) */
  }
  return NEXTFOLD;
}

LJFOLD(MIN MAX any)
LJFOLD(MAX MIN any)
LJFOLDF(reassoc_minmax_left)
{
  if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
    return RIGHTFOLD;  /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */
  return NEXTFOLD;
}

LJFOLD(MIN any MAX)
LJFOLD(MAX any MIN)
LJFOLDF(reassoc_minmax_right)
{
  if (fins->op1 == fright->op1 || fins->op1 == fright->op2)
    return LEFTFOLD;  /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */
  return NEXTFOLD;
}

/* -- Array bounds check elimination -------------------------------------- */

/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
*/
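
/* Illustrative instance (a sketch of the intended case): in
**   for i=2,n do t[i] = t[i-1] end
** LOOP unrolling substitutes i+1 for i in the copied iteration, so the
** load index i-1 reappears as (i+1)+(-1). The rule below recognizes this
** round trip and drops the ABC when an equivalent ABC on the plain index
** is already present in the IR.
*/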

LJFOLD(ABC any ADD)
LJFOLDF(abc_fwd)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    if (irref_isk(fright->op2)) {
      IRIns *add2 = IR(fright->op1);
      if (add2->o == IR_ADD && irref_isk(add2->op2) &&
	  IR(fright->op2)->i == -IR(add2->op2)->i) {
	IRRef ref = J->chain[IR_ABC];
	IRRef lim = add2->op1;
	if (fins->op1 > lim) lim = fins->op1;
	while (ref > lim) {
	  IRIns *ir = IR(ref);
	  if (ir->op1 == fins->op1 && ir->op2 == add2->op1)
	    return DROPFOLD;
	  ref = ir->prev;
	}
      }
    }
  }
  return NEXTFOLD;
}

/* Eliminate ABC for constants.
** ABC(asize, k1), ABC(asize, k2) ==> ABC(asize, max(k1, k2))
** Drop second ABC if k2 is lower. Otherwise patch first ABC with k2.
*/
LJFOLD(ABC any KINT)
LJFOLDF(abc_k)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    IRRef ref = J->chain[IR_ABC];
    IRRef asize = fins->op1;
    while (ref > asize) {
      IRIns *ir = IR(ref);
      if (ir->op1 == asize && irref_isk(ir->op2)) {
	int32_t k = IR(ir->op2)->i;
	if (fright->i > k)
	  ir->op2 = fins->op2;
	return DROPFOLD;
      }
      ref = ir->prev;
    }
    return EMITFOLD;  /* Already performed CSE. */
  }
  return NEXTFOLD;
}

/* Eliminate invariant ABC inside loop. */
LJFOLD(ABC any any)
LJFOLDF(abc_invar)
{
  /* Invariant ABC marked as PTR. Drop if op1 is invariant, too. */
  if (!irt_isint(fins->t) && fins->op1 < J->chain[IR_LOOP] &&
      !irt_isphi(IR(fins->op1)->t))
    return DROPFOLD;
  return NEXTFOLD;
}

/* -- Commutativity ------------------------------------------------------- */

/* The refs of commutative ops are canonicalized. Lower refs go to the right.
** Rationale behind this:
** - It (also) moves constants to the right.
** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices).
** - It helps CSE to find more matches.
** - The assembler generates better code with constants at the right.
*/
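
/* Example (illustrative): constants are interned at the lowest IR refs,
** so ADD(KINT, x) always has op1 < op2 and comm_swap below rewrites it
** to ADD(x, KINT). Both operand orders then canonicalize to the same
** instruction, which lets CSE match them and lets rules like
** (ADD any KINT) inspect only the right operand.
*/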

LJFOLD(ADD any any)
LJFOLD(MUL any any)
LJFOLD(ADDOV any any)
LJFOLD(MULOV any any)
LJFOLDF(comm_swap)
{
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(EQ any any)
LJFOLD(NE any any)
LJFOLDF(comm_equal)
{
  /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD(fins->o == IR_EQ);
  return fold_comm_swap(J);
}

LJFOLD(LT any any)
LJFOLD(GE any any)
LJFOLD(LE any any)
LJFOLD(GT any any)
LJFOLD(ULT any any)
LJFOLD(UGE any any)
LJFOLD(ULE any any)
LJFOLD(UGT any any)
LJFOLDF(comm_comp)
{
  /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    /* The bit test picks the reflexive comparisons (LE/GE/ULE/UGE). */
    return CONDFOLD((fins->o ^ (fins->o >> 1)) & 1);
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    fins->o ^= 3;  /* GT <-> LT, GE <-> LE, does not affect U */
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BAND any any)
LJFOLD(BOR any any)
LJFOLD(MIN any any)
LJFOLD(MAX any any)
LJFOLDF(comm_dup)
{
  if (fins->op1 == fins->op2)  /* x o x ==> x */
    return LEFTFOLD;
  return fold_comm_swap(J);
}

LJFOLD(BXOR any any)
LJFOLDF(comm_bxor)
{
  if (fins->op1 == fins->op2)  /* i xor i ==> 0 */
    return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
  return fold_comm_swap(J);
}

/* -- Simplification of compound expressions ------------------------------ */

static TRef kfold_xload(jit_State *J, IRIns *ir, const void *p)
{
  int32_t k;
  switch (irt_type(ir->t)) {
  case IRT_NUM: return lj_ir_knum_u64(J, *(uint64_t *)p);
  case IRT_I8: k = (int32_t)*(int8_t *)p; break;
  case IRT_U8: k = (int32_t)*(uint8_t *)p; break;
  case IRT_I16: k = (int32_t)(int16_t)lj_getu16(p); break;
  case IRT_U16: k = (int32_t)(uint16_t)lj_getu16(p); break;
  case IRT_INT: case IRT_U32: k = (int32_t)lj_getu32(p); break;
  case IRT_I64: case IRT_U64: return lj_ir_kint64(J, *(uint64_t *)p);
  default: return 0;
  }
  return lj_ir_kint(J, k);
}

/* Turn: string.sub(str, a, b) == kstr
** into: string.byte(str, a) == string.byte(kstr, 1) etc.
** Note: this creates unaligned XLOADs on x86/x64.
*/
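
/* Illustrative effect at the Lua level (a sketch, not the exact IR): a
** comparison like
**   string.sub(s, i, i+3) == "abcd"
** need not materialize the substring. It becomes a length check plus one
** 4 byte XLOAD through the STRREF, compared against the constant bytes
** of "abcd" (with the byte order picked by LJ_ENDIAN_SELECT).
*/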
LJFOLD(EQ SNEW KGC)
LJFOLD(NE SNEW KGC)
LJFOLDF(merge_eqne_snew_kgc)
{
  GCstr *kstr = ir_kstr(fright);
  int32_t len = (int32_t)kstr->len;
  lua_assert(irt_isstr(fins->t));

#if LJ_TARGET_UNALIGNED
#define FOLD_SNEW_MAX_LEN  4  /* Handle string lengths 0, 1, 2, 3, 4. */
#define FOLD_SNEW_TYPE8    IRT_I8  /* Creates shorter immediates. */
#else
#define FOLD_SNEW_MAX_LEN  1  /* Handle string lengths 0 or 1. */
#define FOLD_SNEW_TYPE8    IRT_U8  /* Prefer unsigned loads. */
#endif

  PHIBARRIER(fleft);
  if (len <= FOLD_SNEW_MAX_LEN) {
    IROp op = (IROp)fins->o;
    IRRef strref = fleft->op1;
    if (IR(strref)->o != IR_STRREF)
      return NEXTFOLD;
    if (op == IR_EQ) {
      emitir(IRTGI(IR_EQ), fleft->op2, lj_ir_kint(J, len));
      /* Caveat: fins/fleft/fright is no longer valid after emitir. */
    } else {
      /* NE is not expanded since this would need an OR of two conds. */
      if (!irref_isk(fleft->op2))  /* Only handle the constant length case. */
        return NEXTFOLD;
      if (IR(fleft->op2)->i != len)
        return DROPFOLD;
    }
    if (len > 0) {
      /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
      uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, FOLD_SNEW_TYPE8) :
                               len == 2 ? IRT(IR_XLOAD, IRT_U16) :
                               IRTI(IR_XLOAD));
      TRef tmp = emitir(ot, strref,
                        IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0));
      TRef val = kfold_xload(J, IR(tref_ref(tmp)), strdata(kstr));
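      /* The 32 bit load for len == 3 also grabs the byte after the string
      ** data, so mask it off below; the constant side needs no mask, since
      ** kstr's fourth byte is its terminating NUL and thus already zero.
      */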
      if (len == 3)
        tmp = emitir(IRTI(IR_BAND), tmp,
                     lj_ir_kint(J, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
      fins->op1 = (IRRef1)tmp;
      fins->op2 = (IRRef1)val;
      fins->ot = (IROpT)IRTGI(op);
      return RETRYFOLD;
    } else {
      return DROPFOLD;
    }
  }
  return NEXTFOLD;
}

/* -- Loads --------------------------------------------------------------- */

/* Loads cannot be folded or passed on to CSE in general.
** Alias analysis is needed to check for forwarding opportunities.
**
** Caveat: *all* loads must be listed here or they end up at CSE!
*/

LJFOLD(ALOAD any)
LJFOLDX(lj_opt_fwd_aload)

/* From HREF fwd (see below). Must eliminate, not supported by fwd/backend. */
LJFOLD(HLOAD KKPTR)
LJFOLDF(kfold_hload_kkptr)
{
  UNUSED(J);
  lua_assert(ir_kptr(fleft) == niltvg(J2G(J)));
  return TREF_NIL;
}

LJFOLD(HLOAD any)
LJFOLDX(lj_opt_fwd_hload)

LJFOLD(ULOAD any)
LJFOLDX(lj_opt_fwd_uload)

LJFOLD(CALLL any IRCALL_lj_tab_len)
LJFOLDX(lj_opt_fwd_tab_len)

/* Upvalue refs are really loads, but there are no corresponding stores.
** So CSE is ok for them, except for UREFO across a GC step (see below).
** If the referenced function is const, its upvalue addresses are const, too.
** This can be used to improve CSE by looking for the same address,
** even if the upvalues originate from a different function.
*/
LJFOLD(UREFO KGC any)
LJFOLD(UREFC KGC any)
LJFOLDF(cse_uref)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref = J->chain[fins->o];
    GCfunc *fn = ir_kfunc(fleft);
    GCupval *uv = gco2uv(gcref(fn->l.uvptr[(fins->op2 >> 8)]));
    while (ref > 0) {
      IRIns *ir = IR(ref);
      if (irref_isk(ir->op1)) {
        GCfunc *fn2 = ir_kfunc(IR(ir->op1));
        if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
          if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
            break;
          return ref;
        }
      }
      ref = ir->prev;
    }
  }
  return EMITFOLD;
}

LJFOLD(HREFK any any)
LJFOLDX(lj_opt_fwd_hrefk)

LJFOLD(HREF TNEW any)
LJFOLDF(fwd_href_tnew)
{
  if (lj_opt_fwd_href_nokey(J))
    return lj_ir_kkptr(J, niltvg(J2G(J)));
  return NEXTFOLD;
}

LJFOLD(HREF TDUP KPRI)
LJFOLD(HREF TDUP KGC)
LJFOLD(HREF TDUP KNUM)
LJFOLDF(fwd_href_tdup)
{
  TValue keyv;
  lj_ir_kvalue(J->L, &keyv, fright);
  if (lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv) == niltvg(J2G(J)) &&
      lj_opt_fwd_href_nokey(J))
    return lj_ir_kkptr(J, niltvg(J2G(J)));
  return NEXTFOLD;
}

/* We can safely FOLD/CSE array/hash refs and field loads, since there
** are no corresponding stores. But we need to check for any NEWREF with
** an aliased table, as it may invalidate all of the pointers and fields.
** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
** FLOADs. And NEWREF itself is treated like a store (see below).
** LREF is constant (per trace) since coroutine switches are not inlined.
*/
LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tnew_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD(fleft->op1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TNEW IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tnew_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((1 << fleft->op2)-1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tdup_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tdup_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask);
  return NEXTFOLD;
}

LJFOLD(HREF any any)
LJFOLD(FLOAD any IRFL_TAB_ARRAY)
LJFOLD(FLOAD any IRFL_TAB_NODE)
LJFOLD(FLOAD any IRFL_TAB_ASIZE)
LJFOLD(FLOAD any IRFL_TAB_HMASK)
LJFOLDF(fload_tab_ah)
{
  TRef tr = lj_opt_cse(J);
  return lj_opt_fwd_tptr(J, tref_ref(tr)) ? tr : EMITFOLD;
}

/* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
LJFOLD(FLOAD KGC IRFL_STR_LEN)
LJFOLDF(fload_str_len_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kstr(fleft)->len);
  return NEXTFOLD;
}

LJFOLD(FLOAD SNEW IRFL_STR_LEN)
LJFOLDF(fload_str_len_snew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    PHIBARRIER(fleft);
    return fleft->op2;
  }
  return NEXTFOLD;
}

LJFOLD(FLOAD TOSTR IRFL_STR_LEN)
LJFOLDF(fload_str_len_tostr)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR)
    return INTFOLD(1);
  return NEXTFOLD;
}

/* The C type ID of cdata objects is immutable. */
LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
LJFOLDF(fload_cdata_typeid_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kcdata(fleft)->ctypeid);
  return NEXTFOLD;
}

/* Get the contents of immutable cdata objects. */
LJFOLD(FLOAD KGC IRFL_CDATA_PTR)
LJFOLD(FLOAD KGC IRFL_CDATA_INT)
LJFOLD(FLOAD KGC IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_int64_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    void *p = cdataptr(ir_kcdata(fleft));
    if (irt_is64(fins->t))
      return INT64FOLD(*(uint64_t *)p);
    else
      return INTFOLD(*(int32_t *)p);
  }
  return NEXTFOLD;
}

LJFOLD(FLOAD CNEW IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD CNEWI IRFL_CDATA_CTYPEID)
LJFOLDF(fload_cdata_typeid_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op1;  /* No PHI barrier needed. CNEW/CNEWI op1 is const. */
  return NEXTFOLD;
}

/* Pointer, int and int64 cdata objects are immutable. */
LJFOLD(FLOAD CNEWI IRFL_CDATA_PTR)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_ptr_int64_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op2;  /* Fold even across PHI to avoid allocations. */
  return NEXTFOLD;
}

LJFOLD(FLOAD any IRFL_STR_LEN)
LJFOLD(FLOAD any IRFL_FUNC_ENV)
LJFOLD(FLOAD any IRFL_THREAD_ENV)
LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD any IRFL_CDATA_PTR)
LJFOLD(FLOAD any IRFL_CDATA_INT)
LJFOLD(FLOAD any IRFL_CDATA_INT64)
LJFOLD(VLOAD any any)  /* Vararg loads have no corresponding stores. */
LJFOLDX(lj_opt_cse)

/* All other field loads need alias analysis. */
LJFOLD(FLOAD any any)
LJFOLDX(lj_opt_fwd_fload)

/* This is for LOOP only. Recording handles SLOADs internally. */
LJFOLD(SLOAD any any)
LJFOLDF(fwd_sload)
{
  if ((fins->op2 & IRSLOAD_FRAME)) {
    TRef tr = lj_opt_cse(J);
    return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
  } else {
    lua_assert(J->slot[fins->op1] != 0);
    return J->slot[fins->op1];
  }
}

/* Only fold for KKPTR. The pointer _and_ the contents must be const. */
LJFOLD(XLOAD KKPTR any)
LJFOLDF(xload_kptr)
{
  TRef tr = kfold_xload(J, fins, ir_kptr(fleft));
  return tr ? tr : NEXTFOLD;
}

LJFOLD(XLOAD any any)
LJFOLDX(lj_opt_fwd_xload)

/* -- Write barriers ------------------------------------------------------ */

/* Write barriers are amenable to CSE, but not across any incremental
** GC steps.
**
** The same logic applies to open upvalue references, because a stack
** may be resized during a GC step (not the current stack, but maybe that
** of a coroutine).
*/
LJFOLD(TBAR any)
LJFOLD(OBAR any any)
LJFOLD(UREFO any any)
LJFOLDF(barrier_tab)
{
  TRef tr = lj_opt_cse(J);
  if (gcstep_barrier(J, tref_ref(tr)))  /* CSE across GC step? */
    return EMITFOLD;  /* Raw emit. Assumes fins is left intact by CSE. */
  return tr;
}

LJFOLD(TBAR TNEW)
LJFOLD(TBAR TDUP)
LJFOLDF(barrier_tnew_tdup)
{
  /* New tables are always white and never need a barrier. */
  if (fins->op1 < J->chain[IR_LOOP])  /* Except across a GC step. */
    return NEXTFOLD;
  return DROPFOLD;
}

/* -- Profiling ----------------------------------------------------------- */

LJFOLD(PROF any any)
LJFOLDF(prof)
{
  IRRef ref = J->chain[IR_PROF];
  if (ref+1 == J->cur.nins)  /* Drop neighbouring IR_PROF. */
    return ref;
  return EMITFOLD;
}

/* -- Stores and allocations ---------------------------------------------- */

/* Stores and allocations cannot be folded or passed on to CSE in general.
** But some stores can be eliminated with dead-store elimination (DSE).
**
** Caveat: *all* stores and allocs must be listed here or they end up at CSE!
*/

LJFOLD(ASTORE any any)
LJFOLD(HSTORE any any)
LJFOLDX(lj_opt_dse_ahstore)

LJFOLD(USTORE any any)
LJFOLDX(lj_opt_dse_ustore)

LJFOLD(FSTORE any any)
LJFOLDX(lj_opt_dse_fstore)

LJFOLD(XSTORE any any)
LJFOLDX(lj_opt_dse_xstore)

LJFOLD(NEWREF any any)  /* Treated like a store. */
LJFOLD(CALLA any any)
LJFOLD(CALLL any any)  /* Safeguard fallback. */
LJFOLD(CALLS any any)
LJFOLD(CALLXS any any)
LJFOLD(XBAR)
LJFOLD(RETF any any)  /* Modifies BASE. */
LJFOLD(TNEW any any)
LJFOLD(TDUP any)
LJFOLD(CNEW any any)
LJFOLD(XSNEW any any)
LJFOLD(BUFHDR any any)
LJFOLDX(lj_ir_emit)

/* ------------------------------------------------------------------------ */

/* Every entry in the generated hash table is a 32 bit pattern:
**
** xxxxxxxx iiiiiii lllllll rrrrrrrrrr
**
**   xxxxxxxx = 8 bit index into fold function table
**    iiiiiii = 7 bit folded instruction opcode
**    lllllll = 7 bit left instruction opcode
** rrrrrrrrrr = 8 bit right instruction opcode or 10 bits from literal field
*/

#include "lj_folddef.h"

/* ------------------------------------------------------------------------ */
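
/* A self-contained sketch of the wildcard masking in lj_opt_fold() below,
** kept out of the build with #if 0 and assuming only the C library (the
** opcode triple is made up). It walks the same 'any' sequence as the
** matching loop and prints each masked key tried: first the exact key, then
** with the left operand field, the right operand field, and finally both
** fields wildcarded to all-ones.
*/
#if 0
#include <stdio.h>
#include <stdint.h>

int main(void)
{
  uint32_t key = (0x20u << 17) + (0x15u << 10) + 0x0au;  /* Hypothetical. */
  uint32_t any = 0;
  for (;;) {
    printf("try key %06x\n", key | (any & 0x1ffff));
    if (any == 0xfffff) break;  /* Fully wildcarded key was the last try. */
    any = (any | (any >> 10)) ^ 0xffc00;  /* 0 -> left -> right -> both. */
  }
  return 0;
}
#endif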

/* Fold IR instruction. */
TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
{
  uint32_t key, any;
  IRRef ref;

  if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
    lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
                JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT);
    /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
    if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
      return lj_opt_cse(J);

    /* No FOLD, forwarding or CSE? Emit raw IR for loads, except for SLOAD. */
    if ((J->flags & (JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE)) !=
        (JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE) &&
        irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD)
      return lj_ir_emit(J);

    /* No FOLD or DSE? Emit raw IR for stores. */
    if ((J->flags & (JIT_F_OPT_FOLD|JIT_F_OPT_DSE)) !=
        (JIT_F_OPT_FOLD|JIT_F_OPT_DSE) &&
        irm_kind(lj_ir_mode[fins->o]) == IRM_S)
      return lj_ir_emit(J);
  }

  /* Fold engine start/retry point. */
retry:
  /* Construct key from opcode and operand opcodes (unless literal/none). */
  key = ((uint32_t)fins->o << 17);
  if (fins->op1 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op1)->o << 10;
    *fleft = *IR(fins->op1);
  }
  if (fins->op2 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op2)->o;
    *fright = *IR(fins->op2);
  } else {
    key += (fins->op2 & 0x3ffu);  /* Literal mask. Must include IRCONV_*MASK. */
  }
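
  /* E.g. the LJFOLD(EQ SNEW KGC) rule earlier in this file was generated
  ** from the key (IR_EQ << 17) + (IR_SNEW << 10) + IR_KGC built here.
  */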

  /* Check for a match in order from most specific to least specific. */
  any = 0;
  for (;;) {
    uint32_t k = key | (any & 0x1ffff);
    uint32_t h = fold_hashkey(k);
    uint32_t fh = fold_hash[h];  /* Lookup key in semi-perfect hash table. */
    if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
      ref = (IRRef)tref_ref(fold_func[fh >> 24](J));
      if (ref != NEXTFOLD)
        break;
    }
    if (any == 0xfffff)  /* Exhausted folding. Pass on to CSE. */
      return lj_opt_cse(J);
    any = (any | (any >> 10)) ^ 0xffc00;
  }

  /* Return value processing, ordered by frequency. */
  if (LJ_LIKELY(ref >= MAX_FOLD))
    return TREF(ref, irt_t(IR(ref)->t));
  if (ref == RETRYFOLD)
    goto retry;
  if (ref == KINTFOLD)
    return lj_ir_kint(J, fins->i);
  if (ref == FAILFOLD)
    lj_trace_err(J, LJ_TRERR_GFAIL);
  lua_assert(ref == DROPFOLD);
  return REF_DROP;
}

/* -- Common-Subexpression Elimination ------------------------------------ */

/* CSE an IR instruction. This is very fast due to the skip-list chains. */
TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
{
  /* Avoid narrow-to-wide store-to-load forwarding stall. */
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  IROp op = fins->o;
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    /* Limited search for same operands in per-opcode chain. */
    IRRef ref = J->chain[op];
    IRRef lim = fins->op1;
    if (fins->op2 > lim) lim = fins->op2;  /* Relies on lit < REF_BIAS. */
    while (ref > lim) {
      if (IR(ref)->op12 == op12)
        return TREF(ref, irt_t(IR(ref)->t));  /* Common subexpression found. */
      ref = IR(ref)->prev;
    }
  }
  /* Otherwise emit IR (inlined for speed). */
  {
    IRRef ref = lj_ir_nextins(J);
    IRIns *ir = IR(ref);
    ir->prev = J->chain[op];
    ir->op12 = op12;
    J->chain[op] = (IRRef1)ref;
    ir->o = fins->o;
    J->guardemit.irt |= fins->t.irt;
    return TREF(ref, irt_t((ir->t = fins->t)));
  }
}
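
/* A self-contained sketch of the skip-list idea behind lj_opt_cse() above,
** kept out of the build with #if 0; the ToyIns type and toy_cse() are
** illustrative only. One chain head per opcode plus a prev link per
** instruction restricts the backward search to instructions with the same
** opcode, and the search stops as soon as a candidate could not have
** referenced both operands anyway.
*/
#if 0
#include <stdint.h>

typedef struct ToyIns {
  uint32_t op12;  /* Packed operand pair, as in IRIns. */
  uint16_t prev;  /* Previous instruction with the same opcode, 0 = none. */
} ToyIns;

static uint16_t toy_cse(ToyIns *ir, uint16_t *chain, uint16_t op,
                        uint32_t op12, uint16_t lim)
{
  uint16_t ref = chain[op];  /* Most recent instruction with this opcode. */
  while (ref > lim) {  /* A match must come after both of its operands. */
    if (ir[ref].op12 == op12)
      return ref;  /* Common subexpression found. */
    ref = ir[ref].prev;
  }
  return 0;  /* No match: the caller would emit a new instruction. */
}
#endif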

/* CSE with explicit search limit. */
TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim)
{
  IRRef ref = J->chain[fins->o];
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  while (ref > lim) {
    if (IR(ref)->op12 == op12)
      return ref;
    ref = IR(ref)->prev;
  }
  return lj_ir_emit(J);
}

/* ------------------------------------------------------------------------ */

#undef IR
#undef fins
#undef fleft
#undef fright
#undef knumleft
#undef knumright
#undef emitir

#endif