/*
** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
** ABCelim: Array Bounds Check Elimination.
** CSE: Common-Subexpression Elimination.
** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_fold_c
#define LUA_CORE

#include <math.h>

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_buf.h"
#include "lj_str.h"
#include "lj_tab.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_ircall.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#if LJ_HASFFI
#include "lj_ctype.h"
#include "lj_carith.h"
#endif
#include "lj_vm.h"
#include "lj_strscan.h"
#include "lj_strfmt.h"
/* Here's a short description of how the FOLD engine processes instructions:
**
** The FOLD engine receives a single instruction stored in fins (J->fold.ins).
** The instruction and its operands are used to select matching fold rules.
** These are applied iteratively until a fixed point is reached.
**
** The 8 bit opcode of the instruction itself plus the opcodes of the
** two instructions referenced by its operands form a 24 bit key
** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits).
**
** This key is used for partial matching against the fold rules. The
** left/right operand fields of the key are successively masked with
** the 'any' wildcard, from most specific to least specific:
**
**   ins left right
**   ins any  right
**   ins left any
**   ins any  any
**
** The masked key is used to lookup a matching fold rule in a semi-perfect
** hash table. If a matching rule is found, the related fold function is run.
** Multiple rules can share the same fold function. A fold rule may return
** one of several special values:
**
** - NEXTFOLD means no folding was applied, because an additional test
**   inside the fold function failed. Matching continues against less
**   specific fold rules. Finally the instruction is passed on to CSE.
**
** - RETRYFOLD means the instruction was modified in-place. Folding is
**   retried as if this instruction had just been received.
**
** All other return values are terminal actions -- no further folding is
** applied:
**
** - INTFOLD(i) returns a reference to the integer constant i.
**
** - LEFTFOLD and RIGHTFOLD return the left/right operand reference
**   without emitting an instruction.
**
** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit
**   it without passing through any further optimizations.
**
** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have
**   no result (e.g. guarded assertions): FAILFOLD means the guard would
**   always fail, i.e. the current trace is pointless. DROPFOLD means
**   the guard is always true and has been eliminated. CONDFOLD is a
**   shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail).
**
** - Any other return value is interpreted as an IRRef or TRef. This
**   can be a reference to an existing or a newly created instruction.
**   Only the least-significant 16 bits (IRRef1) are used to form a TRef
**   which is finally returned to the caller.
**
** The FOLD engine receives instructions both from the trace recorder and
** substituted instructions from LOOP unrolling. This means all types
** of instructions may end up here, even though the recorder bypasses
** FOLD in some cases. Thus all loads, stores and allocations must have
** an any/any rule to avoid being passed on to CSE.
**
** Carefully read the following requirements before adding or modifying
** any fold rules:
**
** Requirement #1: All fold rules must preserve their destination type.
**
** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result).
** Never use lj_ir_knumint() which can have either a KINT or KNUM result.
**
** Requirement #2: Fold rules should not create *new* instructions which
** reference operands *across* PHIs.
**
** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the
** left operand is a PHI. Then fleft->op1 would point across the PHI
** frontier to an invariant instruction. Adding a PHI for this instruction
** would be counterproductive. The solution is to add a barrier which
** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case.
** The only exception is for recurrences with high latencies like
** repeated int->num->int conversions.
**
** One could relax this condition a bit if the referenced instruction is
** a PHI, too. But this often leads to worse code due to excessive
** register shuffling.
**
** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though.
** Even returning fleft->op1 would be ok, because a new PHI will be added,
** if needed. But again, this leads to excessive register shuffling and
** should be avoided.
**
** Requirement #3: The set of all fold rules must be monotonic to guarantee
** termination.
**
** The goal is optimization, so one primarily wants to add strength-reducing
** rules. This means eliminating an instruction or replacing an instruction
** with one or more simpler instructions. Don't add fold rules which point
** into the other direction.
**
** Some rules (like commutativity) do not directly reduce the strength of
** an instruction, but enable other fold rules (e.g. by moving constants
** to the right operand). These rules must be made unidirectional to avoid
** cycles.
**
** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it.
*/
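/* Illustrative sketch (not part of the engine): the 24 bit key and its
** masking could conceptually be built like this. The exact shifts and
** hash table layout used by the generated matcher may differ.
**
**   uint32_t key  = ((uint32_t)ins << 16) | ((uint32_t)left << 8) | right;
**   uint32_t try1 = key;               -- ins left right
**   uint32_t try2 = key & 0xff00ffu;   -- ins any  right
**   uint32_t try3 = key & 0xffff00u;   -- ins left any
**   uint32_t try4 = key & 0xff0000u;   -- ins any  any
*/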
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)		(&J->cur.ir[(ref)])
#define fins		(&J->fold.ins)
#define fleft		(J->fold.left)
#define fright		(J->fold.right)
#define knumleft	(ir_knum(fleft)->n)
#define knumright	(ir_knum(fright)->n)

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Fold function type. Fastcall on x86 significantly reduces their size. */
typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);

/* Macros for the fold specs, so buildvm can recognize them. */
#define LJFOLD(x)
#define LJFOLDX(x)
#define LJFOLDF(name)	static TRef LJ_FASTCALL fold_##name(jit_State *J)
/* Note: They must be at the start of a line or buildvm ignores them! */

/* Barrier to prevent using operands across PHIs. */
#define PHIBARRIER(ir)	if (irt_isphi((ir)->t)) return NEXTFOLD

/* Barrier to prevent folding across a GC step.
** GC steps can only happen at the head of a trace and at LOOP.
** And the GC is only driven forward if there's at least one allocation.
*/
#define gcstep_barrier(J, ref) \
  ((ref) < J->chain[IR_LOOP] && \
   (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
    J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
    J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \
    J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA]))
/* -- Constant folding for FP numbers ------------------------------------- */

LJFOLD(ADD KNUM KNUM)
LJFOLD(SUB KNUM KNUM)
LJFOLD(MUL KNUM KNUM)
LJFOLD(DIV KNUM KNUM)
LJFOLD(LDEXP KNUM KNUM)
LJFOLD(MIN KNUM KNUM)
LJFOLD(MAX KNUM KNUM)
LJFOLDF(kfold_numarith)
{
  lua_Number a = knumleft;
  lua_Number b = knumright;
  lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
  return lj_ir_knum(J, y);
}

LJFOLD(NEG KNUM FLOAD)
LJFOLD(ABS KNUM FLOAD)
LJFOLDF(kfold_numabsneg)
{
  lua_Number a = knumleft;
  lua_Number y = lj_vm_foldarith(a, a, fins->o - IR_ADD);
  return lj_ir_knum(J, y);
}

LJFOLD(LDEXP KNUM KINT)
LJFOLDF(kfold_ldexp)
{
#if LJ_TARGET_X86ORX64
  UNUSED(J);
  return NEXTFOLD;
#else
  return lj_ir_knum(J, ldexp(knumleft, fright->i));
#endif
}

LJFOLD(FPMATH KNUM any)
LJFOLDF(kfold_fpmath)
{
  lua_Number a = knumleft;
  lua_Number y = lj_vm_foldfpm(a, fins->op2);
  return lj_ir_knum(J, y);
}

LJFOLD(CALLN KNUM any)
LJFOLDF(kfold_fpcall1)
{
  const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
  if (CCI_TYPE(ci) == IRT_NUM) {
    double y = ((double (*)(double))ci->func)(knumleft);
    return lj_ir_knum(J, y);
  }
  return NEXTFOLD;
}

LJFOLD(CALLN CARG IRCALL_atan2)
LJFOLDF(kfold_fpcall2)
{
  if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
    const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
    double a = ir_knum(IR(fleft->op1))->n;
    double b = ir_knum(IR(fleft->op2))->n;
    double y = ((double (*)(double, double))ci->func)(a, b);
    return lj_ir_knum(J, y);
  }
  return NEXTFOLD;
}

LJFOLD(POW KNUM KNUM)
LJFOLDF(kfold_numpow)
{
  return lj_ir_knum(J, lj_vm_foldarith(knumleft, knumright, IR_POW - IR_ADD));
}

/* Must not use kfold_kref for numbers (could be NaN). */
LJFOLD(EQ KNUM KNUM)
LJFOLD(NE KNUM KNUM)
LJFOLD(LT KNUM KNUM)
LJFOLD(GE KNUM KNUM)
LJFOLD(LE KNUM KNUM)
LJFOLD(GT KNUM KNUM)
LJFOLD(ULT KNUM KNUM)
LJFOLD(UGE KNUM KNUM)
LJFOLD(ULE KNUM KNUM)
LJFOLD(UGT KNUM KNUM)
LJFOLDF(kfold_numcomp)
{
  return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o));
}
/* -- Constant folding for 32 bit integers -------------------------------- */

static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
{
  switch (op) {
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
  case IR_MUL: k1 *= k2; break;
  case IR_MOD: k1 = lj_vm_modi(k1, k2); break;
  case IR_NEG: k1 = (int32_t)(~(uint32_t)k1+1u); break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
  case IR_BSHL: k1 <<= (k2 & 31); break;
  case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break;
  case IR_BSAR: k1 >>= (k2 & 31); break;
  case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break;
  case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
  case IR_MIN: k1 = k1 < k2 ? k1 : k2; break;
  case IR_MAX: k1 = k1 > k2 ? k1 : k2; break;
  default: lj_assertX(0, "bad IR op %d", op); break;
  }
  return k1;
}
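/* Side note (illustrative, not used by the code): IR_NEG above negates via
** ~(uint32_t)k1+1u instead of -k1, because negating INT32_MIN with plain
** signed arithmetic would be undefined behavior in C. The unsigned
** two's-complement form wraps cleanly:
**
**   k1 = (int32_t)0x80000000;        -- INT32_MIN
**   (int32_t)(~(uint32_t)k1 + 1u);   -- 0x80000000 again, no UB
*/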
LJFOLD(ADD KINT KINT)
LJFOLD(SUB KINT KINT)
LJFOLD(MUL KINT KINT)
LJFOLD(MOD KINT KINT)
LJFOLD(NEG KINT KINT)
LJFOLD(BAND KINT KINT)
LJFOLD(BOR KINT KINT)
LJFOLD(BXOR KINT KINT)
LJFOLD(BSHL KINT KINT)
LJFOLD(BSHR KINT KINT)
LJFOLD(BSAR KINT KINT)
LJFOLD(BROL KINT KINT)
LJFOLD(BROR KINT KINT)
LJFOLD(MIN KINT KINT)
LJFOLD(MAX KINT KINT)
LJFOLDF(kfold_intarith)
{
  return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
}

LJFOLD(ADDOV KINT KINT)
LJFOLD(SUBOV KINT KINT)
LJFOLD(MULOV KINT KINT)
LJFOLDF(kfold_intovarith)
{
  lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i,
				 fins->o - IR_ADDOV);
  int32_t k = lj_num2int(n);
  if (n != (lua_Number)k)
    return FAILFOLD;
  return INTFOLD(k);
}

LJFOLD(BNOT KINT)
LJFOLDF(kfold_bnot)
{
  return INTFOLD(~fleft->i);
}

LJFOLD(BSWAP KINT)
LJFOLDF(kfold_bswap)
{
  return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i));
}

LJFOLD(LT KINT KINT)
LJFOLD(GE KINT KINT)
LJFOLD(LE KINT KINT)
LJFOLD(GT KINT KINT)
LJFOLD(ULT KINT KINT)
LJFOLD(UGE KINT KINT)
LJFOLD(ULE KINT KINT)
LJFOLD(UGT KINT KINT)
LJFOLD(ABC KINT KINT)
LJFOLDF(kfold_intcomp)
{
  int32_t a = fleft->i, b = fright->i;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD(a < b);
  case IR_GE: return CONDFOLD(a >= b);
  case IR_LE: return CONDFOLD(a <= b);
  case IR_GT: return CONDFOLD(a > b);
  case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b);
  case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b);
  case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
  case IR_ABC:
  case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
  default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
  }
}

LJFOLD(UGE any KINT)
LJFOLDF(kfold_intcomp0)
{
  if (fright->i == 0)
    return DROPFOLD;
  return NEXTFOLD;
}
/* -- Constant folding for 64 bit integers -------------------------------- */

static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
				 IROp op)
{
  UNUSED(J);
#if LJ_HASFFI
  switch (op) {
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
  case IR_MUL: k1 *= k2; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
  case IR_BSHL: k1 <<= (k2 & 63); break;
  case IR_BSHR: k1 >>= (k2 & 63); break;
  case IR_BSAR: k1 = (uint64_t)((int64_t)k1 >> (k2 & 63)); break;
  case IR_BROL: k1 = lj_rol(k1, (k2 & 63)); break;
  case IR_BROR: k1 = lj_ror(k1, (k2 & 63)); break;
  default: lj_assertJ(0, "bad IR op %d", op); break;
  }
#else
  UNUSED(k2); UNUSED(op);
  lj_assertJ(0, "FFI IR op without FFI");
#endif
  return k1;
}

LJFOLD(ADD KINT64 KINT64)
LJFOLD(SUB KINT64 KINT64)
LJFOLD(MUL KINT64 KINT64)
LJFOLD(BAND KINT64 KINT64)
LJFOLD(BOR KINT64 KINT64)
LJFOLD(BXOR KINT64 KINT64)
LJFOLDF(kfold_int64arith)
{
  return INT64FOLD(kfold_int64arith(J, ir_k64(fleft)->u64,
				    ir_k64(fright)->u64, (IROp)fins->o));
}

LJFOLD(DIV KINT64 KINT64)
LJFOLD(MOD KINT64 KINT64)
LJFOLD(POW KINT64 KINT64)
LJFOLDF(kfold_int64arith2)
{
#if LJ_HASFFI
  uint64_t k1 = ir_k64(fleft)->u64, k2 = ir_k64(fright)->u64;
  if (irt_isi64(fins->t)) {
    k1 = fins->o == IR_DIV ? lj_carith_divi64((int64_t)k1, (int64_t)k2) :
	 fins->o == IR_MOD ? lj_carith_modi64((int64_t)k1, (int64_t)k2) :
			     lj_carith_powi64((int64_t)k1, (int64_t)k2);
  } else {
    k1 = fins->o == IR_DIV ? lj_carith_divu64(k1, k2) :
	 fins->o == IR_MOD ? lj_carith_modu64(k1, k2) :
			     lj_carith_powu64(k1, k2);
  }
  return INT64FOLD(k1);
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(BSHL KINT64 KINT)
LJFOLD(BSHR KINT64 KINT)
LJFOLD(BSAR KINT64 KINT)
LJFOLD(BROL KINT64 KINT)
LJFOLD(BROR KINT64 KINT)
LJFOLDF(kfold_int64shift)
{
#if LJ_HASFFI
  uint64_t k = ir_k64(fleft)->u64;
  int32_t sh = (fright->i & 63);
  return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL));
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(BNOT KINT64)
LJFOLDF(kfold_bnot64)
{
#if LJ_HASFFI
  return INT64FOLD(~ir_k64(fleft)->u64);
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(BSWAP KINT64)
LJFOLDF(kfold_bswap64)
{
#if LJ_HASFFI
  return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64));
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(LT KINT64 KINT64)
LJFOLD(GE KINT64 KINT64)
LJFOLD(LE KINT64 KINT64)
LJFOLD(GT KINT64 KINT64)
LJFOLD(ULT KINT64 KINT64)
LJFOLD(UGE KINT64 KINT64)
LJFOLD(ULE KINT64 KINT64)
LJFOLD(UGT KINT64 KINT64)
LJFOLDF(kfold_int64comp)
{
#if LJ_HASFFI
  uint64_t a = ir_k64(fleft)->u64, b = ir_k64(fright)->u64;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD((int64_t)a < (int64_t)b);
  case IR_GE: return CONDFOLD((int64_t)a >= (int64_t)b);
  case IR_LE: return CONDFOLD((int64_t)a <= (int64_t)b);
  case IR_GT: return CONDFOLD((int64_t)a > (int64_t)b);
  case IR_ULT: return CONDFOLD(a < b);
  case IR_UGE: return CONDFOLD(a >= b);
  case IR_ULE: return CONDFOLD(a <= b);
  case IR_UGT: return CONDFOLD(a > b);
  default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
  }
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(UGE any KINT64)
LJFOLDF(kfold_int64comp0)
{
#if LJ_HASFFI
  if (ir_k64(fright)->u64 == 0)
    return DROPFOLD;
  return NEXTFOLD;
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}
/* -- Constant folding for strings ---------------------------------------- */

LJFOLD(SNEW KKPTR KINT)
LJFOLDF(kfold_snew_kptr)
{
  GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i);
  return lj_ir_kstr(J, s);
}

LJFOLD(SNEW any KINT)
LJFOLD(XSNEW any KINT)
LJFOLDF(kfold_snew_empty)
{
  if (fright->i == 0)
    return lj_ir_kstr(J, &J2G(J)->strempty);
  return NEXTFOLD;
}

LJFOLD(STRREF KGC KINT)
LJFOLDF(kfold_strref)
{
  GCstr *str = ir_kstr(fleft);
  lj_assertJ((MSize)fright->i <= str->len, "bad string ref");
  return lj_ir_kkptr(J, (char *)strdata(str) + fright->i);
}

LJFOLD(STRREF SNEW any)
LJFOLDF(kfold_strref_snew)
{
  PHIBARRIER(fleft);
  if (irref_isk(fins->op2) && fright->i == 0) {
    return fleft->op1;  /* strref(snew(ptr, len), 0) ==> ptr */
  } else {
    /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
    IRIns *ir = IR(fleft->op1);
    if (ir->o == IR_STRREF) {
      IRRef1 str = ir->op1;  /* IRIns * is not valid across emitir. */
      PHIBARRIER(ir);
      fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2);  /* Clobbers fins! */
      fins->op1 = str;
      fins->ot = IRT(IR_STRREF, IRT_PGC);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(CALLN CARG IRCALL_lj_str_cmp)
LJFOLDF(kfold_strcmp)
{
  if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
    GCstr *a = ir_kstr(IR(fleft->op1));
    GCstr *b = ir_kstr(IR(fleft->op2));
    return INTFOLD(lj_str_cmp(a, b));
  }
  return NEXTFOLD;
}
/* -- Constant folding and forwarding for buffers ------------------------- */

/*
** Buffer ops perform stores, but their effect is limited to the buffer
** itself. Also, buffer ops are chained: a use of an op implies a use of
** all other ops up the chain. Conversely, if an op is unused, all ops
** up the chain can go unused. This largely eliminates the need to treat
** them as stores.
**
** Alas, treating them as normal (IRM_N) ops doesn't work, because they
** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP
** or if FOLD is disabled.
**
** The compromise is to declare them as loads, emit them like stores and
** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
** fragments left over from CSE are eliminated by DCE.
**
** The string buffer methods emit a USE instead of a BUFSTR to keep the
** chain alive.
*/

LJFOLD(BUFHDR any any)
LJFOLDF(bufhdr_merge)
{
  return fins->op2 == IRBUFHDR_WRITE ? CSEFOLD : EMITFOLD;
}

LJFOLD(BUFPUT any BUFSTR)
LJFOLDF(bufput_bufstr)
{
  if ((J->flags & JIT_F_OPT_FWD)) {
    IRRef hdr = fright->op2;
    /* New buffer, no other buffer op in between and same buffer? */
    if (fleft->o == IR_BUFHDR && fleft->op2 == IRBUFHDR_RESET &&
	fleft->prev == hdr &&
	fleft->op1 == IR(hdr)->op1 &&
	!(irt_isphi(fright->t) && IR(hdr)->prev) &&
	(!LJ_HASBUFFER || J->chain[IR_CALLA] < hdr)) {
      IRRef ref = fins->op1;
      IR(ref)->op2 = IRBUFHDR_APPEND;  /* Modify BUFHDR. */
      IR(ref)->op1 = fright->op1;
      return ref;
    }
    /* Replay puts to global temporary buffer. */
    if (IR(hdr)->op2 == IRBUFHDR_RESET && !irt_isphi(fright->t)) {
      IRIns *ir = IR(fright->op1);
      /* For now only handle single string.reverse .lower .upper .rep. */
      if (ir->o == IR_CALLL &&
	  ir->op2 >= IRCALL_lj_buf_putstr_reverse &&
	  ir->op2 <= IRCALL_lj_buf_putstr_rep) {
	IRIns *carg1 = IR(ir->op1);
	if (ir->op2 == IRCALL_lj_buf_putstr_rep) {
	  IRIns *carg2 = IR(carg1->op1);
	  if (carg2->op1 == hdr) {
	    return lj_ir_call(J, ir->op2, fins->op1, carg2->op2, carg1->op2);
	  }
	} else if (carg1->op1 == hdr) {
	  return lj_ir_call(J, ir->op2, fins->op1, carg1->op2);
	}
      }
    }
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

LJFOLD(BUFPUT any any)
LJFOLDF(bufput_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) {
    GCstr *s2 = ir_kstr(fright);
    if (s2->len == 0) {  /* Empty string? */
      return LEFTFOLD;
    } else {
      if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) &&
	  !irt_isphi(fleft->t)) {  /* Join two constant string puts in a row. */
	GCstr *s1 = ir_kstr(IR(fleft->op2));
	IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2));
	/* lj_ir_kstr() may realloc the IR and invalidates any IRIns *. */
	IR(fins->op1)->op2 = kref;  /* Modify previous BUFPUT. */
	return fins->op1;
      }
    }
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

LJFOLD(BUFSTR any any)
LJFOLDF(bufstr_kfold_cse)
{
  lj_assertJ(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT ||
	     fleft->o == IR_CALLL,
	     "bad buffer constructor IR op %d", fleft->o);
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    if (fleft->o == IR_BUFHDR) {  /* No put operations? */
      if (fleft->op2 == IRBUFHDR_RESET)  /* Empty buffer? */
	return lj_ir_kstr(J, &J2G(J)->strempty);
      fins->op1 = fleft->op1;
      fins->op2 = fleft->prev;  /* Relies on checks in bufput_append. */
      return CSEFOLD;
    } else if (fleft->o == IR_BUFPUT) {
      IRIns *irb = IR(fleft->op1);
      if (irb->o == IR_BUFHDR && irb->op2 == IRBUFHDR_RESET)
	return fleft->op2;  /* Shortcut for a single put operation. */
    }
  }
  /* Try to CSE the whole chain. */
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref = J->chain[IR_BUFSTR];
    while (ref) {
      IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1);
      while (ira->o == irb->o && ira->op2 == irb->op2) {
	lj_assertJ(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
		   ira->o == IR_CALLL || ira->o == IR_CARG,
		   "bad buffer constructor IR op %d", ira->o);
	if (ira->o == IR_BUFHDR && ira->op2 == IRBUFHDR_RESET)
	  return ref;  /* CSE succeeded. */
	if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab)
	  break;
	ira = IR(ira->op1);
	irb = IR(irb->op1);
      }
      ref = irs->prev;
    }
  }
  return EMITFOLD;  /* No CSE possible. */
}

LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse)
LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper)
LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted)
LJFOLDF(bufput_kfold_op)
{
  if (irref_isk(fleft->op2)) {
    const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
    SBuf *sb = lj_buf_tmp_(J->L);
    sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb,
						 ir_kstr(IR(fleft->op2)));
    fins->o = IR_BUFPUT;
    fins->op1 = fleft->op1;
    fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
    return RETRYFOLD;
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep)
LJFOLDF(bufput_kfold_rep)
{
  if (irref_isk(fleft->op2)) {
    IRIns *irc = IR(fleft->op1);
    if (irref_isk(irc->op2)) {
      SBuf *sb = lj_buf_tmp_(J->L);
      sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i);
      fins->o = IR_BUFPUT;
      fins->op1 = irc->op1;
      fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
      return RETRYFOLD;
    }
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar)
LJFOLDF(bufput_kfold_fmt)
{
  IRIns *irc = IR(fleft->op1);
  lj_assertJ(irref_isk(irc->op2), "SFormat must be const");
  if (irref_isk(fleft->op2)) {
    SFormat sf = (SFormat)IR(irc->op2)->i;
    IRIns *ira = IR(fleft->op2);
    SBuf *sb = lj_buf_tmp_(J->L);
    switch (fins->op2) {
    case IRCALL_lj_strfmt_putfxint:
      sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64);
      break;
    case IRCALL_lj_strfmt_putfstr:
      sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira));
      break;
    case IRCALL_lj_strfmt_putfchar:
      sb = lj_strfmt_putfchar(sb, sf, ira->i);
      break;
    case IRCALL_lj_strfmt_putfnum_int:
    case IRCALL_lj_strfmt_putfnum_uint:
    case IRCALL_lj_strfmt_putfnum:
    default: {
      const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
      sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf,
							   ir_knum(ira)->n);
      break;
      }
    }
    fins->o = IR_BUFPUT;
    fins->op1 = irc->op1;
    fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
    return RETRYFOLD;
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}
/* -- Constant folding of pointer arithmetic ------------------------------ */

LJFOLD(ADD KGC KINT)
LJFOLD(ADD KGC KINT64)
LJFOLDF(kfold_add_kgc)
{
  GCobj *o = ir_kgc(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
#if LJ_HASFFI
  if (irt_iscdata(fleft->t)) {
    CType *ct = ctype_raw(ctype_ctsG(J2G(J)), gco2cd(o)->ctypeid);
    if (ctype_isnum(ct->info) || ctype_isenum(ct->info) ||
	ctype_isptr(ct->info) || ctype_isfunc(ct->info) ||
	ctype_iscomplex(ct->info) || ctype_isvector(ct->info))
      return lj_ir_kkptr(J, (char *)o + ofs);
  }
#endif
  return lj_ir_kptr(J, (char *)o + ofs);
}

LJFOLD(ADD KPTR KINT)
LJFOLD(ADD KPTR KINT64)
LJFOLD(ADD KKPTR KINT)
LJFOLD(ADD KKPTR KINT64)
LJFOLDF(kfold_add_kptr)
{
  void *p = ir_kptr(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
  return lj_ir_kptr_(J, fleft->o, (char *)p + ofs);
}

LJFOLD(ADD any KGC)
LJFOLD(ADD any KPTR)
LJFOLD(ADD any KKPTR)
LJFOLDF(kfold_add_kright)
{
  if (fleft->o == IR_KINT || fleft->o == IR_KINT64) {
    IRRef1 tmp = fins->op1; fins->op1 = fins->op2; fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
/* -- Constant folding of conversions ------------------------------------- */

LJFOLD(TOBIT KNUM KNUM)
LJFOLDF(kfold_tobit)
{
  return INTFOLD(lj_num2bit(knumleft));
}

LJFOLD(CONV KINT IRCONV_NUM_INT)
LJFOLDF(kfold_conv_kint_num)
{
  return lj_ir_knum(J, (lua_Number)fleft->i);
}

LJFOLD(CONV KINT IRCONV_NUM_U32)
LJFOLDF(kfold_conv_kintu32_num)
{
  return lj_ir_knum(J, (lua_Number)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT IRCONV_INT_I8)
LJFOLD(CONV KINT IRCONV_INT_U8)
LJFOLD(CONV KINT IRCONV_INT_I16)
LJFOLD(CONV KINT IRCONV_INT_U16)
LJFOLDF(kfold_conv_kint_ext)
{
  int32_t k = fleft->i;
  if ((fins->op2 & IRCONV_SRCMASK) == IRT_I8) k = (int8_t)k;
  else if ((fins->op2 & IRCONV_SRCMASK) == IRT_U8) k = (uint8_t)k;
  else if ((fins->op2 & IRCONV_SRCMASK) == IRT_I16) k = (int16_t)k;
  else k = (uint16_t)k;
  return INTFOLD(k);
}

LJFOLD(CONV KINT IRCONV_I64_INT)
LJFOLD(CONV KINT IRCONV_U64_INT)
LJFOLD(CONV KINT IRCONV_I64_U32)
LJFOLD(CONV KINT IRCONV_U64_U32)
LJFOLDF(kfold_conv_kint_i64)
{
  if ((fins->op2 & IRCONV_SEXT))
    return INT64FOLD((uint64_t)(int64_t)fleft->i);
  else
    return INT64FOLD((uint64_t)(int64_t)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT64 IRCONV_NUM_I64)
LJFOLDF(kfold_conv_kint64_num_i64)
{
  return lj_ir_knum(J, (lua_Number)(int64_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_NUM_U64)
LJFOLDF(kfold_conv_kint64_num_u64)
{
  return lj_ir_knum(J, (lua_Number)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_INT_I64)
LJFOLD(CONV KINT64 IRCONV_U32_I64)
LJFOLDF(kfold_conv_kint64_int_i64)
{
  return INTFOLD((int32_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KNUM IRCONV_INT_NUM)
LJFOLDF(kfold_conv_knum_int_num)
{
  lua_Number n = knumleft;
  int32_t k = lj_num2int(n);
  if (irt_isguard(fins->t) && n != (lua_Number)k) {
    /* We're about to create a guard which always fails, like CONV +1.5.
    ** Some pathological loops cause this during LICM, e.g.:
    **   local x,k,t = 0,1.5,{1,[1.5]=2}
    **   for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
    **   assert(x == 300)
    */
    return FAILFOLD;
  }
  return INTFOLD(k);
}

LJFOLD(CONV KNUM IRCONV_U32_NUM)
LJFOLDF(kfold_conv_knum_u32_num)
{
#ifdef _MSC_VER
  {  /* Workaround for MSVC bug. */
    volatile uint32_t u = (uint32_t)knumleft;
    return INTFOLD((int32_t)u);
  }
#else
  return INTFOLD((int32_t)(uint32_t)knumleft);
#endif
}

LJFOLD(CONV KNUM IRCONV_I64_NUM)
LJFOLDF(kfold_conv_knum_i64_num)
{
  return INT64FOLD((uint64_t)(int64_t)knumleft);
}

LJFOLD(CONV KNUM IRCONV_U64_NUM)
LJFOLDF(kfold_conv_knum_u64_num)
{
  return INT64FOLD(lj_num2u64(knumleft));
}

LJFOLD(TOSTR KNUM any)
LJFOLDF(kfold_tostr_knum)
{
  return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft)));
}

LJFOLD(TOSTR KINT any)
LJFOLDF(kfold_tostr_kint)
{
  return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ?
		       lj_strfmt_int(J->L, fleft->i) :
		       lj_strfmt_char(J->L, fleft->i));
}

LJFOLD(STRTO KGC)
LJFOLDF(kfold_strto)
{
  TValue n;
  if (lj_strscan_num(ir_kstr(fleft), &n))
    return lj_ir_knum(J, numV(&n));
  return FAILFOLD;
}
/* -- Constant folding of equality checks --------------------------------- */

/* Don't constant-fold away FLOAD checks against KNULL. */
LJFOLD(EQ FLOAD KNULL)
LJFOLD(NE FLOAD KNULL)
LJFOLDX(lj_opt_cse)

/* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
LJFOLD(EQ any KNULL)
LJFOLD(NE any KNULL)
LJFOLD(EQ KNULL any)
LJFOLD(NE KNULL any)
LJFOLD(EQ KINT KINT)  /* Constants are unique, so same refs <==> same value. */
LJFOLD(NE KINT KINT)
LJFOLD(EQ KINT64 KINT64)
LJFOLD(NE KINT64 KINT64)
LJFOLD(EQ KGC KGC)
LJFOLD(NE KGC KGC)
LJFOLDF(kfold_kref)
{
  return CONDFOLD((fins->op1 == fins->op2) ^ (fins->o == IR_NE));
}
/* -- Algebraic shortcuts ------------------------------------------------- */

LJFOLD(FPMATH FPMATH IRFPM_FLOOR)
LJFOLD(FPMATH FPMATH IRFPM_CEIL)
LJFOLD(FPMATH FPMATH IRFPM_TRUNC)
LJFOLDF(shortcut_round)
{
  IRFPMathOp op = (IRFPMathOp)fleft->op2;
  if (op == IRFPM_FLOOR || op == IRFPM_CEIL || op == IRFPM_TRUNC)
    return LEFTFOLD;  /* round(round_left(x)) = round_left(x) */
  return NEXTFOLD;
}

LJFOLD(ABS ABS FLOAD)
LJFOLDF(shortcut_left)
{
  return LEFTFOLD;  /* f(g(x)) ==> g(x) */
}

LJFOLD(ABS NEG FLOAD)
LJFOLDF(shortcut_dropleft)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* abs(neg(x)) ==> abs(x) */
  return RETRYFOLD;
}

/* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
LJFOLD(NEG NEG any)
LJFOLD(BNOT BNOT)
LJFOLD(BSWAP BSWAP)
LJFOLDF(shortcut_leftleft)
{
  PHIBARRIER(fleft);  /* See above. Fold would be ok, but not beneficial. */
  return fleft->op1;  /* f(g(x)) ==> x */
}
/* -- FP algebraic simplifications ---------------------------------------- */

/* FP arithmetic is tricky -- there's not much to simplify.
** Please note the following common pitfalls before sending "improvements":
**   x+0 ==> x  is INVALID for x=-0
**   0-x ==> -x is INVALID for x=+0
**   x*0 ==> 0  is INVALID for x=-0, x=+-Inf or x=NaN
*/

LJFOLD(ADD NEG any)
LJFOLDF(simplify_numadd_negx)
{
  PHIBARRIER(fleft);
  fins->o = IR_SUB;  /* (-a) + b ==> b - a */
  fins->op1 = fins->op2;
  fins->op2 = fleft->op1;
  return RETRYFOLD;
}

LJFOLD(ADD any NEG)
LJFOLDF(simplify_numadd_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_SUB;  /* a + (-b) ==> a - b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(SUB any KNUM)
LJFOLDF(simplify_numsub_k)
{
  if (ir_knum(fright)->u64 == 0)  /* x - (+0) ==> x */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB NEG KNUM)
LJFOLDF(simplify_numsub_negk)
{
  PHIBARRIER(fleft);
  fins->op2 = fleft->op1;  /* (-x) - k ==> (-k) - x */
  fins->op1 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(SUB any NEG)
LJFOLDF(simplify_numsub_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_ADD;  /* a - (-b) ==> a + b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(MUL any KNUM)
LJFOLD(DIV any KNUM)
LJFOLDF(simplify_nummuldiv_k)
{
  lua_Number n = knumright;
  if (n == 1.0) {  /* x o 1 ==> x */
    return LEFTFOLD;
  } else if (n == -1.0) {  /* x o -1 ==> -x */
    IRRef op1 = fins->op1;
    fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG);  /* Modifies fins. */
    fins->op1 = op1;
    fins->o = IR_NEG;
    return RETRYFOLD;
  } else if (fins->o == IR_MUL && n == 2.0) {  /* x * 2 ==> x + x */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  } else if (fins->o == IR_DIV) {  /* x / 2^k ==> x * 2^-k */
    uint64_t u = ir_knum(fright)->u64;
    uint32_t ex = ((uint32_t)(u >> 52) & 0x7ff);
    if ((u & U64x(000fffff,ffffffff)) == 0 && ex - 1 < 0x7fd) {
      u = (u & ((uint64_t)1 << 63)) | ((uint64_t)(0x7fe - ex) << 52);
      fins->o = IR_MUL;  /* Multiply by exact reciprocal. */
      fins->op2 = lj_ir_knum_u64(J, u);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}
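/* Worked example for the reciprocal rewrite above (illustrative only):
** dividing by 4.0 becomes multiplying by 0.25. For k = 4.0 the IEEE-754
** bits are u = 0x4010000000000000, so the mantissa field is zero and
** ex = 0x401. The rewritten constant gets exponent 0x7fe - 0x401 = 0x3fd,
** i.e. u' = 0x3fd0000000000000, which is exactly 2^-2 = 0.25. The
** ex - 1 < 0x7fd check rules out zeros, subnormals, infinities and NaNs,
** for which no exact reciprocal power of two exists.
*/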
LJFOLD(MUL NEG KNUM)
LJFOLD(DIV NEG KNUM)
LJFOLDF(simplify_nummuldiv_negk)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* (-a) o k ==> a o (-k) */
  fins->op2 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(MUL NEG NEG)
LJFOLD(DIV NEG NEG)
LJFOLDF(simplify_nummuldiv_negneg)
{
  PHIBARRIER(fleft);
  PHIBARRIER(fright);
  fins->op1 = fleft->op1;  /* (-a) o (-b) ==> a o b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(POW any KNUM)
LJFOLDF(simplify_numpow_k)
{
  if (knumright == 0.0)  /* x ^ 0 ==> 1 */
    return lj_ir_knum_one(J);  /* Result must be a number, not an int. */
  else if (knumright == 1.0)  /* x ^ 1 ==> x */
    return LEFTFOLD;
  else if (knumright == 2.0)  /* x ^ 2 ==> x * x */
    return emitir(IRTN(IR_MUL), fins->op1, fins->op1);
  else
    return NEXTFOLD;
}
/* -- Simplify conversions ------------------------------------------------ */

LJFOLD(CONV CONV IRCONV_NUM_INT)  /* _NUM */
LJFOLDF(shortcut_conv_num_int)
{
  PHIBARRIER(fleft);
  /* Only safe with a guarded conversion to int. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM && irt_isguard(fleft->t))
    return fleft->op1;  /* f(g(x)) ==> x */
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_NUM)  /* _INT */
LJFOLD(CONV CONV IRCONV_U32_NUM)  /* _U32 */
LJFOLDF(simplify_conv_int_num)
{
  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
  if ((fleft->op2 & IRCONV_SRCMASK) ==
      ((fins->op2 & IRCONV_DSTMASK) >> IRCONV_DSH))
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_I64_NUM)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U64_NUM)  /* _INT or _U32 */
LJFOLDF(simplify_conv_i64_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
    /* Reduce to a sign-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
    return RETRYFOLD;
  } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
#if LJ_TARGET_X64
    return fleft->op1;
#else
    /* Reduce to a zero-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = (IRT_I64<<5)|IRT_U32;
    return RETRYFOLD;
#endif
  }
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_I64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_INT_U64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U32_I64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U32_U64)  /* _INT or _U32 */
LJFOLDF(simplify_conv_int_i64)
{
  int src;
  PHIBARRIER(fleft);
  src = (fleft->op2 & IRCONV_SRCMASK);
  if (src == IRT_INT || src == IRT_U32) {
    if (src == ((fins->op2 & IRCONV_DSTMASK) >> IRCONV_DSH)) {
      return fleft->op1;
    } else {
      fins->op2 = ((fins->op2 & IRCONV_DSTMASK) | src);
      fins->op1 = fleft->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_FLOAT_NUM)  /* _FLOAT */
LJFOLDF(simplify_conv_flt_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_FLOAT)
    return fleft->op1;
  return NEXTFOLD;
}

/* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(TOBIT CONV KNUM)
LJFOLDF(simplify_tobit_conv)
{
  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
    lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg");
    return fleft->op1;
  } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
    lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg");
    fins->o = IR_CONV;
    fins->op1 = fleft->op1;
    fins->op2 = (IRT_INT<<5)|IRT_U32;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(FPMATH CONV IRFPM_FLOOR)
LJFOLD(FPMATH CONV IRFPM_CEIL)
LJFOLD(FPMATH CONV IRFPM_TRUNC)
LJFOLDF(simplify_floor_conv)
{
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
      (fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
    return LEFTFOLD;
  return NEXTFOLD;
}

/* Strength reduction of widening. */
LJFOLD(CONV any IRCONV_I64_INT)
LJFOLD(CONV any IRCONV_U64_INT)
LJFOLDF(simplify_conv_sext)
{
  IRRef ref = fins->op1;
  int64_t ofs = 0;
  if (!(fins->op2 & IRCONV_SEXT))
    return NEXTFOLD;
  PHIBARRIER(fleft);
  if (fleft->o == IR_XLOAD && (irt_isu8(fleft->t) || irt_isu16(fleft->t)))
    goto ok_reduce;
  if (fleft->o == IR_ADD && irref_isk(fleft->op2)) {
    ofs = (int64_t)IR(fleft->op2)->i;
    ref = fleft->op1;
  }
  /* Use scalar evolution analysis results to strength-reduce sign-extension. */
  if (ref == J->scev.idx) {
    IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
    lj_assertJ(irt_isint(J->scev.t), "only int SCEV supported");
    if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) {
    ok_reduce:
#if LJ_TARGET_X64
      /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
      return LEFTFOLD;
#else
      /* Reduce to a (cheaper) zero-extension. */
      fins->op2 &= ~IRCONV_SEXT;
      return RETRYFOLD;
#endif
    }
  }
  return NEXTFOLD;
}

/* Strength reduction of narrowing. */
LJFOLD(CONV ADD IRCONV_INT_I64)
LJFOLD(CONV SUB IRCONV_INT_I64)
LJFOLD(CONV MUL IRCONV_INT_I64)
LJFOLD(CONV ADD IRCONV_INT_U64)
LJFOLD(CONV SUB IRCONV_INT_U64)
LJFOLD(CONV MUL IRCONV_INT_U64)
LJFOLD(CONV ADD IRCONV_U32_I64)
LJFOLD(CONV SUB IRCONV_U32_I64)
LJFOLD(CONV MUL IRCONV_U32_I64)
LJFOLD(CONV ADD IRCONV_U32_U64)
LJFOLD(CONV SUB IRCONV_U32_U64)
LJFOLD(CONV MUL IRCONV_U32_U64)
LJFOLDF(simplify_conv_narrow)
{
#if LJ_64
  UNUSED(J);
  return NEXTFOLD;
#else
  IROp op = (IROp)fleft->o;
  IRType t = irt_type(fins->t);
  IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
  PHIBARRIER(fleft);
  op1 = emitir(IRT(IR_CONV, t), op1, mode);
  op2 = emitir(IRT(IR_CONV, t), op2, mode);
  fins->ot = IRT(op, t);
  fins->op1 = op1;
  fins->op2 = op2;
  return RETRYFOLD;
#endif
}

/* Special CSE rule for CONV. */
LJFOLD(CONV any any)
LJFOLDF(cse_conv)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
    uint8_t guard = irt_isguard(fins->t);
    IRRef ref = J->chain[IR_CONV];
    while (ref > op1) {
      IRIns *ir = IR(ref);
      /* Commoning with stronger checks is ok. */
      if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
	  irt_isguard(ir->t) >= guard)
	return ref;
      ref = ir->prev;
    }
  }
  return EMITFOLD;  /* No fallthrough to regular CSE. */
}

/* FP conversion narrowing. */
LJFOLD(TOBIT ADD KNUM)
LJFOLD(TOBIT SUB KNUM)
LJFOLD(CONV ADD IRCONV_INT_NUM)
LJFOLD(CONV SUB IRCONV_INT_NUM)
LJFOLD(CONV ADD IRCONV_I64_NUM)
LJFOLD(CONV SUB IRCONV_I64_NUM)
LJFOLDF(narrow_convert)
{
  PHIBARRIER(fleft);
  /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
  if (J->chain[IR_LOOP])
    return NEXTFOLD;
  lj_assertJ(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT,
	     "unexpected CONV TOBIT");
  return lj_opt_narrow_convert(J);
}
/* -- Integer algebraic simplifications ----------------------------------- */

LJFOLD(ADD any KINT)
LJFOLD(ADDOV any KINT)
LJFOLD(SUBOV any KINT)
LJFOLDF(simplify_intadd_k)
{
  if (fright->i == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(MULOV any KINT)
LJFOLDF(simplify_intmul_k)
{
  if (fright->i == 0)  /* i * 0 ==> 0 */
    return RIGHTFOLD;
  if (fright->i == 1)  /* i * 1 ==> i */
    return LEFTFOLD;
  if (fright->i == 2) {  /* i * 2 ==> i + i */
    fins->o = IR_ADDOV;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any KINT)
LJFOLDF(simplify_intsub_k)
{
  if (fright->i == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint(J, (int32_t)(~(uint32_t)fright->i+1u));  /* Overflow for -2^31 ok. */
  return RETRYFOLD;
}

LJFOLD(SUB KINT any)
LJFOLD(SUB KINT64 any)
LJFOLDF(simplify_intsub_kleft)
{
  if (fleft->o == IR_KINT ? (fleft->i == 0) : (ir_kint64(fleft)->u64 == 0)) {
    fins->o = IR_NEG;  /* 0 - i ==> -i */
    fins->op1 = fins->op2;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(ADD any KINT64)
LJFOLDF(simplify_intadd_k64)
{
  if (ir_kint64(fright)->u64 == 0)  /* i + 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB any KINT64)
LJFOLDF(simplify_intsub_k64)
{
  uint64_t k = ir_kint64(fright)->u64;
  if (k == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint64(J, ~k+1u);
  return RETRYFOLD;
}
static TRef simplify_intmul_k(jit_State *J, int32_t k)
{
  /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2.
  ** But this is mainly intended for simple address arithmetic.
  ** Also it's easier for the backend to optimize the original multiplies.
  */
  if (k == 0) {  /* i * 0 ==> 0 */
    return RIGHTFOLD;
  } else if (k == 1) {  /* i * 1 ==> i */
    return LEFTFOLD;
  } else if ((k & (k-1)) == 0) {  /* i * 2^k ==> i << k */
    fins->o = IR_BSHL;
    fins->op2 = lj_ir_kint(J, lj_fls((uint32_t)k));
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
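/* Illustrative example for the power-of-two rewrite above: a multiply
** i * 8 satisfies (8 & 7) == 0, and lj_fls((uint32_t)8) yields 3 (the
** index of the highest set bit), so the instruction is rewritten to
** i << 3. Non-powers of two like 6 fail the (k & (k-1)) == 0 test and
** fall through to NEXTFOLD.
*/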
LJFOLD(MUL any KINT)
LJFOLDF(simplify_intmul_k32)
{
  if (fright->i >= 0)
    return simplify_intmul_k(J, fright->i);
  return NEXTFOLD;
}

LJFOLD(MUL any KINT64)
LJFOLDF(simplify_intmul_k64)
{
#if LJ_HASFFI
  if (ir_kint64(fright)->u64 < 0x80000000u)
    return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
  return NEXTFOLD;
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(MOD any KINT)
LJFOLDF(simplify_intmod_k)
{
  int32_t k = fright->i;
  lj_assertJ(k != 0, "integer mod 0");
  if (k > 0 && (k & (k-1)) == 0) {  /* i % (2^k) ==> i & (2^k-1) */
    fins->o = IR_BAND;
    fins->op2 = lj_ir_kint(J, k-1);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
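/* Side note (illustrative): the mask rewrite above is valid for negative
** dividends, too, because Lua's % is a floored modulo and two's-complement
** AND computes exactly that for power-of-two divisors:
**
**   (-3) % 8  ==> 5   (floored modulo)
**   (-3) & 7  ==> 5   (0xfffffffd & 0x00000007)
*/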
LJFOLD(MOD KINT any)
LJFOLDF(simplify_intmod_kleft)
{
  if (fleft->i == 0)
    return INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB any any)
LJFOLD(SUBOV any any)
LJFOLDF(simplify_intsub)
{
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))  /* i - i ==> 0 */
    return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB ADD any)
LJFOLDF(simplify_intsubadd_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1)  /* (i + j) - i ==> j */
      return fleft->op2;
    if (fins->op2 == fleft->op2)  /* (i + j) - j ==> i */
      return fleft->op1;
  }
  return NEXTFOLD;
}

LJFOLD(SUB SUB any)
LJFOLDF(simplify_intsubsub_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1) {  /* (i - j) - i ==> 0 - j */
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      fins->op2 = fleft->op2;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB any SUB)
LJFOLDF(simplify_intsubsub_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1)  /* i - (i - j) ==> j */
      return fright->op2;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any ADD)
LJFOLDF(simplify_intsubadd_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1) {  /* i - (i + j) ==> 0 - j */
      fins->op2 = fright->op2;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
    if (fins->op1 == fright->op2) {  /* i - (j + i) ==> 0 - j */
      fins->op2 = fright->op1;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB ADD ADD)
LJFOLDF(simplify_intsubaddadd_cancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    PHIBARRIER(fright);
    if (fleft->op1 == fright->op1) {  /* (i + j1) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op1 == fright->op2) {  /* (i + j1) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op1) {  /* (j1 + i) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op2) {  /* (j1 + i) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}
LJFOLD(BAND any KINT)
LJFOLD(BAND any KINT64)
LJFOLDF(simplify_band_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
				     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i & 0 ==> 0 */
    return RIGHTFOLD;
  if (k == -1)  /* i & -1 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BOR any KINT)
LJFOLD(BOR any KINT64)
LJFOLDF(simplify_bor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
				     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i | 0 ==> i */
    return LEFTFOLD;
  if (k == -1)  /* i | -1 ==> -1 */
    return RIGHTFOLD;
  return NEXTFOLD;
}

LJFOLD(BXOR any KINT)
LJFOLD(BXOR any KINT64)
LJFOLDF(simplify_bxor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
				     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i xor 0 ==> i */
    return LEFTFOLD;
  if (k == -1) {  /* i xor -1 ==> ~i */
    fins->o = IR_BNOT;
    fins->op2 = 0;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BSHL any KINT)
LJFOLD(BSHR any KINT)
LJFOLD(BSAR any KINT)
LJFOLD(BROL any KINT)
LJFOLD(BROR any KINT)
LJFOLDF(simplify_shift_ik)
{
  int32_t mask = irt_is64(fins->t) ? 63 : 31;
  int32_t k = (fright->i & mask);
  if (k == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  if (k == 1 && fins->o == IR_BSHL) {  /* i << 1 ==> i + i */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  if (k != fright->i) {  /* i o k ==> i o (k & mask) */
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
#ifndef LJ_TARGET_UNIFYROT
  if (fins->o == IR_BROR) {  /* bror(i, k) ==> brol(i, (-k)&mask) */
    fins->o = IR_BROL;
    fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&mask);
    return RETRYFOLD;
  }
#endif
  return NEXTFOLD;
}
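/* Illustrative example for the shift-count masking above: on a 32 bit
** operand, i << 33 first masks the count, 33 & 31 == 1, so the rule
** rewrites it to i << 1, which the next retry then turns into i + i.
** The masking matches the modulo shift-count semantics of common targets
** such as x86.
*/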
LJFOLD(BSHL any BAND)
LJFOLD(BSHR any BAND)
LJFOLD(BSAR any BAND)
LJFOLD(BROL any BAND)
LJFOLD(BROR any BAND)
LJFOLDF(simplify_shift_andk)
{
  IRIns *irk = IR(fright->op2);
  PHIBARRIER(fright);
  if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
      irk->o == IR_KINT) {  /* i o (j & mask) ==> i o j */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = irk->i & mask;
    if (k == mask) {
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BSHL KINT any)
LJFOLD(BSHR KINT any)
LJFOLD(BSHL KINT64 any)
LJFOLD(BSHR KINT64 any)
LJFOLDF(simplify_shift1_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
				    (int64_t)ir_k64(fleft)->u64;
  if (k == 0)  /* 0 o i ==> 0 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSAR KINT any)
LJFOLD(BROL KINT any)
LJFOLD(BROR KINT any)
LJFOLD(BSAR KINT64 any)
LJFOLD(BROL KINT64 any)
LJFOLD(BROR KINT64 any)
LJFOLDF(simplify_shift2_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
				    (int64_t)ir_k64(fleft)->u64;
  if (k == 0 || k == -1)  /* 0 o i ==> 0; -1 o i ==> -1 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSHL BAND KINT)
LJFOLD(BSHR BAND KINT)
LJFOLD(BROL BAND KINT)
LJFOLD(BROR BAND KINT)
LJFOLDF(simplify_shiftk_andk)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);
  if (irk->o == IR_KINT) {  /* (i & k1) o k2 ==> (i o k2) & (k1 o k2) */
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    fins->op1 = fleft->op1;
    fins->op1 = (IRRef1)lj_opt_fold(J);
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    fins->ot = IRTI(IR_BAND);
    return RETRYFOLD;
  } else if (irk->o == IR_KINT64) {
    uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, fright->i,
				  (IROp)fins->o);
    IROpT ot = fleft->ot;
    fins->op1 = fleft->op1;
    fins->op1 = (IRRef1)lj_opt_fold(J);
    fins->op2 = (IRRef1)lj_ir_kint64(J, k);
    fins->ot = ot;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BAND BSHL KINT)
LJFOLD(BAND BSHR KINT)
LJFOLDF(simplify_andk_shiftk)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT &&
      kfold_intop(-1, irk->i, (IROp)fleft->o) == fright->i)
    return LEFTFOLD;  /* (i o k1) & k2 ==> i, if (-1 o k1) == k2 */
  return NEXTFOLD;
}

LJFOLD(BAND BOR KINT)
LJFOLD(BOR BAND KINT)
LJFOLDF(simplify_andor_k)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);
  if (irk->o == IR_KINT) {
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
    /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
    if (k == (fins->o == IR_BAND ? 0 : -1)) {
      fins->op1 = fleft->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BAND BOR KINT64)
LJFOLD(BOR BAND KINT64)
LJFOLDF(simplify_andor_k64)
{
#if LJ_HASFFI
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);
  if (irk->o == IR_KINT64) {
    uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64,
				  (IROp)fins->o);
    /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
    /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
    if (k == (fins->o == IR_BAND ? (uint64_t)0 : ~(uint64_t)0)) {
      fins->op1 = fleft->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}
/* -- Reassociation ------------------------------------------------------- */

LJFOLD(ADD ADD KINT)
LJFOLD(MUL MUL KINT)
LJFOLD(BAND BAND KINT)
LJFOLD(BOR BOR KINT)
LJFOLD(BXOR BXOR KINT)
LJFOLDF(reassoc_intarith_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT) {
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    if (k == irk->i)  /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
}

LJFOLD(ADD ADD KINT64)
LJFOLD(MUL MUL KINT64)
LJFOLD(BAND BAND KINT64)
LJFOLD(BOR BOR KINT64)
LJFOLD(BXOR BXOR KINT64)
LJFOLDF(reassoc_intarith_k64)
{
#if LJ_HASFFI
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT64) {
    uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64,
				  (IROp)fins->o);
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint64(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(BAND BAND any)
LJFOLD(BOR BOR any)
LJFOLDF(reassoc_dup)
{
  if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
    return LEFTFOLD;  /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */
  return NEXTFOLD;
}

LJFOLD(MIN MIN any)
LJFOLD(MAX MAX any)
LJFOLDF(reassoc_dup_minmax)
{
  if (fins->op2 == fleft->op2)
    return LEFTFOLD;  /* (a o b) o b ==> a o b */
  return NEXTFOLD;
}

LJFOLD(BXOR BXOR any)
LJFOLDF(reassoc_bxor)
{
  PHIBARRIER(fleft);
  if (fins->op2 == fleft->op1)  /* (a xor b) xor a ==> b */
    return fleft->op2;
  if (fins->op2 == fleft->op2)  /* (a xor b) xor b ==> a */
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(BSHL BSHL KINT)
LJFOLD(BSHR BSHR KINT)
LJFOLD(BSAR BSAR KINT)
LJFOLD(BROL BROL KINT)
LJFOLD(BROR BROR KINT)
LJFOLDF(reassoc_shift)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);  /* The (shift any KINT) rule covers k2 == 0 and more. */
  if (irk->o == IR_KINT) {  /* (i o k1) o k2 ==> i o (k1 + k2) */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = (irk->i & mask) + (fright->i & mask);
    if (k > mask) {  /* Combined shift too wide? */
      if (fins->o == IR_BSHL || fins->o == IR_BSHR)
	return mask == 31 ? INTFOLD(0) : INT64FOLD(0);
      else if (fins->o == IR_BSAR)
	k = mask;
      else
	k &= mask;
    }
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
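/* Illustrative example for the combined-shift check above: on a 32 bit
** operand, (i << 10) << 25 adds up to a shift by 35 > 31, so for BSHL
** the result is folded to the constant 0. An arithmetic right shift
** saturates instead: (i >> 10) >> 25 becomes i >> 31, which still yields
** 0 or -1 depending on the sign of i. Rotates simply wrap the count
** modulo the width.
*/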
LJFOLD(MIN MIN KINT)
LJFOLD(MAX MAX KINT)
LJFOLDF(reassoc_minmax_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT) {
    int32_t a = irk->i;
    int32_t y = kfold_intop(a, fright->i, fins->o);
    if (a == y)  /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, y);
    return RETRYFOLD;  /* (x o k1) o k2 ==> x o (k1 o k2) */
  }
  return NEXTFOLD;
}

/* -- Array bounds check elimination -------------------------------------- */

/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
*/
LJFOLD(ABC any ADD)
LJFOLDF(abc_fwd)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    if (irref_isk(fright->op2)) {
      IRIns *add2 = IR(fright->op1);
      if (add2->o == IR_ADD && irref_isk(add2->op2) &&
	  IR(fright->op2)->i == -IR(add2->op2)->i) {
	IRRef ref = J->chain[IR_ABC];
	IRRef lim = add2->op1;
	if (fins->op1 > lim) lim = fins->op1;
	while (ref > lim) {
	  IRIns *ir = IR(ref);
	  if (ir->op1 == fins->op1 && ir->op2 == add2->op1)
	    return DROPFOLD;
	  ref = ir->prev;
	}
      }
    }
  }
  return NEXTFOLD;
}

/* Eliminate ABC for constants.
** ABC(asize, k1), ABC(asize, k2) ==> ABC(asize, max(k1, k2))
** Drop second ABC if k2 is lower. Otherwise patch first ABC with k2.
*/
LJFOLD(ABC any KINT)
LJFOLDF(abc_k)
{
  PHIBARRIER(fleft);
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    IRRef ref = J->chain[IR_ABC];
    IRRef asize = fins->op1;
    while (ref > asize) {
      IRIns *ir = IR(ref);
      if (ir->op1 == asize && irref_isk(ir->op2)) {
	uint32_t k = (uint32_t)IR(ir->op2)->i;
	if ((uint32_t)fright->i > k)
	  ir->op2 = fins->op2;
	return DROPFOLD;
      }
      ref = ir->prev;
    }
    return EMITFOLD;  /* Already performed CSE. */
  }
  return NEXTFOLD;
}

/* Eliminate invariant ABC inside loop. */
LJFOLD(ABC any any)
LJFOLDF(abc_invar)
{
  /* Invariant ABC marked as PTR. Drop if op1 is invariant, too. */
  if (!irt_isint(fins->t) && fins->op1 < J->chain[IR_LOOP] &&
      !irt_isphi(IR(fins->op1)->t))
    return DROPFOLD;
  return NEXTFOLD;
}
1945 /* -- Commutativity ------------------------------------------------------- */
1947 /* The refs of commutative ops are canonicalized. Lower refs go to the right.
1948 ** Rationale behind this:
1949 ** - It (also) moves constants to the right.
1950 ** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices).
1951 ** - It helps CSE to find more matches.
1952 ** - The assembler generates better code with constants at the right.
1955 LJFOLD(ADD any any)
1956 LJFOLD(MUL any any)
1957 LJFOLD(ADDOV any any)
1958 LJFOLD(MULOV any any)
1959 LJFOLDF(comm_swap)
1961 if (fins->op1 < fins->op2) { /* Move lower ref to the right. */
1962 IRRef1 tmp = fins->op1;
1963 fins->op1 = fins->op2;
1964 fins->op2 = tmp;
1965 return RETRYFOLD;
1967 return NEXTFOLD;

LJFOLD(EQ any any)
LJFOLD(NE any any)
LJFOLDF(comm_equal)
{
  /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
  if (fins->op1 == fins->op2 &&
      (!irt_isnum(fins->t) ||
       (fleft->o == IR_CONV &&  /* Converted integers cannot be NaN. */
        (uint32_t)(fleft->op2 & IRCONV_SRCMASK) - (uint32_t)IRT_I8 <= (uint32_t)(IRT_U64 - IRT_U8))))
    return CONDFOLD(fins->o == IR_EQ);
  return fold_comm_swap(J);
}

LJFOLD(LT any any)
LJFOLD(GE any any)
LJFOLD(LE any any)
LJFOLD(GT any any)
LJFOLD(ULT any any)
LJFOLD(UGE any any)
LJFOLD(ULE any any)
LJFOLD(UGT any any)
LJFOLDF(comm_comp)
{
  /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD((fins->o ^ (fins->o >> 1)) & 1);
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    fins->o ^= 3;  /* GT <-> LT, GE <-> LE, does not affect U */
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
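
/* The bit tricks above depend on the opcode numbering in lj_ir.h, where
** LT/GE/LE/GT and their unsigned variants are laid out consecutively:
** o ^= 3 swaps LT <-> GT and GE <-> LE without touching the unsigned bit,
** and ((o ^ (o >> 1)) & 1) is 1 exactly for the non-strict comparisons
** (GE, LE, UGE, ULE), for which x <=> x is true and the guard is dropped.
*/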

LJFOLD(BAND any any)
LJFOLD(BOR any any)
LJFOLDF(comm_dup)
{
  if (fins->op1 == fins->op2)  /* x o x ==> x */
    return LEFTFOLD;
  return fold_comm_swap(J);
}

LJFOLD(MIN any any)
LJFOLD(MAX any any)
LJFOLDF(comm_dup_minmax)
{
  if (fins->op1 == fins->op2)  /* x o x ==> x */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BXOR any any)
LJFOLDF(comm_bxor)
{
  if (fins->op1 == fins->op2)  /* i xor i ==> 0 */
    return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
  return fold_comm_swap(J);
}

/* -- Simplification of compound expressions ------------------------------ */

static TRef kfold_xload(jit_State *J, IRIns *ir, const void *p)
{
  int32_t k;
  switch (irt_type(ir->t)) {
  case IRT_NUM: return lj_ir_knum_u64(J, *(uint64_t *)p);
  case IRT_I8: k = (int32_t)*(int8_t *)p; break;
  case IRT_U8: k = (int32_t)*(uint8_t *)p; break;
  case IRT_I16: k = (int32_t)(int16_t)lj_getu16(p); break;
  case IRT_U16: k = (int32_t)(uint16_t)lj_getu16(p); break;
  case IRT_INT: case IRT_U32: k = (int32_t)lj_getu32(p); break;
  case IRT_I64: case IRT_U64: return lj_ir_kint64(J, *(uint64_t *)p);
  default: return 0;
  }
  return lj_ir_kint(J, k);
}
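
/* kfold_xload materializes a load from constant memory (e.g. the bytes of a
** string constant) as an IR constant of the load's result type. A return
** value of 0 means the type cannot be folded and callers must fall back.
*/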

/* Turn: string.sub(str, a, b) == kstr
** into: string.byte(str, a) == string.byte(kstr, 1) etc.
** Note: this creates unaligned XLOADs on x86/x64.
*/
LJFOLD(EQ SNEW KGC)
LJFOLD(NE SNEW KGC)
LJFOLDF(merge_eqne_snew_kgc)
{
  GCstr *kstr = ir_kstr(fright);
  int32_t len = (int32_t)kstr->len;
  lj_assertJ(irt_isstr(fins->t), "bad equality IR type");

#if LJ_TARGET_UNALIGNED
#define FOLD_SNEW_MAX_LEN  4  /* Handle string lengths 0, 1, 2, 3, 4. */
#define FOLD_SNEW_TYPE8    IRT_I8  /* Creates shorter immediates. */
#else
#define FOLD_SNEW_MAX_LEN  1  /* Handle string lengths 0 or 1. */
#define FOLD_SNEW_TYPE8    IRT_U8  /* Prefer unsigned loads. */
#endif

  PHIBARRIER(fleft);
  if (len <= FOLD_SNEW_MAX_LEN) {
    IROp op = (IROp)fins->o;
    IRRef strref = fleft->op1;
    if (IR(strref)->o != IR_STRREF)
      return NEXTFOLD;
    if (op == IR_EQ) {
      emitir(IRTGI(IR_EQ), fleft->op2, lj_ir_kint(J, len));
      /* Caveat: fins/fleft/fright are no longer valid after emitir. */
    } else {
      /* NE is not expanded since this would need an OR of two conds. */
      if (!irref_isk(fleft->op2))  /* Only handle the constant length case. */
        return NEXTFOLD;
      if (IR(fleft->op2)->i != len)
        return DROPFOLD;
    }
    if (len > 0) {
      /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
      uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, FOLD_SNEW_TYPE8) :
                               len == 2 ? IRT(IR_XLOAD, IRT_U16) :
                               IRTI(IR_XLOAD));
      TRef tmp = emitir(ot, strref,
                        IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0));
      TRef val = kfold_xload(J, IR(tref_ref(tmp)), strdata(kstr));
      if (len == 3)
        tmp = emitir(IRTI(IR_BAND), tmp,
                     lj_ir_kint(J, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
      fins->op1 = (IRRef1)tmp;
      fins->op2 = (IRRef1)val;
      fins->ot = (IROpT)IRTGI(op);
      return RETRYFOLD;
    } else {
      return DROPFOLD;
    }
  }
  return NEXTFOLD;
}
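
/* Worked example: string.sub(s, i, i+1) == "ab" records as an EQ of a SNEW
** with a KGC. The fold emits a guard on the length being 2 and rewrites the
** comparison into a single U16 XLOAD of the source bytes against the folded
** constant from kfold_xload(), so no temporary string is ever allocated.
*/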

/* -- Loads --------------------------------------------------------------- */

/* Loads cannot be folded or passed on to CSE in general.
** Alias analysis is needed to check for forwarding opportunities.
**
** Caveat: *all* loads must be listed here or they end up at CSE!
*/

LJFOLD(ALOAD any)
LJFOLDX(lj_opt_fwd_aload)

/* From HREF fwd (see below). Must eliminate, not supported by fwd/backend. */
LJFOLD(HLOAD KKPTR)
LJFOLDF(kfold_hload_kkptr)
{
  UNUSED(J);
  lj_assertJ(ir_kptr(fleft) == niltvg(J2G(J)), "expected niltv");
  return TREF_NIL;
}

LJFOLD(HLOAD any)
LJFOLDX(lj_opt_fwd_hload)

LJFOLD(ULOAD any)
LJFOLDX(lj_opt_fwd_uload)

LJFOLD(ALEN any any)
LJFOLDX(lj_opt_fwd_alen)

/* Try to merge UREFO/UREFC into referenced instruction. */
static TRef merge_uref(jit_State *J, IRRef ref, IRIns* ir)
{
  if (ir->o == IR_UREFO && irt_isguard(ir->t)) {
    /* Might be pointing to some other coroutine's stack.
    ** And GC might shrink said stack, thereby repointing the upvalue.
    ** GC might even collect said coroutine, thereby closing the upvalue.
    */
    if (gcstep_barrier(J, ref))
      return EMITFOLD;  /* So cannot merge. */
    /* Current fins wants a check, but ir doesn't have one. */
    if ((irt_t(fins->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC) &&
        irt_type(ir->t) == IRT_IGC)
      ir->t.irt += IRT_PGC-IRT_IGC;  /* So install a check. */
  }
  return ref;  /* Not a TRef, but the caller doesn't care. */
}

/* Upvalue refs are really loads, but there are no corresponding stores.
** So CSE is ok for them, except for guarded UREFO across a GC step.
** If the referenced function is const, its upvalue addresses are const, too.
** This can be used to improve CSE by looking for the same address,
** even if the upvalues originate from a different function.
*/
LJFOLD(UREFO KGC any)
LJFOLD(UREFC KGC any)
LJFOLDF(cse_uref)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref = J->chain[fins->o];
    GCfunc *fn = ir_kfunc(fleft);
    GCupval *uv = gco2uv(gcref(fn->l.uvptr[(fins->op2 >> 8)]));
    while (ref > 0) {
      IRIns *ir = IR(ref);
      if (irref_isk(ir->op1)) {
        GCfunc *fn2 = ir_kfunc(IR(ir->op1));
        if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
          return merge_uref(J, ref, ir);
        }
      }
      ref = ir->prev;
    }
  }
  return EMITFOLD;
}
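
/* Worked example: two closures created from the same prototype can share an
** open upvalue. If both function operands are constants, UREFO(fn1, uv) and
** UREFO(fn2, uv) are compared via the underlying GCupval object, so the
** second ref can be CSEd even though the KGC operands differ.
*/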

/* Custom CSE for UREFO. */
LJFOLD(UREFO any any)
LJFOLDF(cse_urefo)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref = J->chain[IR_UREFO];
    IRRef lim = fins->op1;
    IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
    while (ref > lim) {
      IRIns *ir = IR(ref);
      if (ir->op12 == op12)
        return merge_uref(J, ref, ir);
      ref = ir->prev;
    }
  }
  return EMITFOLD;
}

LJFOLD(HREFK any any)
LJFOLDX(lj_opt_fwd_hrefk)

LJFOLD(HREF TNEW any)
LJFOLDF(fwd_href_tnew)
{
  if (lj_opt_fwd_href_nokey(J))
    return lj_ir_kkptr(J, niltvg(J2G(J)));
  return NEXTFOLD;
}

LJFOLD(HREF TDUP KPRI)
LJFOLD(HREF TDUP KGC)
LJFOLD(HREF TDUP KNUM)
LJFOLDF(fwd_href_tdup)
{
  TValue keyv;
  lj_ir_kvalue(J->L, &keyv, fright);
  if (lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv) == niltvg(J2G(J)) &&
      lj_opt_fwd_href_nokey(J))
    return lj_ir_kkptr(J, niltvg(J2G(J)));
  return NEXTFOLD;
}
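
/* Worked example: for local t = {x = 1} followed by t.y, the TDUP template
** table has no key 'y', so the HREF folds to the constant niltv pointer and
** the subsequent HLOAD folds to nil via kfold_hload_kkptr above.
*/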

/* We can safely FOLD/CSE array/hash refs and field loads, since there
** are no corresponding stores. But we need to check for any NEWREF with
** an aliased table, as it may invalidate all of the pointers and fields.
** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
** FLOADs. And NEWREF itself is treated like a store (see below).
** LREF is constant (per trace) since coroutine switches are not inlined.
*/
LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tnew_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD(fleft->op1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TNEW IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tnew_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((1 << fleft->op2)-1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tdup_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tdup_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask);
  return NEXTFOLD;
}

LJFOLD(HREF any any)
LJFOLD(FLOAD any IRFL_TAB_ARRAY)
LJFOLD(FLOAD any IRFL_TAB_NODE)
LJFOLD(FLOAD any IRFL_TAB_ASIZE)
LJFOLD(FLOAD any IRFL_TAB_HMASK)
LJFOLDF(fload_tab_ah)
{
  TRef tr = lj_opt_cse(J);
  return lj_opt_fwd_tptr(J, tref_ref(tr)) ? tr : EMITFOLD;
}

/* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
LJFOLD(FLOAD KGC IRFL_STR_LEN)
LJFOLDF(fload_str_len_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kstr(fleft)->len);
  return NEXTFOLD;
}

LJFOLD(FLOAD SNEW IRFL_STR_LEN)
LJFOLDF(fload_str_len_snew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    PHIBARRIER(fleft);
    return fleft->op2;
  }
  return NEXTFOLD;
}

LJFOLD(FLOAD TOSTR IRFL_STR_LEN)
LJFOLDF(fload_str_len_tostr)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR)
    return INTFOLD(1);
  return NEXTFOLD;
}

LJFOLD(FLOAD any IRFL_SBUF_W)
LJFOLD(FLOAD any IRFL_SBUF_E)
LJFOLD(FLOAD any IRFL_SBUF_B)
LJFOLD(FLOAD any IRFL_SBUF_L)
LJFOLD(FLOAD any IRFL_SBUF_REF)
LJFOLD(FLOAD any IRFL_SBUF_R)
LJFOLDF(fload_sbuf)
{
  TRef tr = lj_opt_fwd_fload(J);
  return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD;
}

/* The fast function ID of function objects is immutable. */
LJFOLD(FLOAD KGC IRFL_FUNC_FFID)
LJFOLDF(fload_func_ffid_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kfunc(fleft)->c.ffid);
  return NEXTFOLD;
}

/* The C type ID of cdata objects is immutable. */
LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
LJFOLDF(fload_cdata_typeid_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kcdata(fleft)->ctypeid);
  return NEXTFOLD;
}

/* Get the contents of immutable cdata objects. */
LJFOLD(FLOAD KGC IRFL_CDATA_PTR)
LJFOLD(FLOAD KGC IRFL_CDATA_INT)
LJFOLD(FLOAD KGC IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_int64_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    void *p = cdataptr(ir_kcdata(fleft));
    if (irt_is64(fins->t))
      return INT64FOLD(*(uint64_t *)p);
    else
      return INTFOLD(*(int32_t *)p);
  }
  return NEXTFOLD;
}

LJFOLD(FLOAD CNEW IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD CNEWI IRFL_CDATA_CTYPEID)
LJFOLDF(fload_cdata_typeid_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op1;  /* No PHI barrier needed. CNEW/CNEWI op1 is const. */
  return NEXTFOLD;
}

/* Pointer, int and int64 cdata objects are immutable. */
LJFOLD(FLOAD CNEWI IRFL_CDATA_PTR)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_ptr_int64_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op2;  /* Fold even across PHI to avoid allocations. */
  return NEXTFOLD;
}

LJFOLD(FLOAD any IRFL_STR_LEN)
LJFOLD(FLOAD any IRFL_FUNC_ENV)
LJFOLD(FLOAD any IRFL_THREAD_ENV)
LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD any IRFL_CDATA_PTR)
LJFOLD(FLOAD any IRFL_CDATA_INT)
LJFOLD(FLOAD any IRFL_CDATA_INT64)
LJFOLD(VLOAD any any)  /* Vararg loads have no corresponding stores. */
LJFOLDX(lj_opt_cse)

/* All other field loads need alias analysis. */
LJFOLD(FLOAD any any)
LJFOLDX(lj_opt_fwd_fload)

/* This is for LOOP only. Recording handles SLOADs internally. */
LJFOLD(SLOAD any any)
LJFOLDF(fwd_sload)
{
  if ((fins->op2 & IRSLOAD_FRAME)) {
    TRef tr = lj_opt_cse(J);
    return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
  } else {
    lj_assertJ(J->slot[fins->op1] != 0, "uninitialized slot accessed");
    return J->slot[fins->op1];
  }
}

/* Only fold for KKPTR. The pointer _and_ the contents must be const. */
LJFOLD(XLOAD KKPTR any)
LJFOLDF(xload_kptr)
{
  TRef tr = kfold_xload(J, fins, ir_kptr(fleft));
  return tr ? tr : NEXTFOLD;
}

LJFOLD(XLOAD any any)
LJFOLDX(lj_opt_fwd_xload)

/* -- Frame handling ------------------------------------------------------ */

/* Prevent CSE of a REF_BASE operand across IR_RETF. */
LJFOLD(SUB any BASE)
LJFOLD(SUB BASE any)
LJFOLD(EQ any BASE)
LJFOLDF(fold_base)
{
  return lj_opt_cselim(J, J->chain[IR_RETF]);
}
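
/* RETF shifts BASE when returning to a lower frame, so BASE-relative values
** computed before a RETF must not be merged with ones computed after it.
** Limiting the CSE search to J->chain[IR_RETF] enforces exactly that.
*/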

/* -- Write barriers ------------------------------------------------------ */

/* Write barriers are amenable to CSE, but not across any incremental
** GC steps.
*/
LJFOLD(TBAR any)
LJFOLD(OBAR any any)
LJFOLDF(barrier_tab)
{
  TRef tr = lj_opt_cse(J);
  if (gcstep_barrier(J, tref_ref(tr)))  /* CSE across GC step? */
    return EMITFOLD;  /* Raw emit. Assumes fins is left intact by CSE. */
  return tr;
}

LJFOLD(TBAR TNEW)
LJFOLD(TBAR TDUP)
LJFOLDF(barrier_tnew_tdup)
{
  /* New tables are always white and never need a barrier. */
  if (fins->op1 < J->chain[IR_LOOP])  /* Except across a GC step. */
    return NEXTFOLD;
  return DROPFOLD;
}

/* -- Profiling ----------------------------------------------------------- */

LJFOLD(PROF any any)
LJFOLDF(prof)
{
  IRRef ref = J->chain[IR_PROF];
  if (ref+1 == J->cur.nins)  /* Drop neighbouring IR_PROF. */
    return ref;
  return EMITFOLD;
}

/* -- Stores and allocations ---------------------------------------------- */

/* Stores and allocations cannot be folded or passed on to CSE in general.
** But some stores can be eliminated with dead-store elimination (DSE).
**
** Caveat: *all* stores and allocs must be listed here or they end up at CSE!
*/

LJFOLD(ASTORE any any)
LJFOLD(HSTORE any any)
LJFOLDX(lj_opt_dse_ahstore)

LJFOLD(USTORE any any)
LJFOLDX(lj_opt_dse_ustore)

LJFOLD(FSTORE any any)
LJFOLDX(lj_opt_dse_fstore)

LJFOLD(XSTORE any any)
LJFOLDX(lj_opt_dse_xstore)

LJFOLD(NEWREF any any)  /* Treated like a store. */
LJFOLD(TMPREF any any)
LJFOLD(CALLA any any)
LJFOLD(CALLL any any)  /* Safeguard fallback. */
LJFOLD(CALLS any any)
LJFOLD(CALLXS any any)
LJFOLD(XBAR)
LJFOLD(RETF any any)  /* Modifies BASE. */
LJFOLD(TNEW any any)
LJFOLD(TDUP any)
LJFOLD(CNEW any any)
LJFOLD(XSNEW any any)
LJFOLDX(lj_ir_emit)

/* ------------------------------------------------------------------------ */

/* Every entry in the generated hash table is a 32 bit pattern:
**
**   xxxxxxxx iiiiiii lllllll rrrrrrrrrr
**
**   xxxxxxxx = 8 bit index into fold function table
**    iiiiiii = 7 bit folded instruction opcode
**    lllllll = 7 bit left instruction opcode
** rrrrrrrrrr = 8 bit right instruction opcode or 10 bits from literal field
*/

#include "lj_folddef.h"

/* ------------------------------------------------------------------------ */

/* Fold IR instruction. */
TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
{
  uint32_t key, any;
  IRRef ref;

  if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
    lj_assertJ(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
                JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT,
               "bad JIT_F_OPT_DEFAULT");
    /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
    if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
      return lj_opt_cse(J);

    /* No FOLD, forwarding or CSE? Emit raw IR for loads, except for SLOAD. */
    if ((J->flags & (JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE)) !=
                    (JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE) &&
        irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD)
      return lj_ir_emit(J);

    /* No FOLD or DSE? Emit raw IR for stores. */
    if ((J->flags & (JIT_F_OPT_FOLD|JIT_F_OPT_DSE)) !=
                    (JIT_F_OPT_FOLD|JIT_F_OPT_DSE) &&
        irm_kind(lj_ir_mode[fins->o]) == IRM_S)
      return lj_ir_emit(J);
  }

  /* Fold engine start/retry point. */
retry:
  /* Construct key from opcode and operand opcodes (unless literal/none). */
  key = ((uint32_t)fins->o << 17);
  if (fins->op1 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op1)->o << 10;
    *fleft = *IR(fins->op1);
    if (fins->op1 < REF_TRUE)
      fleft[1] = IR(fins->op1)[1];
  }
  if (fins->op2 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op2)->o;
    *fright = *IR(fins->op2);
    if (fins->op2 < REF_TRUE)
      fright[1] = IR(fins->op2)[1];
  } else {
    key += (fins->op2 & 0x3ffu);  /* Literal mask. Must include IRCONV_*MASK. */
  }

  /* Check for a match in order from most specific to least specific. */
  any = 0;
  for (;;) {
    uint32_t k = key | (any & 0x1ffff);
    uint32_t h = fold_hashkey(k);
    uint32_t fh = fold_hash[h];  /* Lookup key in semi-perfect hash table. */
    if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
      ref = (IRRef)tref_ref(fold_func[fh >> 24](J));
      if (ref != NEXTFOLD)
        break;
    }
    if (any == 0xfffff)  /* Exhausted folding. Pass on to CSE. */
      return lj_opt_cse(J);
    any = (any | (any >> 10)) ^ 0xffc00;
  }
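
  /* The wildcard mask walks the four key shapes described at the top of this
  ** file: any == 0 matches 'ins left right'; 0xffc00 masks the left opcode
  ** field ('ins any right'); 0x3ff masks the right field ('ins left any');
  ** and 0xfffff masks both before matching gives up and falls back to CSE.
  */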

  /* Return value processing, ordered by frequency. */
  if (LJ_LIKELY(ref >= MAX_FOLD))
    return TREF(ref, irt_t(IR(ref)->t));
  if (ref == RETRYFOLD)
    goto retry;
  if (ref == KINTFOLD)
    return lj_ir_kint(J, fins->i);
  if (ref == FAILFOLD)
    lj_trace_err(J, LJ_TRERR_GFAIL);
  lj_assertJ(ref == DROPFOLD, "bad fold result");
  return REF_DROP;
}

/* -- Common-Subexpression Elimination ------------------------------------ */

/* CSE an IR instruction. This is very fast due to the skip-list chains. */
TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
{
  /* Avoid narrow to wide store-to-load forwarding stall */
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  IROp op = fins->o;
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    /* Limited search for same operands in per-opcode chain. */
    IRRef ref = J->chain[op];
    IRRef lim = fins->op1;
    if (fins->op2 > lim) lim = fins->op2;  /* Relies on lit < REF_BIAS. */
    while (ref > lim) {
      if (IR(ref)->op12 == op12)
        return TREF(ref, irt_t(IR(ref)->t));  /* Common subexpression found. */
      ref = IR(ref)->prev;
    }
  }
  /* Otherwise emit IR (inlined for speed). */
  {
    IRRef ref = lj_ir_nextins(J);
    IRIns *ir = IR(ref);
    ir->prev = J->chain[op];
    ir->op12 = op12;
    J->chain[op] = (IRRef1)ref;
    ir->o = fins->o;
    J->guardemit.irt |= fins->t.irt;
    return TREF(ref, irt_t((ir->t = fins->t)));
  }
}
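
/* Why this is fast: J->chain[op] heads a per-opcode list threaded through
** ir->prev, so only instructions with the same opcode are visited. The lim
** bound is sound because an instruction can only reference operands with
** lower refs than itself, so nothing at or below max(op1, op2) can match.
*/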

/* CSE with explicit search limit. */
TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim)
{
  IRRef ref = J->chain[fins->o];
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  while (ref > lim) {
    if (IR(ref)->op12 == op12)
      return ref;
    ref = IR(ref)->prev;
  }
  return lj_ir_emit(J);
}

/* ------------------------------------------------------------------------ */

#undef IR
#undef fins
#undef fleft
#undef fright
#undef knumleft
#undef knumright
#undef emitir

#endif