/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/

#include "hphp/runtime/vm/jit/vasm.h"
#include "hphp/runtime/vm/jit/vasm-simplify-internal.h"

#include "hphp/runtime/vm/jit/containers.h"
#include "hphp/runtime/vm/jit/vasm-gen.h"
#include "hphp/runtime/vm/jit/vasm-info.h"
#include "hphp/runtime/vm/jit/vasm-instr.h"
#include "hphp/runtime/vm/jit/vasm-print.h"
#include "hphp/runtime/vm/jit/vasm-unit.h"
#include "hphp/runtime/vm/jit/vasm-util.h"
#include "hphp/runtime/vm/jit/vasm-visit.h"

#include "hphp/util/arch.h"
#include "hphp/util/asm-x64.h"

#include <utility>

TRACE_SET_MOD(vasm);

namespace HPHP { namespace jit {

namespace {

///////////////////////////////////////////////////////////////////////////////

enum class ResValid : uint8_t {
  Empty,
  Valid,
  Invalid
};

struct GetMemOp {
  template<class T> void imm(T) {}
  template<class T> void def(T) {}
  template<class T> void use(T) {}
  void def(Vptr mem) {
    if (rv != ResValid::Empty) {
      rv = ResValid::Invalid;
    } else {
      mr = mem;
      rv = ResValid::Valid;
    }
  }
  void use(Vptr mem) { def(mem); }
  template<class T> void across(T) {}
  template<class T, class H> void useHint(T,H) {}
  template<class T, class H> void defHint(T,H) {}

  bool isValid() { return rv == ResValid::Valid; }

  Vptr mr;
  ResValid rv{ResValid::Empty};
};

/*
 * Return the Vptr and size for simple insts that read/write memory.
 *
 * Returns a size of zero to indicate a non-simple inst, e.g. a call.
 */
std::pair<Vptr, uint8_t> getMemOpAndSize(const Vinstr& inst) {
  GetMemOp g;
  visitOperands(inst, g);

  auto const vop = g.mr;
  if (!g.isValid()) return {vop, 0};

  auto sz = width(inst.op);
  // Workaround: load and store opcodes report a width of Octa, while in
  // reality they are Quad.
  if (inst.op == Vinstr::load || inst.op == Vinstr::store) {
    sz = Width::Quad;
  }
  sz &= Width::AnyNF;

  auto const size = [&] {
    switch (sz) {
      case Width::Byte:
        return sz::byte;
      case Width::Word: case Width::WordN:
        return sz::word;
      case Width::Long: case Width::LongN:
        return sz::dword;
      case Width::Quad: case Width::QuadN:
        return sz::qword;
      case Width::Octa: case Width::AnyNF:
        return 16;
      default: return 0;
    }
  }();
  return {vop, size};
}

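/*
 * Illustrative sketch, not from the original source (operands are
 * hypothetical): for a simple memory instruction such as
 *
 *   storel{%r1, %rbp[0x10]}
 *
 * this returns {rbp[0x10], sz::dword}; for an instruction with no single
 * memory operand (e.g. a call), the returned size is zero.
 */
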
/*
 * Return true if we are sure that `inst1' does not write to the same memory
 * location that `inst2' reads or writes (and vice versa).
 *
 * (Note that a false return does /not/ make any positive indication about
 * aliasing.)
 */
bool cannot_alias_write(const Vinstr& inst1, const Vinstr& inst2) {
  if (!writesMemory(inst1.op) && !writesMemory(inst2.op)) return true;
  auto const p1 = getMemOpAndSize(inst1);
  auto const p2 = getMemOpAndSize(inst2);
  if (p1.second == 0 || p2.second == 0) return false;
  auto const v1 = p1.first;
  auto const v2 = p2.first;
  auto const size1 = p1.second;
  auto const size2 = p2.second;
  if (v1.base != v2.base || v1.seg != v2.seg) return false;
  if ((v1.index != v2.index) ||
      (v1.index.isValid() && (v1.scale != v2.scale))) {
    return false;
  }
  if (v1.disp == v2.disp) return false;
  if (v1.disp < v2.disp) {
    return v2.disp >= v1.disp + size1;
  } else {
    return v1.disp >= v2.disp + size2;
  }
}

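/*
 * Worked example (illustrative only; registers hypothetical): the stores
 *
 *   store{%r1, %rbp[0]}   // covers bytes [0, 8)
 *   store{%r2, %rbp[8]}   // covers bytes [8, 16)
 *
 * share a base and have disjoint ranges, so v2.disp >= v1.disp + size1 holds
 * and we return true. With displacements 0 and 4 the qword ranges would
 * overlap, and we'd return false.
 */
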
/*
 * Metadata about a potentially-foldable load Vinstr.
 */
struct FoldableLoadInfo {
  /*
   * Source memory location of the load.
   */
  const Vptr* operand;
  /*
   * Index into the instruction stream.
   *
   * Note that this makes FoldableLoadInfo block-context-sensitive.
   */
  size_t idx;
  /*
   * Whether we need to check for interfering writes to the same location.
   */
  bool check_writes;
};

/*
 * If reg is defined by a load in b with index j < i, and there are no
 * interfering stores between j and i, return a pointer to its Vptr argument.
 *
 * Note that during simplification, we can end up with mismatched register
 * sizes, so that a loadzbq feeds a byte-sized instruction. In that case, it's
 * ok to treat the loadzbq as if it were a loadb, provided we're expecting a
 * byte-sized operand. Since Vreg doesn't carry a register width around, we
 * pass it in via the `size' param, so that we can verify the zero-extend
 * variants.
 */
FoldableLoadInfo foldable_load_helper(Env& env, Vreg reg, int size,
                                      Vlabel b, size_t i, bool mw) {
  if (!i || env.use_counts[reg] != 1) return { nullptr, 0, false };

  auto const def_inst = env.def_insts[reg];
  switch (def_inst) {
    case Vinstr::loadb:
    case Vinstr::loadw:
    case Vinstr::loadl:
    case Vinstr::load:
      break;
    case Vinstr::loadzbq:
    case Vinstr::loadzbl:
      if (size != sz::byte) return { nullptr, 0, false };
      break;
    case Vinstr::loadzlq:
      if (size != sz::dword) return { nullptr, 0, false };
      break;
    case Vinstr::copy:
      break;
    default:
      return { nullptr, 0, false };
  }

#define CHECK_LOAD(load)                        \
  case Vinstr::load:                            \
    if (inst.load##_.d != reg) break;           \
    return { &inst.load##_.s, i, mw };

  while (i--) {
    auto const& inst = env.unit.blocks[b].code[i];
    if (inst.op == def_inst) {
      switch (def_inst) {
        CHECK_LOAD(loadb);
        CHECK_LOAD(loadw);
        CHECK_LOAD(loadl);
        CHECK_LOAD(load);
        CHECK_LOAD(loadzbq);
        CHECK_LOAD(loadzbl);
        CHECK_LOAD(loadzlq);
        case Vinstr::copy:
          if (inst.copy_.d != reg) break;
          return foldable_load_helper(env, inst.copy_.s, size, b, i, mw);
        default:
          not_reached();
      }
    }
    auto const op = env.unit.blocks[b].code[i].op;
    mw |= writesMemory(op);
  }

  return { nullptr, 0, false };
}

const Vptr* foldable_load(Env& env, Vreg reg, int size,
                          Vlabel b, size_t i) {
  auto const info = foldable_load_helper(env, reg, size, b, i, false);
  if (!info.operand || info.idx + 1 == i) return info.operand;

  std::vector<Vreg> nonSSARegs;
  visit(env.unit, *info.operand, [&] (Vreg r, Width) {
    if (r.isPhys()) nonSSARegs.push_back(r);
  });
  if (nonSSARegs.empty() && !info.check_writes) return info.operand;

  // The Vptr contains non-SSA regs, so we need to check that they're
  // not modified between the load and the use.
  auto const loadInst = env.unit.blocks[b].code[info.idx];
  for (auto ix = info.idx; ++ix < i; ) {
    auto const& inst = env.unit.blocks[b].code[ix];
    if (isCall(inst)) return nullptr;
    bool ok = true;
    if (!nonSSARegs.empty()) {
      visitDefs(env.unit, inst, [&] (Vreg r, Width) {
        for (auto const t : nonSSARegs) {
          if (t == r) ok = false;
        }
      });
      if (!ok) return nullptr;
    }
    if (info.check_writes && writesMemory(inst.op) &&
        !cannot_alias_write(loadInst, inst)) {
      return nullptr;
    }
  }
  return info.operand;
}

const Vptr* foldable_load(Env& env, Vreg8 reg, Vlabel b, size_t i) {
  return foldable_load(env, reg, sz::byte, b, i);
}

const Vptr* foldable_load(Env& env, Vreg16 reg, Vlabel b, size_t i) {
  return foldable_load(env, reg, sz::word, b, i);
}

const Vptr* foldable_load(Env& env, Vreg32 reg, Vlabel b, size_t i) {
  return foldable_load(env, reg, sz::dword, b, i);
}

const Vptr* foldable_load(Env& env, Vreg64 reg, Vlabel b, size_t i) {
  return foldable_load(env, reg, sz::qword, b, i);
}

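/*
 * Example of the pattern these helpers recognize (illustrative; vreg numbers
 * are made up):
 *
 *   load{%rbp[0x20], %128}    // the only use of %128 is the cmpq
 *   ...                       // no interfering store or call
 *   cmpq{%128, %129, %sf}
 *
 * Here foldable_load on %128 yields &(%rbp[0x20]), letting the caller
 * rewrite the user to read straight from memory.
 */
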
///////////////////////////////////////////////////////////////////////////////

/*
 * Simplify an `inst' at block `b', instruction `i', returning whether or not
 * any changes were made.
 *
 * Specializations are below.
 */
template <typename Inst>
bool simplify(Env&, const Inst& /*inst*/, Vlabel /*b*/, size_t /*i*/) {
  return false;
}

///////////////////////////////////////////////////////////////////////////////

/*
 * Arithmetic instructions.
 */

template<typename Test, typename And>
bool simplify_and(Env& env, const And& vandq, Vlabel b, size_t i) {
  return if_inst<Vinstr::testq>(env, b, i + 1, [&] (const testq& vtestq) {
    // And{s0, s1, tmp, _}; testq{tmp, tmp, sf} -> Test{s0, s1, sf}
    // where And/Test is either andq/testq, or andqi/testqi.
    if (!(env.use_counts[vandq.d] == 2 &&
          env.use_counts[vandq.sf] == 0 &&
          vtestq.s0 == vandq.d &&
          vtestq.s1 == vandq.d)) return false;

    return simplify_impl(env, b, i, [&] (Vout& v) {
      v << Test{vandq.s0, vandq.s1, vtestq.sf};
      return 2;
    });
  });
}

bool simplify(Env& env, const andq& vandq, Vlabel b, size_t i) {
  return simplify_and<testq>(env, vandq, b, i);
}

bool simplify(Env& env, const andqi& vandqi, Vlabel b, size_t i) {
  return simplify_and<testqi>(env, vandqi, b, i);
}

/*
 * Simplify masking values with -1 in andXi{}:
 *   andbi{0xff, s, d}       -> copy{s, d}
 *   andli{0xffffffff, s, d} -> copy{s, d}
 */
template<typename andi>
bool simplify_andi(Env& env, const andi& inst, Vlabel b, size_t i) {
  if (inst.s0.l() != -1 ||
      env.use_counts[inst.sf] != 0) return false;
  return simplify_impl(env, b, i, [&] (Vout& v) {
    v << copy{inst.s1, inst.d};
    return 1;
  });
}

bool simplify(Env& env, const andbi& andbi, Vlabel b, size_t i) {
  return simplify_andi(env, andbi, b, i);
}

bool simplify(Env& env, const andli& andli, Vlabel b, size_t i) {
  return simplify_andi(env, andli, b, i);
}

////////////////////////////////////////////////////////////////////////////////

/*
 * Narrow compares.
 */

/*
 * Return a bound on the size of a Vreg.
 *
 * If it's known to fit in:
 *  - 8 unsigned bits, return sz::byte
 *  - 16 unsigned bits, return sz::word
 *  - 32 unsigned bits, return sz::dword
 *  - otherwise, return sz::qword.
 */
int value_width(Env& env, Vreg reg) {
  auto const it = env.unit.regToConst.find(reg);
  if (it != env.unit.regToConst.end()) {
    if (!it->second.isUndef &&
        it->second.kind != Vconst::Double) {
      if (it->second.val <= 0xff)       return sz::byte;
      if (it->second.val <= 0xffff)     return sz::word;
      if (it->second.val <= 0xffffffff) return sz::dword;
    }
    return sz::qword;
  }

  switch (env.def_insts[reg]) {
    case Vinstr::loadzbq:
    case Vinstr::loadzbl:
    case Vinstr::movzbw:
    case Vinstr::movzbl:
    case Vinstr::movzbq:
      return sz::byte;

    case Vinstr::movzwl:
    case Vinstr::movzwq:
      return sz::word;

    case Vinstr::movzlq:
    case Vinstr::loadzlq:
      return sz::dword;
    default:
      break;
  }

  return sz::qword;
}

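/*
 * For instance (illustrative): a Vreg defined by movzbl{} is known to fit in
 * 8 unsigned bits, so value_width returns sz::byte for it, while a register
 * bound to the constant 0x1234 yields sz::word.
 */
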
/*
 * Return a suitable register for r, with width size.
 *
 *  - if r is constant, just return a constant.
 *  - if r is the result of a zero-extending move from the
 *    correct size, return the source of the move
 *  - if r is the result of a zero-extending load with one use,
 *    convert the load to a non-zero-extending form
 *  - otherwise apply a movtq<sz> to the register, and return the dst.
 */
Vreg narrow_reg(Env& env, int size, Vreg r, Vlabel b, size_t i, Vout& v) {
  auto const it = env.unit.regToConst.find(r);
  if (it != env.unit.regToConst.end()) {
    assertx(!it->second.isUndef && it->second.kind != Vconst::Double);
    return r;
  }

  while (i--) {
    auto replace = [&] (const Vinstr& rep) {
      return simplify_impl(env, b, i, [&] (Vout& v) {
        v << rep;
        return 1;
      });
    };
    auto match = [&] (int rsz, size_t rn) {
      if (rsz != size) return Vreg{};
      if (env.use_counts[r] == 1) {
        replace(nop{});
      }
      return Vreg{ rn };
    };

    auto const& inst = env.unit.blocks[b].code[i];
    switch (inst.op) {
      case Vinstr::loadzbq: {
        auto const& load = inst.get<Vinstr::loadzbq>();
        if (load.d == r) {
          if (size == sz::byte && env.use_counts[r] == 1) {
            replace(loadb{load.s, r});
            return r;
          }
          return {};
        }
        break;
      }
      case Vinstr::loadzbl: {
        auto const& load = inst.get<Vinstr::loadzbl>();
        if (load.d == r) {
          if (size == sz::byte && env.use_counts[r] == 1) {
            replace(loadb{load.s, r});
            return r;
          }
          return {};
        }
        break;
      }
      case Vinstr::movzbw:
        if (inst.movzbw_.d == r) return match(sz::byte, inst.movzbw_.s);
        break;
      case Vinstr::movzbl:
        if (inst.movzbl_.d == r) return match(sz::byte, inst.movzbl_.s);
        break;
      case Vinstr::movzbq:
        if (inst.movzbq_.d == r) return match(sz::byte, inst.movzbq_.s);
        break;

      case Vinstr::movzwl:
        if (inst.movzwl_.d == r) return match(sz::word, inst.movzwl_.s);
        break;
      case Vinstr::movzwq:
        if (inst.movzwq_.d == r) return match(sz::word, inst.movzwq_.s);
        break;

      case Vinstr::movzlq:
        if (inst.movzlq_.d == r) return match(sz::dword, inst.movzlq_.s);
        break;

      case Vinstr::loadzlq: {
        auto const& load = inst.get<Vinstr::loadzlq>();
        if (load.d == r) {
          if (size == sz::dword && env.use_counts[r] == 1) {
            replace(loadl{load.s, r});
            return r;
          }
          return {};
        }
        break;
      }
      default:
        break;
    }
  }
  return {};
}

int narrow_cmp(Env& env, int size, const cmpq& vcmp, Vlabel b, size_t i,
               Vout& v) {
  auto getreg = [&] (Vreg reg, Vout& v) {
    auto out = narrow_reg(env, size, reg, b, i, v);
    if (!out.isValid()) {
      out = v.makeReg();
      switch (size) {
        case sz::byte:
          v << movtqb{reg, out};
          break;
        case sz::word:
          v << movtqw{reg, out};
          break;
        case sz::dword:
          v << movtql{reg, out};
          break;
        default:
          always_assert(false);
      }
    }
    return out;
  };
  auto const s0 = getreg(vcmp.s0, v);
  auto const s1 = getreg(vcmp.s1, v);
  switch (size) {
    case sz::byte:
      v << cmpb{s0, s1, vcmp.sf};
      break;
    case sz::word:
      v << cmpw{s0, s1, vcmp.sf};
      break;
    case sz::dword:
      v << cmpl{s0, s1, vcmp.sf};
      break;
    default:
      always_assert(false);
  }
  return 1;
}

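/*
 * Illustrative rewrite (hypothetical vregs): given
 *
 *   movzbl{%b0, %t0}; movzbl{%b1, %t1}; cmpq{%t0, %t1, %sf}
 *
 * narrow_cmp with size == sz::byte emits cmpb{%b0, %b1, %sf}; the single-use
 * zero-extending moves are replaced with nop{} by narrow_reg.
 */
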
enum class SFUsage {
  Ok,
  Fixable,
  Unfixable
};

/*
 * Check whether all uses of sf are suitable, or can be converted to
 * suitable uses, as determined by fun.
 *
 * fun takes a CC as its argument, and returns
 *  - CC_None to indicate the usage is unsuitable
 *  - the input cc to indicate that it's already suitable
 *  - some other cc to indicate that we could convert to that.
 *
 * If fix is true, this will convert any uses as specified.
 */
template<class F>
SFUsage check_sf_usage_helper(Env& env, Vreg sf, Vlabel b, size_t i, bool fix,
                              F fun) {
  auto ret = SFUsage::Ok;
  auto uses = env.use_counts[sf];
  while (uses) {
    auto const& code = env.unit.blocks[b].code;
    if (++i == code.size()) return SFUsage::Unfixable;
    if (getSFUseReg(code[i]) == sf) {
      uses--;
      auto tmp = code[i];
      auto& cc = getConditionCode(tmp);
      auto const newCC = fun(cc);
      if (newCC == CC_None) {
        return SFUsage::Unfixable;
      }
      if (newCC != cc) {
        if (fix) {
          cc = newCC;
          simplify_impl(env, b, i, [&] (Vout& v) {
            v << tmp;
            return 1;
          });
        }
        ret = SFUsage::Fixable;
      }
    }
  }
  return ret;
}

template<class F>
bool check_sf_usage(Env& env, Vreg sf, Vlabel b, size_t i, F fun) {
  switch (check_sf_usage_helper(env, sf, b, i, false, fun)) {
    case SFUsage::Ok:
      return true;
    case SFUsage::Fixable:
      check_sf_usage_helper(env, sf, b, i, true, fun);
      return true;
    case SFUsage::Unfixable:
      return false;
  }
  not_reached();
}

// Try to change the condition codes so they work when the inputs to
// the compare are swapped. Return true if successful (or there was
// nothing to do).
bool flip_ordered_uses(Env& env, Vreg sf, Vlabel b, size_t i) {
  return check_sf_usage(
    env, sf, b, i,
    [] (ConditionCode cc) {
      switch (cc) {
        case CC_None:
          always_assert(false);
        case CC_O:
        case CC_NO:
          // can't be fixed
          return CC_None;
        case CC_B:  return CC_A;
        case CC_AE: return CC_BE;
        case CC_BE: return CC_AE;
        case CC_A:  return CC_B;

        case CC_E:
        case CC_NE:
          // unaffected by operand order
          return cc;
        case CC_S:
        case CC_NS:
        case CC_P:
        case CC_NP:
          // can't be fixed
          return CC_None;
        case CC_L:  return CC_G;
        case CC_GE: return CC_LE;
        case CC_LE: return CC_GE;
        case CC_G:  return CC_L;
      }
      not_reached();
    }
  );
}

// Change any signed conditions to the corresponding unsigned
// condition. Return true if successful (or there was nothing to do).
bool fix_signed_uses(Env& env, Vreg sf, Vlabel b, size_t i) {
  return check_sf_usage(
    env, sf, b, i,
    [] (ConditionCode cc) {
      switch (cc) {
        case CC_None:
          always_assert(false);
        case CC_O:
        case CC_NO:
          // can't be fixed
          return CC_None;
        case CC_B:
        case CC_AE:
        case CC_E:
        case CC_NE:
        case CC_BE:
        case CC_A:
          // already unsigned
          return cc;
        case CC_S:
        case CC_NS:
        case CC_P:
        case CC_NP:
          // can't be fixed
          return CC_None;
        case CC_L:  return CC_B;
        case CC_GE: return CC_AE;
        case CC_LE: return CC_BE;
        case CC_G:  return CC_A;
      }
      not_reached();
    }
  );
}

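/*
 * Example (illustrative): a jcc{CC_L, %sf, ...} use can become
 * jcc{CC_B, %sf, ...} when both compare operands are known to be
 * zero-extended, since signed less-than and unsigned below agree once the
 * sign bits are clear. Uses such as CC_S or CC_P make the helper report
 * SFUsage::Unfixable, which blocks the narrowing.
 */
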
/*
 * Return a (Vptr*, idx in `b') handle to the first store of `reg' within block
 * `b' after position `i'.
 *
 * Returns { nullptr, 0 } if no such store is found, or if that store is not
 * the only use of `reg'.
 */
std::pair<const Vptr*, size_t>
storeq(Env& env, Vreg64 reg, Vlabel b, size_t i) {
  if (env.use_counts[reg] != 1) return { nullptr, 0 };

  while (++i < env.unit.blocks[b].code.size()) {
    auto const& inst = env.unit.blocks[b].code[i];
    if (inst.op == Vinstr::store && inst.store_.s == reg) {
      return { &(inst.store_.d), i };
    }
  }
  return { nullptr, 0 };
}

bool is_reg_const(Env& env, Vreg reg) {
  return env.unit.regToConst.find(reg) != env.unit.regToConst.end();
}

template <typename Op>
bool flip_operands_helper(Env& env, const Op& op, Vlabel b, size_t i) {
  // We want any constant register to be in the first operand, and any foldable
  // load to be in the second.
  if (!(is_reg_const(env, op.s0) || foldable_load(env, op.s1, b, i)) &&
      (is_reg_const(env, op.s1) || foldable_load(env, op.s0, b, i))) {
    if (flip_ordered_uses(env, op.sf, b, i)) {
      return simplify_impl(env, b, i, Op { op.s1, op.s0, op.sf });
    }
  }
  return false;
}

bool simplify(Env& env, const addq& vadd, Vlabel b, size_t i) {
  if (arch_any(Arch::ARM, Arch::PPC64)) return false;

  auto const stPair = storeq(env, vadd.d, b, i);
  auto const vptrs = stPair.first;
  auto const stIdx = stPair.second;

  if (vptrs) {
    auto const vptrl = foldable_load(env, vadd.s0, b, stIdx);
    if (vptrl && (*vptrl == *vptrs)) {
      bool ret = simplify_impl(env, b, i, addqrm { vadd.s1, *vptrs, vadd.sf });
      // Needed here because a store will not be removed as dead code.
      if (ret) env.unit.blocks[b].code[stIdx] = nop{};
      return ret;
    }
    auto const vptrl2 = foldable_load(env, vadd.s1, b, stIdx);
    if (vptrl2 && (*vptrl2 == *vptrs)) {
      bool ret = simplify_impl(env, b, i, addqrm { vadd.s0, *vptrs, vadd.sf });
      if (ret) env.unit.blocks[b].code[stIdx] = nop{};
      return ret;
    }
  }
  if (auto const vptr = foldable_load(env, vadd.s0, b, i)) {
    return simplify_impl(env, b, i, addqmr{*vptr, vadd.s1, vadd.d, vadd.sf});
  }
  if (auto const vptr = foldable_load(env, vadd.s1, b, i)) {
    return simplify_impl(env, b, i, addqmr{*vptr, vadd.s0, vadd.d, vadd.sf});
  }
  return false;
}

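/*
 * Illustrative effect of the paths above (hypothetical vregs). Store fusion:
 *
 *   load{%rbp[0x10], %t0}; addq{%t0, %t1, %t2, %sf}; store{%t2, %rbp[0x10]}
 *     -> addqrm{%t1, %rbp[0x10], %sf}    // store rewritten to nop{}
 *
 * Plain load fusion:
 *
 *   load{%rbp[0x10], %t0}; addq{%t0, %t1, %t2, %sf}
 *     -> addqmr{%rbp[0x10], %t1, %t2, %sf}
 */
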
bool simplify(Env& env, const cmpq& vcmp, Vlabel b, size_t i) {
  if (flip_operands_helper(env, vcmp, b, i)) return true;

  if (!arch_any(Arch::ARM, Arch::PPC64)) {
    if (auto const vptr = foldable_load(env, vcmp.s1, b, i)) {
      return simplify_impl(env, b, i, cmpqm { vcmp.s0, *vptr, vcmp.sf });
    }
    if (auto const vptr = foldable_load(env, vcmp.s0, b, i)) {
      if (flip_ordered_uses(env, vcmp.sf, b, i)) {
        return simplify_impl(env, b, i, cmpqm { vcmp.s1, *vptr, vcmp.sf });
      }
    }
  }

  auto const sz0 = value_width(env, vcmp.s0);
  if (sz0 == sz::qword) return false;
  auto const sz1 = value_width(env, vcmp.s1);
  if (sz1 == sz::qword) return false;

  auto const sz = sz1 > sz0 ? sz1 : sz0;

  if (!fix_signed_uses(env, vcmp.sf, b, i)) return false;

  return simplify_impl(env, b, i, [&] (Vout& v) {
    return narrow_cmp(env, sz, vcmp, b, i, v);
  });
}

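/*
 * Sketch of the narrowing path (illustrative): if both sources have
 * value_width of sz::byte (say each is defined by movzbq{}), fix_signed_uses
 * first converts any signed conditions on %sf to their unsigned counterparts,
 * and narrow_cmp then re-emits the comparison as a cmpb.
 */
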
bool simplify(Env& env, const cmpl& vcmp, Vlabel b, size_t i) {
  if (flip_operands_helper(env, vcmp, b, i)) return true;

  if (arch() == Arch::ARM) return false;
  if (auto const vptr = foldable_load(env, vcmp.s1, b, i)) {
    return simplify_impl(env, b, i,
                         cmplm { vcmp.s0, *vptr, vcmp.sf });
  }
  return false;
}

bool simplify(Env& env, const cmpw& vcmp, Vlabel b, size_t i) {
  if (flip_operands_helper(env, vcmp, b, i)) return true;

  if (auto const vptr = foldable_load(env, vcmp.s1, b, i)) {
    return simplify_impl(env, b, i,
                         cmpwm { vcmp.s0, *vptr, vcmp.sf });
  }
  return false;
}

bool simplify(Env& env, const cmpb& vcmp, Vlabel b, size_t i) {
  if (flip_operands_helper(env, vcmp, b, i)) return true;

  if (auto const vptr = foldable_load(env, vcmp.s1, b, i)) {
    return simplify_impl(env, b, i,
                         cmpbm { vcmp.s0, *vptr, vcmp.sf });
  }
  return false;
}

bool simplify(Env& env, const cmpqi& vcmp, Vlabel b, size_t i) {
  if (arch_any(Arch::ARM, Arch::PPC64)) return false;
  if (auto const vptr = foldable_load(env, vcmp.s1, b, i)) {
    return simplify_impl(env, b, i,
                         cmpqim { vcmp.s0, *vptr, vcmp.sf });
  }
  return false;
}

bool simplify(Env& env, const cmpli& vcmp, Vlabel b, size_t i) {
  if (arch() == Arch::ARM) return false;
  if (auto const vptr = foldable_load(env, vcmp.s1, b, i)) {
    return simplify_impl(env, b, i,
                         cmplim { vcmp.s0, *vptr, vcmp.sf });
  }
  return false;
}

bool simplify(Env& env, const cmpwi& vcmp, Vlabel b, size_t i) {
  if (arch() == Arch::ARM) return false;
  if (auto const vptr = foldable_load(env, vcmp.s1, b, i)) {
    return simplify_impl(env, b, i,
                         cmpwim { vcmp.s0, *vptr, vcmp.sf });
  }
  return false;
}

bool simplify(Env& env, const cmpbi& vcmp, Vlabel b, size_t i) {
  if (arch_any(Arch::ARM, Arch::PPC64)) return false;
  if (auto const vptr = foldable_load(env, vcmp.s1, b, i)) {
    return simplify_impl(env, b, i,
                         cmpbim { vcmp.s0, *vptr, vcmp.sf });
  }
  return false;
}

////////////////////////////////////////////////////////////////////////////////

/*
 * Conditional operations.
 */

bool simplify(Env& env, const setcc& vsetcc, Vlabel b, size_t i) {
  return if_inst<Vinstr::xorbi>(env, b, i + 1, [&] (const xorbi& vxorbi) {
    // setcc{cc, _, tmp}; xorbi{1, tmp, d, _}; -> setcc{~cc, _, d};
    if (!(env.use_counts[vsetcc.d] == 1 &&
          vxorbi.s0.b() == 1 &&
          vxorbi.s1 == vsetcc.d &&
          env.use_counts[vxorbi.sf] == 0)) return false;

    return simplify_impl(env, b, i, [&] (Vout& v) {
      v << setcc{ccNegate(vsetcc.cc), vsetcc.sf, vxorbi.d};
      return 2;
    });
  });
}

/*
 * Fold a cmov of a certain width into a copy if both values are the same
 * register or have the same known constant value.
 */
template <typename Inst>
bool cmov_fold_impl(Env& env, const Inst& inst, Vlabel b, size_t i) {
  auto const equivalent = [&]{
    if (inst.t == inst.f) return true;

    auto const t_it = env.unit.regToConst.find(inst.t);
    if (t_it == env.unit.regToConst.end()) return false;
    auto const f_it = env.unit.regToConst.find(inst.f);
    if (f_it == env.unit.regToConst.end()) return false;

    auto const t_const = t_it->second;
    auto const f_const = f_it->second;
    if (t_const.isUndef || f_const.isUndef) return false;
    if (t_const.kind != f_const.kind) return false;
    return t_const.val == f_const.val;
  }();
  if (!equivalent) return false;

  return simplify_impl(
    env, b, i,
    [&] (Vout& v) {
      v << copy{inst.t, inst.d};
      return 1;
    }
  );
}

/*
 * Turn a cmov of a certain width into a matching setcc instruction if the
 * conditions are correct (both sources are constants of value 0 or 1).
 */
template <typename Inst, typename Extend>
bool cmov_setcc_impl(Env& env, const Inst& inst, Vlabel b,
                     size_t i, Extend extend) {
  auto const t_it = env.unit.regToConst.find(inst.t);
  if (t_it == env.unit.regToConst.end()) return false;
  auto const f_it = env.unit.regToConst.find(inst.f);
  if (f_it == env.unit.regToConst.end()) return false;

  auto const check_const = [](Vconst c, bool& val) {
    if (c.isUndef) return false;
    switch (c.kind) {
      case Vconst::Quad:
      case Vconst::Long:
      case Vconst::Byte:
        if (c.val == 0) {
          val = false;
          return true;
        } else if (c.val == 1) {
          val = true;
          return true;
        } else {
          return false;
        }
      case Vconst::Double:
        return false;
    }
    not_reached();
  };

  bool t_val;
  if (!check_const(t_it->second, t_val)) return false;
  bool f_val;
  if (!check_const(f_it->second, f_val)) return false;

  return simplify_impl(env, b, i, [&] (Vout& v) {
    auto const d = env.unit.makeReg();
    if (t_val == f_val) {
      v << copy{env.unit.makeConst(t_val), d};
    } else if (t_val) {
      v << setcc{inst.cc, inst.sf, d};
    } else {
      v << setcc{ccNegate(inst.cc), inst.sf, d};
    }
    extend(v, d, inst.d);
    return 1;
  });
}

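/*
 * Illustrative example (hypothetical vregs): cmovq{cc, %sf, %f, %t, %d} with
 * %t bound to 1 and %f to 0 becomes setcc{cc, %sf, %tmp} followed by
 * movzbq{%tmp, %d}; with the constants swapped, the condition is negated
 * instead.
 */
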
bool simplify(Env& env, const cmovb& inst, Vlabel b, size_t i) {
  if (cmov_fold_impl(env, inst, b, i)) return true;
  return cmov_setcc_impl(
    env, inst, b, i,
    [] (Vout& v, Vreg8 src, Vreg dest) { v << copy{src, dest}; }
  );
}

bool simplify(Env& env, const cmovw& inst, Vlabel b, size_t i) {
  if (cmov_fold_impl(env, inst, b, i)) return true;
  return cmov_setcc_impl(
    env, inst, b, i,
    [] (Vout& v, Vreg8 src, Vreg dest) { v << movzbw{src, dest}; }
  );
}

bool simplify(Env& env, const cmovl& inst, Vlabel b, size_t i) {
  if (cmov_fold_impl(env, inst, b, i)) return true;
  return cmov_setcc_impl(
    env, inst, b, i,
    [] (Vout& v, Vreg8 src, Vreg dest) { v << movzbl{src, dest}; }
  );
}

bool simplify(Env& env, const cmovq& inst, Vlabel b, size_t i) {
  if (cmov_fold_impl(env, inst, b, i)) return true;
  return cmov_setcc_impl(
    env, inst, b, i,
    [] (Vout& v, Vreg8 src, Vreg dest) { v << movzbq{src, dest}; }
  );
}

////////////////////////////////////////////////////////////////////////////////

/*
 * Copies, loads, and stores.
 */

bool simplify(Env& env, const copyargs& inst, Vlabel b, size_t i) {
  auto const& srcs = env.unit.tuples[inst.s];
  auto const& dsts = env.unit.tuples[inst.d];
  assertx(srcs.size() == dsts.size());

  for (auto const src : srcs) {
    for (auto const dst : dsts) {
      if (src == dst) return false;
    }
  }

  // If the srcs and dsts don't intersect, simplify to a sequence of copies.
  return simplify_impl(env, b, i, [&] (Vout& v) {
    for (size_t i = 0; i < srcs.size(); ++i) {
      v << copy{srcs[i], dsts[i]};
    }
    return 1;
  });
}

/*
 * Simplify a load followed by a truncation:
 *
 *   load{s, tmp}; movtqb{tmp, d}    -> loadtqb{s, d}
 *   load{s, tmp}; movtql{tmp, d}    -> loadtql{s, d}
 *   loadzbq{s, tmp}; movtqb{tmp, d} -> loadb{s, d}
 *   loadzlq{s, tmp}; movtql{tmp, d} -> loadl{s, d}
 */
template<Vinstr::Opcode mov_op, typename loadt, typename load_in>
bool simplify_load_truncate(Env& env, const load_in& load, Vlabel b, size_t i) {
  if (env.use_counts[load.d] != 1) return false;
  auto const& code = env.unit.blocks[b].code;
  if (i + 1 >= code.size()) return false;

  return if_inst<mov_op>(env, b, i + 1, [&] (const op_type<mov_op>& mov) {
    if (load.d != mov.s) return false;
    return simplify_impl(env, b, i, [&] (Vout& v) {
      v << loadt{load.s, mov.d};
      return 2;
    });
  });
}

bool simplify(Env& env, const load& load, Vlabel b, size_t i) {
  return
    simplify_load_truncate<Vinstr::movtqb, loadtqb>(env, load, b, i) ||
    simplify_load_truncate<Vinstr::movtql, loadtql>(env, load, b, i);
}

bool simplify(Env& env, const loadzbq& load, Vlabel b, size_t i) {
  return simplify_load_truncate<Vinstr::movtqb, loadb>(env, load, b, i);
}

bool simplify(Env& env, const loadzlq& load, Vlabel b, size_t i) {
  return simplify_load_truncate<Vinstr::movtql, loadl>(env, load, b, i);
}

bool simplify(Env& env, const movzlq& inst, Vlabel b, size_t i) {
  auto const def_op = env.def_insts[inst.s];

  // Check if `inst.s' was defined by an instruction with Vreg32 operands, or
  // movzbl{} in particular (which lowers to a movl{}).
  if (width(def_op) != Width::Long &&
      def_op != Vinstr::movzbl) {
    return false;
  }

  // If so, the movzlq{} is redundant---instructions on 32-bit registers on
  // x64 always zero the upper bits.
  return simplify_impl(env, b, i, [&] (Vout& v) {
    v << copy{inst.s, inst.d};
    return 1;
  });
}

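/*
 * For example (illustrative): if %t0 is defined by addl{}, its upper 32 bits
 * are already zero on x64, so movzlq{%t0, %t1} simplifies to copy{%t0, %t1}.
 */
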
///////////////////////////////////////////////////////////////////////////////

/*
 * Pushes and pops.
 */

bool simplify(Env& env, const pop& inst, Vlabel b, size_t i) {
  if (env.use_counts[inst.d]) return false;

  // Convert to an lea when popping to a reg without any uses.
  return simplify_impl(env, b, i, [&] (Vout& v) {
    v << lea{reg::rsp[8], reg::rsp};
    return 1;
  });
}

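/*
 * That is (illustrative): pop{%t0} where %t0 is never read becomes
 * lea{%rsp[8], %rsp}, adjusting the stack pointer without touching memory.
 */
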
///////////////////////////////////////////////////////////////////////////////

// Try to do constant folding on Vptr operands.

struct SimplifyVptrVisit {
  explicit SimplifyVptrVisit(Env& env) : env{env} {}
  template<class T> void def(T) {}
  template<class T> void imm(const T&) {}
  template<class T> void across(T) {}
  template<class T, class H> void defHint(T, H) {}
  template<class T, class H> void useHint(T r, H) { use(r); }
  template<typename T> void use(T) {}

  void use(Vptr& ptr) {
    // If the base is a constant, we can fold it into the displacement (if it
    // fits).
    if (ptr.base.isValid()) {
      auto const it = env.unit.regToConst.find(ptr.base);
      if (it != env.unit.regToConst.end()) {
        auto const newDisp = static_cast<int64_t>(ptr.disp) + it->second.val;
        if (deltaFits(newDisp, sz::dword)) {
          ptr.disp = newDisp;
          ptr.base = Vreg{};
          changed = true;
        }
      }
    }

    // If the index is a constant, we can fold it into the displacement (if it
    // fits).
    if (ptr.index.isValid()) {
      auto const it = env.unit.regToConst.find(ptr.index);
      if (it != env.unit.regToConst.end()) {
        auto const newDisp =
          static_cast<int64_t>(ptr.disp) + it->second.val * ptr.scale;
        if (deltaFits(newDisp, sz::dword)) {
          ptr.disp = newDisp;
          ptr.index = Vreg{};
          ptr.scale = 1;
          changed = true;
        }
      }
    }
  }

  Env& env;
  bool changed = false;
};

bool simplify_vptr(Env& env, Vinstr& inst) {
  SimplifyVptrVisit v{env};
  visitOperands(inst, v);
  return v.changed;
}

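/*
 * Illustrative folding (hypothetical vregs): with %t0 bound to the constant
 * 0x30, the operand %t0[%t1 * 2 + 0x8] becomes [%t1 * 2 + 0x38]; if %t1 is
 * also constant, the index is folded likewise, provided each new displacement
 * fits in 32 bits.
 */
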
///////////////////////////////////////////////////////////////////////////////

/*
 * Perform peephole simplification at instruction `i' of block `b'.
 *
 * Return true if changes were made, else false.
 */
bool simplify(Env& env, Vlabel b, size_t i) {
  assertx(i <= env.unit.blocks[b].code.size());
  auto& inst = env.unit.blocks[b].code[i];

  auto const changed = simplify_vptr(env, inst);

  switch (inst.op) {
#define O(name, ...)                                          \
    case Vinstr::name:                                        \
      return simplify(env, inst.name##_, b, i) || changed;
    VASM_OPCODES
#undef O
  }
  not_reached();
}

/*
 * Perform architecture-specific peephole simplification.
 */
bool simplify_arch(Env& env, Vlabel b, size_t i) {
  return ARCH_SWITCH_CALL(simplify, env, b, i);
}

///////////////////////////////////////////////////////////////////////////////

}

/*
 * Peephole simplification pass for a Vunit.
 */
void simplify(Vunit& unit) {
  assertx(check(unit));
  auto& blocks = unit.blocks;

  Env env { unit };
  env.use_counts.resize(unit.next_vr);
  env.def_insts.resize(unit.next_vr, Vinstr::nop);

  auto const labels = sortBlocks(unit);

  // Set up Env, only visiting reachable blocks.
  for (auto const b : labels) {
    assertx(!blocks[b].code.empty());

    for (auto const& inst : blocks[b].code) {
      visitDefs(unit, inst, [&] (Vreg r) { env.def_insts[r] = inst.op; });
      visitUses(unit, inst, [&] (Vreg r) { ++env.use_counts[r]; });
    }
  }

  // The simplify() implementations may allocate scratch blocks and modify
  // instruction streams, so we cannot use standard iterators here.
  for (auto const b : labels) {
    for (size_t i = 0; i < blocks[b].code.size(); ++i) {
      // Simplify at this index until no changes are made.
      while (simplify(env, b, i) || simplify_arch(env, b, i)) {
        // Stop if we simplified away the tail of the block.
        if (i >= blocks[b].code.size()) break;
      }
    }
  }

  printUnit(kVasmSimplifyLevel, "after vasm simplify", unit);
}

///////////////////////////////////////////////////////////////////////////////

}}