2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/runtime/vm/jit/vasm.h"
18 #include "hphp/runtime/vm/jit/vasm-simplify-internal.h"
20 #include "hphp/runtime/vm/jit/containers.h"
21 #include "hphp/runtime/vm/jit/vasm-gen.h"
22 #include "hphp/runtime/vm/jit/vasm-info.h"
23 #include "hphp/runtime/vm/jit/vasm-instr.h"
24 #include "hphp/runtime/vm/jit/vasm-print.h"
25 #include "hphp/runtime/vm/jit/vasm-unit.h"
26 #include "hphp/runtime/vm/jit/vasm-util.h"
27 #include "hphp/runtime/vm/jit/vasm-visit.h"
29 #include "hphp/util/arch.h"
30 #include "hphp/util/asm-x64.h"
36 namespace HPHP
{ namespace jit
{
40 ///////////////////////////////////////////////////////////////////////////////
42 enum class ResValid
: uint8_t {
49 template<class T
> void imm (T
) {}
50 template<class T
> void def (T
) {}
51 template<class T
> void use (T
) {}
53 if (rv
!= ResValid::Empty
) {
54 rv
= ResValid::Invalid
;
60 void use (Vptr mem
) { def(mem
); }
61 template<class T
> void across (T
) {}
62 template<class T
, class H
> void useHint(T
,H
) {}
63 template<class T
, class H
> void defHint(T
,H
) {}
65 bool isValid() { return rv
== ResValid::Valid
;}
68 ResValid rv
{ResValid::Empty
};
72 * Return the Vptr and size for simple insts that read/write memory.
74 * Return a size zero to indicate a non-simple inst, e.g. a call.
76 std::pair
<Vptr
, uint8_t> getMemOpAndSize(const Vinstr
& inst
) {
78 visitOperands(inst
, g
);
80 auto const vop
= g
.mr
;
81 if (!g
.isValid()) return {vop
, 0};
83 auto sz
= width(inst
.op
);
84 // workaround: load and store opcodes report a width of Octa,
85 // while in reality they are Quad.
86 if (inst
.op
== Vinstr::load
|| inst
.op
== Vinstr::store
) {
91 auto const size
= [&] {
95 case Width::Word
: case Width::WordN
:
97 case Width::Long
: case Width::LongN
:
99 case Width::Quad
: case Width::QuadN
:
101 case Width::Octa
: case Width::AnyNF
:
110 * Return true if we are sure that `inst1' does not write to the same memory
111 * location that `inst2' reads or writes (and vice versa).
113 * (Note that a false return does /not/ make any positive indication about
// NOTE(review): extraction artifact — interior lines (early-return bodies and
// closing braces) are missing, and each logical line is shattered across
// several physical lines with stray leading line numbers.
116 bool cannot_alias_write(const Vinstr
& inst1
, const Vinstr
& inst2
) {
// Fast path: if neither instruction writes memory, no write/write or
// write/read conflict is possible.
117 if (!writesMemory(inst1
.op
) && !writesMemory(inst2
.op
)) return true;
118 auto const p1
= getMemOpAndSize(inst1
);
119 auto const p2
= getMemOpAndSize(inst2
);
// A reported size of zero marks a non-simple instruction (e.g. a call) —
// conservatively assume it may alias.
120 if (p1
.second
== 0 || p2
.second
== 0) return false;
121 auto const v1
= p1
.first
;
122 auto const v2
= p2
.first
;
123 auto const size1
= p1
.second
;
124 auto const size2
= p2
.second
;
// Different base or segment register: cannot prove the accesses are disjoint.
125 if (v1
.base
!= v2
.base
|| v1
.seg
!= v2
.seg
) return false;
// Mismatched index registers (or same index with different scale) also
// defeat the disjointness proof.
126 if ((v1
.index
!= v2
.index
) ||
127 (v1
.index
.isValid() && (v1
.scale
!= v2
.scale
))) {
// Same base/seg/index: equal displacements mean the accesses overlap.
130 if (v1
.disp
== v2
.disp
) return false;
131 if (v1
.disp
< v2
.disp
) {
// Disjoint iff the lower access ends at or before the higher one begins.
132 return v2
.disp
>= v1
.disp
+ size1
;
134 return v1
.disp
>= v2
.disp
+ size2
;
139 * Metadata about a potentially-foldable load Vinstr.
141 struct FoldableLoadInfo
{
// NOTE(review): the member declarations are missing from this extraction.
// Judging from later uses (info.operand, info.idx, info.check_writes in
// foldable_load), the fields are presumably: a `const Vptr* operand`, a
// `size_t idx`, and a `bool check_writes` — TODO confirm against upstream.
143 * Source memory location of the load.
147 * Index into the instruction stream.
149 * Note that this makes FoldableLoadInfo block-context-sensitive.
153 * Whether we need to check for interfering writes to the same location.
159 * If reg is defined by a load in b with index j < i, and there are no
160 * interfering stores between j and i, return a pointer to its Vptr argument.
162 * Note that during simplification, we can end up with mismatched register
163 * sizes, so that a loadzbq feeds a byte sized instruction. In that case, its
164 * ok to treat the loadzbq as if it were a loadb, provided we're expecting a
165 * byte sized operand. Since Vreg doesn't carry a register-width around, we
166 * pass it in via the `size' param, so that we can verify the zero-extend
169 FoldableLoadInfo
foldable_load_helper(Env
& env
, Vreg reg
, int size
,
170 Vlabel b
, size_t i
, bool mw
) {
171 if (!i
|| env
.use_counts
[reg
] != 1) return { nullptr, 0, false };
173 auto const def_inst
= env
.def_insts
[reg
];
180 case Vinstr::loadzbq
:
181 case Vinstr::loadzbl
:
182 if (size
!= sz::byte
) return { nullptr, 0, false };
184 case Vinstr::loadzlq
:
185 if (size
!= sz::dword
) return { nullptr, 0, false };
190 return { nullptr, 0, false };
193 #define CHECK_LOAD(load) \
195 if (inst.load##_.d != reg) break; \
196 return { &inst.load##_.s, i, mw };
199 auto const& inst
= env
.unit
.blocks
[b
].code
[i
];
200 if (inst
.op
== def_inst
) {
210 if (inst
.copy_
.d
!= reg
) break;
211 return foldable_load_helper(env
, inst
.copy_
.s
, size
, b
, i
, mw
);
216 auto const op
= env
.unit
.blocks
[b
].code
[i
].op
;
217 mw
|= writesMemory(op
);
220 return { nullptr, 0, false };
223 const Vptr
* foldable_load(Env
& env
, Vreg reg
, int size
,
224 Vlabel b
, size_t i
) {
225 auto const info
= foldable_load_helper(env
, reg
, size
, b
, i
, false);
226 if (!info
.operand
|| info
.idx
+ 1 == i
) return info
.operand
;
228 std::vector
<Vreg
> nonSSARegs
;
229 visit(env
.unit
, *info
.operand
, [&] (Vreg r
, Width
) {
230 if (r
.isPhys()) nonSSARegs
.push_back(r
);
232 if (nonSSARegs
.empty() && !info
.check_writes
) return info
.operand
;
234 // The Vptr contains non-ssa regs, so we need to check that they're
235 // not modified between the load and the use.
236 auto const loadInst
= env
.unit
.blocks
[b
].code
[info
.idx
];
237 for (auto ix
= info
.idx
; ++ix
< i
; ) {
238 auto const& inst
= env
.unit
.blocks
[b
].code
[ix
];
239 if (isCall(inst
)) return nullptr;
241 if (!nonSSARegs
.empty()) {
242 visitDefs(env
.unit
, inst
, [&] (Vreg r
, Width
) {
243 for (auto const t
: nonSSARegs
) {
244 if (t
== r
) ok
= false;
247 if (!ok
) return nullptr;
249 if (info
.check_writes
&& writesMemory(inst
.op
) &&
250 !cannot_alias_write(loadInst
, inst
)) {
257 const Vptr
* foldable_load(Env
& env
, Vreg8 reg
, Vlabel b
, size_t i
) {
258 return foldable_load(env
, reg
, sz::byte
, b
, i
);
261 const Vptr
* foldable_load(Env
& env
, Vreg16 reg
, Vlabel b
, size_t i
) {
262 return foldable_load(env
, reg
, sz::word
, b
, i
);
265 const Vptr
* foldable_load(Env
& env
, Vreg32 reg
, Vlabel b
, size_t i
) {
266 return foldable_load(env
, reg
, sz::dword
, b
, i
);
269 const Vptr
* foldable_load(Env
& env
, Vreg64 reg
, Vlabel b
, size_t i
) {
270 return foldable_load(env
, reg
, sz::qword
, b
, i
);
273 ///////////////////////////////////////////////////////////////////////////////
276 * Simplify an `inst' at block `b', instruction `i', returning whether or not
277 * any changes were made.
279 * Specializations are below.
281 template <typename Inst
>
282 bool simplify(Env
&, const Inst
& /*inst*/, Vlabel
/*b*/, size_t /*i*/) {
286 ///////////////////////////////////////////////////////////////////////////////
288 * Arithmetic instructions.
291 template<typename Test
, typename And
>
292 bool simplify_and(Env
& env
, const And
& vandq
, Vlabel b
, size_t i
) {
293 return if_inst
<Vinstr::testq
>(env
, b
, i
+ 1, [&] (const testq
& vtestq
) {
294 // And{s0, s1, tmp, _}; testq{tmp, tmp, sf} -> Test{s0, s1, sf}
295 // where And/Test is either andq/testq, or andqi/testqi.
296 if (!(env
.use_counts
[vandq
.d
] == 2 &&
297 env
.use_counts
[vandq
.sf
] == 0 &&
298 vtestq
.s0
== vandq
.d
&&
299 vtestq
.s1
== vandq
.d
)) return false;
301 return simplify_impl(env
, b
, i
, [&] (Vout
& v
) {
302 v
<< Test
{vandq
.s0
, vandq
.s1
, vtestq
.sf
};
308 bool simplify(Env
& env
, const andq
& vandq
, Vlabel b
, size_t i
) {
309 return simplify_and
<testq
>(env
, vandq
, b
, i
);
312 bool simplify(Env
& env
, const andqi
& vandqi
, Vlabel b
, size_t i
) {
313 return simplify_and
<testqi
>(env
, vandqi
, b
, i
);
317 * Simplify masking values with -1 in andXi{}:
318 * andbi{0xff, s, d} -> copy{s, d}
319 * andli{0xffffffff, s, d} -> copy{s, d}
321 template<typename andi
>
322 bool simplify_andi(Env
& env
, const andi
& inst
, Vlabel b
, size_t i
) {
323 if (inst
.s0
.l() != -1 ||
324 env
.use_counts
[inst
.sf
] != 0) return false;
325 return simplify_impl(env
, b
, i
, [&] (Vout
& v
) {
326 v
<< copy
{inst
.s1
, inst
.d
};
331 bool simplify(Env
& env
, const andbi
& andbi
, Vlabel b
, size_t i
) {
332 return simplify_andi(env
, andbi
, b
, i
);
335 bool simplify(Env
& env
, const andli
& andli
, Vlabel b
, size_t i
) {
336 return simplify_andi(env
, andli
, b
, i
);
339 ////////////////////////////////////////////////////////////////////////////////
345 * Return a bound on the size of a Vreg.
346 * If its known to fit in:
347 * - 8 unsigned bits, return sz::byte
348 * - 16 unsigned bits, return sz::word
349 * - 32 unsigned bits, return sz::dword
350 * - otherwise return sz::qword.
352 int value_width(Env
& env
, Vreg reg
) {
353 auto const it
= env
.unit
.regToConst
.find(reg
);
354 if (it
!= env
.unit
.regToConst
.end()) {
355 if (!it
->second
.isUndef
&&
356 it
->second
.kind
!= Vconst::Double
) {
357 if (it
->second
.val
<= 0xff) return sz::byte
;
358 if (it
->second
.val
<= 0xffff) return sz::word
;
359 if (it
->second
.val
<= 0xffffffff) return sz::dword
;
364 switch (env
.def_insts
[reg
]) {
365 case Vinstr::loadzbq
:
366 case Vinstr::loadzbl
:
377 case Vinstr::loadzlq
:
387 * Return a suitable register for r, with width size.
389 * - if r is constant, just return a constant.
390 * - if r is the result of a zero-extending move from the
391 * correct size, return the source of the move
392 * - if r is the result of a zero-extending load with one use,
393 * convert the load to a non-zero extending form
394 * - otherwise apply a movtq<sz> to the register, and return the dst.
396 Vreg
narrow_reg(Env
& env
, int size
, Vreg r
, Vlabel b
, size_t i
, Vout
& v
) {
397 auto const it
= env
.unit
.regToConst
.find(r
);
398 if (it
!= env
.unit
.regToConst
.end()) {
399 assertx(!it
->second
.isUndef
&& it
->second
.kind
!= Vconst::Double
);
404 auto replace
= [&] (const Vinstr
& rep
) {
405 return simplify_impl(env
, b
, i
, [&] (Vout
& v
) {
410 auto match
= [&] (int rsz
, size_t rn
) {
411 if (rsz
!= size
) return Vreg
{};
412 if (env
.use_counts
[r
] == 1) {
418 auto const& inst
= env
.unit
.blocks
[b
].code
[i
];
420 case Vinstr::loadzbq
: {
421 auto const& load
= inst
.get
<Vinstr::loadzbq
>();
423 if (size
== sz::byte
&& env
.use_counts
[r
] == 1) {
424 replace(loadb
{load
.s
, r
});
431 case Vinstr::loadzbl
: {
432 auto const& load
= inst
.get
<Vinstr::loadzbl
>();
434 if (size
== sz::byte
&& env
.use_counts
[r
] == 1) {
435 replace(loadb
{load
.s
, r
});
443 if (inst
.movzbw_
.d
== r
) return match(sz::byte
, inst
.movzbw_
.s
);
446 if (inst
.movzbl_
.d
== r
) return match(sz::byte
, inst
.movzbl_
.s
);
449 if (inst
.movzbq_
.d
== r
) return match(sz::byte
, inst
.movzbq_
.s
);
453 if (inst
.movzwl_
.d
== r
) return match(sz::word
, inst
.movzwl_
.s
);
456 if (inst
.movzwq_
.d
== r
) return match(sz::word
, inst
.movzwq_
.s
);
460 if (inst
.movzlq_
.d
== r
) return match(sz::dword
, inst
.movzlq_
.s
);
463 case Vinstr::loadzlq
: {
464 auto const& load
= inst
.get
<Vinstr::loadzlq
>();
466 if (size
== sz::dword
&& env
.use_counts
[r
] == 1) {
467 replace(loadl
{load
.s
, r
});
481 int narrow_cmp(Env
& env
, int size
, const cmpq
& vcmp
, Vlabel b
, size_t i
,
483 auto getreg
= [&] (Vreg reg
, Vout
& v
) {
484 auto out
= narrow_reg(env
, size
, reg
, b
, i
, v
);
485 if (!out
.isValid()) {
489 v
<< movtqb
{reg
, out
};
492 v
<< movtqw
{reg
, out
};
495 v
<< movtql
{reg
, out
};
498 always_assert(false);
503 auto const s0
= getreg(vcmp
.s0
, v
);
504 auto const s1
= getreg(vcmp
.s1
, v
);
507 v
<< cmpb
{s0
, s1
, vcmp
.sf
};
510 v
<< cmpw
{s0
, s1
, vcmp
.sf
};
513 v
<< cmpl
{s0
, s1
, vcmp
.sf
};
516 always_assert(false);
528 * Check whether all uses of sf are suitable, or can be converted to
529 * suitable uses, as determined by fun.
531 * fun takes a CC as its argument, and returns
532 * - CC_None to indicate the usage is unsuitable
533 * - The input cc to indicate that its already suitable
534 * - Some other cc to indicate that we could convert to that.
536 * If fix is true, this will convert any uses as specified.
539 SFUsage
check_sf_usage_helper(Env
& env
, Vreg sf
, Vlabel b
, size_t i
, bool fix
,
541 auto ret
= SFUsage::Ok
;
542 auto uses
= env
.use_counts
[sf
];
544 auto const& code
= env
.unit
.blocks
[b
].code
;
545 if (++i
== code
.size()) return SFUsage::Unfixable
;
546 if (getSFUseReg(code
[i
]) == sf
) {
549 auto &cc
= getConditionCode(tmp
);
550 auto const newCC
= fun(cc
);
551 if (newCC
== CC_None
) {
552 return SFUsage::Unfixable
;
557 simplify_impl(env
, b
, i
, [&] (Vout
& v
) {
562 ret
= SFUsage::Fixable
;
570 bool check_sf_usage(Env
& env
, Vreg sf
, Vlabel b
, size_t i
, F fun
) {
571 switch (check_sf_usage_helper(env
, sf
, b
, i
, false, fun
)) {
574 case SFUsage::Fixable
:
575 check_sf_usage_helper(env
, sf
, b
, i
, true, fun
);
577 case SFUsage::Unfixable
:
583 // Try to change the condition codes so they work when the inputs to
584 // the compare are swapped. Return true if successful (or there was
586 bool flip_ordered_uses(Env
& env
, Vreg sf
, Vlabel b
, size_t i
) {
587 return check_sf_usage(
589 [] (ConditionCode cc
) {
592 always_assert(false);
597 case CC_B
: return CC_A
;
598 case CC_AE
: return CC_BE
;
599 case CC_BE
: return CC_AE
;
600 case CC_A
: return CC_B
;
612 case CC_L
: return CC_G
;
613 case CC_GE
: return CC_LE
;
614 case CC_LE
: return CC_GE
;
615 case CC_G
: return CC_L
;
622 // Change any signed conditions to the corresponding unsigned
623 // condition. Return true if successful (or there was nothing to do).
624 bool fix_signed_uses(Env
& env
, Vreg sf
, Vlabel b
, size_t i
) {
625 return check_sf_usage(
627 [] (ConditionCode cc
) {
630 always_assert(false);
649 case CC_L
: return CC_B
;
650 case CC_GE
: return CC_AE
;
651 case CC_LE
: return CC_BE
;
652 case CC_G
: return CC_A
;
660 * Return a (Vptr*, idx in `b') handle to the first store of `reg' within block
661 * `b' after position `i'.
663 * Returns { nullptr, 0 } if no such store is found, or if that store is not
664 * the only use of `reg'.
666 std::pair
<const Vptr
*, size_t>
667 storeq(Env
& env
, Vreg64 reg
, Vlabel b
, size_t i
) {
668 if (env
.use_counts
[reg
] != 1) return { nullptr, 0 };
670 while (++i
< env
.unit
.blocks
[b
].code
.size()) {
671 auto const& inst
= env
.unit
.blocks
[b
].code
[i
];
672 if (inst
.op
== Vinstr::store
&& inst
.store_
.s
== reg
) {
673 return { &(inst
.store_
.d
), i
};
676 return { nullptr, 0 };
679 bool is_reg_const(Env
& env
, Vreg reg
) {
680 return env
.unit
.regToConst
.find(reg
) != env
.unit
.regToConst
.end();
683 template <typename Op
>
684 bool flip_operands_helper(Env
& env
, const Op
& op
, Vlabel b
, size_t i
) {
685 // We want any constant register to be in the first operand, and any foldable
686 // load to be in the second.
687 if (!(is_reg_const(env
, op
.s0
) || foldable_load(env
, op
.s1
, b
, i
)) &&
688 (is_reg_const(env
, op
.s1
) || foldable_load(env
, op
.s0
, b
, i
))) {
689 if (flip_ordered_uses(env
, op
.sf
, b
, i
)) {
690 return simplify_impl(env
, b
, i
, Op
{ op
.s1
, op
.s0
, op
.sf
});
696 bool simplify(Env
& env
, const addq
& vadd
, Vlabel b
, size_t i
) {
697 if (arch_any(Arch::ARM
, Arch::PPC64
)) return false;
699 auto stPair
= storeq(env
, vadd
.d
, b
, i
);
700 auto const vptrs
= stPair
.first
;
701 auto const stIdx
= stPair
.second
;
704 auto const vptrl
= foldable_load(env
, vadd
.s0
, b
, stIdx
);
705 if (vptrl
&& (*vptrl
== *vptrs
)) {
706 bool ret
= simplify_impl(env
, b
, i
, addqrm
{ vadd
.s1
, *vptrs
, vadd
.sf
});
707 // need to do it here because a store will not be removed as dead code
708 if (ret
) env
.unit
.blocks
[b
].code
[stIdx
] = nop
{};
711 auto const vptrl2
= foldable_load(env
, vadd
.s1
, b
, stIdx
);
712 if (vptrl2
&& (*vptrl2
== *vptrs
)) {
713 bool ret
= simplify_impl(env
, b
, i
, addqrm
{ vadd
.s0
, *vptrs
, vadd
.sf
});
714 if (ret
) env
.unit
.blocks
[b
].code
[stIdx
] = nop
{};
718 if (auto const vptr
= foldable_load(env
, vadd
.s0
, b
, i
)) {
719 return simplify_impl(env
, b
, i
, addqmr
{*vptr
, vadd
.s1
, vadd
.d
, vadd
.sf
});
721 if (auto const vptr
= foldable_load(env
, vadd
.s1
, b
, i
)) {
722 return simplify_impl(env
, b
, i
, addqmr
{*vptr
, vadd
.s0
, vadd
.d
, vadd
.sf
});
727 bool simplify(Env
& env
, const cmpq
& vcmp
, Vlabel b
, size_t i
) {
728 if (flip_operands_helper(env
, vcmp
, b
, i
)) return true;
730 if (!arch_any(Arch::ARM
, Arch::PPC64
)) {
731 if (auto const vptr
= foldable_load(env
, vcmp
.s1
, b
, i
)) {
732 return simplify_impl(env
, b
, i
, cmpqm
{ vcmp
.s0
, *vptr
, vcmp
.sf
});
734 if (auto const vptr
= foldable_load(env
, vcmp
.s0
, b
, i
)) {
735 if (flip_ordered_uses(env
, vcmp
.sf
, b
, i
)) {
736 return simplify_impl(env
, b
, i
, cmpqm
{ vcmp
.s1
, *vptr
, vcmp
.sf
});
741 auto const sz0
= value_width(env
, vcmp
.s0
);
742 if (sz0
== sz::qword
) return false;
743 auto const sz1
= value_width(env
, vcmp
.s1
);
744 if (sz1
== sz::qword
) return false;
746 auto const sz
= sz1
> sz0
? sz1
: sz0
;
748 if (!fix_signed_uses(env
, vcmp
.sf
, b
, i
)) return false;
750 return simplify_impl(env
, b
, i
, [&] (Vout
& v
) {
751 return narrow_cmp(env
, sz
, vcmp
, b
, i
, v
);
755 bool simplify(Env
& env
, const cmpl
& vcmp
, Vlabel b
, size_t i
) {
756 if (flip_operands_helper(env
, vcmp
, b
, i
)) return true;
758 if (arch() == Arch::ARM
) return false;
759 if (auto const vptr
= foldable_load(env
, vcmp
.s1
, b
, i
)) {
760 return simplify_impl(env
, b
, i
,
761 cmplm
{ vcmp
.s0
, *vptr
, vcmp
.sf
});
// cmpw: canonicalize operand order, then fold a single-use defining load of
// s1 into a cmpwm memory operand.
// NOTE(review): original line 768 is missing from this extraction — it may be
// a blank line or an arch guard (cf. the cmpl overload above); confirm before
// reconstructing. The trailing `}`/`return false;` lines are also missing.
766 bool simplify(Env
& env
, const cmpw
& vcmp
, Vlabel b
, size_t i
) {
767 if (flip_operands_helper(env
, vcmp
, b
, i
)) return true;
769 if (auto const vptr
= foldable_load(env
, vcmp
.s1
, b
, i
)) {
770 return simplify_impl(env
, b
, i
,
771 cmpwm
{ vcmp
.s0
, *vptr
, vcmp
.sf
});
// cmpb: canonicalize operand order, then fold a single-use defining load of
// s1 into a cmpbm memory operand.
// NOTE(review): original line 778 is missing from this extraction — it may be
// a blank line or an arch guard; confirm before reconstructing. The trailing
// `}`/`return false;` lines are also missing.
776 bool simplify(Env
& env
, const cmpb
& vcmp
, Vlabel b
, size_t i
) {
777 if (flip_operands_helper(env
, vcmp
, b
, i
)) return true;
779 if (auto const vptr
= foldable_load(env
, vcmp
.s1
, b
, i
)) {
780 return simplify_impl(env
, b
, i
,
781 cmpbm
{ vcmp
.s0
, *vptr
, vcmp
.sf
});
786 bool simplify(Env
& env
, const cmpqi
& vcmp
, Vlabel b
, size_t i
) {
787 if (arch_any(Arch::ARM
, Arch::PPC64
)) return false;
788 if (auto const vptr
= foldable_load(env
, vcmp
.s1
, b
, i
)) {
789 return simplify_impl(env
, b
, i
,
790 cmpqim
{ vcmp
.s0
, *vptr
, vcmp
.sf
});
795 bool simplify(Env
& env
, const cmpli
& vcmp
, Vlabel b
, size_t i
) {
796 if (arch() == Arch::ARM
) return false;
797 if (auto const vptr
= foldable_load(env
, vcmp
.s1
, b
, i
)) {
798 return simplify_impl(env
, b
, i
,
799 cmplim
{ vcmp
.s0
, *vptr
, vcmp
.sf
});
804 bool simplify(Env
& env
, const cmpwi
& vcmp
, Vlabel b
, size_t i
) {
805 if (arch() == Arch::ARM
) return false;
806 if (auto const vptr
= foldable_load(env
, vcmp
.s1
, b
, i
)) {
807 return simplify_impl(env
, b
, i
,
808 cmpwim
{ vcmp
.s0
, *vptr
, vcmp
.sf
});
813 bool simplify(Env
& env
, const cmpbi
& vcmp
, Vlabel b
, size_t i
) {
814 if (arch_any(Arch::ARM
, Arch::PPC64
)) return false;
815 if (auto const vptr
= foldable_load(env
, vcmp
.s1
, b
, i
)) {
816 return simplify_impl(env
, b
, i
,
817 cmpbim
{ vcmp
.s0
, *vptr
, vcmp
.sf
});
823 ////////////////////////////////////////////////////////////////////////////////
825 * Conditional operations.
828 bool simplify(Env
& env
, const setcc
& vsetcc
, Vlabel b
, size_t i
) {
829 return if_inst
<Vinstr::xorbi
>(env
, b
, i
+ 1, [&] (const xorbi
& vxorbi
) {
830 // setcc{cc, _, tmp}; xorbi{1, tmp, d, _}; -> setcc{~cc, _, tmp};
831 if (!(env
.use_counts
[vsetcc
.d
] == 1 &&
832 vxorbi
.s0
.b() == 1 &&
833 vxorbi
.s1
== vsetcc
.d
&&
834 env
.use_counts
[vxorbi
.sf
] == 0)) return false;
836 return simplify_impl(env
, b
, i
, [&] (Vout
& v
) {
837 v
<< setcc
{ccNegate(vsetcc
.cc
), vsetcc
.sf
, vxorbi
.d
};
844 * Fold a cmov of a certain width into a copy if both values are the same
845 * register or have the same known constant value.
847 template <typename Inst
>
848 bool cmov_fold_impl(Env
& env
, const Inst
& inst
, Vlabel b
, size_t i
) {
849 auto const equivalent
= [&]{
850 if (inst
.t
== inst
.f
) return true;
852 auto const t_it
= env
.unit
.regToConst
.find(inst
.t
);
853 if (t_it
== env
.unit
.regToConst
.end()) return false;
854 auto const f_it
= env
.unit
.regToConst
.find(inst
.f
);
855 if (f_it
== env
.unit
.regToConst
.end()) return false;
857 auto const t_const
= t_it
->second
;
858 auto const f_const
= f_it
->second
;
859 if (t_const
.isUndef
|| f_const
.isUndef
) return false;
860 if (t_const
.kind
!= f_const
.kind
) return false;
861 return t_const
.val
== f_const
.val
;
863 if (!equivalent
) return false;
865 return simplify_impl(
868 v
<< copy
{inst
.t
, inst
.d
};
875 * Turn a cmov of a certain width into a matching setcc instruction if the
876 * conditions are correct (both sources are constants of value 0 or 1).
878 template <typename Inst
, typename Extend
>
879 bool cmov_setcc_impl(Env
& env
, const Inst
& inst
, Vlabel b
,
880 size_t i
, Extend extend
) {
881 auto const t_it
= env
.unit
.regToConst
.find(inst
.t
);
882 if (t_it
== env
.unit
.regToConst
.end()) return false;
883 auto const f_it
= env
.unit
.regToConst
.find(inst
.f
);
884 if (f_it
== env
.unit
.regToConst
.end()) return false;
886 auto const check_const
= [](Vconst c
, bool& val
) {
887 if (c
.isUndef
) return false;
895 } else if (c
.val
== 1) {
908 if (!check_const(t_it
->second
, t_val
)) return false;
910 if (!check_const(f_it
->second
, f_val
)) return false;
912 return simplify_impl(env
, b
, i
, [&] (Vout
& v
) {
913 auto const d
= env
.unit
.makeReg();
914 if (t_val
== f_val
) {
915 v
<< copy
{env
.unit
.makeConst(t_val
), d
};
917 v
<< setcc
{inst
.cc
, inst
.sf
, d
};
919 v
<< setcc
{ccNegate(inst
.cc
), inst
.sf
, d
};
921 extend(v
, d
, inst
.d
);
926 bool simplify(Env
& env
, const cmovb
& inst
, Vlabel b
, size_t i
) {
927 if (cmov_fold_impl(env
, inst
, b
, i
)) return true;
928 return cmov_setcc_impl(
930 [](Vout
& v
, Vreg8 src
, Vreg dest
) { v
<< copy
{src
, dest
}; }
934 bool simplify(Env
& env
, const cmovw
& inst
, Vlabel b
, size_t i
) {
935 if (cmov_fold_impl(env
, inst
, b
, i
)) return true;
936 return cmov_setcc_impl(
938 [](Vout
& v
, Vreg8 src
, Vreg dest
) { v
<< movzbw
{src
, dest
}; }
942 bool simplify(Env
& env
, const cmovl
& inst
, Vlabel b
, size_t i
) {
943 if (cmov_fold_impl(env
, inst
, b
, i
)) return true;
944 return cmov_setcc_impl(
946 [](Vout
& v
, Vreg8 src
, Vreg dest
) { v
<< movzbl
{src
, dest
}; }
950 bool simplify(Env
& env
, const cmovq
& inst
, Vlabel b
, size_t i
) {
951 if (cmov_fold_impl(env
, inst
, b
, i
)) return true;
952 return cmov_setcc_impl(
954 [](Vout
& v
, Vreg8 src
, Vreg dest
) { v
<< movzbq
{src
, dest
}; }
958 ////////////////////////////////////////////////////////////////////////////////
960 * Copies, loads, and stores.
963 bool simplify(Env
& env
, const copyargs
& inst
, Vlabel b
, size_t i
) {
964 auto const& srcs
= env
.unit
.tuples
[inst
.s
];
965 auto const& dsts
= env
.unit
.tuples
[inst
.d
];
966 assertx(srcs
.size() == dsts
.size());
968 for (auto const src
: srcs
) {
969 for (auto const dst
: dsts
) {
970 if (src
== dst
) return false;
974 // If the srcs and dsts don't intersect, simplify to a sequence of copies.
975 return simplify_impl(env
, b
, i
, [&] (Vout
& v
) {
976 for (auto i
= 0; i
< srcs
.size(); ++i
) {
977 v
<< copy
{srcs
[i
], dsts
[i
]};
984 * Simplify load followed by truncation:
985 * load{s, tmp}; movtqb{tmp, d} -> loadtqb{s, d}
986 * load{s, tmp}; movtql{tmp, d} -> loadtql{s, d}
987 * loadzbq{s, tmp}; movtqb{tmp, d} -> loadb{s, d}
988 * loadzlq{s, tmp}; movtql{tmp, d} -> loadl{s, d}
990 template<Vinstr::Opcode mov_op
, typename loadt
, typename load_in
>
991 bool simplify_load_truncate(Env
& env
, const load_in
& load
, Vlabel b
, size_t i
) {
992 if (env
.use_counts
[load
.d
] != 1) return false;
993 auto const& code
= env
.unit
.blocks
[b
].code
;
994 if (i
+ 1 >= code
.size()) return false;
996 return if_inst
<mov_op
>(env
, b
, i
+ 1, [&] (const op_type
<mov_op
>& mov
) {
997 if (load
.d
!= mov
.s
) return false;
998 return simplify_impl(env
, b
, i
, [&] (Vout
& v
) {
999 v
<< loadt
{load
.s
, mov
.d
};
1005 bool simplify(Env
& env
, const load
& load
, Vlabel b
, size_t i
) {
1007 simplify_load_truncate
<Vinstr::movtqb
, loadtqb
>(env
, load
, b
, i
) ||
1008 simplify_load_truncate
<Vinstr::movtql
, loadtql
>(env
, load
, b
, i
);
1011 bool simplify(Env
& env
, const loadzbq
& load
, Vlabel b
, size_t i
) {
1012 return simplify_load_truncate
<Vinstr::movtqb
, loadb
>(env
, load
, b
, i
);
1015 bool simplify(Env
& env
, const loadzlq
& load
, Vlabel b
, size_t i
) {
1016 return simplify_load_truncate
<Vinstr::movtql
, loadl
>(env
, load
, b
, i
);
1019 bool simplify(Env
& env
, const movzlq
& inst
, Vlabel b
, size_t i
) {
1020 auto const def_op
= env
.def_insts
[inst
.s
];
1022 // Check if `inst.s' was defined by an instruction with Vreg32 operands, or
1023 // movzbl{} in particular (which lowers to a movl{}).
1024 if (width(def_op
) != Width::Long
&&
1025 def_op
!= Vinstr::movzbl
) {
1029 // If so, the movzlq{} is redundant---instructions on 32-bit registers on x64
1030 // always zero the upper bits.
1031 return simplify_impl(env
, b
, i
, [&] (Vout
& v
) {
1032 v
<< copy
{inst
.s
, inst
.d
};
1037 ///////////////////////////////////////////////////////////////////////////////
// pop into a dead destination: replace with a plain stack-pointer adjustment.
// NOTE(review): the lambda's trailing lines (its return value, `});`, and the
// function's closing brace) are missing from this extraction.
1042 bool simplify(Env
& env
, const pop
& inst
, Vlabel b
, size_t i
) {
// Only applies when the popped value is never used.
1043 if (env
.use_counts
[inst
.d
]) return false;
1045 // Convert to an lea when popping to a reg without any uses.
1046 return simplify_impl(env
, b
, i
, [&] (Vout
& v
) {
1047 v
<< lea
{reg::rsp
[8], reg::rsp
};
1052 ///////////////////////////////////////////////////////////////////////////////
1054 // Try to do constant folding on Vptr operands.
1056 struct SimplifyVptrVisit
{
1057 explicit SimplifyVptrVisit(Env
& env
) : env
{env
} {}
1058 template<class T
> void def(T
) {}
1059 template<class T
> void imm(const T
&) {}
1060 template<class T
> void across(T
) {}
1061 template<class T
, class H
> void defHint(T
, H
) {}
1062 template<class T
, class H
> void useHint(T r
, H
) { use(r
); }
1063 template <typename T
> void use(T
) {}
1065 void use(Vptr
& ptr
) {
1066 // If the base is a constant, we can fold it into the displacement (if it
1068 if (ptr
.base
.isValid()) {
1069 auto const it
= env
.unit
.regToConst
.find(ptr
.base
);
1070 if (it
!= env
.unit
.regToConst
.end()) {
1071 auto const newDisp
= static_cast<int64_t>(ptr
.disp
) + it
->second
.val
;
1072 if (deltaFits(newDisp
, sz::dword
)) {
1080 // If the index is a constant, we can fold it into the displacement (if it
1082 if (ptr
.index
.isValid()) {
1083 auto const it
= env
.unit
.regToConst
.find(ptr
.index
);
1084 if (it
!= env
.unit
.regToConst
.end()) {
1085 auto const newDisp
=
1086 static_cast<int64_t>(ptr
.disp
) + it
->second
.val
* ptr
.scale
;
1087 if (deltaFits(newDisp
, sz::dword
)) {
1098 bool changed
= false;
1101 bool simplify_vptr(Env
& env
, Vinstr
& inst
) {
1102 SimplifyVptrVisit v
{env
};
1103 visitOperands(inst
, v
);
1107 ///////////////////////////////////////////////////////////////////////////////
1110 * Perform peephole simplification at instruction `i' of block `b'.
1112 * Return true if changes were made, else false.
1114 bool simplify(Env
& env
, Vlabel b
, size_t i
) {
1115 assertx(i
<= env
.unit
.blocks
[b
].code
.size());
1116 auto& inst
= env
.unit
.blocks
[b
].code
[i
];
1118 auto const changed
= simplify_vptr(env
, inst
);
1121 #define O(name, ...) \
1122 case Vinstr::name: \
1123 return simplify(env, inst.name##_, b, i) || changed; \
1132 * Perform architecture-specific peephole simplification.
1134 bool simplify_arch(Env
& env
, Vlabel b
, size_t i
) {
1135 return ARCH_SWITCH_CALL(simplify
, env
, b
, i
);
1138 ///////////////////////////////////////////////////////////////////////////////
1143 * Peephole simplification pass for a Vunit.
1145 void simplify(Vunit
& unit
) {
1146 assertx(check(unit
));
1147 auto& blocks
= unit
.blocks
;
1150 env
.use_counts
.resize(unit
.next_vr
);
1151 env
.def_insts
.resize(unit
.next_vr
, Vinstr::nop
);
1153 auto const labels
= sortBlocks(unit
);
1155 // Set up Env, only visiting reachable blocks.
1156 for (auto const b
: labels
) {
1157 assertx(!blocks
[b
].code
.empty());
1159 for (auto const& inst
: blocks
[b
].code
) {
1160 visitDefs(unit
, inst
, [&] (Vreg r
) { env
.def_insts
[r
] = inst
.op
; });
1161 visitUses(unit
, inst
, [&] (Vreg r
) { ++env
.use_counts
[r
]; });
1165 // The simplify() implementations may allocate scratch blocks and modify
1166 // instruction streams, so we cannot use standard iterators here.
1167 for (auto const b
: labels
) {
1168 for (size_t i
= 0; i
< blocks
[b
].code
.size(); ++i
) {
1169 // Simplify at this index until no changes are made.
1170 while (simplify(env
, b
, i
) || simplify_arch(env
, b
, i
)) {
1171 // Stop if we simplified away the tail of the block.
1172 if (i
>= blocks
[b
].code
.size()) break;
1177 printUnit(kVasmSimplifyLevel
, "after vasm simplify", unit
);
1180 ///////////////////////////////////////////////////////////////////////////////