codemod 2010-2016 to 2010-present
[hiphop-php.git] / hphp / runtime / vm / jit / vasm-copy.cpp
blobbe0327a4386f1b0f393349b4a2db11feb59c00d2
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/runtime/vm/jit/vasm.h"
19 #include "hphp/runtime/vm/jit/abi.h"
20 #include "hphp/runtime/vm/jit/containers.h"
21 #include "hphp/runtime/vm/jit/pass-tracer.h"
22 #include "hphp/runtime/vm/jit/phys-reg.h"
23 #include "hphp/runtime/vm/jit/reg-alloc.h"
24 #include "hphp/runtime/vm/jit/timer.h"
25 #include "hphp/runtime/vm/jit/vasm-instr.h"
26 #include "hphp/runtime/vm/jit/vasm-print.h"
27 #include "hphp/runtime/vm/jit/vasm-reg.h"
28 #include "hphp/runtime/vm/jit/vasm-unit.h"
29 #include "hphp/runtime/vm/jit/vasm-visit.h"
31 #include "hphp/util/dataflow-worklist.h"
32 #include "hphp/util/either.h"
33 #include "hphp/util/trace.h"
35 #include <boost/dynamic_bitset.hpp>
37 #include <folly/Format.h>
39 #include <algorithm>
40 #include <limits>
41 #include <string>
42 #include <type_traits>
44 namespace HPHP { namespace jit {
46 namespace {
48 TRACE_SET_MOD(vasm_copy);
50 ///////////////////////////////////////////////////////////////////////////////
52 using RpoID = size_t;
54 constexpr auto kInvalidDisp = std::numeric_limits<int32_t>::max();
57 * The value of a physical register at a program point, relative to the values
58 * physical registers had on entry to the program.
60 * Currently, we only track expressions of the form: %base + disp.
62 struct PhysExpr {
63 PhysExpr() : base(InvalidReg), disp(kInvalidDisp) {}
64 PhysExpr(PhysReg base, int32_t disp) : base(base), disp(disp) {}
66 PhysExpr(const PhysExpr&) = default;
67 PhysExpr& operator=(const PhysExpr&) = default;
69 bool operator==(const PhysExpr& o) const {
70 return base == o.base && disp == o.disp;
72 bool operator!=(const PhysExpr& o) const { return !(*this == o); }
74 PhysReg base;
75 int32_t disp;
79 * Information about the definition of a virtual register.
81 * The `base' register may be a physical register, in which case `expr' is its
82 * abstract value at the def (if known).
84 struct DefInfo {
85 bool operator==(const DefInfo& o) const {
86 return base == o.base && disp == o.disp && expr == o.expr;
88 bool operator!=(const DefInfo& o) const { return !(*this == o); }
90 Vreg base;
91 int32_t disp;
92 PhysExpr expr;
96 * State of registers at a program point.
98 struct RegState {
99 PhysReg::Map<PhysExpr> phys;
102 * Whether a Vreg def has been seen, or invalidated due to a dataflow
103 * conflict, at this program point.
105 * The virtual `defs' metadata vector in Env is mutated during our
106 * flow-sensitive abstract evaluation pass. Since virtual Vregs are SSA, and
107 * since we don't "chase" through physical register metadata when setting
108 * their DefInfo, we don't need to flow `defs'.
110 * However, the converse is not true. When we populate `phys', we do chase
111 * through the information in `defs'. This means we need to track changes to
112 * that information in our dataflow analysis. Fortunately, SSA gives us the
113 * convenient invariant that the DefInfo for a Vreg can only change up to
114 * twice: once when we first see the def, and possibly a second time if the
115 * def was relative to a physical register and we see a different PhysExpr
116 * for that register at the def.
118 * This is an optimization to avoid per-block tracking of all virtual
119 * register defs.
121 boost::dynamic_bitset<> virt_seen;
122 boost::dynamic_bitset<> virt_invalid;
124 bool init{false};
128 * Inputs to, and results of, the analysis passes.
130 struct Env {
131 explicit Env(Vunit& unit, const Abi& abi)
132 : unit(unit)
133 , abi(abi)
134 , rpo_blocks(sortBlocks(unit))
135 , block_to_rpo(unit.blocks.size())
136 , defs(unit.next_vr)
138 for (size_t i = 0, n = rpo_blocks.size(); i < n; ++i) {
139 block_to_rpo[rpo_blocks[i]] = i;
143 Vunit& unit;
144 const Abi& abi;
145 jit::vector<Vlabel> rpo_blocks;
146 jit::vector<RpoID> block_to_rpo;
149 * RegState at the entry of each block.
151 * Computed by a flow-sensitive abstract evaluation pass.
153 jit::vector<RegState> block_states;
156 * Per-Vreg analysis info.
158 * This is computed alongside `block_states', but we can merge as we go since
159 * we only keep DefInfo for virtual Vregs, which are SSA.
161 * The information in the DefInfo is "usable" if it is also virtual (and thus
162 * SSA), or if it makes references to a physical register whose value at both
163 * the use and def site are a known displacement from one another.
165 jit::vector<DefInfo> defs;
168 ///////////////////////////////////////////////////////////////////////////////
170 DEBUG_ONLY std::string show(PhysExpr x) {
171 return folly::sformat("{} + {}", show(x.base), x.disp);
174 DEBUG_ONLY std::string show(DefInfo x) {
175 return folly::sformat("{} + {}", show(x.base), x.disp);
178 ///////////////////////////////////////////////////////////////////////////////
181 * Call `f' on every Vreg def'd by `inst'.
183 template<class F>
184 void for_all_defs(Env& env, const Vinstr& inst, F f) {
185 visitDefs(env.unit, inst, f);
186 auto uses = RegSet{};
187 auto across = RegSet{};
188 auto defs = RegSet{};
189 getEffects(env.abi, inst, uses, across, defs);
190 defs.forEach(f);
194 * Initialize the block-in RegState vector in `env'.
196 * We invalidate all the physical register states except for in the entry
197 * block, whose state is initialized to {r, 0} for each reserved PhysReg `r'.
199 void initialize_reg_states(Env& env) {
200 env.block_states.resize(env.unit.blocks.size());
201 if (env.unit.blocks.empty()) return;
203 auto& state = env.block_states[env.unit.entry];
205 for (auto const r : state.phys) {
206 if (!env.abi.reserved().contains(r)) continue;
207 state.phys[r] = PhysExpr { r, 0 };
209 state.virt_seen = boost::dynamic_bitset<>(env.unit.next_vr);
210 state.virt_invalid = boost::dynamic_bitset<>(env.unit.next_vr);
212 state.init = true;
216 * Merge `src' into `dst', returning whether we updated `dst'.
218 bool merge_into(RegState& dst, const RegState& src) {
219 assertx(src.init);
221 if (!dst.init) {
222 dst = src;
223 return true;
226 auto changed = false;
228 for (auto const r : dst.phys) {
229 if (dst.phys[r] == src.phys[r]) continue;
231 // Any two different PhysExprs are incompatible.
232 dst.phys[r] = PhysExpr{};
233 changed = true;
236 changed |= (dst.virt_seen != src.virt_seen) ||
237 (dst.virt_invalid != src.virt_invalid);
238 dst.virt_seen |= src.virt_seen;
239 dst.virt_invalid |= src.virt_invalid;
241 return changed;
245 * Stringify the analysis state for the beginning of each block.
247 DEBUG_ONLY std::string show_fixed_point(Env& env) {
248 auto ret = std::string{};
250 for (auto b : env.rpo_blocks) {
251 auto const& state = env.block_states[b];
252 folly::format(&ret, "{: <4}:\n", b);
254 for (auto const r : state.phys) {
255 auto const expr = state.phys[r];
256 if (expr == PhysExpr{}) continue;
258 folly::format(&ret, " {} := {}\n", show(r), show(expr));
262 folly::format(&ret, "virtuals:\n");
263 for (unsigned i = 0, n = env.defs.size(); i < n; ++i) {
264 auto const& def = env.defs[i];
265 if (!def.base.isValid()) continue;
267 folly::format(&ret, " {} := {}", show(Vreg{i}), show(def));
268 if (def.expr != PhysExpr{}) {
269 folly::format(&ret, " ({} := {})", show(def.base), show(def.expr));
271 folly::format(&ret, "\n");
274 return ret;
277 ///////////////////////////////////////////////////////////////////////////////
280 * Whether `r' is a reserved physical register.
282 bool is_phys_tracked(const Env& env, Vreg r) {
283 return r.isPhys() && env.abi.reserved().contains(r);
287 * "Chase" the def metadata for `r' through its sources until we arrive at a
288 * physical source, then compute a PhysExpr.
290 * If no physical source is found, return an invalid PhysExpr.
292 PhysExpr chase_thru(const Env& env, Vreg r) {
293 if (!r.isVirt()) return PhysExpr{};
295 auto const& def = env.defs[r];
296 if (!def.base.isValid()) return PhysExpr{};
298 if (def.base.isPhys()) {
299 if (def.expr == PhysExpr{}) {
300 return PhysExpr{};
302 auto expr = def.expr;
303 expr.disp += def.disp;
304 return expr;
306 assertx(def.base.isVirt());
308 auto expr = chase_thru(env, def.base);
309 if (expr != PhysExpr{}) {
310 expr.disp += def.disp;
311 return expr;
313 return PhysExpr{};
317 * Get or compute a PhysExpr for `s', else return an invalid PhysExpr.
319 PhysExpr expr_for(const Env& env, RegState& state, Vreg s) {
320 return is_phys_tracked(env, s)
321 ? state.phys[s]
322 : chase_thru(env, s);
325 ///////////////////////////////////////////////////////////////////////////////
328 * Analyze instructions that are part of a callphp{} sequence.
330 * Returns true if no further analysis is needed for the def to `d'.
332 bool analyze_phys_callseq(const Env& env, Vreg d,
333 const Vinstr& inst, const Vinstr* next) {
334 if (d != rvmfp()) return false;
337 * A common pattern for us is to load an address into the frame pointer
338 * right before a PHP call. In this case, if the frame pointer was not
339 * altered before this redefinition, it will effectively still be
340 * not-altered after the call, because callphp{} restores it to the
341 * previous value.
343 * We don't need to worry about not setting the redefined flag in between
344 * this instruction and the callphp{}, because callphp{}'s uses are only of
345 * a RegSet---we cannot mis-optimize any of its args based on the state
346 * we're tracking for the frame pointer.
348 * We also skip over callphp{}'s definition of rvmfp() for this reason.
349 * Really callphp{} only preserves rvmfp() if we properly set up the
350 * rvmfp() arg to it, but the program is ill-formed if it's not doing
351 * that so it's ok to just ignore that definition here.
353 if (next && next->op == Vinstr::callphp) {
354 FTRACE(3, " post-dominated by callphp---preserving frame ptr\n");
355 return true;
357 if (inst.op == Vinstr::callphp) return true;
359 return false;
363 * Analyze a copy from `s' to `d'.
365 * Returns true if no further analysis is needed for the def to `d'.
367 bool analyze_phys_copy(const Env& env, RegState& state, Vreg d, Vreg s) {
368 if (!is_phys_tracked(env, d)) return true;
370 auto const expr = expr_for(env, state, s);
371 if (expr == PhysExpr{}) return false;
373 state.phys[d] = expr;
374 FTRACE(3, " {} = {}\n", show(d), show(state.phys[d]));
375 return true;
379 * Analyze an instruction which performs `d := s + disp'.
381 * Returns true if no further analysis is needed for the def to `d'.
383 bool analyze_phys_disp(const Env& env, RegState& state,
384 Vreg d, Vreg s, int32_t disp) {
385 if (!is_phys_tracked(env, d)) return true;
387 auto const expr = expr_for(env, state, s);
388 if (expr == PhysExpr{}) return false;
390 state.phys[d] = expr;
391 state.phys[d].disp += disp;
392 FTRACE(3, " {} = {}\n", show(d), show(state.phys[d]));
393 return true;
397 * Analyze a def that can't be tracked as a copy or displacement.
399 * Always returns true, for easy chaining of analysis routines.
401 bool analyze_phys_def(const Env& env, RegState& state, Vreg d) {
402 if (!is_phys_tracked(env, d)) return true;
404 FTRACE(3, " kill {}\n", show(d));
405 state.phys[d] = PhysExpr{};
407 return true;
411 * Merge `src' into the DefInfo for `d'.
413 * This just sets `env.defs[d]' to `src' if it was uninitialized, else checks
414 * if it matches, and invalidates it if not.
416 * Since virtual Vregs are SSA, two DefInfos should always match /unless/ they
417 * differ in the def-time PhysExpr. For this reason, since we track the
418 * PhysExpr explicitly as part of our dataflow loop, we don't need to track
419 * whether a Vreg's DefInfo has changed.
421 void merge_def_info(Env& env, RegState& state, Vreg d, const DefInfo& src) {
422 auto& def = env.defs[d];
424 auto const s = src.base;
425 auto const expr = s.isPhys() ? state.phys[s] : PhysExpr{};
427 if (!def.base.isValid()) {
428 def = src;
429 def.expr = expr;
430 state.virt_seen[d] = true;
432 FTRACE(3, " {} = {}\n", show(d), show(def));
433 return;
435 assertx(def.base == src.base &&
436 def.disp == src.disp);
438 if (def != src) {
439 def = DefInfo{};
440 state.virt_invalid[d] = true;
441 FTRACE(3, " kill {}\n", show(d));
446 * Analyze an instruction which performs `d := s [+ disp]'.
448 void analyze_virt_copy(Env& env, RegState& state, Vreg d, Vreg s) {
449 if (!d.isVirt()) return;
450 merge_def_info(env, state, d, DefInfo { s, 0 });
452 void analyze_virt_disp(Env& env, RegState& state,
453 Vreg d, Vreg s, int32_t disp) {
454 if (!d.isVirt()) return;
455 merge_def_info(env, state, d, DefInfo { s, disp });
// Instruction families handled uniformly as `d := s1 + s0' / `d := s1 - s0'.
#define VASM_ADDS \
  V(addli)        \
  V(addqi)
#define VASM_SUBS \
  V(subbi)        \
  V(subli)        \
  V(subqi)
467 * Analyze the virtual defs of `inst'.
469 void analyze_inst_virtual(Env& env, RegState& state, const Vinstr& inst) {
470 switch (inst.op) {
471 case Vinstr::copy:
472 return analyze_virt_copy(env, state, inst.copy_.d, inst.copy_.s);
474 case Vinstr::lea:
476 auto const& i = inst.lea_;
477 if (i.s.seg == Vptr::DS && i.s.index == InvalidReg) {
478 analyze_virt_disp(env, state, i.d, i.s.base, i.s.disp);
480 return;
483 #define V(add) \
484 case Vinstr::add: \
485 return analyze_virt_disp(env, state, inst.add##_.d, \
486 inst.add##_.s1, inst.add##_.s0.l());
487 VASM_ADDS
488 #undef V
489 #define V(sub) \
490 case Vinstr::sub: \
491 return analyze_virt_disp(env, state, inst.sub##_.d, \
492 inst.sub##_.s1, -inst.sub##_.s0.l());
493 VASM_SUBS
494 #undef V
496 default: break;
501 * Analyze the physical defs of `inst'.
503 void analyze_inst_physical(Env& env, RegState& state,
504 const Vinstr& inst, const Vinstr* next) {
505 auto const is_call_seq = [&] {
506 auto result = false;
507 for_all_defs(env, inst, [&] (Vreg d) {
508 result |= analyze_phys_callseq(env, d, inst, next);
510 return result;
511 }();
513 auto const done = [&] {
514 // If this instruction is part of a callphp{} sequence (i.e., fill rvmfp(),
515 // then callphp{}), we don't want to do instruction-specific analysis---but
516 // we stillneed to analyze any non-rvmfp() physical defs.
517 if (is_call_seq) return false;
519 switch (inst.op) {
520 case Vinstr::copy:
521 return analyze_phys_copy(env, state, inst.copy_.d, inst.copy_.s);
523 case Vinstr::lea:
525 auto const& i = inst.lea_;
526 return i.s.seg == Vptr::DS &&
527 i.s.index == InvalidReg &&
528 analyze_phys_disp(env, state, i.d, i.s.base, i.s.disp);
531 #define V(add) \
532 case Vinstr::add: \
533 return analyze_phys_disp(env, state, inst.add##_.d, \
534 inst.add##_.s1, inst.add##_.s0.l());
535 VASM_ADDS
536 #undef V
537 #define V(sub) \
538 case Vinstr::sub: \
539 return analyze_phys_disp(env, state, inst.sub##_.d, \
540 inst.sub##_.s1, -inst.sub##_.s0.l());
541 VASM_SUBS
542 #undef V
544 default: break;
546 return false;
547 }();
548 if (done) return;
550 for_all_defs(env, inst, [&] (Vreg d) {
551 return analyze_phys_callseq(env, d, inst, next) ||
552 analyze_phys_def(env, state, d);
556 #undef VASM_SUBS
557 #undef VASM_ADDS
560 * Toplevel def analysis pass.
562 void analyze_defs(Env& env) {
563 FTRACE(1, "analyze_defs -----------------------------------------\n");
565 initialize_reg_states(env);
567 auto workQ = dataflow_worklist<RpoID>(env.unit.blocks.size());
568 workQ.push(RpoID{0});
570 do {
571 auto const b = env.rpo_blocks[workQ.pop()];
572 FTRACE(1, "{}:\n", b);
574 auto& code = env.unit.blocks[b].code;
575 auto state = env.block_states[b];
577 for (size_t i = 0, n = code.size(); i < n; ++i) {
578 auto& inst = code[i];
579 FTRACE(2, " {}\n", show(env.unit, inst));
581 auto const next_inst = i != n - 1 ? &code[i + 1] : nullptr;
582 analyze_inst_virtual(env, state, inst);
583 analyze_inst_physical(env, state, inst, next_inst);
586 for (auto const s : succs(code.back())) {
587 FTRACE(4, " -> {}\n", s);
588 auto& succ_state = env.block_states[s];
589 if (merge_into(succ_state, state)) workQ.push(env.block_to_rpo[s]);
591 } while (!workQ.empty());
593 FTRACE(5, "\nfixed point:\n{}\n", show_fixed_point(env));
596 ///////////////////////////////////////////////////////////////////////////////
599 * Chase the sources of `def', recursively folding the "root" DefInfos into
600 * their dependent defs.
602 void flatten_impl(Env& env, DefInfo& def) {
603 auto const s = def.base;
604 if (!s.isVirt()) return;
606 auto& src = env.defs[s];
607 flatten_impl(env, src);
609 if (!src.base.isVirt()) return;
610 def.base = src.base;
611 def.disp += src.disp;
615 * Chase sources of Vregs in `defs', recursively folding the "root" DefInfos
616 * into their dependent defs.
618 * This routine does not fold physical defs, in case they end up being unusable
619 * due to mismatched PhysExprs.
621 void flatten_def_infos(Env& env) {
622 for (auto& def : env.defs) flatten_impl(env, def);
625 ///////////////////////////////////////////////////////////////////////////////
628 * Call `f(def)' if `r' can be rewritten as `def' at the program point given by
629 * `state'.
631 template<class F>
632 void if_rewritable(const Env& env, const RegState& state, Vreg r, F f) {
633 if (!r.isVirt()) return;
635 auto const& def = env.defs[r];
636 if (!def.base.isValid()) return;
638 auto const try_phys_rewrite = [&] (DefInfo def) {
639 // We can't fold defs relative to unreserved physical registers.
640 if (!is_phys_tracked(env, def.base)) return false;
642 // If we don't know anything about the physical register's value, we can't
643 // do any rewriting.
644 if (def.expr == PhysExpr{}) return false;
646 // At this point, we know that `r' is defined relative to some physical
647 // register `def.base' which is statically derivable from the value of some
648 // physical register `def.expr.base' at entry to the program. We want to
649 // find another register that is defined relative to that same on-entry
650 // value at the current program point, given by `state'.
652 // We could in theory use any physical register or any virtual register
653 // whose def dominates the current point. Instead, we only try the two we
654 // mentioned: `def.base' and `def.expr.base', based on the assumption that
655 // physical registers whose values are known relative to on-entry values
656 // stay relative to those values.
657 auto const try_rewrite = [&] (PhysReg s) {
658 auto const& cur = state.phys[s];
659 if (def.expr.base != cur.base) {
660 if (cur == PhysExpr{}) {
661 FTRACE(4, " incompatible: {} =/> {} + ?? (base unknown)\n",
662 show(r), show(s));
663 } else {
664 FTRACE(4, " incompatible: {} =/> {} + {} "
665 "(at def: {}, currently: {})\n",
666 show(r), show(s), def.disp - (cur.disp - def.expr.disp),
667 show(def.expr), show(cur));
669 return false;
672 // We need to subtract out the change in displacement of `s' relative to
673 // `cur.base' from the site of the def until now. Or, algebraically:
675 // r := s_def + def.disp
676 // s_def := cur.base + def.expr.disp
677 // s_cur := cur.base + cur.disp
679 // s_def = (cur.base + cur.disp) - cur.disp + def.expr.disp
680 // r = s_cur - (cur.disp - def.expr.disp) + def.disp
681 def.base = s;
682 def.disp -= (cur.disp - def.expr.disp);
683 f(def);
684 return true;
687 return try_rewrite(def.base) ||
688 try_rewrite(def.expr.base);
691 if (def.base.isPhys()) {
692 try_phys_rewrite(def);
693 return;
695 assertx(def.base.isVirt());
697 auto const& src = env.defs[def.base];
698 // The flatten_def_infos() pass should have folded chains of virtual defs.
699 assertx(!src.base.isVirt());
701 if (src.base.isPhys()) {
702 auto folded = src;
703 folded.disp += def.disp;
705 // Try rewriting to the physical `src'; but even if we can't, we can still
706 // just rewrite to `def'.
707 if (try_phys_rewrite(folded)) return;
710 f(def);
714 * Visitor for rewriting Vreg uses, replacing them with the expressions for
715 * their defs.
717 struct OptVisit {
718 const Env& env;
719 const RegState& state;
721 template<class T> void imm(T&) {}
722 template<class T> void across(T& t) { use(t); }
723 template<class T, class H> void useHint(T& t, H&) { use(t); }
724 template<class T, class H> void defHint(T& t, H&) { def(t); }
725 template<class T> void def(T&) {}
727 void use(RegSet) {}
728 void use(VregSF) {}
729 void use(VcallArgsId) {}
730 void use(Vreg128) {}
732 void use(Vtuple t) { for (auto& reg : env.unit.tuples[t]) use(reg); }
734 void use(Vptr& ptr) {
735 // Rewrite memory operands that are based on registers we've copied or
736 // lea'd off of other registers.
737 if (ptr.seg != Vptr::DS) return;
738 if_rewritable(env, state, ptr.base, [&] (const DefInfo& def) {
739 if (arch() == Arch::ARM) {
740 // After lowering, only [base, index lsl #scale] and [base, #imm]
741 // are allowed where the range of #imm is [-256 .. 255]
742 assert(ptr.base.isValid());
743 auto disp = ptr.disp + def.disp;
744 if (ptr.index.isValid()) {
745 if (disp != 0) return;
746 } else {
747 if (disp < -256 || disp > 255) return;
750 FTRACE(2, " rewrite: {} => {}\n", show(ptr.base), show(def));
751 ptr.base = def.base;
752 ptr.disp += def.disp;
756 template<class T>
757 typename std::enable_if<
758 std::is_same<Vreg,T>::value ||
759 std::is_same<Vreg8,T>::value ||
760 std::is_same<Vreg16,T>::value ||
761 std::is_same<Vreg32,T>::value ||
762 std::is_same<Vreg64,T>::value ||
763 std::is_same<VregDbl,T>::value
764 >::type use(T& reg) {
765 // Rewrite to another register if it's just a copy.
766 if_rewritable(env, state, reg, [&] (const DefInfo& def) {
767 if (def.disp != 0) return;
768 FTRACE(2, " rewrite: {} => {}\n", show(reg), show(def.base));
769 reg = def.base;
775 * Rewrite a copy{} as an lea{} if possible.
777 * Note that if the copy could have been rewritten as a different copy, the
778 * above visitor would have taken care of it.
780 void optimize_copy(const Env& env, const RegState& state, Vinstr& inst) {
781 auto& copy = inst.copy_;
782 if_rewritable(env, state, copy.s, [&] (const DefInfo& def) {
783 if (def.disp == 0) return;
784 FTRACE(2, " copy => lea {}\n", show(def));
785 inst = lea{def.base[def.disp], copy.d};
790 * Rewrite the srcs of `inst' as the expressions used to def them.
792 void optimize_inst(const Env& env, const RegState& state, Vinstr& inst) {
793 auto visit = OptVisit { env, state };
794 visitOperands(inst, visit);
796 switch (inst.op) {
797 case Vinstr::copy:
798 optimize_copy(env, state, inst);
799 break;
800 default: break;
805 * Post-analysis expression-rewriting pass.
807 void optimize(Env& env) {
808 FTRACE(1, "\noptimize ---------------------------------------------\n");
810 for (auto const& b : env.rpo_blocks) {
811 FTRACE(1, "{}:\n", b);
812 auto& code = env.unit.blocks[b].code;
814 auto state = env.block_states[b];
816 for (auto it = code.begin(); it != code.end(); ++it) {
817 auto& inst = *it;
818 FTRACE(2, " {}\n", show(env.unit, inst));
820 auto const next_it = std::next(it);
821 auto const next_inst = next_it != code.end() ? &*next_it : nullptr;
823 optimize_inst(env, state, inst);
824 analyze_inst_physical(env, state, inst, next_inst);
829 ///////////////////////////////////////////////////////////////////////////////
834 * This pass performs straight-forward copy propagation, along with stateful
835 * copy propagation of values through physical registers. (Tracking the values
836 * of physical registers requires dataflow analysis, because they do not have
837 * single definitions.)
839 * The pass also tracks registers defined via lea instructions, and it knows
840 * when a register holds a value that is the same as another register plus some
841 * offset. It then folds offsets in memory operands to try to require fewer
842 * registers. The main motivation for this is to generally eliminate the need
843 * for a separate stack pointer (the result of HHIR's DefSP instruction, which
844 * will just be an lea off of the rvmfp() physical register).
846 void optimizeCopies(Vunit& unit, const Abi& abi) {
847 Timer timer(Timer::vasm_copy);
848 VpassTracer tracer{&unit, Trace::vasm_copy, "vasm-copy"};
849 Env env { unit, abi };
850 analyze_defs(env);
851 flatten_def_infos(env);
852 optimize(env);
855 ///////////////////////////////////////////////////////////////////////////////