hphp/runtime/vm/jit/vasm-copy.cpp
/*
  +----------------------------------------------------------------------+
  | HipHop for PHP                                                       |
  +----------------------------------------------------------------------+
  | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
  +----------------------------------------------------------------------+
  | This source file is subject to version 3.01 of the PHP license,     |
  | that is bundled with this package in the file LICENSE, and is       |
  | available through the world-wide-web at the following url:          |
  | http://www.php.net/license/3_01.txt                                 |
  | If you did not receive a copy of the PHP license and are unable to  |
  | obtain it through the world-wide-web, please send a note to         |
  | license@php.net so we can mail you a copy immediately.              |
  +----------------------------------------------------------------------+
*/
#include "hphp/runtime/vm/jit/vasm.h"

#include "hphp/runtime/vm/jit/abi.h"
#include "hphp/runtime/vm/jit/containers.h"
#include "hphp/runtime/vm/jit/pass-tracer.h"
#include "hphp/runtime/vm/jit/phys-reg.h"
#include "hphp/runtime/vm/jit/reg-alloc.h"
#include "hphp/runtime/vm/jit/timer.h"
#include "hphp/runtime/vm/jit/vasm-instr.h"
#include "hphp/runtime/vm/jit/vasm-print.h"
#include "hphp/runtime/vm/jit/vasm-reg.h"
#include "hphp/runtime/vm/jit/vasm-unit.h"
#include "hphp/runtime/vm/jit/vasm-visit.h"

#include "hphp/util/dataflow-worklist.h"
#include "hphp/util/trace.h"

#include <boost/dynamic_bitset.hpp>

#include <folly/Format.h>

#include <algorithm>
#include <limits>
#include <string>
#include <type_traits>
namespace HPHP::jit {

namespace {

TRACE_SET_MOD(vasm_copy);

///////////////////////////////////////////////////////////////////////////////

using RpoID = size_t;

constexpr auto kInvalidDisp = std::numeric_limits<int32_t>::max();
/*
 * The value of a physical register at a program point, relative to the values
 * physical registers had on entry to the program.
 *
 * Currently, we only track expressions of the form: %base + disp.
 */
struct PhysExpr {
  PhysExpr() : base(InvalidReg), disp(kInvalidDisp) {}
  PhysExpr(PhysReg base, int32_t disp) : base(base), disp(disp) {}

  PhysExpr(const PhysExpr&) = default;
  PhysExpr& operator=(const PhysExpr&) = default;

  bool operator==(const PhysExpr& o) const {
    return base == o.base && disp == o.disp;
  }
  bool operator!=(const PhysExpr& o) const { return !(*this == o); }

  PhysReg base;
  int32_t disp;
};
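
/*
 * Illustrative sketch (not part of the pass): PhysExprs are compared and
 * composed by displacement off a common on-entry base, e.g.
 *
 *   PhysExpr fp { rvmfp(), 0 };    // rvmfp() still holds its on-entry value
 *   PhysExpr sp { rvmfp(), -16 };  // e.g. a register lea'd 16 bytes below it
 *   assertx(fp != sp && fp.base == sp.base && sp.disp - fp.disp == -16);
 */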
/*
 * Information about the definition of a virtual register.
 *
 * The `base' register may be a physical register, in which case `expr' is its
 * abstract value at the def (if known).
 */
struct DefInfo {
  bool operator==(const DefInfo& o) const {
    return base == o.base && disp == o.disp && expr == o.expr && copy == o.copy;
  }
  bool operator!=(const DefInfo& o) const { return !(*this == o); }

  Vreg base;
  int32_t disp;
  Vreg copy;  // The def'd Vreg's value is identical to `copy' (which may
              // differ from `base' when the displacement is non-zero).
  PhysExpr expr;
};
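
/*
 * Sketch of the bookkeeping above (hypothetical Vregs): after
 * "copy %t1 => %t2", env.defs[t2] is { base = t1, disp = 0, copy = t1 };
 * after "lea %t1[8] => %t2", it is { base = t1, disp = 8 } with no `copy',
 * since %t2's value differs from %t1's.
 */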
/*
 * State of registers at a program point.
 */
struct RegState {
  PhysReg::Map<PhysExpr> phys;

  /*
   * Whether a Vreg def has been seen, or invalidated due to a dataflow
   * conflict, at this program point.
   *
   * The virtual `defs' metadata vector in Env is mutated during our
   * flow-sensitive abstract evaluation pass.  Since virtual Vregs are SSA, and
   * since we don't "chase" through physical register metadata when setting
   * their DefInfo, we don't need to flow `defs'.
   *
   * However, the converse is not true.  When we populate `phys', we do chase
   * through the information in `defs'.  This means we need to track changes to
   * that information in our dataflow analysis.  Fortunately, SSA gives us the
   * convenient invariant that the DefInfo for a Vreg can only change up to
   * twice: once when we first see the def, and possibly a second time if the
   * def was relative to a physical register and we see a different PhysExpr
   * for that register at the def.
   *
   * This is an optimization to avoid per-block tracking of all virtual
   * register defs.
   */
  boost::dynamic_bitset<> virt_seen;
  boost::dynamic_bitset<> virt_invalid;

  bool init{false};
};
/*
 * Inputs to, and results of, the analysis passes.
 */
struct Env {
  explicit Env(Vunit& unit, const Abi& abi)
    : unit(unit)
    , abi(abi)
    , rpo_blocks(sortBlocks(unit))
    , block_to_rpo(unit.blocks.size())
    , defs(unit.next_vr)
  {
    for (size_t i = 0, n = rpo_blocks.size(); i < n; ++i) {
      block_to_rpo[rpo_blocks[i]] = i;
    }
  }

  Vunit& unit;
  const Abi& abi;
  jit::vector<Vlabel> rpo_blocks;
  jit::vector<RpoID> block_to_rpo;

  /*
   * RegState at the entry of each block.
   *
   * Computed by a flow-sensitive abstract evaluation pass.
   */
  jit::vector<RegState> block_states;

  /*
   * Per-Vreg analysis info.
   *
   * This is computed alongside `block_states', but we can merge as we go since
   * we only keep DefInfo for virtual Vregs, which are SSA.
   *
   * The information in the DefInfo is "usable" if it is also virtual (and thus
   * SSA), or if it refers to a physical register whose values at the use and
   * def sites are a known displacement from one another.
   */
  jit::vector<DefInfo> defs;
};
///////////////////////////////////////////////////////////////////////////////

DEBUG_ONLY std::string show(PhysExpr x) {
  return folly::sformat("{} + {}", show(x.base), x.disp);
}

DEBUG_ONLY std::string show(const DefInfo& x) {
  return folly::sformat(
    "{} + {}{}{}{}",
    show(x.base), x.disp,
    x.copy.isValid() ? " [" : "",
    x.copy.isValid() ? show(x.copy) : "",
    x.copy.isValid() ? "]" : ""
  );
}

///////////////////////////////////////////////////////////////////////////////
/*
 * Call `f' on every Vreg def'd by `inst'.
 */
template<class F>
void for_all_defs(Env& env, const Vinstr& inst, F f) {
  visitDefs(env.unit, inst, f);
  auto uses = RegSet{};
  auto across = RegSet{};
  auto defs = RegSet{};
  getEffects(env.abi, inst, uses, across, defs);
  defs.forEach(f);
}
/*
 * Initialize the block-in RegState vector in `env'.
 *
 * We invalidate all the physical register states except in the entry block,
 * whose state is initialized to {r, 0} for each reserved PhysReg `r'.
 */
void initialize_reg_states(Env& env) {
  env.block_states.resize(env.unit.blocks.size());
  if (env.unit.blocks.empty()) return;

  auto& state = env.block_states[env.unit.entry];

  for (auto const r : state.phys) {
    state.phys[r] = PhysExpr { r, 0 };
  }
  state.virt_seen = boost::dynamic_bitset<>(env.unit.next_vr);
  state.virt_invalid = boost::dynamic_bitset<>(env.unit.next_vr);

  state.init = true;
}
/*
 * Merge `src' into `dst', returning whether we updated `dst'.
 */
bool merge_into(RegState& dst, const RegState& src) {
  assertx(src.init);

  if (!dst.init) {
    dst = src;
    return true;
  }

  auto changed = false;

  for (auto const r : dst.phys) {
    if (dst.phys[r] == src.phys[r]) continue;

    // Any two different PhysExprs are incompatible.
    dst.phys[r] = PhysExpr{};
    changed = true;
  }

  changed |= (dst.virt_seen != src.virt_seen) ||
             (dst.virt_invalid != src.virt_invalid);
  dst.virt_seen |= src.virt_seen;
  dst.virt_invalid |= src.virt_invalid;

  return changed;
}
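
/*
 * Example of the merge (illustrative): if one predecessor exits with
 * `rbp := rsp + 8' and another with `rbp := rsp + 16', the successor's
 * block-in state invalidates rbp, since any two different PhysExprs are
 * incompatible.
 */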
/*
 * Stringify the analysis state for the beginning of each block.
 */
DEBUG_ONLY std::string show_fixed_point(Env& env) {
  auto ret = std::string{};

  for (auto b : env.rpo_blocks) {
    auto const& state = env.block_states[b];
    folly::format(&ret, "{: <4}:\n", b);

    for (auto const r : state.phys) {
      auto const expr = state.phys[r];
      if (expr == PhysExpr{}) continue;
      folly::format(&ret, " {} := {}\n", show(r), show(expr));
    }
  }

  folly::format(&ret, "virtuals:\n");
  for (unsigned i = 0, n = env.defs.size(); i < n; ++i) {
    auto const& def = env.defs[i];
    if (!def.base.isValid()) continue;

    folly::format(&ret, " {} := {}", show(Vreg{i}), show(def));
    if (def.expr != PhysExpr{}) {
      folly::format(&ret, " ({} := {})", show(def.base), show(def.expr));
    }
    folly::format(&ret, "\n");
  }

  return ret;
}
///////////////////////////////////////////////////////////////////////////////

/*
 * "Chase" the def metadata for `r' through its sources until we arrive at a
 * physical source, then compute a PhysExpr.
 *
 * If no physical source is found, return an invalid PhysExpr.
 */
PhysExpr chase_thru(const Env& env, Vreg r) {
  if (!r.isVirt()) return PhysExpr{};

  auto const& def = env.defs[r];
  if (!def.base.isValid()) return PhysExpr{};

  if (def.base.isPhys()) {
    if (def.expr == PhysExpr{}) {
      return PhysExpr{};
    }
    auto expr = def.expr;
    expr.disp += def.disp;
    return expr;
  }
  assertx(def.base.isVirt());

  auto expr = chase_thru(env, def.base);
  if (expr != PhysExpr{}) {
    expr.disp += def.disp;
    return expr;
  }
  return PhysExpr{};
}
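
/*
 * Chasing sketch (hypothetical defs): with env.defs[t2] = `t1 + 4' and
 * env.defs[t1] = `rsp + 8' (where rsp's def-time expr is `rsp + 0'),
 * chase_thru(env, t2) folds the displacements into PhysExpr { rsp, 12 }.
 */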
/*
 * Get or compute a PhysExpr for `s', else return an invalid PhysExpr.
 */
PhysExpr expr_for(const Env& env, RegState& state, Vreg s) {
  return s.isPhys()
    ? state.phys[s]
    : chase_thru(env, s);
}
///////////////////////////////////////////////////////////////////////////////

/*
 * Analyze a copy from `s' to `d'.
 *
 * Returns true if no further analysis is needed for the def to `d'.
 */
bool analyze_phys_copy(const Env& env, RegState& state, Vreg d, Vreg s) {
  if (!d.isPhys()) return true;

  auto const expr = expr_for(env, state, s);
  if (expr == PhysExpr{}) return false;

  state.phys[d] = expr;
  FTRACE(3, " {} = {}\n", show(d), show(state.phys[d]));
  return true;
}
/*
 * Analyze an instruction which performs `d := s + disp'.
 *
 * Returns true if no further analysis is needed for the def to `d'.
 */
bool analyze_phys_disp(const Env& env, RegState& state,
                       Vreg d, Vreg s, int32_t disp) {
  if (!d.isPhys()) return true;

  auto const expr = expr_for(env, state, s);
  if (expr == PhysExpr{}) return false;

  state.phys[d] = expr;
  state.phys[d].disp += disp;
  FTRACE(3, " {} = {}\n", show(d), show(state.phys[d]));
  return true;
}
/*
 * Analyze a def that can't be tracked as a copy or displacement.
 *
 * Always returns true, for easy chaining of analysis routines.
 */
bool analyze_phys_def(const Env& env, RegState& state, Vreg d) {
  if (!d.isPhys()) return true;

  FTRACE(3, " kill {}\n", show(d));
  state.phys[d] = PhysExpr{};

  return true;
}
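
/*
 * Worked sequence through the three routines above (pseudo-vasm; assumes the
 * entry state maps rsp to `rsp + 0'):
 *
 *   copy rsp => rbp         analyze_phys_copy:  rbp := rsp + 0
 *   addqi 16, rbp => rbp    analyze_phys_disp:  rbp := rsp + 16
 *   <untracked def of rbp>  analyze_phys_def:   rbp := invalid (killed)
 */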
/*
 * Merge `src' into the DefInfo for `d'.
 *
 * This just sets `env.defs[d]' to `src' if it was uninitialized, else checks
 * if it matches, and invalidates it if not.
 *
 * Since virtual Vregs are SSA, two DefInfos should always match /unless/ they
 * differ in the def-time PhysExpr.  For this reason, since we track the
 * PhysExpr explicitly as part of our dataflow loop, we don't need to track
 * whether a Vreg's DefInfo has changed.
 */
void merge_def_info(Env& env, RegState& state, Vreg d, const DefInfo& src) {
  auto& def = env.defs[d];

  auto const s = src.base;
  auto const expr = s.isPhys() ? state.phys[s] : PhysExpr{};

  if (!def.base.isValid()) {
    def = src;
    def.expr = expr;
    state.virt_seen[d] = true;

    FTRACE(3, " {} = {}\n", show(d), show(def));
    return;
  }
  assertx(def.base == src.base &&
          def.disp == src.disp);

  if (def != src) {
    def = DefInfo{};
    state.virt_invalid[d] = true;
    FTRACE(3, " kill {}\n", show(d));
  }
}
/*
 * Analyze an instruction which performs `d := s [+ disp]'.
 */
void analyze_virt_copy(Env& env, RegState& state, Vreg d, Vreg s) {
  if (!d.isVirt()) return;
  merge_def_info(env, state, d, DefInfo { s, 0, s.isVirt() ? s : Vreg{} });
}

void analyze_virt_disp(Env& env, RegState& state,
                       Vreg d, Vreg s, int32_t disp) {
  if (!d.isVirt()) return;
  merge_def_info(env, state, d, DefInfo { s, disp });
}

#define VASM_ADDS \
  V(addli)        \
  V(addqi)
#define VASM_SUBS \
  V(subli)        \
  V(subqi)
/*
 * Analyze the virtual defs of `inst'.
 */
void analyze_inst_virtual(Env& env, RegState& state, const Vinstr& inst) {
  switch (inst.op) {
    case Vinstr::copy:
      return analyze_virt_copy(env, state, inst.copy_.d, inst.copy_.s);

    case Vinstr::copyargs: {
      auto const& s = env.unit.tuples[inst.copyargs_.s];
      auto const& d = env.unit.tuples[inst.copyargs_.d];
      assertx(s.size() == d.size());
      for (size_t i = 0; i < s.size(); ++i) {
        analyze_virt_copy(env, state, d[i], s[i]);
      }
      return;
    }

    case Vinstr::lea: {
      auto const& i = inst.lea_;
      if (i.s.seg == Segment::DS && i.s.index == InvalidReg) {
        analyze_virt_disp(env, state, i.d, i.s.base, i.s.disp);
      }
      return;
    }

#define V(add)                                              \
    case Vinstr::add:                                       \
      return analyze_virt_disp(env, state, inst.add##_.d,   \
                               inst.add##_.s1, inst.add##_.s0.l());
    VASM_ADDS
#undef V

#define V(sub)                                              \
    case Vinstr::sub:                                       \
      return analyze_virt_disp(env, state, inst.sub##_.d,   \
                               inst.sub##_.s1, -inst.sub##_.s0.l());
    VASM_SUBS
#undef V

    default: break;
  }
}
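
/*
 * For example (pseudo-vasm): "lea %t1[8] => %t2" and "addqi 8, %t1 => %t2"
 * both reach analyze_virt_disp as `t2 := t1 + 8', while "subqi 8, %t1 => %t2"
 * arrives with its immediate negated, as `t2 := t1 - 8'.
 */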
/*
 * Analyze the physical defs of `inst'.
 */
void analyze_inst_physical(Env& env, RegState& state,
                           const Vinstr& inst, const Vinstr* next) {
  auto const done = [&] {
    switch (inst.op) {
      case Vinstr::copy:
        return analyze_phys_copy(env, state, inst.copy_.d, inst.copy_.s);

      case Vinstr::lea: {
        auto const& i = inst.lea_;
        return i.s.seg == Segment::DS &&
               i.s.index == InvalidReg &&
               analyze_phys_disp(env, state, i.d, i.s.base, i.s.disp);
      }

#define V(add)                                                \
      case Vinstr::add:                                       \
        return analyze_phys_disp(env, state, inst.add##_.d,   \
                                 inst.add##_.s1, inst.add##_.s0.l());
      VASM_ADDS
#undef V

#define V(sub)                                                \
      case Vinstr::sub:                                       \
        return analyze_phys_disp(env, state, inst.sub##_.d,   \
                                 inst.sub##_.s1, -inst.sub##_.s0.l());
      VASM_SUBS
#undef V

      default: break;
    }
    return false;
  }();
  if (done) return;

  for_all_defs(env, inst, [&] (Vreg d) {
    return analyze_phys_def(env, state, d);
  });
}

#undef VASM_SUBS
#undef VASM_ADDS
/*
 * Toplevel def analysis pass.
 */
void analyze_defs(Env& env) {
  FTRACE(1, "analyze_defs -----------------------------------------\n");

  initialize_reg_states(env);

  auto workQ = dataflow_worklist<RpoID>(env.unit.blocks.size());
  workQ.push(RpoID{0});

  do {
    auto const b = env.rpo_blocks[workQ.pop()];
    FTRACE(1, "{}:\n", b);

    auto& code = env.unit.blocks[b].code;
    auto state = env.block_states[b];

    for (size_t i = 0, n = code.size(); i < n; ++i) {
      auto& inst = code[i];
      FTRACE(2, " {}\n", show(env.unit, inst));

      auto const next_inst = i != n - 1 ? &code[i + 1] : nullptr;
      analyze_inst_virtual(env, state, inst);
      analyze_inst_physical(env, state, inst, next_inst);
    }

    for (auto const s : succs(code.back())) {
      FTRACE(4, " -> {}\n", s);
      auto& succ_state = env.block_states[s];
      if (merge_into(succ_state, state)) workQ.push(env.block_to_rpo[s]);
    }
  } while (!workQ.empty());

  FTRACE(5, "\nfixed point:\n{}\n", show_fixed_point(env));
}
///////////////////////////////////////////////////////////////////////////////

/*
 * Chase the sources of `def', recursively folding the "root" DefInfos into
 * their dependent defs.
 */
void flatten_impl(Env& env, DefInfo& def) {
  auto const s = def.base;
  if (!s.isVirt()) return;

  auto& src = env.defs[s];
  flatten_impl(env, src);

  if (def.copy.isValid()) {
    assertx(def.copy.isVirt());
    while (true) {
      auto const& other = env.defs[def.copy];
      if (!other.copy.isValid() || other.copy.isPhys()) break;
      def.copy = other.copy;
    }
  }

  if (!src.base.isVirt()) return;
  def.base = src.base;
  def.disp += src.disp;
}
/*
 * Chase sources of Vregs in `defs', recursively folding the "root" DefInfos
 * into their dependent defs.
 *
 * This routine does not fold physical defs, in case they end up being unusable
 * due to mismatched PhysExprs.
 */
void flatten_def_infos(Env& env) {
  for (auto& def : env.defs) flatten_impl(env, def);
}
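
/*
 * Flattening sketch (hypothetical Vregs): given the def chain
 *   t2 := t1 + 8,  t3 := t2 + 4,
 * flatten_impl rewrites env.defs[t3] to `t1 + 12', so later queries don't
 * have to re-chase the chain.
 */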
///////////////////////////////////////////////////////////////////////////////

/*
 * Call `f(def)' if `r' can be rewritten as `def' at the program point given by
 * `state'.
 */
template<class F>
void if_rewritable(const Env& env, const RegState& state, Vreg r, F f) {
  if (!r.isVirt()) return;

  auto const& definition = env.defs[r];
  if (!definition.base.isValid()) return;

  auto const try_phys_rewrite = [&] (DefInfo def) {
    // We can't fold defs relative to unreserved physical registers.
    if (!def.base.isPhys()) return false;

    // If we don't know anything about the physical register's value, we can't
    // do any rewriting.
    if (def.expr == PhysExpr{}) return false;

    // At this point, we know that `r' is defined relative to some physical
    // register `def.base' which is statically derivable from the value of some
    // physical register `def.expr.base' at entry to the program.  We want to
    // find another register that is defined relative to that same on-entry
    // value at the current program point, given by `state'.
    //
    // We could in theory use any physical register or any virtual register
    // whose def dominates the current point.  Instead, we only try the two we
    // mentioned: `def.base' and `def.expr.base', based on the assumption that
    // physical registers whose values are known relative to on-entry values
    // stay relative to those values.
    auto const try_rewrite = [&] (PhysReg s) {
      auto const& cur = state.phys[s];
      if (def.expr.base != cur.base) {
        if (cur == PhysExpr{}) {
          FTRACE(4, " incompatible: {} =/> {} + ?? (base unknown)\n",
                 show(r), show(s));
        } else {
          FTRACE(4, " incompatible: {} =/> {} + {} "
                 "(at def: {}, currently: {})\n",
                 show(r), show(s), def.disp - (cur.disp - def.expr.disp),
                 show(def.expr), show(cur));
        }
        return false;
      }

      // We need to subtract out the change in displacement of `s' relative to
      // `cur.base' from the site of the def until now.  Or, algebraically:
      //
      //    r     := s_def + def.disp
      //    s_def := cur.base + def.expr.disp
      //    s_cur := cur.base + cur.disp
      //
      //    s_def = (cur.base + cur.disp) - cur.disp + def.expr.disp
      //    r     = s_cur - (cur.disp - def.expr.disp) + def.disp
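      //
      // Concrete instance (illustrative): if `r' was def'd as s + 8 when s
      // held cur.base + 16, and s now holds cur.base + 24, then
      // r = s_cur - (24 - 16) + 8 = s_cur + 0.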
      def.base = s;
      def.disp -= (cur.disp - def.expr.disp);
      f(def);
      return true;
    };

    return try_rewrite(def.base) ||
           try_rewrite(def.expr.base);
  };

  if (definition.base.isPhys()) {
    try_phys_rewrite(definition);
    return;
  }
  assertx(definition.base.isVirt());

  auto const& src = env.defs[definition.base];
  // The flatten_def_infos() pass should have folded chains of virtual defs.
  assertx(!src.base.isVirt());

  if (src.base.isPhys()) {
    auto folded = src;
    folded.disp += definition.disp;

    // Try rewriting to the physical `src`; but even if we can't, we can still
    // just rewrite to `definition`.
    if (try_phys_rewrite(folded)) return;
  }

  f(definition);
}
/*
 * Visitor for rewriting Vreg uses, replacing them with the expressions for
 * their defs.
 */
struct OptVisit {
  const Env& env;
  const RegState& state;

  template<class T> void imm(T&) {}
  template<class T> void across(T& t) { use(t); }
  template<class T, class H> void useHint(T& t, H&) { use(t); }
  template<class T, class H> void defHint(T& t, H&) { def(t); }
  template<class T> void def(T&) {}

  void use(RegSet) {}
  void use(VregSF) {}
  void use(VcallArgsId) {}
  void use(Vreg128) {}

  void use(Vtuple t) { for (auto& reg : env.unit.tuples[t]) use(reg); }

  void use(Vptr& ptr) {
    // Rewrite memory operands that are based on registers we've copied or
    // lea'd off of other registers.
    if (ptr.seg != Segment::DS) return;
    if_rewritable(env, state, ptr.base, [&] (const DefInfo& def) {
      if (arch() == Arch::ARM) {
        // After lowering, only [base, index lsl #scale] and [base, #imm] are
        // allowed, where #imm must lie in the range [-256 .. 255].
        assertx(ptr.base.isValid());
        auto disp = ptr.disp + def.disp;
        if (ptr.index.isValid()) {
          if (disp != 0) return;
        } else {
          if (disp < -256 || disp > 255) return;
        }
      }
      FTRACE(2, " rewrite: {} => {}\n", show(ptr.base), show(def));
      ptr.base = def.base;
      ptr.disp += def.disp;
    });
    if_rewritable(env, state, ptr.index, [&] (const DefInfo& def) {
      if (arch() == Arch::ARM) return;
      auto const newDisp =
        static_cast<int64_t>(ptr.disp) + ptr.scale * def.disp;
      if (!deltaFits(newDisp, sz::dword)) return;
      FTRACE(2, " rewrite: {} => {}\n", show(ptr.index), show(def));
      ptr.index = def.base;
      ptr.disp = newDisp;
    });
  }
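
  /*
   * Folding sketch (pseudo-vasm): if %t1 was def'd by "lea %rbp[-32] => %t1"
   * and %rbp's PhysExpr still matches the def site, a use like
   * "load %t1[8] => %t2" can be rewritten to "load %rbp[-24] => %t2",
   * shortening %t1's lifetime.
   */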
  template<class T>
  typename std::enable_if<
    std::is_same<Vreg,T>::value ||
    std::is_same<Vreg8,T>::value ||
    std::is_same<Vreg16,T>::value ||
    std::is_same<Vreg32,T>::value ||
    std::is_same<Vreg64,T>::value ||
    std::is_same<VregDbl,T>::value
  >::type use(T& reg) {
    // Rewrite to another register if it's just a copy.
    if_rewritable(env, state, reg, [&] (const DefInfo& def) {
      // If the displacement is zero, just use the base register.  Otherwise,
      // we may have a recorded copy (which isn't necessarily as good as the
      // base); use that instead.
      auto const d = (def.disp != 0) ? def.copy : def.base;
      if (!d.isValid()) return;
      FTRACE(2, " rewrite: {} => {}\n", show(reg), show(d));
      reg = d;
    });
  }
};
/*
 * Rewrite a copy{} as an lea{} if possible.
 *
 * Note that if the copy could have been rewritten as a different copy, the
 * above visitor would have taken care of it.
 */
void optimize_copy(const Env& env, const RegState& state, Vinstr& inst) {
  auto& copy = inst.copy_;
  if_rewritable(env, state, copy.s, [&] (const DefInfo& def) {
    if (def.disp == 0) return;
    FTRACE(2, " copy => lea {}\n", show(def));
    inst = lea{def.base[def.disp], copy.d};
  });
}
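
/*
 * For example (pseudo-vasm): if %t1 was def'd by "lea %rbp[16] => %t1", a
 * later "copy %t1 => %t2" can become "lea %rbp[16] => %t2", cutting the
 * dependence on %t1.
 */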
/*
 * Rewrite the srcs of `inst' as the expressions used to def them.
 */
void optimize_inst(const Env& env, const RegState& state, Vinstr& inst) {
  // For specialized iterators, we'd like to use a single physical register for
  // the position.  On exit traces, we intentionally recompute the old position
  // in order to avoid extending Vreg lifetimes.  Don't over-optimize this
  // case.
  if (inst.origin != nullptr && inst.origin->is(StIterPos)) return;

  auto visit = OptVisit { env, state };
  visitOperands(inst, visit);

  switch (inst.op) {
    case Vinstr::copy:
      optimize_copy(env, state, inst);
      break;
    default: break;
  }
}
/*
 * Post-analysis expression-rewriting pass.
 */
void optimize(Env& env) {
  FTRACE(1, "\noptimize ---------------------------------------------\n");

  for (auto const& b : env.rpo_blocks) {
    FTRACE(1, "{}:\n", b);
    auto& code = env.unit.blocks[b].code;

    auto state = env.block_states[b];

    for (auto it = code.begin(); it != code.end(); ++it) {
      auto& inst = *it;
      FTRACE(2, " {}\n", show(env.unit, inst));

      auto const next_it = std::next(it);
      auto const next_inst = next_it != code.end() ? &*next_it : nullptr;

      optimize_inst(env, state, inst);
      analyze_inst_physical(env, state, inst, next_inst);
    }
  }
}

///////////////////////////////////////////////////////////////////////////////

}
/*
 * This pass performs straightforward copy propagation, along with stateful
 * copy propagation of values through physical registers.  (Tracking the
 * values of physical registers requires dataflow analysis, because they do
 * not have single definitions.)
 *
 * The pass also tracks registers defined via lea instructions, and it knows
 * when a register holds a value that is the same as another register plus
 * some offset.  It then folds offsets in memory operands to try to require
 * fewer registers.  The main motivation for this is to generally eliminate
 * the need for a separate stack pointer (the result of HHIR's DefFrameRelSP
 * instruction, which will just be an lea off of the rvmfp() physical
 * register).
 */
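/*
 * Minimal before/after sketch (pseudo-vasm, hypothetical registers):
 *
 *   lea rvmfp()[-64] => %t1          lea rvmfp()[-64] => %t1
 *   load %t1[8] => %t2        =>     load rvmfp()[-56] => %t2
 *
 * If all uses of %t1 are folded this way, %t1 becomes dead and later passes
 * can drop its def.
 */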
void optimizeCopies(Vunit& unit, const Abi& abi) {
  Timer timer(Timer::vasm_copy);
  VpassTracer tracer{&unit, Trace::vasm_copy, "vasm-copy"};
  Env env { unit, abi };
  analyze_defs(env);
  flatten_def_infos(env);
  optimize(env);
}

///////////////////////////////////////////////////////////////////////////////

}