/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#include "hphp/runtime/vm/jit/vasm-emit.h"

#include "hphp/runtime/base/runtime-option.h"

#include "hphp/runtime/vm/jit/abi-x64.h"
#include "hphp/runtime/vm/jit/block.h"
#include "hphp/runtime/vm/jit/code-gen-helpers.h"
#include "hphp/runtime/vm/jit/print.h"
#include "hphp/runtime/vm/jit/prof-data.h"
#include "hphp/runtime/vm/jit/service-requests.h"
#include "hphp/runtime/vm/jit/smashable-instr-x64.h"
#include "hphp/runtime/vm/jit/target-cache.h"
#include "hphp/runtime/vm/jit/timer.h"
#include "hphp/runtime/vm/jit/vasm.h"
#include "hphp/runtime/vm/jit/vasm-instr.h"
#include "hphp/runtime/vm/jit/vasm-internal.h"
#include "hphp/runtime/vm/jit/vasm-lower.h"
#include "hphp/runtime/vm/jit/vasm-print.h"
#include "hphp/runtime/vm/jit/vasm-prof.h"
#include "hphp/runtime/vm/jit/vasm-unit.h"
#include "hphp/runtime/vm/jit/vasm-util.h"
#include "hphp/runtime/vm/jit/vasm-visit.h"
namespace HPHP { namespace jit {

///////////////////////////////////////////////////////////////////////////////

namespace x64 { struct ImmFolder; }

///////////////////////////////////////////////////////////////////////////////

static_assert(folly::kIsLittleEndian,
              "Code contains little-endian specific optimizations.");
template<class X64Asm>
struct Vgen {
  explicit Vgen(Venv& env)
    : env(env)
    , a(*env.cb)
    , current(env.current)
    , next(env.next)
    , jmps(env.jmps)
    , jccs(env.jccs)
    , catches(env.catches)
  {}

  static void emitVeneers(Venv& env) {}
  static void handleLiterals(Venv& env) {}
  static void patch(Venv& env);
  static void pad(CodeBlock& cb);

  /////////////////////////////////////////////////////////////////////////////
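
  // Fallback for any instruction that has no specific x64 emitter below: it
  // reports the opcode name and the current block, then aborts.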
  template<class Inst> void emit(const Inst& i) {
    always_assert_flog(false, "unimplemented instruction: {} in B{}\n",
                       vinst_names[Vinstr(i).op], size_t(current));
  }
  void emit(const copy& i);
  void emit(const copy2& i);
  void emit(const debugtrap& /*i*/) { a.int3(); }
  void emit(const fallthru& /*i*/) {}
  void emit(const ldimmb& i);
  void emit(const ldimml& i);
  void emit(const ldimmq& i);
  void emit(const load& i);
  void emit(const store& i);
  void emit(const mcprep& i);

  // native function abi
  void emit(const call& i);
  void emit(const callm& i) { a.call(i.target); }
  void emit(const callr& i) { a.call(i.target); }
  void emit(const calls& i);
  void emit(const ret& /*i*/) { a.ret(); }

  void emit(const stubret& i);
  void emit(const callstub& i);
  void emit(const callfaststub& i);
  void emit(const tailcallstub& i);

  void emit(const phpret& i);
  void emit(const tailcallphp& i);
  void emit(const callunpack& i);
  void emit(const contenter& i);

  void emit(const inittc& /*i*/) {}
  void emit(const calltc&);
  void emit(const leavetc&) { a.ret(); }

  void emit(const landingpad& /*i*/) {}
  void emit(const nothrow& i);
  void emit(const syncpoint& i);
  void emit(const unwind& i);
  void emit(absdbl i) { unary(i); a.psllq(1, i.d); a.psrlq(1, i.d); }
  void emit(andb i) { commuteSF(i); a.andb(i.s0, i.d); }
  void emit(andbi i) { binary(i); a.andb(i.s0, i.d); }
  void emit(const andbim& i) { a.andb(i.s, i.m); }
  void emit(andw i) { commuteSF(i); a.andw(i.s0, i.d); }
  void emit(andwi i) { binary(i); a.andw(i.s0, i.d); }
  void emit(andl i) { commuteSF(i); a.andl(i.s0, i.d); }
  void emit(andli i) { binary(i); a.andl(i.s0, i.d); }
  void emit(andq i) { commuteSF(i); a.andq(i.s0, i.d); }
  void emit(andqi i);
  void emit(const addwm& i) { a.addw(i.s0, i.m); }
  void emit(addli i) { binary(i); a.addl(i.s0, i.d); }
  void emit(const addlm& i) { a.addl(i.s0, i.m); }
  void emit(const addlim& i);
  void emit(addq i) { commuteSF(i); a.addq(i.s0, i.d); }
  void emit(addqi i) { binary(i); a.addq(i.s0, i.d); }
  void emit(const addqmr& i);
  void emit(const addqrm& i);
  void emit(const addqim& i);
  void emit(addsd i) { commute(i); a.addsd(i.s0, i.d); }
  void emit(const cloadq& i);
  template<class cmov> void emit_cmov(const cmov& i);
  void emit(const cmovb& i) { emit_cmov(i); }
  void emit(const cmovw& i) { emit_cmov(i); }
  void emit(const cmovl& i) { emit_cmov(i); }
  void emit(const cmovq& i) { emit_cmov(i); }
  void emit(const cmpb& i) { a.cmpb(i.s0, i.s1); }
  void emit(const cmpbi& i) { a.cmpb(i.s0, i.s1); }
  void emit(const cmpbim& i) { a.prefix(i.s1.mr()).cmpb(i.s0, i.s1); }
  void emit(const cmpbm& i) { a.prefix(i.s1.mr()).cmpb(i.s0, i.s1); }
  void emit(const cmpw& i) { a.cmpw(i.s0, i.s1); }
  void emit(const cmpwi& i) { a.cmpw(i.s0, i.s1); }
  void emit(const cmpwim& i) { a.cmpw(i.s0, i.s1); }
  void emit(const cmpwm& i) { a.cmpw(i.s0, i.s1); }
  void emit(const cmpl& i) { a.cmpl(i.s0, i.s1); }
  void emit(const cmpli& i) { a.cmpl(i.s0, i.s1); }
  void emit(const cmplim& i) { a.cmpl(i.s0, i.s1); }
  void emit(const cmplm& i) { a.cmpl(i.s0, i.s1); }
  void emit(const cmpq& i) { a.cmpq(i.s0, i.s1); }
  void emit(const cmpqi& i) { a.cmpq(i.s0, i.s1); }
  void emit(const cmpqim& i) { a.cmpq(i.s0, i.s1); }
  void emit(const cmpqm& i) { a.cmpq(i.s0, i.s1); }
  void emit(cmpsd i) { noncommute(i); a.cmpsd(i.s0, i.d, i.pred); }
  void emit(const cqo& /*i*/) { a.cqo(); }
  void emit(const cvttsd2siq& i) { a.cvttsd2siq(i.s, i.d); }
  void emit(const cvtsi2sd& i);
  void emit(const cvtsi2sdm& i);
  void emit(decl i) { unary(i); a.decl(i.d); }
  void emit(const declm& i) { a.decl(i.m); }
  void emit(decq i) { unary(i); a.decq(i.d); }
  void emit(const decqm& i) { a.decq(i.m); }
  void emit(const decqmlock& i) { a.decqlock(i.m); }
  void emit(divsd i) { noncommute(i); a.divsd(i.s0, i.d); }
  void emit(imul i) { commuteSF(i); a.imul(i.s0, i.d); }
  void emit(const idiv& i) { a.idiv(i.s); }
  void emit(incl i) { unary(i); a.incl(i.d); }
  void emit(const inclm& i) { a.incl(i.m); }
  void emit(incq i) { unary(i); a.incq(i.d); }
  void emit(const incqm& i) { a.incq(i.m); }
  void emit(const incwm& i) { a.incw(i.m); }
  void emit(const jcc& i);
  void emit(const jcci& i);
  void emit(const jmp& i);
  void emit(const jmpr& i) { a.jmp(i.target); }
  void emit(const jmpm& i) { a.jmp(i.target); }
  void emit(const jmpi& i);
  void emit(const lea& i);
  void emit(const leap& i) { a.lea(i.s, i.d); }
  void emit(const leav& i);
  void emit(const lead& i) { a.lea(rip[(intptr_t)i.s.get()], i.d); }
  void emit(const loadups& i) { a.movups(i.s, i.d); }
  void emit(const loadtqb& i) { a.loadb(i.s, i.d); }
  void emit(const loadb& i) { a.loadb(i.s, i.d); }
  void emit(const loadw& i) { a.loadw(i.s, i.d); }
  void emit(const loadtql& i) { a.loadl(i.s, i.d); }
  void emit(const loadl& i) { a.loadl(i.s, i.d); }
  void emit(const loadqp& i) { a.loadq(i.s, i.d); }
  void emit(const loadqd& i) { a.loadq(rip[(intptr_t)i.s.get()], i.d); }
  void emit(const loadsd& i) { a.movsd(i.s, i.d); }
  void emit(const loadzbl& i) { a.loadzbl(i.s, i.d); }
  void emit(const loadzbq& i) { a.loadzbl(i.s, Reg32(i.d)); }
  void emit(const loadsbq& i) { a.loadsbq(i.s, i.d); }
  void emit(const loadzlq& i) { a.loadl(i.s, Reg32(i.d)); }
  void emit(const movb& i) { a.movb(i.s, i.d); }
  void emit(const movl& i) { a.movl(i.s, i.d); }
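
  // Zero-extending moves: writing a 32-bit destination register implicitly
  // clears its upper 32 bits, so the wider variants can use the 32-bit forms.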
  void emit(const movzbw& i) { a.movzbl(i.s, Reg32(i.d)); }
  void emit(const movzbl& i) { a.movzbl(i.s, i.d); }
  void emit(const movzbq& i) { a.movzbl(i.s, Reg32(i.d)); }
  void emit(const movzwl& i) { a.movzwl(i.s, i.d); }
  void emit(const movzwq& i) { a.movzwl(i.s, Reg32(i.d)); }
  void emit(const movzlq& i) { a.movl(i.s, Reg32(i.d)); }
  void emit(const movsbq& i) { a.movsbq(i.s, i.d); }
  void emit(mulsd i) { commute(i); a.mulsd(i.s0, i.d); }
  void emit(neg i) { unary(i); a.neg(i.d); }
  void emit(const nop& /*i*/) { a.nop(); }
  void emit(not i) { unary(i); a.not(i.d); }
  void emit(notb i) { unary(i); a.notb(i.d); }
  void emit(orbi i) { binary(i); a.orb(i.s0, i.d); }
  void emit(const orbim& i) { a.orb(i.s0, i.m); }
  void emit(const orwim& i) { a.orw(i.s0, i.m); }
  void emit(const orlim& i) { a.orl(i.s0, i.m); }
  void emit(orq i) { commuteSF(i); a.orq(i.s0, i.d); }
  void emit(orwi i) { binary(i); a.orw(i.s0, i.d); }
  void emit(orli i) { binary(i); a.orl(i.s0, i.d); }
  void emit(orqi i) { binary(i); a.orq(i.s0, i.d); }
  void emit(const orqim& i) { a.orq(i.s0, i.m); }
  void emit(const pop& i) { a.pop(i.d); }
  void emit(const popm& i) { a.pop(i.d); }
  void emit(const popf& i) { assertx(i.d == RegSF{0}); a.popf(); }
  void emit(const push& i) { a.push(i.s); }
  void emit(const pushm& i) { a.push(i.s); }
  void emit(const pushf& i) { assertx(i.s == RegSF{0}); a.pushf(); }
  void emit(const roundsd& i) { a.roundsd(i.dir, i.s, i.d); }
  void emit(const sarq& i) { unary(i); a.sarq(i.d); }
  void emit(sarqi i) { binary(i); a.sarq(i.s0, i.d); }
  void emit(const setcc& i) { a.setcc(i.cc, i.d); }
  void emit(shlli i) { binary(i); a.shll(i.s0, i.d); }
  void emit(shlq i) { unary(i); a.shlq(i.d); }
  void emit(shrq i) { unary(i); a.shrq(i.d); }
  void emit(shlqi i) { binary(i); a.shlq(i.s0, i.d); }
  void emit(shrli i) { binary(i); a.shrl(i.s0, i.d); }
  void emit(shrqi i) { binary(i); a.shrq(i.s0, i.d); }
  void emit(const sqrtsd& i) { a.sqrtsd(i.s, i.d); }
  void emit(const storeups& i) { a.movups(i.s, i.m); }
  void emit(const storeb& i) { a.storeb(i.s, i.m); }
  void emit(const storebi& i);
  void emit(const storel& i) { a.storel(i.s, i.m); }
  void emit(const storeli& i) { a.storel(i.s, i.m); }
  void emit(const storeqi& i);
  void emit(const storesd& i) { a.movsd(i.s, i.m); }
  void emit(const storew& i) { a.storew(i.s, i.m); }
  void emit(const storewi& i) { a.storew(i.s, i.m); }
  void emit(subl i) { noncommute(i); a.subl(i.s0, i.d); }
  void emit(subli i) { binary(i); a.subl(i.s0, i.d); }
  void emit(subq i) { noncommute(i); a.subq(i.s0, i.d); }
  void emit(subqi i) { binary(i); a.subq(i.s0, i.d); }
  void emit(subsd i) { noncommute(i); a.subsd(i.s0, i.d); }
  void emit(const testb& i) { a.testb(i.s0, i.s1); }
  void emit(const testbi& i) { a.testb(i.s0, i.s1); }
  void emit(const testbm& i) { a.testb(i.s0, i.s1); }
  void emit(const testbim& i) { a.testb(i.s0, i.s1); }
  void emit(const testw& i) { a.testw(i.s0, i.s1); }
  void emit(const testwi& i);
  void emit(const testwm& i) { a.testw(i.s0, i.s1); }
  void emit(const testwim& i);
  void emit(const testl& i) { a.testl(i.s0, i.s1); }
  void emit(const testli& i);
  void emit(const testlm& i) { a.testl(i.s0, i.s1); }
  void emit(const testlim& i);
  void emit(const testq& i) { a.testq(i.s0, i.s1); }
  void emit(const testqi& i);
  void emit(const testqm& i) { a.testq(i.s0, i.s1); }
  void emit(const testqim& i);
  void emit(const trap& i);
  void emit(const ucomisd& i) { a.ucomisd(i.s0, i.s1); }
  void emit(unpcklpd i) { noncommute(i); a.unpcklpd(i.s0, i.d); }
  void emit(xorb i) { commuteSF(i); a.xorb(i.s0, i.d); }
  void emit(xorbi i) { binary(i); a.xorb(i.s0, i.d); }
  void emit(xorl i) { commuteSF(i); a.xorl(i.s0, i.d); }
  void emit(xorq i);
  void emit(xorqi i) { binary(i); a.xorq(i.s0, i.d); }
  void emit(const conjure& /*i*/) { always_assert(false); }
  void emit(const conjureuse& /*i*/) { always_assert(false); }
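
  // Two lea instructions that cancel each other out; useful as a multi-byte
  // no-op sequence that leaves rax unchanged.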
  void emit_nop() {
    emit(lea{rax[8], rax});
    emit(lea{rax[-8], rax});
  }
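
  // prep() copies s into d (using the appropriately sized mov) unless the two
  // are already the same register.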
  void prep(Reg8 s, Reg8 d) { if (s != d) a.movb(s, d); }
  void prep(Reg16 s, Reg16 d) { if (s != d) a.movw(s, d); }
  void prep(Reg32 s, Reg32 d) { if (s != d) a.movl(s, d); }
  void prep(Reg64 s, Reg64 d) { if (s != d) a.movq(s, d); }
  void prep(RegXMM s, RegXMM d) { if (s != d) a.movdqa(s, d); }
  void emit_simd_imm(int64_t, Vreg);
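
  // Two-operand x64 forms operate on the destination in place: unary() moves
  // the single source into d first, binary() moves s1 into d first.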
  template<class Inst> void unary(Inst& i) { prep(i.s, i.d); }
  template<class Inst> void binary(Inst& i) { prep(i.s1, i.d); }

  template<class Inst> void commuteSF(Inst&);
  template<class Inst> void commute(Inst&);
  template<class Inst> void noncommute(Inst&);
  CodeBlock& frozen() { return env.text.frozen().code; }

  Venv& env;
  X64Asm a;

  const Vlabel current;
  const Vlabel next;
  jit::vector<Venv::LabelPatch>& jmps;
  jit::vector<Venv::LabelPatch>& jccs;
  jit::vector<Venv::LabelPatch>& catches;
};

///////////////////////////////////////////////////////////////////////////////
/*
 * Prepare a binary op that is not commutative.
 *
 * s0 must be a different register than s1 so we don't clobber it.
 */
template<class X64Asm>
template<class Inst> void Vgen<X64Asm>::noncommute(Inst& i) {
  assertx(i.s1 == i.d || i.s0 != i.d); // do not clobber s0
  binary(i);
}

/*
 * Prepare a binary op that is commutative.
 *
 * Swap operands if the dest is s0.
 */
template<class X64Asm>
template<class Inst> void Vgen<X64Asm>::commuteSF(Inst& i) {
  if (i.s1 != i.d && i.s0 == i.d) {
    i = Inst{i.s1, i.s0, i.d, i.sf};
  } else {
    binary(i);
  }
}

template<class X64Asm>
template<class Inst> void Vgen<X64Asm>::commute(Inst& i) {
  if (i.s1 != i.d && i.s0 == i.d) {
    i = Inst{i.s1, i.s0, i.d};
  } else {
    binary(i);
  }
}

///////////////////////////////////////////////////////////////////////////////
/*
 * Returns true iff the status flags necessary to take a j<a> imply that a j<b>
 * will also be taken.
 */
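// For example, CC_L (signed less-than) implies CC_LE (signed less-or-equal).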
bool ccImplies(ConditionCode a, ConditionCode b) {
  if (a == b) return true;

  switch (a) {
    case CC_O:  case CC_NO:
    case CC_AE: case CC_BE:
    case CC_NE:
    case CC_S:  case CC_NS:
    case CC_P:  case CC_NP:
    case CC_GE: case CC_LE:
      return false;

    case CC_B: return b == CC_BE;
    case CC_E: return b == CC_BE || b == CC_LE;
    case CC_A: return b == CC_AE || b == CC_NE;
    case CC_L: return b == CC_LE;
    case CC_G: return b == CC_NE || b == CC_GE;
  }
  always_assert(false);
}
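
// Map an address in the main, cold, or frozen section to the corresponding
// writable (destination) address for that code block.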
static CodeAddress toReal(Venv& env, CodeAddress a) {
  if (env.text.main().code.contains(a)) {
    return env.text.main().code.toDestAddress(a);
  }
  if (env.text.cold().code.contains(a)) {
    return env.text.cold().code.toDestAddress(a);
  }
  if (env.text.frozen().code.contains(a)) {
    return env.text.frozen().code.toDestAddress(a);
  }
  return a;
}

/*
 * When two jccs go to the same destination, the cc of the first is compatible
 * with the cc of the second, and they're within a one-byte offset of each
 * other, retarget the first to jump to the second. This will allow the
 * relocator to shrink the first one, and the extra jmp shouldn't matter since
 * we try to only do this to rarely taken jumps.
 */
void retargetJumps(Venv& env,
                   const jit::hash_map<TCA, jit::vector<TCA>>& jccs) {
  jit::hash_set<TCA> retargeted;
  for (auto& pair : jccs) {
    auto const& jmps = pair.second;
    if (jmps.size() < 2) continue;

    for (size_t i = 0; i < jmps.size(); ++i) {
      DecodedInstruction di(toReal(env, jmps[i]), jmps[i]);
      // Don't bother if the jump is already a short jump.
      if (di.size() != 6) continue;

      for (size_t j = jmps.size() - 1; j > i; --j) {
        auto const delta = jmps[j] - jmps[i] + 2;
        // Backwards jumps are probably not guards, and don't retarget to a
        // dest that's more than a one-byte offset away.
        if (delta < 0 || !deltaFits(delta, sz::byte)) continue;

        DecodedInstruction dj(toReal(env, jmps[j]), jmps[j]);
        if (!ccImplies(di.jccCondCode(), dj.jccCondCode())) continue;

        di.setPicAddress(jmps[j]);
        retargeted.insert(jmps[i]);

        // We might've converted a smashable jump to a regular in-unit jump, so
        // remove any smashable alignments.
        auto range = env.meta.alignments.equal_range(jmps[i]);
        while (range.first != range.second) {
          auto iter = range.first;
          ++range.first;

          auto& align = iter->second;
          if (align.first == Alignment::SmashJcc &&
              align.second == AlignContext::Live) {
            env.meta.alignments.erase(iter);
          }
        }
        break;
      }
    }
  }

  // Finally, remove any retargeted jmps from inProgressTailJumps.
  if (!retargeted.empty()) {
    GrowableVector<IncomingBranch> newTailJumps;
    for (auto& jmp : env.meta.inProgressTailJumps) {
      if (retargeted.count(jmp.toSmash()) == 0) {
        newTailJumps.push_back(jmp);
      }
    }
    env.meta.inProgressTailJumps.swap(newTailJumps);
  }

  // If the retargeted jumps were smashable, now they aren't anymore, so remove
  // them from smashableJumpData.
  for (auto jmp : retargeted) {
    if (env.meta.smashableJumpData.erase(jmp) > 0) {
      FTRACE(3, "retargetJumps: removed {} from smashableJumpData\n", jmp);
    }
  }
}
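
// Patch all recorded jmp/jcc/lea placeholders now that block addresses are
// known; jccs that share a target may additionally be chained together via
// retargetJumps(), depending on EvalJitRetargetJumps.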
template<class X64Asm>
void Vgen<X64Asm>::patch(Venv& env) {
  for (auto const& p : env.jmps) {
    assertx(env.addrs[p.target]);
    X64Asm::patchJmp(toReal(env, p.instr), p.instr, env.addrs[p.target]);
  }

  auto const optLevel = RuntimeOption::EvalJitRetargetJumps;
  jit::hash_map<TCA, jit::vector<TCA>> jccs;
  for (auto const& p : env.jccs) {
    assertx(env.addrs[p.target]);
    X64Asm::patchJcc(toReal(env, p.instr), p.instr, env.addrs[p.target]);
    if (optLevel >= 2 ||
        (optLevel == 1 && p.target >= env.unit.blocks.size())) {
      jccs[env.addrs[p.target]].emplace_back(p.instr);
    }
  }
  if (!jccs.empty()) retargetJumps(env, jccs);

  for (auto const& p : env.leas) {
    assertx(env.vaddrs[p.target]);
    DecodedInstruction di(toReal(env, p.instr), p.instr);
    assertx(di.hasPicOffset());
    di.setPicAddress(env.vaddrs[p.target]);
  }
}
template<class X64Asm>
void Vgen<X64Asm>::pad(CodeBlock& cb) {
  X64Asm a { cb };
  while (a.available() >= 2) a.ud2();
  if (a.available() > 0) a.int3();
}

///////////////////////////////////////////////////////////////////////////////
template<class X64Asm>
void Vgen<X64Asm>::emit(const copy& i) {
  if (i.s == i.d) return;
  if (i.s.isGP()) {
    if (i.d.isGP()) {                     // GP => GP
      a.movq(i.s, i.d);
    } else {                              // GP => XMM
      assertx(i.d.isSIMD());
      // This generates a movq x86 instruction, which zero extends
      // the 64-bit value in srcReg into a 128-bit XMM register
      a.movq_rx(i.s, i.d);
    }
  } else {
    if (i.d.isGP()) {                     // XMM => GP
      a.movq_xr(i.s, i.d);
    } else {                              // XMM => XMM
      assertx(i.d.isSIMD());
      // This copies all 128 bits in XMM,
      // thus avoiding partial register stalls
      a.movdqa(i.s, i.d);
    }
  }
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const copy2& i) {
  assertx(i.s0.isValid() && i.s1.isValid() && i.d0.isValid() && i.d1.isValid());
  auto s0 = i.s0, s1 = i.s1, d0 = i.d0, d1 = i.d1;
  assertx(d0 != d1);
  if (d0 == s1) {
    if (d1 == s0) {
      a.xchgq(d0, d1);
    } else {
      // could do this in a simplify pass
      if (s1 != d1) a.movq(s1, d1); // save s1 first; d1 != s0
      if (s0 != d0) a.movq(s0, d0);
    }
  } else {
    // could do this in a simplify pass
    if (s0 != d0) a.movq(s0, d0);
    if (s1 != d1) a.movq(s1, d1);
  }
}
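
// Materialize an integer bit pattern in a SIMD register: zero via pxor,
// anything else by loading a literal emitted near the code.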
template<class X64Asm>
void Vgen<X64Asm>::emit_simd_imm(int64_t val, Vreg d) {
  if (val == 0) {
    a.pxor(d, d); // does not modify flags
  } else {
    auto addr = alloc_literal(env, val);
    a.movsd(rip[(intptr_t)addr], d);
  }
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const ldimmb& i) {
  // ldimmb is for Vconst::Byte, which is treated as unsigned uint8_t
  auto const val = i.s.ub();
  if (i.d.isGP()) {
    Vreg8 d8 = i.d;
    a.movb(static_cast<int8_t>(val), d8);
  } else {
    emit_simd_imm(val, i.d);
  }
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const ldimml& i) {
  // ldimml is for Vconst::Long, which is treated as unsigned uint32_t
  auto const val = i.s.l();
  if (i.d.isGP()) {
    Vreg32 d32 = i.d;
    a.movl(val, d32);
  } else {
    emit_simd_imm(uint32_t(val), i.d);
  }
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const ldimmq& i) {
  auto const val = i.s.q();
  if (i.d.isGP()) {
    if (val == 0) {
      Vreg32 d32 = i.d;
      a.movl(0, d32); // because emitImmReg tries the xor optimization
    } else {
      a.emitImmReg(i.s, i.d);
    }
  } else {
    emit_simd_imm(val, i.d);
  }
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const load& i) {
  auto mref = i.s.mr();
  a.prefix(mref);
  if (i.d.isGP()) {
    a.loadq(mref, i.d);
  } else {
    assertx(i.d.isSIMD());
    a.movsd(mref, i.d);
  }
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const store& i) {
  auto const mref = i.d.mr();
  a.prefix(mref);
  if (i.s.isGP()) {
    a.storeq(i.s, mref);
  } else {
    assertx(i.s.isSIMD());
    a.storesd(i.s, mref);
  }
}

///////////////////////////////////////////////////////////////////////////////
template<class X64Asm>
void Vgen<X64Asm>::emit(const mcprep& i) {
  /*
   * Initially, we set the cache to hold (addr << 1) | 1 (where `addr' is the
   * address of the movq) so that we can find the movq from the handler.
   *
   * We set the low bit for two reasons: the Class* will never be a valid
   * Class*, so we'll always miss the inline check before it's smashed, and
   * MethodCache::handleStaticCall can tell it's not been smashed yet.
   */
  auto const mov_addr = emitSmashableMovq(a.code(), env.meta, 0, r64(i.d));
  auto const imm = reinterpret_cast<uint64_t>(mov_addr);
  smashMovq(a.toDestAddress(mov_addr), (imm << 1) | 1);

  env.meta.addressImmediates.insert(reinterpret_cast<TCA>(~imm));
}

///////////////////////////////////////////////////////////////////////////////
template<class X64Asm>
void Vgen<X64Asm>::emit(const call& i) {
  if (a.jmpDeltaFits(i.target)) {
    a.call(i.target);
  } else {
    // can't do a near call; store address in data section.
    // call by loading the address using rip-relative addressing. This
    // assumes the data section is near the current code section. Since
    // this sequence is directly in-line, rip-relative like this is
    // more compact than loading a 64-bit immediate.
    auto addr = alloc_literal(env, (uint64_t)i.target);
    a.call(rip[(intptr_t)addr]);
  }
  if (i.watch) {
    *i.watch = a.frontier();
    env.meta.watchpoints.push_back(i.watch);
  }
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const calls& i) {
  emitSmashableCall(a.code(), env.meta, i.target);
}

///////////////////////////////////////////////////////////////////////////////
template<class X64Asm>
void Vgen<X64Asm>::emit(const stubret& i) {
  if (i.saveframe) {
    a.pop(rvmfp());
  } else {
    a.addq(8, reg::rsp);
  }
  a.ret();
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const callstub& i) {
  emit(call{i.target, i.args});
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const callfaststub& i) {
  emit(call{i.target, i.args});
  emit(syncpoint{i.fix});
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const tailcallstub& i) {
  a.addq(8, reg::rsp);
  emit(jmpi{i.target, i.args});
}

///////////////////////////////////////////////////////////////////////////////
template<class X64Asm>
void Vgen<X64Asm>::emit(const phpret& i) {
  a.push(i.fp[AROFF(m_savedRip)]);
  if (!i.noframe) {
    a.loadq(i.fp[AROFF(m_sfp)], i.d);
  }
  a.ret();
}
>
708 void Vgen
<X64Asm
>::emit(const tailcallphp
& i
) {
709 emit(pushm
{i
.fp
[AROFF(m_savedRip
)]});
710 emit(jmpr
{i
.target
, i
.args
});
713 template<class X64Asm
>
714 void Vgen
<X64Asm
>::emit(const callunpack
& i
) {
715 emit(call
{i
.target
, i
.args
});
718 template<class X64Asm
>
719 void Vgen
<X64Asm
>::emit(const contenter
& i
) {
721 Reg64 fp
= i
.fp
, target
= i
.target
;
725 a
.pop(fp
[AROFF(m_savedRip
)]);
730 // m_savedRip will point here.
731 emit(unwind
{{i
.targets
[0], i
.targets
[1]}});
734 ///////////////////////////////////////////////////////////////////////////////
template<class X64Asm>
void Vgen<X64Asm>::emit(const calltc& i) {
  a.push(i.exittc);
  a.push(i.fp[AROFF(m_savedRip)]);

  Label stub;
  a.call(stub);

  asm_label(a, stub);
  assertx(!i.args.contains(reg::rax));
  a.pop(reg::rax); // unused
  a.jmp(i.target);
}

///////////////////////////////////////////////////////////////////////////////
template<class X64Asm>
void Vgen<X64Asm>::emit(const nothrow& /*i*/) {
  env.meta.catches.emplace_back(a.frontier(), nullptr);
  env.record_inline_stack(a.frontier());
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const syncpoint& i) {
  FTRACE(5, "IR recordSyncPoint: {} {} {}\n", a.frontier(),
         i.fix.pcOffset, i.fix.spOffset);
  env.meta.fixups.emplace_back(a.frontier(), i.fix);
  env.record_inline_stack(a.frontier());
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const unwind& i) {
  catches.push_back({a.frontier(), i.targets[1]});
  env.record_inline_stack(a.frontier());
  emit(jmp{i.targets[0]});
}

///////////////////////////////////////////////////////////////////////////////
template<class X64Asm>
void Vgen<X64Asm>::emit(andqi i) {
  if (magFits(i.s0.q(), sz::dword)) {
    emit(andli{int32_t(i.s0.q()), Reg32(i.s1), Reg32(i.d), i.sf});
    return;
  }

  binary(i);
  a.andq(i.s0, i.d);
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const addlim& i) {
  auto mref = i.m.mr();
  a.prefix(mref).addl(i.s0, mref);
}

template<typename X64Asm>
void Vgen<X64Asm>::emit(const addqmr& i) {
  binary(i);
  auto const mref = i.m.mr();
  a.prefix(mref).addq(mref, i.d);
}

template<typename X64Asm>
void Vgen<X64Asm>::emit(const addqrm& i) {
  auto const mref = i.m.mr();
  a.prefix(mref).addq(i.s1, mref);
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const addqim& i) {
  auto mref = i.m.mr();
  a.prefix(mref).addq(i.s0, mref);
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const cloadq& i) {
  auto m = i.t;
  always_assert(!m.index.isValid()); // not supported, but could be later.
  if (i.f != i.d) {
    if (i.d == m.base) {
      // We can't move f over d or we'll clobber the Vptr we need to load from.
      // Since cload does the load unconditionally anyway, we can just load and
      // cmov.
      a.loadq(i.t, i.d);
      a.cmov_reg64_reg64(ccNegate(i.cc), i.f, i.d);
      return;
    }
    a.movq(i.f, i.d);
  }
  a.cload_reg64_disp_reg64(i.cc, m.base, m.disp, i.d);
}
// add s0 s1 d => mov s1->d; d += s0
// cmov cc s d => if cc { mov s->d }
template<class X64Asm>
template<class cmov>
void Vgen<X64Asm>::emit_cmov(const cmov& i) {
  if (i.f != i.d && i.t == i.d) {
    // negate the condition and swap t/f operands so we don't clobber i.t
    return emit(cmov{ccNegate(i.cc), i.sf, i.t, i.f, i.d});
  } else {
    prep(i.f, i.d);
  }
  a.cmov_reg64_reg64(i.cc, r64(i.t), r64(i.d));
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const cvtsi2sd& i) {
  a.pxor(i.d, i.d);
  a.cvtsi2sd(i.s, i.d);
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const cvtsi2sdm& i) {
  a.pxor(i.d, i.d);
  a.cvtsi2sd(i.s, i.d);
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const jcc& i) {
  if (i.targets[1] != i.targets[0]) {
    if (next == i.targets[1]) {
      return emit(jcc{ccNegate(i.cc), i.sf, {i.targets[1], i.targets[0]}});
    }
    auto taken = i.targets[1];
    jccs.push_back({a.frontier(), taken});
    a.jcc(i.cc, a.frontier());
  }
  emit(jmp{i.targets[0]});
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const jcci& i) {
  a.jcc(i.cc, i.taken);
  emit(jmp{i.target});
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const jmp& i) {
  if (next == i.target) return;
  jmps.push_back({a.frontier(), i.target});
  a.jmp(a.frontier());
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const jmpi& i) {
  if (a.jmpDeltaFits(i.target)) {
    a.jmp(i.target);
  } else {
    // can't do a near jmp - use rip-relative addressing
    auto addr = alloc_literal(env, (uint64_t)i.target);
    a.jmp(rip[(intptr_t)addr]);
  }
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const lea& i) {
  // could do this in a simplify pass
  if (i.s.disp == 0 && i.s.base.isValid() && !i.s.index.isValid()) {
    emit(copy{i.s.base, i.d});
  } else {
    a.lea(i.s, i.d);
  }
}
>
902 void Vgen
<X64Asm
>::emit(const leav
& i
) {
903 auto const addr
= a
.frontier();
904 emit(leap
{reg::rip
[0xdeadbeef], i
.d
});
905 env
.leas
.push_back({addr
, i
.s
});
template<class X64Asm>
void Vgen<X64Asm>::emit(const storebi& i) {
  auto mref = i.m.mr();
  a.prefix(mref).storeb(i.s, mref);
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const storeqi& i) {
  auto mref = i.m.mr();
  a.prefix(mref).storeq(i.s, mref);
}
template<class VgenImpl, typename Inst>
bool testimHelper(VgenImpl& env, const Inst& i, uint64_t mask) {
  // If there's only 1 byte of meaningful bits in the mask, we can adjust the
  // pointer offset and use testbim instead.
  int off = 0;
  while (mask > 0xff && !(mask & 0xff)) {
    off++;
    mask >>= 8;
  }

  if (mask > 0xff) return false;

  env.emit(testbim{int8_t(mask), i.s1 + off, i.sf});
  return true;
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const testwi& i) {
  if (i.s0.w() == -1) {
    return emit(testw{i.s1, i.s1, i.sf});
  }
  a.testw(i.s0, i.s1);
}

template<class X64Asm>
void Vgen<X64Asm>::Vgen::emit(const testwim& i) {
  if (testimHelper(*this, i, i.s0.w())) return;
  a.testw(i.s0, i.s1);
}

template<class X64Asm>
void Vgen<X64Asm>::Vgen::emit(const testlim& i) {
  if (testimHelper(*this, i, i.s0.l())) return;
  a.testl(i.s0, i.s1);
}

template<class X64Asm>
void Vgen<X64Asm>::Vgen::emit(const testli& i) {
  if (i.s0.l() == -1) {
    return emit(testl{i.s1, i.s1, i.sf});
  }
  a.testl(i.s0, i.s1);
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const testqi& i) {
  auto const imm = i.s0.q();
  if (magFits(imm, sz::byte)) {
    a.testb(int8_t(imm), rbyte(i.s1));
  } else if (magFits(imm, sz::dword)) {
    emit(testli{int32_t(imm), Reg32(i.s1), i.sf});
  } else if (imm == -1) {
    emit(testq{i.s1, i.s1, i.sf});
  } else {
    a.testq(i.s0, i.s1);
  }
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const testqim& i) {
  if (testimHelper(*this, i, i.s0.q())) return;
  if (magFits(i.s0.q(), sz::dword)) {
    // For an unsigned 32 bit immediate, we can get the same results
    // by emitting a testlim.
    emit(testlim{int32_t(i.s0.q()), i.s1, i.sf});
  } else {
    a.testq(i.s0, i.s1);
  }
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const trap& i) {
  env.meta.trapReasons.emplace_back(a.frontier(), i.reason);
  a.ud2();
}

template<class X64Asm>
void Vgen<X64Asm>::emit(xorq i) {
  if (i.s0 == i.s1) {
    // 32-bit xor{s, s, d} zeroes the upper bits of `d'.
    return emit(xorl{r32(i.s0), r32(i.s1), r32(i.d), i.sf});
  }
  commuteSF(i);
  a.xorq(i.s0, i.d);
}

///////////////////////////////////////////////////////////////////////////////
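
// Replace the instruction at position i of block b with whatever the given
// lambda emits into the Vout; used by the lowering routines below.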
template<typename Lower>
void lower_impl(Vunit& unit, Vlabel b, size_t i, Lower lower) {
  vmodify(unit, b, i, [&] (Vout& v) { lower(v); return 1; });
}

template <typename Inst>
void lower(Vunit& /*unit*/, Inst& /*inst*/, Vlabel /*b*/, size_t /*i*/) {}
///////////////////////////////////////////////////////////////////////////////

void lower(Vunit& unit, popp& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << pop{inst.d0};
    v << pop{inst.d1};
  });
}

void lower(Vunit& unit, poppm& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << popm{inst.d0};
    v << popm{inst.d1};
  });
}

void lower(Vunit& unit, pushp& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << push{inst.s0};
    v << push{inst.s1};
  });
}

void lower(Vunit& unit, pushpm& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << pushm{inst.s0};
    v << pushm{inst.s1};
  });
}

///////////////////////////////////////////////////////////////////////////////
void lower(Vunit& unit, stublogue& inst, Vlabel b, size_t i) {
  if (inst.saveframe) {
    unit.blocks[b].code[i] = push{rvmfp()};
  } else {
    unit.blocks[b].code[i] = lea{reg::rsp[-8], reg::rsp};
  }
}

void lower(Vunit& unit, stubunwind& /*inst*/, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = lea{reg::rsp[16], reg::rsp};
}

void lower(Vunit& unit, stubtophp& /*inst*/, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = lea{reg::rsp[16], reg::rsp};
}

void lower(Vunit& unit, loadstubret& inst, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = load{reg::rsp[8], inst.d};
}

void lower(Vunit& unit, phplogue& inst, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = popm{inst.fp[AROFF(m_savedRip)]};
}

void lower(Vunit& unit, resumetc& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << callr{inst.target, inst.args};
    v << jmpi{inst.exittc};
  });
}

///////////////////////////////////////////////////////////////////////////////

void lower(Vunit& unit, sar& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << copy{inst.s0, rcx};
    v << sarq{inst.s1, inst.d, inst.sf};
  });
}

void lower(Vunit& unit, shl& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << copy{inst.s0, rcx};
    v << shlq{inst.s1, inst.d, inst.sf};
  });
}

void lower(Vunit& unit, shr& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << copy{inst.s0, rcx};
    v << shrq{inst.s1, inst.d, inst.sf};
  });
}

void lower(Vunit& unit, srem& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << copy{inst.s0, rax};
    v << cqo{};                      // sign-extend rax => rdx:rax
    v << idiv{inst.s1, v.makeReg()}; // rdx:rax/divisor => quot:rax, rem:rdx
    v << copy{rdx, inst.d};
  });
}

void lower(Vunit& unit, divint& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << copy{inst.s0, rax};
    v << cqo{};                      // sign-extend rax => rdx:rax
    v << idiv{inst.s1, v.makeReg()}; // rdx:rax/divisor => quot:rax, rem:rdx
    v << copy{rax, inst.d};
  });
}

///////////////////////////////////////////////////////////////////////////////

void lower(Vunit& unit, movtqb& inst, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = copy{inst.s, inst.d};
}
void lower(Vunit& unit, movtdb& inst, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = copy{inst.s, inst.d};
}
void lower(Vunit& unit, movtdq& inst, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = copy{inst.s, inst.d};
}
void lower(Vunit& unit, movtqw& inst, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = copy{inst.s, inst.d};
}
void lower(Vunit& unit, movtql& inst, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = copy{inst.s, inst.d};
}

///////////////////////////////////////////////////////////////////////////////
/*
 * Lower a few abstractions to facilitate straightforward x64 codegen.
 */
void lowerForX64(Vunit& unit) {
  vasm_lower(unit, [&](const VLS& /*env*/, Vinstr& inst, Vlabel b, size_t i) {
    switch (inst.op) {
#define O(name, ...)                     \
      case Vinstr::name:                 \
        lower(unit, inst.name##_, b, i); \
        break;

      VASM_OPCODES
#undef O
    }
  });
}

///////////////////////////////////////////////////////////////////////////////
void optimizeX64(Vunit& unit, const Abi& abi, bool regalloc) {
  Timer timer(Timer::vasm_optimize, unit.log_entry);

  auto const doPass = [&] (const char* name, auto fun) {
    rqtrace::EventGuard trace{name};
    fun(unit);
  };

  doPass("VOPT_NOP",    removeTrivialNops);
  doPass("VOPT_PHI",    optimizePhis);
  doPass("VOPT_BRANCH", fuseBranches);
  doPass("VOPT_JMP",    [] (Vunit& u) { optimizeJmps(u); });
  doPass("VOPT_EXIT",   [] (Vunit& u) { optimizeExits(u); });

  assertx(checkWidths(unit));

  if (unit.context && !isProfiling(unit.context->kind) && abi.canSpill &&
      RuntimeOption::EvalProfBranchSampleFreq > 0) {
    // Even when branch profiling is on, we still only want to profile
    // non-profiling translations of PHP functions. We also require that we
    // can spill, so that we can generate arbitrary profiling code, and also to
    // ensure we don't profile unique stubs and such.
    doPass("VOPT_PROF_BRANCH", profile_branches);
  }

  doPass("VOPT_X64",      lowerForX64);
  doPass("VOPT_SIMPLIFY", simplify);
  doPass("VOPT_X64",      lowerForX64);

  if (!unit.constToReg.empty()) {
    doPass("VOPT_FOLD_IMM", foldImms<x64::ImmFolder>);
  }

  doPass("VOPT_COPY", [&] (Vunit& u) { optimizeCopies(u, abi); });

  if (unit.needsRegAlloc()) {
    doPass("VOPT_DCE", [] (Vunit& u) { removeDeadCode(u); });
    doPass("VOPT_JMP", [] (Vunit& u) { optimizeJmps(u); });
    doPass("VOPT_DCE", [] (Vunit& u) { removeDeadCode(u); });

    if (regalloc) {
      if (RuntimeOption::EvalUseGraphColor &&
          unit.context &&
          (unit.context->kind == TransKind::Optimize ||
           unit.context->kind == TransKind::OptPrologue)) {
        rqtrace::EventGuard trace{"VOPT_GRAPH_COLOR"};
        allocateRegistersWithGraphColor(unit, abi);
      } else {
        rqtrace::EventGuard trace{"VOPT_XLS"};
        allocateRegistersWithXLS(unit, abi);
      }
      doPass("VOPT_SF_PEEPHOLES", [&] (Vunit& u) { sfPeepholes(u, abi); });
      doPass("VOPT_POST_RA_SIMPLIFY", postRASimplify);
    }
  }

  if (unit.blocks.size() > 1) {
    doPass("VOPT_JMP", [] (Vunit& u) { optimizeJmps(u); });
  }
}
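
// Emit the unit with either the XED-based assembler or the hand-written
// X64Assembler, depending on EvalUseXedAssembler.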
void emitX64(Vunit& unit, Vtext& text, CGMeta& fixups,
             AsmInfo* asmInfo) {
  if (RuntimeOption::EvalUseXedAssembler) {
    return vasm_emit<Vgen<XedAssembler>>(unit, text, fixups, asmInfo);
  }
  vasm_emit<Vgen<X64Assembler>>(unit, text, fixups, asmInfo);
}
///////////////////////////////////////////////////////////////////////////////

}}