/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#include "hphp/runtime/vm/jit/vasm-emit.h"

#include "hphp/runtime/base/runtime-option.h"
#include "hphp/runtime/base/tracing.h"

#include "hphp/runtime/vm/jit/abi-x64.h"
#include "hphp/runtime/vm/jit/block.h"
#include "hphp/runtime/vm/jit/code-gen-helpers.h"
#include "hphp/runtime/vm/jit/print.h"
#include "hphp/runtime/vm/jit/prof-data.h"
#include "hphp/runtime/vm/jit/service-requests.h"
#include "hphp/runtime/vm/jit/smashable-instr-x64.h"
#include "hphp/runtime/vm/jit/target-cache.h"
#include "hphp/runtime/vm/jit/timer.h"
#include "hphp/runtime/vm/jit/vasm.h"
#include "hphp/runtime/vm/jit/vasm-block-counters.h"
#include "hphp/runtime/vm/jit/vasm-instr.h"
#include "hphp/runtime/vm/jit/vasm-internal.h"
#include "hphp/runtime/vm/jit/vasm-lower.h"
#include "hphp/runtime/vm/jit/vasm-print.h"
#include "hphp/runtime/vm/jit/vasm-prof.h"
#include "hphp/runtime/vm/jit/vasm-unit.h"
#include "hphp/runtime/vm/jit/vasm-util.h"
#include "hphp/runtime/vm/jit/vasm-visit.h"
namespace HPHP { namespace jit {
///////////////////////////////////////////////////////////////////////////////

namespace x64 { struct ImmFolder; }

///////////////////////////////////////////////////////////////////////////////

static_assert(folly::kIsLittleEndian,
              "Code contains little-endian specific optimizations.");
template<class X64Asm>
struct Vgen {
  explicit Vgen(Venv& env)
    : env(env)
    , a(*env.cb)
    , current(env.current)
    , next(env.next)
    , jmps(env.jmps)
    , jccs(env.jccs)
    , catches(env.catches)
  {}

  static void emitVeneers(Venv& env) {}
  static void handleLiterals(Venv& env) {}
  static void retargetBinds(Venv& env);
  static void patch(Venv& env);
  static void pad(CodeBlock& cb);

  /////////////////////////////////////////////////////////////////////////////
  template<class Inst> void emit(const Inst& i) {
    always_assert_flog(false, "unimplemented instruction: {} in B{}\n",
                       vinst_names[Vinstr(i).op], size_t(current));
  }
  void emit(const prefetch& i) { a.prefetch(i.m.mr()); }
  void emit(const copy& i);
  void emit(const copy2& i);
  void emit(const debugtrap& /*i*/) { a.int3(); }
  void emit(const fallthru&);
  void emit(const ldimmb& i);
  void emit(const ldimml& i);
  void emit(const ldimmq& i);
  void emit(const ldundefq& /*i*/) {}
  void emit(const load& i);
  void emit(const store& i);
  void emit(const mcprep& i);
  // native function abi
  void emit(const call& i);
  void emit(const callm& i) { a.prefix(i.target.mr()).call(i.target); }
  void emit(const callr& i) { a.call(i.target); }
  void emit(const calls& i);
  void emit(const ret& /*i*/) { a.ret(); }

  void emit(const stubret& i);
  void emit(const callstub& i);
  void emit(const callfaststub& i);
  void emit(const tailcallstub& i);
  void emit(const tailcallstubr& i);
  void emit(const callphp& i) {
    emit(call{i.target, i.args});
    setCallFuncId(env, a.frontier());
  }
  void emit(const callphpr& i) {
    emit(callr{i.target, i.args});
    setCallFuncId(env, a.frontier());
  }
  void emit(const phpret& i);
  void emit(const contenter& i);

  void emit(const inittc& /*i*/) {}
  void emit(const leavetc&) { a.ret(); }
  void emit(const landingpad& /*i*/) {}
  void emit(const nothrow& i);
  void emit(const syncpoint& i);
  void emit(const unwind& i);
  void emit(absdbl i) { unary(i); a.psllq(1, i.d); a.psrlq(1, i.d); }
  void emit(andb i) { commuteSF(i); a.andb(i.s0, i.d); }
  void emit(andbi i) { binary(i); a.andb(i.s0, i.d); }
  void emit(const andbim& i) { a.prefix(i.m.mr()).andb(i.s, i.m); }
  void emit(andw i) { commuteSF(i); a.andw(i.s0, i.d); }
  void emit(andwi i) { binary(i); a.andw(i.s0, i.d); }
  void emit(andl i) { commuteSF(i); a.andl(i.s0, i.d); }
  void emit(andli i) { binary(i); a.andl(i.s0, i.d); }
  void emit(andq i) { commuteSF(i); a.andq(i.s0, i.d); }
  void emit(andqi i);
  void emit(const addwm& i) { a.prefix(i.m.mr()).addw(i.s0, i.m); }
  void emit(addl i) { commuteSF(i); a.addl(i.s0, i.d); }
  void emit(addli i) { binary(i); a.addl(i.s0, i.d); }
  void emit(const addlm& i) { a.prefix(i.m.mr()).addl(i.s0, i.m); }
  void emit(const addlim& i);
  void emit(addq i) { commuteSF(i); a.addq(i.s0, i.d); }
  void emit(addqi i) { binary(i); a.addq(i.s0, i.d); }
  void emit(const addqmr& i);
  void emit(const addqrm& i);
  void emit(const addqim& i);
  void emit(addsd i) { commute(i); a.addsd(i.s0, i.d); }
  void emit(const btrq& i) { binary(i); a.btrq(i.s0, i.d); }
  void emit(const cloadq& i);
  template<class cmov> void emit_cmov(const cmov& i);
  void emit(const cmovb& i) { emit_cmov(i); }
  void emit(const cmovw& i) { emit_cmov(i); }
  void emit(const cmovl& i) { emit_cmov(i); }
  void emit(const cmovq& i) { emit_cmov(i); }
  void emit(const cmpb& i) { a.cmpb(i.s0, i.s1); }
  void emit(const cmpbi& i) { a.cmpb(i.s0, i.s1); }
  void emit(const cmpbim& i) { a.prefix(i.s1.mr()).cmpb(i.s0, i.s1); }
  void emit(const cmpbm& i) { a.prefix(i.s1.mr()).cmpb(i.s0, i.s1); }
  void emit(const cmpw& i) { a.cmpw(i.s0, i.s1); }
  void emit(const cmpwi& i) { a.cmpw(i.s0, i.s1); }
  void emit(const cmpwim& i) { a.prefix(i.s1.mr()).cmpw(i.s0, i.s1); }
  void emit(const cmpwm& i) { a.prefix(i.s1.mr()).cmpw(i.s0, i.s1); }
  void emit(const cmpl& i) { a.cmpl(i.s0, i.s1); }
  void emit(const cmpli& i) { a.cmpl(i.s0, i.s1); }
  void emit(const cmplim& i) { a.prefix(i.s1.mr()).cmpl(i.s0, i.s1); }
  void emit(const cmplm& i) { a.prefix(i.s1.mr()).cmpl(i.s0, i.s1); }
  void emit(const cmpq& i) { a.cmpq(i.s0, i.s1); }
  void emit(const cmpqi& i) { a.cmpq(i.s0, i.s1); }
  void emit(const cmpqim& i) { a.prefix(i.s1.mr()).cmpq(i.s0, i.s1); }
  void emit(const cmpqm& i) { a.prefix(i.s1.mr()).cmpq(i.s0, i.s1); }
  void emit(cmpsd i) { noncommute(i); a.cmpsd(i.s0, i.d, i.pred); }
  void emit(const cqo& /*i*/) { a.cqo(); }
  void emit(const cvttsd2siq& i) { a.cvttsd2siq(i.s, i.d); }
  void emit(const cvtsi2sd& i);
  void emit(const cvtsi2sdm& i);
  void emit(decl i) { unary(i); a.decl(i.d); }
  void emit(const declm& i) { a.prefix(i.m.mr()).decl(i.m); }
  void emit(decq i) { unary(i); a.decq(i.d); }
  void emit(const decqm& i) { a.prefix(i.m.mr()).decq(i.m); }
  void emit(const decqmlock& i) { a.prefix(i.m.mr()).decqlock(i.m); }
  void emit(const decqmlocknosf&);
  void emit(divsd i) { noncommute(i); a.divsd(i.s0, i.d); }
  void emit(imul i) { commuteSF(i); a.imul(i.s0, i.d); }
  void emit(const idiv& i) { a.idiv(i.s); }
  void emit(incl i) { unary(i); a.incl(i.d); }
  void emit(const inclm& i) { a.prefix(i.m.mr()).incl(i.m); }
  void emit(incq i) { unary(i); a.incq(i.d); }
  void emit(const incqm& i) { a.prefix(i.m.mr()).incq(i.m); }
  void emit(const incwm& i) { a.prefix(i.m.mr()).incw(i.m); }
  void emit(const jcc& i);
  void emit(const jcci& i);
  void emit(const jmp& i);
  void emit(const jmpr& i) { a.jmp(i.target); }
  void emit(const jmpm& i) { a.prefix(i.target.mr()).jmp(i.target); }
  void emit(const jmpi& i);
  void emit(const ldbindretaddr& i);
  void emit(const lea& i);
  void emit(const leap& i) { a.lea(i.s, i.d); }
  void emit(const leav& i);
  void emit(const lead& i) { a.lea(rip[(intptr_t)i.s.get()], i.d); }
  void emit(const loadups& i) { a.prefix(i.s.mr()).movups(i.s, i.d); }
  void emit(const loadtqb& i) { a.prefix(i.s.mr()).loadb(i.s, i.d); }
  void emit(const loadb& i) { a.prefix(i.s.mr()).loadb(i.s, i.d); }
  void emit(const loadw& i) { a.prefix(i.s.mr()).loadw(i.s, i.d); }
  void emit(const loadtql& i) { a.prefix(i.s.mr()).loadl(i.s, i.d); }
  void emit(const loadl& i) { a.prefix(i.s.mr()).loadl(i.s, i.d); }
  void emit(const loadqp& i) { a.loadq(i.s, i.d); }
  void emit(const loadqd& i) { a.loadq(rip[(intptr_t)i.s.get()], i.d); }
  void emit(const loadsd& i) { a.prefix(i.s.mr()).movsd(i.s, i.d); }
  void emit(const loadzbl& i) { a.prefix(i.s.mr()).loadzbl(i.s, i.d); }
  void emit(const loadzbq& i) { a.prefix(i.s.mr()).loadzbl(i.s, Reg32(i.d)); }
  void emit(const loadsbq& i) { a.prefix(i.s.mr()).loadsbq(i.s, i.d); }
  void emit(const loadzwq& i) { a.prefix(i.s.mr()).loadzwl(i.s, Reg32(i.d)); }
  void emit(const loadzlq& i) { a.prefix(i.s.mr()).loadl(i.s, Reg32(i.d)); }
  void emit(const movb& i) { a.movb(i.s, i.d); }
  void emit(const movl& i) { a.movl(i.s, i.d); }
  void emit(const movzbw& i) { a.movzbl(i.s, Reg32(i.d)); }
  void emit(const movzbl& i) { a.movzbl(i.s, i.d); }
  void emit(const movzbq& i) { a.movzbl(i.s, Reg32(i.d)); }
  void emit(const movzwl& i) { a.movzwl(i.s, i.d); }
  void emit(const movzwq& i) { a.movzwl(i.s, Reg32(i.d)); }
  void emit(const movzlq& i) { a.movl(i.s, Reg32(i.d)); }
  void emit(const movsbq& i) { a.movsbq(i.s, i.d); }
  void emit(mulsd i) { commute(i); a.mulsd(i.s0, i.d); }
  void emit(neg i) { unary(i); a.neg(i.d); }
  void emit(const nop& /*i*/) { a.nop(); }
  void emit(not i) { unary(i); a.not(i.d); }
  void emit(notb i) { unary(i); a.notb(i.d); }
  void emit(orbi i) { binary(i); a.orb(i.s0, i.d); }
  void emit(const orbim& i) { a.prefix(i.m.mr()).orb(i.s0, i.m); }
  void emit(const orwim& i) { a.prefix(i.m.mr()).orw(i.s0, i.m); }
  void emit(const orlim& i) { a.prefix(i.m.mr()).orl(i.s0, i.m); }
  void emit(orq i) { commuteSF(i); a.orq(i.s0, i.d); }
  void emit(orwi i) { binary(i); a.orw(i.s0, i.d); }
  void emit(orli i) { binary(i); a.orl(i.s0, i.d); }
  void emit(orqi i) { binary(i); a.orq(i.s0, i.d); }
  void emit(const orqim& i) { a.prefix(i.m.mr()).orq(i.s0, i.m); }
  void emit(const pop& i) { a.pop(i.d); }
  void emit(const popm& i) { a.prefix(i.d.mr()).pop(i.d); }
  void emit(const popf& i) { assertx(i.d == RegSF{0}); a.popf(); }
  void emit(const push& i) { a.push(i.s); }
  void emit(const pushm& i) { a.prefix(i.s.mr()).push(i.s); }
  void emit(const pushf& i) { assertx(i.s == RegSF{0}); a.pushf(); }
  void emit(const roundsd& i) { a.roundsd(i.dir, i.s, i.d); }
  void emit(const sarq& i) { unary(i); a.sarq(i.d); }
  void emit(sarqi i) { binary(i); a.sarq(i.s0, i.d); }
  void emit(const setcc& i) { a.setcc(i.cc, i.d); }
  void emit(shlli i) { binary(i); a.shll(i.s0, i.d); }
  void emit(shlq i) { unary(i); a.shlq(i.d); }
  void emit(shrq i) { unary(i); a.shrq(i.d); }
  void emit(shlqi i) { binary(i); a.shlq(i.s0, i.d); }
  void emit(shrli i) { binary(i); a.shrl(i.s0, i.d); }
  void emit(shrqi i) { binary(i); a.shrq(i.s0, i.d); }
  void emit(const sqrtsd& i) { a.sqrtsd(i.s, i.d); }
  void emit(const storeups& i) { a.prefix(i.m.mr()).movups(i.s, i.m); }
  void emit(const storeb& i) { a.prefix(i.m.mr()).storeb(i.s, i.m); }
  void emit(const storebi& i);
  void emit(const storel& i) { a.prefix(i.m.mr()).storel(i.s, i.m); }
  void emit(const storeli& i) { a.prefix(i.m.mr()).storel(i.s, i.m); }
  void emit(const storeqi& i);
  void emit(const storesd& i) { a.prefix(i.m.mr()).movsd(i.s, i.m); }
  void emit(const storew& i) { a.prefix(i.m.mr()).storew(i.s, i.m); }
  void emit(const storewi& i) { a.prefix(i.m.mr()).storew(i.s, i.m); }
  void emit(subl i) { noncommute(i); a.subl(i.s0, i.d); }
  void emit(subli i) { binary(i); a.subl(i.s0, i.d); }
  void emit(subq i) { noncommute(i); a.subq(i.s0, i.d); }
  void emit(subqi i) { binary(i); a.subq(i.s0, i.d); }
  void emit(const subqim& i);
  void emit(subsd i) { noncommute(i); a.subsd(i.s0, i.d); }
  void emit(const testb& i) { a.testb(i.s0, i.s1); }
  void emit(const testbi& i) { a.testb(i.s0, i.s1); }
  void emit(const testbm& i) { a.prefix(i.s1.mr()).testb(i.s0, i.s1); }
  void emit(const testbim& i) { a.prefix(i.s1.mr()).testb(i.s0, i.s1); }
  void emit(const testw& i) { a.testw(i.s0, i.s1); }
  void emit(const testwi& i);
  void emit(const testwm& i) { a.prefix(i.s1.mr()).testw(i.s0, i.s1); }
  void emit(const testwim& i);
  void emit(const testl& i) { a.testl(i.s0, i.s1); }
  void emit(const testli& i);
  void emit(const testlm& i) { a.prefix(i.s1.mr()).testl(i.s0, i.s1); }
  void emit(const testlim& i);
  void emit(const testq& i) { a.testq(i.s0, i.s1); }
  void emit(const testqi& i);
  void emit(const testqm& i) { a.prefix(i.s1.mr()).testq(i.s0, i.s1); }
  void emit(const testqim& i);
  void emit(const trap& i);
  void emit(const ucomisd& i) { a.ucomisd(i.s0, i.s1); }
  void emit(unpcklpd i) { noncommute(i); a.unpcklpd(i.s0, i.d); }
  void emit(xorb i) { commuteSF(i); a.xorb(i.s0, i.d); }
  void emit(xorbi i) { binary(i); a.xorb(i.s0, i.d); }
  void emit(xorw i) { commuteSF(i); a.xorw(i.s0, i.d); }
  void emit(xorwi i) { binary(i); a.xorw(i.s0, i.d); }
  void emit(xorl i) { commuteSF(i); a.xorl(i.s0, i.d); }
  void emit(xorq i);
  void emit(xorqi i) { binary(i); a.xorq(i.s0, i.d); }
  void emit(const conjure& /*i*/) { always_assert(false); }
  void emit(const conjureuse& /*i*/) { always_assert(false); }
  void emit(const crc32q& i);
    emit(lea{rax[8], rax});
    emit(lea{rax[-8], rax});
  void prep(Reg8 s, Reg8 d) { if (s != d) a.movb(s, d); }
  void prep(Reg16 s, Reg16 d) { if (s != d) a.movw(s, d); }
  void prep(Reg32 s, Reg32 d) { if (s != d) a.movl(s, d); }
  void prep(Reg64 s, Reg64 d) { if (s != d) a.movq(s, d); }
  void prep(RegXMM s, RegXMM d) { if (s != d) a.movdqa(s, d); }
  void emit_simd_imm(int64_t, Vreg);

  template<class Inst> void unary(Inst& i) { prep(i.s, i.d); }
  template<class Inst> void binary(Inst& i) { prep(i.s1, i.d); }

  template<class Inst> void commuteSF(Inst&);
  template<class Inst> void commute(Inst&);
  template<class Inst> void noncommute(Inst&);
  CodeBlock& frozen() { return env.text.frozen().code; }

  Venv& env;
  X64Asm a;

  const Vlabel current;
  const Vlabel next;
  jit::vector<Venv::LabelPatch>& jmps;
  jit::vector<Venv::LabelPatch>& jccs;
  jit::vector<Venv::LabelPatch>& catches;
};
///////////////////////////////////////////////////////////////////////////////

/*
 * Prepare a binary op that is not commutative.
 *
 * s0 must be a different register than s1 so we don't clobber it.
 */
template<class X64Asm>
template<class Inst> void Vgen<X64Asm>::noncommute(Inst& i) {
  assertx(i.s1 == i.d || i.s0 != i.d); // do not clobber s0
  binary(i);
}
/*
 * Prepare a binary op that is commutative.
 *
 * Swap operands if the dest is s0.
 */
template<class X64Asm>
template<class Inst> void Vgen<X64Asm>::commuteSF(Inst& i) {
  if (i.s1 != i.d && i.s0 == i.d) {
    i = Inst{i.s1, i.s0, i.d, i.sf};
  } else {
    binary(i);
  }
}
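// Example: with d == s0, addq{s0, s1, d} becomes addq{s1, s0, d}. After the
// swap the destination register already holds the old s0, so no prep move is
// needed and the emitted "add s0, d" still computes s0 + s1.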
template<class X64Asm>
template<class Inst> void Vgen<X64Asm>::commute(Inst& i) {
  if (i.s1 != i.d && i.s0 == i.d) {
    i = Inst{i.s1, i.s0, i.d};
  } else {
    binary(i);
  }
}

///////////////////////////////////////////////////////////////////////////////
/*
 * Returns true iff the status flags necessary to take a j<a> imply that a j<b>
 * will also be taken.
 */
bool ccImplies(ConditionCode a, ConditionCode b) {
  if (a == b) return true;

  switch (a) {
    case CC_O:  case CC_NO:
    case CC_AE: case CC_BE:
    case CC_S:  case CC_NS:
    case CC_P:  case CC_NP:
    case CC_GE: case CC_LE:
      return false;

    case CC_B: return b == CC_BE;
    case CC_E: return b == CC_BE || b == CC_LE;
    case CC_A: return b == CC_AE || b == CC_NE;
    case CC_L: return b == CC_LE;
    case CC_G: return b == CC_NE || b == CC_GE;
  }
  always_assert(false);
}
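// Example: CC_L tests SF != OF while CC_LE tests (SF != OF) || ZF, so any
// flags state that takes a j<l> also takes a j<le>; redirecting a jl to a jle
// that shares its destination is therefore safe.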
/*
 * When two jccs go to the same destination, the cc of the first is compatible
 * with the cc of the second, and they're within a one-byte offset of each
 * other, retarget the first to jump to the second. This will allow the
 * relocator to shrink the first one, and the extra jmp shouldn't matter since
 * we try to only do this to rarely taken jumps.
 */
template<typename Key, typename Hash>
jit::hash_set<TCA> retargetJumps(
  Venv& env,
  const jit::hash_map<Key, jit::vector<TCA>, Hash>& jccs
) {
  jit::hash_set<TCA> retargeted;
  for (auto& pair : jccs) {
    auto const& jmps = pair.second;
    if (jmps.size() < 2) continue;

    for (size_t i = 0; i < jmps.size(); ++i) {
      DecodedInstruction di(env.text.toDestAddress(jmps[i]), jmps[i]);
      // Don't bother if the jump is already a short jump.
      if (di.size() != 6) continue;

      for (size_t j = jmps.size() - 1; j > i; --j) {
        auto const delta = jmps[j] - jmps[i] + 2;
        // Backwards jumps are probably not guards, and don't retarget to a
        // dest that's more than a one-byte offset away.
        if (delta < 0 || !deltaFits(delta, sz::byte)) continue;

        DecodedInstruction dj(env.text.toDestAddress(jmps[j]), jmps[j]);
        if (!ccImplies(di.jccCondCode(), dj.jccCondCode())) continue;

        di.setPicAddress(jmps[j]);
        retargeted.insert(jmps[i]);

        // We might've converted a smashable jump to a regular in-unit jump, so
        // remove any smashable alignments.
        auto range = env.meta.alignments.equal_range(jmps[i]);
        while (range.first != range.second) {
          auto iter = range.first;
          ++range.first;

          auto& align = iter->second;
          if (align.first == Alignment::SmashJcc &&
              align.second == AlignContext::Live) {
            env.meta.alignments.erase(iter);
          }
        }
      }
    }
  }

  return retargeted;
}
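// Illustration: three 6-byte near jccs (0F 8x rel32) guarding the same cold
// target can be chained so each earlier jcc aims at the next one. A chained
// jcc then sits within rel8 range, so relocation can shrink it to the 2-byte
// short form (7x rel8); the rarely-taken path simply hops through the
// remaining jcc(s) to reach the original destination.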
struct SrcKeyBoolTupleHasher {
  size_t operator()(std::tuple<SrcKey, bool> v) const {
    return folly::hash::hash_combine(
      std::get<0>(v).toAtomicInt(),
      std::get<1>(v)
    );
  }
};
template<class X64Asm>
void Vgen<X64Asm>::retargetBinds(Venv& env) {
  if (RuntimeOption::EvalJitRetargetJumps < 1) return;

  // The target is unique per the SrcKey and the fallback flag.
  jit::hash_map<
    std::pair<SrcKey, bool>,
    jit::vector<TCA>,
    SrcKeyBoolTupleHasher
  > binds;

  for (auto const& b : env.meta.smashableBinds) {
    if (b.smashable.type() == IncomingBranch::Tag::JCC) {
      binds[std::make_pair(b.sk, b.fallback)]
        .emplace_back(b.smashable.toSmash());
    }
  }

  auto const retargeted = retargetJumps(env, std::move(binds));
  if (retargeted.empty()) return;

  // Finally, remove any retargeted jmps from inProgressTailJumps and
  // smashableBinds.
  GrowableVector<IncomingBranch> newTailJumps;
  for (auto& jmp : env.meta.inProgressTailJumps) {
    if (retargeted.count(jmp.toSmash()) == 0) {
      newTailJumps.push_back(jmp);
    }
  }
  env.meta.inProgressTailJumps.swap(newTailJumps);

  decltype(env.meta.smashableBinds) newBinds;
  for (auto& bind : env.meta.smashableBinds) {
    if (retargeted.count(bind.smashable.toSmash()) == 0) {
      newBinds.push_back(bind);
    } else {
      FTRACE(3, "retargetBinds: removed {} from smashableBinds\n",
             bind.smashable.toSmash());
    }
  }
  env.meta.smashableBinds.swap(newBinds);
}
template<class X64Asm>
void Vgen<X64Asm>::patch(Venv& env) {
  for (auto const& p : env.jmps) {
    assertx(env.addrs[p.target]);
    X64Asm::patchJmp(
      env.text.toDestAddress(p.instr), p.instr, env.addrs[p.target]);
  }

  auto const optLevel = RuntimeOption::EvalJitRetargetJumps;
  jit::hash_map<TCA, jit::vector<TCA>> jccs;
  for (auto const& p : env.jccs) {
    assertx(env.addrs[p.target]);
    X64Asm::patchJcc(
      env.text.toDestAddress(p.instr), p.instr, env.addrs[p.target]);
    if (optLevel > 0) {
      jccs[env.addrs[p.target]].emplace_back(p.instr);
    }
  }

  if (!jccs.empty()) retargetJumps(env, jccs);

  for (auto const& p : env.leas) {
    assertx(env.vaddrs[p.target]);
    DecodedInstruction di(env.text.toDestAddress(p.instr), p.instr);
    assertx(di.hasPicOffset());
    di.setPicAddress(env.vaddrs[p.target]);
  }
}
template<class X64Asm>
void Vgen<X64Asm>::pad(CodeBlock& cb) {
}

///////////////////////////////////////////////////////////////////////////////
template<class X64Asm>
void Vgen<X64Asm>::emit(const copy& i) {
  if (i.s == i.d) return;
  if (i.s.isGP()) {
    if (i.d.isGP()) {                 // GP => GP
      a.movq(i.s, i.d);
    } else {                          // GP => XMM
      assertx(i.d.isSIMD());
      // This generates a movq x86 instruction, which zero extends
      // the 64-bit value in srcReg into a 128-bit XMM register
      a.movq_rx(i.s, i.d);
    }
  } else {
    if (i.d.isGP()) {                 // XMM => GP
      a.movq_xr(i.s, i.d);
    } else {                          // XMM => XMM
      assertx(i.d.isSIMD());
      // This copies all 128 bits in XMM,
      // thus avoiding partial register stalls
      a.movdqa(i.s, i.d);
    }
  }
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const copy2& i) {
  assertx(i.s0.isValid() && i.s1.isValid() && i.d0.isValid() && i.d1.isValid());
  auto s0 = i.s0, s1 = i.s1, d0 = i.d0, d1 = i.d1;
  if (d0 == s1) {
    // could do this in a simplify pass
    if (s1 != d1) a.movq(s1, d1); // save s1 first; d1 != s0
    if (s0 != d0) a.movq(s0, d0);
  } else {
    // could do this in a simplify pass
    if (s0 != d0) a.movq(s0, d0);
    if (s1 != d1) a.movq(s1, d1);
  }
}
template<class X64Asm>
void Vgen<X64Asm>::emit_simd_imm(int64_t val, Vreg d) {
  if (val == 0) {
    a.pxor(d, d); // does not modify flags
  } else {
    auto addr = alloc_literal(env, val);
    a.movsd(rip[(intptr_t)addr], d);
  }
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const ldimmb& i) {
  // ldimmb is for Vconst::Byte, which is treated as unsigned uint8_t
  auto const val = i.s.ub();
  if (i.d.isGP()) {
    Vreg8 d8 = i.d;
    a.movb(static_cast<int8_t>(val), d8);
  } else {
    emit_simd_imm(val, i.d);
  }
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const ldimml& i) {
  // ldimml is for Vconst::Long, which is treated as unsigned uint32_t
  auto const val = i.s.l();
  if (i.d.isGP()) {
    Vreg32 d32 = i.d;
    a.movl(val, d32);
  } else {
    emit_simd_imm(uint32_t(val), i.d);
  }
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const ldimmq& i) {
  auto const val = i.s.q();
  if (i.d.isGP()) {
    if (val == 0) {
      Reg64 d = i.d;
      Reg32 d32 = r32(d);
      a.movl(0, d32); // because emitImmReg tries the xor optimization
    } else {
      a.emitImmReg(i.s, i.d);
    }
  } else {
    emit_simd_imm(val, i.d);
  }
}
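// Note: materializing zero with an explicit movl (rather than letting
// emitImmReg pick "xor d32, d32") keeps the status flags intact; a ldimm may
// be scheduled between a flag-setting instruction and its consumer, and xor
// would clobber those flags.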
template<class X64Asm>
void Vgen<X64Asm>::emit(const load& i) {
  auto mref = i.s.mr();
  if (i.d.isGP()) {
    a.prefix(mref).loadq(mref, i.d);
  } else {
    assertx(i.d.isSIMD());
    a.prefix(mref).movsd(mref, i.d);
  }
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const store& i) {
  auto const mref = i.d.mr();
  if (i.s.isGP()) {
    a.prefix(mref).storeq(i.s, mref);
  } else {
    assertx(i.s.isSIMD());
    a.prefix(mref).movsd(i.s, mref);
  }
}
///////////////////////////////////////////////////////////////////////////////

template<class X64Asm>
void Vgen<X64Asm>::emit(const mcprep& i) {
  /*
   * Initially, we set the cache to hold (addr << 1) | 1 (where `addr' is the
   * address of the movq) so that we can find the movq from the handler.
   *
   * We set the low bit for two reasons: the Class* will never be a valid
   * Class*, so we'll always miss the inline check before it's smashed, and
   * MethodCache::handleStaticCall can tell it's not been smashed yet.
   */
  auto const mov_addr = emitSmashableMovq(a.code(), env.meta, 0, r64(i.d));
  auto const imm = reinterpret_cast<uint64_t>(mov_addr);
  smashMovq(a.toDestAddress(mov_addr), (imm << 1) | 1);

  env.meta.addressImmediates.insert(reinterpret_cast<TCA>(~imm));
}
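// Worked example: if the smashable movq is emitted at 0x3d001234, the cache is
// primed with (0x3d001234 << 1) | 1 == 0x7a002469. The value is odd, so it can
// never collide with a real (aligned) Class*, and the handler can recover the
// movq's address by shifting the cached word right by one.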
///////////////////////////////////////////////////////////////////////////////

template<class X64Asm>
void Vgen<X64Asm>::emit(const call& i) {
  if (a.jmpDeltaFits(i.target)) {
    a.call(i.target);
  } else {
    // can't do a near call; store address in data section.
    // call by loading the address using rip-relative addressing. This
    // assumes the data section is near the current code section. Since
    // this sequence is directly in-line, rip-relative like this is
    // more compact than loading a 64-bit immediate.
    auto addr = alloc_literal(env, (uint64_t)i.target);
    a.call(rip[(intptr_t)addr]);
  }
  if (i.watch) {
    *i.watch = a.frontier();
    env.meta.watchpoints.push_back(i.watch);
  }
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const calls& i) {
  emitSmashableCall(a.code(), env.meta, i.target);
}
///////////////////////////////////////////////////////////////////////////////

template<class X64Asm>
void Vgen<X64Asm>::emit(const stubret& i) {
  if (i.saveframe) {
    a.pop(x64::rvmfp());
  } else {
    a.lea(reg::rsp[8], reg::rsp);
  }
  a.ret();
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const callstub& i) {
  emit(call{i.target, i.args});
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const callfaststub& i) {
  emit(call{i.target, i.args});
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const tailcallstub& i) {
  a.lea(reg::rsp[8], reg::rsp);
  emit(jmpi{i.target, i.args});
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const tailcallstubr& i) {
  a.lea(reg::rsp[8], reg::rsp);
  emit(jmpr{i.target, i.args});
}
///////////////////////////////////////////////////////////////////////////////

template<class X64Asm>
void Vgen<X64Asm>::emit(const phpret& i) {
  a.push(i.fp[AROFF(m_savedRip)]);
  a.loadq(i.fp[AROFF(m_sfp)], x64::rvmfp());
  a.ret();
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const contenter& i) {
  Label Stub, End;
  Reg64 fp = i.fp, target = i.target;
  a.jmp8(End);

  asm_label(a, Stub);
  a.pop(fp[AROFF(m_savedRip)]);
  a.jmp(target);

  asm_label(a, End);
  a.call(Stub);
  // m_savedRip will point here.
  emit(unwind{{i.targets[0], i.targets[1]}});
}
///////////////////////////////////////////////////////////////////////////////

template<class X64Asm>
void Vgen<X64Asm>::emit(const nothrow& /*i*/) {
  env.meta.catches.emplace_back(a.frontier(), nullptr);
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const syncpoint& i) {
  FTRACE(5, "IR recordSyncPoint: {} {}\n", a.frontier(), i.fix.show());
  env.meta.fixups.emplace_back(a.frontier(), i.fix);
  env.record_inline_stack(a.frontier());
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const unwind& i) {
  catches.push_back({a.frontier(), i.targets[1]});
  env.record_inline_stack(a.frontier());
  emit(jmp{i.targets[0]});
}
///////////////////////////////////////////////////////////////////////////////

template<class X64Asm>
void Vgen<X64Asm>::emit(const fallthru&) {
}
///////////////////////////////////////////////////////////////////////////////

template<class X64Asm>
void Vgen<X64Asm>::emit(andqi i) {
  if (magFits(i.s0.q(), sz::dword)) {
    emit(andli{int32_t(i.s0.q()), Reg32(i.s1), Reg32(i.d), i.sf});
    return;
  }

  binary(i);
  a.andq(i.s0, i.d);
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const addlim& i) {
  auto mref = i.m.mr();
  a.prefix(mref).addl(i.s0, mref);
}

template<typename X64Asm>
void Vgen<X64Asm>::emit(const addqmr& i) {
  auto const mref = i.m.mr();
  a.prefix(mref).addq(mref, i.d);
}

template<typename X64Asm>
void Vgen<X64Asm>::emit(const addqrm& i) {
  auto const mref = i.m.mr();
  a.prefix(mref).addq(i.s1, mref);
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const addqim& i) {
  auto mref = i.m.mr();
  a.prefix(mref).addq(i.s0, mref);
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const subqim& i) {
  auto mref = i.m.mr();
  a.prefix(mref).subq(i.s0, mref);
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const cloadq& i) {
  auto m = i.t;
  always_assert(!m.index.isValid()); // not supported, but could be later.
  if (i.d == m.base) {
    // We can't move f over d or we'll clobber the Vptr we need to load from.
    // Since cload does the load unconditionally anyway, we can just load and
    // cmov.
    a.prefix(m.mr()).loadq(i.t, i.d);
    a.cmov_reg64_reg64(ccNegate(i.cc), i.f, i.d);
    return;
  }
  a.prefix(m.mr()).cload_reg64_disp_reg64(i.cc, m.base, m.disp, i.d);
}
// add s0 s1 d => mov s1->d; d += s0
// cmov cc s d => if cc { mov s->d }
template<class X64Asm>
template<class cmov>
void Vgen<X64Asm>::emit_cmov(const cmov& i) {
  if (i.f != i.d && i.t == i.d) {
    // negate the condition and swap t/f operands so we dont clobber i.t
    return emit(cmov{ccNegate(i.cc), i.sf, i.t, i.f, i.d});
  } else {
    prep(i.f, i.d);
  }
  a.cmov_reg64_reg64(i.cc, r64(i.t), r64(i.d));
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const cvtsi2sd& i) {
  a.pxor(i.d, i.d);
  a.cvtsi2sd(i.s, i.d);
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const cvtsi2sdm& i) {
  a.pxor(i.d, i.d);
  a.cvtsi2sd(i.s, i.d);
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const jcc& i) {
  if (i.targets[1] != i.targets[0]) {
    if (next == i.targets[1]) {
      return emit(jcc{ccNegate(i.cc), i.sf, {i.targets[1], i.targets[0]}});
    }
    auto taken = i.targets[1];
    jccs.push_back({a.frontier(), taken});
    a.jcc(i.cc, a.frontier());
  }
  emit(jmp{i.targets[0]});
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const jcci& i) {
  a.jcc(i.cc, i.taken);
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const jmp& i) {
  if (next == i.target) return;
  jmps.push_back({a.frontier(), i.target});
  a.jmp(a.frontier());
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const jmpi& i) {
  if (a.jmpDeltaFits(i.target)) {
    a.jmp(i.target);
  } else {
    // can't do a near jmp - use rip-relative addressing
    auto addr = alloc_literal(env, (uint64_t)i.target);
    a.jmp(rip[(intptr_t)addr]);
  }
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const ldbindretaddr& i) {
  auto const addr = a.frontier();
  emit(leap{reg::rip[(intptr_t)addr], i.d});
  env.ldbindretaddrs.push_back({addr, i.target, i.spOff});
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const lea& i) {
  assertx(i.s.seg == Segment::DS);
  // could do this in a simplify pass
  if (i.s.disp == 0 && i.s.base.isValid() && !i.s.index.isValid()) {
    emit(copy{i.s.base, i.d});
  } else {
    a.lea(i.s, i.d);
  }
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const leav& i) {
  auto const addr = a.frontier();
  emit(leap{reg::rip[(intptr_t)addr], i.d});
  env.leas.push_back({addr, i.s});
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const storebi& i) {
  auto mref = i.m.mr();
  a.prefix(mref).storeb(i.s, mref);
}

template<class X64Asm>
void Vgen<X64Asm>::emit(const storeqi& i) {
  auto mref = i.m.mr();
  a.prefix(mref).storeq(i.s, mref);
}
template<class VgenImpl, typename Inst>
bool testimHelper(VgenImpl& env, const Inst& i, uint64_t mask) {
  // If there's only 1 byte of meaningful bits in the mask, we can adjust the
  // pointer offset and use testbim instead.
  int off = 0;
  while (mask > 0xff && !(mask & 0xff)) {
    off++;
    mask >>= 8;
  }

  if (mask > 0xff) return false;

  env.emit(testbim{int8_t(mask), i.s1 + off, i.sf});
  return true;
}
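// Example: a mask of 0x00ff0000 becomes testbim{0xff, ptr + 2}; the loop drops
// one zero byte from the mask per iteration while bumping the displacement,
// which is equivalent on a little-endian target (hence the static_assert at
// the top of this file).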
template<class X64Asm>
void Vgen<X64Asm>::emit(const testwi& i) {
  if (i.s0.w() == -1) {
    return emit(testw{i.s1, i.s1, i.sf});
  }
  a.testw(i.s0, i.s1);
}
template<class X64Asm>
void Vgen<X64Asm>::Vgen::emit(const testwim& i) {
  if (testimHelper(*this, i, i.s0.w())) return;
  a.prefix(i.s1.mr()).testw(i.s0, i.s1);
}

template<class X64Asm>
void Vgen<X64Asm>::Vgen::emit(const testlim& i) {
  if (testimHelper(*this, i, i.s0.l())) return;
  a.prefix(i.s1.mr()).testl(i.s0, i.s1);
}
template<class X64Asm>
void Vgen<X64Asm>::Vgen::emit(const testli& i) {
  if (i.s0.l() == -1) {
    return emit(testl{i.s1, i.s1, i.sf});
  }
  a.testl(i.s0, i.s1);
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const testqi& i) {
  auto const imm = i.s0.q();
  if (magFits(imm, sz::byte)) {
    a.testb(int8_t(imm), rbyte(i.s1));
  } else if (magFits(imm, sz::dword)) {
    emit(testli{int32_t(imm), Reg32(i.s1), i.sf});
  } else if (imm == -1) {
    emit(testq{i.s1, i.s1, i.sf});
  } else {
    a.testq(i.s0, i.s1);
  }
}
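// x86 test has no 64-bit-immediate encoding (an imm32 is sign-extended), which
// is why a full-width mask of -1 is handled by testing the register against
// itself; the byte/dword narrowings above shrink the encoding without changing
// the zero-flag result.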
template<class X64Asm>
void Vgen<X64Asm>::emit(const testqim& i) {
  if (testimHelper(*this, i, i.s0.q())) return;
  if (magFits(i.s0.q(), sz::dword)) {
    // For an unsigned 32 bit immediate, we can get the same results
    // by emitting a testlim.
    emit(testlim{int32_t(i.s0.q()), i.s1, i.sf});
  } else {
    a.prefix(i.s1.mr()).testq(i.s0, i.s1);
  }
}
template<class X64Asm>
void Vgen<X64Asm>::emit(const trap& i) {
  env.meta.trapReasons.emplace_back(a.frontier(), i.reason);
  a.ud2();
}
template<class X64Asm>
void Vgen<X64Asm>::emit(xorq i) {
  if (i.s0 == i.s1) {
    // 32-bit xor{s, s, d} zeroes the upper bits of `d'.
    return emit(xorl{r32(i.s0), r32(i.s1), r32(i.d), i.sf});
  }
  commuteSF(i);
  a.xorq(i.s0, i.d);
}
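// On x86-64 any write to a 32-bit register zero-extends into the full 64-bit
// register, so "xorl r, r" clears the whole register while saving the REX.W
// prefix byte that "xorq r, r" would require.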
template<class X64Asm>
void Vgen<X64Asm>::emit(const crc32q& i) {
  a.crc32q(i.s0, i.d);
}
template<typename X64Asm>
void Vgen<X64Asm>::emit(const decqmlocknosf& i) {
  a.pushf();
  a.prefix(i.m.mr()).decqlock(i.m);
  a.popf();
}
///////////////////////////////////////////////////////////////////////////////

template<typename Lower>
void lower_impl(Vunit& unit, Vlabel b, size_t i, Lower lower) {
  vmodify(unit, b, i, [&] (Vout& v) { lower(v); return 1; });
}

template <typename Inst>
void lower(Vunit& /*unit*/, Inst& /*inst*/, Vlabel /*b*/, size_t /*i*/) {}
///////////////////////////////////////////////////////////////////////////////

void lower(Vunit& unit, popp& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << pop{inst.d0};
    v << pop{inst.d1};
  });
}

void lower(Vunit& unit, poppm& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << popm{inst.d0};
    v << popm{inst.d1};
  });
}

void lower(Vunit& unit, pushp& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << push{inst.s0};
    v << push{inst.s1};
  });
}

void lower(Vunit& unit, pushpm& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << pushm{inst.s0};
    v << pushm{inst.s1};
  });
}
///////////////////////////////////////////////////////////////////////////////

void lower(Vunit& unit, stublogue& inst, Vlabel b, size_t i) {
  if (inst.saveframe) {
    unit.blocks[b].code[i] = push{x64::rvmfp()};
  } else {
    unit.blocks[b].code[i] = lea{reg::rsp[-8], reg::rsp};
  }
}

void lower(Vunit& unit, unstublogue& /*inst*/, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = lea{reg::rsp[8], reg::rsp};
}

void lower(Vunit& unit, stubunwind& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << lea{reg::rsp[8], reg::rsp};
  });
}

void lower(Vunit& unit, stubtophp& /*inst*/, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = lea{reg::rsp[16], reg::rsp};
}

void lower(Vunit& unit, loadstubret& inst, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = load{reg::rsp[8], inst.d};
}

void lower(Vunit& unit, phplogue& inst, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = popm{inst.fp[AROFF(m_savedRip)]};
}

void lower(Vunit& unit, resumetc& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << callr{inst.target, inst.args};
    v << jmpi{inst.exittc};
  });
}
///////////////////////////////////////////////////////////////////////////////

void lower(Vunit& unit, sar& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << copy{inst.s0, rcx};
    v << sarq{inst.s1, inst.d, inst.sf};
  });
}

void lower(Vunit& unit, shl& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << copy{inst.s0, rcx};
    v << shlq{inst.s1, inst.d, inst.sf};
  });
}

void lower(Vunit& unit, shr& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << copy{inst.s0, rcx};
    v << shrq{inst.s1, inst.d, inst.sf};
  });
}

void lower(Vunit& unit, srem& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << copy{inst.s0, rax};
    v << cqo{};                      // sign-extend rax => rdx:rax
    v << idiv{inst.s1, v.makeReg()}; // rdx:rax/divisor => quot:rax, rem:rdx
    v << copy{rdx, inst.d};
  });
}

void lower(Vunit& unit, divint& inst, Vlabel b, size_t i) {
  lower_impl(unit, b, i, [&] (Vout& v) {
    v << copy{inst.s0, rax};
    v << cqo{};                      // sign-extend rax => rdx:rax
    v << idiv{inst.s1, v.makeReg()}; // rdx:rax/divisor => quot:rax, rem:rdx
    v << copy{rax, inst.d};
  });
}
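// These lowerings expose x86-64's fixed-register constraints to the register
// allocator: a variable shift count must sit in cl (hence the copies into
// rcx), and idiv consumes rdx:rax and leaves quotient/remainder in rax/rdx,
// so srem and divint differ only in which half gets copied out.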
///////////////////////////////////////////////////////////////////////////////

void lower(Vunit& unit, movtqb& inst, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = copy{inst.s, inst.d};
}
void lower(Vunit& unit, movtdb& inst, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = copy{inst.s, inst.d};
}
void lower(Vunit& unit, movtdq& inst, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = copy{inst.s, inst.d};
}
void lower(Vunit& unit, movtqw& inst, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = copy{inst.s, inst.d};
}
void lower(Vunit& unit, movtql& inst, Vlabel b, size_t i) {
  unit.blocks[b].code[i] = copy{inst.s, inst.d};
}

///////////////////////////////////////////////////////////////////////////////
/*
 * Lower a few abstractions to facilitate straightforward x64 codegen.
 */
void lowerForX64(Vunit& unit) {
  vasm_lower(unit, [&](const VLS& /*env*/, Vinstr& inst, Vlabel b, size_t i) {
    switch (inst.op) {
#define O(name, ...)                      \
      case Vinstr::name:                  \
        lower(unit, inst.name##_, b, i);  \
        break;

      VASM_OPCODES
#undef O
    }
  });
}

///////////////////////////////////////////////////////////////////////////////
void optimizeX64(Vunit& unit, const Abi& abi, bool regalloc) {
  Timer timer(Timer::vasm_optimize, unit.log_entry);

  tracing::Block _{
    "vasm-optimize",
    [&] { return traceProps(unit).add("reg_alloc", regalloc); }
  };

  auto const doPass = [&] (const char* name, auto fun) {
    rqtrace::EventGuard trace{name};
    fun(unit);
  };

  doPass("VOPT_DCE",    removeDeadCode);
  doPass("VOPT_PHI",    optimizePhis);
  doPass("VOPT_BRANCH", fuseBranches);
  doPass("VOPT_JMP",    [] (Vunit& u) { optimizeJmps(u, false); });

  assertx(checkWidths(unit));

  if (unit.context && unit.context->kind == TransKind::Optimize &&
      RuntimeOption::EvalProfBranchSampleFreq > 0) {
    // Even when branch profiling is on, we still only want to profile
    // non-profiling translations of PHP functions. We also require that we
    // can spill, so that we can generate arbitrary profiling code, and also to
    // ensure we don't profile unique stubs and such.
    doPass("VOPT_PROF_BRANCH", profile_branches);
  }

  doPass("VOPT_X64",      lowerForX64);
  doPass("VOPT_SIMPLIFY", simplify);
  doPass("VOPT_X64",      lowerForX64);

  if (!unit.constToReg.empty()) {
    doPass("VOPT_FOLD_IMM", foldImms<x64::ImmFolder>);
  }

  doPass("VOPT_COPY",   [&] (Vunit& u) { optimizeCopies(u, abi); });
  doPass("VOPT_DCE",    removeDeadCode);
  doPass("VOPT_BRANCH", fuseBranches);

  if (unit.needsRegAlloc()) {
    doPass("VOPT_JMP", [] (Vunit& u) { optimizeJmps(u, false); });
    doPass("VOPT_DCE", removeDeadCode);

    // vasm-block-counts and register allocation require edges to
    // be split.
    splitCriticalEdges(unit);

    doPass("VOPT_BLOCK_WEIGHTS", VasmBlockCounters::profileGuidedUpdate);

    if (RuntimeOption::EvalUseGraphColor &&
        unit.context &&
        (unit.context->kind == TransKind::Optimize ||
         unit.context->kind == TransKind::OptPrologue)) {
      rqtrace::EventGuard trace{"VOPT_GRAPH_COLOR"};
      allocateRegistersWithGraphColor(unit, abi);
    } else {
      rqtrace::EventGuard trace{"VOPT_XLS"};
      allocateRegistersWithXLS(unit, abi);
    }

    doPass("VOPT_SF_PEEPHOLES", [&] (Vunit& u) { sfPeepholes(u, abi); });
    doPass("VOPT_POST_RA_SIMPLIFY", postRASimplify);
  }

  // We can add side-exiting instructions now
  doPass("VOPT_EXIT", optimizeExits);
  doPass("VOPT_JMP",  [] (Vunit& u) { optimizeJmps(u, true); });
}
void emitX64(Vunit& unit, Vtext& text, CGMeta& fixups,
             AsmInfo* asmInfo) {
  tracing::Block _{"emit-X64", [&] { return traceProps(unit); }};

  if (RuntimeOption::EvalUseXedAssembler) {
    return vasm_emit<Vgen<XedAssembler>>(unit, text, fixups, asmInfo);
  }
  vasm_emit<Vgen<X64Assembler>>(unit, text, fixups, asmInfo);
}

///////////////////////////////////////////////////////////////////////////////

}}