/*---------------------------------------------------------------*/
/*--- begin                                 host_amd64_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_amd64_defs.h"

/* --------- Registers. --------- */

const RRegUniverse* getRRegUniverse_AMD64 ( void )
{
   /* The real-register universe is a big constant, so we just want to
      initialise it once. */
   static RRegUniverse rRegUniverse_AMD64;
   static Bool         rRegUniverse_AMD64_initted = False;

   /* Handy shorthand, nothing more */
   RRegUniverse* ru = &rRegUniverse_AMD64;

   /* This isn't thread-safe.  Sigh. */
   if (LIKELY(rRegUniverse_AMD64_initted))
      return ru;

   RRegUniverse__init(ru);

   /* Add the registers.  The initial segment of this array must be
      those available for allocation by reg-alloc, and those that
      follow are not available for allocation. */
   ru->allocable_start[HRcInt64] = ru->size;
   ru->regs[ru->size++] = hregAMD64_R12();
   ru->regs[ru->size++] = hregAMD64_R13();
   ru->regs[ru->size++] = hregAMD64_R14();
   ru->regs[ru->size++] = hregAMD64_R15();
   ru->regs[ru->size++] = hregAMD64_RBX();
   ru->regs[ru->size++] = hregAMD64_RSI();
   ru->regs[ru->size++] = hregAMD64_RDI();
   ru->regs[ru->size++] = hregAMD64_R8();
   ru->regs[ru->size++] = hregAMD64_R9();
   ru->regs[ru->size++] = hregAMD64_R10();
   ru->allocable_end[HRcInt64] = ru->size - 1;

   ru->allocable_start[HRcVec128] = ru->size;
   ru->regs[ru->size++] = hregAMD64_XMM3();
   ru->regs[ru->size++] = hregAMD64_XMM4();
   ru->regs[ru->size++] = hregAMD64_XMM5();
   ru->regs[ru->size++] = hregAMD64_XMM6();
   ru->regs[ru->size++] = hregAMD64_XMM7();
   ru->regs[ru->size++] = hregAMD64_XMM8();
   ru->regs[ru->size++] = hregAMD64_XMM9();
   ru->regs[ru->size++] = hregAMD64_XMM10();
   ru->regs[ru->size++] = hregAMD64_XMM11();
   ru->regs[ru->size++] = hregAMD64_XMM12();
   ru->allocable_end[HRcVec128] = ru->size - 1;
   ru->allocable = ru->size;

   /* And other regs, not available to the allocator. */
   ru->regs[ru->size++] = hregAMD64_RAX();
   ru->regs[ru->size++] = hregAMD64_RCX();
   ru->regs[ru->size++] = hregAMD64_RDX();
   ru->regs[ru->size++] = hregAMD64_RSP();
   ru->regs[ru->size++] = hregAMD64_RBP();
   ru->regs[ru->size++] = hregAMD64_R11();
   ru->regs[ru->size++] = hregAMD64_XMM0();
   ru->regs[ru->size++] = hregAMD64_XMM1();

   rRegUniverse_AMD64_initted = True;

   RRegUniverse__check_is_sane(ru);
   return ru;
}
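
/* Illustrative note (not part of the original file): a caller that wants to
   walk just the allocatable 64-bit integer registers could do

      const RRegUniverse* univ = getRRegUniverse_AMD64();
      for (UInt j = univ->allocable_start[HRcInt64];
           j <= univ->allocable_end[HRcInt64]; j++) {
         // univ->regs[j] is one of %r12..%r15, %rbx, %rsi, %rdi, %r8..%r10
      }

   Registers added after 'ru->allocable' is fixed (%rax, %rcx, %rdx, %rsp,
   %rbp, %r11, %xmm0, %xmm1) are never handed out by the register
   allocator. */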

UInt ppHRegAMD64 ( HReg reg )
{
   Int r;
   static const HChar* ireg64_names[16]
     = { "%rax", "%rcx", "%rdx",  "%rbx",  "%rsp",  "%rbp",  "%rsi",  "%rdi",
         "%r8",  "%r9",  "%r10",  "%r11",  "%r12",  "%r13",  "%r14",  "%r15" };
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      return ppHReg(reg);
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt64:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 16);
         return vex_printf("%s", ireg64_names[r]);
      case HRcVec128:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 16);
         return vex_printf("%%xmm%d", r);
      default:
         vpanic("ppHRegAMD64");
   }
}

static UInt ppHRegAMD64_lo32 ( HReg reg )
{
   Int r;
   static const HChar* ireg32_names[16]
     = { "%eax", "%ecx", "%edx",  "%ebx",  "%esp",  "%ebp",  "%esi",  "%edi",
         "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" };
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      UInt written = ppHReg(reg);
      written += vex_printf("d");
      return written;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt64:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 16);
         return vex_printf("%s", ireg32_names[r]);
      default:
         vpanic("ppHRegAMD64_lo32: invalid regclass");
   }
}

/* --------- Condition codes, Intel encoding. --------- */

const HChar* showAMD64CondCode ( AMD64CondCode cond )
{
   switch (cond) {
      case Acc_O:      return "o";
      case Acc_NO:     return "no";
      case Acc_B:      return "b";
      case Acc_NB:     return "nb";
      case Acc_Z:      return "z";
      case Acc_NZ:     return "nz";
      case Acc_BE:     return "be";
      case Acc_NBE:    return "nbe";
      case Acc_S:      return "s";
      case Acc_NS:     return "ns";
      case Acc_P:      return "p";
      case Acc_NP:     return "np";
      case Acc_L:      return "l";
      case Acc_NL:     return "nl";
      case Acc_LE:     return "le";
      case Acc_NLE:    return "nle";
      case Acc_ALWAYS: return "ALWAYS";
      default: vpanic("ppAMD64CondCode");
   }
}

/* --------- AMD64AMode: memory address expressions. --------- */

AMD64AMode* AMD64AMode_IR ( UInt imm32, HReg reg ) {
   AMD64AMode* am = LibVEX_Alloc_inline(sizeof(AMD64AMode));
   am->tag        = Aam_IR;
   am->Aam.IR.imm = imm32;
   am->Aam.IR.reg = reg;
   return am;
}
AMD64AMode* AMD64AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
   AMD64AMode* am = LibVEX_Alloc_inline(sizeof(AMD64AMode));
   am->tag = Aam_IRRS;
   am->Aam.IRRS.imm   = imm32;
   am->Aam.IRRS.base  = base;
   am->Aam.IRRS.index = indEx;
   am->Aam.IRRS.shift = shift;
   vassert(shift >= 0 && shift <= 3);
   return am;
}

void ppAMD64AMode ( AMD64AMode* am ) {
   switch (am->tag) {
      case Aam_IR:
         if (am->Aam.IR.imm == 0)
            vex_printf("(");
         else
            vex_printf("0x%x(", am->Aam.IR.imm);
         ppHRegAMD64(am->Aam.IR.reg);
         vex_printf(")");
         return;
      case Aam_IRRS:
         vex_printf("0x%x(", am->Aam.IRRS.imm);
         ppHRegAMD64(am->Aam.IRRS.base);
         vex_printf(",");
         ppHRegAMD64(am->Aam.IRRS.index);
         vex_printf(",%d)", 1 << am->Aam.IRRS.shift);
         return;
      default:
         vpanic("ppAMD64AMode");
   }
}

static void addRegUsage_AMD64AMode ( HRegUsage* u, AMD64AMode* am ) {
   switch (am->tag) {
      case Aam_IR:
         addHRegUse(u, HRmRead, am->Aam.IR.reg);
         return;
      case Aam_IRRS:
         addHRegUse(u, HRmRead, am->Aam.IRRS.base);
         addHRegUse(u, HRmRead, am->Aam.IRRS.index);
         return;
      default:
         vpanic("addRegUsage_AMD64AMode");
   }
}

static void mapRegs_AMD64AMode ( HRegRemap* m, AMD64AMode* am ) {
   switch (am->tag) {
      case Aam_IR:
         am->Aam.IR.reg = lookupHRegRemap(m, am->Aam.IR.reg);
         return;
      case Aam_IRRS:
         am->Aam.IRRS.base = lookupHRegRemap(m, am->Aam.IRRS.base);
         am->Aam.IRRS.index = lookupHRegRemap(m, am->Aam.IRRS.index);
         return;
      default:
         vpanic("mapRegs_AMD64AMode");
   }
}
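
/* Illustrative sketch (not part of the original file): the amode for the
   x86-64 operand 16(%rbp,%rcx,8) would be built as

      AMD64AMode* am = AMD64AMode_IRRS(16, hregAMD64_RBP(), hregAMD64_RCX(), 3);

   since 'shift' holds log2 of the scale factor, so 3 denotes a scale of 8,
   matching the "1 << shift" printed by ppAMD64AMode above. */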

/* --------- Operand, which can be reg, immediate or memory. --------- */

AMD64RMI* AMD64RMI_Imm ( UInt imm32 ) {
   AMD64RMI* op       = LibVEX_Alloc_inline(sizeof(AMD64RMI));
   op->tag            = Armi_Imm;
   op->Armi.Imm.imm32 = imm32;
   return op;
}
AMD64RMI* AMD64RMI_Reg ( HReg reg ) {
   AMD64RMI* op     = LibVEX_Alloc_inline(sizeof(AMD64RMI));
   op->tag          = Armi_Reg;
   op->Armi.Reg.reg = reg;
   return op;
}
AMD64RMI* AMD64RMI_Mem ( AMD64AMode* am ) {
   AMD64RMI* op    = LibVEX_Alloc_inline(sizeof(AMD64RMI));
   op->tag         = Armi_Mem;
   op->Armi.Mem.am = am;
   return op;
}

static void ppAMD64RMI_wrk ( AMD64RMI* op, Bool lo32 ) {
   switch (op->tag) {
      case Armi_Imm:
         vex_printf("$0x%x", op->Armi.Imm.imm32);
         return;
      case Armi_Reg:
         if (lo32)
            ppHRegAMD64_lo32(op->Armi.Reg.reg);
         else
            ppHRegAMD64(op->Armi.Reg.reg);
         return;
      case Armi_Mem:
         ppAMD64AMode(op->Armi.Mem.am);
         return;
      default:
         vpanic("ppAMD64RMI");
   }
}
void ppAMD64RMI ( AMD64RMI* op ) {
   ppAMD64RMI_wrk(op, False/*!lo32*/);
}
void ppAMD64RMI_lo32 ( AMD64RMI* op ) {
   ppAMD64RMI_wrk(op, True/*lo32*/);
}

/* An AMD64RMI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_AMD64RMI ( HRegUsage* u, AMD64RMI* op ) {
   switch (op->tag) {
      case Armi_Imm:
         return;
      case Armi_Reg:
         addHRegUse(u, HRmRead, op->Armi.Reg.reg);
         return;
      case Armi_Mem:
         addRegUsage_AMD64AMode(u, op->Armi.Mem.am);
         return;
      default:
         vpanic("addRegUsage_AMD64RMI");
   }
}

static void mapRegs_AMD64RMI ( HRegRemap* m, AMD64RMI* op ) {
   switch (op->tag) {
      case Armi_Imm:
         return;
      case Armi_Reg:
         op->Armi.Reg.reg = lookupHRegRemap(m, op->Armi.Reg.reg);
         return;
      case Armi_Mem:
         mapRegs_AMD64AMode(m, op->Armi.Mem.am);
         return;
      default:
         vpanic("mapRegs_AMD64RMI");
   }
}
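
/* Illustrative sketch (not part of the original file): an AMD64RMI is the
   right-hand operand of instructions such as Ain_Alu64R, so the three forms

      AMD64RMI_Imm(0x10)                              -> $0x10
      AMD64RMI_Reg(hregAMD64_RBX())                   -> %rbx
      AMD64RMI_Mem(AMD64AMode_IR(8, hregAMD64_RBP())) -> 0x8(%rbp)

   correspond to the immediate, register and memory operand kinds printed
   by ppAMD64RMI above. */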

/* --------- Operand, which can be reg or immediate only. --------- */

AMD64RI* AMD64RI_Imm ( UInt imm32 ) {
   AMD64RI* op       = LibVEX_Alloc_inline(sizeof(AMD64RI));
   op->tag           = Ari_Imm;
   op->Ari.Imm.imm32 = imm32;
   return op;
}
AMD64RI* AMD64RI_Reg ( HReg reg ) {
   AMD64RI* op     = LibVEX_Alloc_inline(sizeof(AMD64RI));
   op->tag         = Ari_Reg;
   op->Ari.Reg.reg = reg;
   return op;
}

void ppAMD64RI ( AMD64RI* op ) {
   switch (op->tag) {
      case Ari_Imm:
         vex_printf("$0x%x", op->Ari.Imm.imm32);
         return;
      case Ari_Reg:
         ppHRegAMD64(op->Ari.Reg.reg);
         return;
      default:
         vpanic("ppAMD64RI");
   }
}

/* An AMD64RI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_AMD64RI ( HRegUsage* u, AMD64RI* op ) {
   switch (op->tag) {
      case Ari_Imm:
         return;
      case Ari_Reg:
         addHRegUse(u, HRmRead, op->Ari.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_AMD64RI");
   }
}

static void mapRegs_AMD64RI ( HRegRemap* m, AMD64RI* op ) {
   switch (op->tag) {
      case Ari_Imm:
         return;
      case Ari_Reg:
         op->Ari.Reg.reg = lookupHRegRemap(m, op->Ari.Reg.reg);
         return;
      default:
         vpanic("mapRegs_AMD64RI");
   }
}

/* --------- Operand, which can be reg or memory only. --------- */

AMD64RM* AMD64RM_Reg ( HReg reg ) {
   AMD64RM* op     = LibVEX_Alloc_inline(sizeof(AMD64RM));
   op->tag         = Arm_Reg;
   op->Arm.Reg.reg = reg;
   return op;
}
AMD64RM* AMD64RM_Mem ( AMD64AMode* am ) {
   AMD64RM* op    = LibVEX_Alloc_inline(sizeof(AMD64RM));
   op->tag        = Arm_Mem;
   op->Arm.Mem.am = am;
   return op;
}

void ppAMD64RM ( AMD64RM* op ) {
   switch (op->tag) {
      case Arm_Mem:
         ppAMD64AMode(op->Arm.Mem.am);
         return;
      case Arm_Reg:
         ppHRegAMD64(op->Arm.Reg.reg);
         return;
      default:
         vpanic("ppAMD64RM");
   }
}

/* Because an AMD64RM can be both a source or destination operand, we
   have to supply a mode -- pertaining to the operand as a whole --
   indicating how it's being used. */
static void addRegUsage_AMD64RM ( HRegUsage* u, AMD64RM* op, HRegMode mode ) {
   switch (op->tag) {
      case Arm_Mem:
         /* Memory is read, written or modified.  So we just want to
            know the regs read by the amode. */
         addRegUsage_AMD64AMode(u, op->Arm.Mem.am);
         return;
      case Arm_Reg:
         /* reg is read, written or modified.  Add it in the
            appropriate way. */
         addHRegUse(u, mode, op->Arm.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_AMD64RM");
   }
}

static void mapRegs_AMD64RM ( HRegRemap* m, AMD64RM* op )
{
   switch (op->tag) {
      case Arm_Mem:
         mapRegs_AMD64AMode(m, op->Arm.Mem.am);
         return;
      case Arm_Reg:
         op->Arm.Reg.reg = lookupHRegRemap(m, op->Arm.Reg.reg);
         return;
      default:
         vpanic("mapRegs_AMD64RM");
   }
}
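
/* Illustrative sketch (not part of the original file): because an AMD64RM may
   be read or written depending on the instruction that uses it, callers pass
   the intended HRegMode; for example the multiply case in
   getRegUsage_AMD64Instr below does

      addRegUsage_AMD64RM(u, i->Ain.MulL.src, HRmRead);

   whereas an RM operand used as a destination would be passed HRmWrite or
   HRmModify instead. */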
453 /* --------- Instructions. --------- */
455 static const HChar* showAMD64ScalarSz ( Int sz ) {
456 switch (sz) {
457 case 2: return "w";
458 case 4: return "l";
459 case 8: return "q";
460 default: vpanic("showAMD64ScalarSz");
464 const HChar* showAMD64UnaryOp ( AMD64UnaryOp op ) {
465 switch (op) {
466 case Aun_NOT: return "not";
467 case Aun_NEG: return "neg";
468 default: vpanic("showAMD64UnaryOp");
472 const HChar* showAMD64AluOp ( AMD64AluOp op ) {
473 switch (op) {
474 case Aalu_MOV: return "mov";
475 case Aalu_CMP: return "cmp";
476 case Aalu_ADD: return "add";
477 case Aalu_SUB: return "sub";
478 case Aalu_ADC: return "adc";
479 case Aalu_SBB: return "sbb";
480 case Aalu_AND: return "and";
481 case Aalu_OR: return "or";
482 case Aalu_XOR: return "xor";
483 case Aalu_MUL: return "imul";
484 default: vpanic("showAMD64AluOp");
488 const HChar* showAMD64ShiftOp ( AMD64ShiftOp op ) {
489 switch (op) {
490 case Ash_SHL: return "shl";
491 case Ash_SHR: return "shr";
492 case Ash_SAR: return "sar";
493 default: vpanic("showAMD64ShiftOp");
497 const HChar* showA87FpOp ( A87FpOp op ) {
498 switch (op) {
499 case Afp_SCALE: return "scale";
500 case Afp_ATAN: return "atan";
501 case Afp_YL2X: return "yl2x";
502 case Afp_YL2XP1: return "yl2xp1";
503 case Afp_PREM: return "prem";
504 case Afp_PREM1: return "prem1";
505 case Afp_SQRT: return "sqrt";
506 case Afp_SIN: return "sin";
507 case Afp_COS: return "cos";
508 case Afp_TAN: return "tan";
509 case Afp_ROUND: return "round";
510 case Afp_2XM1: return "2xm1";
511 default: vpanic("showA87FpOp");
515 const HChar* showAMD64SseOp ( AMD64SseOp op ) {
516 switch (op) {
517 case Asse_MOV: return "movups";
518 case Asse_ADDF: return "add";
519 case Asse_SUBF: return "sub";
520 case Asse_MULF: return "mul";
521 case Asse_DIVF: return "div";
522 case Asse_MAXF: return "max";
523 case Asse_MINF: return "min";
524 case Asse_CMPEQF: return "cmpFeq";
525 case Asse_CMPLTF: return "cmpFlt";
526 case Asse_CMPLEF: return "cmpFle";
527 case Asse_CMPUNF: return "cmpFun";
528 case Asse_RCPF: return "rcp";
529 case Asse_RSQRTF: return "rsqrt";
530 case Asse_SQRTF: return "sqrt";
531 case Asse_I2F: return "cvtdq2ps.";
532 case Asse_F2I: return "cvtps2dq.";
533 case Asse_AND: return "and";
534 case Asse_OR: return "or";
535 case Asse_XOR: return "xor";
536 case Asse_ANDN: return "andn";
537 case Asse_ADD8: return "paddb";
538 case Asse_ADD16: return "paddw";
539 case Asse_ADD32: return "paddd";
540 case Asse_ADD64: return "paddq";
541 case Asse_QADD8U: return "paddusb";
542 case Asse_QADD16U: return "paddusw";
543 case Asse_QADD8S: return "paddsb";
544 case Asse_QADD16S: return "paddsw";
545 case Asse_SUB8: return "psubb";
546 case Asse_SUB16: return "psubw";
547 case Asse_SUB32: return "psubd";
548 case Asse_SUB64: return "psubq";
549 case Asse_QSUB8U: return "psubusb";
550 case Asse_QSUB16U: return "psubusw";
551 case Asse_QSUB8S: return "psubsb";
552 case Asse_QSUB16S: return "psubsw";
553 case Asse_MUL16: return "pmullw";
554 case Asse_MULHI16U: return "pmulhuw";
555 case Asse_MULHI16S: return "pmulhw";
556 case Asse_AVG8U: return "pavgb";
557 case Asse_AVG16U: return "pavgw";
558 case Asse_MAX16S: return "pmaxw";
559 case Asse_MAX8U: return "pmaxub";
560 case Asse_MIN16S: return "pminw";
561 case Asse_MIN8U: return "pminub";
562 case Asse_CMPEQ8: return "pcmpeqb";
563 case Asse_CMPEQ16: return "pcmpeqw";
564 case Asse_CMPEQ32: return "pcmpeqd";
565 case Asse_CMPGT8S: return "pcmpgtb";
566 case Asse_CMPGT16S: return "pcmpgtw";
567 case Asse_CMPGT32S: return "pcmpgtd";
568 case Asse_SHL16: return "psllw";
569 case Asse_SHL32: return "pslld";
570 case Asse_SHL64: return "psllq";
571 case Asse_SHL128: return "pslldq";
572 case Asse_SHR16: return "psrlw";
573 case Asse_SHR32: return "psrld";
574 case Asse_SHR64: return "psrlq";
575 case Asse_SHR128: return "psrldq";
576 case Asse_SAR16: return "psraw";
577 case Asse_SAR32: return "psrad";
578 case Asse_PACKSSD: return "packssdw";
579 case Asse_PACKSSW: return "packsswb";
580 case Asse_PACKUSW: return "packuswb";
581 case Asse_UNPCKHB: return "punpckhb";
582 case Asse_UNPCKHW: return "punpckhw";
583 case Asse_UNPCKHD: return "punpckhd";
584 case Asse_UNPCKHQ: return "punpckhq";
585 case Asse_UNPCKLB: return "punpcklb";
586 case Asse_UNPCKLW: return "punpcklw";
587 case Asse_UNPCKLD: return "punpckld";
588 case Asse_UNPCKLQ: return "punpcklq";
589 case Asse_PSHUFB: return "pshufb";
590 case Asse_PMADDUBSW: return "pmaddubsw";
591 case Asse_F32toF16: return "vcvtps2ph(rm_field=$0x4).";
592 case Asse_F16toF32: return "vcvtph2ps.";
593 default: vpanic("showAMD64SseOp");
597 AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst ) {
598 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
599 i->tag = Ain_Imm64;
600 i->Ain.Imm64.imm64 = imm64;
601 i->Ain.Imm64.dst = dst;
602 return i;
604 AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
605 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
606 i->tag = Ain_Alu64R;
607 i->Ain.Alu64R.op = op;
608 i->Ain.Alu64R.src = src;
609 i->Ain.Alu64R.dst = dst;
610 return i;
612 AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp op, AMD64RI* src, AMD64AMode* dst ) {
613 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
614 i->tag = Ain_Alu64M;
615 i->Ain.Alu64M.op = op;
616 i->Ain.Alu64M.src = src;
617 i->Ain.Alu64M.dst = dst;
618 vassert(op != Aalu_MUL);
619 return i;
621 AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) {
622 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
623 i->tag = Ain_Sh64;
624 i->Ain.Sh64.op = op;
625 i->Ain.Sh64.src = src;
626 i->Ain.Sh64.dst = dst;
627 return i;
629 AMD64Instr* AMD64Instr_Sh32 ( AMD64ShiftOp op, UInt src, HReg dst ) {
630 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
631 i->tag = Ain_Sh32;
632 i->Ain.Sh32.op = op;
633 i->Ain.Sh32.src = src;
634 i->Ain.Sh32.dst = dst;
635 return i;
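/* Note (illustrative, not from the original source): for Ain_Sh64 and
   Ain_Sh32 a 'src' value of 0 does not mean "shift by zero"; as the
   pretty-printer and getRegUsage_AMD64Instr below show, 0 selects a
   variable shift whose count is taken from %cl, and any nonzero value is
   used as an immediate shift count. */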
637 AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) {
638 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
639 i->tag = Ain_Test64;
640 i->Ain.Test64.imm32 = imm32;
641 i->Ain.Test64.dst = dst;
642 return i;
644 AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst ) {
645 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
646 i->tag = Ain_Unary64;
647 i->Ain.Unary64.op = op;
648 i->Ain.Unary64.dst = dst;
649 return i;
651 AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst ) {
652 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
653 i->tag = Ain_Lea64;
654 i->Ain.Lea64.am = am;
655 i->Ain.Lea64.dst = dst;
656 return i;
658 AMD64Instr* AMD64Instr_Alu32R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
659 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
660 i->tag = Ain_Alu32R;
661 i->Ain.Alu32R.op = op;
662 i->Ain.Alu32R.src = src;
663 i->Ain.Alu32R.dst = dst;
664 switch (op) {
665 case Aalu_ADD: case Aalu_SUB: case Aalu_CMP:
666 case Aalu_AND: case Aalu_OR: case Aalu_XOR: break;
667 default: vassert(0);
669 return i;
671 AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* src ) {
672 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
673 i->tag = Ain_MulL;
674 i->Ain.MulL.syned = syned;
675 i->Ain.MulL.src = src;
676 return i;
678 AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* src ) {
679 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
680 i->tag = Ain_Div;
681 i->Ain.Div.syned = syned;
682 i->Ain.Div.sz = sz;
683 i->Ain.Div.src = src;
684 vassert(sz == 4 || sz == 8);
685 return i;
687 AMD64Instr* AMD64Instr_Push( AMD64RMI* src ) {
688 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
689 i->tag = Ain_Push;
690 i->Ain.Push.src = src;
691 return i;
693 AMD64Instr* AMD64Instr_Call ( AMD64CondCode cond, Addr64 target, Int regparms,
694 RetLoc rloc ) {
695 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
696 i->tag = Ain_Call;
697 i->Ain.Call.cond = cond;
698 i->Ain.Call.target = target;
699 i->Ain.Call.regparms = regparms;
700 i->Ain.Call.rloc = rloc;
701 vassert(regparms >= 0 && regparms <= 6);
702 vassert(is_sane_RetLoc(rloc));
703 return i;
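/* Note (illustrative, not from the original source): 'regparms' counts how
   many integer arguments the callee expects in registers.  Following the
   System V AMD64 ordering used by getRegUsage_AMD64Instr below, a call with
   regparms == 2 marks %rdi and %rsi as read, and regparms == 6 marks
   %rdi, %rsi, %rdx, %rcx, %r8 and %r9. */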
706 AMD64Instr* AMD64Instr_XDirect ( Addr64 dstGA, AMD64AMode* amRIP,
707 AMD64CondCode cond, Bool toFastEP ) {
708 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
709 i->tag = Ain_XDirect;
710 i->Ain.XDirect.dstGA = dstGA;
711 i->Ain.XDirect.amRIP = amRIP;
712 i->Ain.XDirect.cond = cond;
713 i->Ain.XDirect.toFastEP = toFastEP;
714 return i;
716 AMD64Instr* AMD64Instr_XIndir ( HReg dstGA, AMD64AMode* amRIP,
717 AMD64CondCode cond ) {
718 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
719 i->tag = Ain_XIndir;
720 i->Ain.XIndir.dstGA = dstGA;
721 i->Ain.XIndir.amRIP = amRIP;
722 i->Ain.XIndir.cond = cond;
723 return i;
725 AMD64Instr* AMD64Instr_XAssisted ( HReg dstGA, AMD64AMode* amRIP,
726 AMD64CondCode cond, IRJumpKind jk ) {
727 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
728 i->tag = Ain_XAssisted;
729 i->Ain.XAssisted.dstGA = dstGA;
730 i->Ain.XAssisted.amRIP = amRIP;
731 i->Ain.XAssisted.cond = cond;
732 i->Ain.XAssisted.jk = jk;
733 return i;
736 AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, HReg src, HReg dst ) {
737 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
738 i->tag = Ain_CMov64;
739 i->Ain.CMov64.cond = cond;
740 i->Ain.CMov64.src = src;
741 i->Ain.CMov64.dst = dst;
742 vassert(cond != Acc_ALWAYS);
743 return i;
745 AMD64Instr* AMD64Instr_CLoad ( AMD64CondCode cond, UChar szB,
746 AMD64AMode* addr, HReg dst ) {
747 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
748 i->tag = Ain_CLoad;
749 i->Ain.CLoad.cond = cond;
750 i->Ain.CLoad.szB = szB;
751 i->Ain.CLoad.addr = addr;
752 i->Ain.CLoad.dst = dst;
753 vassert(cond != Acc_ALWAYS && (szB == 4 || szB == 8));
754 return i;
756 AMD64Instr* AMD64Instr_CStore ( AMD64CondCode cond, UChar szB,
757 HReg src, AMD64AMode* addr ) {
758 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
759 i->tag = Ain_CStore;
760 i->Ain.CStore.cond = cond;
761 i->Ain.CStore.szB = szB;
762 i->Ain.CStore.src = src;
763 i->Ain.CStore.addr = addr;
764 vassert(cond != Acc_ALWAYS && (szB == 4 || szB == 8));
765 return i;
767 AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) {
768 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
769 i->tag = Ain_MovxLQ;
770 i->Ain.MovxLQ.syned = syned;
771 i->Ain.MovxLQ.src = src;
772 i->Ain.MovxLQ.dst = dst;
773 return i;
775 AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
776 AMD64AMode* src, HReg dst ) {
777 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
778 i->tag = Ain_LoadEX;
779 i->Ain.LoadEX.szSmall = szSmall;
780 i->Ain.LoadEX.syned = syned;
781 i->Ain.LoadEX.src = src;
782 i->Ain.LoadEX.dst = dst;
783 vassert(szSmall == 1 || szSmall == 2 || szSmall == 4);
784 return i;
786 AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst ) {
787 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
788 i->tag = Ain_Store;
789 i->Ain.Store.sz = sz;
790 i->Ain.Store.src = src;
791 i->Ain.Store.dst = dst;
792 vassert(sz == 1 || sz == 2 || sz == 4);
793 return i;
795 AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ) {
796 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
797 i->tag = Ain_Set64;
798 i->Ain.Set64.cond = cond;
799 i->Ain.Set64.dst = dst;
800 return i;
802 AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ) {
803 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
804 i->tag = Ain_Bsfr64;
805 i->Ain.Bsfr64.isFwds = isFwds;
806 i->Ain.Bsfr64.src = src;
807 i->Ain.Bsfr64.dst = dst;
808 return i;
810 AMD64Instr* AMD64Instr_MFence ( void ) {
811 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
812 i->tag = Ain_MFence;
813 return i;
815 AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz ) {
816 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
817 i->tag = Ain_ACAS;
818 i->Ain.ACAS.addr = addr;
819 i->Ain.ACAS.sz = sz;
820 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
821 return i;
823 AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz ) {
824 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
825 i->tag = Ain_DACAS;
826 i->Ain.DACAS.addr = addr;
827 i->Ain.DACAS.sz = sz;
828 vassert(sz == 8 || sz == 4);
829 return i;
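/* Note (illustrative, not from the original source): Ain_ACAS and Ain_DACAS
   rely on the fixed register bindings of lock cmpxchg / cmpxchg{8,16}b --
   expected old value in %rax (%rdx:%rax for DACAS), new value in %rbx
   (%rcx:%rbx for DACAS) -- which is why only the address and size are
   explicit operands here and the remaining registers are added implicitly
   by getRegUsage_AMD64Instr below. */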
832 AMD64Instr* AMD64Instr_A87Free ( Int nregs )
834 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
835 i->tag = Ain_A87Free;
836 i->Ain.A87Free.nregs = nregs;
837 vassert(nregs >= 1 && nregs <= 7);
838 return i;
840 AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB )
842 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
843 i->tag = Ain_A87PushPop;
844 i->Ain.A87PushPop.addr = addr;
845 i->Ain.A87PushPop.isPush = isPush;
846 i->Ain.A87PushPop.szB = szB;
847 vassert(szB == 8 || szB == 4);
848 return i;
850 AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
852 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
853 i->tag = Ain_A87FpOp;
854 i->Ain.A87FpOp.op = op;
855 return i;
857 AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr )
859 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
860 i->tag = Ain_A87LdCW;
861 i->Ain.A87LdCW.addr = addr;
862 return i;
864 AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr )
866 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
867 i->tag = Ain_A87StSW;
868 i->Ain.A87StSW.addr = addr;
869 return i;
871 AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) {
872 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
873 i->tag = Ain_LdMXCSR;
874 i->Ain.LdMXCSR.addr = addr;
875 return i;
877 AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) {
878 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
879 i->tag = Ain_SseUComIS;
880 i->Ain.SseUComIS.sz = toUChar(sz);
881 i->Ain.SseUComIS.srcL = srcL;
882 i->Ain.SseUComIS.srcR = srcR;
883 i->Ain.SseUComIS.dst = dst;
884 vassert(sz == 4 || sz == 8);
885 return i;
887 AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ) {
888 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
889 i->tag = Ain_SseSI2SF;
890 i->Ain.SseSI2SF.szS = toUChar(szS);
891 i->Ain.SseSI2SF.szD = toUChar(szD);
892 i->Ain.SseSI2SF.src = src;
893 i->Ain.SseSI2SF.dst = dst;
894 vassert(szS == 4 || szS == 8);
895 vassert(szD == 4 || szD == 8);
896 return i;
898 AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ) {
899 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
900 i->tag = Ain_SseSF2SI;
901 i->Ain.SseSF2SI.szS = toUChar(szS);
902 i->Ain.SseSF2SI.szD = toUChar(szD);
903 i->Ain.SseSF2SI.src = src;
904 i->Ain.SseSF2SI.dst = dst;
905 vassert(szS == 4 || szS == 8);
906 vassert(szD == 4 || szD == 8);
907 return i;
909 AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst )
911 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
912 i->tag = Ain_SseSDSS;
913 i->Ain.SseSDSS.from64 = from64;
914 i->Ain.SseSDSS.src = src;
915 i->Ain.SseSDSS.dst = dst;
916 return i;
918 AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz,
919 HReg reg, AMD64AMode* addr ) {
920 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
921 i->tag = Ain_SseLdSt;
922 i->Ain.SseLdSt.isLoad = isLoad;
923 i->Ain.SseLdSt.sz = toUChar(sz);
924 i->Ain.SseLdSt.reg = reg;
925 i->Ain.SseLdSt.addr = addr;
926 vassert(sz == 4 || sz == 8 || sz == 16);
927 return i;
929 AMD64Instr* AMD64Instr_SseCStore ( AMD64CondCode cond,
930 HReg src, AMD64AMode* addr )
932 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
933 i->tag = Ain_SseCStore;
934 i->Ain.SseCStore.cond = cond;
935 i->Ain.SseCStore.src = src;
936 i->Ain.SseCStore.addr = addr;
937 vassert(cond != Acc_ALWAYS);
938 return i;
940 AMD64Instr* AMD64Instr_SseCLoad ( AMD64CondCode cond,
941 AMD64AMode* addr, HReg dst )
943 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
944 i->tag = Ain_SseCLoad;
945 i->Ain.SseCLoad.cond = cond;
946 i->Ain.SseCLoad.addr = addr;
947 i->Ain.SseCLoad.dst = dst;
948 vassert(cond != Acc_ALWAYS);
949 return i;
951 AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg reg, AMD64AMode* addr )
953 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
954 i->tag = Ain_SseLdzLO;
955 i->Ain.SseLdzLO.sz = sz;
956 i->Ain.SseLdzLO.reg = reg;
957 i->Ain.SseLdzLO.addr = addr;
958 vassert(sz == 4 || sz == 8);
959 return i;
961 AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp op, HReg src, HReg dst ) {
962 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
963 i->tag = Ain_Sse32Fx4;
964 i->Ain.Sse32Fx4.op = op;
965 i->Ain.Sse32Fx4.src = src;
966 i->Ain.Sse32Fx4.dst = dst;
967 vassert(op != Asse_MOV);
968 return i;
970 AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp op, HReg src, HReg dst ) {
971 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
972 i->tag = Ain_Sse32FLo;
973 i->Ain.Sse32FLo.op = op;
974 i->Ain.Sse32FLo.src = src;
975 i->Ain.Sse32FLo.dst = dst;
976 vassert(op != Asse_MOV);
977 return i;
979 AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp op, HReg src, HReg dst ) {
980 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
981 i->tag = Ain_Sse64Fx2;
982 i->Ain.Sse64Fx2.op = op;
983 i->Ain.Sse64Fx2.src = src;
984 i->Ain.Sse64Fx2.dst = dst;
985 vassert(op != Asse_MOV);
986 return i;
988 AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp op, HReg src, HReg dst ) {
989 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
990 i->tag = Ain_Sse64FLo;
991 i->Ain.Sse64FLo.op = op;
992 i->Ain.Sse64FLo.src = src;
993 i->Ain.Sse64FLo.dst = dst;
994 vassert(op != Asse_MOV);
995 return i;
997 AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp op, HReg re, HReg rg ) {
998 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
999 i->tag = Ain_SseReRg;
1000 i->Ain.SseReRg.op = op;
1001 i->Ain.SseReRg.src = re;
1002 i->Ain.SseReRg.dst = rg;
1003 return i;
1005 AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode cond, HReg src, HReg dst ) {
1006 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1007 i->tag = Ain_SseCMov;
1008 i->Ain.SseCMov.cond = cond;
1009 i->Ain.SseCMov.src = src;
1010 i->Ain.SseCMov.dst = dst;
1011 vassert(cond != Acc_ALWAYS);
1012 return i;
1014 AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ) {
1015 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1016 i->tag = Ain_SseShuf;
1017 i->Ain.SseShuf.order = order;
1018 i->Ain.SseShuf.src = src;
1019 i->Ain.SseShuf.dst = dst;
1020 vassert(order >= 0 && order <= 0xFF);
1021 return i;
1023 AMD64Instr* AMD64Instr_SseShiftN ( AMD64SseOp op,
1024 UInt shiftBits, HReg dst ) {
1025 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1026 i->tag = Ain_SseShiftN;
1027 i->Ain.SseShiftN.op = op;
1028 i->Ain.SseShiftN.shiftBits = shiftBits;
1029 i->Ain.SseShiftN.dst = dst;
1030 return i;
1032 AMD64Instr* AMD64Instr_SseMOVQ ( HReg gpr, HReg xmm, Bool toXMM ) {
1033 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1034 i->tag = Ain_SseMOVQ;
1035 i->Ain.SseMOVQ.gpr = gpr;
1036 i->Ain.SseMOVQ.xmm = xmm;
1037 i->Ain.SseMOVQ.toXMM = toXMM;
1038 vassert(hregClass(gpr) == HRcInt64);
1039 vassert(hregClass(xmm) == HRcVec128);
1040 return i;
1042 //uu AMD64Instr* AMD64Instr_AvxLdSt ( Bool isLoad,
1043 //uu HReg reg, AMD64AMode* addr ) {
1044 //uu AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1045 //uu i->tag = Ain_AvxLdSt;
1046 //uu i->Ain.AvxLdSt.isLoad = isLoad;
1047 //uu i->Ain.AvxLdSt.reg = reg;
1048 //uu i->Ain.AvxLdSt.addr = addr;
1049 //uu return i;
1050 //uu }
1051 //uu AMD64Instr* AMD64Instr_AvxReRg ( AMD64SseOp op, HReg re, HReg rg ) {
1052 //uu AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1053 //uu i->tag = Ain_AvxReRg;
1054 //uu i->Ain.AvxReRg.op = op;
1055 //uu i->Ain.AvxReRg.src = re;
1056 //uu i->Ain.AvxReRg.dst = rg;
1057 //uu return i;
1058 //uu }
1059 AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter,
1060 AMD64AMode* amFailAddr ) {
1061 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1062 i->tag = Ain_EvCheck;
1063 i->Ain.EvCheck.amCounter = amCounter;
1064 i->Ain.EvCheck.amFailAddr = amFailAddr;
1065 return i;
1067 AMD64Instr* AMD64Instr_ProfInc ( void ) {
1068 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1069 i->tag = Ain_ProfInc;
1070 return i;
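/* Illustrative sketch (not part of the original file): a client such as the
   instruction selector builds instructions with the constructors above, e.g.

      AMD64Instr* add = AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(1), dst);

   for some 64-bit integer HReg 'dst'; ppAMD64Instr below would render this
   as "addq $0x1," followed by the destination register. */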
1073 void ppAMD64Instr ( const AMD64Instr* i, Bool mode64 )
1075 vassert(mode64 == True);
1076 switch (i->tag) {
1077 case Ain_Imm64:
1078 vex_printf("movabsq $0x%llx,", i->Ain.Imm64.imm64);
1079 ppHRegAMD64(i->Ain.Imm64.dst);
1080 return;
1081 case Ain_Alu64R:
1082 vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64R.op));
1083 ppAMD64RMI(i->Ain.Alu64R.src);
1084 vex_printf(",");
1085 ppHRegAMD64(i->Ain.Alu64R.dst);
1086 return;
1087 case Ain_Alu64M:
1088 vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64M.op));
1089 ppAMD64RI(i->Ain.Alu64M.src);
1090 vex_printf(",");
1091 ppAMD64AMode(i->Ain.Alu64M.dst);
1092 return;
1093 case Ain_Sh64:
1094 vex_printf("%sq ", showAMD64ShiftOp(i->Ain.Sh64.op));
1095 if (i->Ain.Sh64.src == 0)
1096 vex_printf("%%cl,");
1097 else
1098 vex_printf("$%d,", (Int)i->Ain.Sh64.src);
1099 ppHRegAMD64(i->Ain.Sh64.dst);
1100 return;
1101 case Ain_Sh32:
1102 vex_printf("%sl ", showAMD64ShiftOp(i->Ain.Sh32.op));
1103 if (i->Ain.Sh32.src == 0)
1104 vex_printf("%%cl,");
1105 else
1106 vex_printf("$%d,", (Int)i->Ain.Sh32.src);
1107 ppHRegAMD64_lo32(i->Ain.Sh32.dst);
1108 return;
1109 case Ain_Test64:
1110 vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32);
1111 ppHRegAMD64(i->Ain.Test64.dst);
1112 return;
1113 case Ain_Unary64:
1114 vex_printf("%sq ", showAMD64UnaryOp(i->Ain.Unary64.op));
1115 ppHRegAMD64(i->Ain.Unary64.dst);
1116 return;
1117 case Ain_Lea64:
1118 vex_printf("leaq ");
1119 ppAMD64AMode(i->Ain.Lea64.am);
1120 vex_printf(",");
1121 ppHRegAMD64(i->Ain.Lea64.dst);
1122 return;
1123 case Ain_Alu32R:
1124 vex_printf("%sl ", showAMD64AluOp(i->Ain.Alu32R.op));
1125 ppAMD64RMI_lo32(i->Ain.Alu32R.src);
1126 vex_printf(",");
1127 ppHRegAMD64_lo32(i->Ain.Alu32R.dst);
1128 return;
1129 case Ain_MulL:
1130 vex_printf("%cmulq ", i->Ain.MulL.syned ? 's' : 'u');
1131 ppAMD64RM(i->Ain.MulL.src);
1132 return;
1133 case Ain_Div:
1134 vex_printf("%cdiv%s ",
1135 i->Ain.Div.syned ? 's' : 'u',
1136 showAMD64ScalarSz(i->Ain.Div.sz));
1137 ppAMD64RM(i->Ain.Div.src);
1138 return;
1139 case Ain_Push:
1140 vex_printf("pushq ");
1141 ppAMD64RMI(i->Ain.Push.src);
1142 return;
1143 case Ain_Call:
1144 vex_printf("call%s[%d,",
1145 i->Ain.Call.cond==Acc_ALWAYS
1146 ? "" : showAMD64CondCode(i->Ain.Call.cond),
1147 i->Ain.Call.regparms );
1148 ppRetLoc(i->Ain.Call.rloc);
1149 vex_printf("] 0x%llx", i->Ain.Call.target);
1150 break;
1152 case Ain_XDirect:
1153 vex_printf("(xDirect) ");
1154 vex_printf("if (%%rflags.%s) { ",
1155 showAMD64CondCode(i->Ain.XDirect.cond));
1156 vex_printf("movabsq $0x%llx,%%r11; ", i->Ain.XDirect.dstGA);
1157 vex_printf("movq %%r11,");
1158 ppAMD64AMode(i->Ain.XDirect.amRIP);
1159 vex_printf("; ");
1160 vex_printf("movabsq $disp_cp_chain_me_to_%sEP,%%r11; call *%%r11 }",
1161 i->Ain.XDirect.toFastEP ? "fast" : "slow");
1162 return;
1163 case Ain_XIndir:
1164 vex_printf("(xIndir) ");
1165 vex_printf("if (%%rflags.%s) { ",
1166 showAMD64CondCode(i->Ain.XIndir.cond));
1167 vex_printf("movq ");
1168 ppHRegAMD64(i->Ain.XIndir.dstGA);
1169 vex_printf(",");
1170 ppAMD64AMode(i->Ain.XIndir.amRIP);
1171 vex_printf("; movabsq $disp_indir,%%r11; jmp *%%r11 }");
1172 return;
1173 case Ain_XAssisted:
1174 vex_printf("(xAssisted) ");
1175 vex_printf("if (%%rflags.%s) { ",
1176 showAMD64CondCode(i->Ain.XAssisted.cond));
1177 vex_printf("movq ");
1178 ppHRegAMD64(i->Ain.XAssisted.dstGA);
1179 vex_printf(",");
1180 ppAMD64AMode(i->Ain.XAssisted.amRIP);
1181 vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%rbp",
1182 (Int)i->Ain.XAssisted.jk);
1183 vex_printf("; movabsq $disp_assisted,%%r11; jmp *%%r11 }");
1184 return;
1186 case Ain_CMov64:
1187 vex_printf("cmov%s ", showAMD64CondCode(i->Ain.CMov64.cond));
1188 ppHRegAMD64(i->Ain.CMov64.src);
1189 vex_printf(",");
1190 ppHRegAMD64(i->Ain.CMov64.dst);
1191 return;
1192 case Ain_CLoad:
1193 vex_printf("if (%%rflags.%s) { ",
1194 showAMD64CondCode(i->Ain.CLoad.cond));
1195 vex_printf("mov%c ", i->Ain.CLoad.szB == 4 ? 'l' : 'q');
1196 ppAMD64AMode(i->Ain.CLoad.addr);
1197 vex_printf(", ");
1198 (i->Ain.CLoad.szB == 4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1199 (i->Ain.CLoad.dst);
1200 vex_printf(" }");
1201 return;
1202 case Ain_CStore:
1203 vex_printf("if (%%rflags.%s) { ",
1204 showAMD64CondCode(i->Ain.CStore.cond));
1205 vex_printf("mov%c ", i->Ain.CStore.szB == 4 ? 'l' : 'q');
1206 (i->Ain.CStore.szB == 4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1207 (i->Ain.CStore.src);
1208 vex_printf(", ");
1209 ppAMD64AMode(i->Ain.CStore.addr);
1210 vex_printf(" }");
1211 return;
1213 case Ain_MovxLQ:
1214 vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z');
1215 ppHRegAMD64_lo32(i->Ain.MovxLQ.src);
1216 vex_printf(",");
1217 ppHRegAMD64(i->Ain.MovxLQ.dst);
1218 return;
1219 case Ain_LoadEX:
1220 if (i->Ain.LoadEX.szSmall==4 && !i->Ain.LoadEX.syned) {
1221 vex_printf("movl ");
1222 ppAMD64AMode(i->Ain.LoadEX.src);
1223 vex_printf(",");
1224 ppHRegAMD64_lo32(i->Ain.LoadEX.dst);
1225 } else {
1226 vex_printf("mov%c%cq ",
1227 i->Ain.LoadEX.syned ? 's' : 'z',
1228 i->Ain.LoadEX.szSmall==1
1229 ? 'b'
1230 : (i->Ain.LoadEX.szSmall==2 ? 'w' : 'l'));
1231 ppAMD64AMode(i->Ain.LoadEX.src);
1232 vex_printf(",");
1233 ppHRegAMD64(i->Ain.LoadEX.dst);
1235 return;
1236 case Ain_Store:
1237 vex_printf("mov%c ", i->Ain.Store.sz==1 ? 'b'
1238 : (i->Ain.Store.sz==2 ? 'w' : 'l'));
1239 ppHRegAMD64(i->Ain.Store.src);
1240 vex_printf(",");
1241 ppAMD64AMode(i->Ain.Store.dst);
1242 return;
1243 case Ain_Set64:
1244 vex_printf("setq%s ", showAMD64CondCode(i->Ain.Set64.cond));
1245 ppHRegAMD64(i->Ain.Set64.dst);
1246 return;
1247 case Ain_Bsfr64:
1248 vex_printf("bs%cq ", i->Ain.Bsfr64.isFwds ? 'f' : 'r');
1249 ppHRegAMD64(i->Ain.Bsfr64.src);
1250 vex_printf(",");
1251 ppHRegAMD64(i->Ain.Bsfr64.dst);
1252 return;
1253 case Ain_MFence:
1254 vex_printf("mfence" );
1255 return;
1256 case Ain_ACAS:
1257 vex_printf("lock cmpxchg%c ",
1258 i->Ain.ACAS.sz==1 ? 'b' : i->Ain.ACAS.sz==2 ? 'w'
1259 : i->Ain.ACAS.sz==4 ? 'l' : 'q' );
1260 vex_printf("{%%rax->%%rbx},");
1261 ppAMD64AMode(i->Ain.ACAS.addr);
1262 return;
1263 case Ain_DACAS:
1264 vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},",
1265 (Int)(2 * i->Ain.DACAS.sz));
1266 ppAMD64AMode(i->Ain.DACAS.addr);
1267 return;
1268 case Ain_A87Free:
1269 vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
1270 break;
1271 case Ain_A87PushPop:
1272 vex_printf(i->Ain.A87PushPop.isPush ? "fld%c " : "fstp%c ",
1273 i->Ain.A87PushPop.szB == 4 ? 's' : 'l');
1274 ppAMD64AMode(i->Ain.A87PushPop.addr);
1275 break;
1276 case Ain_A87FpOp:
1277 vex_printf("f%s", showA87FpOp(i->Ain.A87FpOp.op));
1278 break;
1279 case Ain_A87LdCW:
1280 vex_printf("fldcw ");
1281 ppAMD64AMode(i->Ain.A87LdCW.addr);
1282 break;
1283 case Ain_A87StSW:
1284 vex_printf("fstsw ");
1285 ppAMD64AMode(i->Ain.A87StSW.addr);
1286 break;
1287 case Ain_LdMXCSR:
1288 vex_printf("ldmxcsr ");
1289 ppAMD64AMode(i->Ain.LdMXCSR.addr);
1290 break;
1291 case Ain_SseUComIS:
1292 vex_printf("ucomis%s ", i->Ain.SseUComIS.sz==4 ? "s" : "d");
1293 ppHRegAMD64(i->Ain.SseUComIS.srcL);
1294 vex_printf(",");
1295 ppHRegAMD64(i->Ain.SseUComIS.srcR);
1296 vex_printf(" ; pushfq ; popq ");
1297 ppHRegAMD64(i->Ain.SseUComIS.dst);
1298 break;
1299 case Ain_SseSI2SF:
1300 vex_printf("cvtsi2s%s ", i->Ain.SseSI2SF.szD==4 ? "s" : "d");
1301 (i->Ain.SseSI2SF.szS==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1302 (i->Ain.SseSI2SF.src);
1303 vex_printf(",");
1304 ppHRegAMD64(i->Ain.SseSI2SF.dst);
1305 break;
1306 case Ain_SseSF2SI:
1307 vex_printf("cvts%s2si ", i->Ain.SseSF2SI.szS==4 ? "s" : "d");
1308 ppHRegAMD64(i->Ain.SseSF2SI.src);
1309 vex_printf(",");
1310 (i->Ain.SseSF2SI.szD==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1311 (i->Ain.SseSF2SI.dst);
1312 break;
1313 case Ain_SseSDSS:
1314 vex_printf(i->Ain.SseSDSS.from64 ? "cvtsd2ss " : "cvtss2sd ");
1315 ppHRegAMD64(i->Ain.SseSDSS.src);
1316 vex_printf(",");
1317 ppHRegAMD64(i->Ain.SseSDSS.dst);
1318 break;
1319 case Ain_SseLdSt:
1320 switch (i->Ain.SseLdSt.sz) {
1321 case 4: vex_printf("movss "); break;
1322 case 8: vex_printf("movsd "); break;
1323 case 16: vex_printf("movups "); break;
1324 default: vassert(0);
1326 if (i->Ain.SseLdSt.isLoad) {
1327 ppAMD64AMode(i->Ain.SseLdSt.addr);
1328 vex_printf(",");
1329 ppHRegAMD64(i->Ain.SseLdSt.reg);
1330 } else {
1331 ppHRegAMD64(i->Ain.SseLdSt.reg);
1332 vex_printf(",");
1333 ppAMD64AMode(i->Ain.SseLdSt.addr);
1335 return;
1336 case Ain_SseCStore:
1337 vex_printf("if (%%rflags.%s) { ",
1338 showAMD64CondCode(i->Ain.SseCStore.cond));
1339 vex_printf("movups ");
1340 ppHRegAMD64(i->Ain.SseCStore.src);
1341 vex_printf(", ");
1342 ppAMD64AMode(i->Ain.SseCStore.addr);
1343 vex_printf(" }");
1344 return;
1345 case Ain_SseCLoad:
1346 vex_printf("if (%%rflags.%s) { ",
1347 showAMD64CondCode(i->Ain.SseCLoad.cond));
1348 vex_printf("movups ");
1349 ppAMD64AMode(i->Ain.SseCLoad.addr);
1350 vex_printf(", ");
1351 ppHRegAMD64(i->Ain.SseCLoad.dst);
1352 vex_printf(" }");
1353 return;
1354 case Ain_SseLdzLO:
1355 vex_printf("movs%s ", i->Ain.SseLdzLO.sz==4 ? "s" : "d");
1356 ppAMD64AMode(i->Ain.SseLdzLO.addr);
1357 vex_printf(",");
1358 ppHRegAMD64(i->Ain.SseLdzLO.reg);
1359 return;
1360 case Ain_Sse32Fx4:
1361 vex_printf("%sps ", showAMD64SseOp(i->Ain.Sse32Fx4.op));
1362 ppHRegAMD64(i->Ain.Sse32Fx4.src);
1363 vex_printf(",");
1364 ppHRegAMD64(i->Ain.Sse32Fx4.dst);
1365 return;
1366 case Ain_Sse32FLo:
1367 vex_printf("%sss ", showAMD64SseOp(i->Ain.Sse32FLo.op));
1368 ppHRegAMD64(i->Ain.Sse32FLo.src);
1369 vex_printf(",");
1370 ppHRegAMD64(i->Ain.Sse32FLo.dst);
1371 return;
1372 case Ain_Sse64Fx2:
1373 vex_printf("%spd ", showAMD64SseOp(i->Ain.Sse64Fx2.op));
1374 ppHRegAMD64(i->Ain.Sse64Fx2.src);
1375 vex_printf(",");
1376 ppHRegAMD64(i->Ain.Sse64Fx2.dst);
1377 return;
1378 case Ain_Sse64FLo:
1379 vex_printf("%ssd ", showAMD64SseOp(i->Ain.Sse64FLo.op));
1380 ppHRegAMD64(i->Ain.Sse64FLo.src);
1381 vex_printf(",");
1382 ppHRegAMD64(i->Ain.Sse64FLo.dst);
1383 return;
1384 case Ain_SseReRg:
1385 vex_printf("%s ", showAMD64SseOp(i->Ain.SseReRg.op));
1386 ppHRegAMD64(i->Ain.SseReRg.src);
1387 vex_printf(",");
1388 ppHRegAMD64(i->Ain.SseReRg.dst);
1389 return;
1390 case Ain_SseCMov:
1391 vex_printf("cmov%s ", showAMD64CondCode(i->Ain.SseCMov.cond));
1392 ppHRegAMD64(i->Ain.SseCMov.src);
1393 vex_printf(",");
1394 ppHRegAMD64(i->Ain.SseCMov.dst);
1395 return;
1396 case Ain_SseShuf:
1397 vex_printf("pshufd $0x%x,", (UInt)i->Ain.SseShuf.order);
1398 ppHRegAMD64(i->Ain.SseShuf.src);
1399 vex_printf(",");
1400 ppHRegAMD64(i->Ain.SseShuf.dst);
1401 return;
1402 case Ain_SseShiftN:
1403 vex_printf("%s $%u, ", showAMD64SseOp(i->Ain.SseShiftN.op),
1404 i->Ain.SseShiftN.shiftBits);
1405 ppHRegAMD64(i->Ain.SseShiftN.dst);
1406 return;
1407 case Ain_SseMOVQ:
1408 vex_printf("movq ");
1409 if (i->Ain.SseMOVQ.toXMM) {
1410 ppHRegAMD64(i->Ain.SseMOVQ.gpr);
1411 vex_printf(",");
1412 ppHRegAMD64(i->Ain.SseMOVQ.xmm);
1413 } else {
1414 ppHRegAMD64(i->Ain.SseMOVQ.xmm);
1415 vex_printf(",");
1416 ppHRegAMD64(i->Ain.SseMOVQ.gpr);
1418 return;
1419 //uu case Ain_AvxLdSt:
1420 //uu vex_printf("vmovups ");
1421 //uu if (i->Ain.AvxLdSt.isLoad) {
1422 //uu ppAMD64AMode(i->Ain.AvxLdSt.addr);
1423 //uu vex_printf(",");
1424 //uu ppHRegAMD64(i->Ain.AvxLdSt.reg);
1425 //uu } else {
1426 //uu ppHRegAMD64(i->Ain.AvxLdSt.reg);
1427 //uu vex_printf(",");
1428 //uu ppAMD64AMode(i->Ain.AvxLdSt.addr);
1429 //uu }
1430 //uu return;
1431 //uu case Ain_AvxReRg:
1432 //uu vex_printf("v%s ", showAMD64SseOp(i->Ain.SseReRg.op));
1433 //uu ppHRegAMD64(i->Ain.AvxReRg.src);
1434 //uu vex_printf(",");
1435 //uu ppHRegAMD64(i->Ain.AvxReRg.dst);
1436 //uu return;
1437 case Ain_EvCheck:
1438 vex_printf("(evCheck) decl ");
1439 ppAMD64AMode(i->Ain.EvCheck.amCounter);
1440 vex_printf("; jns nofail; jmp *");
1441 ppAMD64AMode(i->Ain.EvCheck.amFailAddr);
1442 vex_printf("; nofail:");
1443 return;
1444 case Ain_ProfInc:
1445 vex_printf("(profInc) movabsq $NotKnownYet, %%r11; incq (%%r11)");
1446 return;
1447 default:
1448 vpanic("ppAMD64Instr");
1452 /* --------- Helpers for register allocation. --------- */
1454 void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
1456 Bool unary;
1457 vassert(mode64 == True);
1458 initHRegUsage(u);
1459 switch (i->tag) {
1460 case Ain_Imm64:
1461 addHRegUse(u, HRmWrite, i->Ain.Imm64.dst);
1462 return;
1463 case Ain_Alu64R:
1464 addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src);
1465 if (i->Ain.Alu64R.op == Aalu_MOV) {
1466 addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst);
1468 if (i->Ain.Alu64R.src->tag == Armi_Reg) {
1469 u->isRegRegMove = True;
1470 u->regMoveSrc = i->Ain.Alu64R.src->Armi.Reg.reg;
1471 u->regMoveDst = i->Ain.Alu64R.dst;
1473 return;
1475 if (i->Ain.Alu64R.op == Aalu_CMP) {
1476 addHRegUse(u, HRmRead, i->Ain.Alu64R.dst);
1477 return;
1479 addHRegUse(u, HRmModify, i->Ain.Alu64R.dst);
1480 return;
1481 case Ain_Alu64M:
1482 addRegUsage_AMD64RI(u, i->Ain.Alu64M.src);
1483 addRegUsage_AMD64AMode(u, i->Ain.Alu64M.dst);
1484 return;
1485 case Ain_Sh64:
1486 addHRegUse(u, HRmModify, i->Ain.Sh64.dst);
1487 if (i->Ain.Sh64.src == 0)
1488 addHRegUse(u, HRmRead, hregAMD64_RCX());
1489 return;
1490 case Ain_Sh32:
1491 addHRegUse(u, HRmModify, i->Ain.Sh32.dst);
1492 if (i->Ain.Sh32.src == 0)
1493 addHRegUse(u, HRmRead, hregAMD64_RCX());
1494 return;
1495 case Ain_Test64:
1496 addHRegUse(u, HRmRead, i->Ain.Test64.dst);
1497 return;
1498 case Ain_Unary64:
1499 addHRegUse(u, HRmModify, i->Ain.Unary64.dst);
1500 return;
1501 case Ain_Lea64:
1502 addRegUsage_AMD64AMode(u, i->Ain.Lea64.am);
1503 addHRegUse(u, HRmWrite, i->Ain.Lea64.dst);
1504 return;
1505 case Ain_Alu32R:
1506 vassert(i->Ain.Alu32R.op != Aalu_MOV);
1507 addRegUsage_AMD64RMI(u, i->Ain.Alu32R.src);
1508 if (i->Ain.Alu32R.op == Aalu_CMP) {
1509 addHRegUse(u, HRmRead, i->Ain.Alu32R.dst);
1510 return;
1512 addHRegUse(u, HRmModify, i->Ain.Alu32R.dst);
1513 return;
1514 case Ain_MulL:
1515 addRegUsage_AMD64RM(u, i->Ain.MulL.src, HRmRead);
1516 addHRegUse(u, HRmModify, hregAMD64_RAX());
1517 addHRegUse(u, HRmWrite, hregAMD64_RDX());
1518 return;
1519 case Ain_Div:
1520 addRegUsage_AMD64RM(u, i->Ain.Div.src, HRmRead);
1521 addHRegUse(u, HRmModify, hregAMD64_RAX());
1522 addHRegUse(u, HRmModify, hregAMD64_RDX());
1523 return;
1524 case Ain_Push:
1525 addRegUsage_AMD64RMI(u, i->Ain.Push.src);
1526 addHRegUse(u, HRmModify, hregAMD64_RSP());
1527 return;
1528 case Ain_Call:
1529 /* This is a bit subtle. */
1530 /* First off, claim it trashes all the caller-saved regs
1531 which fall within the register allocator's jurisdiction.
1532 These I believe to be: rax rcx rdx rdi rsi r8 r9 r10
1533 and all the xmm registers. */
1534 addHRegUse(u, HRmWrite, hregAMD64_RAX());
1535 addHRegUse(u, HRmWrite, hregAMD64_RCX());
1536 addHRegUse(u, HRmWrite, hregAMD64_RDX());
1537 addHRegUse(u, HRmWrite, hregAMD64_RDI());
1538 addHRegUse(u, HRmWrite, hregAMD64_RSI());
1539 addHRegUse(u, HRmWrite, hregAMD64_R8());
1540 addHRegUse(u, HRmWrite, hregAMD64_R9());
1541 addHRegUse(u, HRmWrite, hregAMD64_R10());
1542 addHRegUse(u, HRmWrite, hregAMD64_XMM0());
1543 addHRegUse(u, HRmWrite, hregAMD64_XMM1());
1544 addHRegUse(u, HRmWrite, hregAMD64_XMM3());
1545 addHRegUse(u, HRmWrite, hregAMD64_XMM4());
1546 addHRegUse(u, HRmWrite, hregAMD64_XMM5());
1547 addHRegUse(u, HRmWrite, hregAMD64_XMM6());
1548 addHRegUse(u, HRmWrite, hregAMD64_XMM7());
1549 addHRegUse(u, HRmWrite, hregAMD64_XMM8());
1550 addHRegUse(u, HRmWrite, hregAMD64_XMM9());
1551 addHRegUse(u, HRmWrite, hregAMD64_XMM10());
1552 addHRegUse(u, HRmWrite, hregAMD64_XMM11());
1553 addHRegUse(u, HRmWrite, hregAMD64_XMM12());
1555 /* Now we have to state any parameter-carrying registers
1556 which might be read. This depends on the regparmness. */
1557 switch (i->Ain.Call.regparms) {
1558 case 6: addHRegUse(u, HRmRead, hregAMD64_R9()); /*fallthru*/
1559 case 5: addHRegUse(u, HRmRead, hregAMD64_R8()); /*fallthru*/
1560 case 4: addHRegUse(u, HRmRead, hregAMD64_RCX()); /*fallthru*/
1561 case 3: addHRegUse(u, HRmRead, hregAMD64_RDX()); /*fallthru*/
1562 case 2: addHRegUse(u, HRmRead, hregAMD64_RSI()); /*fallthru*/
1563 case 1: addHRegUse(u, HRmRead, hregAMD64_RDI()); break;
1564 case 0: break;
1565 default: vpanic("getRegUsage_AMD64Instr:Call:regparms");
1567 /* Finally, there is the issue that the insn trashes a
1568 register because the literal target address has to be
1569 loaded into a register. Fortunately, r11 is stated in the
1570 ABI as a scratch register, and so seems a suitable victim. */
1571 addHRegUse(u, HRmWrite, hregAMD64_R11());
1572 /* Upshot of this is that the assembler really must use r11,
1573 and no other, as a destination temporary. */
1574 return;
1575 /* XDirect/XIndir/XAssisted are also a bit subtle. They
1576 conditionally exit the block. Hence we only need to list (1)
1577 the registers that they read, and (2) the registers that they
1578 write in the case where the block is not exited. (2) is
1579 empty, hence only (1) is relevant here. */
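      /* Note (illustrative, not from the original source): the scratch
         registers these exits clobber (%r11, and %rbp for the assisted
         case) are deliberately kept out of the allocatable part of the
         register universe above, so they never need to be reported here. */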
1580 case Ain_XDirect:
1581 /* Don't bother to mention the write to %r11, since it is not
1582 available to the allocator. */
1583 addRegUsage_AMD64AMode(u, i->Ain.XDirect.amRIP);
1584 return;
1585 case Ain_XIndir:
1586 /* Ditto re %r11 */
1587 addHRegUse(u, HRmRead, i->Ain.XIndir.dstGA);
1588 addRegUsage_AMD64AMode(u, i->Ain.XIndir.amRIP);
1589 return;
1590 case Ain_XAssisted:
1591 /* Ditto re %r11 and %rbp (the baseblock ptr) */
1592 addHRegUse(u, HRmRead, i->Ain.XAssisted.dstGA);
1593 addRegUsage_AMD64AMode(u, i->Ain.XAssisted.amRIP);
1594 return;
1595 case Ain_CMov64:
1596 addHRegUse(u, HRmRead, i->Ain.CMov64.src);
1597 addHRegUse(u, HRmModify, i->Ain.CMov64.dst);
1598 return;
1599 case Ain_CLoad:
1600 addRegUsage_AMD64AMode(u, i->Ain.CLoad.addr);
1601 addHRegUse(u, HRmModify, i->Ain.CLoad.dst);
1602 return;
1603 case Ain_CStore:
1604 addRegUsage_AMD64AMode(u, i->Ain.CStore.addr);
1605 addHRegUse(u, HRmRead, i->Ain.CStore.src);
1606 return;
1607 case Ain_MovxLQ:
1608 addHRegUse(u, HRmRead, i->Ain.MovxLQ.src);
1609 addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst);
1610 return;
1611 case Ain_LoadEX:
1612 addRegUsage_AMD64AMode(u, i->Ain.LoadEX.src);
1613 addHRegUse(u, HRmWrite, i->Ain.LoadEX.dst);
1614 return;
1615 case Ain_Store:
1616 addHRegUse(u, HRmRead, i->Ain.Store.src);
1617 addRegUsage_AMD64AMode(u, i->Ain.Store.dst);
1618 return;
1619 case Ain_Set64:
1620 addHRegUse(u, HRmWrite, i->Ain.Set64.dst);
1621 return;
1622 case Ain_Bsfr64:
1623 addHRegUse(u, HRmRead, i->Ain.Bsfr64.src);
1624 addHRegUse(u, HRmWrite, i->Ain.Bsfr64.dst);
1625 return;
1626 case Ain_MFence:
1627 return;
1628 case Ain_ACAS:
1629 addRegUsage_AMD64AMode(u, i->Ain.ACAS.addr);
1630 addHRegUse(u, HRmRead, hregAMD64_RBX());
1631 addHRegUse(u, HRmModify, hregAMD64_RAX());
1632 return;
1633 case Ain_DACAS:
1634 addRegUsage_AMD64AMode(u, i->Ain.DACAS.addr);
1635 addHRegUse(u, HRmRead, hregAMD64_RCX());
1636 addHRegUse(u, HRmRead, hregAMD64_RBX());
1637 addHRegUse(u, HRmModify, hregAMD64_RDX());
1638 addHRegUse(u, HRmModify, hregAMD64_RAX());
1639 return;
1640 case Ain_A87Free:
1641 return;
1642 case Ain_A87PushPop:
1643 addRegUsage_AMD64AMode(u, i->Ain.A87PushPop.addr);
1644 return;
1645 case Ain_A87FpOp:
1646 return;
1647 case Ain_A87LdCW:
1648 addRegUsage_AMD64AMode(u, i->Ain.A87LdCW.addr);
1649 return;
1650 case Ain_A87StSW:
1651 addRegUsage_AMD64AMode(u, i->Ain.A87StSW.addr);
1652 return;
1653 case Ain_LdMXCSR:
1654 addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr);
1655 return;
1656 case Ain_SseUComIS:
1657 addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcL);
1658 addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcR);
1659 addHRegUse(u, HRmWrite, i->Ain.SseUComIS.dst);
1660 return;
1661 case Ain_SseSI2SF:
1662 addHRegUse(u, HRmRead, i->Ain.SseSI2SF.src);
1663 addHRegUse(u, HRmWrite, i->Ain.SseSI2SF.dst);
1664 return;
1665 case Ain_SseSF2SI:
1666 addHRegUse(u, HRmRead, i->Ain.SseSF2SI.src);
1667 addHRegUse(u, HRmWrite, i->Ain.SseSF2SI.dst);
1668 return;
1669 case Ain_SseSDSS:
1670 addHRegUse(u, HRmRead, i->Ain.SseSDSS.src);
1671 addHRegUse(u, HRmWrite, i->Ain.SseSDSS.dst);
1672 return;
1673 case Ain_SseLdSt:
1674 addRegUsage_AMD64AMode(u, i->Ain.SseLdSt.addr);
1675 addHRegUse(u, i->Ain.SseLdSt.isLoad ? HRmWrite : HRmRead,
1676 i->Ain.SseLdSt.reg);
1677 return;
1678 case Ain_SseCStore:
1679 addRegUsage_AMD64AMode(u, i->Ain.SseCStore.addr);
1680 addHRegUse(u, HRmRead, i->Ain.SseCStore.src);
1681 return;
1682 case Ain_SseCLoad:
1683 addRegUsage_AMD64AMode(u, i->Ain.SseCLoad.addr);
1684 addHRegUse(u, HRmModify, i->Ain.SseCLoad.dst);
1685 return;
1686 case Ain_SseLdzLO:
1687 addRegUsage_AMD64AMode(u, i->Ain.SseLdzLO.addr);
1688 addHRegUse(u, HRmWrite, i->Ain.SseLdzLO.reg);
1689 return;
1690 case Ain_Sse32Fx4:
1691 vassert(i->Ain.Sse32Fx4.op != Asse_MOV);
1692 unary = toBool( i->Ain.Sse32Fx4.op == Asse_RCPF
1693 || i->Ain.Sse32Fx4.op == Asse_RSQRTF
1694 || i->Ain.Sse32Fx4.op == Asse_SQRTF
1695 || i->Ain.Sse32Fx4.op == Asse_I2F
1696 || i->Ain.Sse32Fx4.op == Asse_F2I
1697 || i->Ain.Sse32Fx4.op == Asse_F32toF16
1698 || i->Ain.Sse32Fx4.op == Asse_F16toF32 );
1699 addHRegUse(u, HRmRead, i->Ain.Sse32Fx4.src);
1700 addHRegUse(u, unary ? HRmWrite : HRmModify,
1701 i->Ain.Sse32Fx4.dst);
1702 return;
1703 case Ain_Sse32FLo:
1704 vassert(i->Ain.Sse32FLo.op != Asse_MOV);
1705 unary = toBool( i->Ain.Sse32FLo.op == Asse_RCPF
1706 || i->Ain.Sse32FLo.op == Asse_RSQRTF
1707 || i->Ain.Sse32FLo.op == Asse_SQRTF );
1708 addHRegUse(u, HRmRead, i->Ain.Sse32FLo.src);
1709 addHRegUse(u, unary ? HRmWrite : HRmModify,
1710 i->Ain.Sse32FLo.dst);
1711 return;
1712 case Ain_Sse64Fx2:
1713 vassert(i->Ain.Sse64Fx2.op != Asse_MOV);
1714 unary = toBool( i->Ain.Sse64Fx2.op == Asse_RCPF
1715 || i->Ain.Sse64Fx2.op == Asse_RSQRTF
1716 || i->Ain.Sse64Fx2.op == Asse_SQRTF );
1717 addHRegUse(u, HRmRead, i->Ain.Sse64Fx2.src);
1718 addHRegUse(u, unary ? HRmWrite : HRmModify,
1719 i->Ain.Sse64Fx2.dst);
1720 return;
1721 case Ain_Sse64FLo:
1722 vassert(i->Ain.Sse64FLo.op != Asse_MOV);
1723 unary = toBool( i->Ain.Sse64FLo.op == Asse_RCPF
1724 || i->Ain.Sse64FLo.op == Asse_RSQRTF
1725 || i->Ain.Sse64FLo.op == Asse_SQRTF );
1726 addHRegUse(u, HRmRead, i->Ain.Sse64FLo.src);
1727 addHRegUse(u, unary ? HRmWrite : HRmModify,
1728 i->Ain.Sse64FLo.dst);
1729 return;
1730 case Ain_SseReRg:
1731 if ( (i->Ain.SseReRg.op == Asse_XOR
1732 || i->Ain.SseReRg.op == Asse_CMPEQ32)
1733 && sameHReg(i->Ain.SseReRg.src, i->Ain.SseReRg.dst)) {
1734 /* reg-alloc needs to understand 'xor r,r' and 'cmpeqd
1735 r,r' as a write of a value to r, and independent of any
1736 previous value in r */
1737 /* (as opposed to a rite of passage :-) */
1738 addHRegUse(u, HRmWrite, i->Ain.SseReRg.dst);
1739 } else {
1740 addHRegUse(u, HRmRead, i->Ain.SseReRg.src);
1741 addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV
1742 ? HRmWrite : HRmModify,
1743 i->Ain.SseReRg.dst);
1745 if (i->Ain.SseReRg.op == Asse_MOV) {
1746 u->isRegRegMove = True;
1747 u->regMoveSrc = i->Ain.SseReRg.src;
1748 u->regMoveDst = i->Ain.SseReRg.dst;
1751 return;
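      /* Note (illustrative, not from the original source): the special case
         above matters for idioms like AMD64Instr_SseReRg(Asse_XOR, v, v),
         which leaves v all zeroes regardless of its prior contents;
         classifying it as a write rather than a modify lets the register
         allocator treat v as freshly defined. */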
1752 case Ain_SseCMov:
1753 addHRegUse(u, HRmRead, i->Ain.SseCMov.src);
1754 addHRegUse(u, HRmModify, i->Ain.SseCMov.dst);
1755 return;
1756 case Ain_SseShuf:
1757 addHRegUse(u, HRmRead, i->Ain.SseShuf.src);
1758 addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst);
1759 return;
1760 case Ain_SseShiftN:
1761 addHRegUse(u, HRmModify, i->Ain.SseShiftN.dst);
1762 return;
1763 case Ain_SseMOVQ:
1764 addHRegUse(u, i->Ain.SseMOVQ.toXMM ? HRmRead : HRmWrite,
1765 i->Ain.SseMOVQ.gpr);
1766 addHRegUse(u, i->Ain.SseMOVQ.toXMM ? HRmWrite : HRmRead,
1767 i->Ain.SseMOVQ.xmm);
1768 return;
1769 //uu case Ain_AvxLdSt:
1770 //uu addRegUsage_AMD64AMode(u, i->Ain.AvxLdSt.addr);
1771 //uu addHRegUse(u, i->Ain.AvxLdSt.isLoad ? HRmWrite : HRmRead,
1772 //uu i->Ain.AvxLdSt.reg);
1773 //uu return;
1774 //uu case Ain_AvxReRg:
1775 //uu if ( (i->Ain.AvxReRg.op == Asse_XOR
1776 //uu || i->Ain.AvxReRg.op == Asse_CMPEQ32)
1777 //uu && i->Ain.AvxReRg.src == i->Ain.AvxReRg.dst) {
1778 //uu /* See comments on the case for Ain_SseReRg. */
1779 //uu addHRegUse(u, HRmWrite, i->Ain.AvxReRg.dst);
1780 //uu } else {
1781 //uu addHRegUse(u, HRmRead, i->Ain.AvxReRg.src);
1782 //uu addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV
1783 //uu ? HRmWrite : HRmModify,
1784 //uu i->Ain.AvxReRg.dst);
1785 //uu
1786 //uu if (i->Ain.AvxReRg.op == Asse_MOV) {
1787 //uu u->isRegRegMove = True;
1788 //uu u->regMoveSrc = i->Ain.AvxReRg.src;
1789 //uu u->regMoveDst = i->Ain.AvxReRg.dst;
1790 //uu }
1791 //uu }
1792 //uu return;
1793 case Ain_EvCheck:
1794 /* We expect both amodes only to mention %rbp, so this is in
1795 fact pointless, since %rbp isn't allocatable, but anyway.. */
1796 addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amCounter);
1797 addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amFailAddr);
1798 return;
1799 case Ain_ProfInc:
1800 addHRegUse(u, HRmWrite, hregAMD64_R11());
1801 return;
1802 default:
1803 ppAMD64Instr(i, mode64);
1804 vpanic("getRegUsage_AMD64Instr");
1808 /* local helper */
1809 static inline void mapReg(HRegRemap* m, HReg* r)
1811 *r = lookupHRegRemap(m, *r);
1814 void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
1816 vassert(mode64 == True);
1817 switch (i->tag) {
1818 case Ain_Imm64:
1819 mapReg(m, &i->Ain.Imm64.dst);
1820 return;
1821 case Ain_Alu64R:
1822 mapRegs_AMD64RMI(m, i->Ain.Alu64R.src);
1823 mapReg(m, &i->Ain.Alu64R.dst);
1824 return;
1825 case Ain_Alu64M:
1826 mapRegs_AMD64RI(m, i->Ain.Alu64M.src);
1827 mapRegs_AMD64AMode(m, i->Ain.Alu64M.dst);
1828 return;
1829 case Ain_Sh64:
1830 mapReg(m, &i->Ain.Sh64.dst);
1831 return;
1832 case Ain_Sh32:
1833 mapReg(m, &i->Ain.Sh32.dst);
1834 return;
1835 case Ain_Test64:
1836 mapReg(m, &i->Ain.Test64.dst);
1837 return;
1838 case Ain_Unary64:
1839 mapReg(m, &i->Ain.Unary64.dst);
1840 return;
1841 case Ain_Lea64:
1842 mapRegs_AMD64AMode(m, i->Ain.Lea64.am);
1843 mapReg(m, &i->Ain.Lea64.dst);
1844 return;
1845 case Ain_Alu32R:
1846 mapRegs_AMD64RMI(m, i->Ain.Alu32R.src);
1847 mapReg(m, &i->Ain.Alu32R.dst);
1848 return;
1849 case Ain_MulL:
1850 mapRegs_AMD64RM(m, i->Ain.MulL.src);
1851 return;
1852 case Ain_Div:
1853 mapRegs_AMD64RM(m, i->Ain.Div.src);
1854 return;
1855 case Ain_Push:
1856 mapRegs_AMD64RMI(m, i->Ain.Push.src);
1857 return;
1858 case Ain_Call:
1859 return;
1860 case Ain_XDirect:
1861 mapRegs_AMD64AMode(m, i->Ain.XDirect.amRIP);
1862 return;
1863 case Ain_XIndir:
1864 mapReg(m, &i->Ain.XIndir.dstGA);
1865 mapRegs_AMD64AMode(m, i->Ain.XIndir.amRIP);
1866 return;
1867 case Ain_XAssisted:
1868 mapReg(m, &i->Ain.XAssisted.dstGA);
1869 mapRegs_AMD64AMode(m, i->Ain.XAssisted.amRIP);
1870 return;
1871 case Ain_CMov64:
1872 mapReg(m, &i->Ain.CMov64.src);
1873 mapReg(m, &i->Ain.CMov64.dst);
1874 return;
1875 case Ain_CLoad:
1876 mapRegs_AMD64AMode(m, i->Ain.CLoad.addr);
1877 mapReg(m, &i->Ain.CLoad.dst);
1878 return;
1879 case Ain_CStore:
1880 mapRegs_AMD64AMode(m, i->Ain.CStore.addr);
1881 mapReg(m, &i->Ain.CStore.src);
1882 return;
1883 case Ain_MovxLQ:
1884 mapReg(m, &i->Ain.MovxLQ.src);
1885 mapReg(m, &i->Ain.MovxLQ.dst);
1886 return;
1887 case Ain_LoadEX:
1888 mapRegs_AMD64AMode(m, i->Ain.LoadEX.src);
1889 mapReg(m, &i->Ain.LoadEX.dst);
1890 return;
1891 case Ain_Store:
1892 mapReg(m, &i->Ain.Store.src);
1893 mapRegs_AMD64AMode(m, i->Ain.Store.dst);
1894 return;
1895 case Ain_Set64:
1896 mapReg(m, &i->Ain.Set64.dst);
1897 return;
1898 case Ain_Bsfr64:
1899 mapReg(m, &i->Ain.Bsfr64.src);
1900 mapReg(m, &i->Ain.Bsfr64.dst);
1901 return;
1902 case Ain_MFence:
1903 return;
1904 case Ain_ACAS:
1905 mapRegs_AMD64AMode(m, i->Ain.ACAS.addr);
1906 return;
1907 case Ain_DACAS:
1908 mapRegs_AMD64AMode(m, i->Ain.DACAS.addr);
1909 return;
1910 case Ain_A87Free:
1911 return;
1912 case Ain_A87PushPop:
1913 mapRegs_AMD64AMode(m, i->Ain.A87PushPop.addr);
1914 return;
1915 case Ain_A87FpOp:
1916 return;
1917 case Ain_A87LdCW:
1918 mapRegs_AMD64AMode(m, i->Ain.A87LdCW.addr);
1919 return;
1920 case Ain_A87StSW:
1921 mapRegs_AMD64AMode(m, i->Ain.A87StSW.addr);
1922 return;
1923 case Ain_LdMXCSR:
1924 mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr);
1925 return;
1926 case Ain_SseUComIS:
1927 mapReg(m, &i->Ain.SseUComIS.srcL);
1928 mapReg(m, &i->Ain.SseUComIS.srcR);
1929 mapReg(m, &i->Ain.SseUComIS.dst);
1930 return;
1931 case Ain_SseSI2SF:
1932 mapReg(m, &i->Ain.SseSI2SF.src);
1933 mapReg(m, &i->Ain.SseSI2SF.dst);
1934 return;
1935 case Ain_SseSF2SI:
1936 mapReg(m, &i->Ain.SseSF2SI.src);
1937 mapReg(m, &i->Ain.SseSF2SI.dst);
1938 return;
1939 case Ain_SseSDSS:
1940 mapReg(m, &i->Ain.SseSDSS.src);
1941 mapReg(m, &i->Ain.SseSDSS.dst);
1942 return;
1943 case Ain_SseLdSt:
1944 mapReg(m, &i->Ain.SseLdSt.reg);
1945 mapRegs_AMD64AMode(m, i->Ain.SseLdSt.addr);
1946 break;
1947 case Ain_SseCStore:
1948 mapRegs_AMD64AMode(m, i->Ain.SseCStore.addr);
1949 mapReg(m, &i->Ain.SseCStore.src);
1950 return;
1951 case Ain_SseCLoad:
1952 mapRegs_AMD64AMode(m, i->Ain.SseCLoad.addr);
1953 mapReg(m, &i->Ain.SseCLoad.dst);
1954 return;
1955 case Ain_SseLdzLO:
1956 mapReg(m, &i->Ain.SseLdzLO.reg);
1957 mapRegs_AMD64AMode(m, i->Ain.SseLdzLO.addr);
1958 break;
1959 case Ain_Sse32Fx4:
1960 mapReg(m, &i->Ain.Sse32Fx4.src);
1961 mapReg(m, &i->Ain.Sse32Fx4.dst);
1962 return;
1963 case Ain_Sse32FLo:
1964 mapReg(m, &i->Ain.Sse32FLo.src);
1965 mapReg(m, &i->Ain.Sse32FLo.dst);
1966 return;
1967 case Ain_Sse64Fx2:
1968 mapReg(m, &i->Ain.Sse64Fx2.src);
1969 mapReg(m, &i->Ain.Sse64Fx2.dst);
1970 return;
1971 case Ain_Sse64FLo:
1972 mapReg(m, &i->Ain.Sse64FLo.src);
1973 mapReg(m, &i->Ain.Sse64FLo.dst);
1974 return;
1975 case Ain_SseReRg:
1976 mapReg(m, &i->Ain.SseReRg.src);
1977 mapReg(m, &i->Ain.SseReRg.dst);
1978 return;
1979 case Ain_SseCMov:
1980 mapReg(m, &i->Ain.SseCMov.src);
1981 mapReg(m, &i->Ain.SseCMov.dst);
1982 return;
1983 case Ain_SseShuf:
1984 mapReg(m, &i->Ain.SseShuf.src);
1985 mapReg(m, &i->Ain.SseShuf.dst);
1986 return;
1987 case Ain_SseShiftN:
1988 mapReg(m, &i->Ain.SseShiftN.dst);
1989 return;
1990 case Ain_SseMOVQ:
1991 mapReg(m, &i->Ain.SseMOVQ.gpr);
1992 mapReg(m, &i->Ain.SseMOVQ.xmm);
1993 return;
1994 //uu case Ain_AvxLdSt:
1995 //uu mapReg(m, &i->Ain.AvxLdSt.reg);
1996 //uu mapRegs_AMD64AMode(m, i->Ain.AvxLdSt.addr);
1997 //uu break;
1998 //uu case Ain_AvxReRg:
1999 //uu mapReg(m, &i->Ain.AvxReRg.src);
2000 //uu mapReg(m, &i->Ain.AvxReRg.dst);
2001 //uu return;
2002 case Ain_EvCheck:
2003 /* We expect both amodes only to mention %rbp, so this is in
2004 fact pointless, since %rbp isn't allocatable, but anyway.. */
2005 mapRegs_AMD64AMode(m, i->Ain.EvCheck.amCounter);
2006 mapRegs_AMD64AMode(m, i->Ain.EvCheck.amFailAddr);
2007 return;
2008 case Ain_ProfInc:
2009 /* hardwires r11 -- nothing to modify. */
2010 return;
2011 default:
2012 ppAMD64Instr(i, mode64);
2013 vpanic("mapRegs_AMD64Instr");
2017 /* Generate amd64 spill/reload instructions under the direction of the
2018 register allocator. Note it's critical these don't write the
2019 condition codes. */
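/* For illustration: spilling an Int64 rreg such as %r12 at offsetB 48
   yields "movq %r12, 48(%rbp)" and the matching reload is
   "movq 48(%rbp), %r12"; a Vec128 rreg gives "movups %xmm3, 48(%rbp)"
   and "movups 48(%rbp), %xmm3".  None of these instructions modify
   %rflags, which is why the no-condition-code requirement holds. */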
2021 void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2022 HReg rreg, Int offsetB, Bool mode64 )
2024 AMD64AMode* am;
2025 vassert(offsetB >= 0);
2026 vassert(!hregIsVirtual(rreg));
2027 vassert(mode64 == True);
2028 *i1 = *i2 = NULL;
2029 am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
2030 switch (hregClass(rreg)) {
2031 case HRcInt64:
2032 *i1 = AMD64Instr_Alu64M ( Aalu_MOV, AMD64RI_Reg(rreg), am );
2033 return;
2034 case HRcVec128:
2035 *i1 = AMD64Instr_SseLdSt ( False/*store*/, 16, rreg, am );
2036 return;
2037 default:
2038 ppHRegClass(hregClass(rreg));
2039 vpanic("genSpill_AMD64: unimplemented regclass");
2043 void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2044 HReg rreg, Int offsetB, Bool mode64 )
2046 AMD64AMode* am;
2047 vassert(offsetB >= 0);
2048 vassert(!hregIsVirtual(rreg));
2049 vassert(mode64 == True);
2050 *i1 = *i2 = NULL;
2051 am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
2052 switch (hregClass(rreg)) {
2053 case HRcInt64:
2054 *i1 = AMD64Instr_Alu64R ( Aalu_MOV, AMD64RMI_Mem(am), rreg );
2055 return;
2056 case HRcVec128:
2057 *i1 = AMD64Instr_SseLdSt ( True/*load*/, 16, rreg, am );
2058 return;
2059 default:
2060 ppHRegClass(hregClass(rreg));
2061 vpanic("genReload_AMD64: unimplemented regclass");
2065 AMD64Instr* genMove_AMD64(HReg from, HReg to, Bool mode64)
2067 switch (hregClass(from)) {
2068 case HRcInt64:
2069 return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(from), to);
2070 case HRcVec128:
2071 return AMD64Instr_SseReRg(Asse_MOV, from, to);
2072 default:
2073 ppHRegClass(hregClass(from));
2074 vpanic("genMove_AMD64: unimplemented regclass");
2078 AMD64Instr* directReload_AMD64( AMD64Instr* i, HReg vreg, Short spill_off )
2080 vassert(spill_off >= 0 && spill_off < 10000); /* let's say */
2082 /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
2083 Convert to: src=RMI_Mem, dst=Reg
2085 if (i->tag == Ain_Alu64R
2086 && (i->Ain.Alu64R.op == Aalu_MOV || i->Ain.Alu64R.op == Aalu_OR
2087 || i->Ain.Alu64R.op == Aalu_XOR)
2088 && i->Ain.Alu64R.src->tag == Armi_Reg
2089 && sameHReg(i->Ain.Alu64R.src->Armi.Reg.reg, vreg)) {
2090 vassert(! sameHReg(i->Ain.Alu64R.dst, vreg));
2091 return AMD64Instr_Alu64R(
2092 i->Ain.Alu64R.op,
2093 AMD64RMI_Mem( AMD64AMode_IR( spill_off, hregAMD64_RBP())),
2094 i->Ain.Alu64R.dst
2098 /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
2099 Convert to: src=RI_Imm, dst=Mem
2101 if (i->tag == Ain_Alu64R
2102 && (i->Ain.Alu64R.op == Aalu_CMP)
2103 && i->Ain.Alu64R.src->tag == Armi_Imm
2104 && sameHReg(i->Ain.Alu64R.dst, vreg)) {
2105 return AMD64Instr_Alu64M(
2106 i->Ain.Alu64R.op,
2107 AMD64RI_Imm( i->Ain.Alu64R.src->Armi.Imm.imm32 ),
2108 AMD64AMode_IR( spill_off, hregAMD64_RBP())
2112 return NULL;
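/* For illustration: with spill_off == 24, "orq %vreg, %rdi" (src == vreg)
   becomes "orq 24(%rbp), %rdi", and "cmpq $7, %vreg" (dst == vreg)
   becomes "cmpq $7, 24(%rbp)", so no separate reload instruction is
   needed for the spilled vreg. */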
2116 /* --------- The amd64 assembler (bleh.) --------- */
2118 /* Produce the low three bits of an integer register number. */
2119 inline static UInt iregEnc210 ( HReg r )
2121 UInt n;
2122 vassert(hregClass(r) == HRcInt64);
2123 vassert(!hregIsVirtual(r));
2124 n = hregEncoding(r);
2125 vassert(n <= 15);
2126 return n & 7;
2129 /* Produce bit 3 of an integer register number. */
2130 inline static UInt iregEnc3 ( HReg r )
2132 UInt n;
2133 vassert(hregClass(r) == HRcInt64);
2134 vassert(!hregIsVirtual(r));
2135 n = hregEncoding(r);
2136 vassert(n <= 15);
2137 return (n >> 3) & 1;
2140 /* Produce a complete 4-bit integer register number. */
2141 inline static UInt iregEnc3210 ( HReg r )
2143 UInt n;
2144 vassert(hregClass(r) == HRcInt64);
2145 vassert(!hregIsVirtual(r));
2146 n = hregEncoding(r);
2147 vassert(n <= 15);
2148 return n;
2151 /* Produce a complete 4-bit integer register number. */
2152 inline static UInt vregEnc3210 ( HReg r )
2154 UInt n;
2155 vassert(hregClass(r) == HRcVec128);
2156 vassert(!hregIsVirtual(r));
2157 n = hregEncoding(r);
2158 vassert(n <= 15);
2159 return n;
2162 inline static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem )
2164 vassert(mod < 4);
2165 vassert((reg|regmem) < 8);
2166 return (UChar)( ((mod & 3) << 6) | ((reg & 7) << 3) | (regmem & 7) );
2169 inline static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase )
2171 vassert(shift < 4);
2172 vassert((regindex|regbase) < 8);
2173 return (UChar)( ((shift & 3) << 6) | ((regindex & 7) << 3) | (regbase & 7) );
2176 static UChar* emit32 ( UChar* p, UInt w32 )
2178 *p++ = toUChar((w32) & 0x000000FF);
2179 *p++ = toUChar((w32 >> 8) & 0x000000FF);
2180 *p++ = toUChar((w32 >> 16) & 0x000000FF);
2181 *p++ = toUChar((w32 >> 24) & 0x000000FF);
2182 return p;
2185 static UChar* emit64 ( UChar* p, ULong w64 )
2187 p = emit32(p, toUInt(w64 & 0xFFFFFFFF));
2188 p = emit32(p, toUInt((w64 >> 32) & 0xFFFFFFFF));
2189 return p;
2192 /* Does a sign-extend of the lowest 8 bits give
2193 the original number? */
2194 static Bool fits8bits ( UInt w32 )
2196 Int i32 = (Int)w32;
2197 return toBool(i32 == ((Int)(w32 << 24) >> 24));
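/* For example: fits8bits(0x7F) and fits8bits(0xFFFFFF80) hold, since
   those values are the sign-extensions of 0x7F and 0x80 respectively;
   fits8bits(0x80) does not, because 0x80 sign-extends to 0xFFFFFF80. */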
2199 /* Can the lower 32 bits be signedly widened to produce the whole
2200 64-bit value? In other words, are the top 33 bits either all 0 or
2201 all 1 ? */
2202 static Bool fitsIn32Bits ( ULong x )
2204 Long y1;
2205 y1 = x << 32;
2206 y1 >>=/*s*/ 32;
2207 return toBool(x == y1);
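/* For example: 0x000000007FFFFFFFULL and 0xFFFFFFFF80000000ULL both fit
   (top 33 bits all 0, respectively all 1), but 0x0000000080000000ULL does
   not, since sign-extending its low 32 bits would set the upper half. */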
2211 /* Forming mod-reg-rm bytes and scale-index-base bytes.
2213 greg, 0(ereg) | ereg is not any of: RSP RBP R12 R13
2214 = 00 greg ereg
2216 greg, d8(ereg) | ereg is neither of: RSP R12
2217 = 01 greg ereg, d8
2219 greg, d32(ereg) | ereg is neither of: RSP R12
2220 = 10 greg ereg, d32
2222 greg, d8(ereg) | ereg is either: RSP R12
2223 = 01 greg 100, 0x24, d8
2224 (lowest bit of rex distinguishes R12/RSP)
2226 greg, d32(ereg) | ereg is either: RSP R12
2227 = 10 greg 100, 0x24, d32
2228 (lowest bit of rex distinguishes R12/RSP)
2230 -----------------------------------------------
2232 greg, d8(base,index,scale)
2233 | index != RSP
2234 = 01 greg 100, scale index base, d8
2236 greg, d32(base,index,scale)
2237 | index != RSP
2238 = 10 greg 100, scale index base, d32
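/* Worked example: greg = %rax (encoding 0), am = 8(%rcx).  %rcx is
   neither RSP nor R12 and 8 fits in 8 bits, so the d8(ereg) form
   applies: mod=01, reg=000, rm=001, i.e. ModRM byte 0x41, followed by
   the disp8 byte 0x08. */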
2240 static UChar* doAMode_M__wrk ( UChar* p, UInt gregEnc3210, AMD64AMode* am )
2242 UInt gregEnc210 = gregEnc3210 & 7;
2243 if (am->tag == Aam_IR) {
2244 if (am->Aam.IR.imm == 0
2245 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2246 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RBP())
2247 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
2248 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R13())
2250 *p++ = mkModRegRM(0, gregEnc210, iregEnc210(am->Aam.IR.reg));
2251 return p;
2253 if (fits8bits(am->Aam.IR.imm)
2254 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2255 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
2257 *p++ = mkModRegRM(1, gregEnc210, iregEnc210(am->Aam.IR.reg));
2258 *p++ = toUChar(am->Aam.IR.imm & 0xFF);
2259 return p;
2261 if (! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2262 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
2264 *p++ = mkModRegRM(2, gregEnc210, iregEnc210(am->Aam.IR.reg));
2265 p = emit32(p, am->Aam.IR.imm);
2266 return p;
2268 if ((sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2269 || sameHReg(am->Aam.IR.reg, hregAMD64_R12()))
2270 && fits8bits(am->Aam.IR.imm)) {
2271 *p++ = mkModRegRM(1, gregEnc210, 4);
2272 *p++ = 0x24;
2273 *p++ = toUChar(am->Aam.IR.imm & 0xFF);
2274 return p;
2276 if (/* (sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2277 || wait for test case for RSP case */
2278 sameHReg(am->Aam.IR.reg, hregAMD64_R12())) {
2279 *p++ = mkModRegRM(2, gregEnc210, 4);
2280 *p++ = 0x24;
2281 p = emit32(p, am->Aam.IR.imm);
2282 return p;
2284 ppAMD64AMode(am);
2285 vpanic("doAMode_M: can't emit amode IR");
2286 /*NOTREACHED*/
2288 if (am->tag == Aam_IRRS) {
2289 if (fits8bits(am->Aam.IRRS.imm)
2290 && ! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) {
2291 *p++ = mkModRegRM(1, gregEnc210, 4);
2292 *p++ = mkSIB(am->Aam.IRRS.shift, iregEnc210(am->Aam.IRRS.index),
2293 iregEnc210(am->Aam.IRRS.base));
2294 *p++ = toUChar(am->Aam.IRRS.imm & 0xFF);
2295 return p;
2297 if (! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) {
2298 *p++ = mkModRegRM(2, gregEnc210, 4);
2299 *p++ = mkSIB(am->Aam.IRRS.shift, iregEnc210(am->Aam.IRRS.index),
2300 iregEnc210(am->Aam.IRRS.base));
2301 p = emit32(p, am->Aam.IRRS.imm);
2302 return p;
2304 ppAMD64AMode(am);
2305 vpanic("doAMode_M: can't emit amode IRRS");
2306 /*NOTREACHED*/
2308 vpanic("doAMode_M: unknown amode");
2309 /*NOTREACHED*/
2312 static UChar* doAMode_M ( UChar* p, HReg greg, AMD64AMode* am )
2314 return doAMode_M__wrk(p, iregEnc3210(greg), am);
2317 static UChar* doAMode_M_enc ( UChar* p, UInt gregEnc3210, AMD64AMode* am )
2319 vassert(gregEnc3210 < 16);
2320 return doAMode_M__wrk(p, gregEnc3210, am);
2324 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
2325 inline
2326 static UChar* doAMode_R__wrk ( UChar* p, UInt gregEnc3210, UInt eregEnc3210 )
2328 *p++ = mkModRegRM(3, gregEnc3210 & 7, eregEnc3210 & 7);
2329 return p;
2332 static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
2334 return doAMode_R__wrk(p, iregEnc3210(greg), iregEnc3210(ereg));
2337 static UChar* doAMode_R_enc_reg ( UChar* p, UInt gregEnc3210, HReg ereg )
2339 vassert(gregEnc3210 < 16);
2340 return doAMode_R__wrk(p, gregEnc3210, iregEnc3210(ereg));
2343 static UChar* doAMode_R_reg_enc ( UChar* p, HReg greg, UInt eregEnc3210 )
2345 vassert(eregEnc3210 < 16);
2346 return doAMode_R__wrk(p, iregEnc3210(greg), eregEnc3210);
2349 static UChar* doAMode_R_enc_enc ( UChar* p, UInt gregEnc3210, UInt eregEnc3210 )
2351 vassert( (gregEnc3210|eregEnc3210) < 16);
2352 return doAMode_R__wrk(p, gregEnc3210, eregEnc3210);
2356 /* Clear the W bit on a REX byte, thereby changing the operand size
2357 back to whatever that instruction's default operand size is. */
2358 static inline UChar clearWBit ( UChar rex )
2360 return rex & ~(1<<3);
2363 static inline UChar setWBit ( UChar rex )
2365 return rex | (1<<3);
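/* For example: clearWBit(0x48) == 0x40 (REX.W dropped, so the default
   operand size applies) and setWBit(0x40) == 0x48 (force 64-bit size). */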
2369 /* Make up a REX byte, with W=1 (size=64), for a (greg,amode) pair. */
2370 inline static UChar rexAMode_M__wrk ( UInt gregEnc3210, AMD64AMode* am )
2372 if (am->tag == Aam_IR) {
2373 UChar W = 1; /* we want 64-bit mode */
2374 UChar R = (gregEnc3210 >> 3) & 1;
2375 UChar X = 0; /* not relevant */
2376 UChar B = iregEnc3(am->Aam.IR.reg);
2377 return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
2379 if (am->tag == Aam_IRRS) {
2380 UChar W = 1; /* we want 64-bit mode */
2381 UChar R = (gregEnc3210 >> 3) & 1;
2382 UChar X = iregEnc3(am->Aam.IRRS.index);
2383 UChar B = iregEnc3(am->Aam.IRRS.base);
2384 return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
2386 vassert(0);
2387 return 0; /*NOTREACHED*/
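/* Worked example: greg encoding 8 (%r8) with an Aam_IR amode based on
   %rax gives W=1, R=1, X=0, B=0, hence REX byte 0x40 + 0xC = 0x4C. */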
2390 static UChar rexAMode_M ( HReg greg, AMD64AMode* am )
2392 return rexAMode_M__wrk(iregEnc3210(greg), am);
2395 static UChar rexAMode_M_enc ( UInt gregEnc3210, AMD64AMode* am )
2397 vassert(gregEnc3210 < 16);
2398 return rexAMode_M__wrk(gregEnc3210, am);
2402 /* Make up a REX byte, with W=1 (size=64), for a (greg,ereg) pair. */
2403 inline static UChar rexAMode_R__wrk ( UInt gregEnc3210, UInt eregEnc3210 )
2405 UChar W = 1; /* we want 64-bit mode */
2406 UChar R = (gregEnc3210 >> 3) & 1;
2407 UChar X = 0; /* not relevant */
2408 UChar B = (eregEnc3210 >> 3) & 1;
2409 return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
2412 static UChar rexAMode_R ( HReg greg, HReg ereg )
2414 return rexAMode_R__wrk(iregEnc3210(greg), iregEnc3210(ereg));
2417 static UChar rexAMode_R_enc_reg ( UInt gregEnc3210, HReg ereg )
2419 vassert(gregEnc3210 < 16);
2420 return rexAMode_R__wrk(gregEnc3210, iregEnc3210(ereg));
2423 static UChar rexAMode_R_reg_enc ( HReg greg, UInt eregEnc3210 )
2425 vassert(eregEnc3210 < 16);
2426 return rexAMode_R__wrk(iregEnc3210(greg), eregEnc3210);
2429 static UChar rexAMode_R_enc_enc ( UInt gregEnc3210, UInt eregEnc3210 )
2431 vassert((gregEnc3210|eregEnc3210) < 16);
2432 return rexAMode_R__wrk(gregEnc3210, eregEnc3210);
2436 //uu /* May 2012: this VEX prefix stuff is currently unused, but has
2437 //uu been verified correct (I reckon). Certainly it has been known to
2438 //uu produce correct VEX prefixes during testing. */
2439 //uu
2440 //uu /* Assemble a 2 or 3 byte VEX prefix from parts. rexR, rexX, rexB and
2441 //uu notVvvv need to be not-ed before packing. mmmmm, rexW, L and pp go
2442 //uu in verbatim. There's no range checking on the bits. */
2443 //uu static UInt packVexPrefix ( UInt rexR, UInt rexX, UInt rexB,
2444 //uu UInt mmmmm, UInt rexW, UInt notVvvv,
2445 //uu UInt L, UInt pp )
2446 //uu {
2447 //uu UChar byte0 = 0;
2448 //uu UChar byte1 = 0;
2449 //uu UChar byte2 = 0;
2450 //uu if (rexX == 0 && rexB == 0 && mmmmm == 1 && rexW == 0) {
2451 //uu /* 2 byte encoding is possible. */
2452 //uu byte0 = 0xC5;
2453 //uu byte1 = ((rexR ^ 1) << 7) | ((notVvvv ^ 0xF) << 3)
2454 //uu | (L << 2) | pp;
2455 //uu } else {
2456 //uu /* 3 byte encoding is needed. */
2457 //uu byte0 = 0xC4;
2458 //uu byte1 = ((rexR ^ 1) << 7) | ((rexX ^ 1) << 6)
2459 //uu | ((rexB ^ 1) << 5) | mmmmm;
2460 //uu byte2 = (rexW << 7) | ((notVvvv ^ 0xF) << 3) | (L << 2) | pp;
2461 //uu }
2462 //uu return (((UInt)byte2) << 16) | (((UInt)byte1) << 8) | ((UInt)byte0);
2463 //uu }
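//uu /* For example: rexR=0, rexX=0, rexB=0, mmmmm=1, rexW=0, notVvvv=0,
//uu    L=1, pp=0 meets the 2-byte condition, giving byte0 = 0xC5 and
//uu    byte1 = (1 << 7) | (0xF << 3) | (1 << 2) = 0xFC, so the packed
//uu    result is 0x0000FCC5. */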
2464 //uu
2465 //uu /* Make up a VEX prefix for a (greg,amode) pair. First byte in bits
2466 //uu 7:0 of result, second in 15:8, third (for a 3 byte prefix) in
2467 //uu 23:16. Has m-mmmm set to indicate a prefix of 0F, pp set to
2468 //uu indicate no SIMD prefix, W=0 (ignore), L=1 (size=256), and
2469 //uu vvvv=1111 (unused 3rd reg). */
2470 //uu static UInt vexAMode_M ( HReg greg, AMD64AMode* am )
2471 //uu {
2472 //uu UChar L = 1; /* size = 256 */
2473 //uu UChar pp = 0; /* no SIMD prefix */
2474 //uu UChar mmmmm = 1; /* 0F */
2475 //uu UChar notVvvv = 0; /* unused */
2476 //uu UChar rexW = 0;
2477 //uu UChar rexR = 0;
2478 //uu UChar rexX = 0;
2479 //uu UChar rexB = 0;
2480 //uu /* Same logic as in rexAMode_M. */
2481 //uu if (am->tag == Aam_IR) {
2482 //uu rexR = iregEnc3(greg);
2483 //uu rexX = 0; /* not relevant */
2484 //uu rexB = iregEnc3(am->Aam.IR.reg);
2485 //uu }
2486 //uu else if (am->tag == Aam_IRRS) {
2487 //uu rexR = iregEnc3(greg);
2488 //uu rexX = iregEnc3(am->Aam.IRRS.index);
2489 //uu rexB = iregEnc3(am->Aam.IRRS.base);
2490 //uu } else {
2491 //uu vassert(0);
2492 //uu }
2493 //uu return packVexPrefix( rexR, rexX, rexB, mmmmm, rexW, notVvvv, L, pp );
2494 //uu }
2495 //uu
2496 //uu static UChar* emitVexPrefix ( UChar* p, UInt vex )
2497 //uu {
2498 //uu switch (vex & 0xFF) {
2499 //uu case 0xC5:
2500 //uu *p++ = 0xC5;
2501 //uu *p++ = (vex >> 8) & 0xFF;
2502 //uu vassert(0 == (vex >> 16));
2503 //uu break;
2504 //uu case 0xC4:
2505 //uu *p++ = 0xC4;
2506 //uu *p++ = (vex >> 8) & 0xFF;
2507 //uu *p++ = (vex >> 16) & 0xFF;
2508 //uu vassert(0 == (vex >> 24));
2509 //uu break;
2510 //uu default:
2511 //uu vassert(0);
2512 //uu }
2513 //uu return p;
2514 //uu }
2517 /* Emit ffree %st(N) */
2518 static UChar* do_ffree_st ( UChar* p, Int n )
2520 vassert(n >= 0 && n <= 7);
2521 *p++ = 0xDD;
2522 *p++ = toUChar(0xC0 + n);
2523 return p;
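/* For example: do_ffree_st(p, 7) emits the two bytes 0xDD 0xC7,
   i.e. "ffree %st(7)". */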
2526 /* Emit an instruction into buf and return the number of bytes used.
2527 Note that buf is not the insn's final place, and therefore it is
2528 imperative to emit position-independent code. If the emitted
2529 instruction was a profiler inc, set *is_profInc to True, else
2530 leave it unchanged. */
2532 Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
2533 UChar* buf, Int nbuf, const AMD64Instr* i,
2534 Bool mode64, VexEndness endness_host,
2535 const void* disp_cp_chain_me_to_slowEP,
2536 const void* disp_cp_chain_me_to_fastEP,
2537 const void* disp_cp_xindir,
2538 const void* disp_cp_xassisted )
2540 UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
2541 UInt xtra;
2542 UInt reg;
2543 UChar rex;
2544 UChar* p = &buf[0];
2545 UChar* ptmp;
2546 Int j;
2547 vassert(nbuf >= 64);
2548 vassert(mode64 == True);
2550 /* vex_printf("asm "); ppAMD64Instr(i, mode64); vex_printf("\n"); */
2552 switch (i->tag) {
2554 case Ain_Imm64:
2555 if (i->Ain.Imm64.imm64 <= 0xFFFFFULL) {
2556 /* Use the short form (load into 32 bit reg, + default
2557 widening rule) for constants under 1 million. We could
2558 use this form for the range 0 to 0x7FFFFFFF inclusive, but
2559 limit it to a smaller range for verifiability purposes. */
2560 if (1 & iregEnc3(i->Ain.Imm64.dst))
2561 *p++ = 0x41;
2562 *p++ = 0xB8 + iregEnc210(i->Ain.Imm64.dst);
2563 p = emit32(p, (UInt)i->Ain.Imm64.imm64);
2564 } else {
2565 *p++ = toUChar(0x48 + (1 & iregEnc3(i->Ain.Imm64.dst)));
2566 *p++ = toUChar(0xB8 + iregEnc210(i->Ain.Imm64.dst));
2567 p = emit64(p, i->Ain.Imm64.imm64);
2569 goto done;
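/* For example: imm64 = 0x1234 into %r9 uses the short form
   41 B9 34 12 00 00 ("movl $0x1234, %r9d", zero-extended), whereas
   imm64 = 0x123456789A needs the long form
   49 B9 9A 78 56 34 12 00 00 00 ("movabsq $0x123456789A, %r9"). */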
2571 case Ain_Alu64R:
2572 /* Deal specially with MOV */
2573 if (i->Ain.Alu64R.op == Aalu_MOV) {
2574 switch (i->Ain.Alu64R.src->tag) {
2575 case Armi_Imm:
2576 if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFFFF)) {
2577 /* Actually we could use this form for constants in
2578 the range 0 through 0x7FFFFFFF inclusive, but
2579 limit it to a small range for verifiability
2580 purposes. */
2581 /* Generate "movl $imm32, 32-bit-register" and let
2582 the default zero-extend rule cause the upper half
2583 of the dst to be zeroed out too. This saves 1
2584 and sometimes 2 bytes compared to the more
2585 obvious encoding in the 'else' branch. */
2586 if (1 & iregEnc3(i->Ain.Alu64R.dst))
2587 *p++ = 0x41;
2588 *p++ = 0xB8 + iregEnc210(i->Ain.Alu64R.dst);
2589 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2590 } else {
2591 *p++ = toUChar(0x48 + (1 & iregEnc3(i->Ain.Alu64R.dst)));
2592 *p++ = 0xC7;
2593 *p++ = toUChar(0xC0 + iregEnc210(i->Ain.Alu64R.dst));
2594 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2596 goto done;
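/* For example: moving $5 into %rdx emits the 5-byte BA 05 00 00 00
   ("movl $5, %edx", upper half zeroed implicitly) rather than the
   7-byte 48 C7 C2 05 00 00 00 ("movq $5, %rdx"). */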
2597 case Armi_Reg:
2598 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
2599 i->Ain.Alu64R.dst );
2600 *p++ = 0x89;
2601 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
2602 i->Ain.Alu64R.dst);
2603 goto done;
2604 case Armi_Mem:
2605 *p++ = rexAMode_M(i->Ain.Alu64R.dst,
2606 i->Ain.Alu64R.src->Armi.Mem.am);
2607 *p++ = 0x8B;
2608 p = doAMode_M(p, i->Ain.Alu64R.dst,
2609 i->Ain.Alu64R.src->Armi.Mem.am);
2610 goto done;
2611 default:
2612 goto bad;
2615 /* MUL */
2616 if (i->Ain.Alu64R.op == Aalu_MUL) {
2617 switch (i->Ain.Alu64R.src->tag) {
2618 case Armi_Reg:
2619 *p++ = rexAMode_R( i->Ain.Alu64R.dst,
2620 i->Ain.Alu64R.src->Armi.Reg.reg);
2621 *p++ = 0x0F;
2622 *p++ = 0xAF;
2623 p = doAMode_R(p, i->Ain.Alu64R.dst,
2624 i->Ain.Alu64R.src->Armi.Reg.reg);
2625 goto done;
2626 case Armi_Mem:
2627 *p++ = rexAMode_M(i->Ain.Alu64R.dst,
2628 i->Ain.Alu64R.src->Armi.Mem.am);
2629 *p++ = 0x0F;
2630 *p++ = 0xAF;
2631 p = doAMode_M(p, i->Ain.Alu64R.dst,
2632 i->Ain.Alu64R.src->Armi.Mem.am);
2633 goto done;
2634 case Armi_Imm:
2635 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2636 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2637 *p++ = 0x6B;
2638 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2639 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
2640 } else {
2641 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2642 *p++ = 0x69;
2643 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2644 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2646 goto done;
2647 default:
2648 goto bad;
2651 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
2652 opc = opc_rr = subopc_imm = opc_imma = 0;
2653 switch (i->Ain.Alu64R.op) {
2654 case Aalu_ADC: opc = 0x13; opc_rr = 0x11;
2655 subopc_imm = 2; opc_imma = 0x15; break;
2656 case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
2657 subopc_imm = 0; opc_imma = 0x05; break;
2658 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
2659 subopc_imm = 5; opc_imma = 0x2D; break;
2660 case Aalu_SBB: opc = 0x1B; opc_rr = 0x19;
2661 subopc_imm = 3; opc_imma = 0x1D; break;
2662 case Aalu_AND: opc = 0x23; opc_rr = 0x21;
2663 subopc_imm = 4; opc_imma = 0x25; break;
2664 case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
2665 subopc_imm = 6; opc_imma = 0x35; break;
2666 case Aalu_OR: opc = 0x0B; opc_rr = 0x09;
2667 subopc_imm = 1; opc_imma = 0x0D; break;
2668 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
2669 subopc_imm = 7; opc_imma = 0x3D; break;
2670 default: goto bad;
2672 switch (i->Ain.Alu64R.src->tag) {
2673 case Armi_Imm:
2674 if (sameHReg(i->Ain.Alu64R.dst, hregAMD64_RAX())
2675 && !fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2676 goto bad; /* FIXME: awaiting test case */
2677 *p++ = toUChar(opc_imma);
2678 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2679 } else
2680 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2681 *p++ = rexAMode_R_enc_reg( 0, i->Ain.Alu64R.dst );
2682 *p++ = 0x83;
2683 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu64R.dst);
2684 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
2685 } else {
2686 *p++ = rexAMode_R_enc_reg( 0, i->Ain.Alu64R.dst);
2687 *p++ = 0x81;
2688 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu64R.dst);
2689 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2691 goto done;
2692 case Armi_Reg:
2693 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
2694 i->Ain.Alu64R.dst);
2695 *p++ = toUChar(opc_rr);
2696 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
2697 i->Ain.Alu64R.dst);
2698 goto done;
2699 case Armi_Mem:
2700 *p++ = rexAMode_M( i->Ain.Alu64R.dst,
2701 i->Ain.Alu64R.src->Armi.Mem.am);
2702 *p++ = toUChar(opc);
2703 p = doAMode_M(p, i->Ain.Alu64R.dst,
2704 i->Ain.Alu64R.src->Armi.Mem.am);
2705 goto done;
2706 default:
2707 goto bad;
2709 break;
2711 case Ain_Alu64M:
2712 /* Deal specially with MOV */
2713 if (i->Ain.Alu64M.op == Aalu_MOV) {
2714 switch (i->Ain.Alu64M.src->tag) {
2715 case Ari_Reg:
2716 *p++ = rexAMode_M(i->Ain.Alu64M.src->Ari.Reg.reg,
2717 i->Ain.Alu64M.dst);
2718 *p++ = 0x89;
2719 p = doAMode_M(p, i->Ain.Alu64M.src->Ari.Reg.reg,
2720 i->Ain.Alu64M.dst);
2721 goto done;
2722 case Ari_Imm:
2723 *p++ = rexAMode_M_enc(0, i->Ain.Alu64M.dst);
2724 *p++ = 0xC7;
2725 p = doAMode_M_enc(p, 0, i->Ain.Alu64M.dst);
2726 p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
2727 goto done;
2728 default:
2729 goto bad;
2732 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP. MUL is not
2733 allowed here. (This is derived from the x86 version of same). */
2734 opc = subopc_imm = opc_imma = 0;
2735 switch (i->Ain.Alu64M.op) {
2736 case Aalu_CMP: opc = 0x39; subopc_imm = 7; break;
2737 default: goto bad;
2739 switch (i->Ain.Alu64M.src->tag) {
2740 /*
2741 case Xri_Reg:
2742 *p++ = toUChar(opc);
2743 p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
2744 i->Xin.Alu32M.dst);
2745 goto done;
2746 */
2747 case Ari_Imm:
2748 if (fits8bits(i->Ain.Alu64M.src->Ari.Imm.imm32)) {
2749 *p++ = rexAMode_M_enc(subopc_imm, i->Ain.Alu64M.dst);
2750 *p++ = 0x83;
2751 p = doAMode_M_enc(p, subopc_imm, i->Ain.Alu64M.dst);
2752 *p++ = toUChar(0xFF & i->Ain.Alu64M.src->Ari.Imm.imm32);
2753 goto done;
2754 } else {
2755 *p++ = rexAMode_M_enc(subopc_imm, i->Ain.Alu64M.dst);
2756 *p++ = 0x81;
2757 p = doAMode_M_enc(p, subopc_imm, i->Ain.Alu64M.dst);
2758 p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
2759 goto done;
2761 default:
2762 goto bad;
2765 break;
2767 case Ain_Sh64:
2768 opc_cl = opc_imm = subopc = 0;
2769 switch (i->Ain.Sh64.op) {
2770 case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
2771 case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
2772 case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
2773 default: goto bad;
2775 if (i->Ain.Sh64.src == 0) {
2776 *p++ = rexAMode_R_enc_reg(0, i->Ain.Sh64.dst);
2777 *p++ = toUChar(opc_cl);
2778 p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh64.dst);
2779 goto done;
2780 } else {
2781 *p++ = rexAMode_R_enc_reg(0, i->Ain.Sh64.dst);
2782 *p++ = toUChar(opc_imm);
2783 p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh64.dst);
2784 *p++ = (UChar)(i->Ain.Sh64.src);
2785 goto done;
2787 break;
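/* For example: an SHL of %rcx by the immediate 3 emits 48 C1 E1 03
   ("shlq $3, %rcx"); src == 0 means "shift by %cl" and emits
   48 D3 E1 ("shlq %cl, %rcx"). */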
2789 case Ain_Sh32:
2790 opc_cl = opc_imm = subopc = 0;
2791 switch (i->Ain.Sh32.op) {
2792 case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
2793 case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
2794 case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
2795 default: goto bad;
2797 if (i->Ain.Sh32.src == 0) {
2798 rex = clearWBit( rexAMode_R_enc_reg(0, i->Ain.Sh32.dst) );
2799 if (rex != 0x40) *p++ = rex;
2800 *p++ = toUChar(opc_cl);
2801 p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh32.dst);
2802 goto done;
2803 } else {
2804 rex = clearWBit( rexAMode_R_enc_reg(0, i->Ain.Sh32.dst) );
2805 if (rex != 0x40) *p++ = rex;
2806 *p++ = toUChar(opc_imm);
2807 p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh32.dst);
2808 *p++ = (UChar)(i->Ain.Sh32.src);
2809 goto done;
2811 break;
2813 case Ain_Test64:
2814 /* testq sign-extend($imm32), %reg */
2815 *p++ = rexAMode_R_enc_reg(0, i->Ain.Test64.dst);
2816 *p++ = 0xF7;
2817 p = doAMode_R_enc_reg(p, 0, i->Ain.Test64.dst);
2818 p = emit32(p, i->Ain.Test64.imm32);
2819 goto done;
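/* For example: Test64 of imm32 0x100 against %rcx emits
   48 F7 C1 00 01 00 00, i.e. "testq $0x100, %rcx". */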
2821 case Ain_Unary64:
2822 if (i->Ain.Unary64.op == Aun_NOT) {
2823 *p++ = rexAMode_R_enc_reg(0, i->Ain.Unary64.dst);
2824 *p++ = 0xF7;
2825 p = doAMode_R_enc_reg(p, 2, i->Ain.Unary64.dst);
2826 goto done;
2828 if (i->Ain.Unary64.op == Aun_NEG) {
2829 *p++ = rexAMode_R_enc_reg(0, i->Ain.Unary64.dst);
2830 *p++ = 0xF7;
2831 p = doAMode_R_enc_reg(p, 3, i->Ain.Unary64.dst);
2832 goto done;
2834 break;
2836 case Ain_Lea64:
2837 *p++ = rexAMode_M(i->Ain.Lea64.dst, i->Ain.Lea64.am);
2838 *p++ = 0x8D;
2839 p = doAMode_M(p, i->Ain.Lea64.dst, i->Ain.Lea64.am);
2840 goto done;
2842 case Ain_Alu32R:
2843 /* ADD/SUB/AND/OR/XOR/CMP */
2844 opc = opc_rr = subopc_imm = opc_imma = 0;
2845 switch (i->Ain.Alu32R.op) {
2846 case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
2847 subopc_imm = 0; opc_imma = 0x05; break;
2848 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
2849 subopc_imm = 5; opc_imma = 0x2D; break;
2850 case Aalu_AND: opc = 0x23; opc_rr = 0x21;
2851 subopc_imm = 4; opc_imma = 0x25; break;
2852 case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
2853 subopc_imm = 6; opc_imma = 0x35; break;
2854 case Aalu_OR: opc = 0x0B; opc_rr = 0x09;
2855 subopc_imm = 1; opc_imma = 0x0D; break;
2856 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
2857 subopc_imm = 7; opc_imma = 0x3D; break;
2858 default: goto bad;
2860 switch (i->Ain.Alu32R.src->tag) {
2861 case Armi_Imm:
2862 if (sameHReg(i->Ain.Alu32R.dst, hregAMD64_RAX())
2863 && !fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
2864 goto bad; /* FIXME: awaiting test case */
2865 *p++ = toUChar(opc_imma);
2866 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
2867 } else
2868 if (fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
2869 rex = clearWBit( rexAMode_R_enc_reg( 0, i->Ain.Alu32R.dst ) );
2870 if (rex != 0x40) *p++ = rex;
2871 *p++ = 0x83;
2872 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu32R.dst);
2873 *p++ = toUChar(0xFF & i->Ain.Alu32R.src->Armi.Imm.imm32);
2874 } else {
2875 rex = clearWBit( rexAMode_R_enc_reg( 0, i->Ain.Alu32R.dst) );
2876 if (rex != 0x40) *p++ = rex;
2877 *p++ = 0x81;
2878 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu32R.dst);
2879 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
2881 goto done;
2882 case Armi_Reg:
2883 rex = clearWBit(
2884 rexAMode_R( i->Ain.Alu32R.src->Armi.Reg.reg,
2885 i->Ain.Alu32R.dst) );
2886 if (rex != 0x40) *p++ = rex;
2887 *p++ = toUChar(opc_rr);
2888 p = doAMode_R(p, i->Ain.Alu32R.src->Armi.Reg.reg,
2889 i->Ain.Alu32R.dst);
2890 goto done;
2891 case Armi_Mem:
2892 rex = clearWBit(
2893 rexAMode_M( i->Ain.Alu32R.dst,
2894 i->Ain.Alu32R.src->Armi.Mem.am) );
2895 if (rex != 0x40) *p++ = rex;
2896 *p++ = toUChar(opc);
2897 p = doAMode_M(p, i->Ain.Alu32R.dst,
2898 i->Ain.Alu32R.src->Armi.Mem.am);
2899 goto done;
2900 default:
2901 goto bad;
2903 break;
2905 case Ain_MulL:
2906 subopc = i->Ain.MulL.syned ? 5 : 4;
2907 switch (i->Ain.MulL.src->tag) {
2908 case Arm_Mem:
2909 *p++ = rexAMode_M_enc(0, i->Ain.MulL.src->Arm.Mem.am);
2910 *p++ = 0xF7;
2911 p = doAMode_M_enc(p, subopc, i->Ain.MulL.src->Arm.Mem.am);
2912 goto done;
2913 case Arm_Reg:
2914 *p++ = rexAMode_R_enc_reg(0, i->Ain.MulL.src->Arm.Reg.reg);
2915 *p++ = 0xF7;
2916 p = doAMode_R_enc_reg(p, subopc, i->Ain.MulL.src->Arm.Reg.reg);
2917 goto done;
2918 default:
2919 goto bad;
2921 break;
2923 case Ain_Div:
2924 subopc = i->Ain.Div.syned ? 7 : 6;
2925 if (i->Ain.Div.sz == 4) {
2926 switch (i->Ain.Div.src->tag) {
2927 case Arm_Mem:
2928 goto bad;
2929 /*FIXME*/
2930 *p++ = 0xF7;
2931 p = doAMode_M_enc(p, subopc, i->Ain.Div.src->Arm.Mem.am);
2932 goto done;
2933 case Arm_Reg:
2934 *p++ = clearWBit(
2935 rexAMode_R_enc_reg(0, i->Ain.Div.src->Arm.Reg.reg));
2936 *p++ = 0xF7;
2937 p = doAMode_R_enc_reg(p, subopc, i->Ain.Div.src->Arm.Reg.reg);
2938 goto done;
2939 default:
2940 goto bad;
2943 if (i->Ain.Div.sz == 8) {
2944 switch (i->Ain.Div.src->tag) {
2945 case Arm_Mem:
2946 *p++ = rexAMode_M_enc(0, i->Ain.Div.src->Arm.Mem.am);
2947 *p++ = 0xF7;
2948 p = doAMode_M_enc(p, subopc, i->Ain.Div.src->Arm.Mem.am);
2949 goto done;
2950 case Arm_Reg:
2951 *p++ = rexAMode_R_enc_reg(0, i->Ain.Div.src->Arm.Reg.reg);
2952 *p++ = 0xF7;
2953 p = doAMode_R_enc_reg(p, subopc, i->Ain.Div.src->Arm.Reg.reg);
2954 goto done;
2955 default:
2956 goto bad;
2959 break;
2961 case Ain_Push:
2962 switch (i->Ain.Push.src->tag) {
2963 case Armi_Mem:
2964 *p++ = clearWBit(
2965 rexAMode_M_enc(0, i->Ain.Push.src->Armi.Mem.am));
2966 *p++ = 0xFF;
2967 p = doAMode_M_enc(p, 6, i->Ain.Push.src->Armi.Mem.am);
2968 goto done;
2969 case Armi_Imm:
2970 *p++ = 0x68;
2971 p = emit32(p, i->Ain.Push.src->Armi.Imm.imm32);
2972 goto done;
2973 case Armi_Reg:
2974 *p++ = toUChar(0x40 + (1 & iregEnc3(i->Ain.Push.src->Armi.Reg.reg)));
2975 *p++ = toUChar(0x50 + iregEnc210(i->Ain.Push.src->Armi.Reg.reg));
2976 goto done;
2977 default:
2978 goto bad;
2981 case Ain_Call: {
2982 /* As per detailed comment for Ain_Call in getRegUsage_AMD64Instr
2983 above, %r11 is used as an address temporary. */
2984 /* If we don't need to do any fixup actions in the case that the
2985 call doesn't happen, just do the simple thing and emit
2986 straight-line code. This is usually the case. */
2987 if (i->Ain.Call.cond == Acc_ALWAYS/*call always happens*/
2988 || i->Ain.Call.rloc.pri == RLPri_None/*no fixup action*/) {
2989 /* jump over the following two insns if the condition does
2990 not hold */
2991 Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
2992 if (i->Ain.Call.cond != Acc_ALWAYS) {
2993 *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
2994 *p++ = shortImm ? 10 : 13;
2995 /* 10 or 13 bytes in the next two insns */
2997 if (shortImm) {
2998 /* 7 bytes: movl sign-extend(imm32), %r11 */
2999 *p++ = 0x49;
3000 *p++ = 0xC7;
3001 *p++ = 0xC3;
3002 p = emit32(p, (UInt)i->Ain.Call.target);
3003 } else {
3004 /* 10 bytes: movabsq $target, %r11 */
3005 *p++ = 0x49;
3006 *p++ = 0xBB;
3007 p = emit64(p, i->Ain.Call.target);
3009 /* 3 bytes: call *%r11 */
3010 *p++ = 0x41;
3011 *p++ = 0xFF;
3012 *p++ = 0xD3;
3013 } else {
3014 Int delta;
3015 /* Complex case. We have to generate an if-then-else diamond. */
3016 // before:
3017 // j{!cond} else:
3018 // movabsq $target, %r11
3019 // call* %r11
3020 // preElse:
3021 // jmp after:
3022 // else:
3023 // movabsq $0x5555555555555555, %rax // possibly
3024 // movq %rax, %rdx // possibly
3025 // after:
3027 // before:
3028 UChar* pBefore = p;
3030 // j{!cond} else:
3031 *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
3032 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3034 // movabsq $target, %r11
3035 *p++ = 0x49;
3036 *p++ = 0xBB;
3037 p = emit64(p, i->Ain.Call.target);
3039 // call* %r11
3040 *p++ = 0x41;
3041 *p++ = 0xFF;
3042 *p++ = 0xD3;
3044 // preElse:
3045 UChar* pPreElse = p;
3047 // jmp after:
3048 *p++ = 0xEB;
3049 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3051 // else:
3052 UChar* pElse = p;
3054 /* Do the 'else' actions */
3055 switch (i->Ain.Call.rloc.pri) {
3056 case RLPri_Int:
3057 // movabsq $0x5555555555555555, %rax
3058 *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
3059 break;
3060 case RLPri_2Int:
3061 goto bad; //ATC
3062 // movabsq $0x5555555555555555, %rax
3063 *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
3064 // movq %rax, %rdx
3065 *p++ = 0x48; *p++ = 0x89; *p++ = 0xC2;
3066 break;
3067 case RLPri_V128SpRel:
3068 if (i->Ain.Call.rloc.spOff == 0) {
3069 // We could accept any |spOff| here, but that's more
3070 // hassle and the only value we're ever going to get
3071 // is zero (I believe.) Hence take the easy path :)
3072 // We need a scag register -- r11 can be it.
3073 // movabsq $0x5555555555555555, %r11
3074 *p++ = 0x49; *p++ = 0xBB;
3075 p = emit64(p, 0x5555555555555555ULL);
3076 // movq %r11, 0(%rsp)
3077 *p++ = 0x4C; *p++ = 0x89; *p++ = 0x1C; *p++ = 0x24;
3078 // movq %r11, 8(%rsp)
3079 *p++ = 0x4C; *p++ = 0x89; *p++ = 0x5C; *p++ = 0x24;
3080 *p++ = 0x08;
3081 break;
3083 goto bad; //ATC for all other spOff values
3084 case RLPri_V256SpRel:
3085 goto bad; //ATC
3086 case RLPri_None: case RLPri_INVALID: default:
3087 vassert(0); // should never get here
3090 // after:
3091 UChar* pAfter = p;
3093 // Fix up the branch offsets. The +2s in the offset
3094 // calculations are there because x86 requires conditional
3095 // branches to have their offset stated relative to the
3096 // instruction immediately following the branch insn. And in
3097 // both cases the branch insns are 2 bytes long.
3099 // First, the "j{!cond} else:" at pBefore.
3100 delta = (Int)(Long)(pElse - (pBefore + 2));
3101 vassert(delta >= 0 && delta < 100/*arbitrary*/);
3102 *(pBefore+1) = (UChar)delta;
3104 // And secondly, the "jmp after:" at pPreElse.
3105 delta = (Int)(Long)(pAfter - (pPreElse + 2));
3106 vassert(delta >= 0 && delta < 100/*arbitrary*/);
3107 *(pPreElse+1) = (UChar)delta;
3109 goto done;
3112 case Ain_XDirect: {
3113 /* NB: what goes on here has to be very closely coordinated with the
3114 chainXDirect_AMD64 and unchainXDirect_AMD64 below. */
3115 /* We're generating chain-me requests here, so we need to be
3116 sure this is actually allowed -- no-redir translations can't
3117 use chain-me's. Hence: */
3118 vassert(disp_cp_chain_me_to_slowEP != NULL);
3119 vassert(disp_cp_chain_me_to_fastEP != NULL);
3121 HReg r11 = hregAMD64_R11();
3123 /* Use ptmp for backpatching conditional jumps. */
3124 ptmp = NULL;
3126 /* First off, if this is conditional, create a conditional
3127 jump over the rest of it. */
3128 if (i->Ain.XDirect.cond != Acc_ALWAYS) {
3129 /* jmp fwds if !condition */
3130 *p++ = toUChar(0x70 + (0xF & (i->Ain.XDirect.cond ^ 1)));
3131 ptmp = p; /* fill in this bit later */
3132 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3135 /* Update the guest RIP. */
3136 if (fitsIn32Bits(i->Ain.XDirect.dstGA)) {
3137 /* use a shorter encoding */
3138 /* movl sign-extend(dstGA), %r11 */
3139 *p++ = 0x49;
3140 *p++ = 0xC7;
3141 *p++ = 0xC3;
3142 p = emit32(p, (UInt)i->Ain.XDirect.dstGA);
3143 } else {
3144 /* movabsq $dstGA, %r11 */
3145 *p++ = 0x49;
3146 *p++ = 0xBB;
3147 p = emit64(p, i->Ain.XDirect.dstGA);
3150 /* movq %r11, amRIP */
3151 *p++ = rexAMode_M(r11, i->Ain.XDirect.amRIP);
3152 *p++ = 0x89;
3153 p = doAMode_M(p, r11, i->Ain.XDirect.amRIP);
3155 /* --- FIRST PATCHABLE BYTE follows --- */
3156 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
3157 to) backs up the return address, so as to find the address of
3158 the first patchable byte. So: don't change the length of the
3159 two instructions below. */
3160 /* movabsq $disp_cp_chain_me_to_{slow,fast}EP,%r11; */
3161 *p++ = 0x49;
3162 *p++ = 0xBB;
3163 const void* disp_cp_chain_me
3164 = i->Ain.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
3165 : disp_cp_chain_me_to_slowEP;
3166 p = emit64(p, (Addr)disp_cp_chain_me);
3167 /* call *%r11 */
3168 *p++ = 0x41;
3169 *p++ = 0xFF;
3170 *p++ = 0xD3;
3171 /* --- END of PATCHABLE BYTES --- */
3173 /* Fix up the conditional jump, if there was one. */
3174 if (i->Ain.XDirect.cond != Acc_ALWAYS) {
3175 Int delta = p - ptmp;
3176 vassert(delta > 0 && delta < 40);
3177 *ptmp = toUChar(delta-1);
3179 goto done;
3182 case Ain_XIndir: {
3183 /* We're generating transfers that could lead indirectly to a
3184 chain-me, so we need to be sure this is actually allowed --
3185 no-redir translations are not allowed to reach normal
3186 translations without going through the scheduler. That means
3187 no XDirects or XIndirs out from no-redir translations.
3188 Hence: */
3189 vassert(disp_cp_xindir != NULL);
3191 /* Use ptmp for backpatching conditional jumps. */
3192 ptmp = NULL;
3194 /* First off, if this is conditional, create a conditional
3195 jump over the rest of it. */
3196 if (i->Ain.XIndir.cond != Acc_ALWAYS) {
3197 /* jmp fwds if !condition */
3198 *p++ = toUChar(0x70 + (0xF & (i->Ain.XIndir.cond ^ 1)));
3199 ptmp = p; /* fill in this bit later */
3200 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3203 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
3204 *p++ = rexAMode_M(i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
3205 *p++ = 0x89;
3206 p = doAMode_M(p, i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
3208 /* get $disp_cp_xindir into %r11 */
3209 if (fitsIn32Bits((Addr)disp_cp_xindir)) {
3210 /* use a shorter encoding */
3211 /* movl sign-extend(disp_cp_xindir), %r11 */
3212 *p++ = 0x49;
3213 *p++ = 0xC7;
3214 *p++ = 0xC3;
3215 p = emit32(p, (UInt)(Addr)disp_cp_xindir);
3216 } else {
3217 /* movabsq $disp_cp_xindir, %r11 */
3218 *p++ = 0x49;
3219 *p++ = 0xBB;
3220 p = emit64(p, (Addr)disp_cp_xindir);
3223 /* jmp *%r11 */
3224 *p++ = 0x41;
3225 *p++ = 0xFF;
3226 *p++ = 0xE3;
3228 /* Fix up the conditional jump, if there was one. */
3229 if (i->Ain.XIndir.cond != Acc_ALWAYS) {
3230 Int delta = p - ptmp;
3231 vassert(delta > 0 && delta < 40);
3232 *ptmp = toUChar(delta-1);
3234 goto done;
3237 case Ain_XAssisted: {
3238 /* Use ptmp for backpatching conditional jumps. */
3239 ptmp = NULL;
3241 /* First off, if this is conditional, create a conditional
3242 jump over the rest of it. */
3243 if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
3244 /* jmp fwds if !condition */
3245 *p++ = toUChar(0x70 + (0xF & (i->Ain.XAssisted.cond ^ 1)));
3246 ptmp = p; /* fill in this bit later */
3247 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3250 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
3251 *p++ = rexAMode_M(i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
3252 *p++ = 0x89;
3253 p = doAMode_M(p, i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
3254 /* movl $magic_number, %ebp. Since these numbers are all small positive
3255 integers, we can get away with "movl $N, %ebp" rather than
3256 the longer "movq $N, %rbp". */
3257 UInt trcval = 0;
3258 switch (i->Ain.XAssisted.jk) {
3259 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
3260 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
3261 case Ijk_Sys_int32: trcval = VEX_TRC_JMP_SYS_INT32; break;
3262 case Ijk_Sys_int210: trcval = VEX_TRC_JMP_SYS_INT210; break;
3263 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
3264 case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
3265 case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
3266 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
3267 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
3268 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
3269 case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
3270 case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
3271 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
3272 /* We don't expect to see the following being assisted. */
3273 case Ijk_Ret:
3274 case Ijk_Call:
3275 /* fallthrough */
3276 default:
3277 ppIRJumpKind(i->Ain.XAssisted.jk);
3278 vpanic("emit_AMD64Instr.Ain_XAssisted: unexpected jump kind");
3280 vassert(trcval != 0);
3281 *p++ = 0xBD;
3282 p = emit32(p, trcval);
3283 /* movabsq $disp_cp_xassisted, %r11 */
3284 *p++ = 0x49;
3285 *p++ = 0xBB;
3286 p = emit64(p, (Addr)disp_cp_xassisted);
3287 /* jmp *%r11 */
3288 *p++ = 0x41;
3289 *p++ = 0xFF;
3290 *p++ = 0xE3;
3292 /* Fix up the conditional jump, if there was one. */
3293 if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
3294 Int delta = p - ptmp;
3295 vassert(delta > 0 && delta < 40);
3296 *ptmp = toUChar(delta-1);
3298 goto done;
3301 case Ain_CMov64:
3302 vassert(i->Ain.CMov64.cond != Acc_ALWAYS);
3303 *p++ = rexAMode_R(i->Ain.CMov64.dst, i->Ain.CMov64.src);
3304 *p++ = 0x0F;
3305 *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond));
3306 p = doAMode_R(p, i->Ain.CMov64.dst, i->Ain.CMov64.src);
3307 goto done;
3309 case Ain_CLoad: {
3310 vassert(i->Ain.CLoad.cond != Acc_ALWAYS);
3312 /* Only 32- or 64-bit variants are allowed. */
3313 vassert(i->Ain.CLoad.szB == 4 || i->Ain.CLoad.szB == 8);
3315 /* Use ptmp for backpatching conditional jumps. */
3316 ptmp = NULL;
3318 /* jmp fwds if !condition */
3319 *p++ = toUChar(0x70 + (0xF & (i->Ain.CLoad.cond ^ 1)));
3320 ptmp = p; /* fill in this bit later */
3321 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3323 /* Now the load. Either a normal 64 bit load or a normal 32 bit
3324 load, which, by the default zero-extension rule, zeroes out
3325 the upper half of the destination, as required. */
3326 rex = rexAMode_M(i->Ain.CLoad.dst, i->Ain.CLoad.addr);
3327 *p++ = i->Ain.CLoad.szB == 4 ? clearWBit(rex) : rex;
3328 *p++ = 0x8B;
3329 p = doAMode_M(p, i->Ain.CLoad.dst, i->Ain.CLoad.addr);
3331 /* Fix up the conditional branch */
3332 Int delta = p - ptmp;
3333 vassert(delta > 0 && delta < 40);
3334 *ptmp = toUChar(delta-1);
3335 goto done;
3338 case Ain_CStore: {
3339 /* AFAICS this is identical to Ain_CLoad except that the opcode
3340 is 0x89 instead of 0x8B. */
3341 vassert(i->Ain.CStore.cond != Acc_ALWAYS);
3343 /* Only 32- or 64-bit variants are allowed. */
3344 vassert(i->Ain.CStore.szB == 4 || i->Ain.CStore.szB == 8);
3346 /* Use ptmp for backpatching conditional jumps. */
3347 ptmp = NULL;
3349 /* jmp fwds if !condition */
3350 *p++ = toUChar(0x70 + (0xF & (i->Ain.CStore.cond ^ 1)));
3351 ptmp = p; /* fill in this bit later */
3352 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3354 /* Now the store. */
3355 rex = rexAMode_M(i->Ain.CStore.src, i->Ain.CStore.addr);
3356 *p++ = i->Ain.CStore.szB == 4 ? clearWBit(rex) : rex;
3357 *p++ = 0x89;
3358 p = doAMode_M(p, i->Ain.CStore.src, i->Ain.CStore.addr);
3360 /* Fix up the conditional branch */
3361 Int delta = p - ptmp;
3362 vassert(delta > 0 && delta < 40);
3363 *ptmp = toUChar(delta-1);
3364 goto done;
3367 case Ain_MovxLQ:
3368 /* No, _don't_ ask me why the sense of the args has to be
3369 different in the S vs Z case. I don't know. */
3370 if (i->Ain.MovxLQ.syned) {
3371 /* Need REX.W = 1 here, but rexAMode_R does that for us. */
3372 *p++ = rexAMode_R(i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
3373 *p++ = 0x63;
3374 p = doAMode_R(p, i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
3375 } else {
3376 /* Produce a 32-bit reg-reg move, since the implicit
3377 zero-extend does what we want. */
3378 *p++ = clearWBit (
3379 rexAMode_R(i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst));
3380 *p++ = 0x89;
3381 p = doAMode_R(p, i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst);
3383 goto done;
3385 case Ain_LoadEX:
3386 if (i->Ain.LoadEX.szSmall == 1 && !i->Ain.LoadEX.syned) {
3387 /* movzbq */
3388 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3389 *p++ = 0x0F;
3390 *p++ = 0xB6;
3391 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3392 goto done;
3394 if (i->Ain.LoadEX.szSmall == 2 && !i->Ain.LoadEX.syned) {
3395 /* movzwq */
3396 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3397 *p++ = 0x0F;
3398 *p++ = 0xB7;
3399 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3400 goto done;
3402 if (i->Ain.LoadEX.szSmall == 4 && !i->Ain.LoadEX.syned) {
3403 /* movzlq */
3404 /* This isn't really an existing AMD64 instruction per se.
3405 Rather, we have to do a 32-bit load. Because a 32-bit
3406 write implicitly clears the upper 32 bits of the target
3407 register, we get what we want. */
3408 *p++ = clearWBit(
3409 rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src));
3410 *p++ = 0x8B;
3411 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3412 goto done;
3414 break;
3416 case Ain_Set64:
3417 /* Make the destination register be 1 or 0, depending on whether
3418 the relevant condition holds. Complication: the top 56 bits
3419 of the destination should be forced to zero, but doing 'xorq
3420 %r,%r' kills the flag(s) we are about to read. Sigh. So
3421 start off by moving $0 into the dest. */
3422 reg = iregEnc3210(i->Ain.Set64.dst);
3423 vassert(reg < 16);
3425 /* movq $0, %dst */
3426 *p++ = toUChar(reg >= 8 ? 0x49 : 0x48);
3427 *p++ = 0xC7;
3428 *p++ = toUChar(0xC0 + (reg & 7));
3429 p = emit32(p, 0);
3431 /* setb lo8(%dst) */
3432 /* note, 8-bit register rex trickiness. Be careful here. */
3433 *p++ = toUChar(reg >= 8 ? 0x41 : 0x40);
3434 *p++ = 0x0F;
3435 *p++ = toUChar(0x90 + (0x0F & i->Ain.Set64.cond));
3436 *p++ = toUChar(0xC0 + (reg & 7));
3437 goto done;
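/* For example (assuming Acc_Z carries the hardware encoding 0x4):
   Set64 of %rax on condition Z emits 48 C7 C0 00 00 00 00
   ("movq $0, %rax") followed by 40 0F 94 C0 ("setz %al"). */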
3439 case Ain_Bsfr64:
3440 *p++ = rexAMode_R(i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
3441 *p++ = 0x0F;
3442 if (i->Ain.Bsfr64.isFwds) {
3443 *p++ = 0xBC;
3444 } else {
3445 *p++ = 0xBD;
3447 p = doAMode_R(p, i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
3448 goto done;
3450 case Ain_MFence:
3451 /* mfence */
3452 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
3453 goto done;
3455 case Ain_ACAS:
3456 /* lock */
3457 *p++ = 0xF0;
3458 if (i->Ain.ACAS.sz == 2) *p++ = 0x66;
3459 /* cmpxchg{b,w,l,q} %rbx,mem. Expected-value in %rax, new value
3460 in %rbx. The new-value register is hardwired to be %rbx
3461 since dealing with byte integer registers is too much hassle,
3462 so we force the register operand to %rbx (could equally be
3463 %rcx or %rdx). */
3464 rex = rexAMode_M( hregAMD64_RBX(), i->Ain.ACAS.addr );
3465 if (i->Ain.ACAS.sz != 8)
3466 rex = clearWBit(rex);
3468 *p++ = rex; /* this can emit 0x40, which is pointless. oh well. */
3469 *p++ = 0x0F;
3470 if (i->Ain.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
3471 p = doAMode_M(p, hregAMD64_RBX(), i->Ain.ACAS.addr);
3472 goto done;
3474 case Ain_DACAS:
3475 /* lock */
3476 *p++ = 0xF0;
3477 /* cmpxchg{8,16}b m{64,128}. Expected-value in %rdx:%rax, new
3478 value in %rcx:%rbx. All 4 regs are hardwired in the ISA, so
3479 aren't encoded in the insn. */
3480 rex = rexAMode_M_enc(1, i->Ain.DACAS.addr );
3481 if (i->Ain.DACAS.sz != 8)
3482 rex = clearWBit(rex);
3483 *p++ = rex;
3484 *p++ = 0x0F;
3485 *p++ = 0xC7;
3486 p = doAMode_M_enc(p, 1, i->Ain.DACAS.addr);
3487 goto done;
3489 case Ain_A87Free:
3490 vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7);
3491 for (j = 0; j < i->Ain.A87Free.nregs; j++) {
3492 p = do_ffree_st(p, 7-j);
3494 goto done;
3496 case Ain_A87PushPop:
3497 vassert(i->Ain.A87PushPop.szB == 8 || i->Ain.A87PushPop.szB == 4);
3498 if (i->Ain.A87PushPop.isPush) {
3499 /* Load from memory into %st(0): flds/fldl amode */
3500 *p++ = clearWBit(
3501 rexAMode_M_enc(0, i->Ain.A87PushPop.addr) );
3502 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
3503 p = doAMode_M_enc(p, 0/*subopcode*/, i->Ain.A87PushPop.addr);
3504 } else {
3505 /* Dump %st(0) to memory: fstps/fstpl amode */
3506 *p++ = clearWBit(
3507 rexAMode_M_enc(3, i->Ain.A87PushPop.addr) );
3508 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
3509 p = doAMode_M_enc(p, 3/*subopcode*/, i->Ain.A87PushPop.addr);
3510 goto done;
3512 goto done;
3514 case Ain_A87FpOp:
3515 switch (i->Ain.A87FpOp.op) {
3516 case Afp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
3517 case Afp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
3518 case Afp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
3519 case Afp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
3520 case Afp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
3521 case Afp_SCALE: *p++ = 0xD9; *p++ = 0xFD; break;
3522 case Afp_ATAN: *p++ = 0xD9; *p++ = 0xF3; break;
3523 case Afp_YL2X: *p++ = 0xD9; *p++ = 0xF1; break;
3524 case Afp_YL2XP1: *p++ = 0xD9; *p++ = 0xF9; break;
3525 case Afp_PREM: *p++ = 0xD9; *p++ = 0xF8; break;
3526 case Afp_PREM1: *p++ = 0xD9; *p++ = 0xF5; break;
3527 case Afp_TAN:
3528 /* fptan pushes 1.0 on the FP stack, except when the
3529 argument is out of range. Hence we have to do the
3530 instruction, then inspect C2 to see if there is an out
3531 of range condition. If there is, we skip the fincstp
3532 that is used by the in-range case to get rid of this
3533 extra 1.0 value. */
3534 *p++ = 0xD9; *p++ = 0xF2; // fptan
3535 *p++ = 0x50; // pushq %rax
3536 *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax
3537 *p++ = 0x66; *p++ = 0xA9;
3538 *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax
3539 *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp
3540 *p++ = 0xD9; *p++ = 0xF7; // fincstp
3541 *p++ = 0x58; // after_fincstp: popq %rax
3542 break;
3543 default:
3544 goto bad;
3546 goto done;
3548 case Ain_A87LdCW:
3549 *p++ = clearWBit(
3550 rexAMode_M_enc(5, i->Ain.A87LdCW.addr) );
3551 *p++ = 0xD9;
3552 p = doAMode_M_enc(p, 5/*subopcode*/, i->Ain.A87LdCW.addr);
3553 goto done;
3555 case Ain_A87StSW:
3556 *p++ = clearWBit(
3557 rexAMode_M_enc(7, i->Ain.A87StSW.addr) );
3558 *p++ = 0xDD;
3559 p = doAMode_M_enc(p, 7/*subopcode*/, i->Ain.A87StSW.addr);
3560 goto done;
3562 case Ain_Store:
3563 if (i->Ain.Store.sz == 2) {
3564 /* This just goes to show the craziness of the instruction
3565 set encoding. We have to insert two prefix bytes, but be
3566 careful to avoid a conflict in what the size should be, by
3567 ensuring that REX.W = 0. */
3568 *p++ = 0x66; /* override to 16-bits */
3569 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3570 *p++ = 0x89;
3571 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3572 goto done;
3574 if (i->Ain.Store.sz == 4) {
3575 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3576 *p++ = 0x89;
3577 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3578 goto done;
3580 if (i->Ain.Store.sz == 1) {
3581 /* This is one place where it would be wrong to skip emitting
3582 a rex byte of 0x40, since the mere presence of rex changes
3583 the meaning of the byte register access. Be careful. */
3584 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3585 *p++ = 0x88;
3586 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3587 goto done;
3589 break;
3591 case Ain_LdMXCSR:
3592 *p++ = clearWBit(rexAMode_M_enc(0, i->Ain.LdMXCSR.addr));
3593 *p++ = 0x0F;
3594 *p++ = 0xAE;
3595 p = doAMode_M_enc(p, 2/*subopcode*/, i->Ain.LdMXCSR.addr);
3596 goto done;
3598 case Ain_SseUComIS:
3599 /* ucomi[sd] %srcL, %srcR ; pushfq ; popq %dst */
3600 /* ucomi[sd] %srcL, %srcR */
3601 if (i->Ain.SseUComIS.sz == 8) {
3602 *p++ = 0x66;
3603 } else {
3604 goto bad;
3605 vassert(i->Ain.SseUComIS.sz == 4);
3606 }
3607 *p++ = clearWBit (
3608 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseUComIS.srcL),
3609 vregEnc3210(i->Ain.SseUComIS.srcR) ));
3610 *p++ = 0x0F;
3611 *p++ = 0x2E;
3612 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseUComIS.srcL),
3613 vregEnc3210(i->Ain.SseUComIS.srcR) );
3614 /* pushfq */
3615 *p++ = 0x9C;
3616 /* popq %dst */
3617 *p++ = toUChar(0x40 + (1 & iregEnc3(i->Ain.SseUComIS.dst)));
3618 *p++ = toUChar(0x58 + iregEnc210(i->Ain.SseUComIS.dst));
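/* Illustration: for dst == %r9 the two bytes above are 41 59, i.e. popq %r9; for a low register such as %rdx they are 40 5A, the 0x40 REX being redundant but harmless. */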
3619 goto done;
3621 case Ain_SseSI2SF:
3622 /* cvtsi2s[sd] %src, %dst */
3623 rex = rexAMode_R_enc_reg( vregEnc3210(i->Ain.SseSI2SF.dst),
3624 i->Ain.SseSI2SF.src );
3625 *p++ = toUChar(i->Ain.SseSI2SF.szD==4 ? 0xF3 : 0xF2);
3626 *p++ = toUChar(i->Ain.SseSI2SF.szS==4 ? clearWBit(rex) : rex);
3627 *p++ = 0x0F;
3628 *p++ = 0x2A;
3629 p = doAMode_R_enc_reg( p, vregEnc3210(i->Ain.SseSI2SF.dst),
3630 i->Ain.SseSI2SF.src );
3631 goto done;
3633 case Ain_SseSF2SI:
3634 /* cvts[sd]2si %src, %dst */
3635 rex = rexAMode_R_reg_enc( i->Ain.SseSF2SI.dst,
3636 vregEnc3210(i->Ain.SseSF2SI.src) );
3637 *p++ = toUChar(i->Ain.SseSF2SI.szS==4 ? 0xF3 : 0xF2);
3638 *p++ = toUChar(i->Ain.SseSF2SI.szD==4 ? clearWBit(rex) : rex);
3639 *p++ = 0x0F;
3640 *p++ = 0x2D;
3641 p = doAMode_R_reg_enc( p, i->Ain.SseSF2SI.dst,
3642 vregEnc3210(i->Ain.SseSF2SI.src) );
3643 goto done;
3645 case Ain_SseSDSS:
3646 /* cvtsd2ss/cvtss2sd %src, %dst */
3647 *p++ = toUChar(i->Ain.SseSDSS.from64 ? 0xF2 : 0xF3);
3648 *p++ = clearWBit(
3649 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseSDSS.dst),
3650 vregEnc3210(i->Ain.SseSDSS.src) ));
3651 *p++ = 0x0F;
3652 *p++ = 0x5A;
3653 p = doAMode_R_enc_enc( p, vregEnc3210(i->Ain.SseSDSS.dst),
3654 vregEnc3210(i->Ain.SseSDSS.src) );
3655 goto done;
3657 case Ain_SseLdSt:
3658 if (i->Ain.SseLdSt.sz == 8) {
3659 *p++ = 0xF2;
3660 } else
3661 if (i->Ain.SseLdSt.sz == 4) {
3662 *p++ = 0xF3;
3663 } else
3664 if (i->Ain.SseLdSt.sz != 16) {
3665 vassert(0);
3666 }
3667 *p++ = clearWBit(
3668 rexAMode_M_enc(vregEnc3210(i->Ain.SseLdSt.reg),
3669 i->Ain.SseLdSt.addr));
3670 *p++ = 0x0F;
3671 *p++ = toUChar(i->Ain.SseLdSt.isLoad ? 0x10 : 0x11);
3672 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseLdSt.reg),
3673 i->Ain.SseLdSt.addr);
3674 goto done;
3676 case Ain_SseCStore: {
3677 vassert(i->Ain.SseCStore.cond != Acc_ALWAYS);
3679 /* Use ptmp for backpatching conditional jumps. */
3680 ptmp = NULL;
3682 /* jmp fwds if !condition */
3683 *p++ = toUChar(0x70 + (0xF & (i->Ain.SseCStore.cond ^ 1)));
3684 ptmp = p; /* fill in this bit later */
3685 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
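/* Illustration: the AMD64CondCode values follow the x86 condition-code encoding, so 0x70+cond is the matching short Jcc and xor-ing with 1 negates it -- e.g. a 'Z' (zero) condition emits jnz (0x75) here, skipping the store when the condition does not hold. */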
3687 /* Now the store. */
3688 *p++ = clearWBit(
3689 rexAMode_M_enc(vregEnc3210(i->Ain.SseCStore.src),
3690 i->Ain.SseCStore.addr));
3691 *p++ = 0x0F;
3692 *p++ = toUChar(0x11);
3693 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseCStore.src),
3694 i->Ain.SseCStore.addr);
3696 /* Fix up the conditional branch */
3697 Int delta = p - ptmp;
3698 vassert(delta > 0 && delta < 40);
3699 *ptmp = toUChar(delta-1);
3700 goto done;
3701 }
3703 case Ain_SseCLoad: {
3704 vassert(i->Ain.SseCLoad.cond != Acc_ALWAYS);
3706 /* Use ptmp for backpatching conditional jumps. */
3707 ptmp = NULL;
3709 /* jmp fwds if !condition */
3710 *p++ = toUChar(0x70 + (0xF & (i->Ain.SseCLoad.cond ^ 1)));
3711 ptmp = p; /* fill in this bit later */
3712 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3714 /* Now the load. */
3715 *p++ = clearWBit(
3716 rexAMode_M_enc(vregEnc3210(i->Ain.SseCLoad.dst),
3717 i->Ain.SseCLoad.addr));
3718 *p++ = 0x0F;
3719 *p++ = toUChar(0x10);
3720 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseCLoad.dst),
3721 i->Ain.SseCLoad.addr);
3723 /* Fix up the conditional branch */
3724 Int delta = p - ptmp;
3725 vassert(delta > 0 && delta < 40);
3726 *ptmp = toUChar(delta-1);
3727 goto done;
3728 }
3730 case Ain_SseLdzLO:
3731 vassert(i->Ain.SseLdzLO.sz == 4 || i->Ain.SseLdzLO.sz == 8);
3732 /* movs[sd] amode, %xmm-dst */
3733 *p++ = toUChar(i->Ain.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
3734 *p++ = clearWBit(
3735 rexAMode_M_enc(vregEnc3210(i->Ain.SseLdzLO.reg),
3736 i->Ain.SseLdzLO.addr));
3737 *p++ = 0x0F;
3738 *p++ = 0x10;
3739 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseLdzLO.reg),
3740 i->Ain.SseLdzLO.addr);
3741 goto done;
3743 case Ain_Sse32Fx4: {
3744 UInt srcRegNo = vregEnc3210(i->Ain.Sse32Fx4.src);
3745 UInt dstRegNo = vregEnc3210(i->Ain.Sse32Fx4.dst);
3746 // VEX encoded cases
3747 switch (i->Ain.Sse32Fx4.op) {
3748 case Asse_F16toF32: { // vcvtph2ps %xmmS, %xmmD
3749 UInt s = srcRegNo;
3750 UInt d = dstRegNo;
3751 // VCVTPH2PS %xmmS, %xmmD (s and d are both xmm regs, range 0 .. 15)
3752 // 0xC4 : ~d3 1 ~s3 0 0 0 1 0 : 0x79 : 0x13 : 1 1 d2 d1 d0 s2 s1 s0
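// Worked example (illustration): s == 1, d == 2 gives bytes C4 E2 79 13 D1,
// i.e. vcvtph2ps %xmm1, %xmm2.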
3753 UInt byte2 = ((((~d)>>3)&1)<<7) | (1<<6)
3754 | ((((~s)>>3)&1)<<5) | (1<<1);
3755 UInt byte5 = (1<<7) | (1<<6) | ((d&7) << 3) | ((s&7) << 0);
3756 *p++ = 0xC4;
3757 *p++ = byte2;
3758 *p++ = 0x79;
3759 *p++ = 0x13;
3760 *p++ = byte5;
3761 goto done;
3762 }
3763 case Asse_F32toF16: { // vcvtps2ph $4, %xmmS, %xmmD
3764 UInt s = srcRegNo;
3765 UInt d = dstRegNo;
3766 // VCVTPS2PH $4, %xmmS, %xmmD (s and d both xmm regs, range 0 .. 15)
3767 // 0xC4 : ~s3 1 ~d3 0 0 0 1 1 : 0x79
3768 // : 0x1D : 11 s2 s1 s0 d2 d1 d0 : 0x4
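// Worked example (illustration): s == 1, d == 2 gives bytes C4 E3 79 1D CA 04,
// i.e. vcvtps2ph $4, %xmm1, %xmm2.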
3769 UInt byte2 = ((((~s)>>3)&1)<<7) | (1<<6)
3770 | ((((~d)>>3)&1)<<5) | (1<<1) | (1 << 0);
3771 UInt byte5 = (1<<7) | (1<<6) | ((s&7) << 3) | ((d&7) << 0);
3772 *p++ = 0xC4;
3773 *p++ = byte2;
3774 *p++ = 0x79;
3775 *p++ = 0x1D;
3776 *p++ = byte5;
3777 *p++ = 0x04;
3778 goto done;
3779 }
3780 default: break;
3781 }
3782 // After this point, REX encoded cases only
3783 xtra = 0;
3784 switch (i->Ain.Sse32Fx4.op) {
3785 case Asse_F2I: *p++ = 0x66; break;
3786 default: break;
3787 }
3788 *p++ = clearWBit(rexAMode_R_enc_enc(dstRegNo, srcRegNo));
3789 *p++ = 0x0F;
3790 switch (i->Ain.Sse32Fx4.op) {
3791 case Asse_ADDF: *p++ = 0x58; break;
3792 case Asse_DIVF: *p++ = 0x5E; break;
3793 case Asse_MAXF: *p++ = 0x5F; break;
3794 case Asse_MINF: *p++ = 0x5D; break;
3795 case Asse_MULF: *p++ = 0x59; break;
3796 case Asse_RCPF: *p++ = 0x53; break;
3797 case Asse_RSQRTF: *p++ = 0x52; break;
3798 case Asse_SQRTF: *p++ = 0x51; break;
3799 case Asse_I2F: *p++ = 0x5B; break; // cvtdq2ps; no 0x66 pfx
3800 case Asse_F2I: *p++ = 0x5B; break; // cvtps2dq; with 0x66 pfx
3801 case Asse_SUBF: *p++ = 0x5C; break;
3802 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3803 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3804 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3805 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3806 default: goto bad;
3807 }
3808 p = doAMode_R_enc_enc(p, dstRegNo, srcRegNo);
3809 if (xtra & 0x100)
3810 *p++ = toUChar(xtra & 0xFF);
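/* Illustration: the 0x100 bit in xtra only flags that a predicate immediate follows; e.g. Asse_CMPLTF emits 0F C2 /r with trailing immediate 01, i.e. cmpltps. */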
3811 goto done;
3812 }
3814 case Ain_Sse64Fx2:
3815 xtra = 0;
3816 *p++ = 0x66;
3817 *p++ = clearWBit(
3818 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse64Fx2.dst),
3819 vregEnc3210(i->Ain.Sse64Fx2.src) ));
3820 *p++ = 0x0F;
3821 switch (i->Ain.Sse64Fx2.op) {
3822 case Asse_ADDF: *p++ = 0x58; break;
3823 case Asse_DIVF: *p++ = 0x5E; break;
3824 case Asse_MAXF: *p++ = 0x5F; break;
3825 case Asse_MINF: *p++ = 0x5D; break;
3826 case Asse_MULF: *p++ = 0x59; break;
3827 case Asse_SQRTF: *p++ = 0x51; break;
3828 case Asse_SUBF: *p++ = 0x5C; break;
3829 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3830 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3831 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3832 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3833 default: goto bad;
3834 }
3835 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse64Fx2.dst),
3836 vregEnc3210(i->Ain.Sse64Fx2.src) );
3837 if (xtra & 0x100)
3838 *p++ = toUChar(xtra & 0xFF);
3839 goto done;
3841 case Ain_Sse32FLo:
3842 xtra = 0;
3843 *p++ = 0xF3;
3844 *p++ = clearWBit(
3845 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse32FLo.dst),
3846 vregEnc3210(i->Ain.Sse32FLo.src) ));
3847 *p++ = 0x0F;
3848 switch (i->Ain.Sse32FLo.op) {
3849 case Asse_ADDF: *p++ = 0x58; break;
3850 case Asse_DIVF: *p++ = 0x5E; break;
3851 case Asse_MAXF: *p++ = 0x5F; break;
3852 case Asse_MINF: *p++ = 0x5D; break;
3853 case Asse_MULF: *p++ = 0x59; break;
3854 case Asse_RCPF: *p++ = 0x53; break;
3855 case Asse_RSQRTF: *p++ = 0x52; break;
3856 case Asse_SQRTF: *p++ = 0x51; break;
3857 case Asse_SUBF: *p++ = 0x5C; break;
3858 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3859 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3860 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3861 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3862 default: goto bad;
3863 }
3864 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse32FLo.dst),
3865 vregEnc3210(i->Ain.Sse32FLo.src) );
3866 if (xtra & 0x100)
3867 *p++ = toUChar(xtra & 0xFF);
3868 goto done;
3870 case Ain_Sse64FLo:
3871 xtra = 0;
3872 *p++ = 0xF2;
3873 *p++ = clearWBit(
3874 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse64FLo.dst),
3875 vregEnc3210(i->Ain.Sse64FLo.src) ));
3876 *p++ = 0x0F;
3877 switch (i->Ain.Sse64FLo.op) {
3878 case Asse_ADDF: *p++ = 0x58; break;
3879 case Asse_DIVF: *p++ = 0x5E; break;
3880 case Asse_MAXF: *p++ = 0x5F; break;
3881 case Asse_MINF: *p++ = 0x5D; break;
3882 case Asse_MULF: *p++ = 0x59; break;
3883 case Asse_SQRTF: *p++ = 0x51; break;
3884 case Asse_SUBF: *p++ = 0x5C; break;
3885 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3886 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3887 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3888 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3889 default: goto bad;
3890 }
3891 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse64FLo.dst),
3892 vregEnc3210(i->Ain.Sse64FLo.src) );
3893 if (xtra & 0x100)
3894 *p++ = toUChar(xtra & 0xFF);
3895 goto done;
3897 case Ain_SseReRg:
3898 # define XX(_n) *p++ = (_n)
3900 rex = clearWBit(
3901 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseReRg.dst),
3902 vregEnc3210(i->Ain.SseReRg.src) ));
3904 switch (i->Ain.SseReRg.op) {
3905 case Asse_MOV: /*movups*/ XX(rex); XX(0x0F); XX(0x10); break;
3906 case Asse_OR: XX(rex); XX(0x0F); XX(0x56); break;
3907 case Asse_XOR: XX(rex); XX(0x0F); XX(0x57); break;
3908 case Asse_AND: XX(rex); XX(0x0F); XX(0x54); break;
3909 case Asse_ANDN: XX(rex); XX(0x0F); XX(0x55); break;
3910 case Asse_PACKSSD: XX(0x66); XX(rex); XX(0x0F); XX(0x6B); break;
3911 case Asse_PACKSSW: XX(0x66); XX(rex); XX(0x0F); XX(0x63); break;
3912 case Asse_PACKUSW: XX(0x66); XX(rex); XX(0x0F); XX(0x67); break;
3913 case Asse_ADD8: XX(0x66); XX(rex); XX(0x0F); XX(0xFC); break;
3914 case Asse_ADD16: XX(0x66); XX(rex); XX(0x0F); XX(0xFD); break;
3915 case Asse_ADD32: XX(0x66); XX(rex); XX(0x0F); XX(0xFE); break;
3916 case Asse_ADD64: XX(0x66); XX(rex); XX(0x0F); XX(0xD4); break;
3917 case Asse_QADD8S: XX(0x66); XX(rex); XX(0x0F); XX(0xEC); break;
3918 case Asse_QADD16S: XX(0x66); XX(rex); XX(0x0F); XX(0xED); break;
3919 case Asse_QADD8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDC); break;
3920 case Asse_QADD16U: XX(0x66); XX(rex); XX(0x0F); XX(0xDD); break;
3921 case Asse_AVG8U: XX(0x66); XX(rex); XX(0x0F); XX(0xE0); break;
3922 case Asse_AVG16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE3); break;
3923 case Asse_CMPEQ8: XX(0x66); XX(rex); XX(0x0F); XX(0x74); break;
3924 case Asse_CMPEQ16: XX(0x66); XX(rex); XX(0x0F); XX(0x75); break;
3925 case Asse_CMPEQ32: XX(0x66); XX(rex); XX(0x0F); XX(0x76); break;
3926 case Asse_CMPGT8S: XX(0x66); XX(rex); XX(0x0F); XX(0x64); break;
3927 case Asse_CMPGT16S: XX(0x66); XX(rex); XX(0x0F); XX(0x65); break;
3928 case Asse_CMPGT32S: XX(0x66); XX(rex); XX(0x0F); XX(0x66); break;
3929 case Asse_MAX16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEE); break;
3930 case Asse_MAX8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDE); break;
3931 case Asse_MIN16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEA); break;
3932 case Asse_MIN8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDA); break;
3933 case Asse_MULHI16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE4); break;
3934 case Asse_MULHI16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE5); break;
3935 case Asse_MUL16: XX(0x66); XX(rex); XX(0x0F); XX(0xD5); break;
3936 case Asse_SHL16: XX(0x66); XX(rex); XX(0x0F); XX(0xF1); break;
3937 case Asse_SHL32: XX(0x66); XX(rex); XX(0x0F); XX(0xF2); break;
3938 case Asse_SHL64: XX(0x66); XX(rex); XX(0x0F); XX(0xF3); break;
3939 case Asse_SAR16: XX(0x66); XX(rex); XX(0x0F); XX(0xE1); break;
3940 case Asse_SAR32: XX(0x66); XX(rex); XX(0x0F); XX(0xE2); break;
3941 case Asse_SHR16: XX(0x66); XX(rex); XX(0x0F); XX(0xD1); break;
3942 case Asse_SHR32: XX(0x66); XX(rex); XX(0x0F); XX(0xD2); break;
3943 case Asse_SHR64: XX(0x66); XX(rex); XX(0x0F); XX(0xD3); break;
3944 case Asse_SUB8: XX(0x66); XX(rex); XX(0x0F); XX(0xF8); break;
3945 case Asse_SUB16: XX(0x66); XX(rex); XX(0x0F); XX(0xF9); break;
3946 case Asse_SUB32: XX(0x66); XX(rex); XX(0x0F); XX(0xFA); break;
3947 case Asse_SUB64: XX(0x66); XX(rex); XX(0x0F); XX(0xFB); break;
3948 case Asse_QSUB8S: XX(0x66); XX(rex); XX(0x0F); XX(0xE8); break;
3949 case Asse_QSUB16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE9); break;
3950 case Asse_QSUB8U: XX(0x66); XX(rex); XX(0x0F); XX(0xD8); break;
3951 case Asse_QSUB16U: XX(0x66); XX(rex); XX(0x0F); XX(0xD9); break;
3952 case Asse_UNPCKHB: XX(0x66); XX(rex); XX(0x0F); XX(0x68); break;
3953 case Asse_UNPCKHW: XX(0x66); XX(rex); XX(0x0F); XX(0x69); break;
3954 case Asse_UNPCKHD: XX(0x66); XX(rex); XX(0x0F); XX(0x6A); break;
3955 case Asse_UNPCKHQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6D); break;
3956 case Asse_UNPCKLB: XX(0x66); XX(rex); XX(0x0F); XX(0x60); break;
3957 case Asse_UNPCKLW: XX(0x66); XX(rex); XX(0x0F); XX(0x61); break;
3958 case Asse_UNPCKLD: XX(0x66); XX(rex); XX(0x0F); XX(0x62); break;
3959 case Asse_UNPCKLQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6C); break;
3960 case Asse_PSHUFB: XX(0x66); XX(rex);
3961 XX(0x0F); XX(0x38); XX(0x00); break;
3962 case Asse_PMADDUBSW:XX(0x66); XX(rex);
3963 XX(0x0F); XX(0x38); XX(0x04); break;
3964 default: goto bad;
3965 }
3966 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseReRg.dst),
3967 vregEnc3210(i->Ain.SseReRg.src) );
3968 # undef XX
3969 goto done;
3971 case Ain_SseCMov:
3972 /* jmp fwds if !condition */
3973 *p++ = toUChar(0x70 + (i->Ain.SseCMov.cond ^ 1));
3974 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
3975 ptmp = p;
3977 /* movaps %src, %dst */
3978 *p++ = clearWBit(
3979 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseCMov.dst),
3980 vregEnc3210(i->Ain.SseCMov.src) ));
3981 *p++ = 0x0F;
3982 *p++ = 0x28;
3983 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseCMov.dst),
3984 vregEnc3210(i->Ain.SseCMov.src) );
3986 /* Fill in the jump offset. */
3987 *(ptmp-1) = toUChar(p - ptmp);
3988 goto done;
3990 case Ain_SseShuf:
3991 *p++ = 0x66;
3992 *p++ = clearWBit(
3993 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseShuf.dst),
3994 vregEnc3210(i->Ain.SseShuf.src) ));
3995 *p++ = 0x0F;
3996 *p++ = 0x70;
3997 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseShuf.dst),
3998 vregEnc3210(i->Ain.SseShuf.src) );
3999 *p++ = (UChar)(i->Ain.SseShuf.order);
4000 goto done;
4002 case Ain_SseShiftN: {
4003 UInt limit = 0;
4004 UInt shiftImm = i->Ain.SseShiftN.shiftBits;
4005 switch (i->Ain.SseShiftN.op) {
4006 case Asse_SHL16: limit = 15; opc = 0x71; subopc_imm = 6; break;
4007 case Asse_SHL32: limit = 31; opc = 0x72; subopc_imm = 6; break;
4008 case Asse_SHL64: limit = 63; opc = 0x73; subopc_imm = 6; break;
4009 case Asse_SAR16: limit = 15; opc = 0x71; subopc_imm = 4; break;
4010 case Asse_SAR32: limit = 31; opc = 0x72; subopc_imm = 4; break;
4011 case Asse_SHR16: limit = 15; opc = 0x71; subopc_imm = 2; break;
4012 case Asse_SHR32: limit = 31; opc = 0x72; subopc_imm = 2; break;
4013 case Asse_SHR64: limit = 63; opc = 0x73; subopc_imm = 2; break;
4014 case Asse_SHL128:
4015 if ((shiftImm & 7) != 0) goto bad;
4016 shiftImm >>= 3;
4017 limit = 15; opc = 0x73; subopc_imm = 7;
4018 break;
4019 case Asse_SHR128:
4020 if ((shiftImm & 7) != 0) goto bad;
4021 shiftImm >>= 3;
4022 limit = 15; opc = 0x73; subopc_imm = 3;
4023 break;
4024 default:
4025 // This should never happen .. SSE2 only offers the above 10 insns
4026 // for the "shift with immediate" case
4027 goto bad;
4028 }
4029 vassert(limit > 0 && opc > 0 && subopc_imm > 0);
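/* Illustration: Asse_SHL128 with shiftBits == 16 reaches here as shiftImm == 2 and is emitted as pslldq $2 (opcode 0x73, /7), a byte-granularity shift; bit counts that are not a multiple of 8 were rejected above. */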
4030 if (shiftImm > limit) goto bad;
4031 *p++ = 0x66;
4032 *p++ = clearWBit(
4033 rexAMode_R_enc_enc( subopc_imm,
4034 vregEnc3210(i->Ain.SseShiftN.dst) ));
4035 *p++ = 0x0F;
4036 *p++ = opc;
4037 p = doAMode_R_enc_enc(p, subopc_imm, vregEnc3210(i->Ain.SseShiftN.dst));
4038 *p++ = shiftImm;
4039 goto done;
4040 }
4042 case Ain_SseMOVQ: {
4043 Bool toXMM = i->Ain.SseMOVQ.toXMM;
4044 HReg gpr = i->Ain.SseMOVQ.gpr;
4045 HReg xmm = i->Ain.SseMOVQ.xmm;
4046 *p++ = 0x66;
4047 *p++ = setWBit( rexAMode_R_enc_enc( vregEnc3210(xmm), iregEnc3210(gpr)) );
4048 *p++ = 0x0F;
4049 *p++ = toXMM ? 0x6E : 0x7E;
4050 p = doAMode_R_enc_enc( p, vregEnc3210(xmm), iregEnc3210(gpr) );
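/* Illustration: toXMM == True with gpr == %rax, xmm == %xmm0 gives 66 48 0F 6E C0, i.e. movq %rax,%xmm0; toXMM == False only flips the opcode to 0x7E, giving movq %xmm0,%rax. */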
4051 goto done;
4052 }
4054 //uu case Ain_AvxLdSt: {
4055 //uu UInt vex = vexAMode_M( dvreg2ireg(i->Ain.AvxLdSt.reg),
4056 //uu i->Ain.AvxLdSt.addr );
4057 //uu p = emitVexPrefix(p, vex);
4058 //uu *p++ = toUChar(i->Ain.AvxLdSt.isLoad ? 0x10 : 0x11);
4059 //uu p = doAMode_M(p, dvreg2ireg(i->Ain.AvxLdSt.reg), i->Ain.AvxLdSt.addr);
4060 //uu goto done;
4061 //uu }
4063 case Ain_EvCheck: {
4064 /* We generate:
4065 (3 bytes) decl 8(%rbp) 8 == offsetof(host_EvC_COUNTER)
4066 (2 bytes) jns nofail expected taken
4067 (3 bytes) jmp* 0(%rbp) 0 == offsetof(host_EvC_FAILADDR)
4068 nofail:
4069 */
4070 /* This is heavily asserted re instruction lengths. It needs to
4071 be. If we get given unexpected forms of .amCounter or
4072 .amFailAddr -- basically, anything that's not of the form
4073 uimm7(%rbp) -- the assertions here are likely to fail. */
4074 /* Note also that after the decl we must be very careful not to
4075 read the carry flag, else we get a partial flags stall.
4076 js/jns avoids that, though. */
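/* Worked example (illustration), assuming amCounter == 8(%rbp) and amFailAddr == 0(%rbp) as in the sketch above: the 8 bytes emitted are FF 4D 08 (decl 8(%rbp)), 79 03 (jns +3), FF 65 00 (jmp* 0(%rbp)) -- matching evCheckSzB_AMD64() == 8. */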
4077 UChar* p0 = p;
4078 /* --- decl 8(%rbp) --- */
4079 /* Need to compute the REX byte for the decl in order to prove
4080 that we don't need it, since this is a 32-bit decl and all
4081 registers involved in the amode are < r8. "1" because
4082 there's no register in this encoding; instead the register
4083 field is used as a sub opcode. The encoding for "decl r/m32"
4084 is FF /1, hence the "1". */
4085 rex = clearWBit(rexAMode_M_enc(1, i->Ain.EvCheck.amCounter));
4086 if (rex != 0x40) goto bad; /* We don't expect to need the REX byte. */
4087 *p++ = 0xFF;
4088 p = doAMode_M_enc(p, 1, i->Ain.EvCheck.amCounter);
4089 vassert(p - p0 == 3);
4090 /* --- jns nofail --- */
4091 *p++ = 0x79;
4092 *p++ = 0x03; /* need to check this 0x03 after the next insn */
4093 vassert(p - p0 == 5);
4094 /* --- jmp* 0(%rbp) --- */
4095 /* Once again, verify we don't need REX. The encoding is FF /4.
4096 We don't need REX.W since by default FF /4 in 64-bit mode
4097 implies a 64 bit load. */
4098 rex = clearWBit(rexAMode_M_enc(4, i->Ain.EvCheck.amFailAddr));
4099 if (rex != 0x40) goto bad;
4100 *p++ = 0xFF;
4101 p = doAMode_M_enc(p, 4, i->Ain.EvCheck.amFailAddr);
4102 vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
4103 /* And crosscheck .. */
4104 vassert(evCheckSzB_AMD64() == 8);
4105 goto done;
4106 }
4108 case Ain_ProfInc: {
4109 /* We generate movabsq $0, %r11
4110 incq (%r11)
4111 in the expectation that a later call to LibVEX_patchProfCtr
4112 will be used to fill in the immediate field once the right
4113 value is known.
4114 49 BB 00 00 00 00 00 00 00 00
4115 49 FF 03
4116 */
4117 *p++ = 0x49; *p++ = 0xBB;
4118 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
4119 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
4120 *p++ = 0x49; *p++ = 0xFF; *p++ = 0x03;
4121 /* Tell the caller .. */
4122 vassert(!(*is_profInc));
4123 *is_profInc = True;
4124 goto done;
4125 }
4127 default:
4128 goto bad;
4129 }
4131 bad:
4132 ppAMD64Instr(i, mode64);
4133 vpanic("emit_AMD64Instr");
4134 /*NOTREACHED*/
4136 done:
4137 vassert(p - &buf[0] <= 64);
4138 return p - &buf[0];
4139 }
4142 /* How big is an event check? See case for Ain_EvCheck in
4143 emit_AMD64Instr just above. That crosschecks what this returns, so
4144 we can tell if we're inconsistent. */
4145 Int evCheckSzB_AMD64 (void)
4146 {
4147 return 8;
4148 }
4151 /* NB: what goes on here has to be very closely coordinated with the
4152 emitInstr case for XDirect, above. */
4153 VexInvalRange chainXDirect_AMD64 ( VexEndness endness_host,
4154 void* place_to_chain,
4155 const void* disp_cp_chain_me_EXPECTED,
4156 const void* place_to_jump_to )
4157 {
4158 vassert(endness_host == VexEndnessLE);
4160 /* What we're expecting to see is:
4161 movabsq $disp_cp_chain_me_EXPECTED, %r11
4162 call *%r11
4164 49 BB <8 bytes value == disp_cp_chain_me_EXPECTED>
4165 41 FF D3
4166 */
4167 UChar* p = (UChar*)place_to_chain;
4168 vassert(p[0] == 0x49);
4169 vassert(p[1] == 0xBB);
4170 vassert(read_misaligned_ULong_LE(&p[2]) == (Addr)disp_cp_chain_me_EXPECTED);
4171 vassert(p[10] == 0x41);
4172 vassert(p[11] == 0xFF);
4173 vassert(p[12] == 0xD3);
4174 /* And what we want to change it to is either:
4175 (general case):
4176 movabsq $place_to_jump_to, %r11
4177 jmpq *%r11
4179 49 BB <8 bytes value == place_to_jump_to>
4180 41 FF E3
4181 So it's the same length (convenient, huh) and we don't
4182 need to change all the bits.
4183 ---OR---
4184 in the case where the displacement falls within 32 bits
4185 jmpq disp32 where disp32 is relative to the next insn
4186 ud2; ud2; ud2; ud2
4188 E9 <4 bytes == disp32>
4189 0F 0B 0F 0B 0F 0B 0F 0B
4191 In both cases the replacement has the same length as the original.
4192 To remain sane & verifiable,
4193 (1) limit the displacement for the short form to
4194 (say) +/- one billion, so as to avoid wraparound
4195 off-by-ones
4196 (2) even if the short form is applicable, once every (say)
4197 1024 times use the long form anyway, so as to maintain
4198 verifiability
4199 */
4200 /* This is the delta we need to put into a JMP d32 insn. It's
4201 relative to the start of the next insn, hence the -5. */
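/* Worked example (illustration): if p == 0x1000 and place_to_jump_to == 0x2000, delta == 0xFFB, and the short form below becomes E9 FB 0F 00 00 -- a jump whose next-insn address 0x1005 plus 0xFFB lands on 0x2000. */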
4202 Long delta = (Long)((const UChar *)place_to_jump_to - (const UChar*)p) - 5;
4203 Bool shortOK = delta >= -1000*1000*1000 && delta < 1000*1000*1000;
4205 static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
4206 if (shortOK) {
4207 shortCTR++; // not thread-safe, but a lost update here is harmless
4208 if (0 == (shortCTR & 0x3FF)) {
4209 shortOK = False;
4210 if (0)
4211 vex_printf("QQQ chainXDirect_AMD64: shortCTR = %u, "
4212 "using long jmp\n", shortCTR);
4216 /* And make the modifications. */
4217 if (shortOK) {
4218 p[0] = 0xE9;
4219 write_misaligned_UInt_LE(&p[1], (UInt)(Int)delta);
4220 p[5] = 0x0F; p[6] = 0x0B;
4221 p[7] = 0x0F; p[8] = 0x0B;
4222 p[9] = 0x0F; p[10] = 0x0B;
4223 p[11] = 0x0F; p[12] = 0x0B;
4224 /* sanity check on the delta -- top 32 are all 0 or all 1 */
4225 delta >>= 32;
4226 vassert(delta == 0LL || delta == -1LL);
4227 } else {
4228 /* Minimal modifications from the starting sequence. */
4229 write_misaligned_ULong_LE(&p[2], (ULong)(Addr)place_to_jump_to);
4230 p[12] = 0xE3;
4231 }
4232 VexInvalRange vir = { (HWord)place_to_chain, 13 };
4233 return vir;
4234 }
4237 /* NB: what goes on here has to be very closely coordinated with the
4238 emitInstr case for XDirect, above. */
4239 VexInvalRange unchainXDirect_AMD64 ( VexEndness endness_host,
4240 void* place_to_unchain,
4241 const void* place_to_jump_to_EXPECTED,
4242 const void* disp_cp_chain_me )
4243 {
4244 vassert(endness_host == VexEndnessLE);
4246 /* What we're expecting to see is either:
4247 (general case)
4248 movabsq $place_to_jump_to_EXPECTED, %r11
4249 jmpq *%r11
4251 49 BB <8 bytes value == place_to_jump_to_EXPECTED>
4252 41 FF E3
4253 ---OR---
4254 in the case where the displacement falls within 32 bits
4255 jmpq d32
4256 ud2; ud2; ud2; ud2
4258 E9 <4 bytes == disp32>
4259 0F 0B 0F 0B 0F 0B 0F 0B
4260 */
4261 UChar* p = (UChar*)place_to_unchain;
4262 Bool valid = False;
4263 if (p[0] == 0x49 && p[1] == 0xBB
4264 && read_misaligned_ULong_LE(&p[2])
4265 == (ULong)(Addr)place_to_jump_to_EXPECTED
4266 && p[10] == 0x41 && p[11] == 0xFF && p[12] == 0xE3) {
4267 /* it's the long form */
4268 valid = True;
4269 }
4270 else
4271 if (p[0] == 0xE9
4272 && p[5] == 0x0F && p[6] == 0x0B
4273 && p[7] == 0x0F && p[8] == 0x0B
4274 && p[9] == 0x0F && p[10] == 0x0B
4275 && p[11] == 0x0F && p[12] == 0x0B) {
4276 /* It's the short form. Check the offset is right. */
4277 Int s32 = (Int)read_misaligned_UInt_LE(&p[1]);
4278 Long s64 = (Long)s32;
4279 if ((UChar*)p + 5 + s64 == place_to_jump_to_EXPECTED) {
4280 valid = True;
4281 if (0)
4282 vex_printf("QQQ unchainXDirect_AMD64: found short form\n");
4283 }
4284 }
4285 vassert(valid);
4286 /* And what we want to change it to is:
4287 movabsq $disp_cp_chain_me, %r11
4288 call *%r11
4290 49 BB <8 bytes value == disp_cp_chain_me>
4291 41 FF D3
4292 So it's the same length (convenient, huh).
4293 */
4294 p[0] = 0x49;
4295 p[1] = 0xBB;
4296 write_misaligned_ULong_LE(&p[2], (ULong)(Addr)disp_cp_chain_me);
4297 p[10] = 0x41;
4298 p[11] = 0xFF;
4299 p[12] = 0xD3;
4300 VexInvalRange vir = { (HWord)place_to_unchain, 13 };
4301 return vir;
4302 }
4305 /* Patch the counter address into a profile inc point, as previously
4306 created by the Ain_ProfInc case for emit_AMD64Instr. */
4307 VexInvalRange patchProfInc_AMD64 ( VexEndness endness_host,
4308 void* place_to_patch,
4309 const ULong* location_of_counter )
4310 {
4311 vassert(endness_host == VexEndnessLE);
4312 vassert(sizeof(ULong*) == 8);
4313 UChar* p = (UChar*)place_to_patch;
4314 vassert(p[0] == 0x49);
4315 vassert(p[1] == 0xBB);
4316 vassert(p[2] == 0x00);
4317 vassert(p[3] == 0x00);
4318 vassert(p[4] == 0x00);
4319 vassert(p[5] == 0x00);
4320 vassert(p[6] == 0x00);
4321 vassert(p[7] == 0x00);
4322 vassert(p[8] == 0x00);
4323 vassert(p[9] == 0x00);
4324 vassert(p[10] == 0x49);
4325 vassert(p[11] == 0xFF);
4326 vassert(p[12] == 0x03);
4327 ULong imm64 = (ULong)(Addr)location_of_counter;
4328 p[2] = imm64 & 0xFF; imm64 >>= 8;
4329 p[3] = imm64 & 0xFF; imm64 >>= 8;
4330 p[4] = imm64 & 0xFF; imm64 >>= 8;
4331 p[5] = imm64 & 0xFF; imm64 >>= 8;
4332 p[6] = imm64 & 0xFF; imm64 >>= 8;
4333 p[7] = imm64 & 0xFF; imm64 >>= 8;
4334 p[8] = imm64 & 0xFF; imm64 >>= 8;
4335 p[9] = imm64 & 0xFF; imm64 >>= 8;
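/* Illustration: for location_of_counter == 0x1122334455667788 the byte stores above leave p[2..9] == 88 77 66 55 44 33 22 11 (little-endian), completing "movabsq $0x1122334455667788, %r11". */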
4336 VexInvalRange vir = { (HWord)place_to_patch, 13 };
4337 return vir;
4338 }
4341 /*---------------------------------------------------------------*/
4342 /*--- end host_amd64_defs.c ---*/
4343 /*---------------------------------------------------------------*/