/*---------------------------------------------------------------*/
/*--- begin                                 host_amd64_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/
34 #include "libvex_basictypes.h"
36 #include "libvex_trc_values.h"
38 #include "main_util.h"
39 #include "host_generic_regs.h"
40 #include "host_amd64_defs.h"
/* --------- Registers. --------- */
45 const RRegUniverse
* getRRegUniverse_AMD64 ( void )
47 /* The real-register universe is a big constant, so we just want to
48 initialise it once. */
49 static RRegUniverse rRegUniverse_AMD64
;
50 static Bool rRegUniverse_AMD64_initted
= False
;
52 /* Handy shorthand, nothing more */
53 RRegUniverse
* ru
= &rRegUniverse_AMD64
;
55 /* This isn't thread-safe. Sigh. */
56 if (LIKELY(rRegUniverse_AMD64_initted
))
59 RRegUniverse__init(ru
);
61 /* Add the registers. The initial segment of this array must be
62 those available for allocation by reg-alloc, and those that
63 follow are not available for allocation. */
64 ru
->allocable_start
[HRcInt64
] = ru
->size
;
65 ru
->regs
[ru
->size
++] = hregAMD64_R12();
66 ru
->regs
[ru
->size
++] = hregAMD64_R13();
67 ru
->regs
[ru
->size
++] = hregAMD64_R14();
68 ru
->regs
[ru
->size
++] = hregAMD64_R15();
69 ru
->regs
[ru
->size
++] = hregAMD64_RBX();
70 ru
->regs
[ru
->size
++] = hregAMD64_RSI();
71 ru
->regs
[ru
->size
++] = hregAMD64_RDI();
72 ru
->regs
[ru
->size
++] = hregAMD64_R8();
73 ru
->regs
[ru
->size
++] = hregAMD64_R9();
74 ru
->regs
[ru
->size
++] = hregAMD64_R10();
75 ru
->allocable_end
[HRcInt64
] = ru
->size
- 1;
77 ru
->allocable_start
[HRcVec128
] = ru
->size
;
78 ru
->regs
[ru
->size
++] = hregAMD64_XMM3();
79 ru
->regs
[ru
->size
++] = hregAMD64_XMM4();
80 ru
->regs
[ru
->size
++] = hregAMD64_XMM5();
81 ru
->regs
[ru
->size
++] = hregAMD64_XMM6();
82 ru
->regs
[ru
->size
++] = hregAMD64_XMM7();
83 ru
->regs
[ru
->size
++] = hregAMD64_XMM8();
84 ru
->regs
[ru
->size
++] = hregAMD64_XMM9();
85 ru
->regs
[ru
->size
++] = hregAMD64_XMM10();
86 ru
->regs
[ru
->size
++] = hregAMD64_XMM11();
87 ru
->regs
[ru
->size
++] = hregAMD64_XMM12();
88 ru
->allocable_end
[HRcVec128
] = ru
->size
- 1;
89 ru
->allocable
= ru
->size
;
91 /* And other regs, not available to the allocator. */
92 ru
->regs
[ru
->size
++] = hregAMD64_RAX();
93 ru
->regs
[ru
->size
++] = hregAMD64_RCX();
94 ru
->regs
[ru
->size
++] = hregAMD64_RDX();
95 ru
->regs
[ru
->size
++] = hregAMD64_RSP();
96 ru
->regs
[ru
->size
++] = hregAMD64_RBP();
97 ru
->regs
[ru
->size
++] = hregAMD64_R11();
98 ru
->regs
[ru
->size
++] = hregAMD64_XMM0();
99 ru
->regs
[ru
->size
++] = hregAMD64_XMM1();
101 rRegUniverse_AMD64_initted
= True
;
103 RRegUniverse__check_is_sane(ru
);
108 UInt
ppHRegAMD64 ( HReg reg
)
111 static const HChar
* ireg64_names
[16]
112 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
113 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
114 /* Be generic for all virtual regs. */
115 if (hregIsVirtual(reg
)) {
118 /* But specific for real regs. */
119 switch (hregClass(reg
)) {
121 r
= hregEncoding(reg
);
122 vassert(r
>= 0 && r
< 16);
123 return vex_printf("%s", ireg64_names
[r
]);
125 r
= hregEncoding(reg
);
126 vassert(r
>= 0 && r
< 16);
127 return vex_printf("%%xmm%d", r
);
129 vpanic("ppHRegAMD64");
133 static UInt
ppHRegAMD64_lo32 ( HReg reg
)
136 static const HChar
* ireg32_names
[16]
137 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
138 "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" };
139 /* Be generic for all virtual regs. */
140 if (hregIsVirtual(reg
)) {
141 UInt written
= ppHReg(reg
);
142 written
+= vex_printf("d");
145 /* But specific for real regs. */
146 switch (hregClass(reg
)) {
148 r
= hregEncoding(reg
);
149 vassert(r
>= 0 && r
< 16);
150 return vex_printf("%s", ireg32_names
[r
]);
152 vpanic("ppHRegAMD64_lo32: invalid regclass");
/* --------- Condition codes, Intel encoding. --------- */
159 const HChar
* showAMD64CondCode ( AMD64CondCode cond
)
162 case Acc_O
: return "o";
163 case Acc_NO
: return "no";
164 case Acc_B
: return "b";
165 case Acc_NB
: return "nb";
166 case Acc_Z
: return "z";
167 case Acc_NZ
: return "nz";
168 case Acc_BE
: return "be";
169 case Acc_NBE
: return "nbe";
170 case Acc_S
: return "s";
171 case Acc_NS
: return "ns";
172 case Acc_P
: return "p";
173 case Acc_NP
: return "np";
174 case Acc_L
: return "l";
175 case Acc_NL
: return "nl";
176 case Acc_LE
: return "le";
177 case Acc_NLE
: return "nle";
178 case Acc_ALWAYS
: return "ALWAYS";
179 default: vpanic("ppAMD64CondCode");
/* --------- AMD64AMode: memory address expressions. --------- */
186 AMD64AMode
* AMD64AMode_IR ( UInt imm32
, HReg reg
) {
187 AMD64AMode
* am
= LibVEX_Alloc_inline(sizeof(AMD64AMode
));
189 am
->Aam
.IR
.imm
= imm32
;
190 am
->Aam
.IR
.reg
= reg
;
193 AMD64AMode
* AMD64AMode_IRRS ( UInt imm32
, HReg base
, HReg indEx
, Int shift
) {
194 AMD64AMode
* am
= LibVEX_Alloc_inline(sizeof(AMD64AMode
));
196 am
->Aam
.IRRS
.imm
= imm32
;
197 am
->Aam
.IRRS
.base
= base
;
198 am
->Aam
.IRRS
.index
= indEx
;
199 am
->Aam
.IRRS
.shift
= shift
;
200 vassert(shift
>= 0 && shift
<= 3);
204 void ppAMD64AMode ( AMD64AMode
* am
) {
207 if (am
->Aam
.IR
.imm
== 0)
210 vex_printf("0x%x(", am
->Aam
.IR
.imm
);
211 ppHRegAMD64(am
->Aam
.IR
.reg
);
215 vex_printf("0x%x(", am
->Aam
.IRRS
.imm
);
216 ppHRegAMD64(am
->Aam
.IRRS
.base
);
218 ppHRegAMD64(am
->Aam
.IRRS
.index
);
219 vex_printf(",%d)", 1 << am
->Aam
.IRRS
.shift
);
222 vpanic("ppAMD64AMode");
226 static void addRegUsage_AMD64AMode ( HRegUsage
* u
, AMD64AMode
* am
) {
229 addHRegUse(u
, HRmRead
, am
->Aam
.IR
.reg
);
232 addHRegUse(u
, HRmRead
, am
->Aam
.IRRS
.base
);
233 addHRegUse(u
, HRmRead
, am
->Aam
.IRRS
.index
);
236 vpanic("addRegUsage_AMD64AMode");
240 static void mapRegs_AMD64AMode ( HRegRemap
* m
, AMD64AMode
* am
) {
243 am
->Aam
.IR
.reg
= lookupHRegRemap(m
, am
->Aam
.IR
.reg
);
246 am
->Aam
.IRRS
.base
= lookupHRegRemap(m
, am
->Aam
.IRRS
.base
);
247 am
->Aam
.IRRS
.index
= lookupHRegRemap(m
, am
->Aam
.IRRS
.index
);
250 vpanic("mapRegs_AMD64AMode");
/* --------- Operand, which can be reg, immediate or memory. --------- */
256 AMD64RMI
* AMD64RMI_Imm ( UInt imm32
) {
257 AMD64RMI
* op
= LibVEX_Alloc_inline(sizeof(AMD64RMI
));
259 op
->Armi
.Imm
.imm32
= imm32
;
262 AMD64RMI
* AMD64RMI_Reg ( HReg reg
) {
263 AMD64RMI
* op
= LibVEX_Alloc_inline(sizeof(AMD64RMI
));
265 op
->Armi
.Reg
.reg
= reg
;
268 AMD64RMI
* AMD64RMI_Mem ( AMD64AMode
* am
) {
269 AMD64RMI
* op
= LibVEX_Alloc_inline(sizeof(AMD64RMI
));
271 op
->Armi
.Mem
.am
= am
;
275 static void ppAMD64RMI_wrk ( AMD64RMI
* op
, Bool lo32
) {
278 vex_printf("$0x%x", op
->Armi
.Imm
.imm32
);
282 ppHRegAMD64_lo32(op
->Armi
.Reg
.reg
);
284 ppHRegAMD64(op
->Armi
.Reg
.reg
);
287 ppAMD64AMode(op
->Armi
.Mem
.am
);
290 vpanic("ppAMD64RMI");
293 void ppAMD64RMI ( AMD64RMI
* op
) {
294 ppAMD64RMI_wrk(op
, False
/*!lo32*/);
296 void ppAMD64RMI_lo32 ( AMD64RMI
* op
) {
297 ppAMD64RMI_wrk(op
, True
/*lo32*/);
300 /* An AMD64RMI can only be used in a "read" context (what would it mean
301 to write or modify a literal?) and so we enumerate its registers
303 static void addRegUsage_AMD64RMI ( HRegUsage
* u
, AMD64RMI
* op
) {
308 addHRegUse(u
, HRmRead
, op
->Armi
.Reg
.reg
);
311 addRegUsage_AMD64AMode(u
, op
->Armi
.Mem
.am
);
314 vpanic("addRegUsage_AMD64RMI");
318 static void mapRegs_AMD64RMI ( HRegRemap
* m
, AMD64RMI
* op
) {
323 op
->Armi
.Reg
.reg
= lookupHRegRemap(m
, op
->Armi
.Reg
.reg
);
326 mapRegs_AMD64AMode(m
, op
->Armi
.Mem
.am
);
329 vpanic("mapRegs_AMD64RMI");
/* --------- Operand, which can be reg or immediate only. --------- */
336 AMD64RI
* AMD64RI_Imm ( UInt imm32
) {
337 AMD64RI
* op
= LibVEX_Alloc_inline(sizeof(AMD64RI
));
339 op
->Ari
.Imm
.imm32
= imm32
;
342 AMD64RI
* AMD64RI_Reg ( HReg reg
) {
343 AMD64RI
* op
= LibVEX_Alloc_inline(sizeof(AMD64RI
));
345 op
->Ari
.Reg
.reg
= reg
;
349 void ppAMD64RI ( AMD64RI
* op
) {
352 vex_printf("$0x%x", op
->Ari
.Imm
.imm32
);
355 ppHRegAMD64(op
->Ari
.Reg
.reg
);
362 /* An AMD64RI can only be used in a "read" context (what would it mean
363 to write or modify a literal?) and so we enumerate its registers
365 static void addRegUsage_AMD64RI ( HRegUsage
* u
, AMD64RI
* op
) {
370 addHRegUse(u
, HRmRead
, op
->Ari
.Reg
.reg
);
373 vpanic("addRegUsage_AMD64RI");
377 static void mapRegs_AMD64RI ( HRegRemap
* m
, AMD64RI
* op
) {
382 op
->Ari
.Reg
.reg
= lookupHRegRemap(m
, op
->Ari
.Reg
.reg
);
385 vpanic("mapRegs_AMD64RI");
/* --------- Operand, which can be reg or memory only. --------- */
392 AMD64RM
* AMD64RM_Reg ( HReg reg
) {
393 AMD64RM
* op
= LibVEX_Alloc_inline(sizeof(AMD64RM
));
395 op
->Arm
.Reg
.reg
= reg
;
398 AMD64RM
* AMD64RM_Mem ( AMD64AMode
* am
) {
399 AMD64RM
* op
= LibVEX_Alloc_inline(sizeof(AMD64RM
));
405 void ppAMD64RM ( AMD64RM
* op
) {
408 ppAMD64AMode(op
->Arm
.Mem
.am
);
411 ppHRegAMD64(op
->Arm
.Reg
.reg
);
418 /* Because an AMD64RM can be both a source or destination operand, we
419 have to supply a mode -- pertaining to the operand as a whole --
420 indicating how it's being used. */
421 static void addRegUsage_AMD64RM ( HRegUsage
* u
, AMD64RM
* op
, HRegMode mode
) {
424 /* Memory is read, written or modified. So we just want to
425 know the regs read by the amode. */
426 addRegUsage_AMD64AMode(u
, op
->Arm
.Mem
.am
);
429 /* reg is read, written or modified. Add it in the
431 addHRegUse(u
, mode
, op
->Arm
.Reg
.reg
);
434 vpanic("addRegUsage_AMD64RM");
438 static void mapRegs_AMD64RM ( HRegRemap
* m
, AMD64RM
* op
)
442 mapRegs_AMD64AMode(m
, op
->Arm
.Mem
.am
);
445 op
->Arm
.Reg
.reg
= lookupHRegRemap(m
, op
->Arm
.Reg
.reg
);
448 vpanic("mapRegs_AMD64RM");
/* --------- Instructions. --------- */
455 static const HChar
* showAMD64ScalarSz ( Int sz
) {
460 default: vpanic("showAMD64ScalarSz");
464 const HChar
* showAMD64UnaryOp ( AMD64UnaryOp op
) {
466 case Aun_NOT
: return "not";
467 case Aun_NEG
: return "neg";
468 default: vpanic("showAMD64UnaryOp");
472 const HChar
* showAMD64AluOp ( AMD64AluOp op
) {
474 case Aalu_MOV
: return "mov";
475 case Aalu_CMP
: return "cmp";
476 case Aalu_ADD
: return "add";
477 case Aalu_SUB
: return "sub";
478 case Aalu_ADC
: return "adc";
479 case Aalu_SBB
: return "sbb";
480 case Aalu_AND
: return "and";
481 case Aalu_OR
: return "or";
482 case Aalu_XOR
: return "xor";
483 case Aalu_MUL
: return "imul";
484 default: vpanic("showAMD64AluOp");
488 const HChar
* showAMD64ShiftOp ( AMD64ShiftOp op
) {
490 case Ash_SHL
: return "shl";
491 case Ash_SHR
: return "shr";
492 case Ash_SAR
: return "sar";
493 default: vpanic("showAMD64ShiftOp");
497 const HChar
* showA87FpOp ( A87FpOp op
) {
499 case Afp_SCALE
: return "scale";
500 case Afp_ATAN
: return "atan";
501 case Afp_YL2X
: return "yl2x";
502 case Afp_YL2XP1
: return "yl2xp1";
503 case Afp_PREM
: return "prem";
504 case Afp_PREM1
: return "prem1";
505 case Afp_SQRT
: return "sqrt";
506 case Afp_SIN
: return "sin";
507 case Afp_COS
: return "cos";
508 case Afp_TAN
: return "tan";
509 case Afp_ROUND
: return "round";
510 case Afp_2XM1
: return "2xm1";
511 default: vpanic("showA87FpOp");
515 const HChar
* showAMD64SseOp ( AMD64SseOp op
) {
517 case Asse_MOV
: return "movups";
518 case Asse_ADDF
: return "add";
519 case Asse_SUBF
: return "sub";
520 case Asse_MULF
: return "mul";
521 case Asse_DIVF
: return "div";
522 case Asse_MAXF
: return "max";
523 case Asse_MINF
: return "min";
524 case Asse_CMPEQF
: return "cmpFeq";
525 case Asse_CMPLTF
: return "cmpFlt";
526 case Asse_CMPLEF
: return "cmpFle";
527 case Asse_CMPUNF
: return "cmpFun";
528 case Asse_RCPF
: return "rcp";
529 case Asse_RSQRTF
: return "rsqrt";
530 case Asse_SQRTF
: return "sqrt";
531 case Asse_I2F
: return "cvtdq2ps.";
532 case Asse_F2I
: return "cvtps2dq.";
533 case Asse_AND
: return "and";
534 case Asse_OR
: return "or";
535 case Asse_XOR
: return "xor";
536 case Asse_ANDN
: return "andn";
537 case Asse_ADD8
: return "paddb";
538 case Asse_ADD16
: return "paddw";
539 case Asse_ADD32
: return "paddd";
540 case Asse_ADD64
: return "paddq";
541 case Asse_QADD8U
: return "paddusb";
542 case Asse_QADD16U
: return "paddusw";
543 case Asse_QADD8S
: return "paddsb";
544 case Asse_QADD16S
: return "paddsw";
545 case Asse_SUB8
: return "psubb";
546 case Asse_SUB16
: return "psubw";
547 case Asse_SUB32
: return "psubd";
548 case Asse_SUB64
: return "psubq";
549 case Asse_QSUB8U
: return "psubusb";
550 case Asse_QSUB16U
: return "psubusw";
551 case Asse_QSUB8S
: return "psubsb";
552 case Asse_QSUB16S
: return "psubsw";
553 case Asse_MUL16
: return "pmullw";
554 case Asse_MULHI16U
: return "pmulhuw";
555 case Asse_MULHI16S
: return "pmulhw";
556 case Asse_AVG8U
: return "pavgb";
557 case Asse_AVG16U
: return "pavgw";
558 case Asse_MAX16S
: return "pmaxw";
559 case Asse_MAX8U
: return "pmaxub";
560 case Asse_MIN16S
: return "pminw";
561 case Asse_MIN8U
: return "pminub";
562 case Asse_CMPEQ8
: return "pcmpeqb";
563 case Asse_CMPEQ16
: return "pcmpeqw";
564 case Asse_CMPEQ32
: return "pcmpeqd";
565 case Asse_CMPGT8S
: return "pcmpgtb";
566 case Asse_CMPGT16S
: return "pcmpgtw";
567 case Asse_CMPGT32S
: return "pcmpgtd";
568 case Asse_SHL16
: return "psllw";
569 case Asse_SHL32
: return "pslld";
570 case Asse_SHL64
: return "psllq";
571 case Asse_SHL128
: return "pslldq";
572 case Asse_SHR16
: return "psrlw";
573 case Asse_SHR32
: return "psrld";
574 case Asse_SHR64
: return "psrlq";
575 case Asse_SHR128
: return "psrldq";
576 case Asse_SAR16
: return "psraw";
577 case Asse_SAR32
: return "psrad";
578 case Asse_PACKSSD
: return "packssdw";
579 case Asse_PACKSSW
: return "packsswb";
580 case Asse_PACKUSW
: return "packuswb";
581 case Asse_UNPCKHB
: return "punpckhb";
582 case Asse_UNPCKHW
: return "punpckhw";
583 case Asse_UNPCKHD
: return "punpckhd";
584 case Asse_UNPCKHQ
: return "punpckhq";
585 case Asse_UNPCKLB
: return "punpcklb";
586 case Asse_UNPCKLW
: return "punpcklw";
587 case Asse_UNPCKLD
: return "punpckld";
588 case Asse_UNPCKLQ
: return "punpcklq";
589 case Asse_PSHUFB
: return "pshufb";
590 case Asse_PMADDUBSW
: return "pmaddubsw";
591 case Asse_F32toF16
: return "vcvtps2ph(rm_field=$0x4).";
592 case Asse_F16toF32
: return "vcvtph2ps.";
593 default: vpanic("showAMD64SseOp");
597 AMD64Instr
* AMD64Instr_Imm64 ( ULong imm64
, HReg dst
) {
598 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
600 i
->Ain
.Imm64
.imm64
= imm64
;
601 i
->Ain
.Imm64
.dst
= dst
;
604 AMD64Instr
* AMD64Instr_Alu64R ( AMD64AluOp op
, AMD64RMI
* src
, HReg dst
) {
605 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
607 i
->Ain
.Alu64R
.op
= op
;
608 i
->Ain
.Alu64R
.src
= src
;
609 i
->Ain
.Alu64R
.dst
= dst
;
612 AMD64Instr
* AMD64Instr_Alu64M ( AMD64AluOp op
, AMD64RI
* src
, AMD64AMode
* dst
) {
613 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
615 i
->Ain
.Alu64M
.op
= op
;
616 i
->Ain
.Alu64M
.src
= src
;
617 i
->Ain
.Alu64M
.dst
= dst
;
618 vassert(op
!= Aalu_MUL
);
621 AMD64Instr
* AMD64Instr_Sh64 ( AMD64ShiftOp op
, UInt src
, HReg dst
) {
622 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
625 i
->Ain
.Sh64
.src
= src
;
626 i
->Ain
.Sh64
.dst
= dst
;
629 AMD64Instr
* AMD64Instr_Sh32 ( AMD64ShiftOp op
, UInt src
, HReg dst
) {
630 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
633 i
->Ain
.Sh32
.src
= src
;
634 i
->Ain
.Sh32
.dst
= dst
;
637 AMD64Instr
* AMD64Instr_Test64 ( UInt imm32
, HReg dst
) {
638 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
640 i
->Ain
.Test64
.imm32
= imm32
;
641 i
->Ain
.Test64
.dst
= dst
;
644 AMD64Instr
* AMD64Instr_Unary64 ( AMD64UnaryOp op
, HReg dst
) {
645 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
646 i
->tag
= Ain_Unary64
;
647 i
->Ain
.Unary64
.op
= op
;
648 i
->Ain
.Unary64
.dst
= dst
;
651 AMD64Instr
* AMD64Instr_Lea64 ( AMD64AMode
* am
, HReg dst
) {
652 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
654 i
->Ain
.Lea64
.am
= am
;
655 i
->Ain
.Lea64
.dst
= dst
;
658 AMD64Instr
* AMD64Instr_Alu32R ( AMD64AluOp op
, AMD64RMI
* src
, HReg dst
) {
659 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
661 i
->Ain
.Alu32R
.op
= op
;
662 i
->Ain
.Alu32R
.src
= src
;
663 i
->Ain
.Alu32R
.dst
= dst
;
665 case Aalu_ADD
: case Aalu_SUB
: case Aalu_CMP
:
666 case Aalu_AND
: case Aalu_OR
: case Aalu_XOR
: break;
671 AMD64Instr
* AMD64Instr_MulL ( Bool syned
, AMD64RM
* src
) {
672 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
674 i
->Ain
.MulL
.syned
= syned
;
675 i
->Ain
.MulL
.src
= src
;
678 AMD64Instr
* AMD64Instr_Div ( Bool syned
, Int sz
, AMD64RM
* src
) {
679 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
681 i
->Ain
.Div
.syned
= syned
;
683 i
->Ain
.Div
.src
= src
;
684 vassert(sz
== 4 || sz
== 8);
687 AMD64Instr
* AMD64Instr_Push( AMD64RMI
* src
) {
688 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
690 i
->Ain
.Push
.src
= src
;
693 AMD64Instr
* AMD64Instr_Call ( AMD64CondCode cond
, Addr64 target
, Int regparms
,
695 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
697 i
->Ain
.Call
.cond
= cond
;
698 i
->Ain
.Call
.target
= target
;
699 i
->Ain
.Call
.regparms
= regparms
;
700 i
->Ain
.Call
.rloc
= rloc
;
701 vassert(regparms
>= 0 && regparms
<= 6);
702 vassert(is_sane_RetLoc(rloc
));
706 AMD64Instr
* AMD64Instr_XDirect ( Addr64 dstGA
, AMD64AMode
* amRIP
,
707 AMD64CondCode cond
, Bool toFastEP
) {
708 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
709 i
->tag
= Ain_XDirect
;
710 i
->Ain
.XDirect
.dstGA
= dstGA
;
711 i
->Ain
.XDirect
.amRIP
= amRIP
;
712 i
->Ain
.XDirect
.cond
= cond
;
713 i
->Ain
.XDirect
.toFastEP
= toFastEP
;
716 AMD64Instr
* AMD64Instr_XIndir ( HReg dstGA
, AMD64AMode
* amRIP
,
717 AMD64CondCode cond
) {
718 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
720 i
->Ain
.XIndir
.dstGA
= dstGA
;
721 i
->Ain
.XIndir
.amRIP
= amRIP
;
722 i
->Ain
.XIndir
.cond
= cond
;
725 AMD64Instr
* AMD64Instr_XAssisted ( HReg dstGA
, AMD64AMode
* amRIP
,
726 AMD64CondCode cond
, IRJumpKind jk
) {
727 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
728 i
->tag
= Ain_XAssisted
;
729 i
->Ain
.XAssisted
.dstGA
= dstGA
;
730 i
->Ain
.XAssisted
.amRIP
= amRIP
;
731 i
->Ain
.XAssisted
.cond
= cond
;
732 i
->Ain
.XAssisted
.jk
= jk
;
736 AMD64Instr
* AMD64Instr_CMov64 ( AMD64CondCode cond
, HReg src
, HReg dst
) {
737 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
739 i
->Ain
.CMov64
.cond
= cond
;
740 i
->Ain
.CMov64
.src
= src
;
741 i
->Ain
.CMov64
.dst
= dst
;
742 vassert(cond
!= Acc_ALWAYS
);
745 AMD64Instr
* AMD64Instr_CLoad ( AMD64CondCode cond
, UChar szB
,
746 AMD64AMode
* addr
, HReg dst
) {
747 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
749 i
->Ain
.CLoad
.cond
= cond
;
750 i
->Ain
.CLoad
.szB
= szB
;
751 i
->Ain
.CLoad
.addr
= addr
;
752 i
->Ain
.CLoad
.dst
= dst
;
753 vassert(cond
!= Acc_ALWAYS
&& (szB
== 4 || szB
== 8));
756 AMD64Instr
* AMD64Instr_CStore ( AMD64CondCode cond
, UChar szB
,
757 HReg src
, AMD64AMode
* addr
) {
758 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
760 i
->Ain
.CStore
.cond
= cond
;
761 i
->Ain
.CStore
.szB
= szB
;
762 i
->Ain
.CStore
.src
= src
;
763 i
->Ain
.CStore
.addr
= addr
;
764 vassert(cond
!= Acc_ALWAYS
&& (szB
== 4 || szB
== 8));
767 AMD64Instr
* AMD64Instr_MovxLQ ( Bool syned
, HReg src
, HReg dst
) {
768 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
770 i
->Ain
.MovxLQ
.syned
= syned
;
771 i
->Ain
.MovxLQ
.src
= src
;
772 i
->Ain
.MovxLQ
.dst
= dst
;
775 AMD64Instr
* AMD64Instr_LoadEX ( UChar szSmall
, Bool syned
,
776 AMD64AMode
* src
, HReg dst
) {
777 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
779 i
->Ain
.LoadEX
.szSmall
= szSmall
;
780 i
->Ain
.LoadEX
.syned
= syned
;
781 i
->Ain
.LoadEX
.src
= src
;
782 i
->Ain
.LoadEX
.dst
= dst
;
783 vassert(szSmall
== 1 || szSmall
== 2 || szSmall
== 4);
786 AMD64Instr
* AMD64Instr_Store ( UChar sz
, HReg src
, AMD64AMode
* dst
) {
787 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
789 i
->Ain
.Store
.sz
= sz
;
790 i
->Ain
.Store
.src
= src
;
791 i
->Ain
.Store
.dst
= dst
;
792 vassert(sz
== 1 || sz
== 2 || sz
== 4);
795 AMD64Instr
* AMD64Instr_Set64 ( AMD64CondCode cond
, HReg dst
) {
796 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
798 i
->Ain
.Set64
.cond
= cond
;
799 i
->Ain
.Set64
.dst
= dst
;
802 AMD64Instr
* AMD64Instr_Bsfr64 ( Bool isFwds
, HReg src
, HReg dst
) {
803 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
805 i
->Ain
.Bsfr64
.isFwds
= isFwds
;
806 i
->Ain
.Bsfr64
.src
= src
;
807 i
->Ain
.Bsfr64
.dst
= dst
;
810 AMD64Instr
* AMD64Instr_MFence ( void ) {
811 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
815 AMD64Instr
* AMD64Instr_ACAS ( AMD64AMode
* addr
, UChar sz
) {
816 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
818 i
->Ain
.ACAS
.addr
= addr
;
820 vassert(sz
== 8 || sz
== 4 || sz
== 2 || sz
== 1);
823 AMD64Instr
* AMD64Instr_DACAS ( AMD64AMode
* addr
, UChar sz
) {
824 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
826 i
->Ain
.DACAS
.addr
= addr
;
827 i
->Ain
.DACAS
.sz
= sz
;
828 vassert(sz
== 8 || sz
== 4);
832 AMD64Instr
* AMD64Instr_A87Free ( Int nregs
)
834 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
835 i
->tag
= Ain_A87Free
;
836 i
->Ain
.A87Free
.nregs
= nregs
;
837 vassert(nregs
>= 1 && nregs
<= 7);
840 AMD64Instr
* AMD64Instr_A87PushPop ( AMD64AMode
* addr
, Bool isPush
, UChar szB
)
842 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
843 i
->tag
= Ain_A87PushPop
;
844 i
->Ain
.A87PushPop
.addr
= addr
;
845 i
->Ain
.A87PushPop
.isPush
= isPush
;
846 i
->Ain
.A87PushPop
.szB
= szB
;
847 vassert(szB
== 8 || szB
== 4);
850 AMD64Instr
* AMD64Instr_A87FpOp ( A87FpOp op
)
852 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
853 i
->tag
= Ain_A87FpOp
;
854 i
->Ain
.A87FpOp
.op
= op
;
857 AMD64Instr
* AMD64Instr_A87LdCW ( AMD64AMode
* addr
)
859 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
860 i
->tag
= Ain_A87LdCW
;
861 i
->Ain
.A87LdCW
.addr
= addr
;
864 AMD64Instr
* AMD64Instr_A87StSW ( AMD64AMode
* addr
)
866 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
867 i
->tag
= Ain_A87StSW
;
868 i
->Ain
.A87StSW
.addr
= addr
;
871 AMD64Instr
* AMD64Instr_LdMXCSR ( AMD64AMode
* addr
) {
872 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
873 i
->tag
= Ain_LdMXCSR
;
874 i
->Ain
.LdMXCSR
.addr
= addr
;
877 AMD64Instr
* AMD64Instr_SseUComIS ( Int sz
, HReg srcL
, HReg srcR
, HReg dst
) {
878 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
879 i
->tag
= Ain_SseUComIS
;
880 i
->Ain
.SseUComIS
.sz
= toUChar(sz
);
881 i
->Ain
.SseUComIS
.srcL
= srcL
;
882 i
->Ain
.SseUComIS
.srcR
= srcR
;
883 i
->Ain
.SseUComIS
.dst
= dst
;
884 vassert(sz
== 4 || sz
== 8);
887 AMD64Instr
* AMD64Instr_SseSI2SF ( Int szS
, Int szD
, HReg src
, HReg dst
) {
888 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
889 i
->tag
= Ain_SseSI2SF
;
890 i
->Ain
.SseSI2SF
.szS
= toUChar(szS
);
891 i
->Ain
.SseSI2SF
.szD
= toUChar(szD
);
892 i
->Ain
.SseSI2SF
.src
= src
;
893 i
->Ain
.SseSI2SF
.dst
= dst
;
894 vassert(szS
== 4 || szS
== 8);
895 vassert(szD
== 4 || szD
== 8);
898 AMD64Instr
* AMD64Instr_SseSF2SI ( Int szS
, Int szD
, HReg src
, HReg dst
) {
899 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
900 i
->tag
= Ain_SseSF2SI
;
901 i
->Ain
.SseSF2SI
.szS
= toUChar(szS
);
902 i
->Ain
.SseSF2SI
.szD
= toUChar(szD
);
903 i
->Ain
.SseSF2SI
.src
= src
;
904 i
->Ain
.SseSF2SI
.dst
= dst
;
905 vassert(szS
== 4 || szS
== 8);
906 vassert(szD
== 4 || szD
== 8);
909 AMD64Instr
* AMD64Instr_SseSDSS ( Bool from64
, HReg src
, HReg dst
)
911 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
912 i
->tag
= Ain_SseSDSS
;
913 i
->Ain
.SseSDSS
.from64
= from64
;
914 i
->Ain
.SseSDSS
.src
= src
;
915 i
->Ain
.SseSDSS
.dst
= dst
;
918 AMD64Instr
* AMD64Instr_SseLdSt ( Bool isLoad
, Int sz
,
919 HReg reg
, AMD64AMode
* addr
) {
920 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
921 i
->tag
= Ain_SseLdSt
;
922 i
->Ain
.SseLdSt
.isLoad
= isLoad
;
923 i
->Ain
.SseLdSt
.sz
= toUChar(sz
);
924 i
->Ain
.SseLdSt
.reg
= reg
;
925 i
->Ain
.SseLdSt
.addr
= addr
;
926 vassert(sz
== 4 || sz
== 8 || sz
== 16);
929 AMD64Instr
* AMD64Instr_SseCStore ( AMD64CondCode cond
,
930 HReg src
, AMD64AMode
* addr
)
932 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
933 i
->tag
= Ain_SseCStore
;
934 i
->Ain
.SseCStore
.cond
= cond
;
935 i
->Ain
.SseCStore
.src
= src
;
936 i
->Ain
.SseCStore
.addr
= addr
;
937 vassert(cond
!= Acc_ALWAYS
);
940 AMD64Instr
* AMD64Instr_SseCLoad ( AMD64CondCode cond
,
941 AMD64AMode
* addr
, HReg dst
)
943 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
944 i
->tag
= Ain_SseCLoad
;
945 i
->Ain
.SseCLoad
.cond
= cond
;
946 i
->Ain
.SseCLoad
.addr
= addr
;
947 i
->Ain
.SseCLoad
.dst
= dst
;
948 vassert(cond
!= Acc_ALWAYS
);
951 AMD64Instr
* AMD64Instr_SseLdzLO ( Int sz
, HReg reg
, AMD64AMode
* addr
)
953 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
954 i
->tag
= Ain_SseLdzLO
;
955 i
->Ain
.SseLdzLO
.sz
= sz
;
956 i
->Ain
.SseLdzLO
.reg
= reg
;
957 i
->Ain
.SseLdzLO
.addr
= addr
;
958 vassert(sz
== 4 || sz
== 8);
961 AMD64Instr
* AMD64Instr_Sse32Fx4 ( AMD64SseOp op
, HReg src
, HReg dst
) {
962 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
963 i
->tag
= Ain_Sse32Fx4
;
964 i
->Ain
.Sse32Fx4
.op
= op
;
965 i
->Ain
.Sse32Fx4
.src
= src
;
966 i
->Ain
.Sse32Fx4
.dst
= dst
;
967 vassert(op
!= Asse_MOV
);
970 AMD64Instr
* AMD64Instr_Sse32FLo ( AMD64SseOp op
, HReg src
, HReg dst
) {
971 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
972 i
->tag
= Ain_Sse32FLo
;
973 i
->Ain
.Sse32FLo
.op
= op
;
974 i
->Ain
.Sse32FLo
.src
= src
;
975 i
->Ain
.Sse32FLo
.dst
= dst
;
976 vassert(op
!= Asse_MOV
);
979 AMD64Instr
* AMD64Instr_Sse64Fx2 ( AMD64SseOp op
, HReg src
, HReg dst
) {
980 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
981 i
->tag
= Ain_Sse64Fx2
;
982 i
->Ain
.Sse64Fx2
.op
= op
;
983 i
->Ain
.Sse64Fx2
.src
= src
;
984 i
->Ain
.Sse64Fx2
.dst
= dst
;
985 vassert(op
!= Asse_MOV
);
988 AMD64Instr
* AMD64Instr_Sse64FLo ( AMD64SseOp op
, HReg src
, HReg dst
) {
989 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
990 i
->tag
= Ain_Sse64FLo
;
991 i
->Ain
.Sse64FLo
.op
= op
;
992 i
->Ain
.Sse64FLo
.src
= src
;
993 i
->Ain
.Sse64FLo
.dst
= dst
;
994 vassert(op
!= Asse_MOV
);
997 AMD64Instr
* AMD64Instr_SseReRg ( AMD64SseOp op
, HReg re
, HReg rg
) {
998 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
999 i
->tag
= Ain_SseReRg
;
1000 i
->Ain
.SseReRg
.op
= op
;
1001 i
->Ain
.SseReRg
.src
= re
;
1002 i
->Ain
.SseReRg
.dst
= rg
;
1005 AMD64Instr
* AMD64Instr_SseCMov ( AMD64CondCode cond
, HReg src
, HReg dst
) {
1006 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
1007 i
->tag
= Ain_SseCMov
;
1008 i
->Ain
.SseCMov
.cond
= cond
;
1009 i
->Ain
.SseCMov
.src
= src
;
1010 i
->Ain
.SseCMov
.dst
= dst
;
1011 vassert(cond
!= Acc_ALWAYS
);
1014 AMD64Instr
* AMD64Instr_SseShuf ( Int order
, HReg src
, HReg dst
) {
1015 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
1016 i
->tag
= Ain_SseShuf
;
1017 i
->Ain
.SseShuf
.order
= order
;
1018 i
->Ain
.SseShuf
.src
= src
;
1019 i
->Ain
.SseShuf
.dst
= dst
;
1020 vassert(order
>= 0 && order
<= 0xFF);
1023 AMD64Instr
* AMD64Instr_SseShiftN ( AMD64SseOp op
,
1024 UInt shiftBits
, HReg dst
) {
1025 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
1026 i
->tag
= Ain_SseShiftN
;
1027 i
->Ain
.SseShiftN
.op
= op
;
1028 i
->Ain
.SseShiftN
.shiftBits
= shiftBits
;
1029 i
->Ain
.SseShiftN
.dst
= dst
;
1032 AMD64Instr
* AMD64Instr_SseMOVQ ( HReg gpr
, HReg xmm
, Bool toXMM
) {
1033 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
1034 i
->tag
= Ain_SseMOVQ
;
1035 i
->Ain
.SseMOVQ
.gpr
= gpr
;
1036 i
->Ain
.SseMOVQ
.xmm
= xmm
;
1037 i
->Ain
.SseMOVQ
.toXMM
= toXMM
;
1038 vassert(hregClass(gpr
) == HRcInt64
);
1039 vassert(hregClass(xmm
) == HRcVec128
);
1042 //uu AMD64Instr* AMD64Instr_AvxLdSt ( Bool isLoad,
1043 //uu HReg reg, AMD64AMode* addr ) {
1044 //uu AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1045 //uu i->tag = Ain_AvxLdSt;
1046 //uu i->Ain.AvxLdSt.isLoad = isLoad;
1047 //uu i->Ain.AvxLdSt.reg = reg;
1048 //uu i->Ain.AvxLdSt.addr = addr;
1051 //uu AMD64Instr* AMD64Instr_AvxReRg ( AMD64SseOp op, HReg re, HReg rg ) {
1052 //uu AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1053 //uu i->tag = Ain_AvxReRg;
1054 //uu i->Ain.AvxReRg.op = op;
1055 //uu i->Ain.AvxReRg.src = re;
1056 //uu i->Ain.AvxReRg.dst = rg;
1059 AMD64Instr
* AMD64Instr_EvCheck ( AMD64AMode
* amCounter
,
1060 AMD64AMode
* amFailAddr
) {
1061 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
1062 i
->tag
= Ain_EvCheck
;
1063 i
->Ain
.EvCheck
.amCounter
= amCounter
;
1064 i
->Ain
.EvCheck
.amFailAddr
= amFailAddr
;
1067 AMD64Instr
* AMD64Instr_ProfInc ( void ) {
1068 AMD64Instr
* i
= LibVEX_Alloc_inline(sizeof(AMD64Instr
));
1069 i
->tag
= Ain_ProfInc
;
1073 void ppAMD64Instr ( const AMD64Instr
* i
, Bool mode64
)
1075 vassert(mode64
== True
);
1078 vex_printf("movabsq $0x%llx,", i
->Ain
.Imm64
.imm64
);
1079 ppHRegAMD64(i
->Ain
.Imm64
.dst
);
1082 vex_printf("%sq ", showAMD64AluOp(i
->Ain
.Alu64R
.op
));
1083 ppAMD64RMI(i
->Ain
.Alu64R
.src
);
1085 ppHRegAMD64(i
->Ain
.Alu64R
.dst
);
1088 vex_printf("%sq ", showAMD64AluOp(i
->Ain
.Alu64M
.op
));
1089 ppAMD64RI(i
->Ain
.Alu64M
.src
);
1091 ppAMD64AMode(i
->Ain
.Alu64M
.dst
);
1094 vex_printf("%sq ", showAMD64ShiftOp(i
->Ain
.Sh64
.op
));
1095 if (i
->Ain
.Sh64
.src
== 0)
1096 vex_printf("%%cl,");
1098 vex_printf("$%d,", (Int
)i
->Ain
.Sh64
.src
);
1099 ppHRegAMD64(i
->Ain
.Sh64
.dst
);
1102 vex_printf("%sl ", showAMD64ShiftOp(i
->Ain
.Sh32
.op
));
1103 if (i
->Ain
.Sh32
.src
== 0)
1104 vex_printf("%%cl,");
1106 vex_printf("$%d,", (Int
)i
->Ain
.Sh32
.src
);
1107 ppHRegAMD64_lo32(i
->Ain
.Sh32
.dst
);
1110 vex_printf("testq $%d,", (Int
)i
->Ain
.Test64
.imm32
);
1111 ppHRegAMD64(i
->Ain
.Test64
.dst
);
1114 vex_printf("%sq ", showAMD64UnaryOp(i
->Ain
.Unary64
.op
));
1115 ppHRegAMD64(i
->Ain
.Unary64
.dst
);
1118 vex_printf("leaq ");
1119 ppAMD64AMode(i
->Ain
.Lea64
.am
);
1121 ppHRegAMD64(i
->Ain
.Lea64
.dst
);
1124 vex_printf("%sl ", showAMD64AluOp(i
->Ain
.Alu32R
.op
));
1125 ppAMD64RMI_lo32(i
->Ain
.Alu32R
.src
);
1127 ppHRegAMD64_lo32(i
->Ain
.Alu32R
.dst
);
1130 vex_printf("%cmulq ", i
->Ain
.MulL
.syned
? 's' : 'u');
1131 ppAMD64RM(i
->Ain
.MulL
.src
);
1134 vex_printf("%cdiv%s ",
1135 i
->Ain
.Div
.syned
? 's' : 'u',
1136 showAMD64ScalarSz(i
->Ain
.Div
.sz
));
1137 ppAMD64RM(i
->Ain
.Div
.src
);
1140 vex_printf("pushq ");
1141 ppAMD64RMI(i
->Ain
.Push
.src
);
1144 vex_printf("call%s[%d,",
1145 i
->Ain
.Call
.cond
==Acc_ALWAYS
1146 ? "" : showAMD64CondCode(i
->Ain
.Call
.cond
),
1147 i
->Ain
.Call
.regparms
);
1148 ppRetLoc(i
->Ain
.Call
.rloc
);
1149 vex_printf("] 0x%llx", i
->Ain
.Call
.target
);
1153 vex_printf("(xDirect) ");
1154 vex_printf("if (%%rflags.%s) { ",
1155 showAMD64CondCode(i
->Ain
.XDirect
.cond
));
1156 vex_printf("movabsq $0x%llx,%%r11; ", i
->Ain
.XDirect
.dstGA
);
1157 vex_printf("movq %%r11,");
1158 ppAMD64AMode(i
->Ain
.XDirect
.amRIP
);
1160 vex_printf("movabsq $disp_cp_chain_me_to_%sEP,%%r11; call *%%r11 }",
1161 i
->Ain
.XDirect
.toFastEP
? "fast" : "slow");
1164 vex_printf("(xIndir) ");
1165 vex_printf("if (%%rflags.%s) { ",
1166 showAMD64CondCode(i
->Ain
.XIndir
.cond
));
1167 vex_printf("movq ");
1168 ppHRegAMD64(i
->Ain
.XIndir
.dstGA
);
1170 ppAMD64AMode(i
->Ain
.XIndir
.amRIP
);
1171 vex_printf("; movabsq $disp_indir,%%r11; jmp *%%r11 }");
1174 vex_printf("(xAssisted) ");
1175 vex_printf("if (%%rflags.%s) { ",
1176 showAMD64CondCode(i
->Ain
.XAssisted
.cond
));
1177 vex_printf("movq ");
1178 ppHRegAMD64(i
->Ain
.XAssisted
.dstGA
);
1180 ppAMD64AMode(i
->Ain
.XAssisted
.amRIP
);
1181 vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%rbp",
1182 (Int
)i
->Ain
.XAssisted
.jk
);
1183 vex_printf("; movabsq $disp_assisted,%%r11; jmp *%%r11 }");
1187 vex_printf("cmov%s ", showAMD64CondCode(i
->Ain
.CMov64
.cond
));
1188 ppHRegAMD64(i
->Ain
.CMov64
.src
);
1190 ppHRegAMD64(i
->Ain
.CMov64
.dst
);
1193 vex_printf("if (%%rflags.%s) { ",
1194 showAMD64CondCode(i
->Ain
.CLoad
.cond
));
1195 vex_printf("mov%c ", i
->Ain
.CLoad
.szB
== 4 ? 'l' : 'q');
1196 ppAMD64AMode(i
->Ain
.CLoad
.addr
);
1198 (i
->Ain
.CLoad
.szB
== 4 ? ppHRegAMD64_lo32
: ppHRegAMD64
)
1203 vex_printf("if (%%rflags.%s) { ",
1204 showAMD64CondCode(i
->Ain
.CStore
.cond
));
1205 vex_printf("mov%c ", i
->Ain
.CStore
.szB
== 4 ? 'l' : 'q');
1206 (i
->Ain
.CStore
.szB
== 4 ? ppHRegAMD64_lo32
: ppHRegAMD64
)
1207 (i
->Ain
.CStore
.src
);
1209 ppAMD64AMode(i
->Ain
.CStore
.addr
);
1214 vex_printf("mov%clq ", i
->Ain
.MovxLQ
.syned
? 's' : 'z');
1215 ppHRegAMD64_lo32(i
->Ain
.MovxLQ
.src
);
1217 ppHRegAMD64(i
->Ain
.MovxLQ
.dst
);
1220 if (i
->Ain
.LoadEX
.szSmall
==4 && !i
->Ain
.LoadEX
.syned
) {
1221 vex_printf("movl ");
1222 ppAMD64AMode(i
->Ain
.LoadEX
.src
);
1224 ppHRegAMD64_lo32(i
->Ain
.LoadEX
.dst
);
1226 vex_printf("mov%c%cq ",
1227 i
->Ain
.LoadEX
.syned
? 's' : 'z',
1228 i
->Ain
.LoadEX
.szSmall
==1
1230 : (i
->Ain
.LoadEX
.szSmall
==2 ? 'w' : 'l'));
1231 ppAMD64AMode(i
->Ain
.LoadEX
.src
);
1233 ppHRegAMD64(i
->Ain
.LoadEX
.dst
);
1237 vex_printf("mov%c ", i
->Ain
.Store
.sz
==1 ? 'b'
1238 : (i
->Ain
.Store
.sz
==2 ? 'w' : 'l'));
1239 ppHRegAMD64(i
->Ain
.Store
.src
);
1241 ppAMD64AMode(i
->Ain
.Store
.dst
);
1244 vex_printf("setq%s ", showAMD64CondCode(i
->Ain
.Set64
.cond
));
1245 ppHRegAMD64(i
->Ain
.Set64
.dst
);
1248 vex_printf("bs%cq ", i
->Ain
.Bsfr64
.isFwds
? 'f' : 'r');
1249 ppHRegAMD64(i
->Ain
.Bsfr64
.src
);
1251 ppHRegAMD64(i
->Ain
.Bsfr64
.dst
);
1254 vex_printf("mfence" );
1257 vex_printf("lock cmpxchg%c ",
1258 i
->Ain
.ACAS
.sz
==1 ? 'b' : i
->Ain
.ACAS
.sz
==2 ? 'w'
1259 : i
->Ain
.ACAS
.sz
==4 ? 'l' : 'q' );
1260 vex_printf("{%%rax->%%rbx},");
1261 ppAMD64AMode(i
->Ain
.ACAS
.addr
);
1264 vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},",
1265 (Int
)(2 * i
->Ain
.DACAS
.sz
));
1266 ppAMD64AMode(i
->Ain
.DACAS
.addr
);
1269 vex_printf("ffree %%st(7..%d)", 8 - i
->Ain
.A87Free
.nregs
);
1271 case Ain_A87PushPop
:
1272 vex_printf(i
->Ain
.A87PushPop
.isPush
? "fld%c " : "fstp%c ",
1273 i
->Ain
.A87PushPop
.szB
== 4 ? 's' : 'l');
1274 ppAMD64AMode(i
->Ain
.A87PushPop
.addr
);
1277 vex_printf("f%s", showA87FpOp(i
->Ain
.A87FpOp
.op
));
1280 vex_printf("fldcw ");
1281 ppAMD64AMode(i
->Ain
.A87LdCW
.addr
);
1284 vex_printf("fstsw ");
1285 ppAMD64AMode(i
->Ain
.A87StSW
.addr
);
1288 vex_printf("ldmxcsr ");
1289 ppAMD64AMode(i
->Ain
.LdMXCSR
.addr
);
1292 vex_printf("ucomis%s ", i
->Ain
.SseUComIS
.sz
==4 ? "s" : "d");
1293 ppHRegAMD64(i
->Ain
.SseUComIS
.srcL
);
1295 ppHRegAMD64(i
->Ain
.SseUComIS
.srcR
);
1296 vex_printf(" ; pushfq ; popq ");
1297 ppHRegAMD64(i
->Ain
.SseUComIS
.dst
);
1300 vex_printf("cvtsi2s%s ", i
->Ain
.SseSI2SF
.szD
==4 ? "s" : "d");
1301 (i
->Ain
.SseSI2SF
.szS
==4 ? ppHRegAMD64_lo32
: ppHRegAMD64
)
1302 (i
->Ain
.SseSI2SF
.src
);
1304 ppHRegAMD64(i
->Ain
.SseSI2SF
.dst
);
1307 vex_printf("cvts%s2si ", i
->Ain
.SseSF2SI
.szS
==4 ? "s" : "d");
1308 ppHRegAMD64(i
->Ain
.SseSF2SI
.src
);
1310 (i
->Ain
.SseSF2SI
.szD
==4 ? ppHRegAMD64_lo32
: ppHRegAMD64
)
1311 (i
->Ain
.SseSF2SI
.dst
);
1314 vex_printf(i
->Ain
.SseSDSS
.from64
? "cvtsd2ss " : "cvtss2sd ");
1315 ppHRegAMD64(i
->Ain
.SseSDSS
.src
);
1317 ppHRegAMD64(i
->Ain
.SseSDSS
.dst
);
1320 switch (i
->Ain
.SseLdSt
.sz
) {
1321 case 4: vex_printf("movss "); break;
1322 case 8: vex_printf("movsd "); break;
1323 case 16: vex_printf("movups "); break;
1324 default: vassert(0);
1326 if (i
->Ain
.SseLdSt
.isLoad
) {
1327 ppAMD64AMode(i
->Ain
.SseLdSt
.addr
);
1329 ppHRegAMD64(i
->Ain
.SseLdSt
.reg
);
1331 ppHRegAMD64(i
->Ain
.SseLdSt
.reg
);
1333 ppAMD64AMode(i
->Ain
.SseLdSt
.addr
);
1337 vex_printf("if (%%rflags.%s) { ",
1338 showAMD64CondCode(i
->Ain
.SseCStore
.cond
));
1339 vex_printf("movups ");
1340 ppHRegAMD64(i
->Ain
.SseCStore
.src
);
1342 ppAMD64AMode(i
->Ain
.SseCStore
.addr
);
1346 vex_printf("if (%%rflags.%s) { ",
1347 showAMD64CondCode(i
->Ain
.SseCLoad
.cond
));
1348 vex_printf("movups ");
1349 ppAMD64AMode(i
->Ain
.SseCLoad
.addr
);
1351 ppHRegAMD64(i
->Ain
.SseCLoad
.dst
);
1355 vex_printf("movs%s ", i
->Ain
.SseLdzLO
.sz
==4 ? "s" : "d");
1356 ppAMD64AMode(i
->Ain
.SseLdzLO
.addr
);
1358 ppHRegAMD64(i
->Ain
.SseLdzLO
.reg
);
1361 vex_printf("%sps ", showAMD64SseOp(i
->Ain
.Sse32Fx4
.op
));
1362 ppHRegAMD64(i
->Ain
.Sse32Fx4
.src
);
1364 ppHRegAMD64(i
->Ain
.Sse32Fx4
.dst
);
1367 vex_printf("%sss ", showAMD64SseOp(i
->Ain
.Sse32FLo
.op
));
1368 ppHRegAMD64(i
->Ain
.Sse32FLo
.src
);
1370 ppHRegAMD64(i
->Ain
.Sse32FLo
.dst
);
1373 vex_printf("%spd ", showAMD64SseOp(i
->Ain
.Sse64Fx2
.op
));
1374 ppHRegAMD64(i
->Ain
.Sse64Fx2
.src
);
1376 ppHRegAMD64(i
->Ain
.Sse64Fx2
.dst
);
1379 vex_printf("%ssd ", showAMD64SseOp(i
->Ain
.Sse64FLo
.op
));
1380 ppHRegAMD64(i
->Ain
.Sse64FLo
.src
);
1382 ppHRegAMD64(i
->Ain
.Sse64FLo
.dst
);
1385 vex_printf("%s ", showAMD64SseOp(i
->Ain
.SseReRg
.op
));
1386 ppHRegAMD64(i
->Ain
.SseReRg
.src
);
1388 ppHRegAMD64(i
->Ain
.SseReRg
.dst
);
1391 vex_printf("cmov%s ", showAMD64CondCode(i
->Ain
.SseCMov
.cond
));
1392 ppHRegAMD64(i
->Ain
.SseCMov
.src
);
1394 ppHRegAMD64(i
->Ain
.SseCMov
.dst
);
1397 vex_printf("pshufd $0x%x,", (UInt
)i
->Ain
.SseShuf
.order
);
1398 ppHRegAMD64(i
->Ain
.SseShuf
.src
);
1400 ppHRegAMD64(i
->Ain
.SseShuf
.dst
);
1403 vex_printf("%s $%u, ", showAMD64SseOp(i
->Ain
.SseShiftN
.op
),
1404 i
->Ain
.SseShiftN
.shiftBits
);
1405 ppHRegAMD64(i
->Ain
.SseShiftN
.dst
);
1408 vex_printf("movq ");
1409 if (i
->Ain
.SseMOVQ
.toXMM
) {
1410 ppHRegAMD64(i
->Ain
.SseMOVQ
.gpr
);
1412 ppHRegAMD64(i
->Ain
.SseMOVQ
.xmm
);
1414 ppHRegAMD64(i
->Ain
.SseMOVQ
.xmm
);
1416 ppHRegAMD64(i
->Ain
.SseMOVQ
.gpr
);
1419 //uu case Ain_AvxLdSt:
1420 //uu vex_printf("vmovups ");
1421 //uu if (i->Ain.AvxLdSt.isLoad) {
1422 //uu ppAMD64AMode(i->Ain.AvxLdSt.addr);
1423 //uu vex_printf(",");
1424 //uu ppHRegAMD64(i->Ain.AvxLdSt.reg);
1426 //uu ppHRegAMD64(i->Ain.AvxLdSt.reg);
1427 //uu vex_printf(",");
1428 //uu ppAMD64AMode(i->Ain.AvxLdSt.addr);
1431 //uu case Ain_AvxReRg:
1432 //uu vex_printf("v%s ", showAMD64SseOp(i->Ain.SseReRg.op));
1433 //uu ppHRegAMD64(i->Ain.AvxReRg.src);
1434 //uu vex_printf(",");
1435 //uu ppHRegAMD64(i->Ain.AvxReRg.dst);
1438 vex_printf("(evCheck) decl ");
1439 ppAMD64AMode(i
->Ain
.EvCheck
.amCounter
);
1440 vex_printf("; jns nofail; jmp *");
1441 ppAMD64AMode(i
->Ain
.EvCheck
.amFailAddr
);
1442 vex_printf("; nofail:");
1445 vex_printf("(profInc) movabsq $NotKnownYet, %%r11; incq (%%r11)");
1448 vpanic("ppAMD64Instr");
1452 /* --------- Helpers for register allocation. --------- */
1454 void getRegUsage_AMD64Instr ( HRegUsage
* u
, const AMD64Instr
* i
, Bool mode64
)
1457 vassert(mode64
== True
);
1461 addHRegUse(u
, HRmWrite
, i
->Ain
.Imm64
.dst
);
1464 addRegUsage_AMD64RMI(u
, i
->Ain
.Alu64R
.src
);
1465 if (i
->Ain
.Alu64R
.op
== Aalu_MOV
) {
1466 addHRegUse(u
, HRmWrite
, i
->Ain
.Alu64R
.dst
);
1468 if (i
->Ain
.Alu64R
.src
->tag
== Armi_Reg
) {
1469 u
->isRegRegMove
= True
;
1470 u
->regMoveSrc
= i
->Ain
.Alu64R
.src
->Armi
.Reg
.reg
;
1471 u
->regMoveDst
= i
->Ain
.Alu64R
.dst
;
1475 if (i
->Ain
.Alu64R
.op
== Aalu_CMP
) {
1476 addHRegUse(u
, HRmRead
, i
->Ain
.Alu64R
.dst
);
1479 addHRegUse(u
, HRmModify
, i
->Ain
.Alu64R
.dst
);
1482 addRegUsage_AMD64RI(u
, i
->Ain
.Alu64M
.src
);
1483 addRegUsage_AMD64AMode(u
, i
->Ain
.Alu64M
.dst
);
1486 addHRegUse(u
, HRmModify
, i
->Ain
.Sh64
.dst
);
1487 if (i
->Ain
.Sh64
.src
== 0)
1488 addHRegUse(u
, HRmRead
, hregAMD64_RCX());
1491 addHRegUse(u
, HRmModify
, i
->Ain
.Sh32
.dst
);
1492 if (i
->Ain
.Sh32
.src
== 0)
1493 addHRegUse(u
, HRmRead
, hregAMD64_RCX());
1496 addHRegUse(u
, HRmRead
, i
->Ain
.Test64
.dst
);
1499 addHRegUse(u
, HRmModify
, i
->Ain
.Unary64
.dst
);
1502 addRegUsage_AMD64AMode(u
, i
->Ain
.Lea64
.am
);
1503 addHRegUse(u
, HRmWrite
, i
->Ain
.Lea64
.dst
);
1506 vassert(i
->Ain
.Alu32R
.op
!= Aalu_MOV
);
1507 addRegUsage_AMD64RMI(u
, i
->Ain
.Alu32R
.src
);
1508 if (i
->Ain
.Alu32R
.op
== Aalu_CMP
) {
1509 addHRegUse(u
, HRmRead
, i
->Ain
.Alu32R
.dst
);
1512 addHRegUse(u
, HRmModify
, i
->Ain
.Alu32R
.dst
);
1515 addRegUsage_AMD64RM(u
, i
->Ain
.MulL
.src
, HRmRead
);
1516 addHRegUse(u
, HRmModify
, hregAMD64_RAX());
1517 addHRegUse(u
, HRmWrite
, hregAMD64_RDX());
1520 addRegUsage_AMD64RM(u
, i
->Ain
.Div
.src
, HRmRead
);
1521 addHRegUse(u
, HRmModify
, hregAMD64_RAX());
1522 addHRegUse(u
, HRmModify
, hregAMD64_RDX());
1525 addRegUsage_AMD64RMI(u
, i
->Ain
.Push
.src
);
1526 addHRegUse(u
, HRmModify
, hregAMD64_RSP());
1529 /* This is a bit subtle. */
1530 /* First off, claim it trashes all the caller-saved regs
1531 which fall within the register allocator's jurisdiction.
1532 These I believe to be: rax rcx rdx rdi rsi r8 r9 r10
1533 and all the xmm registers. */
1534 addHRegUse(u
, HRmWrite
, hregAMD64_RAX());
1535 addHRegUse(u
, HRmWrite
, hregAMD64_RCX());
1536 addHRegUse(u
, HRmWrite
, hregAMD64_RDX());
1537 addHRegUse(u
, HRmWrite
, hregAMD64_RDI());
1538 addHRegUse(u
, HRmWrite
, hregAMD64_RSI());
1539 addHRegUse(u
, HRmWrite
, hregAMD64_R8());
1540 addHRegUse(u
, HRmWrite
, hregAMD64_R9());
1541 addHRegUse(u
, HRmWrite
, hregAMD64_R10());
1542 addHRegUse(u
, HRmWrite
, hregAMD64_XMM0());
1543 addHRegUse(u
, HRmWrite
, hregAMD64_XMM1());
1544 addHRegUse(u
, HRmWrite
, hregAMD64_XMM3());
1545 addHRegUse(u
, HRmWrite
, hregAMD64_XMM4());
1546 addHRegUse(u
, HRmWrite
, hregAMD64_XMM5());
1547 addHRegUse(u
, HRmWrite
, hregAMD64_XMM6());
1548 addHRegUse(u
, HRmWrite
, hregAMD64_XMM7());
1549 addHRegUse(u
, HRmWrite
, hregAMD64_XMM8());
1550 addHRegUse(u
, HRmWrite
, hregAMD64_XMM9());
1551 addHRegUse(u
, HRmWrite
, hregAMD64_XMM10());
1552 addHRegUse(u
, HRmWrite
, hregAMD64_XMM11());
1553 addHRegUse(u
, HRmWrite
, hregAMD64_XMM12());
1555 /* Now we have to state any parameter-carrying registers
1556 which might be read. This depends on the regparmness. */
1557 switch (i
->Ain
.Call
.regparms
) {
1558 case 6: addHRegUse(u
, HRmRead
, hregAMD64_R9()); /*fallthru*/
1559 case 5: addHRegUse(u
, HRmRead
, hregAMD64_R8()); /*fallthru*/
1560 case 4: addHRegUse(u
, HRmRead
, hregAMD64_RCX()); /*fallthru*/
1561 case 3: addHRegUse(u
, HRmRead
, hregAMD64_RDX()); /*fallthru*/
1562 case 2: addHRegUse(u
, HRmRead
, hregAMD64_RSI()); /*fallthru*/
1563 case 1: addHRegUse(u
, HRmRead
, hregAMD64_RDI()); break;
1565 default: vpanic("getRegUsage_AMD64Instr:Call:regparms");
1567 /* Finally, there is the issue that the insn trashes a
1568 register because the literal target address has to be
1569 loaded into a register. Fortunately, r11 is stated in the
1570 ABI as a scratch register, and so seems a suitable victim. */
1571 addHRegUse(u
, HRmWrite
, hregAMD64_R11());
1572 /* Upshot of this is that the assembler really must use r11,
1573 and no other, as a destination temporary. */
1575 /* XDirect/XIndir/XAssisted are also a bit subtle. They
1576 conditionally exit the block. Hence we only need to list (1)
1577 the registers that they read, and (2) the registers that they
1578 write in the case where the block is not exited. (2) is
1579 empty, hence only (1) is relevant here. */
1581 /* Don't bother to mention the write to %r11, since it is not
1582 available to the allocator. */
1583 addRegUsage_AMD64AMode(u
, i
->Ain
.XDirect
.amRIP
);
1587 addHRegUse(u
, HRmRead
, i
->Ain
.XIndir
.dstGA
);
1588 addRegUsage_AMD64AMode(u
, i
->Ain
.XIndir
.amRIP
);
1591 /* Ditto re %r11 and %rbp (the baseblock ptr) */
1592 addHRegUse(u
, HRmRead
, i
->Ain
.XAssisted
.dstGA
);
1593 addRegUsage_AMD64AMode(u
, i
->Ain
.XAssisted
.amRIP
);
1596 addHRegUse(u
, HRmRead
, i
->Ain
.CMov64
.src
);
1597 addHRegUse(u
, HRmModify
, i
->Ain
.CMov64
.dst
);
1600 addRegUsage_AMD64AMode(u
, i
->Ain
.CLoad
.addr
);
1601 addHRegUse(u
, HRmModify
, i
->Ain
.CLoad
.dst
);
1604 addRegUsage_AMD64AMode(u
, i
->Ain
.CStore
.addr
);
1605 addHRegUse(u
, HRmRead
, i
->Ain
.CStore
.src
);
1608 addHRegUse(u
, HRmRead
, i
->Ain
.MovxLQ
.src
);
1609 addHRegUse(u
, HRmWrite
, i
->Ain
.MovxLQ
.dst
);
1612 addRegUsage_AMD64AMode(u
, i
->Ain
.LoadEX
.src
);
1613 addHRegUse(u
, HRmWrite
, i
->Ain
.LoadEX
.dst
);
1616 addHRegUse(u
, HRmRead
, i
->Ain
.Store
.src
);
1617 addRegUsage_AMD64AMode(u
, i
->Ain
.Store
.dst
);
1620 addHRegUse(u
, HRmWrite
, i
->Ain
.Set64
.dst
);
1623 addHRegUse(u
, HRmRead
, i
->Ain
.Bsfr64
.src
);
1624 addHRegUse(u
, HRmWrite
, i
->Ain
.Bsfr64
.dst
);
1629 addRegUsage_AMD64AMode(u
, i
->Ain
.ACAS
.addr
);
1630 addHRegUse(u
, HRmRead
, hregAMD64_RBX());
1631 addHRegUse(u
, HRmModify
, hregAMD64_RAX());
1634 addRegUsage_AMD64AMode(u
, i
->Ain
.DACAS
.addr
);
1635 addHRegUse(u
, HRmRead
, hregAMD64_RCX());
1636 addHRegUse(u
, HRmRead
, hregAMD64_RBX());
1637 addHRegUse(u
, HRmModify
, hregAMD64_RDX());
1638 addHRegUse(u
, HRmModify
, hregAMD64_RAX());
1642 case Ain_A87PushPop
:
1643 addRegUsage_AMD64AMode(u
, i
->Ain
.A87PushPop
.addr
);
1648 addRegUsage_AMD64AMode(u
, i
->Ain
.A87LdCW
.addr
);
1651 addRegUsage_AMD64AMode(u
, i
->Ain
.A87StSW
.addr
);
1654 addRegUsage_AMD64AMode(u
, i
->Ain
.LdMXCSR
.addr
);
1657 addHRegUse(u
, HRmRead
, i
->Ain
.SseUComIS
.srcL
);
1658 addHRegUse(u
, HRmRead
, i
->Ain
.SseUComIS
.srcR
);
1659 addHRegUse(u
, HRmWrite
, i
->Ain
.SseUComIS
.dst
);
1662 addHRegUse(u
, HRmRead
, i
->Ain
.SseSI2SF
.src
);
1663 addHRegUse(u
, HRmWrite
, i
->Ain
.SseSI2SF
.dst
);
1666 addHRegUse(u
, HRmRead
, i
->Ain
.SseSF2SI
.src
);
1667 addHRegUse(u
, HRmWrite
, i
->Ain
.SseSF2SI
.dst
);
1670 addHRegUse(u
, HRmRead
, i
->Ain
.SseSDSS
.src
);
1671 addHRegUse(u
, HRmWrite
, i
->Ain
.SseSDSS
.dst
);
1674 addRegUsage_AMD64AMode(u
, i
->Ain
.SseLdSt
.addr
);
1675 addHRegUse(u
, i
->Ain
.SseLdSt
.isLoad
? HRmWrite
: HRmRead
,
1676 i
->Ain
.SseLdSt
.reg
);
1679 addRegUsage_AMD64AMode(u
, i
->Ain
.SseCStore
.addr
);
1680 addHRegUse(u
, HRmRead
, i
->Ain
.SseCStore
.src
);
1683 addRegUsage_AMD64AMode(u
, i
->Ain
.SseCLoad
.addr
);
1684 addHRegUse(u
, HRmModify
, i
->Ain
.SseCLoad
.dst
);
1687 addRegUsage_AMD64AMode(u
, i
->Ain
.SseLdzLO
.addr
);
1688 addHRegUse(u
, HRmWrite
, i
->Ain
.SseLdzLO
.reg
);
1691 vassert(i
->Ain
.Sse32Fx4
.op
!= Asse_MOV
);
1692 unary
= toBool( i
->Ain
.Sse32Fx4
.op
== Asse_RCPF
1693 || i
->Ain
.Sse32Fx4
.op
== Asse_RSQRTF
1694 || i
->Ain
.Sse32Fx4
.op
== Asse_SQRTF
1695 || i
->Ain
.Sse32Fx4
.op
== Asse_I2F
1696 || i
->Ain
.Sse32Fx4
.op
== Asse_F2I
1697 || i
->Ain
.Sse32Fx4
.op
== Asse_F32toF16
1698 || i
->Ain
.Sse32Fx4
.op
== Asse_F16toF32
);
1699 addHRegUse(u
, HRmRead
, i
->Ain
.Sse32Fx4
.src
);
1700 addHRegUse(u
, unary
? HRmWrite
: HRmModify
,
1701 i
->Ain
.Sse32Fx4
.dst
);
1704 vassert(i
->Ain
.Sse32FLo
.op
!= Asse_MOV
);
1705 unary
= toBool( i
->Ain
.Sse32FLo
.op
== Asse_RCPF
1706 || i
->Ain
.Sse32FLo
.op
== Asse_RSQRTF
1707 || i
->Ain
.Sse32FLo
.op
== Asse_SQRTF
);
1708 addHRegUse(u
, HRmRead
, i
->Ain
.Sse32FLo
.src
);
1709 addHRegUse(u
, unary
? HRmWrite
: HRmModify
,
1710 i
->Ain
.Sse32FLo
.dst
);
1713 vassert(i
->Ain
.Sse64Fx2
.op
!= Asse_MOV
);
1714 unary
= toBool( i
->Ain
.Sse64Fx2
.op
== Asse_RCPF
1715 || i
->Ain
.Sse64Fx2
.op
== Asse_RSQRTF
1716 || i
->Ain
.Sse64Fx2
.op
== Asse_SQRTF
);
1717 addHRegUse(u
, HRmRead
, i
->Ain
.Sse64Fx2
.src
);
1718 addHRegUse(u
, unary
? HRmWrite
: HRmModify
,
1719 i
->Ain
.Sse64Fx2
.dst
);
1722 vassert(i
->Ain
.Sse64FLo
.op
!= Asse_MOV
);
1723 unary
= toBool( i
->Ain
.Sse64FLo
.op
== Asse_RCPF
1724 || i
->Ain
.Sse64FLo
.op
== Asse_RSQRTF
1725 || i
->Ain
.Sse64FLo
.op
== Asse_SQRTF
);
1726 addHRegUse(u
, HRmRead
, i
->Ain
.Sse64FLo
.src
);
1727 addHRegUse(u
, unary
? HRmWrite
: HRmModify
,
1728 i
->Ain
.Sse64FLo
.dst
);
1731 if ( (i
->Ain
.SseReRg
.op
== Asse_XOR
1732 || i
->Ain
.SseReRg
.op
== Asse_CMPEQ32
)
1733 && sameHReg(i
->Ain
.SseReRg
.src
, i
->Ain
.SseReRg
.dst
)) {
1734 /* reg-alloc needs to understand 'xor r,r' and 'cmpeqd
1735 r,r' as a write of a value to r, and independent of any
1736 previous value in r */
1737 /* (as opposed to a rite of passage :-) */
1738 addHRegUse(u
, HRmWrite
, i
->Ain
.SseReRg
.dst
);
1740 addHRegUse(u
, HRmRead
, i
->Ain
.SseReRg
.src
);
1741 addHRegUse(u
, i
->Ain
.SseReRg
.op
== Asse_MOV
1742 ? HRmWrite
: HRmModify
,
1743 i
->Ain
.SseReRg
.dst
);
1745 if (i
->Ain
.SseReRg
.op
== Asse_MOV
) {
1746 u
->isRegRegMove
= True
;
1747 u
->regMoveSrc
= i
->Ain
.SseReRg
.src
;
1748 u
->regMoveDst
= i
->Ain
.SseReRg
.dst
;
1753 addHRegUse(u
, HRmRead
, i
->Ain
.SseCMov
.src
);
1754 addHRegUse(u
, HRmModify
, i
->Ain
.SseCMov
.dst
);
1757 addHRegUse(u
, HRmRead
, i
->Ain
.SseShuf
.src
);
1758 addHRegUse(u
, HRmWrite
, i
->Ain
.SseShuf
.dst
);
1761 addHRegUse(u
, HRmModify
, i
->Ain
.SseShiftN
.dst
);
1764 addHRegUse(u
, i
->Ain
.SseMOVQ
.toXMM
? HRmRead
: HRmWrite
,
1765 i
->Ain
.SseMOVQ
.gpr
);
1766 addHRegUse(u
, i
->Ain
.SseMOVQ
.toXMM
? HRmWrite
: HRmRead
,
1767 i
->Ain
.SseMOVQ
.xmm
);
1769 //uu case Ain_AvxLdSt:
1770 //uu addRegUsage_AMD64AMode(u, i->Ain.AvxLdSt.addr);
1771 //uu addHRegUse(u, i->Ain.AvxLdSt.isLoad ? HRmWrite : HRmRead,
1772 //uu i->Ain.AvxLdSt.reg);
1774 //uu case Ain_AvxReRg:
1775 //uu if ( (i->Ain.AvxReRg.op == Asse_XOR
1776 //uu || i->Ain.AvxReRg.op == Asse_CMPEQ32)
1777 //uu && i->Ain.AvxReRg.src == i->Ain.AvxReRg.dst) {
1778 //uu /* See comments on the case for Ain_SseReRg. */
1779 //uu addHRegUse(u, HRmWrite, i->Ain.AvxReRg.dst);
1781 //uu addHRegUse(u, HRmRead, i->Ain.AvxReRg.src);
1782 //uu addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV
1783 //uu ? HRmWrite : HRmModify,
1784 //uu i->Ain.AvxReRg.dst);
1786 //uu if (i->Ain.AvxReRg.op == Asse_MOV) {
1787 //uu u->isRegRegMove = True;
1788 //uu u->regMoveSrc = i->Ain.AvxReRg.src;
1789 //uu u->regMoveDst = i->Ain.AvxReRg.dst;
1794 /* We expect both amodes only to mention %rbp, so this is in
1795 fact pointless, since %rbp isn't allocatable, but anyway.. */
1796 addRegUsage_AMD64AMode(u
, i
->Ain
.EvCheck
.amCounter
);
1797 addRegUsage_AMD64AMode(u
, i
->Ain
.EvCheck
.amFailAddr
);
1800 addHRegUse(u
, HRmWrite
, hregAMD64_R11());
1803 ppAMD64Instr(i
, mode64
);
1804 vpanic("getRegUsage_AMD64Instr");
1809 static inline void mapReg(HRegRemap
* m
, HReg
* r
)
1811 *r
= lookupHRegRemap(m
, *r
);
1814 void mapRegs_AMD64Instr ( HRegRemap
* m
, AMD64Instr
* i
, Bool mode64
)
1816 vassert(mode64
== True
);
1819 mapReg(m
, &i
->Ain
.Imm64
.dst
);
1822 mapRegs_AMD64RMI(m
, i
->Ain
.Alu64R
.src
);
1823 mapReg(m
, &i
->Ain
.Alu64R
.dst
);
1826 mapRegs_AMD64RI(m
, i
->Ain
.Alu64M
.src
);
1827 mapRegs_AMD64AMode(m
, i
->Ain
.Alu64M
.dst
);
1830 mapReg(m
, &i
->Ain
.Sh64
.dst
);
1833 mapReg(m
, &i
->Ain
.Sh32
.dst
);
1836 mapReg(m
, &i
->Ain
.Test64
.dst
);
1839 mapReg(m
, &i
->Ain
.Unary64
.dst
);
1842 mapRegs_AMD64AMode(m
, i
->Ain
.Lea64
.am
);
1843 mapReg(m
, &i
->Ain
.Lea64
.dst
);
1846 mapRegs_AMD64RMI(m
, i
->Ain
.Alu32R
.src
);
1847 mapReg(m
, &i
->Ain
.Alu32R
.dst
);
1850 mapRegs_AMD64RM(m
, i
->Ain
.MulL
.src
);
1853 mapRegs_AMD64RM(m
, i
->Ain
.Div
.src
);
1856 mapRegs_AMD64RMI(m
, i
->Ain
.Push
.src
);
1861 mapRegs_AMD64AMode(m
, i
->Ain
.XDirect
.amRIP
);
1864 mapReg(m
, &i
->Ain
.XIndir
.dstGA
);
1865 mapRegs_AMD64AMode(m
, i
->Ain
.XIndir
.amRIP
);
1868 mapReg(m
, &i
->Ain
.XAssisted
.dstGA
);
1869 mapRegs_AMD64AMode(m
, i
->Ain
.XAssisted
.amRIP
);
1872 mapReg(m
, &i
->Ain
.CMov64
.src
);
1873 mapReg(m
, &i
->Ain
.CMov64
.dst
);
1876 mapRegs_AMD64AMode(m
, i
->Ain
.CLoad
.addr
);
1877 mapReg(m
, &i
->Ain
.CLoad
.dst
);
1880 mapRegs_AMD64AMode(m
, i
->Ain
.CStore
.addr
);
1881 mapReg(m
, &i
->Ain
.CStore
.src
);
1884 mapReg(m
, &i
->Ain
.MovxLQ
.src
);
1885 mapReg(m
, &i
->Ain
.MovxLQ
.dst
);
1888 mapRegs_AMD64AMode(m
, i
->Ain
.LoadEX
.src
);
1889 mapReg(m
, &i
->Ain
.LoadEX
.dst
);
1892 mapReg(m
, &i
->Ain
.Store
.src
);
1893 mapRegs_AMD64AMode(m
, i
->Ain
.Store
.dst
);
1896 mapReg(m
, &i
->Ain
.Set64
.dst
);
1899 mapReg(m
, &i
->Ain
.Bsfr64
.src
);
1900 mapReg(m
, &i
->Ain
.Bsfr64
.dst
);
1905 mapRegs_AMD64AMode(m
, i
->Ain
.ACAS
.addr
);
1908 mapRegs_AMD64AMode(m
, i
->Ain
.DACAS
.addr
);
1912 case Ain_A87PushPop
:
1913 mapRegs_AMD64AMode(m
, i
->Ain
.A87PushPop
.addr
);
1918 mapRegs_AMD64AMode(m
, i
->Ain
.A87LdCW
.addr
);
1921 mapRegs_AMD64AMode(m
, i
->Ain
.A87StSW
.addr
);
1924 mapRegs_AMD64AMode(m
, i
->Ain
.LdMXCSR
.addr
);
1927 mapReg(m
, &i
->Ain
.SseUComIS
.srcL
);
1928 mapReg(m
, &i
->Ain
.SseUComIS
.srcR
);
1929 mapReg(m
, &i
->Ain
.SseUComIS
.dst
);
1932 mapReg(m
, &i
->Ain
.SseSI2SF
.src
);
1933 mapReg(m
, &i
->Ain
.SseSI2SF
.dst
);
1936 mapReg(m
, &i
->Ain
.SseSF2SI
.src
);
1937 mapReg(m
, &i
->Ain
.SseSF2SI
.dst
);
1940 mapReg(m
, &i
->Ain
.SseSDSS
.src
);
1941 mapReg(m
, &i
->Ain
.SseSDSS
.dst
);
1944 mapReg(m
, &i
->Ain
.SseLdSt
.reg
);
1945 mapRegs_AMD64AMode(m
, i
->Ain
.SseLdSt
.addr
);
1948 mapRegs_AMD64AMode(m
, i
->Ain
.SseCStore
.addr
);
1949 mapReg(m
, &i
->Ain
.SseCStore
.src
);
1952 mapRegs_AMD64AMode(m
, i
->Ain
.SseCLoad
.addr
);
1953 mapReg(m
, &i
->Ain
.SseCLoad
.dst
);
1956 mapReg(m
, &i
->Ain
.SseLdzLO
.reg
);
1957 mapRegs_AMD64AMode(m
, i
->Ain
.SseLdzLO
.addr
);
1960 mapReg(m
, &i
->Ain
.Sse32Fx4
.src
);
1961 mapReg(m
, &i
->Ain
.Sse32Fx4
.dst
);
1964 mapReg(m
, &i
->Ain
.Sse32FLo
.src
);
1965 mapReg(m
, &i
->Ain
.Sse32FLo
.dst
);
1968 mapReg(m
, &i
->Ain
.Sse64Fx2
.src
);
1969 mapReg(m
, &i
->Ain
.Sse64Fx2
.dst
);
1972 mapReg(m
, &i
->Ain
.Sse64FLo
.src
);
1973 mapReg(m
, &i
->Ain
.Sse64FLo
.dst
);
1976 mapReg(m
, &i
->Ain
.SseReRg
.src
);
1977 mapReg(m
, &i
->Ain
.SseReRg
.dst
);
1980 mapReg(m
, &i
->Ain
.SseCMov
.src
);
1981 mapReg(m
, &i
->Ain
.SseCMov
.dst
);
1984 mapReg(m
, &i
->Ain
.SseShuf
.src
);
1985 mapReg(m
, &i
->Ain
.SseShuf
.dst
);
1988 mapReg(m
, &i
->Ain
.SseShiftN
.dst
);
1991 mapReg(m
, &i
->Ain
.SseMOVQ
.gpr
);
1992 mapReg(m
, &i
->Ain
.SseMOVQ
.xmm
);
1994 //uu case Ain_AvxLdSt:
1995 //uu mapReg(m, &i->Ain.AvxLdSt.reg);
1996 //uu mapRegs_AMD64AMode(m, i->Ain.AvxLdSt.addr);
1998 //uu case Ain_AvxReRg:
1999 //uu mapReg(m, &i->Ain.AvxReRg.src);
2000 //uu mapReg(m, &i->Ain.AvxReRg.dst);
2003 /* We expect both amodes only to mention %rbp, so this is in
2004 fact pointless, since %rbp isn't allocatable, but anyway.. */
2005 mapRegs_AMD64AMode(m
, i
->Ain
.EvCheck
.amCounter
);
2006 mapRegs_AMD64AMode(m
, i
->Ain
.EvCheck
.amFailAddr
);
2009 /* hardwires r11 -- nothing to modify. */
2012 ppAMD64Instr(i
, mode64
);
2013 vpanic("mapRegs_AMD64Instr");
2017 /* Generate amd64 spill/reload instructions under the direction of the
2018 register allocator. Note it's critical these don't write the
2021 void genSpill_AMD64 ( /*OUT*/HInstr
** i1
, /*OUT*/HInstr
** i2
,
2022 HReg rreg
, Int offsetB
, Bool mode64
)
2025 vassert(offsetB
>= 0);
2026 vassert(!hregIsVirtual(rreg
));
2027 vassert(mode64
== True
);
2029 am
= AMD64AMode_IR(offsetB
, hregAMD64_RBP());
2030 switch (hregClass(rreg
)) {
2032 *i1
= AMD64Instr_Alu64M ( Aalu_MOV
, AMD64RI_Reg(rreg
), am
);
2035 *i1
= AMD64Instr_SseLdSt ( False
/*store*/, 16, rreg
, am
);
2038 ppHRegClass(hregClass(rreg
));
2039 vpanic("genSpill_AMD64: unimplemented regclass");
2043 void genReload_AMD64 ( /*OUT*/HInstr
** i1
, /*OUT*/HInstr
** i2
,
2044 HReg rreg
, Int offsetB
, Bool mode64
)
2047 vassert(offsetB
>= 0);
2048 vassert(!hregIsVirtual(rreg
));
2049 vassert(mode64
== True
);
2051 am
= AMD64AMode_IR(offsetB
, hregAMD64_RBP());
2052 switch (hregClass(rreg
)) {
2054 *i1
= AMD64Instr_Alu64R ( Aalu_MOV
, AMD64RMI_Mem(am
), rreg
);
2057 *i1
= AMD64Instr_SseLdSt ( True
/*load*/, 16, rreg
, am
);
2060 ppHRegClass(hregClass(rreg
));
2061 vpanic("genReload_AMD64: unimplemented regclass");
2065 AMD64Instr
* genMove_AMD64(HReg from
, HReg to
, Bool mode64
)
2067 switch (hregClass(from
)) {
2069 return AMD64Instr_Alu64R(Aalu_MOV
, AMD64RMI_Reg(from
), to
);
2071 return AMD64Instr_SseReRg(Asse_MOV
, from
, to
);
2073 ppHRegClass(hregClass(from
));
2074 vpanic("genMove_AMD64: unimplemented regclass");
2078 AMD64Instr
* directReload_AMD64( AMD64Instr
* i
, HReg vreg
, Short spill_off
)
2080 vassert(spill_off
>= 0 && spill_off
< 10000); /* let's say */
2082 /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
2083 Convert to: src=RMI_Mem, dst=Reg
2085 if (i
->tag
== Ain_Alu64R
2086 && (i
->Ain
.Alu64R
.op
== Aalu_MOV
|| i
->Ain
.Alu64R
.op
== Aalu_OR
2087 || i
->Ain
.Alu64R
.op
== Aalu_XOR
)
2088 && i
->Ain
.Alu64R
.src
->tag
== Armi_Reg
2089 && sameHReg(i
->Ain
.Alu64R
.src
->Armi
.Reg
.reg
, vreg
)) {
2090 vassert(! sameHReg(i
->Ain
.Alu64R
.dst
, vreg
));
2091 return AMD64Instr_Alu64R(
2093 AMD64RMI_Mem( AMD64AMode_IR( spill_off
, hregAMD64_RBP())),
2098 /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
2099 Convert to: src=RI_Imm, dst=Mem
2101 if (i
->tag
== Ain_Alu64R
2102 && (i
->Ain
.Alu64R
.op
== Aalu_CMP
)
2103 && i
->Ain
.Alu64R
.src
->tag
== Armi_Imm
2104 && sameHReg(i
->Ain
.Alu64R
.dst
, vreg
)) {
2105 return AMD64Instr_Alu64M(
2107 AMD64RI_Imm( i
->Ain
.Alu64R
.src
->Armi
.Imm
.imm32
),
2108 AMD64AMode_IR( spill_off
, hregAMD64_RBP())
2116 /* --------- The amd64 assembler (bleh.) --------- */
2118 /* Produce the low three bits of an integer register number. */
2119 inline static UInt
iregEnc210 ( HReg r
)
2122 vassert(hregClass(r
) == HRcInt64
);
2123 vassert(!hregIsVirtual(r
));
2124 n
= hregEncoding(r
);
2129 /* Produce bit 3 of an integer register number. */
2130 inline static UInt
iregEnc3 ( HReg r
)
2133 vassert(hregClass(r
) == HRcInt64
);
2134 vassert(!hregIsVirtual(r
));
2135 n
= hregEncoding(r
);
2137 return (n
>> 3) & 1;
2140 /* Produce a complete 4-bit integer register number. */
2141 inline static UInt
iregEnc3210 ( HReg r
)
2144 vassert(hregClass(r
) == HRcInt64
);
2145 vassert(!hregIsVirtual(r
));
2146 n
= hregEncoding(r
);
2151 /* Produce a complete 4-bit integer register number. */
2152 inline static UInt
vregEnc3210 ( HReg r
)
2155 vassert(hregClass(r
) == HRcVec128
);
2156 vassert(!hregIsVirtual(r
));
2157 n
= hregEncoding(r
);
2162 inline static UChar
mkModRegRM ( UInt mod
, UInt reg
, UInt regmem
)
2165 vassert((reg
|regmem
) < 8);
2166 return (UChar
)( ((mod
& 3) << 6) | ((reg
& 7) << 3) | (regmem
& 7) );
2169 inline static UChar
mkSIB ( UInt shift
, UInt regindex
, UInt regbase
)
2172 vassert((regindex
|regbase
) < 8);
2173 return (UChar
)( ((shift
& 3) << 6) | ((regindex
& 7) << 3) | (regbase
& 7) );
2176 static UChar
* emit32 ( UChar
* p
, UInt w32
)
2178 *p
++ = toUChar((w32
) & 0x000000FF);
2179 *p
++ = toUChar((w32
>> 8) & 0x000000FF);
2180 *p
++ = toUChar((w32
>> 16) & 0x000000FF);
2181 *p
++ = toUChar((w32
>> 24) & 0x000000FF);
2185 static UChar
* emit64 ( UChar
* p
, ULong w64
)
2187 p
= emit32(p
, toUInt(w64
& 0xFFFFFFFF));
2188 p
= emit32(p
, toUInt((w64
>> 32) & 0xFFFFFFFF));
2192 /* Does a sign-extend of the lowest 8 bits give
2193 the original number? */
2194 static Bool
fits8bits ( UInt w32
)
2197 return toBool(i32
== ((Int
)(w32
<< 24) >> 24));
2199 /* Can the lower 32 bits be signedly widened to produce the whole
2200 64-bit value? In other words, are the top 33 bits either all 0 or
2202 static Bool
fitsIn32Bits ( ULong x
)
2207 return toBool(x
== y1
);
2211 /* Forming mod-reg-rm bytes and scale-index-base bytes.
2213 greg, 0(ereg) | ereg is not any of: RSP RBP R12 R13
2216 greg, d8(ereg) | ereg is neither of: RSP R12
2219 greg, d32(ereg) | ereg is neither of: RSP R12
2222 greg, d8(ereg) | ereg is either: RSP R12
2223 = 01 greg 100, 0x24, d8
2224 (lowest bit of rex distinguishes R12/RSP)
2226 greg, d32(ereg) | ereg is either: RSP R12
2227 = 10 greg 100, 0x24, d32
2228 (lowest bit of rex distinguishes R12/RSP)
2230 -----------------------------------------------
2232 greg, d8(base,index,scale)
2234 = 01 greg 100, scale index base, d8
2236 greg, d32(base,index,scale)
2238 = 10 greg 100, scale index base, d32
2240 static UChar
* doAMode_M__wrk ( UChar
* p
, UInt gregEnc3210
, AMD64AMode
* am
)
2242 UInt gregEnc210
= gregEnc3210
& 7;
2243 if (am
->tag
== Aam_IR
) {
2244 if (am
->Aam
.IR
.imm
== 0
2245 && ! sameHReg(am
->Aam
.IR
.reg
, hregAMD64_RSP())
2246 && ! sameHReg(am
->Aam
.IR
.reg
, hregAMD64_RBP())
2247 && ! sameHReg(am
->Aam
.IR
.reg
, hregAMD64_R12())
2248 && ! sameHReg(am
->Aam
.IR
.reg
, hregAMD64_R13())
2250 *p
++ = mkModRegRM(0, gregEnc210
, iregEnc210(am
->Aam
.IR
.reg
));
2253 if (fits8bits(am
->Aam
.IR
.imm
)
2254 && ! sameHReg(am
->Aam
.IR
.reg
, hregAMD64_RSP())
2255 && ! sameHReg(am
->Aam
.IR
.reg
, hregAMD64_R12())
2257 *p
++ = mkModRegRM(1, gregEnc210
, iregEnc210(am
->Aam
.IR
.reg
));
2258 *p
++ = toUChar(am
->Aam
.IR
.imm
& 0xFF);
2261 if (! sameHReg(am
->Aam
.IR
.reg
, hregAMD64_RSP())
2262 && ! sameHReg(am
->Aam
.IR
.reg
, hregAMD64_R12())
2264 *p
++ = mkModRegRM(2, gregEnc210
, iregEnc210(am
->Aam
.IR
.reg
));
2265 p
= emit32(p
, am
->Aam
.IR
.imm
);
2268 if ((sameHReg(am
->Aam
.IR
.reg
, hregAMD64_RSP())
2269 || sameHReg(am
->Aam
.IR
.reg
, hregAMD64_R12()))
2270 && fits8bits(am
->Aam
.IR
.imm
)) {
2271 *p
++ = mkModRegRM(1, gregEnc210
, 4);
2273 *p
++ = toUChar(am
->Aam
.IR
.imm
& 0xFF);
2276 if (/* (sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2277 || wait for test case for RSP case */
2278 sameHReg(am
->Aam
.IR
.reg
, hregAMD64_R12())) {
2279 *p
++ = mkModRegRM(2, gregEnc210
, 4);
2281 p
= emit32(p
, am
->Aam
.IR
.imm
);
2285 vpanic("doAMode_M: can't emit amode IR");
2288 if (am
->tag
== Aam_IRRS
) {
2289 if (fits8bits(am
->Aam
.IRRS
.imm
)
2290 && ! sameHReg(am
->Aam
.IRRS
.index
, hregAMD64_RSP())) {
2291 *p
++ = mkModRegRM(1, gregEnc210
, 4);
2292 *p
++ = mkSIB(am
->Aam
.IRRS
.shift
, iregEnc210(am
->Aam
.IRRS
.index
),
2293 iregEnc210(am
->Aam
.IRRS
.base
));
2294 *p
++ = toUChar(am
->Aam
.IRRS
.imm
& 0xFF);
2297 if (! sameHReg(am
->Aam
.IRRS
.index
, hregAMD64_RSP())) {
2298 *p
++ = mkModRegRM(2, gregEnc210
, 4);
2299 *p
++ = mkSIB(am
->Aam
.IRRS
.shift
, iregEnc210(am
->Aam
.IRRS
.index
),
2300 iregEnc210(am
->Aam
.IRRS
.base
));
2301 p
= emit32(p
, am
->Aam
.IRRS
.imm
);
2305 vpanic("doAMode_M: can't emit amode IRRS");
2308 vpanic("doAMode_M: unknown amode");
2312 static UChar
* doAMode_M ( UChar
* p
, HReg greg
, AMD64AMode
* am
)
2314 return doAMode_M__wrk(p
, iregEnc3210(greg
), am
);
2317 static UChar
* doAMode_M_enc ( UChar
* p
, UInt gregEnc3210
, AMD64AMode
* am
)
2319 vassert(gregEnc3210
< 16);
2320 return doAMode_M__wrk(p
, gregEnc3210
, am
);
2324 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
2326 static UChar
* doAMode_R__wrk ( UChar
* p
, UInt gregEnc3210
, UInt eregEnc3210
)
2328 *p
++ = mkModRegRM(3, gregEnc3210
& 7, eregEnc3210
& 7);
2332 static UChar
* doAMode_R ( UChar
* p
, HReg greg
, HReg ereg
)
2334 return doAMode_R__wrk(p
, iregEnc3210(greg
), iregEnc3210(ereg
));
2337 static UChar
* doAMode_R_enc_reg ( UChar
* p
, UInt gregEnc3210
, HReg ereg
)
2339 vassert(gregEnc3210
< 16);
2340 return doAMode_R__wrk(p
, gregEnc3210
, iregEnc3210(ereg
));
2343 static UChar
* doAMode_R_reg_enc ( UChar
* p
, HReg greg
, UInt eregEnc3210
)
2345 vassert(eregEnc3210
< 16);
2346 return doAMode_R__wrk(p
, iregEnc3210(greg
), eregEnc3210
);
2349 static UChar
* doAMode_R_enc_enc ( UChar
* p
, UInt gregEnc3210
, UInt eregEnc3210
)
2351 vassert( (gregEnc3210
|eregEnc3210
) < 16);
2352 return doAMode_R__wrk(p
, gregEnc3210
, eregEnc3210
);
2356 /* Clear the W bit on a REX byte, thereby changing the operand size
2357 back to whatever that instruction's default operand size is. */
2358 static inline UChar
clearWBit ( UChar rex
)
2360 return rex
& ~(1<<3);
2363 static inline UChar
setWBit ( UChar rex
)
2365 return rex
| (1<<3);
2369 /* Make up a REX byte, with W=1 (size=64), for a (greg,amode) pair. */
2370 inline static UChar
rexAMode_M__wrk ( UInt gregEnc3210
, AMD64AMode
* am
)
2372 if (am
->tag
== Aam_IR
) {
2373 UChar W
= 1; /* we want 64-bit mode */
2374 UChar R
= (gregEnc3210
>> 3) & 1;
2375 UChar X
= 0; /* not relevant */
2376 UChar B
= iregEnc3(am
->Aam
.IR
.reg
);
2377 return 0x40 + ((W
<< 3) | (R
<< 2) | (X
<< 1) | (B
<< 0));
2379 if (am
->tag
== Aam_IRRS
) {
2380 UChar W
= 1; /* we want 64-bit mode */
2381 UChar R
= (gregEnc3210
>> 3) & 1;
2382 UChar X
= iregEnc3(am
->Aam
.IRRS
.index
);
2383 UChar B
= iregEnc3(am
->Aam
.IRRS
.base
);
2384 return 0x40 + ((W
<< 3) | (R
<< 2) | (X
<< 1) | (B
<< 0));
2387 return 0; /*NOTREACHED*/
2390 static UChar
rexAMode_M ( HReg greg
, AMD64AMode
* am
)
2392 return rexAMode_M__wrk(iregEnc3210(greg
), am
);
2395 static UChar
rexAMode_M_enc ( UInt gregEnc3210
, AMD64AMode
* am
)
2397 vassert(gregEnc3210
< 16);
2398 return rexAMode_M__wrk(gregEnc3210
, am
);
2402 /* Make up a REX byte, with W=1 (size=64), for a (greg,ereg) pair. */
2403 inline static UChar
rexAMode_R__wrk ( UInt gregEnc3210
, UInt eregEnc3210
)
2405 UChar W
= 1; /* we want 64-bit mode */
2406 UChar R
= (gregEnc3210
>> 3) & 1;
2407 UChar X
= 0; /* not relevant */
2408 UChar B
= (eregEnc3210
>> 3) & 1;
2409 return 0x40 + ((W
<< 3) | (R
<< 2) | (X
<< 1) | (B
<< 0));
2412 static UChar
rexAMode_R ( HReg greg
, HReg ereg
)
2414 return rexAMode_R__wrk(iregEnc3210(greg
), iregEnc3210(ereg
));
2417 static UChar
rexAMode_R_enc_reg ( UInt gregEnc3210
, HReg ereg
)
2419 vassert(gregEnc3210
< 16);
2420 return rexAMode_R__wrk(gregEnc3210
, iregEnc3210(ereg
));
2423 static UChar
rexAMode_R_reg_enc ( HReg greg
, UInt eregEnc3210
)
2425 vassert(eregEnc3210
< 16);
2426 return rexAMode_R__wrk(iregEnc3210(greg
), eregEnc3210
);
2429 static UChar
rexAMode_R_enc_enc ( UInt gregEnc3210
, UInt eregEnc3210
)
2431 vassert((gregEnc3210
|eregEnc3210
) < 16);
2432 return rexAMode_R__wrk(gregEnc3210
, eregEnc3210
);
2436 //uu /* May 2012: this VEX prefix stuff is currently unused, but has
2437 //uu been verified correct (I reckon). Certainly it has been known to
2438 //uu produce correct VEX prefixes during testing. */
2440 //uu /* Assemble a 2 or 3 byte VEX prefix from parts. rexR, rexX, rexB and
2441 //uu notVvvvv need to be not-ed before packing. mmmmm, rexW, L and pp go
2442 //uu in verbatim. There's no range checking on the bits. */
2443 //uu static UInt packVexPrefix ( UInt rexR, UInt rexX, UInt rexB,
2444 //uu UInt mmmmm, UInt rexW, UInt notVvvv,
2445 //uu UInt L, UInt pp )
2447 //uu UChar byte0 = 0;
2448 //uu UChar byte1 = 0;
2449 //uu UChar byte2 = 0;
2450 //uu if (rexX == 0 && rexB == 0 && mmmmm == 1 && rexW == 0) {
2451 //uu /* 2 byte encoding is possible. */
2453 //uu byte1 = ((rexR ^ 1) << 7) | ((notVvvv ^ 0xF) << 3)
2454 //uu | (L << 2) | pp;
2456 //uu /* 3 byte encoding is needed. */
2458 //uu byte1 = ((rexR ^ 1) << 7) | ((rexX ^ 1) << 6)
2459 //uu | ((rexB ^ 1) << 5) | mmmmm;
2460 //uu byte2 = (rexW << 7) | ((notVvvv ^ 0xF) << 3) | (L << 2) | pp;
2462 //uu return (((UInt)byte2) << 16) | (((UInt)byte1) << 8) | ((UInt)byte0);
2465 //uu /* Make up a VEX prefix for a (greg,amode) pair. First byte in bits
2466 //uu 7:0 of result, second in 15:8, third (for a 3 byte prefix) in
2467 //uu 23:16. Has m-mmmm set to indicate a prefix of 0F, pp set to
2468 //uu indicate no SIMD prefix, W=0 (ignore), L=1 (size=256), and
2469 //uu vvvv=1111 (unused 3rd reg). */
2470 //uu static UInt vexAMode_M ( HReg greg, AMD64AMode* am )
2472 //uu UChar L = 1; /* size = 256 */
2473 //uu UChar pp = 0; /* no SIMD prefix */
2474 //uu UChar mmmmm = 1; /* 0F */
2475 //uu UChar notVvvv = 0; /* unused */
2476 //uu UChar rexW = 0;
2477 //uu UChar rexR = 0;
2478 //uu UChar rexX = 0;
2479 //uu UChar rexB = 0;
2480 //uu /* Same logic as in rexAMode_M. */
2481 //uu if (am->tag == Aam_IR) {
2482 //uu rexR = iregEnc3(greg);
2483 //uu rexX = 0; /* not relevant */
2484 //uu rexB = iregEnc3(am->Aam.IR.reg);
2486 //uu else if (am->tag == Aam_IRRS) {
2487 //uu rexR = iregEnc3(greg);
2488 //uu rexX = iregEnc3(am->Aam.IRRS.index);
2489 //uu rexB = iregEnc3(am->Aam.IRRS.base);
2493 //uu return packVexPrefix( rexR, rexX, rexB, mmmmm, rexW, notVvvv, L, pp );
2496 //uu static UChar* emitVexPrefix ( UChar* p, UInt vex )
2498 //uu switch (vex & 0xFF) {
2501 //uu *p++ = (vex >> 8) & 0xFF;
2502 //uu vassert(0 == (vex >> 16));
2506 //uu *p++ = (vex >> 8) & 0xFF;
2507 //uu *p++ = (vex >> 16) & 0xFF;
2508 //uu vassert(0 == (vex >> 24));
2517 /* Emit ffree %st(N) */
2518 static UChar
* do_ffree_st ( UChar
* p
, Int n
)
2520 vassert(n
>= 0 && n
<= 7);
2522 *p
++ = toUChar(0xC0 + n
);
2526 /* Emit an instruction into buf and return the number of bytes used.
2527 Note that buf is not the insn's final place, and therefore it is
2528 imperative to emit position-independent code. If the emitted
2529 instruction was a profiler inc, set *is_profInc to True, else
2530 leave it unchanged. */
2532 Int
emit_AMD64Instr ( /*MB_MOD*/Bool
* is_profInc
,
2533 UChar
* buf
, Int nbuf
, const AMD64Instr
* i
,
2534 Bool mode64
, VexEndness endness_host
,
2535 const void* disp_cp_chain_me_to_slowEP
,
2536 const void* disp_cp_chain_me_to_fastEP
,
2537 const void* disp_cp_xindir
,
2538 const void* disp_cp_xassisted
)
2540 UInt
/*irno,*/ opc
, opc_rr
, subopc_imm
, opc_imma
, opc_cl
, opc_imm
, subopc
;
2547 vassert(nbuf
>= 64);
2548 vassert(mode64
== True
);
2550 /* vex_printf("asm "); ppAMD64Instr(i, mode64); vex_printf("\n"); */
2555 if (i
->Ain
.Imm64
.imm64
<= 0xFFFFFULL
) {
2556 /* Use the short form (load into 32 bit reg, + default
2557 widening rule) for constants under 1 million. We could
2558 use this form for the range 0 to 0x7FFFFFFF inclusive, but
2559 limit it to a smaller range for verifiability purposes. */
2560 if (1 & iregEnc3(i
->Ain
.Imm64
.dst
))
2562 *p
++ = 0xB8 + iregEnc210(i
->Ain
.Imm64
.dst
);
2563 p
= emit32(p
, (UInt
)i
->Ain
.Imm64
.imm64
);
2565 *p
++ = toUChar(0x48 + (1 & iregEnc3(i
->Ain
.Imm64
.dst
)));
2566 *p
++ = toUChar(0xB8 + iregEnc210(i
->Ain
.Imm64
.dst
));
2567 p
= emit64(p
, i
->Ain
.Imm64
.imm64
);
2572 /* Deal specially with MOV */
2573 if (i
->Ain
.Alu64R
.op
== Aalu_MOV
) {
2574 switch (i
->Ain
.Alu64R
.src
->tag
) {
2576 if (0 == (i
->Ain
.Alu64R
.src
->Armi
.Imm
.imm32
& ~0xFFFFF)) {
2577 /* Actually we could use this form for constants in
2578 the range 0 through 0x7FFFFFFF inclusive, but
2579 limit it to a small range for verifiability
2581 /* Generate "movl $imm32, 32-bit-register" and let
2582 the default zero-extend rule cause the upper half
2583 of the dst to be zeroed out too. This saves 1
2584 and sometimes 2 bytes compared to the more
2585 obvious encoding in the 'else' branch. */
2586 if (1 & iregEnc3(i
->Ain
.Alu64R
.dst
))
2588 *p
++ = 0xB8 + iregEnc210(i
->Ain
.Alu64R
.dst
);
2589 p
= emit32(p
, i
->Ain
.Alu64R
.src
->Armi
.Imm
.imm32
);
2591 *p
++ = toUChar(0x48 + (1 & iregEnc3(i
->Ain
.Alu64R
.dst
)));
2593 *p
++ = toUChar(0xC0 + iregEnc210(i
->Ain
.Alu64R
.dst
));
2594 p
= emit32(p
, i
->Ain
.Alu64R
.src
->Armi
.Imm
.imm32
);
2598 *p
++ = rexAMode_R( i
->Ain
.Alu64R
.src
->Armi
.Reg
.reg
,
2599 i
->Ain
.Alu64R
.dst
);
2601 p
= doAMode_R(p
, i
->Ain
.Alu64R
.src
->Armi
.Reg
.reg
,
2605 *p
++ = rexAMode_M(i
->Ain
.Alu64R
.dst
,
2606 i
->Ain
.Alu64R
.src
->Armi
.Mem
.am
);
2608 p
= doAMode_M(p
, i
->Ain
.Alu64R
.dst
,
2609 i
->Ain
.Alu64R
.src
->Armi
.Mem
.am
);
2616 if (i
->Ain
.Alu64R
.op
== Aalu_MUL
) {
2617 switch (i
->Ain
.Alu64R
.src
->tag
) {
2619 *p
++ = rexAMode_R( i
->Ain
.Alu64R
.dst
,
2620 i
->Ain
.Alu64R
.src
->Armi
.Reg
.reg
);
2623 p
= doAMode_R(p
, i
->Ain
.Alu64R
.dst
,
2624 i
->Ain
.Alu64R
.src
->Armi
.Reg
.reg
);
2627 *p
++ = rexAMode_M(i
->Ain
.Alu64R
.dst
,
2628 i
->Ain
.Alu64R
.src
->Armi
.Mem
.am
);
2631 p
= doAMode_M(p
, i
->Ain
.Alu64R
.dst
,
2632 i
->Ain
.Alu64R
.src
->Armi
.Mem
.am
);
2635 if (fits8bits(i
->Ain
.Alu64R
.src
->Armi
.Imm
.imm32
)) {
2636 *p
++ = rexAMode_R(i
->Ain
.Alu64R
.dst
, i
->Ain
.Alu64R
.dst
);
2638 p
= doAMode_R(p
, i
->Ain
.Alu64R
.dst
, i
->Ain
.Alu64R
.dst
);
2639 *p
++ = toUChar(0xFF & i
->Ain
.Alu64R
.src
->Armi
.Imm
.imm32
);
2641 *p
++ = rexAMode_R(i
->Ain
.Alu64R
.dst
, i
->Ain
.Alu64R
.dst
);
2643 p
= doAMode_R(p
, i
->Ain
.Alu64R
.dst
, i
->Ain
.Alu64R
.dst
);
2644 p
= emit32(p
, i
->Ain
.Alu64R
.src
->Armi
.Imm
.imm32
);
2651 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
2652 opc
= opc_rr
= subopc_imm
= opc_imma
= 0;
2653 switch (i
->Ain
.Alu64R
.op
) {
2654 case Aalu_ADC
: opc
= 0x13; opc_rr
= 0x11;
2655 subopc_imm
= 2; opc_imma
= 0x15; break;
2656 case Aalu_ADD
: opc
= 0x03; opc_rr
= 0x01;
2657 subopc_imm
= 0; opc_imma
= 0x05; break;
2658 case Aalu_SUB
: opc
= 0x2B; opc_rr
= 0x29;
2659 subopc_imm
= 5; opc_imma
= 0x2D; break;
2660 case Aalu_SBB
: opc
= 0x1B; opc_rr
= 0x19;
2661 subopc_imm
= 3; opc_imma
= 0x1D; break;
2662 case Aalu_AND
: opc
= 0x23; opc_rr
= 0x21;
2663 subopc_imm
= 4; opc_imma
= 0x25; break;
2664 case Aalu_XOR
: opc
= 0x33; opc_rr
= 0x31;
2665 subopc_imm
= 6; opc_imma
= 0x35; break;
2666 case Aalu_OR
: opc
= 0x0B; opc_rr
= 0x09;
2667 subopc_imm
= 1; opc_imma
= 0x0D; break;
2668 case Aalu_CMP
: opc
= 0x3B; opc_rr
= 0x39;
2669 subopc_imm
= 7; opc_imma
= 0x3D; break;
2672 switch (i
->Ain
.Alu64R
.src
->tag
) {
2674 if (sameHReg(i
->Ain
.Alu64R
.dst
, hregAMD64_RAX())
2675 && !fits8bits(i
->Ain
.Alu64R
.src
->Armi
.Imm
.imm32
)) {
2676 goto bad
; /* FIXME: awaiting test case */
2677 *p
++ = toUChar(opc_imma
);
2678 p
= emit32(p
, i
->Ain
.Alu64R
.src
->Armi
.Imm
.imm32
);
2680 if (fits8bits(i
->Ain
.Alu64R
.src
->Armi
.Imm
.imm32
)) {
2681 *p
++ = rexAMode_R_enc_reg( 0, i
->Ain
.Alu64R
.dst
);
2683 p
= doAMode_R_enc_reg(p
, subopc_imm
, i
->Ain
.Alu64R
.dst
);
2684 *p
++ = toUChar(0xFF & i
->Ain
.Alu64R
.src
->Armi
.Imm
.imm32
);
2686 *p
++ = rexAMode_R_enc_reg( 0, i
->Ain
.Alu64R
.dst
);
2688 p
= doAMode_R_enc_reg(p
, subopc_imm
, i
->Ain
.Alu64R
.dst
);
2689 p
= emit32(p
, i
->Ain
.Alu64R
.src
->Armi
.Imm
.imm32
);
2693 *p
++ = rexAMode_R( i
->Ain
.Alu64R
.src
->Armi
.Reg
.reg
,
2695 *p
++ = toUChar(opc_rr
);
2696 p
= doAMode_R(p
, i
->Ain
.Alu64R
.src
->Armi
.Reg
.reg
,
2700 *p
++ = rexAMode_M( i
->Ain
.Alu64R
.dst
,
2701 i
->Ain
.Alu64R
.src
->Armi
.Mem
.am
);
2702 *p
++ = toUChar(opc
);
2703 p
= doAMode_M(p
, i
->Ain
.Alu64R
.dst
,
2704 i
->Ain
.Alu64R
.src
->Armi
.Mem
.am
);
2712 /* Deal specially with MOV */
2713 if (i
->Ain
.Alu64M
.op
== Aalu_MOV
) {
2714 switch (i
->Ain
.Alu64M
.src
->tag
) {
2716 *p
++ = rexAMode_M(i
->Ain
.Alu64M
.src
->Ari
.Reg
.reg
,
2719 p
= doAMode_M(p
, i
->Ain
.Alu64M
.src
->Ari
.Reg
.reg
,
2723 *p
++ = rexAMode_M_enc(0, i
->Ain
.Alu64M
.dst
);
2725 p
= doAMode_M_enc(p
, 0, i
->Ain
.Alu64M
.dst
);
2726 p
= emit32(p
, i
->Ain
.Alu64M
.src
->Ari
.Imm
.imm32
);
2732 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP. MUL is not
2733 allowed here. (This is derived from the x86 version of same). */
2734 opc
= subopc_imm
= opc_imma
= 0;
2735 switch (i
->Ain
.Alu64M
.op
) {
2736 case Aalu_CMP
: opc
= 0x39; subopc_imm
= 7; break;
2739 switch (i
->Ain
.Alu64M
.src
->tag
) {
2742 *p++ = toUChar(opc);
2743 p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
2748 if (fits8bits(i
->Ain
.Alu64M
.src
->Ari
.Imm
.imm32
)) {
2749 *p
++ = rexAMode_M_enc(subopc_imm
, i
->Ain
.Alu64M
.dst
);
2751 p
= doAMode_M_enc(p
, subopc_imm
, i
->Ain
.Alu64M
.dst
);
2752 *p
++ = toUChar(0xFF & i
->Ain
.Alu64M
.src
->Ari
.Imm
.imm32
);
2755 *p
++ = rexAMode_M_enc(subopc_imm
, i
->Ain
.Alu64M
.dst
);
2757 p
= doAMode_M_enc(p
, subopc_imm
, i
->Ain
.Alu64M
.dst
);
2758 p
= emit32(p
, i
->Ain
.Alu64M
.src
->Ari
.Imm
.imm32
);
2768 opc_cl
= opc_imm
= subopc
= 0;
2769 switch (i
->Ain
.Sh64
.op
) {
2770 case Ash_SHR
: opc_cl
= 0xD3; opc_imm
= 0xC1; subopc
= 5; break;
2771 case Ash_SAR
: opc_cl
= 0xD3; opc_imm
= 0xC1; subopc
= 7; break;
2772 case Ash_SHL
: opc_cl
= 0xD3; opc_imm
= 0xC1; subopc
= 4; break;
2775 if (i
->Ain
.Sh64
.src
== 0) {
2776 *p
++ = rexAMode_R_enc_reg(0, i
->Ain
.Sh64
.dst
);
2777 *p
++ = toUChar(opc_cl
);
2778 p
= doAMode_R_enc_reg(p
, subopc
, i
->Ain
.Sh64
.dst
);
2781 *p
++ = rexAMode_R_enc_reg(0, i
->Ain
.Sh64
.dst
);
2782 *p
++ = toUChar(opc_imm
);
2783 p
= doAMode_R_enc_reg(p
, subopc
, i
->Ain
.Sh64
.dst
);
2784 *p
++ = (UChar
)(i
->Ain
.Sh64
.src
);
2790 opc_cl
= opc_imm
= subopc
= 0;
2791 switch (i
->Ain
.Sh32
.op
) {
2792 case Ash_SHR
: opc_cl
= 0xD3; opc_imm
= 0xC1; subopc
= 5; break;
2793 case Ash_SAR
: opc_cl
= 0xD3; opc_imm
= 0xC1; subopc
= 7; break;
2794 case Ash_SHL
: opc_cl
= 0xD3; opc_imm
= 0xC1; subopc
= 4; break;
2797 if (i
->Ain
.Sh32
.src
== 0) {
2798 rex
= clearWBit( rexAMode_R_enc_reg(0, i
->Ain
.Sh32
.dst
) );
2799 if (rex
!= 0x40) *p
++ = rex
;
2800 *p
++ = toUChar(opc_cl
);
2801 p
= doAMode_R_enc_reg(p
, subopc
, i
->Ain
.Sh32
.dst
);
2804 rex
= clearWBit( rexAMode_R_enc_reg(0, i
->Ain
.Sh32
.dst
) );
2805 if (rex
!= 0x40) *p
++ = rex
;
2806 *p
++ = toUChar(opc_imm
);
2807 p
= doAMode_R_enc_reg(p
, subopc
, i
->Ain
.Sh32
.dst
);
2808 *p
++ = (UChar
)(i
->Ain
.Sh32
.src
);
2814 /* testq sign-extend($imm32), %reg */
2815 *p
++ = rexAMode_R_enc_reg(0, i
->Ain
.Test64
.dst
);
2817 p
= doAMode_R_enc_reg(p
, 0, i
->Ain
.Test64
.dst
);
2818 p
= emit32(p
, i
->Ain
.Test64
.imm32
);
2822 if (i
->Ain
.Unary64
.op
== Aun_NOT
) {
2823 *p
++ = rexAMode_R_enc_reg(0, i
->Ain
.Unary64
.dst
);
2825 p
= doAMode_R_enc_reg(p
, 2, i
->Ain
.Unary64
.dst
);
2828 if (i
->Ain
.Unary64
.op
== Aun_NEG
) {
2829 *p
++ = rexAMode_R_enc_reg(0, i
->Ain
.Unary64
.dst
);
2831 p
= doAMode_R_enc_reg(p
, 3, i
->Ain
.Unary64
.dst
);
2837 *p
++ = rexAMode_M(i
->Ain
.Lea64
.dst
, i
->Ain
.Lea64
.am
);
2839 p
= doAMode_M(p
, i
->Ain
.Lea64
.dst
, i
->Ain
.Lea64
.am
);
2843 /* ADD/SUB/AND/OR/XOR/CMP */
2844 opc
= opc_rr
= subopc_imm
= opc_imma
= 0;
2845 switch (i
->Ain
.Alu32R
.op
) {
2846 case Aalu_ADD
: opc
= 0x03; opc_rr
= 0x01;
2847 subopc_imm
= 0; opc_imma
= 0x05; break;
2848 case Aalu_SUB
: opc
= 0x2B; opc_rr
= 0x29;
2849 subopc_imm
= 5; opc_imma
= 0x2D; break;
2850 case Aalu_AND
: opc
= 0x23; opc_rr
= 0x21;
2851 subopc_imm
= 4; opc_imma
= 0x25; break;
2852 case Aalu_XOR
: opc
= 0x33; opc_rr
= 0x31;
2853 subopc_imm
= 6; opc_imma
= 0x35; break;
2854 case Aalu_OR
: opc
= 0x0B; opc_rr
= 0x09;
2855 subopc_imm
= 1; opc_imma
= 0x0D; break;
2856 case Aalu_CMP
: opc
= 0x3B; opc_rr
= 0x39;
2857 subopc_imm
= 7; opc_imma
= 0x3D; break;
2860 switch (i
->Ain
.Alu32R
.src
->tag
) {
2862 if (sameHReg(i
->Ain
.Alu32R
.dst
, hregAMD64_RAX())
2863 && !fits8bits(i
->Ain
.Alu32R
.src
->Armi
.Imm
.imm32
)) {
2864 goto bad
; /* FIXME: awaiting test case */
2865 *p
++ = toUChar(opc_imma
);
2866 p
= emit32(p
, i
->Ain
.Alu32R
.src
->Armi
.Imm
.imm32
);
2868 if (fits8bits(i
->Ain
.Alu32R
.src
->Armi
.Imm
.imm32
)) {
2869 rex
= clearWBit( rexAMode_R_enc_reg( 0, i
->Ain
.Alu32R
.dst
) );
2870 if (rex
!= 0x40) *p
++ = rex
;
2872 p
= doAMode_R_enc_reg(p
, subopc_imm
, i
->Ain
.Alu32R
.dst
);
2873 *p
++ = toUChar(0xFF & i
->Ain
.Alu32R
.src
->Armi
.Imm
.imm32
);
2875 rex
= clearWBit( rexAMode_R_enc_reg( 0, i
->Ain
.Alu32R
.dst
) );
2876 if (rex
!= 0x40) *p
++ = rex
;
2878 p
= doAMode_R_enc_reg(p
, subopc_imm
, i
->Ain
.Alu32R
.dst
);
2879 p
= emit32(p
, i
->Ain
.Alu32R
.src
->Armi
.Imm
.imm32
);
2884 rexAMode_R( i
->Ain
.Alu32R
.src
->Armi
.Reg
.reg
,
2885 i
->Ain
.Alu32R
.dst
) );
2886 if (rex
!= 0x40) *p
++ = rex
;
2887 *p
++ = toUChar(opc_rr
);
2888 p
= doAMode_R(p
, i
->Ain
.Alu32R
.src
->Armi
.Reg
.reg
,
2893 rexAMode_M( i
->Ain
.Alu32R
.dst
,
2894 i
->Ain
.Alu32R
.src
->Armi
.Mem
.am
) );
2895 if (rex
!= 0x40) *p
++ = rex
;
2896 *p
++ = toUChar(opc
);
2897 p
= doAMode_M(p
, i
->Ain
.Alu32R
.dst
,
2898 i
->Ain
.Alu32R
.src
->Armi
.Mem
.am
);
2906 subopc
= i
->Ain
.MulL
.syned
? 5 : 4;
2907 switch (i
->Ain
.MulL
.src
->tag
) {
2909 *p
++ = rexAMode_M_enc(0, i
->Ain
.MulL
.src
->Arm
.Mem
.am
);
2911 p
= doAMode_M_enc(p
, subopc
, i
->Ain
.MulL
.src
->Arm
.Mem
.am
);
2914 *p
++ = rexAMode_R_enc_reg(0, i
->Ain
.MulL
.src
->Arm
.Reg
.reg
);
2916 p
= doAMode_R_enc_reg(p
, subopc
, i
->Ain
.MulL
.src
->Arm
.Reg
.reg
);
2924 subopc
= i
->Ain
.Div
.syned
? 7 : 6;
2925 if (i
->Ain
.Div
.sz
== 4) {
2926 switch (i
->Ain
.Div
.src
->tag
) {
2931 p
= doAMode_M_enc(p
, subopc
, i
->Ain
.Div
.src
->Arm
.Mem
.am
);
2935 rexAMode_R_enc_reg(0, i
->Ain
.Div
.src
->Arm
.Reg
.reg
));
2937 p
= doAMode_R_enc_reg(p
, subopc
, i
->Ain
.Div
.src
->Arm
.Reg
.reg
);
2943 if (i
->Ain
.Div
.sz
== 8) {
2944 switch (i
->Ain
.Div
.src
->tag
) {
2946 *p
++ = rexAMode_M_enc(0, i
->Ain
.Div
.src
->Arm
.Mem
.am
);
2948 p
= doAMode_M_enc(p
, subopc
, i
->Ain
.Div
.src
->Arm
.Mem
.am
);
2951 *p
++ = rexAMode_R_enc_reg(0, i
->Ain
.Div
.src
->Arm
.Reg
.reg
);
2953 p
= doAMode_R_enc_reg(p
, subopc
, i
->Ain
.Div
.src
->Arm
.Reg
.reg
);
2962 switch (i
->Ain
.Push
.src
->tag
) {
2965 rexAMode_M_enc(0, i
->Ain
.Push
.src
->Armi
.Mem
.am
));
2967 p
= doAMode_M_enc(p
, 6, i
->Ain
.Push
.src
->Armi
.Mem
.am
);
2971 p
= emit32(p
, i
->Ain
.Push
.src
->Armi
.Imm
.imm32
);
2974 *p
++ = toUChar(0x40 + (1 & iregEnc3(i
->Ain
.Push
.src
->Armi
.Reg
.reg
)));
2975 *p
++ = toUChar(0x50 + iregEnc210(i
->Ain
.Push
.src
->Armi
.Reg
.reg
));
2982 /* As per detailed comment for Ain_Call in getRegUsage_AMD64Instr
2983 above, %r11 is used as an address temporary. */
2984 /* If we don't need to do any fixup actions in the case that the
2985 call doesn't happen, just do the simple thing and emit
2986 straight-line code. This is usually the case. */
2987 if (i
->Ain
.Call
.cond
== Acc_ALWAYS
/*call always happens*/
2988 || i
->Ain
.Call
.rloc
.pri
== RLPri_None
/*no fixup action*/) {
2989 /* jump over the following two insns if the condition does
2991 Bool shortImm
= fitsIn32Bits(i
->Ain
.Call
.target
);
2992 if (i
->Ain
.Call
.cond
!= Acc_ALWAYS
) {
2993 *p
++ = toUChar(0x70 + (0xF & (i
->Ain
.Call
.cond
^ 1)));
2994 *p
++ = shortImm
? 10 : 13;
2995 /* 10 or 13 bytes in the next two insns */
2998 /* 7 bytes: movl sign-extend(imm32), %r11 */
3002 p
= emit32(p
, (UInt
)i
->Ain
.Call
.target
);
3004 /* 10 bytes: movabsq $target, %r11 */
3007 p
= emit64(p
, i
->Ain
.Call
.target
);
3009 /* 3 bytes: call *%r11 */
3015 /* Complex case. We have to generate an if-then-else diamond. */
3018 // movabsq $target, %r11
3023 // movabsq $0x5555555555555555, %rax // possibly
3024 // movq %rax, %rdx // possibly
3031 *p
++ = toUChar(0x70 + (0xF & (i
->Ain
.Call
.cond
^ 1)));
3032 *p
++ = 0; /* # of bytes to jump over; don't know how many yet. */
3034 // movabsq $target, %r11
3037 p
= emit64(p
, i
->Ain
.Call
.target
);
3045 UChar
* pPreElse
= p
;
3049 *p
++ = 0; /* # of bytes to jump over; don't know how many yet. */
3054 /* Do the 'else' actions */
3055 switch (i
->Ain
.Call
.rloc
.pri
) {
3057 // movabsq $0x5555555555555555, %rax
3058 *p
++ = 0x48; *p
++ = 0xB8; p
= emit64(p
, 0x5555555555555555ULL
);
3062 // movabsq $0x5555555555555555, %rax
3063 *p
++ = 0x48; *p
++ = 0xB8; p
= emit64(p
, 0x5555555555555555ULL
);
3065 *p
++ = 0x48; *p
++ = 0x89; *p
++ = 0xC2;
3067 case RLPri_V128SpRel
:
3068 if (i
->Ain
.Call
.rloc
.spOff
== 0) {
3069 // We could accept any |spOff| here, but that's more
3070 // hassle and the only value we're ever going to get
3071 // is zero (I believe.) Hence take the easy path :)
3072 // We need a scag register -- r11 can be it.
3073 // movabsq $0x5555555555555555, %r11
3074 *p
++ = 0x49; *p
++ = 0xBB;
3075 p
= emit64(p
, 0x5555555555555555ULL
);
3076 // movq %r11, 0(%rsp)
3077 *p
++ = 0x4C; *p
++ = 0x89; *p
++ = 0x1C; *p
++ = 0x24;
3078 // movq %r11, 8(%rsp)
3079 *p
++ = 0x4C; *p
++ = 0x89; *p
++ = 0x5C; *p
++ = 0x24;
3083 goto bad
; //ATC for all other spOff values
3084 case RLPri_V256SpRel
:
3086 case RLPri_None
: case RLPri_INVALID
: default:
3087 vassert(0); // should never get here
3093 // Fix up the branch offsets. The +2s in the offset
3094 // calculations are there because x86 requires conditional
3095 // branches to have their offset stated relative to the
3096 // instruction immediately following the branch insn. And in
3097 // both cases the branch insns are 2 bytes long.
3099 // First, the "j{!cond} else:" at pBefore.
3100 delta
= (Int
)(Long
)(pElse
- (pBefore
+ 2));
3101 vassert(delta
>= 0 && delta
< 100/*arbitrary*/);
3102 *(pBefore
+1) = (UChar
)delta
;
3104 // And secondly, the "jmp after:" at pPreElse.
3105 delta
= (Int
)(Long
)(pAfter
- (pPreElse
+ 2));
3106 vassert(delta
>= 0 && delta
< 100/*arbitrary*/);
3107 *(pPreElse
+1) = (UChar
)delta
;
3113 /* NB: what goes on here has to be very closely coordinated with the
3114 chainXDirect_AMD64 and unchainXDirect_AMD64 below. */
3115 /* We're generating chain-me requests here, so we need to be
3116 sure this is actually allowed -- no-redir translations can't
3117 use chain-me's. Hence: */
3118 vassert(disp_cp_chain_me_to_slowEP
!= NULL
);
3119 vassert(disp_cp_chain_me_to_fastEP
!= NULL
);
3121 HReg r11
= hregAMD64_R11();
3123 /* Use ptmp for backpatching conditional jumps. */
3126 /* First off, if this is conditional, create a conditional
3127 jump over the rest of it. */
3128 if (i
->Ain
.XDirect
.cond
!= Acc_ALWAYS
) {
3129 /* jmp fwds if !condition */
3130 *p
++ = toUChar(0x70 + (0xF & (i
->Ain
.XDirect
.cond
^ 1)));
3131 ptmp
= p
; /* fill in this bit later */
3132 *p
++ = 0; /* # of bytes to jump over; don't know how many yet. */
3135 /* Update the guest RIP. */
3136 if (fitsIn32Bits(i
->Ain
.XDirect
.dstGA
)) {
3137 /* use a shorter encoding */
3138 /* movl sign-extend(dstGA), %r11 */
3142 p
= emit32(p
, (UInt
)i
->Ain
.XDirect
.dstGA
);
3144 /* movabsq $dstGA, %r11 */
3147 p
= emit64(p
, i
->Ain
.XDirect
.dstGA
);
3150 /* movq %r11, amRIP */
3151 *p
++ = rexAMode_M(r11
, i
->Ain
.XDirect
.amRIP
);
3153 p
= doAMode_M(p
, r11
, i
->Ain
.XDirect
.amRIP
);
3155 /* --- FIRST PATCHABLE BYTE follows --- */
3156 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
3157 to) backs up the return address, so as to find the address of
3158 the first patchable byte. So: don't change the length of the
3159 two instructions below. */
3160 /* movabsq $disp_cp_chain_me_to_{slow,fast}EP,%r11; */
3163 const void* disp_cp_chain_me
3164 = i
->Ain
.XDirect
.toFastEP
? disp_cp_chain_me_to_fastEP
3165 : disp_cp_chain_me_to_slowEP
;
3166 p
= emit64(p
, (Addr
)disp_cp_chain_me
);
3171 /* --- END of PATCHABLE BYTES --- */
3173 /* Fix up the conditional jump, if there was one. */
3174 if (i
->Ain
.XDirect
.cond
!= Acc_ALWAYS
) {
3175 Int delta
= p
- ptmp
;
3176 vassert(delta
> 0 && delta
< 40);
3177 *ptmp
= toUChar(delta
-1);
3183 /* We're generating transfers that could lead indirectly to a
3184 chain-me, so we need to be sure this is actually allowed --
3185 no-redir translations are not allowed to reach normal
3186 translations without going through the scheduler. That means
3187 no XDirects or XIndirs out from no-redir translations.
3189 vassert(disp_cp_xindir
!= NULL
);
3191 /* Use ptmp for backpatching conditional jumps. */
3194 /* First off, if this is conditional, create a conditional
3195 jump over the rest of it. */
3196 if (i
->Ain
.XIndir
.cond
!= Acc_ALWAYS
) {
3197 /* jmp fwds if !condition */
3198 *p
++ = toUChar(0x70 + (0xF & (i
->Ain
.XIndir
.cond
^ 1)));
3199 ptmp
= p
; /* fill in this bit later */
3200 *p
++ = 0; /* # of bytes to jump over; don't know how many yet. */
3203 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
3204 *p
++ = rexAMode_M(i
->Ain
.XIndir
.dstGA
, i
->Ain
.XIndir
.amRIP
);
3206 p
= doAMode_M(p
, i
->Ain
.XIndir
.dstGA
, i
->Ain
.XIndir
.amRIP
);
3208 /* get $disp_cp_xindir into %r11 */
3209 if (fitsIn32Bits((Addr
)disp_cp_xindir
)) {
3210 /* use a shorter encoding */
3211 /* movl sign-extend(disp_cp_xindir), %r11 */
3215 p
= emit32(p
, (UInt
)(Addr
)disp_cp_xindir
);
3217 /* movabsq $disp_cp_xindir, %r11 */
3220 p
= emit64(p
, (Addr
)disp_cp_xindir
);
3228 /* Fix up the conditional jump, if there was one. */
3229 if (i
->Ain
.XIndir
.cond
!= Acc_ALWAYS
) {
3230 Int delta
= p
- ptmp
;
3231 vassert(delta
> 0 && delta
< 40);
3232 *ptmp
= toUChar(delta
-1);
3237 case Ain_XAssisted
: {
3238 /* Use ptmp for backpatching conditional jumps. */
3241 /* First off, if this is conditional, create a conditional
3242 jump over the rest of it. */
3243 if (i
->Ain
.XAssisted
.cond
!= Acc_ALWAYS
) {
3244 /* jmp fwds if !condition */
3245 *p
++ = toUChar(0x70 + (0xF & (i
->Ain
.XAssisted
.cond
^ 1)));
3246 ptmp
= p
; /* fill in this bit later */
3247 *p
++ = 0; /* # of bytes to jump over; don't know how many yet. */
3250 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
3251 *p
++ = rexAMode_M(i
->Ain
.XAssisted
.dstGA
, i
->Ain
.XAssisted
.amRIP
);
3253 p
= doAMode_M(p
, i
->Ain
.XAssisted
.dstGA
, i
->Ain
.XAssisted
.amRIP
);
3254 /* movl $magic_number, %ebp. Since these numbers are all small positive
3255 integers, we can get away with "movl $N, %ebp" rather than
3256 the longer "movq $N, %rbp". */
3258 switch (i
->Ain
.XAssisted
.jk
) {
3259 case Ijk_ClientReq
: trcval
= VEX_TRC_JMP_CLIENTREQ
; break;
3260 case Ijk_Sys_syscall
: trcval
= VEX_TRC_JMP_SYS_SYSCALL
; break;
3261 case Ijk_Sys_int32
: trcval
= VEX_TRC_JMP_SYS_INT32
; break;
3262 case Ijk_Sys_int210
: trcval
= VEX_TRC_JMP_SYS_INT210
; break;
3263 case Ijk_Yield
: trcval
= VEX_TRC_JMP_YIELD
; break;
3264 case Ijk_EmWarn
: trcval
= VEX_TRC_JMP_EMWARN
; break;
3265 case Ijk_MapFail
: trcval
= VEX_TRC_JMP_MAPFAIL
; break;
3266 case Ijk_NoDecode
: trcval
= VEX_TRC_JMP_NODECODE
; break;
3267 case Ijk_InvalICache
: trcval
= VEX_TRC_JMP_INVALICACHE
; break;
3268 case Ijk_NoRedir
: trcval
= VEX_TRC_JMP_NOREDIR
; break;
3269 case Ijk_SigTRAP
: trcval
= VEX_TRC_JMP_SIGTRAP
; break;
3270 case Ijk_SigSEGV
: trcval
= VEX_TRC_JMP_SIGSEGV
; break;
3271 case Ijk_Boring
: trcval
= VEX_TRC_JMP_BORING
; break;
3272 /* We don't expect to see the following being assisted. */
3277 ppIRJumpKind(i
->Ain
.XAssisted
.jk
);
3278 vpanic("emit_AMD64Instr.Ain_XAssisted: unexpected jump kind");
3280 vassert(trcval
!= 0);
3282 p
= emit32(p
, trcval
);
3283 /* movabsq $disp_assisted, %r11 */
3286 p
= emit64(p
, (Addr
)disp_cp_xassisted
);
3292 /* Fix up the conditional jump, if there was one. */
3293 if (i
->Ain
.XAssisted
.cond
!= Acc_ALWAYS
) {
3294 Int delta
= p
- ptmp
;
3295 vassert(delta
> 0 && delta
< 40);
3296 *ptmp
= toUChar(delta
-1);
3302 vassert(i
->Ain
.CMov64
.cond
!= Acc_ALWAYS
);
3303 *p
++ = rexAMode_R(i
->Ain
.CMov64
.dst
, i
->Ain
.CMov64
.src
);
3305 *p
++ = toUChar(0x40 + (0xF & i
->Ain
.CMov64
.cond
));
3306 p
= doAMode_R(p
, i
->Ain
.CMov64
.dst
, i
->Ain
.CMov64
.src
);
3310 vassert(i
->Ain
.CLoad
.cond
!= Acc_ALWAYS
);
3312 /* Only 32- or 64-bit variants are allowed. */
3313 vassert(i
->Ain
.CLoad
.szB
== 4 || i
->Ain
.CLoad
.szB
== 8);
3315 /* Use ptmp for backpatching conditional jumps. */
3318 /* jmp fwds if !condition */
3319 *p
++ = toUChar(0x70 + (0xF & (i
->Ain
.CLoad
.cond
^ 1)));
3320 ptmp
= p
; /* fill in this bit later */
3321 *p
++ = 0; /* # of bytes to jump over; don't know how many yet. */
3323 /* Now the load. Either a normal 64 bit load or a normal 32 bit
3324 load, which, by the default zero-extension rule, zeroes out
3325 the upper half of the destination, as required. */
3326 rex
= rexAMode_M(i
->Ain
.CLoad
.dst
, i
->Ain
.CLoad
.addr
);
3327 *p
++ = i
->Ain
.CLoad
.szB
== 4 ? clearWBit(rex
) : rex
;
3329 p
= doAMode_M(p
, i
->Ain
.CLoad
.dst
, i
->Ain
.CLoad
.addr
);
3331 /* Fix up the conditional branch */
3332 Int delta
= p
- ptmp
;
3333 vassert(delta
> 0 && delta
< 40);
3334 *ptmp
= toUChar(delta
-1);
3339 /* AFAICS this is identical to Ain_CLoad except that the opcode
3340 is 0x89 instead of 0x8B. */
3341 vassert(i
->Ain
.CStore
.cond
!= Acc_ALWAYS
);
3343 /* Only 32- or 64-bit variants are allowed. */
3344 vassert(i
->Ain
.CStore
.szB
== 4 || i
->Ain
.CStore
.szB
== 8);
3346 /* Use ptmp for backpatching conditional jumps. */
3349 /* jmp fwds if !condition */
3350 *p
++ = toUChar(0x70 + (0xF & (i
->Ain
.CStore
.cond
^ 1)));
3351 ptmp
= p
; /* fill in this bit later */
3352 *p
++ = 0; /* # of bytes to jump over; don't know how many yet. */
3354 /* Now the store. */
3355 rex
= rexAMode_M(i
->Ain
.CStore
.src
, i
->Ain
.CStore
.addr
);
3356 *p
++ = i
->Ain
.CStore
.szB
== 4 ? clearWBit(rex
) : rex
;
3358 p
= doAMode_M(p
, i
->Ain
.CStore
.src
, i
->Ain
.CStore
.addr
);
3360 /* Fix up the conditional branch */
3361 Int delta
= p
- ptmp
;
3362 vassert(delta
> 0 && delta
< 40);
3363 *ptmp
= toUChar(delta
-1);
3368 /* No, _don't_ ask me why the sense of the args has to be
3369 different in the S vs Z case. I don't know. */
3370 if (i
->Ain
.MovxLQ
.syned
) {
3371 /* Need REX.W = 1 here, but rexAMode_R does that for us. */
3372 *p
++ = rexAMode_R(i
->Ain
.MovxLQ
.dst
, i
->Ain
.MovxLQ
.src
);
3374 p
= doAMode_R(p
, i
->Ain
.MovxLQ
.dst
, i
->Ain
.MovxLQ
.src
);
3376 /* Produce a 32-bit reg-reg move, since the implicit
3377 zero-extend does what we want. */
3379 rexAMode_R(i
->Ain
.MovxLQ
.src
, i
->Ain
.MovxLQ
.dst
));
3381 p
= doAMode_R(p
, i
->Ain
.MovxLQ
.src
, i
->Ain
.MovxLQ
.dst
);
3386 if (i
->Ain
.LoadEX
.szSmall
== 1 && !i
->Ain
.LoadEX
.syned
) {
3388 *p
++ = rexAMode_M(i
->Ain
.LoadEX
.dst
, i
->Ain
.LoadEX
.src
);
3391 p
= doAMode_M(p
, i
->Ain
.LoadEX
.dst
, i
->Ain
.LoadEX
.src
);
3394 if (i
->Ain
.LoadEX
.szSmall
== 2 && !i
->Ain
.LoadEX
.syned
) {
3396 *p
++ = rexAMode_M(i
->Ain
.LoadEX
.dst
, i
->Ain
.LoadEX
.src
);
3399 p
= doAMode_M(p
, i
->Ain
.LoadEX
.dst
, i
->Ain
.LoadEX
.src
);
3402 if (i
->Ain
.LoadEX
.szSmall
== 4 && !i
->Ain
.LoadEX
.syned
) {
3404 /* This isn't really an existing AMD64 instruction per se.
3405 Rather, we have to do a 32-bit load. Because a 32-bit
3406 write implicitly clears the upper 32 bits of the target
3407 register, we get what we want. */
3409 rexAMode_M(i
->Ain
.LoadEX
.dst
, i
->Ain
.LoadEX
.src
));
3411 p
= doAMode_M(p
, i
->Ain
.LoadEX
.dst
, i
->Ain
.LoadEX
.src
);
3417 /* Make the destination register be 1 or 0, depending on whether
3418 the relevant condition holds. Complication: the top 56 bits
3419 of the destination should be forced to zero, but doing 'xorq
3420 %r,%r' kills the flag(s) we are about to read. Sigh. So
3421 start off my moving $0 into the dest. */
3422 reg
= iregEnc3210(i
->Ain
.Set64
.dst
);
3426 *p
++ = toUChar(reg
>= 8 ? 0x49 : 0x48);
3428 *p
++ = toUChar(0xC0 + (reg
& 7));
3431 /* setb lo8(%dst) */
3432 /* note, 8-bit register rex trickyness. Be careful here. */
3433 *p
++ = toUChar(reg
>= 8 ? 0x41 : 0x40);
3435 *p
++ = toUChar(0x90 + (0x0F & i
->Ain
.Set64
.cond
));
3436 *p
++ = toUChar(0xC0 + (reg
& 7));
3440 *p
++ = rexAMode_R(i
->Ain
.Bsfr64
.dst
, i
->Ain
.Bsfr64
.src
);
3442 if (i
->Ain
.Bsfr64
.isFwds
) {
3447 p
= doAMode_R(p
, i
->Ain
.Bsfr64
.dst
, i
->Ain
.Bsfr64
.src
);
3452 *p
++ = 0x0F; *p
++ = 0xAE; *p
++ = 0xF0;
3458 if (i
->Ain
.ACAS
.sz
== 2) *p
++ = 0x66;
3459 /* cmpxchg{b,w,l,q} %rbx,mem. Expected-value in %rax, new value
3460 in %rbx. The new-value register is hardwired to be %rbx
3461 since dealing with byte integer registers is too much hassle,
3462 so we force the register operand to %rbx (could equally be
3464 rex
= rexAMode_M( hregAMD64_RBX(), i
->Ain
.ACAS
.addr
);
3465 if (i
->Ain
.ACAS
.sz
!= 8)
3466 rex
= clearWBit(rex
);
3468 *p
++ = rex
; /* this can emit 0x40, which is pointless. oh well. */
3470 if (i
->Ain
.ACAS
.sz
== 1) *p
++ = 0xB0; else *p
++ = 0xB1;
3471 p
= doAMode_M(p
, hregAMD64_RBX(), i
->Ain
.ACAS
.addr
);
3477 /* cmpxchg{8,16}b m{64,128}. Expected-value in %rdx:%rax, new
3478 value in %rcx:%rbx. All 4 regs are hardwired in the ISA, so
3479 aren't encoded in the insn. */
3480 rex
= rexAMode_M_enc(1, i
->Ain
.ACAS
.addr
);
3481 if (i
->Ain
.ACAS
.sz
!= 8)
3482 rex
= clearWBit(rex
);
3486 p
= doAMode_M_enc(p
, 1, i
->Ain
.DACAS
.addr
);
3490 vassert(i
->Ain
.A87Free
.nregs
> 0 && i
->Ain
.A87Free
.nregs
<= 7);
3491 for (j
= 0; j
< i
->Ain
.A87Free
.nregs
; j
++) {
3492 p
= do_ffree_st(p
, 7-j
);
3496 case Ain_A87PushPop
:
3497 vassert(i
->Ain
.A87PushPop
.szB
== 8 || i
->Ain
.A87PushPop
.szB
== 4);
3498 if (i
->Ain
.A87PushPop
.isPush
) {
3499 /* Load from memory into %st(0): flds/fldl amode */
3501 rexAMode_M_enc(0, i
->Ain
.A87PushPop
.addr
) );
3502 *p
++ = i
->Ain
.A87PushPop
.szB
== 4 ? 0xD9 : 0xDD;
3503 p
= doAMode_M_enc(p
, 0/*subopcode*/, i
->Ain
.A87PushPop
.addr
);
3505 /* Dump %st(0) to memory: fstps/fstpl amode */
3507 rexAMode_M_enc(3, i
->Ain
.A87PushPop
.addr
) );
3508 *p
++ = i
->Ain
.A87PushPop
.szB
== 4 ? 0xD9 : 0xDD;
3509 p
= doAMode_M_enc(p
, 3/*subopcode*/, i
->Ain
.A87PushPop
.addr
);
3515 switch (i
->Ain
.A87FpOp
.op
) {
3516 case Afp_SQRT
: *p
++ = 0xD9; *p
++ = 0xFA; break;
3517 case Afp_SIN
: *p
++ = 0xD9; *p
++ = 0xFE; break;
3518 case Afp_COS
: *p
++ = 0xD9; *p
++ = 0xFF; break;
3519 case Afp_ROUND
: *p
++ = 0xD9; *p
++ = 0xFC; break;
3520 case Afp_2XM1
: *p
++ = 0xD9; *p
++ = 0xF0; break;
3521 case Afp_SCALE
: *p
++ = 0xD9; *p
++ = 0xFD; break;
3522 case Afp_ATAN
: *p
++ = 0xD9; *p
++ = 0xF3; break;
3523 case Afp_YL2X
: *p
++ = 0xD9; *p
++ = 0xF1; break;
3524 case Afp_YL2XP1
: *p
++ = 0xD9; *p
++ = 0xF9; break;
3525 case Afp_PREM
: *p
++ = 0xD9; *p
++ = 0xF8; break;
3526 case Afp_PREM1
: *p
++ = 0xD9; *p
++ = 0xF5; break;
3528 /* fptan pushes 1.0 on the FP stack, except when the
3529 argument is out of range. Hence we have to do the
3530 instruction, then inspect C2 to see if there is an out
3531 of range condition. If there is, we skip the fincstp
3532 that is used by the in-range case to get rid of this
3534 *p
++ = 0xD9; *p
++ = 0xF2; // fptan
3535 *p
++ = 0x50; // pushq %rax
3536 *p
++ = 0xDF; *p
++ = 0xE0; // fnstsw %ax
3537 *p
++ = 0x66; *p
++ = 0xA9;
3538 *p
++ = 0x00; *p
++ = 0x04; // testw $0x400,%ax
3539 *p
++ = 0x75; *p
++ = 0x02; // jnz after_fincstp
3540 *p
++ = 0xD9; *p
++ = 0xF7; // fincstp
3541 *p
++ = 0x58; // after_fincstp: popq %rax
3550 rexAMode_M_enc(5, i
->Ain
.A87LdCW
.addr
) );
3552 p
= doAMode_M_enc(p
, 5/*subopcode*/, i
->Ain
.A87LdCW
.addr
);
3557 rexAMode_M_enc(7, i
->Ain
.A87StSW
.addr
) );
3559 p
= doAMode_M_enc(p
, 7/*subopcode*/, i
->Ain
.A87StSW
.addr
);
3563 if (i
->Ain
.Store
.sz
== 2) {
3564 /* This just goes to show the craziness of the instruction
3565 set encoding. We have to insert two prefix bytes, but be
3566 careful to avoid a conflict in what the size should be, by
3567 ensuring that REX.W = 0. */
3568 *p
++ = 0x66; /* override to 16-bits */
3569 *p
++ = clearWBit( rexAMode_M( i
->Ain
.Store
.src
, i
->Ain
.Store
.dst
) );
3571 p
= doAMode_M(p
, i
->Ain
.Store
.src
, i
->Ain
.Store
.dst
);
3574 if (i
->Ain
.Store
.sz
== 4) {
3575 *p
++ = clearWBit( rexAMode_M( i
->Ain
.Store
.src
, i
->Ain
.Store
.dst
) );
3577 p
= doAMode_M(p
, i
->Ain
.Store
.src
, i
->Ain
.Store
.dst
);
3580 if (i
->Ain
.Store
.sz
== 1) {
3581 /* This is one place where it would be wrong to skip emitting
3582 a rex byte of 0x40, since the mere presence of rex changes
3583 the meaning of the byte register access. Be careful. */
3584 *p
++ = clearWBit( rexAMode_M( i
->Ain
.Store
.src
, i
->Ain
.Store
.dst
) );
3586 p
= doAMode_M(p
, i
->Ain
.Store
.src
, i
->Ain
.Store
.dst
);
3592 *p
++ = clearWBit(rexAMode_M_enc(0, i
->Ain
.LdMXCSR
.addr
));
3595 p
= doAMode_M_enc(p
, 2/*subopcode*/, i
->Ain
.LdMXCSR
.addr
);
3599 /* ucomi[sd] %srcL, %srcR ; pushfq ; popq %dst */
3600 /* ucomi[sd] %srcL, %srcR */
3601 if (i
->Ain
.SseUComIS
.sz
== 8) {
3605 vassert(i
->Ain
.SseUComIS
.sz
== 4);
3608 rexAMode_R_enc_enc( vregEnc3210(i
->Ain
.SseUComIS
.srcL
),
3609 vregEnc3210(i
->Ain
.SseUComIS
.srcR
) ));
3612 p
= doAMode_R_enc_enc(p
, vregEnc3210(i
->Ain
.SseUComIS
.srcL
),
3613 vregEnc3210(i
->Ain
.SseUComIS
.srcR
) );
3617 *p
++ = toUChar(0x40 + (1 & iregEnc3(i
->Ain
.SseUComIS
.dst
)));
3618 *p
++ = toUChar(0x58 + iregEnc210(i
->Ain
.SseUComIS
.dst
));
3622 /* cvtsi2s[sd] %src, %dst */
3623 rex
= rexAMode_R_enc_reg( vregEnc3210(i
->Ain
.SseSI2SF
.dst
),
3624 i
->Ain
.SseSI2SF
.src
);
3625 *p
++ = toUChar(i
->Ain
.SseSI2SF
.szD
==4 ? 0xF3 : 0xF2);
3626 *p
++ = toUChar(i
->Ain
.SseSI2SF
.szS
==4 ? clearWBit(rex
) : rex
);
3629 p
= doAMode_R_enc_reg( p
, vregEnc3210(i
->Ain
.SseSI2SF
.dst
),
3630 i
->Ain
.SseSI2SF
.src
);
3634 /* cvts[sd]2si %src, %dst */
3635 rex
= rexAMode_R_reg_enc( i
->Ain
.SseSF2SI
.dst
,
3636 vregEnc3210(i
->Ain
.SseSF2SI
.src
) );
3637 *p
++ = toUChar(i
->Ain
.SseSF2SI
.szS
==4 ? 0xF3 : 0xF2);
3638 *p
++ = toUChar(i
->Ain
.SseSF2SI
.szD
==4 ? clearWBit(rex
) : rex
);
3641 p
= doAMode_R_reg_enc( p
, i
->Ain
.SseSF2SI
.dst
,
3642 vregEnc3210(i
->Ain
.SseSF2SI
.src
) );
3646 /* cvtsd2ss/cvtss2sd %src, %dst */
3647 *p
++ = toUChar(i
->Ain
.SseSDSS
.from64
? 0xF2 : 0xF3);
3649 rexAMode_R_enc_enc( vregEnc3210(i
->Ain
.SseSDSS
.dst
),
3650 vregEnc3210(i
->Ain
.SseSDSS
.src
) ));
3653 p
= doAMode_R_enc_enc( p
, vregEnc3210(i
->Ain
.SseSDSS
.dst
),
3654 vregEnc3210(i
->Ain
.SseSDSS
.src
) );
3658 if (i
->Ain
.SseLdSt
.sz
== 8) {
3661 if (i
->Ain
.SseLdSt
.sz
== 4) {
3664 if (i
->Ain
.SseLdSt
.sz
!= 16) {
3668 rexAMode_M_enc(vregEnc3210(i
->Ain
.SseLdSt
.reg
),
3669 i
->Ain
.SseLdSt
.addr
));
3671 *p
++ = toUChar(i
->Ain
.SseLdSt
.isLoad
? 0x10 : 0x11);
3672 p
= doAMode_M_enc(p
, vregEnc3210(i
->Ain
.SseLdSt
.reg
),
3673 i
->Ain
.SseLdSt
.addr
);
3676 case Ain_SseCStore
: {
3677 vassert(i
->Ain
.SseCStore
.cond
!= Acc_ALWAYS
);
3679 /* Use ptmp for backpatching conditional jumps. */
3682 /* jmp fwds if !condition */
3683 *p
++ = toUChar(0x70 + (0xF & (i
->Ain
.SseCStore
.cond
^ 1)));
3684 ptmp
= p
; /* fill in this bit later */
3685 *p
++ = 0; /* # of bytes to jump over; don't know how many yet. */
3687 /* Now the store. */
3689 rexAMode_M_enc(vregEnc3210(i
->Ain
.SseCStore
.src
),
3690 i
->Ain
.SseCStore
.addr
));
3692 *p
++ = toUChar(0x11);
3693 p
= doAMode_M_enc(p
, vregEnc3210(i
->Ain
.SseCStore
.src
),
3694 i
->Ain
.SseCStore
.addr
);
3696 /* Fix up the conditional branch */
3697 Int delta
= p
- ptmp
;
3698 vassert(delta
> 0 && delta
< 40);
3699 *ptmp
= toUChar(delta
-1);
3703 case Ain_SseCLoad
: {
3704 vassert(i
->Ain
.SseCLoad
.cond
!= Acc_ALWAYS
);
3706 /* Use ptmp for backpatching conditional jumps. */
3709 /* jmp fwds if !condition */
3710 *p
++ = toUChar(0x70 + (0xF & (i
->Ain
.SseCLoad
.cond
^ 1)));
3711 ptmp
= p
; /* fill in this bit later */
3712 *p
++ = 0; /* # of bytes to jump over; don't know how many yet. */
3716 rexAMode_M_enc(vregEnc3210(i
->Ain
.SseCLoad
.dst
),
3717 i
->Ain
.SseCLoad
.addr
));
3719 *p
++ = toUChar(0x10);
3720 p
= doAMode_M_enc(p
, vregEnc3210(i
->Ain
.SseCLoad
.dst
),
3721 i
->Ain
.SseCLoad
.addr
);
3723 /* Fix up the conditional branch */
3724 Int delta
= p
- ptmp
;
3725 vassert(delta
> 0 && delta
< 40);
3726 *ptmp
= toUChar(delta
-1);
3731 vassert(i
->Ain
.SseLdzLO
.sz
== 4 || i
->Ain
.SseLdzLO
.sz
== 8);
3732 /* movs[sd] amode, %xmm-dst */
3733 *p
++ = toUChar(i
->Ain
.SseLdzLO
.sz
==4 ? 0xF3 : 0xF2);
3735 rexAMode_M_enc(vregEnc3210(i
->Ain
.SseLdzLO
.reg
),
3736 i
->Ain
.SseLdzLO
.addr
));
3739 p
= doAMode_M_enc(p
, vregEnc3210(i
->Ain
.SseLdzLO
.reg
),
3740 i
->Ain
.SseLdzLO
.addr
);
3743 case Ain_Sse32Fx4
: {
3744 UInt srcRegNo
= vregEnc3210(i
->Ain
.Sse32Fx4
.src
);
3745 UInt dstRegNo
= vregEnc3210(i
->Ain
.Sse32Fx4
.dst
);
3746 // VEX encoded cases
3747 switch (i
->Ain
.Sse32Fx4
.op
) {
3748 case Asse_F16toF32
: { // vcvtph2ps %xmmS, %xmmD
3751 // VCVTPH2PS %xmmS, %xmmD (s and d are both xmm regs, range 0 .. 15)
3752 // 0xC4 : ~d3 1 ~s3 0 0 0 1 0 : 0x79 : 0x13 : 1 1 d2 d1 d0 s2 s1 s0
3753 UInt byte2
= ((((~d
)>>3)&1)<<7) | (1<<6)
3754 | ((((~s
)>>3)&1)<<5) | (1<<1);
3755 UInt byte5
= (1<<7) | (1<<6) | ((d
&7) << 3) | ((s
&7) << 0);
3763 case Asse_F32toF16
: { // vcvtps2ph $4, %xmmS, %xmmD
3766 // VCVTPS2PH $4, %xmmS, %xmmD (s and d both xmm regs, range 0 .. 15)
3767 // 0xC4 : ~s3 1 ~d3 0 0 0 1 1 : 0x79
3768 // : 0x1D : 11 s2 s1 s0 d2 d1 d0 : 0x4
3769 UInt byte2
= ((((~s
)>>3)&1)<<7) | (1<<6)
3770 | ((((~d
)>>3)&1)<<5) | (1<<1) | (1 << 0);
3771 UInt byte5
= (1<<7) | (1<<6) | ((s
&7) << 3) | ((d
&7) << 0);
3782 // After this point, REX encoded cases only
3784 switch (i
->Ain
.Sse32Fx4
.op
) {
3785 case Asse_F2I
: *p
++ = 0x66; break;
3788 *p
++ = clearWBit(rexAMode_R_enc_enc(dstRegNo
, srcRegNo
));
3790 switch (i
->Ain
.Sse32Fx4
.op
) {
3791 case Asse_ADDF
: *p
++ = 0x58; break;
3792 case Asse_DIVF
: *p
++ = 0x5E; break;
3793 case Asse_MAXF
: *p
++ = 0x5F; break;
3794 case Asse_MINF
: *p
++ = 0x5D; break;
3795 case Asse_MULF
: *p
++ = 0x59; break;
3796 case Asse_RCPF
: *p
++ = 0x53; break;
3797 case Asse_RSQRTF
: *p
++ = 0x52; break;
3798 case Asse_SQRTF
: *p
++ = 0x51; break;
3799 case Asse_I2F
: *p
++ = 0x5B; break; // cvtdq2ps; no 0x66 pfx
3800 case Asse_F2I
: *p
++ = 0x5B; break; // cvtps2dq; with 0x66 pfx
3801 case Asse_SUBF
: *p
++ = 0x5C; break;
3802 case Asse_CMPEQF
: *p
++ = 0xC2; xtra
= 0x100; break;
3803 case Asse_CMPLTF
: *p
++ = 0xC2; xtra
= 0x101; break;
3804 case Asse_CMPLEF
: *p
++ = 0xC2; xtra
= 0x102; break;
3805 case Asse_CMPUNF
: *p
++ = 0xC2; xtra
= 0x103; break;
3808 p
= doAMode_R_enc_enc(p
, dstRegNo
, srcRegNo
);
3810 *p
++ = toUChar(xtra
& 0xFF);
3818 rexAMode_R_enc_enc( vregEnc3210(i
->Ain
.Sse64Fx2
.dst
),
3819 vregEnc3210(i
->Ain
.Sse64Fx2
.src
) ));
3821 switch (i
->Ain
.Sse64Fx2
.op
) {
3822 case Asse_ADDF
: *p
++ = 0x58; break;
3823 case Asse_DIVF
: *p
++ = 0x5E; break;
3824 case Asse_MAXF
: *p
++ = 0x5F; break;
3825 case Asse_MINF
: *p
++ = 0x5D; break;
3826 case Asse_MULF
: *p
++ = 0x59; break;
3827 case Asse_SQRTF
: *p
++ = 0x51; break;
3828 case Asse_SUBF
: *p
++ = 0x5C; break;
3829 case Asse_CMPEQF
: *p
++ = 0xC2; xtra
= 0x100; break;
3830 case Asse_CMPLTF
: *p
++ = 0xC2; xtra
= 0x101; break;
3831 case Asse_CMPLEF
: *p
++ = 0xC2; xtra
= 0x102; break;
3832 case Asse_CMPUNF
: *p
++ = 0xC2; xtra
= 0x103; break;
3835 p
= doAMode_R_enc_enc(p
, vregEnc3210(i
->Ain
.Sse64Fx2
.dst
),
3836 vregEnc3210(i
->Ain
.Sse64Fx2
.src
) );
3838 *p
++ = toUChar(xtra
& 0xFF);
3845 rexAMode_R_enc_enc( vregEnc3210(i
->Ain
.Sse32FLo
.dst
),
3846 vregEnc3210(i
->Ain
.Sse32FLo
.src
) ));
3848 switch (i
->Ain
.Sse32FLo
.op
) {
3849 case Asse_ADDF
: *p
++ = 0x58; break;
3850 case Asse_DIVF
: *p
++ = 0x5E; break;
3851 case Asse_MAXF
: *p
++ = 0x5F; break;
3852 case Asse_MINF
: *p
++ = 0x5D; break;
3853 case Asse_MULF
: *p
++ = 0x59; break;
3854 case Asse_RCPF
: *p
++ = 0x53; break;
3855 case Asse_RSQRTF
: *p
++ = 0x52; break;
3856 case Asse_SQRTF
: *p
++ = 0x51; break;
3857 case Asse_SUBF
: *p
++ = 0x5C; break;
3858 case Asse_CMPEQF
: *p
++ = 0xC2; xtra
= 0x100; break;
3859 case Asse_CMPLTF
: *p
++ = 0xC2; xtra
= 0x101; break;
3860 case Asse_CMPLEF
: *p
++ = 0xC2; xtra
= 0x102; break;
3861 case Asse_CMPUNF
: *p
++ = 0xC2; xtra
= 0x103; break;
3864 p
= doAMode_R_enc_enc(p
, vregEnc3210(i
->Ain
.Sse32FLo
.dst
),
3865 vregEnc3210(i
->Ain
.Sse32FLo
.src
) );
3867 *p
++ = toUChar(xtra
& 0xFF);
3874 rexAMode_R_enc_enc( vregEnc3210(i
->Ain
.Sse64FLo
.dst
),
3875 vregEnc3210(i
->Ain
.Sse64FLo
.src
) ));
3877 switch (i
->Ain
.Sse64FLo
.op
) {
3878 case Asse_ADDF
: *p
++ = 0x58; break;
3879 case Asse_DIVF
: *p
++ = 0x5E; break;
3880 case Asse_MAXF
: *p
++ = 0x5F; break;
3881 case Asse_MINF
: *p
++ = 0x5D; break;
3882 case Asse_MULF
: *p
++ = 0x59; break;
3883 case Asse_SQRTF
: *p
++ = 0x51; break;
3884 case Asse_SUBF
: *p
++ = 0x5C; break;
3885 case Asse_CMPEQF
: *p
++ = 0xC2; xtra
= 0x100; break;
3886 case Asse_CMPLTF
: *p
++ = 0xC2; xtra
= 0x101; break;
3887 case Asse_CMPLEF
: *p
++ = 0xC2; xtra
= 0x102; break;
3888 case Asse_CMPUNF
: *p
++ = 0xC2; xtra
= 0x103; break;
3891 p
= doAMode_R_enc_enc(p
, vregEnc3210(i
->Ain
.Sse64FLo
.dst
),
3892 vregEnc3210(i
->Ain
.Sse64FLo
.src
) );
3894 *p
++ = toUChar(xtra
& 0xFF);
3898 # define XX(_n) *p++ = (_n)
3901 rexAMode_R_enc_enc( vregEnc3210(i
->Ain
.SseReRg
.dst
),
3902 vregEnc3210(i
->Ain
.SseReRg
.src
) ));
3904 switch (i
->Ain
.SseReRg
.op
) {
3905 case Asse_MOV
: /*movups*/ XX(rex
); XX(0x0F); XX(0x10); break;
3906 case Asse_OR
: XX(rex
); XX(0x0F); XX(0x56); break;
3907 case Asse_XOR
: XX(rex
); XX(0x0F); XX(0x57); break;
3908 case Asse_AND
: XX(rex
); XX(0x0F); XX(0x54); break;
3909 case Asse_ANDN
: XX(rex
); XX(0x0F); XX(0x55); break;
3910 case Asse_PACKSSD
: XX(0x66); XX(rex
); XX(0x0F); XX(0x6B); break;
3911 case Asse_PACKSSW
: XX(0x66); XX(rex
); XX(0x0F); XX(0x63); break;
3912 case Asse_PACKUSW
: XX(0x66); XX(rex
); XX(0x0F); XX(0x67); break;
3913 case Asse_ADD8
: XX(0x66); XX(rex
); XX(0x0F); XX(0xFC); break;
3914 case Asse_ADD16
: XX(0x66); XX(rex
); XX(0x0F); XX(0xFD); break;
3915 case Asse_ADD32
: XX(0x66); XX(rex
); XX(0x0F); XX(0xFE); break;
3916 case Asse_ADD64
: XX(0x66); XX(rex
); XX(0x0F); XX(0xD4); break;
3917 case Asse_QADD8S
: XX(0x66); XX(rex
); XX(0x0F); XX(0xEC); break;
3918 case Asse_QADD16S
: XX(0x66); XX(rex
); XX(0x0F); XX(0xED); break;
3919 case Asse_QADD8U
: XX(0x66); XX(rex
); XX(0x0F); XX(0xDC); break;
3920 case Asse_QADD16U
: XX(0x66); XX(rex
); XX(0x0F); XX(0xDD); break;
3921 case Asse_AVG8U
: XX(0x66); XX(rex
); XX(0x0F); XX(0xE0); break;
3922 case Asse_AVG16U
: XX(0x66); XX(rex
); XX(0x0F); XX(0xE3); break;
3923 case Asse_CMPEQ8
: XX(0x66); XX(rex
); XX(0x0F); XX(0x74); break;
3924 case Asse_CMPEQ16
: XX(0x66); XX(rex
); XX(0x0F); XX(0x75); break;
3925 case Asse_CMPEQ32
: XX(0x66); XX(rex
); XX(0x0F); XX(0x76); break;
3926 case Asse_CMPGT8S
: XX(0x66); XX(rex
); XX(0x0F); XX(0x64); break;
3927 case Asse_CMPGT16S
: XX(0x66); XX(rex
); XX(0x0F); XX(0x65); break;
3928 case Asse_CMPGT32S
: XX(0x66); XX(rex
); XX(0x0F); XX(0x66); break;
3929 case Asse_MAX16S
: XX(0x66); XX(rex
); XX(0x0F); XX(0xEE); break;
3930 case Asse_MAX8U
: XX(0x66); XX(rex
); XX(0x0F); XX(0xDE); break;
3931 case Asse_MIN16S
: XX(0x66); XX(rex
); XX(0x0F); XX(0xEA); break;
3932 case Asse_MIN8U
: XX(0x66); XX(rex
); XX(0x0F); XX(0xDA); break;
3933 case Asse_MULHI16U
: XX(0x66); XX(rex
); XX(0x0F); XX(0xE4); break;
3934 case Asse_MULHI16S
: XX(0x66); XX(rex
); XX(0x0F); XX(0xE5); break;
3935 case Asse_MUL16
: XX(0x66); XX(rex
); XX(0x0F); XX(0xD5); break;
3936 case Asse_SHL16
: XX(0x66); XX(rex
); XX(0x0F); XX(0xF1); break;
3937 case Asse_SHL32
: XX(0x66); XX(rex
); XX(0x0F); XX(0xF2); break;
3938 case Asse_SHL64
: XX(0x66); XX(rex
); XX(0x0F); XX(0xF3); break;
3939 case Asse_SAR16
: XX(0x66); XX(rex
); XX(0x0F); XX(0xE1); break;
3940 case Asse_SAR32
: XX(0x66); XX(rex
); XX(0x0F); XX(0xE2); break;
3941 case Asse_SHR16
: XX(0x66); XX(rex
); XX(0x0F); XX(0xD1); break;
3942 case Asse_SHR32
: XX(0x66); XX(rex
); XX(0x0F); XX(0xD2); break;
3943 case Asse_SHR64
: XX(0x66); XX(rex
); XX(0x0F); XX(0xD3); break;
3944 case Asse_SUB8
: XX(0x66); XX(rex
); XX(0x0F); XX(0xF8); break;
3945 case Asse_SUB16
: XX(0x66); XX(rex
); XX(0x0F); XX(0xF9); break;
3946 case Asse_SUB32
: XX(0x66); XX(rex
); XX(0x0F); XX(0xFA); break;
3947 case Asse_SUB64
: XX(0x66); XX(rex
); XX(0x0F); XX(0xFB); break;
3948 case Asse_QSUB8S
: XX(0x66); XX(rex
); XX(0x0F); XX(0xE8); break;
3949 case Asse_QSUB16S
: XX(0x66); XX(rex
); XX(0x0F); XX(0xE9); break;
3950 case Asse_QSUB8U
: XX(0x66); XX(rex
); XX(0x0F); XX(0xD8); break;
3951 case Asse_QSUB16U
: XX(0x66); XX(rex
); XX(0x0F); XX(0xD9); break;
3952 case Asse_UNPCKHB
: XX(0x66); XX(rex
); XX(0x0F); XX(0x68); break;
3953 case Asse_UNPCKHW
: XX(0x66); XX(rex
); XX(0x0F); XX(0x69); break;
3954 case Asse_UNPCKHD
: XX(0x66); XX(rex
); XX(0x0F); XX(0x6A); break;
3955 case Asse_UNPCKHQ
: XX(0x66); XX(rex
); XX(0x0F); XX(0x6D); break;
3956 case Asse_UNPCKLB
: XX(0x66); XX(rex
); XX(0x0F); XX(0x60); break;
3957 case Asse_UNPCKLW
: XX(0x66); XX(rex
); XX(0x0F); XX(0x61); break;
3958 case Asse_UNPCKLD
: XX(0x66); XX(rex
); XX(0x0F); XX(0x62); break;
3959 case Asse_UNPCKLQ
: XX(0x66); XX(rex
); XX(0x0F); XX(0x6C); break;
3960 case Asse_PSHUFB
: XX(0x66); XX(rex
);
3961 XX(0x0F); XX(0x38); XX(0x00); break;
3962 case Asse_PMADDUBSW
:XX(0x66); XX(rex
);
3963 XX(0x0F); XX(0x38); XX(0x04); break;
3966 p
= doAMode_R_enc_enc(p
, vregEnc3210(i
->Ain
.SseReRg
.dst
),
3967 vregEnc3210(i
->Ain
.SseReRg
.src
) );
3972 /* jmp fwds if !condition */
3973 *p
++ = toUChar(0x70 + (i
->Ain
.SseCMov
.cond
^ 1));
3974 *p
++ = 0; /* # of bytes in the next bit, which we don't know yet */
3977 /* movaps %src, %dst */
3979 rexAMode_R_enc_enc( vregEnc3210(i
->Ain
.SseCMov
.dst
),
3980 vregEnc3210(i
->Ain
.SseCMov
.src
) ));
3983 p
= doAMode_R_enc_enc(p
, vregEnc3210(i
->Ain
.SseCMov
.dst
),
3984 vregEnc3210(i
->Ain
.SseCMov
.src
) );
3986 /* Fill in the jump offset. */
3987 *(ptmp
-1) = toUChar(p
- ptmp
);
3993 rexAMode_R_enc_enc( vregEnc3210(i
->Ain
.SseShuf
.dst
),
3994 vregEnc3210(i
->Ain
.SseShuf
.src
) ));
3997 p
= doAMode_R_enc_enc(p
, vregEnc3210(i
->Ain
.SseShuf
.dst
),
3998 vregEnc3210(i
->Ain
.SseShuf
.src
) );
3999 *p
++ = (UChar
)(i
->Ain
.SseShuf
.order
);
4002 case Ain_SseShiftN
: {
4004 UInt shiftImm
= i
->Ain
.SseShiftN
.shiftBits
;
4005 switch (i
->Ain
.SseShiftN
.op
) {
4006 case Asse_SHL16
: limit
= 15; opc
= 0x71; subopc_imm
= 6; break;
4007 case Asse_SHL32
: limit
= 31; opc
= 0x72; subopc_imm
= 6; break;
4008 case Asse_SHL64
: limit
= 63; opc
= 0x73; subopc_imm
= 6; break;
4009 case Asse_SAR16
: limit
= 15; opc
= 0x71; subopc_imm
= 4; break;
4010 case Asse_SAR32
: limit
= 31; opc
= 0x72; subopc_imm
= 4; break;
4011 case Asse_SHR16
: limit
= 15; opc
= 0x71; subopc_imm
= 2; break;
4012 case Asse_SHR32
: limit
= 31; opc
= 0x72; subopc_imm
= 2; break;
4013 case Asse_SHR64
: limit
= 63; opc
= 0x73; subopc_imm
= 2; break;
4015 if ((shiftImm
& 7) != 0) goto bad
;
4017 limit
= 15; opc
= 0x73; subopc_imm
= 7;
4020 if ((shiftImm
& 7) != 0) goto bad
;
4022 limit
= 15; opc
= 0x73; subopc_imm
= 3;
4025 // This should never happen .. SSE2 only offers the above 10 insns
4026 // for the "shift with immediate" case
4029 vassert(limit
> 0 && opc
> 0 && subopc_imm
> 0);
4030 if (shiftImm
> limit
) goto bad
;
4033 rexAMode_R_enc_enc( subopc_imm
,
4034 vregEnc3210(i
->Ain
.SseShiftN
.dst
) ));
4037 p
= doAMode_R_enc_enc(p
, subopc_imm
, vregEnc3210(i
->Ain
.SseShiftN
.dst
));
4043 Bool toXMM
= i
->Ain
.SseMOVQ
.toXMM
;
4044 HReg gpr
= i
->Ain
.SseMOVQ
.gpr
;
4045 HReg xmm
= i
->Ain
.SseMOVQ
.xmm
;
4047 *p
++ = setWBit( rexAMode_R_enc_enc( vregEnc3210(xmm
), iregEnc3210(gpr
)) );
4049 *p
++ = toXMM
? 0x6E : 0x7E;
4050 p
= doAMode_R_enc_enc( p
, vregEnc3210(xmm
), iregEnc3210(gpr
) );
4054 //uu case Ain_AvxLdSt: {
4055 //uu UInt vex = vexAMode_M( dvreg2ireg(i->Ain.AvxLdSt.reg),
4056 //uu i->Ain.AvxLdSt.addr );
4057 //uu p = emitVexPrefix(p, vex);
4058 //uu *p++ = toUChar(i->Ain.AvxLdSt.isLoad ? 0x10 : 0x11);
4059 //uu p = doAMode_M(p, dvreg2ireg(i->Ain.AvxLdSt.reg), i->Ain.AvxLdSt.addr);
4065 (3 bytes) decl 8(%rbp) 8 == offsetof(host_EvC_COUNTER)
4066 (2 bytes) jns nofail expected taken
4067 (3 bytes) jmp* 0(%rbp) 0 == offsetof(host_EvC_FAILADDR)
4070 /* This is heavily asserted re instruction lengths. It needs to
4071 be. If we get given unexpected forms of .amCounter or
4072 .amFailAddr -- basically, anything that's not of the form
4073 uimm7(%rbp) -- they are likely to fail. */
4074 /* Note also that after the decl we must be very careful not to
4075 read the carry flag, else we get a partial flags stall.
4076 js/jns avoids that, though. */
4078 /* --- decl 8(%rbp) --- */
4079 /* Need to compute the REX byte for the decl in order to prove
4080 that we don't need it, since this is a 32-bit dec and all
4081 registers involved in the amode are < r8. "1" because
4082 there's no register in this encoding; instead the register
4083 field is used as a sub opcode. The encoding for "decl r/m32"
4084 is FF /1, hence the "1". */
4085 rex
= clearWBit(rexAMode_M_enc(1, i
->Ain
.EvCheck
.amCounter
));
4086 if (rex
!= 0x40) goto bad
; /* We don't expect to need the REX byte. */
4088 p
= doAMode_M_enc(p
, 1, i
->Ain
.EvCheck
.amCounter
);
4089 vassert(p
- p0
== 3);
4090 /* --- jns nofail --- */
4092 *p
++ = 0x03; /* need to check this 0x03 after the next insn */
4093 vassert(p
- p0
== 5);
4094 /* --- jmp* 0(%rbp) --- */
4095 /* Once again, verify we don't need REX. The encoding is FF /4.
4096 We don't need REX.W since by default FF /4 in 64-bit mode
4097 implies a 64 bit load. */
4098 rex
= clearWBit(rexAMode_M_enc(4, i
->Ain
.EvCheck
.amFailAddr
));
4099 if (rex
!= 0x40) goto bad
;
4101 p
= doAMode_M_enc(p
, 4, i
->Ain
.EvCheck
.amFailAddr
);
4102 vassert(p
- p0
== 8); /* also ensures that 0x03 offset above is ok */
4103 /* And crosscheck .. */
4104 vassert(evCheckSzB_AMD64() == 8);
4109 /* We generate movabsq $0, %r11
4111 in the expectation that a later call to LibVEX_patchProfCtr
4112 will be used to fill in the immediate field once the right
4114 49 BB 00 00 00 00 00 00 00 00
4117 *p
++ = 0x49; *p
++ = 0xBB;
4118 *p
++ = 0x00; *p
++ = 0x00; *p
++ = 0x00; *p
++ = 0x00;
4119 *p
++ = 0x00; *p
++ = 0x00; *p
++ = 0x00; *p
++ = 0x00;
4120 *p
++ = 0x49; *p
++ = 0xFF; *p
++ = 0x03;
4121 /* Tell the caller .. */
4122 vassert(!(*is_profInc
));
4132 ppAMD64Instr(i
, mode64
);
4133 vpanic("emit_AMD64Instr");
4137 vassert(p
- &buf
[0] <= 64);
4142 /* How big is an event check? See case for Ain_EvCheck in
4143 emit_AMD64Instr just above. That crosschecks what this returns, so
4144 we can tell if we're inconsistent. */
/* The Ain_EvCheck case asserts that this returns 8, the total size of
   the decl (3 bytes), jns (2 bytes) and jmp* (3 bytes) it emits. */
4145 Int
evCheckSzB_AMD64 (void)
4151 /* NB: what goes on here has to be very closely coordinated with the
4152 emitInstr case for XDirect, above. */
/* Rewrite the 13-byte sequence at place_to_chain so that it transfers
   control to place_to_jump_to.

   The site is first asserted to hold
      49 BB <8 bytes == disp_cp_chain_me_EXPECTED> 41 FF D3.
   delta is the distance from the end of a 5-byte jump placed at the
   site to the target, and shortOK holds when delta lies in
   [-1000000000, 1000000000).  The code below writes either a 32-bit
   displacement at p[1] followed by four "0F 0B" pairs, or the 8-byte
   target address at p[2].  Each time shortCTR is incremented to a
   multiple of 0x400 the "using long jmp" path is taken.

   The VexInvalRange built at the end is initialised with
   place_to_chain and a length of 13.  Only VexEndnessLE hosts are
   accepted.  shortCTR is a static counter updated without any
   locking. */
4153 VexInvalRange
chainXDirect_AMD64 ( VexEndness endness_host
,
4154 void* place_to_chain
,
4155 const void* disp_cp_chain_me_EXPECTED
,
4156 const void* place_to_jump_to
)
4158 vassert(endness_host
== VexEndnessLE
);
4160 /* What we're expecting to see is:
4161 movabsq $disp_cp_chain_me_EXPECTED, %r11
4164 49 BB <8 bytes value == disp_cp_chain_me_EXPECTED>
4167 UChar
* p
= (UChar
*)place_to_chain
;
4168 vassert(p
[0] == 0x49);
4169 vassert(p
[1] == 0xBB);
4170 vassert(read_misaligned_ULong_LE(&p
[2]) == (Addr
)disp_cp_chain_me_EXPECTED
);
4171 vassert(p
[10] == 0x41);
4172 vassert(p
[11] == 0xFF);
4173 vassert(p
[12] == 0xD3);
4174 /* And what we want to change it to is either:
4176 movabsq $place_to_jump_to, %r11
4179 49 BB <8 bytes value == place_to_jump_to>
4181 So it's the same length (convenient, huh) and we don't
4182 need to change all the bits.
4184 in the case where the displacement falls within 32 bits
4185 jmpq disp32 where disp32 is relative to the next insn
4188 E9 <4 bytes == disp32>
4189 0F 0B 0F 0B 0F 0B 0F 0B
4191 In both cases the replacement has the same length as the original.
4192 To remain sane & verifiable,
4193 (1) limit the displacement for the short form to
4194 (say) +/- one billion, so as to avoid wraparound
4196 (2) even if the short form is applicable, once every (say)
4197 1024 times use the long form anyway, so as to maintain
4200 /* This is the delta we need to put into a JMP d32 insn. It's
4201 relative to the start of the next insn, hence the -5. */
4202 Long delta
= (Long
)((const UChar
*)place_to_jump_to
- (const UChar
*)p
) - 5;
4203 Bool shortOK
= delta
>= -1000*1000*1000 && delta
< 1000*1000*1000;
4205 static UInt shortCTR
= 0; /* DO NOT MAKE NON-STATIC */
4207 shortCTR
++; // thread safety bleh
4208 if (0 == (shortCTR
& 0x3FF)) {
4211 vex_printf("QQQ chainXDirect_AMD64: shortCTR = %u, "
4212 "using long jmp\n", shortCTR
);
4216 /* And make the modifications. */
4219 write_misaligned_UInt_LE(&p
[1], (UInt
)(Int
)delta
);
4220 p
[5] = 0x0F; p
[6] = 0x0B;
4221 p
[7] = 0x0F; p
[8] = 0x0B;
4222 p
[9] = 0x0F; p
[10] = 0x0B;
4223 p
[11] = 0x0F; p
[12] = 0x0B;
4224 /* sanity check on the delta -- top 32 are all 0 or all 1 */
4226 vassert(delta
== 0LL || delta
== -1LL);
4228 /* Minimal modifications from the starting sequence. */
4229 write_misaligned_ULong_LE(&p
[2], (ULong
)(Addr
)place_to_jump_to
);
4232 VexInvalRange vir
= { (HWord
)place_to_chain
, 13 };
4237 /* NB: what goes on here has to be very closely coordinated with the
4238 emitInstr case for XDirect, above. */
/* Undo a previous chaining of the 13-byte site at place_to_unchain.

   The site is matched against two layouts.  The long form is
      49 BB <8 bytes == place_to_jump_to_EXPECTED> 41 FF E3.
   For the short form, bytes 5..12 must be four "0F 0B" pairs, and
   the 32-bit value at p[1], sign-extended and added to p + 5, must
   equal place_to_jump_to_EXPECTED.

   The address disp_cp_chain_me is then written into the 8 bytes at
   p[2].  The VexInvalRange built at the end is initialised with
   place_to_unchain and a length of 13.  Only VexEndnessLE hosts are
   accepted. */
4239 VexInvalRange
unchainXDirect_AMD64 ( VexEndness endness_host
,
4240 void* place_to_unchain
,
4241 const void* place_to_jump_to_EXPECTED
,
4242 const void* disp_cp_chain_me
)
4244 vassert(endness_host
== VexEndnessLE
);
4246 /* What we're expecting to see is either:
4248 movabsq $place_to_jump_to_EXPECTED, %r11
4251 49 BB <8 bytes value == place_to_jump_to_EXPECTED>
4254 in the case where the displacement falls within 32 bits
4258 E9 <4 bytes == disp32>
4259 0F 0B 0F 0B 0F 0B 0F 0B
4261 UChar
* p
= (UChar
*)place_to_unchain
;
4263 if (p
[0] == 0x49 && p
[1] == 0xBB
4264 && read_misaligned_ULong_LE(&p
[2])
4265 == (ULong
)(Addr
)place_to_jump_to_EXPECTED
4266 && p
[10] == 0x41 && p
[11] == 0xFF && p
[12] == 0xE3) {
4267 /* it's the long form */
4272 && p
[5] == 0x0F && p
[6] == 0x0B
4273 && p
[7] == 0x0F && p
[8] == 0x0B
4274 && p
[9] == 0x0F && p
[10] == 0x0B
4275 && p
[11] == 0x0F && p
[12] == 0x0B) {
4276 /* It's the short form. Check the offset is right. */
4277 Int s32
= (Int
)read_misaligned_UInt_LE(&p
[1]);
4278 Long s64
= (Long
)s32
;
4279 if ((UChar
*)p
+ 5 + s64
== place_to_jump_to_EXPECTED
) {
4282 vex_printf("QQQ unchainXDirect_AMD64: found short form\n");
4286 /* And what we want to change it to is:
4287 movabsq $disp_cp_chain_me, %r11
4290 49 BB <8 bytes value == disp_cp_chain_me>
4292 So it's the same length (convenient, huh).
4296 write_misaligned_ULong_LE(&p
[2], (ULong
)(Addr
)disp_cp_chain_me
);
4300 VexInvalRange vir
= { (HWord
)place_to_unchain
, 13 };
4305 /* Patch the counter address into a profile inc point, as previously
4306 created by the Ain_ProfInc case for emit_AMD64Instr. */
/* place_to_patch must hold the unpatched 13-byte template
      49 BB <8 zero bytes> 49 FF 03
   which is asserted byte by byte.  The address location_of_counter
   is then stored, least significant byte first, into bytes 2..9.
   The VexInvalRange built at the end is initialised with
   place_to_patch and a length of 13.  Also asserts that the host is
   VexEndnessLE and that a ULong* is 8 bytes wide. */
4307 VexInvalRange
patchProfInc_AMD64 ( VexEndness endness_host
,
4308 void* place_to_patch
,
4309 const ULong
* location_of_counter
)
4311 vassert(endness_host
== VexEndnessLE
);
4312 vassert(sizeof(ULong
*) == 8);
4313 UChar
* p
= (UChar
*)place_to_patch
;
/* Confirm the site still holds the template with an all-zero
   immediate before overwriting it. */
4314 vassert(p
[0] == 0x49);
4315 vassert(p
[1] == 0xBB);
4316 vassert(p
[2] == 0x00);
4317 vassert(p
[3] == 0x00);
4318 vassert(p
[4] == 0x00);
4319 vassert(p
[5] == 0x00);
4320 vassert(p
[6] == 0x00);
4321 vassert(p
[7] == 0x00);
4322 vassert(p
[8] == 0x00);
4323 vassert(p
[9] == 0x00);
4324 vassert(p
[10] == 0x49);
4325 vassert(p
[11] == 0xFF);
4326 vassert(p
[12] == 0x03);
/* Write the counter address one byte at a time, low byte first,
   shifting imm64 right by 8 after each store. */
4327 ULong imm64
= (ULong
)(Addr
)location_of_counter
;
4328 p
[2] = imm64
& 0xFF; imm64
>>= 8;
4329 p
[3] = imm64
& 0xFF; imm64
>>= 8;
4330 p
[4] = imm64
& 0xFF; imm64
>>= 8;
4331 p
[5] = imm64
& 0xFF; imm64
>>= 8;
4332 p
[6] = imm64
& 0xFF; imm64
>>= 8;
4333 p
[7] = imm64
& 0xFF; imm64
>>= 8;
4334 p
[8] = imm64
& 0xFF; imm64
>>= 8;
4335 p
[9] = imm64
& 0xFF; imm64
>>= 8;
4336 VexInvalRange vir
= { (HWord
)place_to_patch
, 13 };
4341 /*---------------------------------------------------------------*/
4342 /*--- end host_amd64_defs.c ---*/
4343 /*---------------------------------------------------------------*/