2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm64_defs.c ---*/
4 /*---------------------------------------------------------------*/
7 This file is part of Valgrind, a dynamic binary instrumentation
10 Copyright (C) 2013-2017 OpenWorks
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
29 #include "libvex_basictypes.h"
31 #include "libvex_trc_values.h"
33 #include "main_util.h"
34 #include "host_generic_regs.h"
35 #include "host_arm64_defs.h"
38 /* --------- Registers. --------- */
40 /* The usual HReg abstraction. We use the following classes only:
42 D regs (64 bit float, also used for 32 bit float)
43 Q regs (128 bit vector)
46 const RRegUniverse
* getRRegUniverse_ARM64 ( void )
48 /* The real-register universe is a big constant, so we just want to
49 initialise it once. */
50 static RRegUniverse rRegUniverse_ARM64
;
51 static Bool rRegUniverse_ARM64_initted
= False
;
53 /* Handy shorthand, nothing more */
54 RRegUniverse
* ru
= &rRegUniverse_ARM64
;
56 /* This isn't thread-safe. Sigh. */
57 if (LIKELY(rRegUniverse_ARM64_initted
))
60 RRegUniverse__init(ru
);
62 /* Add the registers. The initial segment of this array must be
63 those available for allocation by reg-alloc, and those that
64 follow are not available for allocation. */
65 ru
->allocable_start
[HRcInt64
] = ru
->size
;
66 ru
->regs
[ru
->size
++] = hregARM64_X22();
67 ru
->regs
[ru
->size
++] = hregARM64_X23();
68 ru
->regs
[ru
->size
++] = hregARM64_X24();
69 ru
->regs
[ru
->size
++] = hregARM64_X25();
70 ru
->regs
[ru
->size
++] = hregARM64_X26();
71 ru
->regs
[ru
->size
++] = hregARM64_X27();
72 ru
->regs
[ru
->size
++] = hregARM64_X28();
74 ru
->regs
[ru
->size
++] = hregARM64_X0();
75 ru
->regs
[ru
->size
++] = hregARM64_X1();
76 ru
->regs
[ru
->size
++] = hregARM64_X2();
77 ru
->regs
[ru
->size
++] = hregARM64_X3();
78 ru
->regs
[ru
->size
++] = hregARM64_X4();
79 ru
->regs
[ru
->size
++] = hregARM64_X5();
80 ru
->regs
[ru
->size
++] = hregARM64_X6();
81 ru
->regs
[ru
->size
++] = hregARM64_X7();
82 ru
->allocable_end
[HRcInt64
] = ru
->size
- 1;
83 // X8 is used as a ProfInc temporary, not available to regalloc.
84 // X9 is a chaining/spill temporary, not available to regalloc.
86 // Do we really need all these?
87 //ru->regs[ru->size++] = hregARM64_X10();
88 //ru->regs[ru->size++] = hregARM64_X11();
89 //ru->regs[ru->size++] = hregARM64_X12();
90 //ru->regs[ru->size++] = hregARM64_X13();
91 //ru->regs[ru->size++] = hregARM64_X14();
92 //ru->regs[ru->size++] = hregARM64_X15();
93 // X21 is the guest state pointer, not available to regalloc.
95 // vector regs. Unfortunately not callee-saved.
96 ru
->allocable_start
[HRcVec128
] = ru
->size
;
97 ru
->regs
[ru
->size
++] = hregARM64_Q16();
98 ru
->regs
[ru
->size
++] = hregARM64_Q17();
99 ru
->regs
[ru
->size
++] = hregARM64_Q18();
100 ru
->regs
[ru
->size
++] = hregARM64_Q19();
101 ru
->regs
[ru
->size
++] = hregARM64_Q20();
102 ru
->allocable_end
[HRcVec128
] = ru
->size
- 1;
104 // F64 regs, all of which are callee-saved
105 ru
->allocable_start
[HRcFlt64
] = ru
->size
;
106 ru
->regs
[ru
->size
++] = hregARM64_D8();
107 ru
->regs
[ru
->size
++] = hregARM64_D9();
108 ru
->regs
[ru
->size
++] = hregARM64_D10();
109 ru
->regs
[ru
->size
++] = hregARM64_D11();
110 ru
->regs
[ru
->size
++] = hregARM64_D12();
111 ru
->regs
[ru
->size
++] = hregARM64_D13();
112 ru
->allocable_end
[HRcFlt64
] = ru
->size
- 1;
114 ru
->allocable
= ru
->size
;
115 /* And other regs, not available to the allocator. */
117 // unavail: x21 as GSP
118 // x8 is used as a ProfInc temporary
119 // x9 is used as a spill/reload/chaining/call temporary
121 // x31 because dealing with the SP-vs-ZR overloading is too
122 // confusing, and we don't need to do so, so let's just avoid
125 // Currently, we have 15 allocatable integer registers:
126 // 0 1 2 3 4 5 6 7 22 23 24 25 26 27 28
128 // Hence for the allocatable integer registers we have:
130 // callee-saved: 22 23 24 25 26 27 28
131 // caller-saved: 0 1 2 3 4 5 6 7
133 // If the set of available registers changes or if the e/r status
134 // changes, be sure to re-check/sync the definition of
135 // getRegUsage for ARM64Instr_Call too.
137 ru
->regs
[ru
->size
++] = hregARM64_X8();
138 ru
->regs
[ru
->size
++] = hregARM64_X9();
139 ru
->regs
[ru
->size
++] = hregARM64_X21();
141 rRegUniverse_ARM64_initted
= True
;
143 RRegUniverse__check_is_sane(ru
);
148 UInt
ppHRegARM64 ( HReg reg
) {
150 /* Be generic for all virtual regs. */
151 if (hregIsVirtual(reg
)) {
154 /* But specific for real regs. */
155 switch (hregClass(reg
)) {
157 r
= hregEncoding(reg
);
158 vassert(r
>= 0 && r
< 31);
159 return vex_printf("x%d", r
);
161 r
= hregEncoding(reg
);
162 vassert(r
>= 0 && r
< 32);
163 return vex_printf("d%d", r
);
165 r
= hregEncoding(reg
);
166 vassert(r
>= 0 && r
< 32);
167 return vex_printf("q%d", r
);
169 vpanic("ppHRegARM64");
173 static UInt
ppHRegARM64asSreg ( HReg reg
) {
174 UInt written
= ppHRegARM64(reg
);
175 written
+= vex_printf("(S-reg)");
179 static UInt
ppHRegARM64asHreg ( HReg reg
) {
180 UInt written
= ppHRegARM64(reg
);
181 written
+= vex_printf("(H-reg)");
186 /* --------- Condition codes, ARM64 encoding. --------- */
188 static const HChar
* showARM64CondCode ( ARM64CondCode cond
) {
190 case ARM64cc_EQ
: return "eq";
191 case ARM64cc_NE
: return "ne";
192 case ARM64cc_CS
: return "cs";
193 case ARM64cc_CC
: return "cc";
194 case ARM64cc_MI
: return "mi";
195 case ARM64cc_PL
: return "pl";
196 case ARM64cc_VS
: return "vs";
197 case ARM64cc_VC
: return "vc";
198 case ARM64cc_HI
: return "hi";
199 case ARM64cc_LS
: return "ls";
200 case ARM64cc_GE
: return "ge";
201 case ARM64cc_LT
: return "lt";
202 case ARM64cc_GT
: return "gt";
203 case ARM64cc_LE
: return "le";
204 case ARM64cc_AL
: return "al"; // default
205 case ARM64cc_NV
: return "nv";
206 default: vpanic("showARM64CondCode");
211 /* --------- Memory address expressions (amodes). --------- */
213 ARM64AMode
* ARM64AMode_RI9 ( HReg reg
, Int simm9
) {
214 ARM64AMode
* am
= LibVEX_Alloc_inline(sizeof(ARM64AMode
));
215 am
->tag
= ARM64am_RI9
;
216 am
->ARM64am
.RI9
.reg
= reg
;
217 am
->ARM64am
.RI9
.simm9
= simm9
;
218 vassert(-256 <= simm9
&& simm9
<= 255);
222 ARM64AMode
* ARM64AMode_RI12 ( HReg reg
, Int uimm12
, UChar szB
) {
223 ARM64AMode
* am
= LibVEX_Alloc_inline(sizeof(ARM64AMode
));
224 am
->tag
= ARM64am_RI12
;
225 am
->ARM64am
.RI12
.reg
= reg
;
226 am
->ARM64am
.RI12
.uimm12
= uimm12
;
227 am
->ARM64am
.RI12
.szB
= szB
;
228 vassert(uimm12
>= 0 && uimm12
<= 4095);
230 case 1: case 2: case 4: case 8: break;
236 ARM64AMode
* ARM64AMode_RR ( HReg base
, HReg index
) {
237 ARM64AMode
* am
= LibVEX_Alloc_inline(sizeof(ARM64AMode
));
238 am
->tag
= ARM64am_RR
;
239 am
->ARM64am
.RR
.base
= base
;
240 am
->ARM64am
.RR
.index
= index
;
244 static void ppARM64AMode ( ARM64AMode
* am
) {
247 vex_printf("%d(", am
->ARM64am
.RI9
.simm9
);
248 ppHRegARM64(am
->ARM64am
.RI9
.reg
);
252 vex_printf("%u(", (UInt
)am
->ARM64am
.RI12
.szB
253 * (UInt
)am
->ARM64am
.RI12
.uimm12
);
254 ppHRegARM64(am
->ARM64am
.RI12
.reg
);
259 ppHRegARM64(am
->ARM64am
.RR
.base
);
261 ppHRegARM64(am
->ARM64am
.RR
.index
);
269 static void addRegUsage_ARM64AMode ( HRegUsage
* u
, ARM64AMode
* am
) {
272 addHRegUse(u
, HRmRead
, am
->ARM64am
.RI9
.reg
);
275 addHRegUse(u
, HRmRead
, am
->ARM64am
.RI12
.reg
);
278 addHRegUse(u
, HRmRead
, am
->ARM64am
.RR
.base
);
279 addHRegUse(u
, HRmRead
, am
->ARM64am
.RR
.index
);
282 vpanic("addRegUsage_ARM64Amode");
286 static void mapRegs_ARM64AMode ( HRegRemap
* m
, ARM64AMode
* am
) {
289 am
->ARM64am
.RI9
.reg
= lookupHRegRemap(m
, am
->ARM64am
.RI9
.reg
);
292 am
->ARM64am
.RI12
.reg
= lookupHRegRemap(m
, am
->ARM64am
.RI12
.reg
);
295 am
->ARM64am
.RR
.base
= lookupHRegRemap(m
, am
->ARM64am
.RR
.base
);
296 am
->ARM64am
.RR
.index
= lookupHRegRemap(m
, am
->ARM64am
.RR
.index
);
299 vpanic("mapRegs_ARM64Amode");
304 /* --------- Reg or uimm12<<{0,12} operands --------- */
306 ARM64RIA
* ARM64RIA_I12 ( UShort imm12
, UChar shift
) {
307 ARM64RIA
* riA
= LibVEX_Alloc_inline(sizeof(ARM64RIA
));
308 riA
->tag
= ARM64riA_I12
;
309 riA
->ARM64riA
.I12
.imm12
= imm12
;
310 riA
->ARM64riA
.I12
.shift
= shift
;
311 vassert(imm12
< 4096);
312 vassert(shift
== 0 || shift
== 12);
315 ARM64RIA
* ARM64RIA_R ( HReg reg
) {
316 ARM64RIA
* riA
= LibVEX_Alloc_inline(sizeof(ARM64RIA
));
317 riA
->tag
= ARM64riA_R
;
318 riA
->ARM64riA
.R
.reg
= reg
;
322 static void ppARM64RIA ( ARM64RIA
* riA
) {
325 vex_printf("#%u",(UInt
)(riA
->ARM64riA
.I12
.imm12
326 << riA
->ARM64riA
.I12
.shift
));
329 ppHRegARM64(riA
->ARM64riA
.R
.reg
);
336 static void addRegUsage_ARM64RIA ( HRegUsage
* u
, ARM64RIA
* riA
) {
341 addHRegUse(u
, HRmRead
, riA
->ARM64riA
.R
.reg
);
344 vpanic("addRegUsage_ARM64RIA");
348 static void mapRegs_ARM64RIA ( HRegRemap
* m
, ARM64RIA
* riA
) {
353 riA
->ARM64riA
.R
.reg
= lookupHRegRemap(m
, riA
->ARM64riA
.R
.reg
);
356 vpanic("mapRegs_ARM64RIA");
361 /* --------- Reg or "bitfield" (logic immediate) operands --------- */
363 ARM64RIL
* ARM64RIL_I13 ( UChar bitN
, UChar immR
, UChar immS
) {
364 ARM64RIL
* riL
= LibVEX_Alloc_inline(sizeof(ARM64RIL
));
365 riL
->tag
= ARM64riL_I13
;
366 riL
->ARM64riL
.I13
.bitN
= bitN
;
367 riL
->ARM64riL
.I13
.immR
= immR
;
368 riL
->ARM64riL
.I13
.immS
= immS
;
374 ARM64RIL
* ARM64RIL_R ( HReg reg
) {
375 ARM64RIL
* riL
= LibVEX_Alloc_inline(sizeof(ARM64RIL
));
376 riL
->tag
= ARM64riL_R
;
377 riL
->ARM64riL
.R
.reg
= reg
;
381 static void ppARM64RIL ( ARM64RIL
* riL
) {
384 vex_printf("#nrs(%u,%u,%u)",
385 (UInt
)riL
->ARM64riL
.I13
.bitN
,
386 (UInt
)riL
->ARM64riL
.I13
.immR
,
387 (UInt
)riL
->ARM64riL
.I13
.immS
);
390 ppHRegARM64(riL
->ARM64riL
.R
.reg
);
397 static void addRegUsage_ARM64RIL ( HRegUsage
* u
, ARM64RIL
* riL
) {
402 addHRegUse(u
, HRmRead
, riL
->ARM64riL
.R
.reg
);
405 vpanic("addRegUsage_ARM64RIL");
409 static void mapRegs_ARM64RIL ( HRegRemap
* m
, ARM64RIL
* riL
) {
414 riL
->ARM64riL
.R
.reg
= lookupHRegRemap(m
, riL
->ARM64riL
.R
.reg
);
417 vpanic("mapRegs_ARM64RIL");
422 /* --------------- Reg or uimm6 operands --------------- */
424 ARM64RI6
* ARM64RI6_I6 ( UInt imm6
) {
425 ARM64RI6
* ri6
= LibVEX_Alloc_inline(sizeof(ARM64RI6
));
426 ri6
->tag
= ARM64ri6_I6
;
427 ri6
->ARM64ri6
.I6
.imm6
= imm6
;
428 vassert(imm6
> 0 && imm6
< 64);
431 ARM64RI6
* ARM64RI6_R ( HReg reg
) {
432 ARM64RI6
* ri6
= LibVEX_Alloc_inline(sizeof(ARM64RI6
));
433 ri6
->tag
= ARM64ri6_R
;
434 ri6
->ARM64ri6
.R
.reg
= reg
;
438 static void ppARM64RI6 ( ARM64RI6
* ri6
) {
441 vex_printf("#%u", ri6
->ARM64ri6
.I6
.imm6
);
444 ppHRegARM64(ri6
->ARM64ri6
.R
.reg
);
451 static void addRegUsage_ARM64RI6 ( HRegUsage
* u
, ARM64RI6
* ri6
) {
456 addHRegUse(u
, HRmRead
, ri6
->ARM64ri6
.R
.reg
);
459 vpanic("addRegUsage_ARM64RI6");
463 static void mapRegs_ARM64RI6 ( HRegRemap
* m
, ARM64RI6
* ri6
) {
468 ri6
->ARM64ri6
.R
.reg
= lookupHRegRemap(m
, ri6
->ARM64ri6
.R
.reg
);
471 vpanic("mapRegs_ARM64RI6");
476 /* --------- Instructions. --------- */
478 static const HChar
* showARM64LogicOp ( ARM64LogicOp op
) {
480 case ARM64lo_AND
: return "and";
481 case ARM64lo_OR
: return "orr";
482 case ARM64lo_XOR
: return "eor";
483 default: vpanic("showARM64LogicOp");
487 static const HChar
* showARM64ShiftOp ( ARM64ShiftOp op
) {
489 case ARM64sh_SHL
: return "lsl";
490 case ARM64sh_SHR
: return "lsr";
491 case ARM64sh_SAR
: return "asr";
492 default: vpanic("showARM64ShiftOp");
496 static const HChar
* showARM64UnaryOp ( ARM64UnaryOp op
) {
498 case ARM64un_NEG
: return "neg";
499 case ARM64un_NOT
: return "not";
500 case ARM64un_CLZ
: return "clz";
501 default: vpanic("showARM64UnaryOp");
505 static const HChar
* showARM64MulOp ( ARM64MulOp op
) {
507 case ARM64mul_PLAIN
: return "mul ";
508 case ARM64mul_ZX
: return "umulh";
509 case ARM64mul_SX
: return "smulh";
510 default: vpanic("showARM64MulOp");
514 static void characteriseARM64CvtOp ( /*OUT*/HChar
* syn
,
515 /*OUT*/UInt
* fszB
, /*OUT*/UInt
* iszB
,
518 case ARM64cvt_F32_I32S
:
519 *syn
= 's'; *fszB
= 4; *iszB
= 4; break;
520 case ARM64cvt_F64_I32S
:
521 *syn
= 's'; *fszB
= 8; *iszB
= 4; break;
522 case ARM64cvt_F32_I64S
:
523 *syn
= 's'; *fszB
= 4; *iszB
= 8; break;
524 case ARM64cvt_F64_I64S
:
525 *syn
= 's'; *fszB
= 8; *iszB
= 8; break;
526 case ARM64cvt_F32_I32U
:
527 *syn
= 'u'; *fszB
= 4; *iszB
= 4; break;
528 case ARM64cvt_F64_I32U
:
529 *syn
= 'u'; *fszB
= 8; *iszB
= 4; break;
530 case ARM64cvt_F32_I64U
:
531 *syn
= 'u'; *fszB
= 4; *iszB
= 8; break;
532 case ARM64cvt_F64_I64U
:
533 *syn
= 'u'; *fszB
= 8; *iszB
= 8; break;
535 vpanic("characteriseARM64CvtOp");
539 static const HChar
* showARM64FpBinOp ( ARM64FpBinOp op
) {
541 case ARM64fpb_ADD
: return "add";
542 case ARM64fpb_SUB
: return "sub";
543 case ARM64fpb_MUL
: return "mul";
544 case ARM64fpb_DIV
: return "div";
545 default: vpanic("showARM64FpBinOp");
549 static const HChar
* showARM64FpUnaryOp ( ARM64FpUnaryOp op
) {
551 case ARM64fpu_NEG
: return "neg ";
552 case ARM64fpu_ABS
: return "abs ";
553 case ARM64fpu_SQRT
: return "sqrt ";
554 case ARM64fpu_RINT
: return "rinti";
555 case ARM64fpu_RECPX
: return "recpx";
556 default: vpanic("showARM64FpUnaryOp");
560 static void showARM64VecBinOp(/*OUT*/const HChar
** nm
,
561 /*OUT*/const HChar
** ar
, ARM64VecBinOp op
) {
563 case ARM64vecb_ADD64x2
: *nm
= "add "; *ar
= "2d"; return;
564 case ARM64vecb_ADD32x4
: *nm
= "add "; *ar
= "4s"; return;
565 case ARM64vecb_ADD16x8
: *nm
= "add "; *ar
= "8h"; return;
566 case ARM64vecb_ADD8x16
: *nm
= "add "; *ar
= "16b"; return;
567 case ARM64vecb_SUB64x2
: *nm
= "sub "; *ar
= "2d"; return;
568 case ARM64vecb_SUB32x4
: *nm
= "sub "; *ar
= "4s"; return;
569 case ARM64vecb_SUB16x8
: *nm
= "sub "; *ar
= "8h"; return;
570 case ARM64vecb_SUB8x16
: *nm
= "sub "; *ar
= "16b"; return;
571 case ARM64vecb_MUL32x4
: *nm
= "mul "; *ar
= "4s"; return;
572 case ARM64vecb_MUL16x8
: *nm
= "mul "; *ar
= "8h"; return;
573 case ARM64vecb_MUL8x16
: *nm
= "mul "; *ar
= "16b"; return;
574 case ARM64vecb_FADD64x2
: *nm
= "fadd "; *ar
= "2d"; return;
575 case ARM64vecb_FSUB64x2
: *nm
= "fsub "; *ar
= "2d"; return;
576 case ARM64vecb_FMUL64x2
: *nm
= "fmul "; *ar
= "2d"; return;
577 case ARM64vecb_FDIV64x2
: *nm
= "fdiv "; *ar
= "2d"; return;
578 case ARM64vecb_FADD32x4
: *nm
= "fadd "; *ar
= "4s"; return;
579 case ARM64vecb_FSUB32x4
: *nm
= "fsub "; *ar
= "4s"; return;
580 case ARM64vecb_FMUL32x4
: *nm
= "fmul "; *ar
= "4s"; return;
581 case ARM64vecb_FDIV32x4
: *nm
= "fdiv "; *ar
= "4s"; return;
582 case ARM64vecb_FMAX64x2
: *nm
= "fmax "; *ar
= "2d"; return;
583 case ARM64vecb_FMAX32x4
: *nm
= "fmax "; *ar
= "4s"; return;
584 case ARM64vecb_FMIN64x2
: *nm
= "fmin "; *ar
= "2d"; return;
585 case ARM64vecb_FMIN32x4
: *nm
= "fmin "; *ar
= "4s"; return;
586 case ARM64vecb_UMAX32x4
: *nm
= "umax "; *ar
= "4s"; return;
587 case ARM64vecb_UMAX16x8
: *nm
= "umax "; *ar
= "8h"; return;
588 case ARM64vecb_UMAX8x16
: *nm
= "umax "; *ar
= "16b"; return;
589 case ARM64vecb_UMIN32x4
: *nm
= "umin "; *ar
= "4s"; return;
590 case ARM64vecb_UMIN16x8
: *nm
= "umin "; *ar
= "8h"; return;
591 case ARM64vecb_UMIN8x16
: *nm
= "umin "; *ar
= "16b"; return;
592 case ARM64vecb_SMAX32x4
: *nm
= "smax "; *ar
= "4s"; return;
593 case ARM64vecb_SMAX16x8
: *nm
= "smax "; *ar
= "8h"; return;
594 case ARM64vecb_SMAX8x16
: *nm
= "smax "; *ar
= "16b"; return;
595 case ARM64vecb_SMIN32x4
: *nm
= "smin "; *ar
= "4s"; return;
596 case ARM64vecb_SMIN16x8
: *nm
= "smin "; *ar
= "8h"; return;
597 case ARM64vecb_SMIN8x16
: *nm
= "smin "; *ar
= "16b"; return;
598 case ARM64vecb_AND
: *nm
= "and "; *ar
= "16b"; return;
599 case ARM64vecb_ORR
: *nm
= "orr "; *ar
= "16b"; return;
600 case ARM64vecb_XOR
: *nm
= "eor "; *ar
= "16b"; return;
601 case ARM64vecb_CMEQ64x2
: *nm
= "cmeq "; *ar
= "2d"; return;
602 case ARM64vecb_CMEQ32x4
: *nm
= "cmeq "; *ar
= "4s"; return;
603 case ARM64vecb_CMEQ16x8
: *nm
= "cmeq "; *ar
= "8h"; return;
604 case ARM64vecb_CMEQ8x16
: *nm
= "cmeq "; *ar
= "16b"; return;
605 case ARM64vecb_CMHI64x2
: *nm
= "cmhi "; *ar
= "2d"; return;
606 case ARM64vecb_CMHI32x4
: *nm
= "cmhi "; *ar
= "4s"; return;
607 case ARM64vecb_CMHI16x8
: *nm
= "cmhi "; *ar
= "8h"; return;
608 case ARM64vecb_CMHI8x16
: *nm
= "cmhi "; *ar
= "16b"; return;
609 case ARM64vecb_CMGT64x2
: *nm
= "cmgt "; *ar
= "2d"; return;
610 case ARM64vecb_CMGT32x4
: *nm
= "cmgt "; *ar
= "4s"; return;
611 case ARM64vecb_CMGT16x8
: *nm
= "cmgt "; *ar
= "8h"; return;
612 case ARM64vecb_CMGT8x16
: *nm
= "cmgt "; *ar
= "16b"; return;
613 case ARM64vecb_FCMEQ64x2
: *nm
= "fcmeq "; *ar
= "2d"; return;
614 case ARM64vecb_FCMEQ32x4
: *nm
= "fcmeq "; *ar
= "4s"; return;
615 case ARM64vecb_FCMGE64x2
: *nm
= "fcmge "; *ar
= "2d"; return;
616 case ARM64vecb_FCMGE32x4
: *nm
= "fcmge "; *ar
= "4s"; return;
617 case ARM64vecb_FCMGT64x2
: *nm
= "fcmgt "; *ar
= "2d"; return;
618 case ARM64vecb_FCMGT32x4
: *nm
= "fcmgt "; *ar
= "4s"; return;
619 case ARM64vecb_TBL1
: *nm
= "tbl "; *ar
= "16b"; return;
620 case ARM64vecb_UZP164x2
: *nm
= "uzp1 "; *ar
= "2d"; return;
621 case ARM64vecb_UZP132x4
: *nm
= "uzp1 "; *ar
= "4s"; return;
622 case ARM64vecb_UZP116x8
: *nm
= "uzp1 "; *ar
= "8h"; return;
623 case ARM64vecb_UZP18x16
: *nm
= "uzp1 "; *ar
= "16b"; return;
624 case ARM64vecb_UZP264x2
: *nm
= "uzp2 "; *ar
= "2d"; return;
625 case ARM64vecb_UZP232x4
: *nm
= "uzp2 "; *ar
= "4s"; return;
626 case ARM64vecb_UZP216x8
: *nm
= "uzp2 "; *ar
= "8h"; return;
627 case ARM64vecb_UZP28x16
: *nm
= "uzp2 "; *ar
= "16b"; return;
628 case ARM64vecb_ZIP132x4
: *nm
= "zip1 "; *ar
= "4s"; return;
629 case ARM64vecb_ZIP116x8
: *nm
= "zip1 "; *ar
= "8h"; return;
630 case ARM64vecb_ZIP18x16
: *nm
= "zip1 "; *ar
= "16b"; return;
631 case ARM64vecb_ZIP232x4
: *nm
= "zip2 "; *ar
= "4s"; return;
632 case ARM64vecb_ZIP216x8
: *nm
= "zip2 "; *ar
= "8h"; return;
633 case ARM64vecb_ZIP28x16
: *nm
= "zip2 "; *ar
= "16b"; return;
634 case ARM64vecb_PMUL8x16
: *nm
= "pmul "; *ar
= "16b"; return;
635 case ARM64vecb_PMULL8x8
: *nm
= "pmull "; *ar
= "8hbb"; return;
636 case ARM64vecb_UMULL2DSS
: *nm
= "umull "; *ar
= "2dss"; return;
637 case ARM64vecb_UMULL4SHH
: *nm
= "umull "; *ar
= "4shh"; return;
638 case ARM64vecb_UMULL8HBB
: *nm
= "umull "; *ar
= "8hbb"; return;
639 case ARM64vecb_SMULL2DSS
: *nm
= "smull "; *ar
= "2dss"; return;
640 case ARM64vecb_SMULL4SHH
: *nm
= "smull "; *ar
= "4shh"; return;
641 case ARM64vecb_SMULL8HBB
: *nm
= "smull "; *ar
= "8hbb"; return;
642 case ARM64vecb_SQADD64x2
: *nm
= "sqadd "; *ar
= "2d"; return;
643 case ARM64vecb_SQADD32x4
: *nm
= "sqadd "; *ar
= "4s"; return;
644 case ARM64vecb_SQADD16x8
: *nm
= "sqadd "; *ar
= "8h"; return;
645 case ARM64vecb_SQADD8x16
: *nm
= "sqadd "; *ar
= "16b"; return;
646 case ARM64vecb_UQADD64x2
: *nm
= "uqadd "; *ar
= "2d"; return;
647 case ARM64vecb_UQADD32x4
: *nm
= "uqadd "; *ar
= "4s"; return;
648 case ARM64vecb_UQADD16x8
: *nm
= "uqadd "; *ar
= "8h"; return;
649 case ARM64vecb_UQADD8x16
: *nm
= "uqadd "; *ar
= "16b"; return;
650 case ARM64vecb_SQSUB64x2
: *nm
= "sqsub "; *ar
= "2d"; return;
651 case ARM64vecb_SQSUB32x4
: *nm
= "sqsub "; *ar
= "4s"; return;
652 case ARM64vecb_SQSUB16x8
: *nm
= "sqsub "; *ar
= "8h"; return;
653 case ARM64vecb_SQSUB8x16
: *nm
= "sqsub "; *ar
= "16b"; return;
654 case ARM64vecb_UQSUB64x2
: *nm
= "uqsub "; *ar
= "2d"; return;
655 case ARM64vecb_UQSUB32x4
: *nm
= "uqsub "; *ar
= "4s"; return;
656 case ARM64vecb_UQSUB16x8
: *nm
= "uqsub "; *ar
= "8h"; return;
657 case ARM64vecb_UQSUB8x16
: *nm
= "uqsub "; *ar
= "16b"; return;
658 case ARM64vecb_SQDMULL2DSS
: *nm
= "sqdmull"; *ar
= "2dss"; return;
659 case ARM64vecb_SQDMULL4SHH
: *nm
= "sqdmull"; *ar
= "4shh"; return;
660 case ARM64vecb_SQDMULH32x4
: *nm
= "sqdmulh"; *ar
= "4s"; return;
661 case ARM64vecb_SQDMULH16x8
: *nm
= "sqdmulh"; *ar
= "8h"; return;
662 case ARM64vecb_SQRDMULH32x4
: *nm
= "sqrdmulh"; *ar
= "4s"; return;
663 case ARM64vecb_SQRDMULH16x8
: *nm
= "sqrdmulh"; *ar
= "8h"; return;
664 case ARM64vecb_SQSHL64x2
: *nm
= "sqshl "; *ar
= "2d"; return;
665 case ARM64vecb_SQSHL32x4
: *nm
= "sqshl "; *ar
= "4s"; return;
666 case ARM64vecb_SQSHL16x8
: *nm
= "sqshl "; *ar
= "8h"; return;
667 case ARM64vecb_SQSHL8x16
: *nm
= "sqshl "; *ar
= "16b"; return;
668 case ARM64vecb_UQSHL64x2
: *nm
= "uqshl "; *ar
= "2d"; return;
669 case ARM64vecb_UQSHL32x4
: *nm
= "uqshl "; *ar
= "4s"; return;
670 case ARM64vecb_UQSHL16x8
: *nm
= "uqshl "; *ar
= "8h"; return;
671 case ARM64vecb_UQSHL8x16
: *nm
= "uqshl "; *ar
= "16b"; return;
672 case ARM64vecb_SQRSHL64x2
: *nm
= "sqrshl"; *ar
= "2d"; return;
673 case ARM64vecb_SQRSHL32x4
: *nm
= "sqrshl"; *ar
= "4s"; return;
674 case ARM64vecb_SQRSHL16x8
: *nm
= "sqrshl"; *ar
= "8h"; return;
675 case ARM64vecb_SQRSHL8x16
: *nm
= "sqrshl"; *ar
= "16b"; return;
676 case ARM64vecb_UQRSHL64x2
: *nm
= "uqrshl"; *ar
= "2d"; return;
677 case ARM64vecb_UQRSHL32x4
: *nm
= "uqrshl"; *ar
= "4s"; return;
678 case ARM64vecb_UQRSHL16x8
: *nm
= "uqrshl"; *ar
= "8h"; return;
679 case ARM64vecb_UQRSHL8x16
: *nm
= "uqrshl"; *ar
= "16b"; return;
680 case ARM64vecb_SSHL64x2
: *nm
= "sshl "; *ar
= "2d"; return;
681 case ARM64vecb_SSHL32x4
: *nm
= "sshl "; *ar
= "4s"; return;
682 case ARM64vecb_SSHL16x8
: *nm
= "sshl "; *ar
= "8h"; return;
683 case ARM64vecb_SSHL8x16
: *nm
= "sshl "; *ar
= "16b"; return;
684 case ARM64vecb_USHL64x2
: *nm
= "ushl "; *ar
= "2d"; return;
685 case ARM64vecb_USHL32x4
: *nm
= "ushl "; *ar
= "4s"; return;
686 case ARM64vecb_USHL16x8
: *nm
= "ushl "; *ar
= "8h"; return;
687 case ARM64vecb_USHL8x16
: *nm
= "ushl "; *ar
= "16b"; return;
688 case ARM64vecb_SRSHL64x2
: *nm
= "srshl "; *ar
= "2d"; return;
689 case ARM64vecb_SRSHL32x4
: *nm
= "srshl "; *ar
= "4s"; return;
690 case ARM64vecb_SRSHL16x8
: *nm
= "srshl "; *ar
= "8h"; return;
691 case ARM64vecb_SRSHL8x16
: *nm
= "srshl "; *ar
= "16b"; return;
692 case ARM64vecb_URSHL64x2
: *nm
= "urshl "; *ar
= "2d"; return;
693 case ARM64vecb_URSHL32x4
: *nm
= "urshl "; *ar
= "4s"; return;
694 case ARM64vecb_URSHL16x8
: *nm
= "urshl "; *ar
= "8h"; return;
695 case ARM64vecb_URSHL8x16
: *nm
= "urshl "; *ar
= "16b"; return;
696 case ARM64vecb_FRECPS64x2
: *nm
= "frecps"; *ar
= "2d"; return;
697 case ARM64vecb_FRECPS32x4
: *nm
= "frecps"; *ar
= "4s"; return;
698 case ARM64vecb_FRSQRTS64x2
: *nm
= "frsqrts"; *ar
= "2d"; return;
699 case ARM64vecb_FRSQRTS32x4
: *nm
= "frsqrts"; *ar
= "4s"; return;
700 default: vpanic("showARM64VecBinOp");
704 static void showARM64VecModifyOp(/*OUT*/const HChar
** nm
,
705 /*OUT*/const HChar
** ar
,
706 ARM64VecModifyOp op
) {
708 case ARM64vecmo_SUQADD64x2
: *nm
= "suqadd"; *ar
= "2d"; return;
709 case ARM64vecmo_SUQADD32x4
: *nm
= "suqadd"; *ar
= "4s"; return;
710 case ARM64vecmo_SUQADD16x8
: *nm
= "suqadd"; *ar
= "8h"; return;
711 case ARM64vecmo_SUQADD8x16
: *nm
= "suqadd"; *ar
= "16b"; return;
712 case ARM64vecmo_USQADD64x2
: *nm
= "usqadd"; *ar
= "2d"; return;
713 case ARM64vecmo_USQADD32x4
: *nm
= "usqadd"; *ar
= "4s"; return;
714 case ARM64vecmo_USQADD16x8
: *nm
= "usqadd"; *ar
= "8h"; return;
715 case ARM64vecmo_USQADD8x16
: *nm
= "usqadd"; *ar
= "16b"; return;
716 default: vpanic("showARM64VecModifyOp");
720 static void showARM64VecUnaryOp(/*OUT*/const HChar
** nm
,
721 /*OUT*/const HChar
** ar
, ARM64VecUnaryOp op
)
724 case ARM64vecu_FNEG64x2
: *nm
= "fneg "; *ar
= "2d"; return;
725 case ARM64vecu_FNEG32x4
: *nm
= "fneg "; *ar
= "4s"; return;
726 case ARM64vecu_FABS64x2
: *nm
= "fabs "; *ar
= "2d"; return;
727 case ARM64vecu_FABS32x4
: *nm
= "fabs "; *ar
= "4s"; return;
728 case ARM64vecu_NOT
: *nm
= "not "; *ar
= "all"; return;
729 case ARM64vecu_ABS64x2
: *nm
= "abs "; *ar
= "2d"; return;
730 case ARM64vecu_ABS32x4
: *nm
= "abs "; *ar
= "4s"; return;
731 case ARM64vecu_ABS16x8
: *nm
= "abs "; *ar
= "8h"; return;
732 case ARM64vecu_ABS8x16
: *nm
= "abs "; *ar
= "16b"; return;
733 case ARM64vecu_CLS32x4
: *nm
= "cls "; *ar
= "4s"; return;
734 case ARM64vecu_CLS16x8
: *nm
= "cls "; *ar
= "8h"; return;
735 case ARM64vecu_CLS8x16
: *nm
= "cls "; *ar
= "16b"; return;
736 case ARM64vecu_CLZ32x4
: *nm
= "clz "; *ar
= "4s"; return;
737 case ARM64vecu_CLZ16x8
: *nm
= "clz "; *ar
= "8h"; return;
738 case ARM64vecu_CLZ8x16
: *nm
= "clz "; *ar
= "16b"; return;
739 case ARM64vecu_CNT8x16
: *nm
= "cnt "; *ar
= "16b"; return;
740 case ARM64vecu_RBIT
: *nm
= "rbit "; *ar
= "16b"; return;
741 case ARM64vecu_REV1616B
: *nm
= "rev16"; *ar
= "16b"; return;
742 case ARM64vecu_REV3216B
: *nm
= "rev32"; *ar
= "16b"; return;
743 case ARM64vecu_REV328H
: *nm
= "rev32"; *ar
= "8h"; return;
744 case ARM64vecu_REV6416B
: *nm
= "rev64"; *ar
= "16b"; return;
745 case ARM64vecu_REV648H
: *nm
= "rev64"; *ar
= "8h"; return;
746 case ARM64vecu_REV644S
: *nm
= "rev64"; *ar
= "4s"; return;
747 case ARM64vecu_URECPE32x4
: *nm
= "urecpe"; *ar
= "4s"; return;
748 case ARM64vecu_URSQRTE32x4
: *nm
= "ursqrte"; *ar
= "4s"; return;
749 case ARM64vecu_FRECPE64x2
: *nm
= "frecpe"; *ar
= "2d"; return;
750 case ARM64vecu_FRECPE32x4
: *nm
= "frecpe"; *ar
= "4s"; return;
751 case ARM64vecu_FRSQRTE64x2
: *nm
= "frsqrte"; *ar
= "2d"; return;
752 case ARM64vecu_FRSQRTE32x4
: *nm
= "frsqrte"; *ar
= "4s"; return;
753 case ARM64vecu_FSQRT64x2
: *nm
= "fsqrt"; *ar
= "2d"; return;
754 case ARM64vecu_FSQRT32x4
: *nm
= "fsqrt"; *ar
= "4s"; return;
755 default: vpanic("showARM64VecUnaryOp");
759 static void showARM64VecShiftImmOp(/*OUT*/const HChar
** nm
,
760 /*OUT*/const HChar
** ar
,
761 ARM64VecShiftImmOp op
)
764 case ARM64vecshi_USHR64x2
: *nm
= "ushr "; *ar
= "2d"; return;
765 case ARM64vecshi_USHR32x4
: *nm
= "ushr "; *ar
= "4s"; return;
766 case ARM64vecshi_USHR16x8
: *nm
= "ushr "; *ar
= "8h"; return;
767 case ARM64vecshi_USHR8x16
: *nm
= "ushr "; *ar
= "16b"; return;
768 case ARM64vecshi_SSHR64x2
: *nm
= "sshr "; *ar
= "2d"; return;
769 case ARM64vecshi_SSHR32x4
: *nm
= "sshr "; *ar
= "4s"; return;
770 case ARM64vecshi_SSHR16x8
: *nm
= "sshr "; *ar
= "8h"; return;
771 case ARM64vecshi_SSHR8x16
: *nm
= "sshr "; *ar
= "16b"; return;
772 case ARM64vecshi_SHL64x2
: *nm
= "shl "; *ar
= "2d"; return;
773 case ARM64vecshi_SHL32x4
: *nm
= "shl "; *ar
= "4s"; return;
774 case ARM64vecshi_SHL16x8
: *nm
= "shl "; *ar
= "8h"; return;
775 case ARM64vecshi_SHL8x16
: *nm
= "shl "; *ar
= "16b"; return;
776 case ARM64vecshi_SQSHRN2SD
: *nm
= "sqshrn"; *ar
= "2sd"; return;
777 case ARM64vecshi_SQSHRN4HS
: *nm
= "sqshrn"; *ar
= "4hs"; return;
778 case ARM64vecshi_SQSHRN8BH
: *nm
= "sqshrn"; *ar
= "8bh"; return;
779 case ARM64vecshi_UQSHRN2SD
: *nm
= "uqshrn"; *ar
= "2sd"; return;
780 case ARM64vecshi_UQSHRN4HS
: *nm
= "uqshrn"; *ar
= "4hs"; return;
781 case ARM64vecshi_UQSHRN8BH
: *nm
= "uqshrn"; *ar
= "8bh"; return;
782 case ARM64vecshi_SQSHRUN2SD
: *nm
= "sqshrun"; *ar
= "2sd"; return;
783 case ARM64vecshi_SQSHRUN4HS
: *nm
= "sqshrun"; *ar
= "4hs"; return;
784 case ARM64vecshi_SQSHRUN8BH
: *nm
= "sqshrun"; *ar
= "8bh"; return;
785 case ARM64vecshi_SQRSHRN2SD
: *nm
= "sqrshrn"; *ar
= "2sd"; return;
786 case ARM64vecshi_SQRSHRN4HS
: *nm
= "sqrshrn"; *ar
= "4hs"; return;
787 case ARM64vecshi_SQRSHRN8BH
: *nm
= "sqrshrn"; *ar
= "8bh"; return;
788 case ARM64vecshi_UQRSHRN2SD
: *nm
= "uqrshrn"; *ar
= "2sd"; return;
789 case ARM64vecshi_UQRSHRN4HS
: *nm
= "uqrshrn"; *ar
= "4hs"; return;
790 case ARM64vecshi_UQRSHRN8BH
: *nm
= "uqrshrn"; *ar
= "8bh"; return;
791 case ARM64vecshi_SQRSHRUN2SD
: *nm
= "sqrshrun"; *ar
= "2sd"; return;
792 case ARM64vecshi_SQRSHRUN4HS
: *nm
= "sqrshrun"; *ar
= "4hs"; return;
793 case ARM64vecshi_SQRSHRUN8BH
: *nm
= "sqrshrun"; *ar
= "8bh"; return;
794 case ARM64vecshi_UQSHL64x2
: *nm
= "uqshl "; *ar
= "2d"; return;
795 case ARM64vecshi_UQSHL32x4
: *nm
= "uqshl "; *ar
= "4s"; return;
796 case ARM64vecshi_UQSHL16x8
: *nm
= "uqshl "; *ar
= "8h"; return;
797 case ARM64vecshi_UQSHL8x16
: *nm
= "uqshl "; *ar
= "16b"; return;
798 case ARM64vecshi_SQSHL64x2
: *nm
= "sqshl "; *ar
= "2d"; return;
799 case ARM64vecshi_SQSHL32x4
: *nm
= "sqshl "; *ar
= "4s"; return;
800 case ARM64vecshi_SQSHL16x8
: *nm
= "sqshl "; *ar
= "8h"; return;
801 case ARM64vecshi_SQSHL8x16
: *nm
= "sqshl "; *ar
= "16b"; return;
802 case ARM64vecshi_SQSHLU64x2
: *nm
= "sqshlu"; *ar
= "2d"; return;
803 case ARM64vecshi_SQSHLU32x4
: *nm
= "sqshlu"; *ar
= "4s"; return;
804 case ARM64vecshi_SQSHLU16x8
: *nm
= "sqshlu"; *ar
= "8h"; return;
805 case ARM64vecshi_SQSHLU8x16
: *nm
= "sqshlu"; *ar
= "16b"; return;
806 default: vpanic("showARM64VecShiftImmOp");
810 static const HChar
* showARM64VecNarrowOp(ARM64VecNarrowOp op
) {
812 case ARM64vecna_XTN
: return "xtn ";
813 case ARM64vecna_SQXTN
: return "sqxtn ";
814 case ARM64vecna_UQXTN
: return "uqxtn ";
815 case ARM64vecna_SQXTUN
: return "sqxtun";
816 default: vpanic("showARM64VecNarrowOp");
820 ARM64Instr
* ARM64Instr_Arith ( HReg dst
,
821 HReg argL
, ARM64RIA
* argR
, Bool isAdd
) {
822 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
823 i
->tag
= ARM64in_Arith
;
824 i
->ARM64in
.Arith
.dst
= dst
;
825 i
->ARM64in
.Arith
.argL
= argL
;
826 i
->ARM64in
.Arith
.argR
= argR
;
827 i
->ARM64in
.Arith
.isAdd
= isAdd
;
830 ARM64Instr
* ARM64Instr_Cmp ( HReg argL
, ARM64RIA
* argR
, Bool is64
) {
831 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
832 i
->tag
= ARM64in_Cmp
;
833 i
->ARM64in
.Cmp
.argL
= argL
;
834 i
->ARM64in
.Cmp
.argR
= argR
;
835 i
->ARM64in
.Cmp
.is64
= is64
;
838 ARM64Instr
* ARM64Instr_Logic ( HReg dst
,
839 HReg argL
, ARM64RIL
* argR
, ARM64LogicOp op
) {
840 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
841 i
->tag
= ARM64in_Logic
;
842 i
->ARM64in
.Logic
.dst
= dst
;
843 i
->ARM64in
.Logic
.argL
= argL
;
844 i
->ARM64in
.Logic
.argR
= argR
;
845 i
->ARM64in
.Logic
.op
= op
;
848 ARM64Instr
* ARM64Instr_Test ( HReg argL
, ARM64RIL
* argR
) {
849 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
850 i
->tag
= ARM64in_Test
;
851 i
->ARM64in
.Test
.argL
= argL
;
852 i
->ARM64in
.Test
.argR
= argR
;
855 ARM64Instr
* ARM64Instr_Shift ( HReg dst
,
856 HReg argL
, ARM64RI6
* argR
, ARM64ShiftOp op
) {
857 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
858 i
->tag
= ARM64in_Shift
;
859 i
->ARM64in
.Shift
.dst
= dst
;
860 i
->ARM64in
.Shift
.argL
= argL
;
861 i
->ARM64in
.Shift
.argR
= argR
;
862 i
->ARM64in
.Shift
.op
= op
;
865 ARM64Instr
* ARM64Instr_Unary ( HReg dst
, HReg src
, ARM64UnaryOp op
) {
866 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
867 i
->tag
= ARM64in_Unary
;
868 i
->ARM64in
.Unary
.dst
= dst
;
869 i
->ARM64in
.Unary
.src
= src
;
870 i
->ARM64in
.Unary
.op
= op
;
873 ARM64Instr
* ARM64Instr_Set64 ( HReg dst
, ARM64CondCode cond
) {
874 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
875 i
->tag
= ARM64in_Set64
;
876 i
->ARM64in
.Set64
.dst
= dst
;
877 i
->ARM64in
.Set64
.cond
= cond
;
880 ARM64Instr
* ARM64Instr_MovI ( HReg dst
, HReg src
) {
881 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
882 i
->tag
= ARM64in_MovI
;
883 i
->ARM64in
.MovI
.dst
= dst
;
884 i
->ARM64in
.MovI
.src
= src
;
885 vassert(hregClass(src
) == HRcInt64
);
886 vassert(hregClass(dst
) == HRcInt64
);
889 ARM64Instr
* ARM64Instr_Imm64 ( HReg dst
, ULong imm64
) {
890 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
891 i
->tag
= ARM64in_Imm64
;
892 i
->ARM64in
.Imm64
.dst
= dst
;
893 i
->ARM64in
.Imm64
.imm64
= imm64
;
896 ARM64Instr
* ARM64Instr_LdSt64 ( Bool isLoad
, HReg rD
, ARM64AMode
* amode
) {
897 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
898 i
->tag
= ARM64in_LdSt64
;
899 i
->ARM64in
.LdSt64
.isLoad
= isLoad
;
900 i
->ARM64in
.LdSt64
.rD
= rD
;
901 i
->ARM64in
.LdSt64
.amode
= amode
;
904 ARM64Instr
* ARM64Instr_LdSt32 ( Bool isLoad
, HReg rD
, ARM64AMode
* amode
) {
905 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
906 i
->tag
= ARM64in_LdSt32
;
907 i
->ARM64in
.LdSt32
.isLoad
= isLoad
;
908 i
->ARM64in
.LdSt32
.rD
= rD
;
909 i
->ARM64in
.LdSt32
.amode
= amode
;
912 ARM64Instr
* ARM64Instr_LdSt16 ( Bool isLoad
, HReg rD
, ARM64AMode
* amode
) {
913 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
914 i
->tag
= ARM64in_LdSt16
;
915 i
->ARM64in
.LdSt16
.isLoad
= isLoad
;
916 i
->ARM64in
.LdSt16
.rD
= rD
;
917 i
->ARM64in
.LdSt16
.amode
= amode
;
920 ARM64Instr
* ARM64Instr_LdSt8 ( Bool isLoad
, HReg rD
, ARM64AMode
* amode
) {
921 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
922 i
->tag
= ARM64in_LdSt8
;
923 i
->ARM64in
.LdSt8
.isLoad
= isLoad
;
924 i
->ARM64in
.LdSt8
.rD
= rD
;
925 i
->ARM64in
.LdSt8
.amode
= amode
;
928 ARM64Instr
* ARM64Instr_XDirect ( Addr64 dstGA
, ARM64AMode
* amPC
,
929 ARM64CondCode cond
, Bool toFastEP
) {
930 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
931 i
->tag
= ARM64in_XDirect
;
932 i
->ARM64in
.XDirect
.dstGA
= dstGA
;
933 i
->ARM64in
.XDirect
.amPC
= amPC
;
934 i
->ARM64in
.XDirect
.cond
= cond
;
935 i
->ARM64in
.XDirect
.toFastEP
= toFastEP
;
938 ARM64Instr
* ARM64Instr_XIndir ( HReg dstGA
, ARM64AMode
* amPC
,
939 ARM64CondCode cond
) {
940 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
941 i
->tag
= ARM64in_XIndir
;
942 i
->ARM64in
.XIndir
.dstGA
= dstGA
;
943 i
->ARM64in
.XIndir
.amPC
= amPC
;
944 i
->ARM64in
.XIndir
.cond
= cond
;
947 ARM64Instr
* ARM64Instr_XAssisted ( HReg dstGA
, ARM64AMode
* amPC
,
948 ARM64CondCode cond
, IRJumpKind jk
) {
949 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
950 i
->tag
= ARM64in_XAssisted
;
951 i
->ARM64in
.XAssisted
.dstGA
= dstGA
;
952 i
->ARM64in
.XAssisted
.amPC
= amPC
;
953 i
->ARM64in
.XAssisted
.cond
= cond
;
954 i
->ARM64in
.XAssisted
.jk
= jk
;
957 ARM64Instr
* ARM64Instr_CSel ( HReg dst
, HReg argL
, HReg argR
,
958 ARM64CondCode cond
) {
959 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
960 i
->tag
= ARM64in_CSel
;
961 i
->ARM64in
.CSel
.dst
= dst
;
962 i
->ARM64in
.CSel
.argL
= argL
;
963 i
->ARM64in
.CSel
.argR
= argR
;
964 i
->ARM64in
.CSel
.cond
= cond
;
967 ARM64Instr
* ARM64Instr_Call ( ARM64CondCode cond
, Addr64 target
, Int nArgRegs
,
969 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
970 i
->tag
= ARM64in_Call
;
971 i
->ARM64in
.Call
.cond
= cond
;
972 i
->ARM64in
.Call
.target
= target
;
973 i
->ARM64in
.Call
.nArgRegs
= nArgRegs
;
974 i
->ARM64in
.Call
.rloc
= rloc
;
975 vassert(is_sane_RetLoc(rloc
));
978 extern ARM64Instr
* ARM64Instr_AddToSP ( Int simm
) {
979 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
980 i
->tag
= ARM64in_AddToSP
;
981 i
->ARM64in
.AddToSP
.simm
= simm
;
982 vassert(-4096 < simm
&& simm
< 4096);
983 vassert(0 == (simm
& 0xF));
986 extern ARM64Instr
* ARM64Instr_FromSP ( HReg dst
) {
987 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
988 i
->tag
= ARM64in_FromSP
;
989 i
->ARM64in
.FromSP
.dst
= dst
;
992 ARM64Instr
* ARM64Instr_Mul ( HReg dst
, HReg argL
, HReg argR
,
994 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
995 i
->tag
= ARM64in_Mul
;
996 i
->ARM64in
.Mul
.dst
= dst
;
997 i
->ARM64in
.Mul
.argL
= argL
;
998 i
->ARM64in
.Mul
.argR
= argR
;
999 i
->ARM64in
.Mul
.op
= op
;
1002 ARM64Instr
* ARM64Instr_LdrEX ( Int szB
) {
1003 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1004 i
->tag
= ARM64in_LdrEX
;
1005 i
->ARM64in
.LdrEX
.szB
= szB
;
1006 vassert(szB
== 8 || szB
== 4 || szB
== 2 || szB
== 1);
1009 ARM64Instr
* ARM64Instr_StrEX ( Int szB
) {
1010 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1011 i
->tag
= ARM64in_StrEX
;
1012 i
->ARM64in
.StrEX
.szB
= szB
;
1013 vassert(szB
== 8 || szB
== 4 || szB
== 2 || szB
== 1);
1016 ARM64Instr
* ARM64Instr_CAS ( Int szB
) {
1017 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1018 i
->tag
= ARM64in_CAS
;
1019 i
->ARM64in
.CAS
.szB
= szB
;
1020 vassert(szB
== 8 || szB
== 4 || szB
== 2 || szB
== 1);
1023 ARM64Instr
* ARM64Instr_CASP ( Int szB
) {
1024 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1025 i
->tag
= ARM64in_CASP
;
1026 i
->ARM64in
.CASP
.szB
= szB
;
1027 vassert(szB
== 8 || szB
== 4);
1030 ARM64Instr
* ARM64Instr_MFence ( void ) {
1031 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1032 i
->tag
= ARM64in_MFence
;
1035 ARM64Instr
* ARM64Instr_ClrEX ( void ) {
1036 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1037 i
->tag
= ARM64in_ClrEX
;
1040 ARM64Instr
* ARM64Instr_VLdStH ( Bool isLoad
, HReg sD
, HReg rN
, UInt uimm12
) {
1041 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1042 i
->tag
= ARM64in_VLdStH
;
1043 i
->ARM64in
.VLdStH
.isLoad
= isLoad
;
1044 i
->ARM64in
.VLdStH
.hD
= sD
;
1045 i
->ARM64in
.VLdStH
.rN
= rN
;
1046 i
->ARM64in
.VLdStH
.uimm12
= uimm12
;
1047 vassert(uimm12
< 8192 && 0 == (uimm12
& 1));
1050 ARM64Instr
* ARM64Instr_VLdStS ( Bool isLoad
, HReg sD
, HReg rN
, UInt uimm12
) {
1051 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1052 i
->tag
= ARM64in_VLdStS
;
1053 i
->ARM64in
.VLdStS
.isLoad
= isLoad
;
1054 i
->ARM64in
.VLdStS
.sD
= sD
;
1055 i
->ARM64in
.VLdStS
.rN
= rN
;
1056 i
->ARM64in
.VLdStS
.uimm12
= uimm12
;
1057 vassert(uimm12
< 16384 && 0 == (uimm12
& 3));
1060 ARM64Instr
* ARM64Instr_VLdStD ( Bool isLoad
, HReg dD
, HReg rN
, UInt uimm12
) {
1061 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1062 i
->tag
= ARM64in_VLdStD
;
1063 i
->ARM64in
.VLdStD
.isLoad
= isLoad
;
1064 i
->ARM64in
.VLdStD
.dD
= dD
;
1065 i
->ARM64in
.VLdStD
.rN
= rN
;
1066 i
->ARM64in
.VLdStD
.uimm12
= uimm12
;
1067 vassert(uimm12
< 32768 && 0 == (uimm12
& 7));
1070 ARM64Instr
* ARM64Instr_VLdStQ ( Bool isLoad
, HReg rQ
, HReg rN
) {
1071 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1072 i
->tag
= ARM64in_VLdStQ
;
1073 i
->ARM64in
.VLdStQ
.isLoad
= isLoad
;
1074 i
->ARM64in
.VLdStQ
.rQ
= rQ
;
1075 i
->ARM64in
.VLdStQ
.rN
= rN
;
1078 ARM64Instr
* ARM64Instr_VCvtI2F ( ARM64CvtOp how
, HReg rD
, HReg rS
) {
1079 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1080 i
->tag
= ARM64in_VCvtI2F
;
1081 i
->ARM64in
.VCvtI2F
.how
= how
;
1082 i
->ARM64in
.VCvtI2F
.rD
= rD
;
1083 i
->ARM64in
.VCvtI2F
.rS
= rS
;
1086 ARM64Instr
* ARM64Instr_VCvtF2I ( ARM64CvtOp how
, HReg rD
, HReg rS
,
1088 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1089 i
->tag
= ARM64in_VCvtF2I
;
1090 i
->ARM64in
.VCvtF2I
.how
= how
;
1091 i
->ARM64in
.VCvtF2I
.rD
= rD
;
1092 i
->ARM64in
.VCvtF2I
.rS
= rS
;
1093 i
->ARM64in
.VCvtF2I
.armRM
= armRM
;
1094 vassert(armRM
<= 3);
1097 ARM64Instr
* ARM64Instr_VCvtSD ( Bool sToD
, HReg dst
, HReg src
) {
1098 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1099 i
->tag
= ARM64in_VCvtSD
;
1100 i
->ARM64in
.VCvtSD
.sToD
= sToD
;
1101 i
->ARM64in
.VCvtSD
.dst
= dst
;
1102 i
->ARM64in
.VCvtSD
.src
= src
;
1105 ARM64Instr
* ARM64Instr_VCvtHS ( Bool hToS
, HReg dst
, HReg src
) {
1106 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1107 i
->tag
= ARM64in_VCvtHS
;
1108 i
->ARM64in
.VCvtHS
.hToS
= hToS
;
1109 i
->ARM64in
.VCvtHS
.dst
= dst
;
1110 i
->ARM64in
.VCvtHS
.src
= src
;
1113 ARM64Instr
* ARM64Instr_VCvtHD ( Bool hToD
, HReg dst
, HReg src
) {
1114 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1115 i
->tag
= ARM64in_VCvtHD
;
1116 i
->ARM64in
.VCvtHD
.hToD
= hToD
;
1117 i
->ARM64in
.VCvtHD
.dst
= dst
;
1118 i
->ARM64in
.VCvtHD
.src
= src
;
1121 ARM64Instr
* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op
, HReg dst
, HReg src
) {
1122 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1123 i
->tag
= ARM64in_VUnaryD
;
1124 i
->ARM64in
.VUnaryD
.op
= op
;
1125 i
->ARM64in
.VUnaryD
.dst
= dst
;
1126 i
->ARM64in
.VUnaryD
.src
= src
;
1129 ARM64Instr
* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op
, HReg dst
, HReg src
) {
1130 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1131 i
->tag
= ARM64in_VUnaryS
;
1132 i
->ARM64in
.VUnaryS
.op
= op
;
1133 i
->ARM64in
.VUnaryS
.dst
= dst
;
1134 i
->ARM64in
.VUnaryS
.src
= src
;
1137 ARM64Instr
* ARM64Instr_VBinD ( ARM64FpBinOp op
,
1138 HReg dst
, HReg argL
, HReg argR
) {
1139 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1140 i
->tag
= ARM64in_VBinD
;
1141 i
->ARM64in
.VBinD
.op
= op
;
1142 i
->ARM64in
.VBinD
.dst
= dst
;
1143 i
->ARM64in
.VBinD
.argL
= argL
;
1144 i
->ARM64in
.VBinD
.argR
= argR
;
1147 ARM64Instr
* ARM64Instr_VBinS ( ARM64FpBinOp op
,
1148 HReg dst
, HReg argL
, HReg argR
) {
1149 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1150 i
->tag
= ARM64in_VBinS
;
1151 i
->ARM64in
.VBinS
.op
= op
;
1152 i
->ARM64in
.VBinS
.dst
= dst
;
1153 i
->ARM64in
.VBinS
.argL
= argL
;
1154 i
->ARM64in
.VBinS
.argR
= argR
;
1157 ARM64Instr
* ARM64Instr_VCmpD ( HReg argL
, HReg argR
) {
1158 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1159 i
->tag
= ARM64in_VCmpD
;
1160 i
->ARM64in
.VCmpD
.argL
= argL
;
1161 i
->ARM64in
.VCmpD
.argR
= argR
;
1164 ARM64Instr
* ARM64Instr_VCmpS ( HReg argL
, HReg argR
) {
1165 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1166 i
->tag
= ARM64in_VCmpS
;
1167 i
->ARM64in
.VCmpS
.argL
= argL
;
1168 i
->ARM64in
.VCmpS
.argR
= argR
;
1171 ARM64Instr
* ARM64Instr_VFCSel ( HReg dst
, HReg argL
, HReg argR
,
1172 ARM64CondCode cond
, Bool isD
) {
1173 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1174 i
->tag
= ARM64in_VFCSel
;
1175 i
->ARM64in
.VFCSel
.dst
= dst
;
1176 i
->ARM64in
.VFCSel
.argL
= argL
;
1177 i
->ARM64in
.VFCSel
.argR
= argR
;
1178 i
->ARM64in
.VFCSel
.cond
= cond
;
1179 i
->ARM64in
.VFCSel
.isD
= isD
;
1182 ARM64Instr
* ARM64Instr_FPCR ( Bool toFPCR
, HReg iReg
) {
1183 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1184 i
->tag
= ARM64in_FPCR
;
1185 i
->ARM64in
.FPCR
.toFPCR
= toFPCR
;
1186 i
->ARM64in
.FPCR
.iReg
= iReg
;
1189 ARM64Instr
* ARM64Instr_FPSR ( Bool toFPSR
, HReg iReg
) {
1190 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1191 i
->tag
= ARM64in_FPSR
;
1192 i
->ARM64in
.FPSR
.toFPSR
= toFPSR
;
1193 i
->ARM64in
.FPSR
.iReg
= iReg
;
1196 ARM64Instr
* ARM64Instr_VBinV ( ARM64VecBinOp op
,
1197 HReg dst
, HReg argL
, HReg argR
) {
1198 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1199 i
->tag
= ARM64in_VBinV
;
1200 i
->ARM64in
.VBinV
.op
= op
;
1201 i
->ARM64in
.VBinV
.dst
= dst
;
1202 i
->ARM64in
.VBinV
.argL
= argL
;
1203 i
->ARM64in
.VBinV
.argR
= argR
;
1206 ARM64Instr
* ARM64Instr_VModifyV ( ARM64VecModifyOp op
, HReg mod
, HReg arg
) {
1207 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1208 i
->tag
= ARM64in_VModifyV
;
1209 i
->ARM64in
.VModifyV
.op
= op
;
1210 i
->ARM64in
.VModifyV
.mod
= mod
;
1211 i
->ARM64in
.VModifyV
.arg
= arg
;
1214 ARM64Instr
* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op
, HReg dst
, HReg arg
) {
1215 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1216 i
->tag
= ARM64in_VUnaryV
;
1217 i
->ARM64in
.VUnaryV
.op
= op
;
1218 i
->ARM64in
.VUnaryV
.dst
= dst
;
1219 i
->ARM64in
.VUnaryV
.arg
= arg
;
1222 ARM64Instr
* ARM64Instr_VNarrowV ( ARM64VecNarrowOp op
,
1223 UInt dszBlg2
, HReg dst
, HReg src
) {
1224 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1225 i
->tag
= ARM64in_VNarrowV
;
1226 i
->ARM64in
.VNarrowV
.op
= op
;
1227 i
->ARM64in
.VNarrowV
.dszBlg2
= dszBlg2
;
1228 i
->ARM64in
.VNarrowV
.dst
= dst
;
1229 i
->ARM64in
.VNarrowV
.src
= src
;
1230 vassert(dszBlg2
== 0 || dszBlg2
== 1 || dszBlg2
== 2);
1233 ARM64Instr
* ARM64Instr_VShiftImmV ( ARM64VecShiftImmOp op
,
1234 HReg dst
, HReg src
, UInt amt
) {
1235 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1236 i
->tag
= ARM64in_VShiftImmV
;
1237 i
->ARM64in
.VShiftImmV
.op
= op
;
1238 i
->ARM64in
.VShiftImmV
.dst
= dst
;
1239 i
->ARM64in
.VShiftImmV
.src
= src
;
1240 i
->ARM64in
.VShiftImmV
.amt
= amt
;
1244 /* For right shifts, the allowed shift amounts are 1 .. lane_size.
1245 For left shifts, the allowed shift amounts are 0 .. lane_size-1.
1247 case ARM64vecshi_USHR64x2
: case ARM64vecshi_SSHR64x2
:
1248 case ARM64vecshi_UQSHRN2SD
: case ARM64vecshi_SQSHRN2SD
:
1249 case ARM64vecshi_SQSHRUN2SD
:
1250 case ARM64vecshi_UQRSHRN2SD
: case ARM64vecshi_SQRSHRN2SD
:
1251 case ARM64vecshi_SQRSHRUN2SD
:
1252 minSh
= 1; maxSh
= 64; break;
1253 case ARM64vecshi_SHL64x2
:
1254 case ARM64vecshi_UQSHL64x2
: case ARM64vecshi_SQSHL64x2
:
1255 case ARM64vecshi_SQSHLU64x2
:
1256 minSh
= 0; maxSh
= 63; break;
1257 case ARM64vecshi_USHR32x4
: case ARM64vecshi_SSHR32x4
:
1258 case ARM64vecshi_UQSHRN4HS
: case ARM64vecshi_SQSHRN4HS
:
1259 case ARM64vecshi_SQSHRUN4HS
:
1260 case ARM64vecshi_UQRSHRN4HS
: case ARM64vecshi_SQRSHRN4HS
:
1261 case ARM64vecshi_SQRSHRUN4HS
:
1262 minSh
= 1; maxSh
= 32; break;
1263 case ARM64vecshi_SHL32x4
:
1264 case ARM64vecshi_UQSHL32x4
: case ARM64vecshi_SQSHL32x4
:
1265 case ARM64vecshi_SQSHLU32x4
:
1266 minSh
= 0; maxSh
= 31; break;
1267 case ARM64vecshi_USHR16x8
: case ARM64vecshi_SSHR16x8
:
1268 case ARM64vecshi_UQSHRN8BH
: case ARM64vecshi_SQSHRN8BH
:
1269 case ARM64vecshi_SQSHRUN8BH
:
1270 case ARM64vecshi_UQRSHRN8BH
: case ARM64vecshi_SQRSHRN8BH
:
1271 case ARM64vecshi_SQRSHRUN8BH
:
1272 minSh
= 1; maxSh
= 16; break;
1273 case ARM64vecshi_SHL16x8
:
1274 case ARM64vecshi_UQSHL16x8
: case ARM64vecshi_SQSHL16x8
:
1275 case ARM64vecshi_SQSHLU16x8
:
1276 minSh
= 0; maxSh
= 15; break;
1277 case ARM64vecshi_USHR8x16
: case ARM64vecshi_SSHR8x16
:
1278 minSh
= 1; maxSh
= 8; break;
1279 case ARM64vecshi_SHL8x16
:
1280 case ARM64vecshi_UQSHL8x16
: case ARM64vecshi_SQSHL8x16
:
1281 case ARM64vecshi_SQSHLU8x16
:
1282 minSh
= 0; maxSh
= 7; break;
1287 vassert(amt
>= minSh
&& amt
<= maxSh
);
1290 ARM64Instr
* ARM64Instr_VExtV ( HReg dst
, HReg srcLo
, HReg srcHi
, UInt amtB
) {
1291 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1292 i
->tag
= ARM64in_VExtV
;
1293 i
->ARM64in
.VExtV
.dst
= dst
;
1294 i
->ARM64in
.VExtV
.srcLo
= srcLo
;
1295 i
->ARM64in
.VExtV
.srcHi
= srcHi
;
1296 i
->ARM64in
.VExtV
.amtB
= amtB
;
1297 vassert(amtB
>= 1 && amtB
<= 15);
1300 ARM64Instr
* ARM64Instr_VImmQ (HReg rQ
, UShort imm
) {
1301 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1302 i
->tag
= ARM64in_VImmQ
;
1303 i
->ARM64in
.VImmQ
.rQ
= rQ
;
1304 i
->ARM64in
.VImmQ
.imm
= imm
;
1305 /* Check that this is something that can actually be emitted. */
1307 case 0x0000: case 0x0001: case 0x0003:
1308 case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
1315 ARM64Instr
* ARM64Instr_VDfromX ( HReg rD
, HReg rX
) {
1316 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1317 i
->tag
= ARM64in_VDfromX
;
1318 i
->ARM64in
.VDfromX
.rD
= rD
;
1319 i
->ARM64in
.VDfromX
.rX
= rX
;
1322 ARM64Instr
* ARM64Instr_VQfromX ( HReg rQ
, HReg rXlo
) {
1323 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1324 i
->tag
= ARM64in_VQfromX
;
1325 i
->ARM64in
.VQfromX
.rQ
= rQ
;
1326 i
->ARM64in
.VQfromX
.rXlo
= rXlo
;
1329 ARM64Instr
* ARM64Instr_VQfromXX ( HReg rQ
, HReg rXhi
, HReg rXlo
) {
1330 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1331 i
->tag
= ARM64in_VQfromXX
;
1332 i
->ARM64in
.VQfromXX
.rQ
= rQ
;
1333 i
->ARM64in
.VQfromXX
.rXhi
= rXhi
;
1334 i
->ARM64in
.VQfromXX
.rXlo
= rXlo
;
1337 ARM64Instr
* ARM64Instr_VXfromQ ( HReg rX
, HReg rQ
, UInt laneNo
) {
1338 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1339 i
->tag
= ARM64in_VXfromQ
;
1340 i
->ARM64in
.VXfromQ
.rX
= rX
;
1341 i
->ARM64in
.VXfromQ
.rQ
= rQ
;
1342 i
->ARM64in
.VXfromQ
.laneNo
= laneNo
;
1343 vassert(laneNo
<= 1);
1346 ARM64Instr
* ARM64Instr_VXfromDorS ( HReg rX
, HReg rDorS
, Bool fromD
) {
1347 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1348 i
->tag
= ARM64in_VXfromDorS
;
1349 i
->ARM64in
.VXfromDorS
.rX
= rX
;
1350 i
->ARM64in
.VXfromDorS
.rDorS
= rDorS
;
1351 i
->ARM64in
.VXfromDorS
.fromD
= fromD
;
1354 ARM64Instr
* ARM64Instr_VMov ( UInt szB
, HReg dst
, HReg src
) {
1355 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1356 i
->tag
= ARM64in_VMov
;
1357 i
->ARM64in
.VMov
.szB
= szB
;
1358 i
->ARM64in
.VMov
.dst
= dst
;
1359 i
->ARM64in
.VMov
.src
= src
;
1362 vassert(hregClass(src
) == HRcVec128
);
1363 vassert(hregClass(dst
) == HRcVec128
);
1366 vassert(hregClass(src
) == HRcFlt64
);
1367 vassert(hregClass(dst
) == HRcFlt64
);
1370 vpanic("ARM64Instr_VMov");
1374 ARM64Instr
* ARM64Instr_EvCheck ( ARM64AMode
* amCounter
,
1375 ARM64AMode
* amFailAddr
) {
1376 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1377 i
->tag
= ARM64in_EvCheck
;
1378 i
->ARM64in
.EvCheck
.amCounter
= amCounter
;
1379 i
->ARM64in
.EvCheck
.amFailAddr
= amFailAddr
;
1382 ARM64Instr
* ARM64Instr_ProfInc ( void ) {
1383 ARM64Instr
* i
= LibVEX_Alloc_inline(sizeof(ARM64Instr
));
1384 i
->tag
= ARM64in_ProfInc
;
1390 void ppARM64Instr ( const ARM64Instr
* i
) {
1393 vex_printf("%s ", i
->ARM64in
.Arith
.isAdd
? "add" : "sub");
1394 ppHRegARM64(i
->ARM64in
.Arith
.dst
);
1396 ppHRegARM64(i
->ARM64in
.Arith
.argL
);
1398 ppARM64RIA(i
->ARM64in
.Arith
.argR
);
1401 vex_printf("cmp%s ", i
->ARM64in
.Cmp
.is64
? " " : "(w)" );
1402 ppHRegARM64(i
->ARM64in
.Cmp
.argL
);
1404 ppARM64RIA(i
->ARM64in
.Cmp
.argR
);
1407 vex_printf("%s ", showARM64LogicOp(i
->ARM64in
.Logic
.op
));
1408 ppHRegARM64(i
->ARM64in
.Logic
.dst
);
1410 ppHRegARM64(i
->ARM64in
.Logic
.argL
);
1412 ppARM64RIL(i
->ARM64in
.Logic
.argR
);
1416 ppHRegARM64(i
->ARM64in
.Test
.argL
);
1418 ppARM64RIL(i
->ARM64in
.Test
.argR
);
1421 vex_printf("%s ", showARM64ShiftOp(i
->ARM64in
.Shift
.op
));
1422 ppHRegARM64(i
->ARM64in
.Shift
.dst
);
1424 ppHRegARM64(i
->ARM64in
.Shift
.argL
);
1426 ppARM64RI6(i
->ARM64in
.Shift
.argR
);
1429 vex_printf("%s ", showARM64UnaryOp(i
->ARM64in
.Unary
.op
));
1430 ppHRegARM64(i
->ARM64in
.Unary
.dst
);
1432 ppHRegARM64(i
->ARM64in
.Unary
.src
);
1435 vex_printf("cset ");
1436 ppHRegARM64(i
->ARM64in
.Set64
.dst
);
1437 vex_printf(", %s", showARM64CondCode(i
->ARM64in
.Set64
.cond
));
1441 ppHRegARM64(i
->ARM64in
.MovI
.dst
);
1443 ppHRegARM64(i
->ARM64in
.MovI
.src
);
1446 vex_printf("imm64 ");
1447 ppHRegARM64(i
->ARM64in
.Imm64
.dst
);
1448 vex_printf(", 0x%llx", i
->ARM64in
.Imm64
.imm64
);
1450 case ARM64in_LdSt64
:
1451 if (i
->ARM64in
.LdSt64
.isLoad
) {
1453 ppHRegARM64(i
->ARM64in
.LdSt64
.rD
);
1455 ppARM64AMode(i
->ARM64in
.LdSt64
.amode
);
1458 ppARM64AMode(i
->ARM64in
.LdSt64
.amode
);
1460 ppHRegARM64(i
->ARM64in
.LdSt64
.rD
);
1463 case ARM64in_LdSt32
:
1464 if (i
->ARM64in
.LdSt32
.isLoad
) {
1465 vex_printf("ldruw ");
1466 ppHRegARM64(i
->ARM64in
.LdSt32
.rD
);
1468 ppARM64AMode(i
->ARM64in
.LdSt32
.amode
);
1470 vex_printf("strw ");
1471 ppARM64AMode(i
->ARM64in
.LdSt32
.amode
);
1473 ppHRegARM64(i
->ARM64in
.LdSt32
.rD
);
1476 case ARM64in_LdSt16
:
1477 if (i
->ARM64in
.LdSt16
.isLoad
) {
1478 vex_printf("ldruh ");
1479 ppHRegARM64(i
->ARM64in
.LdSt16
.rD
);
1481 ppARM64AMode(i
->ARM64in
.LdSt16
.amode
);
1483 vex_printf("strh ");
1484 ppARM64AMode(i
->ARM64in
.LdSt16
.amode
);
1486 ppHRegARM64(i
->ARM64in
.LdSt16
.rD
);
1490 if (i
->ARM64in
.LdSt8
.isLoad
) {
1491 vex_printf("ldrub ");
1492 ppHRegARM64(i
->ARM64in
.LdSt8
.rD
);
1494 ppARM64AMode(i
->ARM64in
.LdSt8
.amode
);
1496 vex_printf("strb ");
1497 ppARM64AMode(i
->ARM64in
.LdSt8
.amode
);
1499 ppHRegARM64(i
->ARM64in
.LdSt8
.rD
);
1502 case ARM64in_XDirect
:
1503 vex_printf("(xDirect) ");
1504 vex_printf("if (%%pstate.%s) { ",
1505 showARM64CondCode(i
->ARM64in
.XDirect
.cond
));
1506 vex_printf("imm64 x9,0x%llx; ", i
->ARM64in
.XDirect
.dstGA
);
1507 vex_printf("str x9,");
1508 ppARM64AMode(i
->ARM64in
.XDirect
.amPC
);
1509 vex_printf("; imm64-exactly4 x9,$disp_cp_chain_me_to_%sEP; ",
1510 i
->ARM64in
.XDirect
.toFastEP
? "fast" : "slow");
1511 vex_printf("blr x9 }");
1513 case ARM64in_XIndir
:
1514 vex_printf("(xIndir) ");
1515 vex_printf("if (%%pstate.%s) { ",
1516 showARM64CondCode(i
->ARM64in
.XIndir
.cond
));
1518 ppHRegARM64(i
->ARM64in
.XIndir
.dstGA
);
1520 ppARM64AMode(i
->ARM64in
.XIndir
.amPC
);
1521 vex_printf("; imm64 x9,$disp_cp_xindir; ");
1522 vex_printf("br x9 }");
1524 case ARM64in_XAssisted
:
1525 vex_printf("(xAssisted) ");
1526 vex_printf("if (%%pstate.%s) { ",
1527 showARM64CondCode(i
->ARM64in
.XAssisted
.cond
));
1529 ppHRegARM64(i
->ARM64in
.XAssisted
.dstGA
);
1531 ppARM64AMode(i
->ARM64in
.XAssisted
.amPC
);
1532 vex_printf("; movw x21,$IRJumpKind_to_TRCVAL(%d); ",
1533 (Int
)i
->ARM64in
.XAssisted
.jk
);
1534 vex_printf("imm64 x9,$disp_cp_xassisted; ");
1535 vex_printf("br x9 }");
1538 vex_printf("csel ");
1539 ppHRegARM64(i
->ARM64in
.CSel
.dst
);
1541 ppHRegARM64(i
->ARM64in
.CSel
.argL
);
1543 ppHRegARM64(i
->ARM64in
.CSel
.argR
);
1544 vex_printf(", %s", showARM64CondCode(i
->ARM64in
.CSel
.cond
));
1547 vex_printf("call%s ",
1548 i
->ARM64in
.Call
.cond
==ARM64cc_AL
1549 ? " " : showARM64CondCode(i
->ARM64in
.Call
.cond
));
1550 vex_printf("0x%llx [nArgRegs=%d, ",
1551 i
->ARM64in
.Call
.target
, i
->ARM64in
.Call
.nArgRegs
);
1552 ppRetLoc(i
->ARM64in
.Call
.rloc
);
1555 case ARM64in_AddToSP
: {
1556 Int simm
= i
->ARM64in
.AddToSP
.simm
;
1557 vex_printf("%s xsp, xsp, #%d", simm
< 0 ? "sub" : "add",
1558 simm
< 0 ? -simm
: simm
);
1561 case ARM64in_FromSP
:
1563 ppHRegARM64(i
->ARM64in
.FromSP
.dst
);
1564 vex_printf(", xsp");
1567 vex_printf("%s ", showARM64MulOp(i
->ARM64in
.Mul
.op
));
1568 ppHRegARM64(i
->ARM64in
.Mul
.dst
);
1570 ppHRegARM64(i
->ARM64in
.Mul
.argL
);
1572 ppHRegARM64(i
->ARM64in
.Mul
.argR
);
1575 case ARM64in_LdrEX
: {
1576 const HChar
* sz
= " ";
1577 switch (i
->ARM64in
.LdrEX
.szB
) {
1578 case 1: sz
= "b"; break;
1579 case 2: sz
= "h"; break;
1580 case 4: case 8: break;
1581 default: vassert(0);
1583 vex_printf("ldxr%s %c2, [x4]",
1584 sz
, i
->ARM64in
.LdrEX
.szB
== 8 ? 'x' : 'w');
1587 case ARM64in_StrEX
: {
1588 const HChar
* sz
= " ";
1589 switch (i
->ARM64in
.StrEX
.szB
) {
1590 case 1: sz
= "b"; break;
1591 case 2: sz
= "h"; break;
1592 case 4: case 8: break;
1593 default: vassert(0);
1595 vex_printf("stxr%s w0, %c2, [x4]",
1596 sz
, i
->ARM64in
.StrEX
.szB
== 8 ? 'x' : 'w');
1600 vex_printf("x1 = cas(%dbit)(x3, x5 -> x7)", 8 * i
->ARM64in
.CAS
.szB
);
1603 case ARM64in_CASP
: {
1604 vex_printf("x0,x1 = casp(%dbit)(x2, x4,x5 -> x6,x7)", 8 * i
->ARM64in
.CASP
.szB
);
1607 case ARM64in_MFence
:
1608 vex_printf("(mfence) dsb sy; dmb sy; isb");
1611 vex_printf("clrex #15");
1613 case ARM64in_VLdStH
:
1614 if (i
->ARM64in
.VLdStH
.isLoad
) {
1616 ppHRegARM64asHreg(i
->ARM64in
.VLdStH
.hD
);
1617 vex_printf(", %u(", i
->ARM64in
.VLdStH
.uimm12
);
1618 ppHRegARM64(i
->ARM64in
.VLdStH
.rN
);
1622 vex_printf("%u(", i
->ARM64in
.VLdStH
.uimm12
);
1623 ppHRegARM64(i
->ARM64in
.VLdStH
.rN
);
1625 ppHRegARM64asHreg(i
->ARM64in
.VLdStH
.hD
);
1628 case ARM64in_VLdStS
:
1629 if (i
->ARM64in
.VLdStS
.isLoad
) {
1631 ppHRegARM64asSreg(i
->ARM64in
.VLdStS
.sD
);
1632 vex_printf(", %u(", i
->ARM64in
.VLdStS
.uimm12
);
1633 ppHRegARM64(i
->ARM64in
.VLdStS
.rN
);
1637 vex_printf("%u(", i
->ARM64in
.VLdStS
.uimm12
);
1638 ppHRegARM64(i
->ARM64in
.VLdStS
.rN
);
1640 ppHRegARM64asSreg(i
->ARM64in
.VLdStS
.sD
);
1643 case ARM64in_VLdStD
:
1644 if (i
->ARM64in
.VLdStD
.isLoad
) {
1646 ppHRegARM64(i
->ARM64in
.VLdStD
.dD
);
1647 vex_printf(", %u(", i
->ARM64in
.VLdStD
.uimm12
);
1648 ppHRegARM64(i
->ARM64in
.VLdStD
.rN
);
1652 vex_printf("%u(", i
->ARM64in
.VLdStD
.uimm12
);
1653 ppHRegARM64(i
->ARM64in
.VLdStD
.rN
);
1655 ppHRegARM64(i
->ARM64in
.VLdStD
.dD
);
1658 case ARM64in_VLdStQ
:
1659 if (i
->ARM64in
.VLdStQ
.isLoad
)
1660 vex_printf("ld1.2d {");
1662 vex_printf("st1.2d {");
1663 ppHRegARM64(i
->ARM64in
.VLdStQ
.rQ
);
1665 ppHRegARM64(i
->ARM64in
.VLdStQ
.rN
);
1668 case ARM64in_VCvtI2F
: {
1672 characteriseARM64CvtOp(&syn
, &fszB
, &iszB
, i
->ARM64in
.VCvtI2F
.how
);
1673 vex_printf("%ccvtf ", syn
);
1674 ppHRegARM64(i
->ARM64in
.VCvtI2F
.rD
);
1675 vex_printf("(%c-reg), ", fszB
== 4 ? 'S' : 'D');
1676 ppHRegARM64(i
->ARM64in
.VCvtI2F
.rS
);
1677 vex_printf("(%c-reg)", iszB
== 4 ? 'W' : 'X');
1680 case ARM64in_VCvtF2I
: {
1685 characteriseARM64CvtOp(&syn
, &fszB
, &iszB
, i
->ARM64in
.VCvtF2I
.how
);
1686 UChar armRM
= i
->ARM64in
.VCvtF2I
.armRM
;
1687 if (armRM
< 4) rmo
= "npmz"[armRM
];
1688 vex_printf("fcvt%c%c ", rmo
, syn
);
1689 ppHRegARM64(i
->ARM64in
.VCvtF2I
.rD
);
1690 vex_printf("(%c-reg), ", iszB
== 4 ? 'W' : 'X');
1691 ppHRegARM64(i
->ARM64in
.VCvtF2I
.rS
);
1692 vex_printf("(%c-reg)", fszB
== 4 ? 'S' : 'D');
1695 case ARM64in_VCvtSD
:
1696 vex_printf("fcvt%s ", i
->ARM64in
.VCvtSD
.sToD
? "s2d" : "d2s");
1697 if (i
->ARM64in
.VCvtSD
.sToD
) {
1698 ppHRegARM64(i
->ARM64in
.VCvtSD
.dst
);
1700 ppHRegARM64asSreg(i
->ARM64in
.VCvtSD
.src
);
1702 ppHRegARM64asSreg(i
->ARM64in
.VCvtSD
.dst
);
1704 ppHRegARM64(i
->ARM64in
.VCvtSD
.src
);
1707 case ARM64in_VCvtHS
:
1708 vex_printf("fcvt%s ", i
->ARM64in
.VCvtHS
.hToS
? "h2s" : "s2h");
1709 if (i
->ARM64in
.VCvtHS
.hToS
) {
1710 ppHRegARM64asSreg(i
->ARM64in
.VCvtHS
.dst
);
1712 ppHRegARM64asHreg(i
->ARM64in
.VCvtHS
.src
);
1714 ppHRegARM64asHreg(i
->ARM64in
.VCvtHS
.dst
);
1716 ppHRegARM64asSreg(i
->ARM64in
.VCvtHS
.src
);
1719 case ARM64in_VCvtHD
:
1720 vex_printf("fcvt%s ", i
->ARM64in
.VCvtHD
.hToD
? "h2d" : "d2h");
1721 if (i
->ARM64in
.VCvtHD
.hToD
) {
1722 ppHRegARM64(i
->ARM64in
.VCvtHD
.dst
);
1724 ppHRegARM64asHreg(i
->ARM64in
.VCvtHD
.src
);
1726 ppHRegARM64asHreg(i
->ARM64in
.VCvtHD
.dst
);
1728 ppHRegARM64(i
->ARM64in
.VCvtHD
.src
);
1731 case ARM64in_VUnaryD
:
1732 vex_printf("f%s ", showARM64FpUnaryOp(i
->ARM64in
.VUnaryD
.op
));
1733 ppHRegARM64(i
->ARM64in
.VUnaryD
.dst
);
1735 ppHRegARM64(i
->ARM64in
.VUnaryD
.src
);
1737 case ARM64in_VUnaryS
:
1738 vex_printf("f%s ", showARM64FpUnaryOp(i
->ARM64in
.VUnaryS
.op
));
1739 ppHRegARM64asSreg(i
->ARM64in
.VUnaryS
.dst
);
1741 ppHRegARM64asSreg(i
->ARM64in
.VUnaryS
.src
);
1744 vex_printf("f%s ", showARM64FpBinOp(i
->ARM64in
.VBinD
.op
));
1745 ppHRegARM64(i
->ARM64in
.VBinD
.dst
);
1747 ppHRegARM64(i
->ARM64in
.VBinD
.argL
);
1749 ppHRegARM64(i
->ARM64in
.VBinD
.argR
);
1752 vex_printf("f%s ", showARM64FpBinOp(i
->ARM64in
.VBinS
.op
));
1753 ppHRegARM64asSreg(i
->ARM64in
.VBinS
.dst
);
1755 ppHRegARM64asSreg(i
->ARM64in
.VBinS
.argL
);
1757 ppHRegARM64asSreg(i
->ARM64in
.VBinS
.argR
);
1760 vex_printf("fcmp ");
1761 ppHRegARM64(i
->ARM64in
.VCmpD
.argL
);
1763 ppHRegARM64(i
->ARM64in
.VCmpD
.argR
);
1766 vex_printf("fcmp ");
1767 ppHRegARM64asSreg(i
->ARM64in
.VCmpS
.argL
);
1769 ppHRegARM64asSreg(i
->ARM64in
.VCmpS
.argR
);
1771 case ARM64in_VFCSel
: {
1772 UInt (*ppHRegARM64fp
)(HReg
)
1773 = (i
->ARM64in
.VFCSel
.isD
? ppHRegARM64
: ppHRegARM64asSreg
);
1774 vex_printf("fcsel ");
1775 ppHRegARM64fp(i
->ARM64in
.VFCSel
.dst
);
1777 ppHRegARM64fp(i
->ARM64in
.VFCSel
.argL
);
1779 ppHRegARM64fp(i
->ARM64in
.VFCSel
.argR
);
1780 vex_printf(", %s", showARM64CondCode(i
->ARM64in
.VFCSel
.cond
));
1784 if (i
->ARM64in
.FPCR
.toFPCR
) {
1785 vex_printf("msr fpcr, ");
1786 ppHRegARM64(i
->ARM64in
.FPCR
.iReg
);
1789 ppHRegARM64(i
->ARM64in
.FPCR
.iReg
);
1790 vex_printf(", fpcr");
1794 if (i
->ARM64in
.FPSR
.toFPSR
) {
1795 vex_printf("msr fpsr, ");
1796 ppHRegARM64(i
->ARM64in
.FPSR
.iReg
);
1799 ppHRegARM64(i
->ARM64in
.FPSR
.iReg
);
1800 vex_printf(", fpsr");
1803 case ARM64in_VBinV
: {
1804 const HChar
* nm
= "??";
1805 const HChar
* ar
= "??";
1806 showARM64VecBinOp(&nm
, &ar
, i
->ARM64in
.VBinV
.op
);
1807 vex_printf("%s ", nm
);
1808 ppHRegARM64(i
->ARM64in
.VBinV
.dst
);
1809 vex_printf(".%s, ", ar
);
1810 ppHRegARM64(i
->ARM64in
.VBinV
.argL
);
1811 vex_printf(".%s, ", ar
);
1812 ppHRegARM64(i
->ARM64in
.VBinV
.argR
);
1813 vex_printf(".%s", ar
);
1816 case ARM64in_VModifyV
: {
1817 const HChar
* nm
= "??";
1818 const HChar
* ar
= "??";
1819 showARM64VecModifyOp(&nm
, &ar
, i
->ARM64in
.VModifyV
.op
);
1820 vex_printf("%s ", nm
);
1821 ppHRegARM64(i
->ARM64in
.VModifyV
.mod
);
1822 vex_printf(".%s, ", ar
);
1823 ppHRegARM64(i
->ARM64in
.VModifyV
.arg
);
1824 vex_printf(".%s", ar
);
1827 case ARM64in_VUnaryV
: {
1828 const HChar
* nm
= "??";
1829 const HChar
* ar
= "??";
1830 showARM64VecUnaryOp(&nm
, &ar
, i
->ARM64in
.VUnaryV
.op
);
1831 vex_printf("%s ", nm
);
1832 ppHRegARM64(i
->ARM64in
.VUnaryV
.dst
);
1833 vex_printf(".%s, ", ar
);
1834 ppHRegARM64(i
->ARM64in
.VUnaryV
.arg
);
1835 vex_printf(".%s", ar
);
1838 case ARM64in_VNarrowV
: {
1839 UInt dszBlg2
= i
->ARM64in
.VNarrowV
.dszBlg2
;
1840 const HChar
* darr
[3] = { "8b", "4h", "2s" };
1841 const HChar
* sarr
[3] = { "8h", "4s", "2d" };
1842 const HChar
* nm
= showARM64VecNarrowOp(i
->ARM64in
.VNarrowV
.op
);
1843 vex_printf("%s ", nm
);
1844 ppHRegARM64(i
->ARM64in
.VNarrowV
.dst
);
1845 vex_printf(".%s, ", dszBlg2
< 3 ? darr
[dszBlg2
] : "??");
1846 ppHRegARM64(i
->ARM64in
.VNarrowV
.src
);
1847 vex_printf(".%s", dszBlg2
< 3 ? sarr
[dszBlg2
] : "??");
1850 case ARM64in_VShiftImmV
: {
1851 const HChar
* nm
= "??";
1852 const HChar
* ar
= "??";
1853 showARM64VecShiftImmOp(&nm
, &ar
, i
->ARM64in
.VShiftImmV
.op
);
1854 vex_printf("%s ", nm
);
1855 ppHRegARM64(i
->ARM64in
.VShiftImmV
.dst
);
1856 vex_printf(".%s, ", ar
);
1857 ppHRegARM64(i
->ARM64in
.VShiftImmV
.src
);
1858 vex_printf(".%s, #%u", ar
, i
->ARM64in
.VShiftImmV
.amt
);
1861 case ARM64in_VExtV
: {
1863 ppHRegARM64(i
->ARM64in
.VExtV
.dst
);
1864 vex_printf(".16b, ");
1865 ppHRegARM64(i
->ARM64in
.VExtV
.srcLo
);
1866 vex_printf(".16b, ");
1867 ppHRegARM64(i
->ARM64in
.VExtV
.srcHi
);
1868 vex_printf(".16b, #%u", i
->ARM64in
.VExtV
.amtB
);
1872 vex_printf("qimm ");
1873 ppHRegARM64(i
->ARM64in
.VImmQ
.rQ
);
1874 vex_printf(", Bits16toBytes16(0x%x)", (UInt
)i
->ARM64in
.VImmQ
.imm
);
1876 case ARM64in_VDfromX
:
1877 vex_printf("fmov ");
1878 ppHRegARM64(i
->ARM64in
.VDfromX
.rD
);
1880 ppHRegARM64(i
->ARM64in
.VDfromX
.rX
);
1882 case ARM64in_VQfromX
:
1883 vex_printf("fmov ");
1884 ppHRegARM64(i
->ARM64in
.VQfromX
.rQ
);
1885 vex_printf(".d[0], ");
1886 ppHRegARM64(i
->ARM64in
.VQfromX
.rXlo
);
1888 case ARM64in_VQfromXX
:
1889 vex_printf("qFromXX ");
1890 ppHRegARM64(i
->ARM64in
.VQfromXX
.rQ
);
1892 ppHRegARM64(i
->ARM64in
.VQfromXX
.rXhi
);
1894 ppHRegARM64(i
->ARM64in
.VQfromXX
.rXlo
);
1896 case ARM64in_VXfromQ
:
1897 vex_printf("fmov ");
1898 ppHRegARM64(i
->ARM64in
.VXfromQ
.rX
);
1900 ppHRegARM64(i
->ARM64in
.VXfromQ
.rQ
);
1901 vex_printf(".d[%u]", i
->ARM64in
.VXfromQ
.laneNo
);
1903 case ARM64in_VXfromDorS
:
1904 vex_printf("fmov ");
1905 ppHRegARM64(i
->ARM64in
.VXfromDorS
.rX
);
1906 vex_printf("(%c-reg), ", i
->ARM64in
.VXfromDorS
.fromD
? 'X':'W');
1907 ppHRegARM64(i
->ARM64in
.VXfromDorS
.rDorS
);
1908 vex_printf("(%c-reg)", i
->ARM64in
.VXfromDorS
.fromD
? 'D' : 'S');
1910 case ARM64in_VMov
: {
1912 switch (i
->ARM64in
.VMov
.szB
) {
1913 case 16: aux
= 'q'; break;
1914 case 8: aux
= 'd'; break;
1915 case 4: aux
= 's'; break;
1918 vex_printf("mov(%c) ", aux
);
1919 ppHRegARM64(i
->ARM64in
.VMov
.dst
);
1921 ppHRegARM64(i
->ARM64in
.VMov
.src
);
1924 case ARM64in_EvCheck
:
1925 vex_printf("(evCheck) ldr w9,");
1926 ppARM64AMode(i
->ARM64in
.EvCheck
.amCounter
);
1927 vex_printf("; subs w9,w9,$1; str w9,");
1928 ppARM64AMode(i
->ARM64in
.EvCheck
.amCounter
);
1929 vex_printf("; bpl nofail; ldr x9,");
1930 ppARM64AMode(i
->ARM64in
.EvCheck
.amFailAddr
);
1931 vex_printf("; br x9; nofail:");
1933 case ARM64in_ProfInc
:
1934 vex_printf("(profInc) imm64-fixed4 x9,$NotKnownYet; "
1935 "ldr x8,[x9]; add x8,x8,#1, str x8,[x9]");
1938 vex_printf("ppARM64Instr: unhandled case (tag %d)", (Int
)i
->tag
);
1939 vpanic("ppARM64Instr(1)");
1945 /* --------- Helpers for register allocation. --------- */
1947 void getRegUsage_ARM64Instr ( HRegUsage
* u
, const ARM64Instr
* i
, Bool mode64
)
1949 vassert(mode64
== True
);
1953 addHRegUse(u
, HRmWrite
, i
->ARM64in
.Arith
.dst
);
1954 addHRegUse(u
, HRmRead
, i
->ARM64in
.Arith
.argL
);
1955 addRegUsage_ARM64RIA(u
, i
->ARM64in
.Arith
.argR
);
1958 addHRegUse(u
, HRmRead
, i
->ARM64in
.Cmp
.argL
);
1959 addRegUsage_ARM64RIA(u
, i
->ARM64in
.Cmp
.argR
);
1962 addHRegUse(u
, HRmWrite
, i
->ARM64in
.Logic
.dst
);
1963 addHRegUse(u
, HRmRead
, i
->ARM64in
.Logic
.argL
);
1964 addRegUsage_ARM64RIL(u
, i
->ARM64in
.Logic
.argR
);
1967 addHRegUse(u
, HRmRead
, i
->ARM64in
.Test
.argL
);
1968 addRegUsage_ARM64RIL(u
, i
->ARM64in
.Test
.argR
);
1971 addHRegUse(u
, HRmWrite
, i
->ARM64in
.Shift
.dst
);
1972 addHRegUse(u
, HRmRead
, i
->ARM64in
.Shift
.argL
);
1973 addRegUsage_ARM64RI6(u
, i
->ARM64in
.Shift
.argR
);
1976 addHRegUse(u
, HRmWrite
, i
->ARM64in
.Unary
.dst
);
1977 addHRegUse(u
, HRmRead
, i
->ARM64in
.Unary
.src
);
1980 addHRegUse(u
, HRmWrite
, i
->ARM64in
.Set64
.dst
);
1983 addHRegUse(u
, HRmWrite
, i
->ARM64in
.MovI
.dst
);
1984 addHRegUse(u
, HRmRead
, i
->ARM64in
.MovI
.src
);
1985 u
->isRegRegMove
= True
;
1986 u
->regMoveSrc
= i
->ARM64in
.MovI
.src
;
1987 u
->regMoveDst
= i
->ARM64in
.MovI
.dst
;
1990 addHRegUse(u
, HRmWrite
, i
->ARM64in
.Imm64
.dst
);
1992 case ARM64in_LdSt64
:
1993 addRegUsage_ARM64AMode(u
, i
->ARM64in
.LdSt64
.amode
);
1994 if (i
->ARM64in
.LdSt64
.isLoad
) {
1995 addHRegUse(u
, HRmWrite
, i
->ARM64in
.LdSt64
.rD
);
1997 addHRegUse(u
, HRmRead
, i
->ARM64in
.LdSt64
.rD
);
2000 case ARM64in_LdSt32
:
2001 addRegUsage_ARM64AMode(u
, i
->ARM64in
.LdSt32
.amode
);
2002 if (i
->ARM64in
.LdSt32
.isLoad
) {
2003 addHRegUse(u
, HRmWrite
, i
->ARM64in
.LdSt32
.rD
);
2005 addHRegUse(u
, HRmRead
, i
->ARM64in
.LdSt32
.rD
);
2008 case ARM64in_LdSt16
:
2009 addRegUsage_ARM64AMode(u
, i
->ARM64in
.LdSt16
.amode
);
2010 if (i
->ARM64in
.LdSt16
.isLoad
) {
2011 addHRegUse(u
, HRmWrite
, i
->ARM64in
.LdSt16
.rD
);
2013 addHRegUse(u
, HRmRead
, i
->ARM64in
.LdSt16
.rD
);
2017 addRegUsage_ARM64AMode(u
, i
->ARM64in
.LdSt8
.amode
);
2018 if (i
->ARM64in
.LdSt8
.isLoad
) {
2019 addHRegUse(u
, HRmWrite
, i
->ARM64in
.LdSt8
.rD
);
2021 addHRegUse(u
, HRmRead
, i
->ARM64in
.LdSt8
.rD
);
2024 /* XDirect/XIndir/XAssisted are also a bit subtle. They
2025 conditionally exit the block. Hence we only need to list (1)
2026 the registers that they read, and (2) the registers that they
2027 write in the case where the block is not exited. (2) is
2028 empty, hence only (1) is relevant here. */
2029 case ARM64in_XDirect
:
2030 addRegUsage_ARM64AMode(u
, i
->ARM64in
.XDirect
.amPC
);
2032 case ARM64in_XIndir
:
2033 addHRegUse(u
, HRmRead
, i
->ARM64in
.XIndir
.dstGA
);
2034 addRegUsage_ARM64AMode(u
, i
->ARM64in
.XIndir
.amPC
);
2036 case ARM64in_XAssisted
:
2037 addHRegUse(u
, HRmRead
, i
->ARM64in
.XAssisted
.dstGA
);
2038 addRegUsage_ARM64AMode(u
, i
->ARM64in
.XAssisted
.amPC
);
2041 addHRegUse(u
, HRmWrite
, i
->ARM64in
.CSel
.dst
);
2042 addHRegUse(u
, HRmRead
, i
->ARM64in
.CSel
.argL
);
2043 addHRegUse(u
, HRmRead
, i
->ARM64in
.CSel
.argR
);
2046 /* logic and comments copied/modified from x86 back end */
2047 /* This is a bit subtle. */
2048 /* First off, claim it trashes all the caller-saved regs
2049 which fall within the register allocator's jurisdiction.
2050 These I believe to be x0 to x7 and the 128-bit vector
2051 registers in use, q16 .. q20. */
2052 addHRegUse(u
, HRmWrite
, hregARM64_X0());
2053 addHRegUse(u
, HRmWrite
, hregARM64_X1());
2054 addHRegUse(u
, HRmWrite
, hregARM64_X2());
2055 addHRegUse(u
, HRmWrite
, hregARM64_X3());
2056 addHRegUse(u
, HRmWrite
, hregARM64_X4());
2057 addHRegUse(u
, HRmWrite
, hregARM64_X5());
2058 addHRegUse(u
, HRmWrite
, hregARM64_X6());
2059 addHRegUse(u
, HRmWrite
, hregARM64_X7());
2060 addHRegUse(u
, HRmWrite
, hregARM64_Q16());
2061 addHRegUse(u
, HRmWrite
, hregARM64_Q17());
2062 addHRegUse(u
, HRmWrite
, hregARM64_Q18());
2063 addHRegUse(u
, HRmWrite
, hregARM64_Q19());
2064 addHRegUse(u
, HRmWrite
, hregARM64_Q20());
2065 /* Now we have to state any parameter-carrying registers
2066 which might be read. This depends on nArgRegs. */
2067 switch (i
->ARM64in
.Call
.nArgRegs
) {
2068 case 8: addHRegUse(u
, HRmRead
, hregARM64_X7()); /*fallthru*/
2069 case 7: addHRegUse(u
, HRmRead
, hregARM64_X6()); /*fallthru*/
2070 case 6: addHRegUse(u
, HRmRead
, hregARM64_X5()); /*fallthru*/
2071 case 5: addHRegUse(u
, HRmRead
, hregARM64_X4()); /*fallthru*/
2072 case 4: addHRegUse(u
, HRmRead
, hregARM64_X3()); /*fallthru*/
2073 case 3: addHRegUse(u
, HRmRead
, hregARM64_X2()); /*fallthru*/
2074 case 2: addHRegUse(u
, HRmRead
, hregARM64_X1()); /*fallthru*/
2075 case 1: addHRegUse(u
, HRmRead
, hregARM64_X0()); break;
2077 default: vpanic("getRegUsage_ARM64:Call:regparms");
2079 /* Finally, there is the issue that the insn trashes a
2080 register because the literal target address has to be
2081 loaded into a register. However, we reserve x9 for that
2082 purpose so there's no further complexity here. Stating x9
2083 as trashed is pointless since it's not under the control
2084 of the allocator, but what the hell. */
2085 addHRegUse(u
, HRmWrite
, hregARM64_X9());
2087 case ARM64in_AddToSP
:
2088 /* Only changes SP, but regalloc doesn't control that, hence
2091 case ARM64in_FromSP
:
2092 addHRegUse(u
, HRmWrite
, i
->ARM64in
.FromSP
.dst
);
2095 addHRegUse(u
, HRmWrite
, i
->ARM64in
.Mul
.dst
);
2096 addHRegUse(u
, HRmRead
, i
->ARM64in
.Mul
.argL
);
2097 addHRegUse(u
, HRmRead
, i
->ARM64in
.Mul
.argR
);
2100 addHRegUse(u
, HRmRead
, hregARM64_X4());
2101 addHRegUse(u
, HRmWrite
, hregARM64_X2());
2104 addHRegUse(u
, HRmRead
, hregARM64_X4());
2105 addHRegUse(u
, HRmWrite
, hregARM64_X0());
2106 addHRegUse(u
, HRmRead
, hregARM64_X2());
2109 addHRegUse(u
, HRmRead
, hregARM64_X3());
2110 addHRegUse(u
, HRmRead
, hregARM64_X5());
2111 addHRegUse(u
, HRmRead
, hregARM64_X7());
2112 addHRegUse(u
, HRmWrite
, hregARM64_X1());
2113 /* Pointless to state this since X8 is not available to RA. */
2114 addHRegUse(u
, HRmWrite
, hregARM64_X8());
2117 addHRegUse(u
, HRmRead
, hregARM64_X2());
2118 addHRegUse(u
, HRmRead
, hregARM64_X4());
2119 addHRegUse(u
, HRmRead
, hregARM64_X5());
2120 addHRegUse(u
, HRmRead
, hregARM64_X6());
2121 addHRegUse(u
, HRmRead
, hregARM64_X7());
2122 addHRegUse(u
, HRmWrite
, hregARM64_X0());
2123 addHRegUse(u
, HRmWrite
, hregARM64_X1());
2124 addHRegUse(u
, HRmWrite
, hregARM64_X9());
2125 addHRegUse(u
, HRmWrite
, hregARM64_X8());
2127 case ARM64in_MFence
:
2131 case ARM64in_VLdStH
:
2132 addHRegUse(u
, HRmRead
, i
->ARM64in
.VLdStH
.rN
);
2133 if (i
->ARM64in
.VLdStH
.isLoad
) {
2134 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VLdStH
.hD
);
2136 addHRegUse(u
, HRmRead
, i
->ARM64in
.VLdStH
.hD
);
2139 case ARM64in_VLdStS
:
2140 addHRegUse(u
, HRmRead
, i
->ARM64in
.VLdStS
.rN
);
2141 if (i
->ARM64in
.VLdStS
.isLoad
) {
2142 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VLdStS
.sD
);
2144 addHRegUse(u
, HRmRead
, i
->ARM64in
.VLdStS
.sD
);
2147 case ARM64in_VLdStD
:
2148 addHRegUse(u
, HRmRead
, i
->ARM64in
.VLdStD
.rN
);
2149 if (i
->ARM64in
.VLdStD
.isLoad
) {
2150 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VLdStD
.dD
);
2152 addHRegUse(u
, HRmRead
, i
->ARM64in
.VLdStD
.dD
);
2155 case ARM64in_VLdStQ
:
2156 addHRegUse(u
, HRmRead
, i
->ARM64in
.VLdStQ
.rN
);
2157 if (i
->ARM64in
.VLdStQ
.isLoad
)
2158 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VLdStQ
.rQ
);
2160 addHRegUse(u
, HRmRead
, i
->ARM64in
.VLdStQ
.rQ
);
2162 case ARM64in_VCvtI2F
:
2163 addHRegUse(u
, HRmRead
, i
->ARM64in
.VCvtI2F
.rS
);
2164 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VCvtI2F
.rD
);
2166 case ARM64in_VCvtF2I
:
2167 addHRegUse(u
, HRmRead
, i
->ARM64in
.VCvtF2I
.rS
);
2168 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VCvtF2I
.rD
);
2170 case ARM64in_VCvtSD
:
2171 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VCvtSD
.dst
);
2172 addHRegUse(u
, HRmRead
, i
->ARM64in
.VCvtSD
.src
);
2174 case ARM64in_VCvtHS
:
2175 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VCvtHS
.dst
);
2176 addHRegUse(u
, HRmRead
, i
->ARM64in
.VCvtHS
.src
);
2178 case ARM64in_VCvtHD
:
2179 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VCvtHD
.dst
);
2180 addHRegUse(u
, HRmRead
, i
->ARM64in
.VCvtHD
.src
);
2182 case ARM64in_VUnaryD
:
2183 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VUnaryD
.dst
);
2184 addHRegUse(u
, HRmRead
, i
->ARM64in
.VUnaryD
.src
);
2186 case ARM64in_VUnaryS
:
2187 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VUnaryS
.dst
);
2188 addHRegUse(u
, HRmRead
, i
->ARM64in
.VUnaryS
.src
);
2191 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VBinD
.dst
);
2192 addHRegUse(u
, HRmRead
, i
->ARM64in
.VBinD
.argL
);
2193 addHRegUse(u
, HRmRead
, i
->ARM64in
.VBinD
.argR
);
2196 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VBinS
.dst
);
2197 addHRegUse(u
, HRmRead
, i
->ARM64in
.VBinS
.argL
);
2198 addHRegUse(u
, HRmRead
, i
->ARM64in
.VBinS
.argR
);
2201 addHRegUse(u
, HRmRead
, i
->ARM64in
.VCmpD
.argL
);
2202 addHRegUse(u
, HRmRead
, i
->ARM64in
.VCmpD
.argR
);
2205 addHRegUse(u
, HRmRead
, i
->ARM64in
.VCmpS
.argL
);
2206 addHRegUse(u
, HRmRead
, i
->ARM64in
.VCmpS
.argR
);
2208 case ARM64in_VFCSel
:
2209 addHRegUse(u
, HRmRead
, i
->ARM64in
.VFCSel
.argL
);
2210 addHRegUse(u
, HRmRead
, i
->ARM64in
.VFCSel
.argR
);
2211 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VFCSel
.dst
);
2214 if (i
->ARM64in
.FPCR
.toFPCR
)
2215 addHRegUse(u
, HRmRead
, i
->ARM64in
.FPCR
.iReg
);
2217 addHRegUse(u
, HRmWrite
, i
->ARM64in
.FPCR
.iReg
);
2220 if (i
->ARM64in
.FPSR
.toFPSR
)
2221 addHRegUse(u
, HRmRead
, i
->ARM64in
.FPSR
.iReg
);
2223 addHRegUse(u
, HRmWrite
, i
->ARM64in
.FPSR
.iReg
);
2226 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VBinV
.dst
);
2227 addHRegUse(u
, HRmRead
, i
->ARM64in
.VBinV
.argL
);
2228 addHRegUse(u
, HRmRead
, i
->ARM64in
.VBinV
.argR
);
2230 case ARM64in_VModifyV
:
2231 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VModifyV
.mod
);
2232 addHRegUse(u
, HRmRead
, i
->ARM64in
.VModifyV
.mod
);
2233 addHRegUse(u
, HRmRead
, i
->ARM64in
.VModifyV
.arg
);
2235 case ARM64in_VUnaryV
:
2236 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VUnaryV
.dst
);
2237 addHRegUse(u
, HRmRead
, i
->ARM64in
.VUnaryV
.arg
);
2239 case ARM64in_VNarrowV
:
2240 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VNarrowV
.dst
);
2241 addHRegUse(u
, HRmRead
, i
->ARM64in
.VNarrowV
.src
);
2243 case ARM64in_VShiftImmV
:
2244 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VShiftImmV
.dst
);
2245 addHRegUse(u
, HRmRead
, i
->ARM64in
.VShiftImmV
.src
);
2248 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VExtV
.dst
);
2249 addHRegUse(u
, HRmRead
, i
->ARM64in
.VExtV
.srcLo
);
2250 addHRegUse(u
, HRmRead
, i
->ARM64in
.VExtV
.srcHi
);
2253 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VImmQ
.rQ
);
2255 case ARM64in_VDfromX
:
2256 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VDfromX
.rD
);
2257 addHRegUse(u
, HRmRead
, i
->ARM64in
.VDfromX
.rX
);
2259 case ARM64in_VQfromX
:
2260 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VQfromX
.rQ
);
2261 addHRegUse(u
, HRmRead
, i
->ARM64in
.VQfromX
.rXlo
);
2263 case ARM64in_VQfromXX
:
2264 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VQfromXX
.rQ
);
2265 addHRegUse(u
, HRmRead
, i
->ARM64in
.VQfromXX
.rXhi
);
2266 addHRegUse(u
, HRmRead
, i
->ARM64in
.VQfromXX
.rXlo
);
2268 case ARM64in_VXfromQ
:
2269 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VXfromQ
.rX
);
2270 addHRegUse(u
, HRmRead
, i
->ARM64in
.VXfromQ
.rQ
);
2272 case ARM64in_VXfromDorS
:
2273 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VXfromDorS
.rX
);
2274 addHRegUse(u
, HRmRead
, i
->ARM64in
.VXfromDorS
.rDorS
);
2277 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VMov
.dst
);
2278 addHRegUse(u
, HRmRead
, i
->ARM64in
.VMov
.src
);
2279 u
->isRegRegMove
= True
;
2280 u
->regMoveSrc
= i
->ARM64in
.VMov
.src
;
2281 u
->regMoveDst
= i
->ARM64in
.VMov
.dst
;
2283 case ARM64in_EvCheck
:
2284 /* We expect both amodes only to mention x21, so this is in
2285 fact pointless, since x21 isn't allocatable, but
2287 addRegUsage_ARM64AMode(u
, i
->ARM64in
.EvCheck
.amCounter
);
2288 addRegUsage_ARM64AMode(u
, i
->ARM64in
.EvCheck
.amFailAddr
);
2289 addHRegUse(u
, HRmWrite
, hregARM64_X9()); /* also unavail to RA */
2291 case ARM64in_ProfInc
:
2292 /* Again, pointless to actually state these since neither
2293 is available to RA. */
2294 addHRegUse(u
, HRmWrite
, hregARM64_X9()); /* unavail to RA */
2295 addHRegUse(u
, HRmWrite
, hregARM64_X8()); /* unavail to RA */
2299 vpanic("getRegUsage_ARM64Instr");
2304 void mapRegs_ARM64Instr ( HRegRemap
* m
, ARM64Instr
* i
, Bool mode64
)
2306 vassert(mode64
== True
);
2309 i
->ARM64in
.Arith
.dst
= lookupHRegRemap(m
, i
->ARM64in
.Arith
.dst
);
2310 i
->ARM64in
.Arith
.argL
= lookupHRegRemap(m
, i
->ARM64in
.Arith
.argL
);
2311 mapRegs_ARM64RIA(m
, i
->ARM64in
.Arith
.argR
);
2314 i
->ARM64in
.Cmp
.argL
= lookupHRegRemap(m
, i
->ARM64in
.Cmp
.argL
);
2315 mapRegs_ARM64RIA(m
, i
->ARM64in
.Cmp
.argR
);
2318 i
->ARM64in
.Logic
.dst
= lookupHRegRemap(m
, i
->ARM64in
.Logic
.dst
);
2319 i
->ARM64in
.Logic
.argL
= lookupHRegRemap(m
, i
->ARM64in
.Logic
.argL
);
2320 mapRegs_ARM64RIL(m
, i
->ARM64in
.Logic
.argR
);
2323 i
->ARM64in
.Test
.argL
= lookupHRegRemap(m
, i
->ARM64in
.Test
.argL
);
2324 mapRegs_ARM64RIL(m
, i
->ARM64in
.Logic
.argR
);
2327 i
->ARM64in
.Shift
.dst
= lookupHRegRemap(m
, i
->ARM64in
.Shift
.dst
);
2328 i
->ARM64in
.Shift
.argL
= lookupHRegRemap(m
, i
->ARM64in
.Shift
.argL
);
2329 mapRegs_ARM64RI6(m
, i
->ARM64in
.Shift
.argR
);
2332 i
->ARM64in
.Unary
.dst
= lookupHRegRemap(m
, i
->ARM64in
.Unary
.dst
);
2333 i
->ARM64in
.Unary
.src
= lookupHRegRemap(m
, i
->ARM64in
.Unary
.src
);
2336 i
->ARM64in
.Set64
.dst
= lookupHRegRemap(m
, i
->ARM64in
.Set64
.dst
);
2339 i
->ARM64in
.MovI
.dst
= lookupHRegRemap(m
, i
->ARM64in
.MovI
.dst
);
2340 i
->ARM64in
.MovI
.src
= lookupHRegRemap(m
, i
->ARM64in
.MovI
.src
);
2343 i
->ARM64in
.Imm64
.dst
= lookupHRegRemap(m
, i
->ARM64in
.Imm64
.dst
);
2345 case ARM64in_LdSt64
:
2346 i
->ARM64in
.LdSt64
.rD
= lookupHRegRemap(m
, i
->ARM64in
.LdSt64
.rD
);
2347 mapRegs_ARM64AMode(m
, i
->ARM64in
.LdSt64
.amode
);
2349 case ARM64in_LdSt32
:
2350 i
->ARM64in
.LdSt32
.rD
= lookupHRegRemap(m
, i
->ARM64in
.LdSt32
.rD
);
2351 mapRegs_ARM64AMode(m
, i
->ARM64in
.LdSt32
.amode
);
2353 case ARM64in_LdSt16
:
2354 i
->ARM64in
.LdSt16
.rD
= lookupHRegRemap(m
, i
->ARM64in
.LdSt16
.rD
);
2355 mapRegs_ARM64AMode(m
, i
->ARM64in
.LdSt16
.amode
);
2358 i
->ARM64in
.LdSt8
.rD
= lookupHRegRemap(m
, i
->ARM64in
.LdSt8
.rD
);
2359 mapRegs_ARM64AMode(m
, i
->ARM64in
.LdSt8
.amode
);
2361 case ARM64in_XDirect
:
2362 mapRegs_ARM64AMode(m
, i
->ARM64in
.XDirect
.amPC
);
2364 case ARM64in_XIndir
:
2365 i
->ARM64in
.XIndir
.dstGA
2366 = lookupHRegRemap(m
, i
->ARM64in
.XIndir
.dstGA
);
2367 mapRegs_ARM64AMode(m
, i
->ARM64in
.XIndir
.amPC
);
2369 case ARM64in_XAssisted
:
2370 i
->ARM64in
.XAssisted
.dstGA
2371 = lookupHRegRemap(m
, i
->ARM64in
.XAssisted
.dstGA
);
2372 mapRegs_ARM64AMode(m
, i
->ARM64in
.XAssisted
.amPC
);
2375 i
->ARM64in
.CSel
.dst
= lookupHRegRemap(m
, i
->ARM64in
.CSel
.dst
);
2376 i
->ARM64in
.CSel
.argL
= lookupHRegRemap(m
, i
->ARM64in
.CSel
.argL
);
2377 i
->ARM64in
.CSel
.argR
= lookupHRegRemap(m
, i
->ARM64in
.CSel
.argR
);
2381 case ARM64in_AddToSP
:
2383 case ARM64in_FromSP
:
2384 i
->ARM64in
.FromSP
.dst
= lookupHRegRemap(m
, i
->ARM64in
.FromSP
.dst
);
2387 i
->ARM64in
.Mul
.dst
= lookupHRegRemap(m
, i
->ARM64in
.Mul
.dst
);
2388 i
->ARM64in
.Mul
.argL
= lookupHRegRemap(m
, i
->ARM64in
.Mul
.argL
);
2389 i
->ARM64in
.Mul
.argR
= lookupHRegRemap(m
, i
->ARM64in
.Mul
.argR
);
2399 case ARM64in_MFence
:
2403 case ARM64in_VLdStH
:
2404 i
->ARM64in
.VLdStH
.hD
= lookupHRegRemap(m
, i
->ARM64in
.VLdStH
.hD
);
2405 i
->ARM64in
.VLdStH
.rN
= lookupHRegRemap(m
, i
->ARM64in
.VLdStH
.rN
);
2407 case ARM64in_VLdStS
:
2408 i
->ARM64in
.VLdStS
.sD
= lookupHRegRemap(m
, i
->ARM64in
.VLdStS
.sD
);
2409 i
->ARM64in
.VLdStS
.rN
= lookupHRegRemap(m
, i
->ARM64in
.VLdStS
.rN
);
2411 case ARM64in_VLdStD
:
2412 i
->ARM64in
.VLdStD
.dD
= lookupHRegRemap(m
, i
->ARM64in
.VLdStD
.dD
);
2413 i
->ARM64in
.VLdStD
.rN
= lookupHRegRemap(m
, i
->ARM64in
.VLdStD
.rN
);
2415 case ARM64in_VLdStQ
:
2416 i
->ARM64in
.VLdStQ
.rQ
= lookupHRegRemap(m
, i
->ARM64in
.VLdStQ
.rQ
);
2417 i
->ARM64in
.VLdStQ
.rN
= lookupHRegRemap(m
, i
->ARM64in
.VLdStQ
.rN
);
2419 case ARM64in_VCvtI2F
:
2420 i
->ARM64in
.VCvtI2F
.rS
= lookupHRegRemap(m
, i
->ARM64in
.VCvtI2F
.rS
);
2421 i
->ARM64in
.VCvtI2F
.rD
= lookupHRegRemap(m
, i
->ARM64in
.VCvtI2F
.rD
);
2423 case ARM64in_VCvtF2I
:
2424 i
->ARM64in
.VCvtF2I
.rS
= lookupHRegRemap(m
, i
->ARM64in
.VCvtF2I
.rS
);
2425 i
->ARM64in
.VCvtF2I
.rD
= lookupHRegRemap(m
, i
->ARM64in
.VCvtF2I
.rD
);
2427 case ARM64in_VCvtSD
:
2428 i
->ARM64in
.VCvtSD
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VCvtSD
.dst
);
2429 i
->ARM64in
.VCvtSD
.src
= lookupHRegRemap(m
, i
->ARM64in
.VCvtSD
.src
);
2431 case ARM64in_VCvtHS
:
2432 i
->ARM64in
.VCvtHS
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VCvtHS
.dst
);
2433 i
->ARM64in
.VCvtHS
.src
= lookupHRegRemap(m
, i
->ARM64in
.VCvtHS
.src
);
2435 case ARM64in_VCvtHD
:
2436 i
->ARM64in
.VCvtHD
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VCvtHD
.dst
);
2437 i
->ARM64in
.VCvtHD
.src
= lookupHRegRemap(m
, i
->ARM64in
.VCvtHD
.src
);
2439 case ARM64in_VUnaryD
:
2440 i
->ARM64in
.VUnaryD
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VUnaryD
.dst
);
2441 i
->ARM64in
.VUnaryD
.src
= lookupHRegRemap(m
, i
->ARM64in
.VUnaryD
.src
);
2443 case ARM64in_VUnaryS
:
2444 i
->ARM64in
.VUnaryS
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VUnaryS
.dst
);
2445 i
->ARM64in
.VUnaryS
.src
= lookupHRegRemap(m
, i
->ARM64in
.VUnaryS
.src
);
2448 i
->ARM64in
.VBinD
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VBinD
.dst
);
2449 i
->ARM64in
.VBinD
.argL
= lookupHRegRemap(m
, i
->ARM64in
.VBinD
.argL
);
2450 i
->ARM64in
.VBinD
.argR
= lookupHRegRemap(m
, i
->ARM64in
.VBinD
.argR
);
2453 i
->ARM64in
.VBinS
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VBinS
.dst
);
2454 i
->ARM64in
.VBinS
.argL
= lookupHRegRemap(m
, i
->ARM64in
.VBinS
.argL
);
2455 i
->ARM64in
.VBinS
.argR
= lookupHRegRemap(m
, i
->ARM64in
.VBinS
.argR
);
2458 i
->ARM64in
.VCmpD
.argL
= lookupHRegRemap(m
, i
->ARM64in
.VCmpD
.argL
);
2459 i
->ARM64in
.VCmpD
.argR
= lookupHRegRemap(m
, i
->ARM64in
.VCmpD
.argR
);
2462 i
->ARM64in
.VCmpS
.argL
= lookupHRegRemap(m
, i
->ARM64in
.VCmpS
.argL
);
2463 i
->ARM64in
.VCmpS
.argR
= lookupHRegRemap(m
, i
->ARM64in
.VCmpS
.argR
);
2465 case ARM64in_VFCSel
:
2466 i
->ARM64in
.VFCSel
.argL
= lookupHRegRemap(m
, i
->ARM64in
.VFCSel
.argL
);
2467 i
->ARM64in
.VFCSel
.argR
= lookupHRegRemap(m
, i
->ARM64in
.VFCSel
.argR
);
2468 i
->ARM64in
.VFCSel
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VFCSel
.dst
);
2471 i
->ARM64in
.FPCR
.iReg
= lookupHRegRemap(m
, i
->ARM64in
.FPCR
.iReg
);
2474 i
->ARM64in
.FPSR
.iReg
= lookupHRegRemap(m
, i
->ARM64in
.FPSR
.iReg
);
2477 i
->ARM64in
.VBinV
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VBinV
.dst
);
2478 i
->ARM64in
.VBinV
.argL
= lookupHRegRemap(m
, i
->ARM64in
.VBinV
.argL
);
2479 i
->ARM64in
.VBinV
.argR
= lookupHRegRemap(m
, i
->ARM64in
.VBinV
.argR
);
2481 case ARM64in_VModifyV
:
2482 i
->ARM64in
.VModifyV
.mod
= lookupHRegRemap(m
, i
->ARM64in
.VModifyV
.mod
);
2483 i
->ARM64in
.VModifyV
.arg
= lookupHRegRemap(m
, i
->ARM64in
.VModifyV
.arg
);
2485 case ARM64in_VUnaryV
:
2486 i
->ARM64in
.VUnaryV
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VUnaryV
.dst
);
2487 i
->ARM64in
.VUnaryV
.arg
= lookupHRegRemap(m
, i
->ARM64in
.VUnaryV
.arg
);
2489 case ARM64in_VNarrowV
:
2490 i
->ARM64in
.VNarrowV
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VNarrowV
.dst
);
2491 i
->ARM64in
.VNarrowV
.src
= lookupHRegRemap(m
, i
->ARM64in
.VNarrowV
.src
);
2493 case ARM64in_VShiftImmV
:
2494 i
->ARM64in
.VShiftImmV
.dst
2495 = lookupHRegRemap(m
, i
->ARM64in
.VShiftImmV
.dst
);
2496 i
->ARM64in
.VShiftImmV
.src
2497 = lookupHRegRemap(m
, i
->ARM64in
.VShiftImmV
.src
);
2500 i
->ARM64in
.VExtV
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VExtV
.dst
);
2501 i
->ARM64in
.VExtV
.srcLo
= lookupHRegRemap(m
, i
->ARM64in
.VExtV
.srcLo
);
2502 i
->ARM64in
.VExtV
.srcHi
= lookupHRegRemap(m
, i
->ARM64in
.VExtV
.srcHi
);
2505 i
->ARM64in
.VImmQ
.rQ
= lookupHRegRemap(m
, i
->ARM64in
.VImmQ
.rQ
);
2507 case ARM64in_VDfromX
:
2508 i
->ARM64in
.VDfromX
.rD
2509 = lookupHRegRemap(m
, i
->ARM64in
.VDfromX
.rD
);
2510 i
->ARM64in
.VDfromX
.rX
2511 = lookupHRegRemap(m
, i
->ARM64in
.VDfromX
.rX
);
2513 case ARM64in_VQfromX
:
2514 i
->ARM64in
.VQfromX
.rQ
2515 = lookupHRegRemap(m
, i
->ARM64in
.VQfromX
.rQ
);
2516 i
->ARM64in
.VQfromX
.rXlo
2517 = lookupHRegRemap(m
, i
->ARM64in
.VQfromX
.rXlo
);
2519 case ARM64in_VQfromXX
:
2520 i
->ARM64in
.VQfromXX
.rQ
2521 = lookupHRegRemap(m
, i
->ARM64in
.VQfromXX
.rQ
);
2522 i
->ARM64in
.VQfromXX
.rXhi
2523 = lookupHRegRemap(m
, i
->ARM64in
.VQfromXX
.rXhi
);
2524 i
->ARM64in
.VQfromXX
.rXlo
2525 = lookupHRegRemap(m
, i
->ARM64in
.VQfromXX
.rXlo
);
2527 case ARM64in_VXfromQ
:
2528 i
->ARM64in
.VXfromQ
.rX
2529 = lookupHRegRemap(m
, i
->ARM64in
.VXfromQ
.rX
);
2530 i
->ARM64in
.VXfromQ
.rQ
2531 = lookupHRegRemap(m
, i
->ARM64in
.VXfromQ
.rQ
);
2533 case ARM64in_VXfromDorS
:
2534 i
->ARM64in
.VXfromDorS
.rX
2535 = lookupHRegRemap(m
, i
->ARM64in
.VXfromDorS
.rX
);
2536 i
->ARM64in
.VXfromDorS
.rDorS
2537 = lookupHRegRemap(m
, i
->ARM64in
.VXfromDorS
.rDorS
);
2540 i
->ARM64in
.VMov
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VMov
.dst
);
2541 i
->ARM64in
.VMov
.src
= lookupHRegRemap(m
, i
->ARM64in
.VMov
.src
);
2543 case ARM64in_EvCheck
:
2544 /* We expect both amodes only to mention x21, so this is in
2545 fact pointless, since x21 isn't allocatable, but
2547 mapRegs_ARM64AMode(m
, i
->ARM64in
.EvCheck
.amCounter
);
2548 mapRegs_ARM64AMode(m
, i
->ARM64in
.EvCheck
.amFailAddr
);
2550 case ARM64in_ProfInc
:
2551 /* hardwires x8 and x9 -- nothing to modify. */
2555 vpanic("mapRegs_ARM64Instr");
2559 /* Generate arm spill/reload instructions under the direction of the
2560 register allocator. Note it's critical these don't write the
2563 void genSpill_ARM64 ( /*OUT*/HInstr
** i1
, /*OUT*/HInstr
** i2
,
2564 HReg rreg
, Int offsetB
, Bool mode64
)
2567 vassert(offsetB
>= 0);
2568 vassert(!hregIsVirtual(rreg
));
2569 vassert(mode64
== True
);
2571 rclass
= hregClass(rreg
);
2574 vassert(0 == (offsetB
& 7));
2576 vassert(offsetB
< 4096);
2577 *i1
= ARM64Instr_LdSt64(
2580 ARM64AMode_RI12(hregARM64_X21(), offsetB
, 8)
2584 vassert(0 == (offsetB
& 7));
2585 vassert(offsetB
>= 0 && offsetB
< 32768);
2586 *i1
= ARM64Instr_VLdStD(False
/*!isLoad*/,
2587 rreg
, hregARM64_X21(), offsetB
);
2590 HReg x21
= hregARM64_X21(); // baseblock
2591 HReg x9
= hregARM64_X9(); // spill temporary
2592 vassert(0 == (offsetB
& 15)); // check sane alignment
2593 vassert(offsetB
< 4096);
2594 *i1
= ARM64Instr_Arith(x9
, x21
, ARM64RIA_I12(offsetB
, 0), True
);
2595 *i2
= ARM64Instr_VLdStQ(False
/*!isLoad*/, rreg
, x9
);
2599 ppHRegClass(rclass
);
2600 vpanic("genSpill_ARM: unimplemented regclass");
2604 void genReload_ARM64 ( /*OUT*/HInstr
** i1
, /*OUT*/HInstr
** i2
,
2605 HReg rreg
, Int offsetB
, Bool mode64
)
2608 vassert(offsetB
>= 0);
2609 vassert(!hregIsVirtual(rreg
));
2610 vassert(mode64
== True
);
2612 rclass
= hregClass(rreg
);
2615 vassert(0 == (offsetB
& 7));
2617 vassert(offsetB
< 4096);
2618 *i1
= ARM64Instr_LdSt64(
2621 ARM64AMode_RI12(hregARM64_X21(), offsetB
, 8)
2625 vassert(0 == (offsetB
& 7));
2626 vassert(offsetB
>= 0 && offsetB
< 32768);
2627 *i1
= ARM64Instr_VLdStD(True
/*isLoad*/,
2628 rreg
, hregARM64_X21(), offsetB
);
2631 HReg x21
= hregARM64_X21(); // baseblock
2632 HReg x9
= hregARM64_X9(); // spill temporary
2633 vassert(0 == (offsetB
& 15)); // check sane alignment
2634 vassert(offsetB
< 4096);
2635 *i1
= ARM64Instr_Arith(x9
, x21
, ARM64RIA_I12(offsetB
, 0), True
);
2636 *i2
= ARM64Instr_VLdStQ(True
/*isLoad*/, rreg
, x9
);
2640 ppHRegClass(rclass
);
2641 vpanic("genReload_ARM: unimplemented regclass");
2645 ARM64Instr
* genMove_ARM64(HReg from
, HReg to
, Bool mode64
)
2647 switch (hregClass(from
)) {
2649 return ARM64Instr_MovI(to
, from
);
2651 return ARM64Instr_VMov(8, to
, from
);
2653 return ARM64Instr_VMov(16, to
, from
);
2655 ppHRegClass(hregClass(from
));
2656 vpanic("genMove_ARM64: unimplemented regclass");
2661 /* Emit an instruction into buf and return the number of bytes used.
2662 Note that buf is not the insn's final place, and therefore it is
2663 imperative to emit position-independent code. */
2665 static inline UInt
iregEnc ( HReg r
)
2668 vassert(hregClass(r
) == HRcInt64
);
2669 vassert(!hregIsVirtual(r
));
2670 n
= hregEncoding(r
);
2675 static inline UInt
dregEnc ( HReg r
)
2678 vassert(hregClass(r
) == HRcFlt64
);
2679 vassert(!hregIsVirtual(r
));
2680 n
= hregEncoding(r
);
2685 static inline UInt
qregEnc ( HReg r
)
2688 vassert(hregClass(r
) == HRcVec128
);
2689 vassert(!hregIsVirtual(r
));
2690 n
= hregEncoding(r
);
/* Named bit patterns used when assembling instruction words.  Each
   Xb...b macro's name spells out its binary value; widths of 2..8 bits
   are all funnelled through BITS4/BITS8. */

#define BITS4(zzb3,zzb2,zzb1,zzb0) \
   (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))

/* 2-bit fields */
#define X00  BITS4(0,0, 0,0)
#define X01  BITS4(0,0, 0,1)
#define X10  BITS4(0,0, 1,0)
#define X11  BITS4(0,0, 1,1)

/* 3-bit fields */
#define X000 BITS4(0, 0,0,0)
#define X001 BITS4(0, 0,0,1)
#define X010 BITS4(0, 0,1,0)
#define X011 BITS4(0, 0,1,1)
#define X100 BITS4(0, 1,0,0)
#define X101 BITS4(0, 1,0,1)
#define X110 BITS4(0, 1,1,0)
#define X111 BITS4(0, 1,1,1)

/* 4-bit fields */
#define X0000 BITS4(0,0,0,0)
#define X0001 BITS4(0,0,0,1)
#define X0010 BITS4(0,0,1,0)
#define X0011 BITS4(0,0,1,1)

#define BITS8(zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1,zzb0) \
   ((BITS4(zzb7,zzb6,zzb5,zzb4) << 4) | BITS4(zzb3,zzb2,zzb1,zzb0))

/* 5-bit fields */
#define X00000   BITS8(0,0,0, 0,0,0,0,0)
#define X00001   BITS8(0,0,0, 0,0,0,0,1)
#define X00110   BITS8(0,0,0, 0,0,1,1,0)
#define X00111   BITS8(0,0,0, 0,0,1,1,1)
#define X01000   BITS8(0,0,0, 0,1,0,0,0)
#define X10000   BITS8(0,0,0, 1,0,0,0,0)
#define X11000   BITS8(0,0,0, 1,1,0,0,0)
#define X11110   BITS8(0,0,0, 1,1,1,1,0)
#define X11111   BITS8(0,0,0, 1,1,1,1,1)

/* 6-bit fields */
#define X000000  BITS8(0,0, 0,0,0,0,0,0)
#define X000001  BITS8(0,0, 0,0,0,0,0,1)
#define X000010  BITS8(0,0, 0,0,0,0,1,0)
#define X000011  BITS8(0,0, 0,0,0,0,1,1)
#define X000100  BITS8(0,0, 0,0,0,1,0,0)
#define X000110  BITS8(0,0, 0,0,0,1,1,0)
#define X000111  BITS8(0,0, 0,0,0,1,1,1)
#define X001000  BITS8(0,0, 0,0,1,0,0,0)
#define X001001  BITS8(0,0, 0,0,1,0,0,1)
#define X001010  BITS8(0,0, 0,0,1,0,1,0)
#define X001011  BITS8(0,0, 0,0,1,0,1,1)
#define X001101  BITS8(0,0, 0,0,1,1,0,1)
#define X001110  BITS8(0,0, 0,0,1,1,1,0)
#define X001111  BITS8(0,0, 0,0,1,1,1,1)
#define X010000  BITS8(0,0, 0,1,0,0,0,0)
#define X010001  BITS8(0,0, 0,1,0,0,0,1)
#define X010010  BITS8(0,0, 0,1,0,0,1,0)
#define X010011  BITS8(0,0, 0,1,0,0,1,1)
#define X010101  BITS8(0,0, 0,1,0,1,0,1)
#define X010110  BITS8(0,0, 0,1,0,1,1,0)
#define X010111  BITS8(0,0, 0,1,0,1,1,1)
#define X011001  BITS8(0,0, 0,1,1,0,0,1)
#define X011010  BITS8(0,0, 0,1,1,0,1,0)
#define X011011  BITS8(0,0, 0,1,1,0,1,1)
#define X011101  BITS8(0,0, 0,1,1,1,0,1)
#define X011110  BITS8(0,0, 0,1,1,1,1,0)
#define X011111  BITS8(0,0, 0,1,1,1,1,1)
#define X100001  BITS8(0,0, 1,0,0,0,0,1)
#define X100011  BITS8(0,0, 1,0,0,0,1,1)
#define X100100  BITS8(0,0, 1,0,0,1,0,0)
#define X100101  BITS8(0,0, 1,0,0,1,0,1)
#define X100110  BITS8(0,0, 1,0,0,1,1,0)
#define X100111  BITS8(0,0, 1,0,0,1,1,1)
#define X101101  BITS8(0,0, 1,0,1,1,0,1)
#define X101110  BITS8(0,0, 1,0,1,1,1,0)
#define X110000  BITS8(0,0, 1,1,0,0,0,0)
#define X110001  BITS8(0,0, 1,1,0,0,0,1)
#define X110010  BITS8(0,0, 1,1,0,0,1,0)
#define X110100  BITS8(0,0, 1,1,0,1,0,0)
#define X110101  BITS8(0,0, 1,1,0,1,0,1)
#define X110110  BITS8(0,0, 1,1,0,1,1,0)
#define X110111  BITS8(0,0, 1,1,0,1,1,1)
#define X111000  BITS8(0,0, 1,1,1,0,0,0)
#define X111001  BITS8(0,0, 1,1,1,0,0,1)
#define X111101  BITS8(0,0, 1,1,1,1,0,1)
#define X111110  BITS8(0,0, 1,1,1,1,1,0)
#define X111111  BITS8(0,0, 1,1,1,1,1,1)

/* 7-bit fields */
#define X0001000 BITS8(0, 0,0,0,1,0,0,0)
#define X0010000 BITS8(0, 0,0,1,0,0,0,0)
#define X0100000 BITS8(0, 0,1,0,0,0,0,0)
#define X1000000 BITS8(0, 1,0,0,0,0,0,0)

/* 8-bit fields */
#define X00100000 BITS8(0,0,1,0,0,0,0,0)
#define X00100001 BITS8(0,0,1,0,0,0,0,1)
#define X00100010 BITS8(0,0,1,0,0,0,1,0)
#define X00100011 BITS8(0,0,1,0,0,0,1,1)
#define X01010000 BITS8(0,1,0,1,0,0,0,0)
#define X01010001 BITS8(0,1,0,1,0,0,0,1)
#define X01010100 BITS8(0,1,0,1,0,1,0,0)
#define X01011000 BITS8(0,1,0,1,1,0,0,0)
#define X01100000 BITS8(0,1,1,0,0,0,0,0)
#define X01100001 BITS8(0,1,1,0,0,0,0,1)
#define X01100010 BITS8(0,1,1,0,0,0,1,0)
#define X01100011 BITS8(0,1,1,0,0,0,1,1)
#define X01110000 BITS8(0,1,1,1,0,0,0,0)
#define X01110001 BITS8(0,1,1,1,0,0,0,1)
#define X01110010 BITS8(0,1,1,1,0,0,1,0)
#define X01110011 BITS8(0,1,1,1,0,0,1,1)
#define X01110100 BITS8(0,1,1,1,0,1,0,0)
#define X01110101 BITS8(0,1,1,1,0,1,0,1)
#define X01110110 BITS8(0,1,1,1,0,1,1,0)
#define X01110111 BITS8(0,1,1,1,0,1,1,1)
#define X11000001 BITS8(1,1,0,0,0,0,0,1)
#define X11000011 BITS8(1,1,0,0,0,0,1,1)
#define X11010100 BITS8(1,1,0,1,0,1,0,0)
#define X11010110 BITS8(1,1,0,1,0,1,1,0)
#define X11011000 BITS8(1,1,0,1,1,0,0,0)
#define X11011010 BITS8(1,1,0,1,1,0,1,0)
#define X11011110 BITS8(1,1,0,1,1,1,1,0)
#define X11100010 BITS8(1,1,1,0,0,0,1,0)
#define X11110001 BITS8(1,1,1,1,0,0,0,1)
#define X11110011 BITS8(1,1,1,1,0,0,1,1)
#define X11110101 BITS8(1,1,1,1,0,1,0,1)
#define X11110111 BITS8(1,1,1,1,0,1,1,1)
2817 /* --- 4 fields --- */
2819 static inline UInt
X_8_19_1_4 ( UInt f1
, UInt f2
, UInt f3
, UInt f4
) {
2820 vassert(8+19+1+4 == 32);
2821 vassert(f1
< (1<<8));
2822 vassert(f2
< (1<<19));
2823 vassert(f3
< (1<<1));
2824 vassert(f4
< (1<<4));
2833 /* --- 5 fields --- */
2835 static inline UInt
X_3_6_2_16_5 ( UInt f1
, UInt f2
,
2836 UInt f3
, UInt f4
, UInt f5
) {
2837 vassert(3+6+2+16+5 == 32);
2838 vassert(f1
< (1<<3));
2839 vassert(f2
< (1<<6));
2840 vassert(f3
< (1<<2));
2841 vassert(f4
< (1<<16));
2842 vassert(f5
< (1<<5));
2852 /* --- 6 fields --- */
2854 static inline UInt
X_2_6_2_12_5_5 ( UInt f1
, UInt f2
, UInt f3
,
2855 UInt f4
, UInt f5
, UInt f6
) {
2856 vassert(2+6+2+12+5+5 == 32);
2857 vassert(f1
< (1<<2));
2858 vassert(f2
< (1<<6));
2859 vassert(f3
< (1<<2));
2860 vassert(f4
< (1<<12));
2861 vassert(f5
< (1<<5));
2862 vassert(f6
< (1<<5));
2873 static inline UInt
X_3_8_5_6_5_5 ( UInt f1
, UInt f2
, UInt f3
,
2874 UInt f4
, UInt f5
, UInt f6
) {
2875 vassert(3+8+5+6+5+5 == 32);
2876 vassert(f1
< (1<<3));
2877 vassert(f2
< (1<<8));
2878 vassert(f3
< (1<<5));
2879 vassert(f4
< (1<<6));
2880 vassert(f5
< (1<<5));
2881 vassert(f6
< (1<<5));
2892 static inline UInt
X_3_5_8_6_5_5 ( UInt f1
, UInt f2
, UInt f3
,
2893 UInt f4
, UInt f5
, UInt f6
) {
2894 vassert(3+8+5+6+5+5 == 32);
2895 vassert(f1
< (1<<3));
2896 vassert(f2
< (1<<5));
2897 vassert(f3
< (1<<8));
2898 vassert(f4
< (1<<6));
2899 vassert(f5
< (1<<5));
2900 vassert(f6
< (1<<5));
2911 static inline UInt
X_3_6_7_6_5_5 ( UInt f1
, UInt f2
, UInt f3
,
2912 UInt f4
, UInt f5
, UInt f6
) {
2913 vassert(3+6+7+6+5+5 == 32);
2914 vassert(f1
< (1<<3));
2915 vassert(f2
< (1<<6));
2916 vassert(f3
< (1<<7));
2917 vassert(f4
< (1<<6));
2918 vassert(f5
< (1<<5));
2919 vassert(f6
< (1<<5));
2930 /* --- 7 fields --- */
2932 static inline UInt
X_2_6_3_9_2_5_5 ( UInt f1
, UInt f2
, UInt f3
,
2933 UInt f4
, UInt f5
, UInt f6
, UInt f7
) {
2934 vassert(2+6+3+9+2+5+5 == 32);
2935 vassert(f1
< (1<<2));
2936 vassert(f2
< (1<<6));
2937 vassert(f3
< (1<<3));
2938 vassert(f4
< (1<<9));
2939 vassert(f5
< (1<<2));
2940 vassert(f6
< (1<<5));
2941 vassert(f7
< (1<<5));
2953 static inline UInt
X_3_6_1_6_6_5_5 ( UInt f1
, UInt f2
, UInt f3
,
2954 UInt f4
, UInt f5
, UInt f6
, UInt f7
) {
2955 vassert(3+6+1+6+6+5+5 == 32);
2956 vassert(f1
< (1<<3));
2957 vassert(f2
< (1<<6));
2958 vassert(f3
< (1<<1));
2959 vassert(f4
< (1<<6));
2960 vassert(f5
< (1<<6));
2961 vassert(f6
< (1<<5));
2962 vassert(f7
< (1<<5));
2975 //ZZ #define X0000 BITS4(0,0,0,0)
2976 //ZZ #define X0001 BITS4(0,0,0,1)
2977 //ZZ #define X0010 BITS4(0,0,1,0)
2978 //ZZ #define X0011 BITS4(0,0,1,1)
2979 //ZZ #define X0100 BITS4(0,1,0,0)
2980 //ZZ #define X0101 BITS4(0,1,0,1)
2981 //ZZ #define X0110 BITS4(0,1,1,0)
2982 //ZZ #define X0111 BITS4(0,1,1,1)
2983 //ZZ #define X1000 BITS4(1,0,0,0)
2984 //ZZ #define X1001 BITS4(1,0,0,1)
2985 //ZZ #define X1010 BITS4(1,0,1,0)
2986 //ZZ #define X1011 BITS4(1,0,1,1)
2987 //ZZ #define X1100 BITS4(1,1,0,0)
2988 //ZZ #define X1101 BITS4(1,1,0,1)
2989 //ZZ #define X1110 BITS4(1,1,1,0)
2990 //ZZ #define X1111 BITS4(1,1,1,1)
/* Assemble 32-bit words from 4-bit nibble fields.  In each macro
   name, position 7 is bits 31..28 down to position 0 being bits
   3..0; an 'X' means the caller supplies that nibble, a '_' means
   those four bits are left as zero. */

/* Nibbles 7..3 supplied; bits 11..0 zero. */
#define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12))

/* Nibbles 7..2 supplied; bits 7..0 zero. */
#define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2)        \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8))

/* Nibbles 7..3 and 0 supplied; bits 11..4 zero. */
#define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0)        \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) <<  0))

/* Nibbles 7..5 and 1..0 supplied; bits 19..8 zero. */
#define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
  ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
   (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) |  \
   (((zzx0) & 0xF) << 0))

/* All eight nibbles supplied. */
#define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0)  \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8) |  \
    (((zzx1) & 0xF) <<  4) | (((zzx0) & 0xF) <<  0))

/* Only the top two nibbles supplied; bits 23..0 zero. */
#define XX______(zzx7,zzx6) \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
3023 /* Get an immediate into a register, using only that register. */
3024 static UInt
* imm64_to_ireg ( UInt
* p
, Int xD
, ULong imm64
)
3027 // This has to be special-cased, since the logic below
3028 // will leave the register unchanged in this case.
3029 // MOVZ xD, #0, LSL #0
3030 *p
++ = X_3_6_2_16_5(X110
, X100101
, X00
, 0/*imm16*/, xD
);
3034 // There must be at least one non-zero halfword. Find the
3035 // lowest nonzero such, and use MOVZ to install it and zero
3036 // out the rest of the register.
3038 h
[3] = (UShort
)((imm64
>> 48) & 0xFFFF);
3039 h
[2] = (UShort
)((imm64
>> 32) & 0xFFFF);
3040 h
[1] = (UShort
)((imm64
>> 16) & 0xFFFF);
3041 h
[0] = (UShort
)((imm64
>> 0) & 0xFFFF);
3044 for (i
= 0; i
< 4; i
++) {
3050 // MOVZ xD, h[i], LSL (16*i)
3051 *p
++ = X_3_6_2_16_5(X110
, X100101
, i
, h
[i
], xD
);
3053 // Work on upwards through h[i], using MOVK to stuff in any
3054 // remaining nonzero elements.
3056 for (; i
< 4; i
++) {
3059 // MOVK xD, h[i], LSL (16*i)
3060 *p
++ = X_3_6_2_16_5(X111
, X100101
, i
, h
[i
], xD
);
3066 /* Get an immediate into a register, using only that register, and
3067 generating exactly 4 instructions, regardless of the value of the
3068 immediate. This is used when generating sections of code that need
3069 to be patched later, so as to guarantee a specific size. */
3070 static UInt
* imm64_to_ireg_EXACTLY4 ( UInt
* p
, Int xD
, ULong imm64
)
3073 h
[3] = (UShort
)((imm64
>> 48) & 0xFFFF);
3074 h
[2] = (UShort
)((imm64
>> 32) & 0xFFFF);
3075 h
[1] = (UShort
)((imm64
>> 16) & 0xFFFF);
3076 h
[0] = (UShort
)((imm64
>> 0) & 0xFFFF);
3077 // Work on upwards through h[i], using MOVK to stuff in the
3078 // remaining elements.
3080 for (i
= 0; i
< 4; i
++) {
3082 // MOVZ xD, h[0], LSL (16*0)
3083 *p
++ = X_3_6_2_16_5(X110
, X100101
, i
, h
[i
], xD
);
3085 // MOVK xD, h[i], LSL (16*i)
3086 *p
++ = X_3_6_2_16_5(X111
, X100101
, i
, h
[i
], xD
);
3092 /* Check whether p points at a 4-insn sequence cooked up by
3093 imm64_to_ireg_EXACTLY4(). */
3094 static Bool
is_imm64_to_ireg_EXACTLY4 ( UInt
* p
, Int xD
, ULong imm64
)
3097 h
[3] = (UShort
)((imm64
>> 48) & 0xFFFF);
3098 h
[2] = (UShort
)((imm64
>> 32) & 0xFFFF);
3099 h
[1] = (UShort
)((imm64
>> 16) & 0xFFFF);
3100 h
[0] = (UShort
)((imm64
>> 0) & 0xFFFF);
3101 // Work on upwards through h[i], using MOVK to stuff in the
3102 // remaining elements.
3104 for (i
= 0; i
< 4; i
++) {
3107 // MOVZ xD, h[0], LSL (16*0)
3108 expected
= X_3_6_2_16_5(X110
, X100101
, i
, h
[i
], xD
);
3110 // MOVK xD, h[i], LSL (16*i)
3111 expected
= X_3_6_2_16_5(X111
, X100101
, i
, h
[i
], xD
);
3113 if (p
[i
] != expected
)
3120 /* Generate a 8 bit store or 8-to-64 unsigned widening load from/to
3121 rD, using the given amode for the address. */
3122 static UInt
* do_load_or_store8 ( UInt
* p
,
3123 Bool isLoad
, UInt wD
, ARM64AMode
* am
)
3126 if (am
->tag
== ARM64am_RI9
) {
3127 /* STURB Wd, [Xn|SP + simm9]: 00 111000 000 simm9 00 n d
3128 LDURB Wd, [Xn|SP + simm9]: 00 111000 010 simm9 00 n d
3130 Int simm9
= am
->ARM64am
.RI9
.simm9
;
3131 vassert(-256 <= simm9
&& simm9
<= 255);
3132 UInt instr
= X_2_6_3_9_2_5_5(X00
, X111000
, isLoad
? X010
: X000
,
3134 iregEnc(am
->ARM64am
.RI9
.reg
), wD
);
3138 if (am
->tag
== ARM64am_RI12
) {
3139 /* STRB Wd, [Xn|SP + uimm12 * 1]: 00 111 001 00 imm12 n d
3140 LDRB Wd, [Xn|SP + uimm12 * 1]: 00 111 001 01 imm12 n d
3142 UInt uimm12
= am
->ARM64am
.RI12
.uimm12
;
3143 UInt scale
= am
->ARM64am
.RI12
.szB
;
3144 vassert(scale
== 1); /* failure of this is serious. Do not ignore. */
3145 UInt xN
= iregEnc(am
->ARM64am
.RI12
.reg
);
3147 UInt instr
= X_2_6_2_12_5_5(X00
, X111001
, isLoad
? X01
: X00
,
3152 if (am
->tag
== ARM64am_RR
) {
3153 /* STRB Xd, [Xn|SP, Xm]: 00 111 000 001 m 011 0 10 n d
3154 LDRB Xd, [Xn|SP, Xm]: 00 111 000 011 m 011 0 10 n d
3156 UInt xN
= iregEnc(am
->ARM64am
.RR
.base
);
3157 UInt xM
= iregEnc(am
->ARM64am
.RR
.index
);
3159 UInt instr
= X_3_8_5_6_5_5(X001
, isLoad
? X11000011
: X11000001
,
3160 xM
, X011010
, xN
, wD
);
3164 vpanic("do_load_or_store8");
3169 /* Generate a 16 bit store or 16-to-64 unsigned widening load from/to
3170 rD, using the given amode for the address. */
3171 static UInt
* do_load_or_store16 ( UInt
* p
,
3172 Bool isLoad
, UInt wD
, ARM64AMode
* am
)
3175 if (am
->tag
== ARM64am_RI9
) {
3176 /* STURH Wd, [Xn|SP + simm9]: 01 111000 000 simm9 00 n d
3177 LDURH Wd, [Xn|SP + simm9]: 01 111000 010 simm9 00 n d
3179 Int simm9
= am
->ARM64am
.RI9
.simm9
;
3180 vassert(-256 <= simm9
&& simm9
<= 255);
3181 UInt instr
= X_2_6_3_9_2_5_5(X01
, X111000
, isLoad
? X010
: X000
,
3183 iregEnc(am
->ARM64am
.RI9
.reg
), wD
);
3187 if (am
->tag
== ARM64am_RI12
) {
3188 /* STRH Wd, [Xn|SP + uimm12 * 2]: 01 111 001 00 imm12 n d
3189 LDRH Wd, [Xn|SP + uimm12 * 2]: 01 111 001 01 imm12 n d
3191 UInt uimm12
= am
->ARM64am
.RI12
.uimm12
;
3192 UInt scale
= am
->ARM64am
.RI12
.szB
;
3193 vassert(scale
== 2); /* failure of this is serious. Do not ignore. */
3194 UInt xN
= iregEnc(am
->ARM64am
.RI12
.reg
);
3196 UInt instr
= X_2_6_2_12_5_5(X01
, X111001
, isLoad
? X01
: X00
,
3201 if (am
->tag
== ARM64am_RR
) {
3202 /* STRH Xd, [Xn|SP, Xm]: 01 111 000 001 m 011 0 10 n d
3203 LDRH Xd, [Xn|SP, Xm]: 01 111 000 011 m 011 0 10 n d
3205 UInt xN
= iregEnc(am
->ARM64am
.RR
.base
);
3206 UInt xM
= iregEnc(am
->ARM64am
.RR
.index
);
3208 UInt instr
= X_3_8_5_6_5_5(X011
, isLoad
? X11000011
: X11000001
,
3209 xM
, X011010
, xN
, wD
);
3213 vpanic("do_load_or_store16");
3218 /* Generate a 32 bit store or 32-to-64 unsigned widening load from/to
3219 rD, using the given amode for the address. */
3220 static UInt
* do_load_or_store32 ( UInt
* p
,
3221 Bool isLoad
, UInt wD
, ARM64AMode
* am
)
3224 if (am
->tag
== ARM64am_RI9
) {
3225 /* STUR Wd, [Xn|SP + simm9]: 10 111000 000 simm9 00 n d
3226 LDUR Wd, [Xn|SP + simm9]: 10 111000 010 simm9 00 n d
3228 Int simm9
= am
->ARM64am
.RI9
.simm9
;
3229 vassert(-256 <= simm9
&& simm9
<= 255);
3230 UInt instr
= X_2_6_3_9_2_5_5(X10
, X111000
, isLoad
? X010
: X000
,
3232 iregEnc(am
->ARM64am
.RI9
.reg
), wD
);
3236 if (am
->tag
== ARM64am_RI12
) {
3237 /* STR Wd, [Xn|SP + uimm12 * 4]: 10 111 001 00 imm12 n d
3238 LDR Wd, [Xn|SP + uimm12 * 4]: 10 111 001 01 imm12 n d
3240 UInt uimm12
= am
->ARM64am
.RI12
.uimm12
;
3241 UInt scale
= am
->ARM64am
.RI12
.szB
;
3242 vassert(scale
== 4); /* failure of this is serious. Do not ignore. */
3243 UInt xN
= iregEnc(am
->ARM64am
.RI12
.reg
);
3245 UInt instr
= X_2_6_2_12_5_5(X10
, X111001
, isLoad
? X01
: X00
,
3250 if (am
->tag
== ARM64am_RR
) {
3251 /* STR Wd, [Xn|SP, Xm]: 10 111 000 001 m 011 0 10 n d
3252 LDR Wd, [Xn|SP, Xm]: 10 111 000 011 m 011 0 10 n d
3254 UInt xN
= iregEnc(am
->ARM64am
.RR
.base
);
3255 UInt xM
= iregEnc(am
->ARM64am
.RR
.index
);
3257 UInt instr
= X_3_8_5_6_5_5(X101
, isLoad
? X11000011
: X11000001
,
3258 xM
, X011010
, xN
, wD
);
3262 vpanic("do_load_or_store32");
3267 /* Generate a 64 bit load or store to/from xD, using the given amode
3269 static UInt
* do_load_or_store64 ( UInt
* p
,
3270 Bool isLoad
, UInt xD
, ARM64AMode
* am
)
3272 /* In all these cases, Rn can't be 31 since that means SP. */
3274 if (am
->tag
== ARM64am_RI9
) {
3275 /* STUR Xd, [Xn|SP + simm9]: 11 111000 000 simm9 00 n d
3276 LDUR Xd, [Xn|SP + simm9]: 11 111000 010 simm9 00 n d
3278 Int simm9
= am
->ARM64am
.RI9
.simm9
;
3279 vassert(-256 <= simm9
&& simm9
<= 255);
3280 UInt xN
= iregEnc(am
->ARM64am
.RI9
.reg
);
3282 UInt instr
= X_2_6_3_9_2_5_5(X11
, X111000
, isLoad
? X010
: X000
,
3283 simm9
& 0x1FF, X00
, xN
, xD
);
3287 if (am
->tag
== ARM64am_RI12
) {
3288 /* STR Xd, [Xn|SP + uimm12 * 8]: 11 111 001 00 imm12 n d
3289 LDR Xd, [Xn|SP + uimm12 * 8]: 11 111 001 01 imm12 n d
3291 UInt uimm12
= am
->ARM64am
.RI12
.uimm12
;
3292 UInt scale
= am
->ARM64am
.RI12
.szB
;
3293 vassert(scale
== 8); /* failure of this is serious. Do not ignore. */
3294 UInt xN
= iregEnc(am
->ARM64am
.RI12
.reg
);
3296 UInt instr
= X_2_6_2_12_5_5(X11
, X111001
, isLoad
? X01
: X00
,
3301 if (am
->tag
== ARM64am_RR
) {
3302 /* STR Xd, [Xn|SP, Xm]: 11 111 000 001 m 011 0 10 n d
3303 LDR Xd, [Xn|SP, Xm]: 11 111 000 011 m 011 0 10 n d
3305 UInt xN
= iregEnc(am
->ARM64am
.RR
.base
);
3306 UInt xM
= iregEnc(am
->ARM64am
.RR
.index
);
3308 UInt instr
= X_3_8_5_6_5_5(X111
, isLoad
? X11000011
: X11000001
,
3309 xM
, X011010
, xN
, xD
);
3313 vpanic("do_load_or_store64");
3318 /* Emit an instruction into buf and return the number of bytes used.
3319 Note that buf is not the insn's final place, and therefore it is
3320 imperative to emit position-independent code. If the emitted
3321 instruction was a profiler inc, set *is_profInc to True, else
3322 leave it unchanged. */
3324 Int
emit_ARM64Instr ( /*MB_MOD*/Bool
* is_profInc
,
3325 UChar
* buf
, Int nbuf
, const ARM64Instr
* i
,
3326 Bool mode64
, VexEndness endness_host
,
3327 const void* disp_cp_chain_me_to_slowEP
,
3328 const void* disp_cp_chain_me_to_fastEP
,
3329 const void* disp_cp_xindir
,
3330 const void* disp_cp_xassisted
)
3332 UInt
* p
= (UInt
*)buf
;
3333 vassert(nbuf
>= 32);
3334 vassert(mode64
== True
);
3335 vassert(0 == (((HWord
)buf
) & 3));
3338 case ARM64in_Arith
: {
3339 UInt rD
= iregEnc(i
->ARM64in
.Arith
.dst
);
3340 UInt rN
= iregEnc(i
->ARM64in
.Arith
.argL
);
3341 ARM64RIA
* argR
= i
->ARM64in
.Arith
.argR
;
3342 switch (argR
->tag
) {
3344 *p
++ = X_2_6_2_12_5_5(
3345 i
->ARM64in
.Arith
.isAdd
? X10
: X11
,
3347 argR
->ARM64riA
.I12
.shift
== 12 ? X01
: X00
,
3348 argR
->ARM64riA
.I12
.imm12
, rN
, rD
3352 UInt rM
= iregEnc(i
->ARM64in
.Arith
.argR
->ARM64riA
.R
.reg
);
3353 *p
++ = X_3_8_5_6_5_5(
3354 i
->ARM64in
.Arith
.isAdd
? X100
: X110
,
3355 X01011000
, rM
, X000000
, rN
, rD
3365 UInt rD
= 31; /* XZR, we are going to dump the result */
3366 UInt rN
= iregEnc(i
->ARM64in
.Cmp
.argL
);
3367 ARM64RIA
* argR
= i
->ARM64in
.Cmp
.argR
;
3368 Bool is64
= i
->ARM64in
.Cmp
.is64
;
3369 switch (argR
->tag
) {
3371 /* 1 11 10001 sh imm12 Rn Rd = SUBS Xd, Xn, #imm */
3372 /* 0 11 10001 sh imm12 Rn Rd = SUBS Wd, Wn, #imm */
3373 *p
++ = X_2_6_2_12_5_5(
3374 is64
? X11
: X01
, X110001
,
3375 argR
->ARM64riA
.I12
.shift
== 12 ? X01
: X00
,
3376 argR
->ARM64riA
.I12
.imm12
, rN
, rD
);
3379 /* 1 11 01011 00 0 Rm 000000 Rn Rd = SUBS Xd, Xn, Xm */
3380 /* 0 11 01011 00 0 Rm 000000 Rn Rd = SUBS Wd, Wn, Wm */
3381 UInt rM
= iregEnc(i
->ARM64in
.Cmp
.argR
->ARM64riA
.R
.reg
);
3382 *p
++ = X_3_8_5_6_5_5(is64
? X111
: X011
,
3383 X01011000
, rM
, X000000
, rN
, rD
);
3391 case ARM64in_Logic
: {
3392 UInt rD
= iregEnc(i
->ARM64in
.Logic
.dst
);
3393 UInt rN
= iregEnc(i
->ARM64in
.Logic
.argL
);
3394 ARM64RIL
* argR
= i
->ARM64in
.Logic
.argR
;
3395 UInt opc
= 0; /* invalid */
3398 switch (i
->ARM64in
.Logic
.op
) {
3399 case ARM64lo_OR
: opc
= X101
; break;
3400 case ARM64lo_AND
: opc
= X100
; break;
3401 case ARM64lo_XOR
: opc
= X110
; break;
3405 switch (argR
->tag
) {
3406 case ARM64riL_I13
: {
3407 /* 1 01 100100 N immR immS Rn Rd = ORR <Xd|Sp>, Xn, #imm */
3408 /* 1 00 100100 N immR immS Rn Rd = AND <Xd|Sp>, Xn, #imm */
3409 /* 1 10 100100 N immR immS Rn Rd = EOR <Xd|Sp>, Xn, #imm */
3410 *p
++ = X_3_6_1_6_6_5_5(
3411 opc
, X100100
, argR
->ARM64riL
.I13
.bitN
,
3412 argR
->ARM64riL
.I13
.immR
, argR
->ARM64riL
.I13
.immS
,
3418 /* 1 01 01010 00 0 m 000000 n d = ORR Xd, Xn, Xm */
3419 /* 1 00 01010 00 0 m 000000 n d = AND Xd, Xn, Xm */
3420 /* 1 10 01010 00 0 m 000000 n d = EOR Xd, Xn, Xm */
3421 UInt rM
= iregEnc(argR
->ARM64riL
.R
.reg
);
3423 *p
++ = X_3_8_5_6_5_5(opc
, X01010000
, rM
, X000000
, rN
, rD
);
3431 case ARM64in_Test
: {
3432 UInt rD
= 31; /* XZR, we are going to dump the result */
3433 UInt rN
= iregEnc(i
->ARM64in
.Test
.argL
);
3434 ARM64RIL
* argR
= i
->ARM64in
.Test
.argR
;
3435 switch (argR
->tag
) {
3436 case ARM64riL_I13
: {
3437 /* 1 11 100100 N immR immS Rn Rd = ANDS Xd, Xn, #imm */
3438 *p
++ = X_3_6_1_6_6_5_5(
3439 X111
, X100100
, argR
->ARM64riL
.I13
.bitN
,
3440 argR
->ARM64riL
.I13
.immR
, argR
->ARM64riL
.I13
.immS
,
3450 case ARM64in_Shift
: {
3451 UInt rD
= iregEnc(i
->ARM64in
.Shift
.dst
);
3452 UInt rN
= iregEnc(i
->ARM64in
.Shift
.argL
);
3453 ARM64RI6
* argR
= i
->ARM64in
.Shift
.argR
;
3456 switch (argR
->tag
) {
3458 /* 110 1001101 (63-sh) (64-sh) nn dd LSL Xd, Xn, sh */
3459 /* 110 1001101 sh 63 nn dd LSR Xd, Xn, sh */
3460 /* 100 1001101 sh 63 nn dd ASR Xd, Xn, sh */
3461 UInt sh
= argR
->ARM64ri6
.I6
.imm6
;
3462 vassert(sh
> 0 && sh
< 64);
3463 switch (i
->ARM64in
.Shift
.op
) {
3465 *p
++ = X_3_6_1_6_6_5_5(X110
, X100110
,
3466 1, 64-sh
, 63-sh
, rN
, rD
);
3469 *p
++ = X_3_6_1_6_6_5_5(X110
, X100110
, 1, sh
, 63, rN
, rD
);
3472 *p
++ = X_3_6_1_6_6_5_5(X100
, X100110
, 1, sh
, 63, rN
, rD
);
3480 /* 100 1101 0110 mm 001000 nn dd LSL Xd, Xn, Xm */
3481 /* 100 1101 0110 mm 001001 nn dd LSR Xd, Xn, Xm */
3482 /* 100 1101 0110 mm 001010 nn dd ASR Xd, Xn, Xm */
3483 UInt rM
= iregEnc(argR
->ARM64ri6
.R
.reg
);
3486 switch (i
->ARM64in
.Shift
.op
) {
3487 case ARM64sh_SHL
: subOpc
= X001000
; break;
3488 case ARM64sh_SHR
: subOpc
= X001001
; break;
3489 case ARM64sh_SAR
: subOpc
= X001010
; break;
3490 default: vassert(0);
3492 *p
++ = X_3_8_5_6_5_5(X100
, X11010110
, rM
, subOpc
, rN
, rD
);
3500 case ARM64in_Unary
: {
3501 UInt rDst
= iregEnc(i
->ARM64in
.Unary
.dst
);
3502 UInt rSrc
= iregEnc(i
->ARM64in
.Unary
.src
);
3503 switch (i
->ARM64in
.Unary
.op
) {
3505 /* 1 10 1101 0110 00000 00010 0 nn dd CLZ Xd, Xn */
3506 /* 1 10 1101 0110 00000 00010 1 nn dd CLS Xd, Xn (unimp) */
3507 *p
++ = X_3_8_5_6_5_5(X110
,
3508 X11010110
, X00000
, X000100
, rSrc
, rDst
);
3511 /* 1 10 01011 000 m 000000 11111 d NEG Xd,Xm */
3512 /* 0 10 01011 000 m 000000 11111 d NEG Wd,Wm (unimp) */
3513 *p
++ = X_3_8_5_6_5_5(X110
,
3514 X01011000
, rSrc
, X000000
, X11111
, rDst
);
3517 /* 1 01 01010 00 1 m 000000 11111 d MVN Xd,Xm */
3518 *p
++ = X_3_8_5_6_5_5(X101
,
3519 X01010001
, rSrc
, X000000
, X11111
, rDst
);
3527 case ARM64in_Set64
: {
3528 /* 1 00 1101 0100 11111 invert(cond) 01 11111 Rd CSET Rd, Cond */
3529 UInt rDst
= iregEnc(i
->ARM64in
.Set64
.dst
);
3530 UInt cc
= (UInt
)i
->ARM64in
.Set64
.cond
;
3532 *p
++ = X_3_8_5_6_5_5(X100
, X11010100
, X11111
,
3533 ((cc
^ 1) << 2) | X01
, X11111
, rDst
);
3536 case ARM64in_MovI
: {
3537 /* We generate the "preferred form", ORR Xd, XZR, Xm
3538 101 01010 00 0 m 000000 11111 d
3540 UInt instr
= 0xAA0003E0;
3541 UInt d
= iregEnc(i
->ARM64in
.MovI
.dst
);
3542 UInt m
= iregEnc(i
->ARM64in
.MovI
.src
);
3543 *p
++ = instr
| ((m
& 31) << 16) | ((d
& 31) << 0);
3546 case ARM64in_Imm64
: {
3547 p
= imm64_to_ireg( p
, iregEnc(i
->ARM64in
.Imm64
.dst
),
3548 i
->ARM64in
.Imm64
.imm64
);
3551 case ARM64in_LdSt64
: {
3552 p
= do_load_or_store64( p
, i
->ARM64in
.LdSt64
.isLoad
,
3553 iregEnc(i
->ARM64in
.LdSt64
.rD
),
3554 i
->ARM64in
.LdSt64
.amode
);
3557 case ARM64in_LdSt32
: {
3558 p
= do_load_or_store32( p
, i
->ARM64in
.LdSt32
.isLoad
,
3559 iregEnc(i
->ARM64in
.LdSt32
.rD
),
3560 i
->ARM64in
.LdSt32
.amode
);
3563 case ARM64in_LdSt16
: {
3564 p
= do_load_or_store16( p
, i
->ARM64in
.LdSt16
.isLoad
,
3565 iregEnc(i
->ARM64in
.LdSt16
.rD
),
3566 i
->ARM64in
.LdSt16
.amode
);
3569 case ARM64in_LdSt8
: {
3570 p
= do_load_or_store8( p
, i
->ARM64in
.LdSt8
.isLoad
,
3571 iregEnc(i
->ARM64in
.LdSt8
.rD
),
3572 i
->ARM64in
.LdSt8
.amode
);
3576 case ARM64in_XDirect
: {
3577 /* NB: what goes on here has to be very closely coordinated
3578 with chainXDirect_ARM64 and unchainXDirect_ARM64 below. */
3579 /* We're generating chain-me requests here, so we need to be
3580 sure this is actually allowed -- no-redir translations
3581 can't use chain-me's. Hence: */
3582 vassert(disp_cp_chain_me_to_slowEP
!= NULL
);
3583 vassert(disp_cp_chain_me_to_fastEP
!= NULL
);
3585 /* Use ptmp for backpatching conditional jumps. */
3588 /* First off, if this is conditional, create a conditional
3589 jump over the rest of it. Or at least, leave a space for
3590 it that we will shortly fill in. */
3591 if (i
->ARM64in
.XDirect
.cond
!= ARM64cc_AL
) {
3592 vassert(i
->ARM64in
.XDirect
.cond
!= ARM64cc_NV
);
3597 /* Update the guest PC. */
3598 /* imm64 x9, dstGA */
3600 p
= imm64_to_ireg(p
, /*x*/9, i
->ARM64in
.XDirect
.dstGA
);
3601 p
= do_load_or_store64(p
, False
/*!isLoad*/,
3602 /*x*/9, i
->ARM64in
.XDirect
.amPC
);
3604 /* --- FIRST PATCHABLE BYTE follows --- */
3605 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
3606 calling to) backs up the return address, so as to find the
3607 address of the first patchable byte. So: don't change the
3608 number of instructions (5) below. */
3609 /* movw x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[15:0] */
3610 /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[31:15], lsl 16 */
3611 /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[47:32], lsl 32 */
3612 /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[63:48], lsl 48 */
3614 const void* disp_cp_chain_me
3615 = i
->ARM64in
.XDirect
.toFastEP
? disp_cp_chain_me_to_fastEP
3616 : disp_cp_chain_me_to_slowEP
;
3617 p
= imm64_to_ireg_EXACTLY4(p
, /*x*/9, (Addr
)disp_cp_chain_me
);
3619 /* --- END of PATCHABLE BYTES --- */
3621 /* Fix up the conditional jump, if there was one. */
3622 if (i
->ARM64in
.XDirect
.cond
!= ARM64cc_AL
) {
3623 Int delta
= (UChar
*)p
- (UChar
*)ptmp
; /* must be signed */
3624 vassert(delta
> 0 && delta
<= 40);
3625 vassert((delta
& 3) == 0);
3626 UInt notCond
= 1 ^ (UInt
)i
->ARM64in
.XDirect
.cond
;
3627 vassert(notCond
<= 13); /* Neither AL nor NV */
3628 vassert(ptmp
!= NULL
);
3630 *ptmp
= X_8_19_1_4(X01010100
, delta
& ((1<<19)-1), 0, notCond
);
3635 case ARM64in_XIndir
: {
3636 // XIndir is more or less the same as XAssisted, except
3637 // we don't have a trc value to hand back, so there's no
3639 /* Use ptmp for backpatching conditional jumps. */
3640 //UInt* ptmp = NULL;
3642 /* First off, if this is conditional, create a conditional
3643 jump over the rest of it. Or at least, leave a space for
3644 it that we will shortly fill in. */
3645 if (i
->ARM64in
.XIndir
.cond
!= ARM64cc_AL
) {
3647 //ZZ vassert(i->ARMin.XIndir.cond != ARMcc_NV);
3652 /* Update the guest PC. */
3653 /* str r-dstGA, amPC */
3654 p
= do_load_or_store64(p
, False
/*!isLoad*/,
3655 iregEnc(i
->ARM64in
.XIndir
.dstGA
),
3656 i
->ARM64in
.XIndir
.amPC
);
3658 /* imm64 x9, VG_(disp_cp_xindir) */
3660 p
= imm64_to_ireg(p
, /*x*/9, (Addr
)disp_cp_xindir
);
3661 *p
++ = 0xD61F0120; /* br x9 */
3663 /* Fix up the conditional jump, if there was one. */
3664 if (i
->ARM64in
.XIndir
.cond
!= ARM64cc_AL
) {
3666 //ZZ Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3667 //ZZ vassert(delta > 0 && delta < 40);
3668 //ZZ vassert((delta & 3) == 0);
3669 //ZZ UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
3670 //ZZ vassert(notCond <= 13); /* Neither AL nor NV */
3671 //ZZ delta = (delta >> 2) - 2;
3672 //ZZ *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3677 case ARM64in_XAssisted
: {
3678 /* Use ptmp for backpatching conditional jumps. */
3681 /* First off, if this is conditional, create a conditional
3682 jump over the rest of it. Or at least, leave a space for
3683 it that we will shortly fill in. I think this can only
3684 ever happen when VEX is driven by the switchbacker. */
3685 if (i
->ARM64in
.XAssisted
.cond
!= ARM64cc_AL
) {
3686 vassert(i
->ARM64in
.XDirect
.cond
!= ARM64cc_NV
);
3691 /* Update the guest PC. */
3692 /* str r-dstGA, amPC */
3693 p
= do_load_or_store64(p
, False
/*!isLoad*/,
3694 iregEnc(i
->ARM64in
.XAssisted
.dstGA
),
3695 i
->ARM64in
.XAssisted
.amPC
);
3697 /* movw r21, $magic_number */
3699 switch (i
->ARM64in
.XAssisted
.jk
) {
3700 case Ijk_ClientReq
: trcval
= VEX_TRC_JMP_CLIENTREQ
; break;
3701 case Ijk_Sys_syscall
: trcval
= VEX_TRC_JMP_SYS_SYSCALL
; break;
3702 //case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break;
3703 case Ijk_Yield
: trcval
= VEX_TRC_JMP_YIELD
; break;
3704 //case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
3705 //case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
3706 case Ijk_NoDecode
: trcval
= VEX_TRC_JMP_NODECODE
; break;
3707 case Ijk_InvalICache
: trcval
= VEX_TRC_JMP_INVALICACHE
; break;
3708 case Ijk_FlushDCache
: trcval
= VEX_TRC_JMP_FLUSHDCACHE
; break;
3709 case Ijk_NoRedir
: trcval
= VEX_TRC_JMP_NOREDIR
; break;
3710 case Ijk_SigTRAP
: trcval
= VEX_TRC_JMP_SIGTRAP
; break;
3711 //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
3712 case Ijk_Boring
: trcval
= VEX_TRC_JMP_BORING
; break;
3713 /* We don't expect to see the following being assisted. */
3718 ppIRJumpKind(i
->ARM64in
.XAssisted
.jk
);
3719 vpanic("emit_ARM64Instr.ARM64in_XAssisted: "
3720 "unexpected jump kind");
3722 vassert(trcval
!= 0);
3723 p
= imm64_to_ireg(p
, /*x*/21, (ULong
)trcval
);
3725 /* imm64 x9, VG_(disp_cp_xassisted) */
3727 p
= imm64_to_ireg(p
, /*x*/9, (Addr
)disp_cp_xassisted
);
3728 *p
++ = 0xD61F0120; /* br x9 */
3730 /* Fix up the conditional jump, if there was one. */
3731 if (i
->ARM64in
.XAssisted
.cond
!= ARM64cc_AL
) {
3732 Int delta
= (UChar
*)p
- (UChar
*)ptmp
; /* must be signed */
3733 vassert(delta
> 0 && delta
< 40);
3734 vassert((delta
& 3) == 0);
3735 UInt notCond
= 1 ^ (UInt
)i
->ARM64in
.XDirect
.cond
;
3736 vassert(notCond
<= 13); /* Neither AL nor NV */
3737 vassert(ptmp
!= NULL
);
3739 *ptmp
= X_8_19_1_4(X01010100
, delta
& ((1<<19)-1), 0, notCond
);
3744 case ARM64in_CSel
: {
3745 /* 100 1101 0100 mm cond 00 nn dd = CSEL Xd, Xn, Xm, cond */
3746 UInt dd
= iregEnc(i
->ARM64in
.CSel
.dst
);
3747 UInt nn
= iregEnc(i
->ARM64in
.CSel
.argL
);
3748 UInt mm
= iregEnc(i
->ARM64in
.CSel
.argR
);
3749 UInt cond
= (UInt
)i
->ARM64in
.CSel
.cond
;
3750 vassert(dd
< 31 && nn
< 31 && mm
< 31 && cond
< 16);
3751 *p
++ = X_3_8_5_6_5_5(X100
, X11010100
, mm
, cond
<< 2, nn
, dd
);
3755 case ARM64in_Call
: {
3756 /* We'll use x9 as a scratch register to put the target
3758 if (i
->ARM64in
.Call
.cond
!= ARM64cc_AL
3759 && i
->ARM64in
.Call
.rloc
.pri
!= RLPri_None
) {
3760 /* The call might not happen (it isn't unconditional) and
3761 it returns a result. In this case we will need to
3762 generate a control flow diamond to put 0x555..555 in
3763 the return register(s) in the case where the call
3764 doesn't happen. If this ever becomes necessary, maybe
3765 copy code from the 32-bit ARM equivalent. Until that
3766 day, just give up. */
3771 if (i
->ARM64in
.Call
.cond
!= ARM64cc_AL
) {
3772 /* Create a hole to put a conditional branch in. We'll
3773 patch it once we know the branch length. */
3779 p
= imm64_to_ireg( (UInt
*)p
, /*x*/9, (ULong
)i
->ARM64in
.Call
.target
);
3783 // Patch the hole if necessary
3784 if (i
->ARM64in
.Call
.cond
!= ARM64cc_AL
) {
3785 ULong dist
= (ULong
)(p
- ptmp
);
3786 /* imm64_to_ireg produces between 1 and 4 insns, and
3787 then there's the BLR itself. Hence: */
3788 vassert(dist
>= 2 && dist
<= 5);
3789 vassert(ptmp
!= NULL
);
3790 // 01010100 simm19 0 cond = B.cond (here + simm19 << 2)
3791 *ptmp
= X_8_19_1_4(X01010100
, dist
, 0,
3792 1 ^ (UInt
)i
->ARM64in
.Call
.cond
);
3794 vassert(ptmp
== NULL
);
3800 case ARM64in_AddToSP
: {
3801 /* 10,0 10001 00 imm12 11111 11111 ADD xsp, xsp, #imm12
3802 11,0 10001 00 imm12 11111 11111 SUB xsp, xsp, #imm12
3804 Int simm12
= i
->ARM64in
.AddToSP
.simm
;
3805 vassert(-4096 < simm12
&& simm12
< 4096);
3806 vassert(0 == (simm12
& 0xF));
3808 *p
++ = X_2_6_2_12_5_5(X10
, X010001
, X00
, simm12
, X11111
, X11111
);
3810 *p
++ = X_2_6_2_12_5_5(X11
, X010001
, X00
, -simm12
, X11111
, X11111
);
3815 case ARM64in_FromSP
: {
3816 /* 10,0 10001 00 0..(12)..0 11111 dd MOV Xd, xsp */
3817 UInt dd
= iregEnc(i
->ARM64in
.FromSP
.dst
);
3819 *p
++ = X_2_6_2_12_5_5(X10
, X010001
, X00
, 0, X11111
, dd
);
3824 /* 100 11011 110 mm 011111 nn dd UMULH Xd, Xn,Xm
3825 100 11011 010 mm 011111 nn dd SMULH Xd, Xn,Xm
3826 100 11011 000 mm 011111 nn dd MUL Xd, Xn,Xm
3828 UInt dd
= iregEnc(i
->ARM64in
.Mul
.dst
);
3829 UInt nn
= iregEnc(i
->ARM64in
.Mul
.argL
);
3830 UInt mm
= iregEnc(i
->ARM64in
.Mul
.argR
);
3831 vassert(dd
< 31 && nn
< 31 && mm
< 31);
3832 switch (i
->ARM64in
.Mul
.op
) {
3834 *p
++ = X_3_8_5_6_5_5(X100
, X11011110
, mm
, X011111
, nn
, dd
);
3837 *p
++ = X_3_8_5_6_5_5(X100
, X11011010
, mm
, X011111
, nn
, dd
);
3839 case ARM64mul_PLAIN
:
3840 *p
++ = X_3_8_5_6_5_5(X100
, X11011000
, mm
, X011111
, nn
, dd
);
3847 case ARM64in_LdrEX
: {
3848 /* 085F7C82 ldxrb w2, [x4]
3849 485F7C82 ldxrh w2, [x4]
3850 885F7C82 ldxr w2, [x4]
3851 C85F7C82 ldxr x2, [x4]
3853 switch (i
->ARM64in
.LdrEX
.szB
) {
3854 case 1: *p
++ = 0x085F7C82; goto done
;
3855 case 2: *p
++ = 0x485F7C82; goto done
;
3856 case 4: *p
++ = 0x885F7C82; goto done
;
3857 case 8: *p
++ = 0xC85F7C82; goto done
;
3862 case ARM64in_StrEX
: {
3863 /* 08007C82 stxrb w0, w2, [x4]
3864 48007C82 stxrh w0, w2, [x4]
3865 88007C82 stxr w0, w2, [x4]
3866 C8007C82 stxr w0, x2, [x4]
3868 switch (i
->ARM64in
.StrEX
.szB
) {
3869 case 1: *p
++ = 0x08007C82; goto done
;
3870 case 2: *p
++ = 0x48007C82; goto done
;
3871 case 4: *p
++ = 0x88007C82; goto done
;
3872 case 8: *p
++ = 0xC8007C82; goto done
;
3878 /* This isn't simple. For an explanation see the comment in
3879 host_arm64_defs.h on the definition of ARM64Instr case CAS.
3881 NOTE: We could place "loop:" after mov/and but then we need
3882 an additional scratch register.
3888 mov x8, x5 // AA0503E8
3889 and x8, x5, #0xFFFFFFFF // 92407CA8
3890 and x8, x5, #0xFFFF // 92403CA8
3891 and x8, x5, #0xFF // 92401CA8
3894 ldxr x1, [x3] // C85F7C61
3895 ldxr w1, [x3] // 885F7C61
3896 ldxrh w1, [x3] // 485F7C61
3897 ldxrb w1, [x3] // 085F7C61
3900 cmp x1, x8 // EB08003F
3904 stxr w8, x7, [x3] // C8087C67
3905 stxr w8, w7, [x3] // 88087C67
3906 stxrh w8, w7, [x3] // 48087C67
3907 stxrb w8, w7, [x3] // 08087C67
3910 cbne w8, loop // 35FFFF68
3913 switch (i
->ARM64in
.CAS
.szB
) {
3914 case 8: *p
++ = 0xAA0503E8; break;
3915 case 4: *p
++ = 0x92407CA8; break;
3916 case 2: *p
++ = 0x92403CA8; break;
3917 case 1: *p
++ = 0x92401CA8; break;
3918 default: vassert(0);
3920 switch (i
->ARM64in
.CAS
.szB
) {
3921 case 8: *p
++ = 0xC85F7C61; break;
3922 case 4: *p
++ = 0x885F7C61; break;
3923 case 2: *p
++ = 0x485F7C61; break;
3924 case 1: *p
++ = 0x085F7C61; break;
3928 switch (i
->ARM64in
.CAS
.szB
) {
3929 case 8: *p
++ = 0xC8087C67; break;
3930 case 4: *p
++ = 0x88087C67; break;
3931 case 2: *p
++ = 0x48087C67; break;
3932 case 1: *p
++ = 0x08087C67; break;
3937 case ARM64in_CASP
: {
3939 CASP <Xs>, <X(s+1)>, <Xt>, <X(t+1)>, [<Xn|SP>{,#0}]
3941 Register allocation (see ARM64in_CASP in getRegUsage_ARM64Instr):
3944 Xs, X(s+1): values to be compared with value read from address
3946 -> X0,X1 (OUTPUTS) loaded from memory and compared with
3947 scratch registers X8,X9 (CLOBBERED) which contain
3949 Xt, X(t+1): values to be stored to memory if X0,X1==X8,X9
3954 mov x8, x4 // AA0403E8
3955 mov x9, x5 // AA0503E9
3956 and x8, x4, #0xFFFFFFFF // 92407C88
3957 and x9, x5, #0xFFFFFFFF // 92407CA9
3960 ldxp x0,x1, [x2] // C87F0440
3961 ldxp w0,w1, [x2] // 887F0440
3964 cmp x0, x8 // EB08001F
3965 bne out // 540000E1 (b.ne #28 <out>)
3966 cmp x1, x9 // EB09003F
3967 bne out // 540000A1 (b.ne #20 <out>)
3970 stxp w1, x6, x7, [x2] // C8211C46
3971 stxp w1, w6, w7, [x2] // 88211C46
3974 cbnz w1, loop // 35FFFE81 (cbnz w1, #-48 <loop>)
3977 switch (i
->ARM64in
.CASP
.szB
) {
3978 case 8: *p
++ = 0xAA0403E8; *p
++ = 0xAA0503E9; break;
3979 case 4: *p
++ = 0x92407C88; *p
++ = 0x92407CA9; break;
3980 default: vassert(0);
3982 switch (i
->ARM64in
.CASP
.szB
) {
3983 case 8: *p
++ = 0xC87F0440; break;
3984 case 4: *p
++ = 0x887F0440; break;
3985 default: vassert(0);
3991 switch (i
->ARM64in
.CASP
.szB
) {
3992 case 8: *p
++ = 0xC8211C46; break;
3993 case 4: *p
++ = 0x88211C46; break;
3994 default: vassert(0);
3999 case ARM64in_MFence
: {
4000 *p
++ = 0xD5033F9F; /* DSB sy */
4001 *p
++ = 0xD5033FBF; /* DMB sy */
4002 *p
++ = 0xD5033FDF; /* ISB */
4005 case ARM64in_ClrEX
: {
4006 *p
++ = 0xD5033F5F; /* clrex #15 */
4009 case ARM64in_VLdStH
: {
4010 /* 01 111101 01 imm12 n t LDR Ht, [Xn|SP, #imm12 * 2]
4011 01 111101 00 imm12 n t STR Ht, [Xn|SP, #imm12 * 2]
4013 UInt hD
= dregEnc(i
->ARM64in
.VLdStH
.hD
);
4014 UInt rN
= iregEnc(i
->ARM64in
.VLdStH
.rN
);
4015 UInt uimm12
= i
->ARM64in
.VLdStH
.uimm12
;
4016 Bool isLD
= i
->ARM64in
.VLdStH
.isLoad
;
4017 vassert(uimm12
< 8192 && 0 == (uimm12
& 1));
4019 vassert(uimm12
< (1<<12));
4022 *p
++ = X_2_6_2_12_5_5(X01
, X111101
, isLD
? X01
: X00
,
4026 case ARM64in_VLdStS
: {
4027 /* 10 111101 01 imm12 n t LDR St, [Xn|SP, #imm12 * 4]
4028 10 111101 00 imm12 n t STR St, [Xn|SP, #imm12 * 4]
4030 UInt sD
= dregEnc(i
->ARM64in
.VLdStS
.sD
);
4031 UInt rN
= iregEnc(i
->ARM64in
.VLdStS
.rN
);
4032 UInt uimm12
= i
->ARM64in
.VLdStS
.uimm12
;
4033 Bool isLD
= i
->ARM64in
.VLdStS
.isLoad
;
4034 vassert(uimm12
< 16384 && 0 == (uimm12
& 3));
4036 vassert(uimm12
< (1<<12));
4039 *p
++ = X_2_6_2_12_5_5(X10
, X111101
, isLD
? X01
: X00
,
4043 case ARM64in_VLdStD
: {
4044 /* 11 111101 01 imm12 n t LDR Dt, [Xn|SP, #imm12 * 8]
4045 11 111101 00 imm12 n t STR Dt, [Xn|SP, #imm12 * 8]
4047 UInt dD
= dregEnc(i
->ARM64in
.VLdStD
.dD
);
4048 UInt rN
= iregEnc(i
->ARM64in
.VLdStD
.rN
);
4049 UInt uimm12
= i
->ARM64in
.VLdStD
.uimm12
;
4050 Bool isLD
= i
->ARM64in
.VLdStD
.isLoad
;
4051 vassert(uimm12
< 32768 && 0 == (uimm12
& 7));
4053 vassert(uimm12
< (1<<12));
4056 *p
++ = X_2_6_2_12_5_5(X11
, X111101
, isLD
? X01
: X00
,
4060 case ARM64in_VLdStQ
: {
4061 /* 0100 1100 0000 0000 0111 11 rN rQ st1 {vQ.2d}, [<rN|SP>]
4062 0100 1100 0100 0000 0111 11 rN rQ ld1 {vQ.2d}, [<rN|SP>]
4064 UInt rQ
= qregEnc(i
->ARM64in
.VLdStQ
.rQ
);
4065 UInt rN
= iregEnc(i
->ARM64in
.VLdStQ
.rN
);
4068 if (i
->ARM64in
.VLdStQ
.isLoad
) {
4069 *p
++ = 0x4C407C00 | (rN
<< 5) | rQ
;
4071 *p
++ = 0x4C007C00 | (rN
<< 5) | rQ
;
4075 case ARM64in_VCvtI2F
: {
4076 /* 31 28 23 21 20 18 15 9 4
4077 000 11110 00 1 00 010 000000 n d SCVTF Sd, Wn
4078 000 11110 01 1 00 010 000000 n d SCVTF Dd, Wn
4079 100 11110 00 1 00 010 000000 n d SCVTF Sd, Xn
4080 100 11110 01 1 00 010 000000 n d SCVTF Dd, Xn
4081 000 11110 00 1 00 011 000000 n d UCVTF Sd, Wn
4082 000 11110 01 1 00 011 000000 n d UCVTF Dd, Wn
4083 100 11110 00 1 00 011 000000 n d UCVTF Sd, Xn
4084 100 11110 01 1 00 011 000000 n d UCVTF Dd, Xn
4086 UInt rN
= iregEnc(i
->ARM64in
.VCvtI2F
.rS
);
4087 UInt rD
= dregEnc(i
->ARM64in
.VCvtI2F
.rD
);
4088 ARM64CvtOp how
= i
->ARM64in
.VCvtI2F
.how
;
4089 /* Just handle cases as they show up. */
4091 case ARM64cvt_F32_I32S
: /* SCVTF Sd, Wn */
4092 *p
++ = X_3_5_8_6_5_5(X000
, X11110
, X00100010
, X000000
, rN
, rD
);
4094 case ARM64cvt_F64_I32S
: /* SCVTF Dd, Wn */
4095 *p
++ = X_3_5_8_6_5_5(X000
, X11110
, X01100010
, X000000
, rN
, rD
);
4097 case ARM64cvt_F32_I64S
: /* SCVTF Sd, Xn */
4098 *p
++ = X_3_5_8_6_5_5(X100
, X11110
, X00100010
, X000000
, rN
, rD
);
4100 case ARM64cvt_F64_I64S
: /* SCVTF Dd, Xn */
4101 *p
++ = X_3_5_8_6_5_5(X100
, X11110
, X01100010
, X000000
, rN
, rD
);
4103 case ARM64cvt_F32_I32U
: /* UCVTF Sd, Wn */
4104 *p
++ = X_3_5_8_6_5_5(X000
, X11110
, X00100011
, X000000
, rN
, rD
);
4106 case ARM64cvt_F64_I32U
: /* UCVTF Dd, Wn */
4107 *p
++ = X_3_5_8_6_5_5(X000
, X11110
, X01100011
, X000000
, rN
, rD
);
4109 case ARM64cvt_F32_I64U
: /* UCVTF Sd, Xn */
4110 *p
++ = X_3_5_8_6_5_5(X100
, X11110
, X00100011
, X000000
, rN
, rD
);
4112 case ARM64cvt_F64_I64U
: /* UCVTF Dd, Xn */
4113 *p
++ = X_3_5_8_6_5_5(X100
, X11110
, X01100011
, X000000
, rN
, rD
);
4120 case ARM64in_VCvtF2I
: {
4121 /* 30 23 20 18 15 9 4
4122 sf 00,11110,0x 1 00 000,000000 n d FCVTNS Rd, Fn (round to
4123 sf 00,11110,0x 1 00 001,000000 n d FCVTNU Rd, Fn nearest)
4124 ---------------- 01 -------------- FCVTP-------- (round to +inf)
4125 ---------------- 10 -------------- FCVTM-------- (round to -inf)
4126 ---------------- 11 -------------- FCVTZ-------- (round to zero)
4128 Rd is Xd when sf==1, Wd when sf==0
4129 Fn is Dn when x==1, Sn when x==0
4130 20:19 carry the rounding mode, using the same encoding as FPCR
4132 UInt rD
= iregEnc(i
->ARM64in
.VCvtF2I
.rD
);
4133 UInt rN
= dregEnc(i
->ARM64in
.VCvtF2I
.rS
);
4134 ARM64CvtOp how
= i
->ARM64in
.VCvtF2I
.how
;
4135 UChar armRM
= i
->ARM64in
.VCvtF2I
.armRM
;
4136 /* Just handle cases as they show up. */
4138 case ARM64cvt_F64_I32S
: /* FCVTxS Wd, Dn */
4139 *p
++ = X_3_5_8_6_5_5(X000
, X11110
, X01100000
| (armRM
<< 3),
4142 case ARM64cvt_F64_I32U
: /* FCVTxU Wd, Dn */
4143 *p
++ = X_3_5_8_6_5_5(X000
, X11110
, X01100001
| (armRM
<< 3),
4146 case ARM64cvt_F64_I64S
: /* FCVTxS Xd, Dn */
4147 *p
++ = X_3_5_8_6_5_5(X100
, X11110
, X01100000
| (armRM
<< 3),
4150 case ARM64cvt_F64_I64U
: /* FCVTxU Xd, Dn */
4151 *p
++ = X_3_5_8_6_5_5(X100
, X11110
, X01100001
| (armRM
<< 3),
4154 case ARM64cvt_F32_I32S
: /* FCVTxS Wd, Sn */
4155 *p
++ = X_3_5_8_6_5_5(X000
, X11110
, X00100000
| (armRM
<< 3),
4158 case ARM64cvt_F32_I32U
: /* FCVTxU Wd, Sn */
4159 *p
++ = X_3_5_8_6_5_5(X000
, X11110
, X00100001
| (armRM
<< 3),
4162 case ARM64cvt_F32_I64S
: /* FCVTxS Xd, Sn */
4163 *p
++ = X_3_5_8_6_5_5(X100
, X11110
, X00100000
| (armRM
<< 3),
4166 case ARM64cvt_F32_I64U
: /* FCVTxU Xd, Sn */
4167 *p
++ = X_3_5_8_6_5_5(X100
, X11110
, X00100001
| (armRM
<< 3),
4175 case ARM64in_VCvtSD
: {
4176 /* 31 23 21 16 14 9 4
4177 000,11110, 00 10001 0,1 10000 n d FCVT Dd, Sn (S->D)
4178 ---------- 01 ----- 0,0 --------- FCVT Sd, Dn (D->S)
4179 Rounding, when dst is smaller than src, is per the FPCR.
4181 UInt dd
= dregEnc(i
->ARM64in
.VCvtSD
.dst
);
4182 UInt nn
= dregEnc(i
->ARM64in
.VCvtSD
.src
);
4183 if (i
->ARM64in
.VCvtSD
.sToD
) {
4184 *p
++ = X_3_5_8_6_5_5(X000
, X11110
, X00100010
, X110000
, nn
, dd
);
4186 *p
++ = X_3_5_8_6_5_5(X000
, X11110
, X01100010
, X010000
, nn
, dd
);
4190 case ARM64in_VCvtHS
: {
4191 /* 31 23 21 16 14 9 4
4192 000,11110, 11 10001 0,0 10000 n d FCVT Sd, Hn (H->S)
4193 ---------- 00 ----- 1,1 --------- FCVT Hd, Sn (S->H)
4194 Rounding, when dst is smaller than src, is per the FPCR.
4196 UInt dd
= dregEnc(i
->ARM64in
.VCvtHS
.dst
);
4197 UInt nn
= dregEnc(i
->ARM64in
.VCvtHS
.src
);
4198 if (i
->ARM64in
.VCvtHS
.hToS
) {
4199 *p
++ = X_3_5_8_6_5_5(X000
, X11110
, X11100010
, X010000
, nn
, dd
);
4201 *p
++ = X_3_5_8_6_5_5(X000
, X11110
, X00100011
, X110000
, nn
, dd
);
4205 case ARM64in_VCvtHD
: {
4206 /* 31 23 21 16 14 9 4
4207 000,11110, 11 10001 0,1 10000 n d FCVT Dd, Hn (H->D)
4208 ---------- 01 ----- 1,1 --------- FCVT Hd, Dn (D->H)
4209 Rounding, when dst is smaller than src, is per the FPCR.
4211 UInt dd
= dregEnc(i
->ARM64in
.VCvtHD
.dst
);
4212 UInt nn
= dregEnc(i
->ARM64in
.VCvtHD
.src
);
4213 if (i
->ARM64in
.VCvtHD
.hToD
) {
4214 *p
++ = X_3_5_8_6_5_5(X000
, X11110
, X11100010
, X110000
, nn
, dd
);
4216 *p
++ = X_3_5_8_6_5_5(X000
, X11110
, X01100011
, X110000
, nn
, dd
);
4220 case ARM64in_VUnaryD
: {
4221 /* 31 23 21 16 14 9 4
4222 000,11110 01 1,0000 0,0 10000 n d FMOV Dd, Dn (not handled)
4223 ------------------- 0,1 --------- FABS ------
4224 ------------------- 1,0 --------- FNEG ------
4225 ------------------- 1,1 --------- FSQRT -----
4227 UInt dD
= dregEnc(i
->ARM64in
.VUnaryD
.dst
);
4228 UInt dN
= dregEnc(i
->ARM64in
.VUnaryD
.src
);
4229 UInt b16
= 2; /* impossible */
4230 UInt b15
= 2; /* impossible */
4231 switch (i
->ARM64in
.VUnaryD
.op
) {
4232 case ARM64fpu_NEG
: b16
= 1; b15
= 0; break;
4233 case ARM64fpu_SQRT
: b16
= 1; b15
= 1; break;
4234 case ARM64fpu_ABS
: b16
= 0; b15
= 1; break;
4237 if (b16
< 2 && b15
< 2) {
4238 *p
++ = X_3_8_5_6_5_5(X000
, X11110011
, (X0000
<< 1) | b16
,
4239 (b15
<< 5) | X10000
, dN
, dD
);
4243 000, 11110 01 1,001 11,1 10000 n d FRINTI Dd, Dm (round per FPCR)
4245 if (i
->ARM64in
.VUnaryD
.op
== ARM64fpu_RINT
) {
4246 *p
++ = X_3_8_5_6_5_5(X000
, X11110011
, X00111
, X110000
, dN
, dD
);
4250 010, 11110 11 1,0000 1,1111 10 n d FRECPX Dd, Dm
4252 if (i
->ARM64in
.VUnaryD
.op
== ARM64fpu_RECPX
) {
4253 *p
++ = X_3_8_5_6_5_5(X010
, X11110111
, X00001
, X111110
, dN
, dD
);
4258 case ARM64in_VUnaryS
: {
4259 /* 31 23 21 16 14 9 4
4260 000,11110 00 1,0000 0,0 10000 n d FMOV Sd, Sn (not handled)
4261 ------------------- 0,1 --------- FABS ------
4262 ------------------- 1,0 --------- FNEG ------
4263 ------------------- 1,1 --------- FSQRT -----
4265 UInt sD
= dregEnc(i
->ARM64in
.VUnaryS
.dst
);
4266 UInt sN
= dregEnc(i
->ARM64in
.VUnaryS
.src
);
4267 UInt b16
= 2; /* impossible */
4268 UInt b15
= 2; /* impossible */
4269 switch (i
->ARM64in
.VUnaryS
.op
) {
4270 case ARM64fpu_NEG
: b16
= 1; b15
= 0; break;
4271 case ARM64fpu_SQRT
: b16
= 1; b15
= 1; break;
4272 case ARM64fpu_ABS
: b16
= 0; b15
= 1; break;
4275 if (b16
< 2 && b15
< 2) {
4276 *p
++ = X_3_8_5_6_5_5(X000
, X11110001
, (X0000
<< 1) | b16
,
4277 (b15
<< 5) | X10000
, sN
, sD
);
4281 000, 11110 00 1,001 11,1 10000 n d FRINTI Sd, Sm (round per FPCR)
4283 if (i
->ARM64in
.VUnaryS
.op
== ARM64fpu_RINT
) {
4284 *p
++ = X_3_8_5_6_5_5(X000
, X11110001
, X00111
, X110000
, sN
, sD
);
4288 010, 11110 10 1,0000 1,1111 10 n d FRECPX Sd, Sm
4290 if (i
->ARM64in
.VUnaryS
.op
== ARM64fpu_RECPX
) {
4291 *p
++ = X_3_8_5_6_5_5(X010
, X11110101
, X00001
, X111110
, sN
, sD
);
4296 case ARM64in_VBinD
: {
4297 /* 31 23 20 15 11 9 4
4298 ---------------- 0000 ------ FMUL --------
4299 000 11110 011 m 0001 10 n d FDIV Dd,Dn,Dm
4300 ---------------- 0010 ------ FADD --------
4301 ---------------- 0011 ------ FSUB --------
4303 UInt dD
= dregEnc(i
->ARM64in
.VBinD
.dst
);
4304 UInt dN
= dregEnc(i
->ARM64in
.VBinD
.argL
);
4305 UInt dM
= dregEnc(i
->ARM64in
.VBinD
.argR
);
4306 UInt b1512
= 16; /* impossible */
4307 switch (i
->ARM64in
.VBinD
.op
) {
4308 case ARM64fpb_DIV
: b1512
= X0001
; break;
4309 case ARM64fpb_MUL
: b1512
= X0000
; break;
4310 case ARM64fpb_SUB
: b1512
= X0011
; break;
4311 case ARM64fpb_ADD
: b1512
= X0010
; break;
4314 vassert(b1512
< 16);
4316 = X_3_8_5_6_5_5(X000
, X11110011
, dM
, (b1512
<< 2) | X10
, dN
, dD
);
4319 case ARM64in_VBinS
: {
4320 /* 31 23 20 15 11 9 4
4321 ---------------- 0000 ------ FMUL --------
4322 000 11110 001 m 0001 10 n d FDIV Dd,Dn,Dm
4323 ---------------- 0010 ------ FADD --------
4324 ---------------- 0011 ------ FSUB --------
4326 UInt sD
= dregEnc(i
->ARM64in
.VBinS
.dst
);
4327 UInt sN
= dregEnc(i
->ARM64in
.VBinS
.argL
);
4328 UInt sM
= dregEnc(i
->ARM64in
.VBinS
.argR
);
4329 UInt b1512
= 16; /* impossible */
4330 switch (i
->ARM64in
.VBinS
.op
) {
4331 case ARM64fpb_DIV
: b1512
= X0001
; break;
4332 case ARM64fpb_MUL
: b1512
= X0000
; break;
4333 case ARM64fpb_SUB
: b1512
= X0011
; break;
4334 case ARM64fpb_ADD
: b1512
= X0010
; break;
4337 vassert(b1512
< 16);
4339 = X_3_8_5_6_5_5(X000
, X11110001
, sM
, (b1512
<< 2) | X10
, sN
, sD
);
4342 case ARM64in_VCmpD
: {
4343 /* 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm */
4344 UInt dN
= dregEnc(i
->ARM64in
.VCmpD
.argL
);
4345 UInt dM
= dregEnc(i
->ARM64in
.VCmpD
.argR
);
4346 *p
++ = X_3_8_5_6_5_5(X000
, X11110011
, dM
, X001000
, dN
, X00000
);
4349 case ARM64in_VCmpS
: {
4350 /* 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm */
4351 UInt sN
= dregEnc(i
->ARM64in
.VCmpS
.argL
);
4352 UInt sM
= dregEnc(i
->ARM64in
.VCmpS
.argR
);
4353 *p
++ = X_3_8_5_6_5_5(X000
, X11110001
, sM
, X001000
, sN
, X00000
);
4356 case ARM64in_VFCSel
: {
4357 /* 31 23 21 20 15 11 9 5
4358 000 11110 00 1 m cond 11 n d FCSEL Sd,Sn,Sm,cond
4359 000 11110 01 1 m cond 11 n d FCSEL Dd,Dn,Dm,cond
4361 Bool isD
= i
->ARM64in
.VFCSel
.isD
;
4362 UInt dd
= dregEnc(i
->ARM64in
.VFCSel
.dst
);
4363 UInt nn
= dregEnc(i
->ARM64in
.VFCSel
.argL
);
4364 UInt mm
= dregEnc(i
->ARM64in
.VFCSel
.argR
);
4365 UInt cond
= (UInt
)i
->ARM64in
.VFCSel
.cond
;
4367 *p
++ = X_3_8_5_6_5_5(X000
, isD
? X11110011
: X11110001
,
4368 mm
, (cond
<< 2) | X000011
, nn
, dd
);
4371 case ARM64in_FPCR
: {
4372 Bool toFPCR
= i
->ARM64in
.FPCR
.toFPCR
;
4373 UInt iReg
= iregEnc(i
->ARM64in
.FPCR
.iReg
);
4375 /* 0xD51B44 000 Rt MSR fpcr, rT */
4376 *p
++ = 0xD51B4400 | (iReg
& 0x1F);
4379 goto bad
; // FPCR -> iReg case currently ATC
4381 case ARM64in_FPSR
: {
4382 Bool toFPSR
= i
->ARM64in
.FPSR
.toFPSR
;
4383 UInt iReg
= iregEnc(i
->ARM64in
.FPSR
.iReg
);
4385 /* 0xD51B44 001 Rt MSR fpsr, rT */
4386 *p
++ = 0xD51B4420 | (iReg
& 0x1F);
4388 /* 0xD53B44 001 Rt MRS rT, fpsr */
4389 *p
++ = 0xD53B4420 | (iReg
& 0x1F);
4393 case ARM64in_VBinV
: {
4395 010 01110 11 1 m 100001 n d ADD Vd.2d, Vn.2d, Vm.2d
4396 010 01110 10 1 m 100001 n d ADD Vd.4s, Vn.4s, Vm.4s
4397 010 01110 01 1 m 100001 n d ADD Vd.8h, Vn.8h, Vm.8h
4398 010 01110 00 1 m 100001 n d ADD Vd.16b, Vn.16b, Vm.16b
4400 011 01110 11 1 m 100001 n d SUB Vd.2d, Vn.2d, Vm.2d
4401 011 01110 10 1 m 100001 n d SUB Vd.4s, Vn.4s, Vm.4s
4402 011 01110 01 1 m 100001 n d SUB Vd.8h, Vn.8h, Vm.8h
4403 011 01110 00 1 m 100001 n d SUB Vd.16b, Vn.16b, Vm.16b
4405 010 01110 10 1 m 100111 n d MUL Vd.4s, Vn.4s, Vm.4s
4406 010 01110 01 1 m 100111 n d MUL Vd.8h, Vn.8h, Vm.8h
4407 010 01110 00 1 m 100111 n d MUL Vd.16b, Vn.16b, Vm.16b
4409 010 01110 01 1 m 110101 n d FADD Vd.2d, Vn.2d, Vm.2d
4410 010 01110 00 1 m 110101 n d FADD Vd.4s, Vn.4s, Vm.4s
4411 010 01110 11 1 m 110101 n d FSUB Vd.2d, Vn.2d, Vm.2d
4412 010 01110 10 1 m 110101 n d FSUB Vd.4s, Vn.4s, Vm.4s
4414 011 01110 01 1 m 110111 n d FMUL Vd.2d, Vn.2d, Vm.2d
4415 011 01110 00 1 m 110111 n d FMUL Vd.4s, Vn.4s, Vm.4s
4416 011 01110 01 1 m 111111 n d FDIV Vd.2d, Vn.2d, Vm.2d
4417 011 01110 00 1 m 111111 n d FDIV Vd.4s, Vn.4s, Vm.4s
4419 010 01110 01 1 m 111101 n d FMAX Vd.2d, Vn.2d, Vm.2d
4420 010 01110 00 1 m 111101 n d FMAX Vd.4s, Vn.4s, Vm.4s
4421 010 01110 11 1 m 111101 n d FMIN Vd.2d, Vn.2d, Vm.2d
4422 010 01110 10 1 m 111101 n d FMIN Vd.4s, Vn.4s, Vm.4s
4424 011 01110 10 1 m 011001 n d UMAX Vd.4s, Vn.4s, Vm.4s
4425 011 01110 01 1 m 011001 n d UMAX Vd.8h, Vn.8h, Vm.8h
4426 011 01110 00 1 m 011001 n d UMAX Vd.16b, Vn.16b, Vm.16b
4428 011 01110 10 1 m 011011 n d UMIN Vd.4s, Vn.4s, Vm.4s
4429 011 01110 01 1 m 011011 n d UMIN Vd.8h, Vn.8h, Vm.8h
4430 011 01110 00 1 m 011011 n d UMIN Vd.16b, Vn.16b, Vm.16b
4432 010 01110 10 1 m 011001 n d SMAX Vd.4s, Vn.4s, Vm.4s
4433 010 01110 01 1 m 011001 n d SMAX Vd.8h, Vn.8h, Vm.8h
4434 010 01110 00 1 m 011001 n d SMAX Vd.16b, Vn.16b, Vm.16b
4436 010 01110 10 1 m 011011 n d SMIN Vd.4s, Vn.4s, Vm.4s
4437 010 01110 01 1 m 011011 n d SMIN Vd.8h, Vn.8h, Vm.8h
4438 010 01110 00 1 m 011011 n d SMIN Vd.16b, Vn.16b, Vm.16b
4440 010 01110 00 1 m 000111 n d AND Vd, Vn, Vm
4441 010 01110 10 1 m 000111 n d ORR Vd, Vn, Vm
4442 011 01110 00 1 m 000111 n d EOR Vd, Vn, Vm
4444 011 01110 11 1 m 100011 n d CMEQ Vd.2d, Vn.2d, Vm.2d
4445 011 01110 10 1 m 100011 n d CMEQ Vd.4s, Vn.4s, Vm.4s
4446 011 01110 01 1 m 100011 n d CMEQ Vd.8h, Vn.8h, Vm.8h
4447 011 01110 00 1 m 100011 n d CMEQ Vd.16b, Vn.16b, Vm.16b
4449 011 01110 11 1 m 001101 n d CMHI Vd.2d, Vn.2d, Vm.2d
4450 011 01110 10 1 m 001101 n d CMHI Vd.4s, Vn.4s, Vm.4s
4451 011 01110 01 1 m 001101 n d CMHI Vd.8h, Vn.8h, Vm.8h
4452 011 01110 00 1 m 001101 n d CMHI Vd.16b, Vn.16b, Vm.16b
4454 010 01110 11 1 m 001101 n d CMGT Vd.2d, Vn.2d, Vm.2d
4455 010 01110 10 1 m 001101 n d CMGT Vd.4s, Vn.4s, Vm.4s
4456 010 01110 01 1 m 001101 n d CMGT Vd.8h, Vn.8h, Vm.8h
4457 010 01110 00 1 m 001101 n d CMGT Vd.16b, Vn.16b, Vm.16b
4459 010 01110 01 1 m 111001 n d FCMEQ Vd.2d, Vn.2d, Vm.2d
4460 010 01110 00 1 m 111001 n d FCMEQ Vd.4s, Vn.4s, Vm.4s
4462 011 01110 01 1 m 111001 n d FCMGE Vd.2d, Vn.2d, Vm.2d
4463 011 01110 00 1 m 111001 n d FCMGE Vd.4s, Vn.4s, Vm.4s
4465 011 01110 11 1 m 111001 n d FCMGT Vd.2d, Vn.2d, Vm.2d
4466 011 01110 10 1 m 111001 n d FCMGT Vd.4s, Vn.4s, Vm.4s
4468 010 01110 00 0 m 000000 n d TBL Vd.16b, {Vn.16b}, Vm.16b
4470 010 01110 11 0 m 000110 n d UZP1 Vd.2d, Vn.2d, Vm.2d
4471 010 01110 10 0 m 000110 n d UZP1 Vd.4s, Vn.4s, Vm.4s
4472 010 01110 01 0 m 000110 n d UZP1 Vd.8h, Vn.8h, Vm.8h
4473 010 01110 00 0 m 000110 n d UZP1 Vd.16b, Vn.16b, Vm.16b
4475 010 01110 11 0 m 010110 n d UZP2 Vd.2d, Vn.2d, Vm.2d
4476 010 01110 10 0 m 010110 n d UZP2 Vd.4s, Vn.4s, Vm.4s
4477 010 01110 01 0 m 010110 n d UZP2 Vd.8h, Vn.8h, Vm.8h
4478 010 01110 00 0 m 010110 n d UZP2 Vd.16b, Vn.16b, Vm.16b
4480 010 01110 10 0 m 001110 n d ZIP1 Vd.4s, Vn.4s, Vm.4s
4481 010 01110 01 0 m 001110 n d ZIP1 Vd.8h, Vn.8h, Vm.8h
4482 010 01110 10 0 m 001110 n d ZIP1 Vd.16b, Vn.16b, Vm.16b
4484 010 01110 10 0 m 011110 n d ZIP2 Vd.4s, Vn.4s, Vm.4s
4485 010 01110 01 0 m 011110 n d ZIP2 Vd.8h, Vn.8h, Vm.8h
4486 010 01110 10 0 m 011110 n d ZIP2 Vd.16b, Vn.16b, Vm.16b
4488 011 01110 00 1 m 100111 n d PMUL Vd.16b, Vn.16b, Vm.16b
4490 000 01110 00 1 m 111000 n d PMULL Vd.8h, Vn.8b, Vm.8b
4492 001 01110 10 1 m 110000 n d UMULL Vd.2d, Vn.2s, Vm.2s
4493 001 01110 01 1 m 110000 n d UMULL Vd.4s, Vn.4h, Vm.4h
4494 001 01110 00 1 m 110000 n d UMULL Vd.8h, Vn.8b, Vm.8b
4496 000 01110 10 1 m 110000 n d SMULL Vd.2d, Vn.2s, Vm.2s
4497 000 01110 01 1 m 110000 n d SMULL Vd.4s, Vn.4h, Vm.4h
4498 000 01110 00 1 m 110000 n d SMULL Vd.8h, Vn.8b, Vm.8b
4500 010 01110 11 1 m 000011 n d SQADD Vd.2d, Vn.2d, Vm.2d
4501 010 01110 10 1 m 000011 n d SQADD Vd.4s, Vn.4s, Vm.4s
4502 010 01110 01 1 m 000011 n d SQADD Vd.8h, Vn.8h, Vm.8h
4503 010 01110 00 1 m 000011 n d SQADD Vd.16b, Vn.16b, Vm.16b
4505 011 01110 11 1 m 000011 n d UQADD Vd.2d, Vn.2d, Vm.2d
4506 011 01110 10 1 m 000011 n d UQADD Vd.4s, Vn.4s, Vm.4s
4507 011 01110 01 1 m 000011 n d UQADD Vd.8h, Vn.8h, Vm.8h
4508 011 01110 00 1 m 000011 n d UQADD Vd.16b, Vn.16b, Vm.16b
4510 010 01110 11 1 m 001011 n d SQSUB Vd.2d, Vn.2d, Vm.2d
4511 010 01110 10 1 m 001011 n d SQSUB Vd.4s, Vn.4s, Vm.4s
4512 010 01110 01 1 m 001011 n d SQSUB Vd.8h, Vn.8h, Vm.8h
4513 010 01110 00 1 m 001011 n d SQSUB Vd.16b, Vn.16b, Vm.16b
4515 011 01110 11 1 m 001011 n d UQSUB Vd.2d, Vn.2d, Vm.2d
4516 011 01110 10 1 m 001011 n d UQSUB Vd.4s, Vn.4s, Vm.4s
4517 011 01110 01 1 m 001011 n d UQSUB Vd.8h, Vn.8h, Vm.8h
4518 011 01110 00 1 m 001011 n d UQSUB Vd.16b, Vn.16b, Vm.16b
4520 000 01110 10 1 m 110100 n d SQDMULL Vd.2d, Vn.2s, Vm.2s
4521 000 01110 01 1 m 110100 n d SQDMULL Vd.4s, Vn.4h, Vm.4h
4523 010 01110 10 1 m 101101 n d SQDMULH Vd.4s, Vn.4s, Vm.4s
4524 010 01110 01 1 m 101101 n d SQDMULH Vd.8h, Vn.8h, Vm.8h
4525 011 01110 10 1 m 101101 n d SQRDMULH Vd.4s, Vn.4s, Vm.4s
4526 011 01110 10 1 m 101101 n d SQRDMULH Vd.8h, Vn.8h, Vm.8h
4528 010 01110 sz 1 m 010011 n d SQSHL@sz Vd, Vn, Vm
4529 010 01110 sz 1 m 010111 n d SQRSHL@sz Vd, Vn, Vm
4530 011 01110 sz 1 m 010011 n d UQSHL@sz Vd, Vn, Vm
4531 011 01110 sz 1 m 010111 n d URQSHL@sz Vd, Vn, Vm
4533 010 01110 sz 1 m 010001 n d SSHL@sz Vd, Vn, Vm
4534 010 01110 sz 1 m 010101 n d SRSHL@sz Vd, Vn, Vm
4535 011 01110 sz 1 m 010001 n d USHL@sz Vd, Vn, Vm
4536 011 01110 sz 1 m 010101 n d URSHL@sz Vd, Vn, Vm
4538 010 01110 01 1 m 111111 n d FRECPS Vd.2d, Vn.2d, Vm.2d
4539 010 01110 00 1 m 111111 n d FRECPS Vd.4s, Vn.4s, Vm.4s
4540 010 01110 11 1 m 111111 n d FRSQRTS Vd.2d, Vn.2d, Vm.2d
4541 010 01110 10 1 m 111111 n d FRSQRTS Vd.4s, Vn.4s, Vm.4s
4543 UInt vD
= qregEnc(i
->ARM64in
.VBinV
.dst
);
4544 UInt vN
= qregEnc(i
->ARM64in
.VBinV
.argL
);
4545 UInt vM
= qregEnc(i
->ARM64in
.VBinV
.argR
);
4546 switch (i
->ARM64in
.VBinV
.op
) {
4547 case ARM64vecb_ADD64x2
:
4548 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, vM
, X100001
, vN
, vD
);
4550 case ARM64vecb_ADD32x4
:
4551 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X100001
, vN
, vD
);
4553 case ARM64vecb_ADD16x8
:
4554 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X100001
, vN
, vD
);
4556 case ARM64vecb_ADD8x16
:
4557 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X100001
, vN
, vD
);
4559 case ARM64vecb_SUB64x2
:
4560 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, vM
, X100001
, vN
, vD
);
4562 case ARM64vecb_SUB32x4
:
4563 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X100001
, vN
, vD
);
4565 case ARM64vecb_SUB16x8
:
4566 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X100001
, vN
, vD
);
4568 case ARM64vecb_SUB8x16
:
4569 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X100001
, vN
, vD
);
4571 case ARM64vecb_MUL32x4
:
4572 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X100111
, vN
, vD
);
4574 case ARM64vecb_MUL16x8
:
4575 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X100111
, vN
, vD
);
4577 case ARM64vecb_MUL8x16
:
4578 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X100111
, vN
, vD
);
4580 case ARM64vecb_FADD64x2
:
4581 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X110101
, vN
, vD
);
4583 case ARM64vecb_FADD32x4
:
4584 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X110101
, vN
, vD
);
4586 case ARM64vecb_FSUB64x2
:
4587 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, vM
, X110101
, vN
, vD
);
4589 case ARM64vecb_FSUB32x4
:
4590 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X110101
, vN
, vD
);
4592 case ARM64vecb_FMUL64x2
:
4593 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X110111
, vN
, vD
);
4595 case ARM64vecb_FMUL32x4
:
4596 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X110111
, vN
, vD
);
4598 case ARM64vecb_FDIV64x2
:
4599 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X111111
, vN
, vD
);
4601 case ARM64vecb_FDIV32x4
:
4602 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X111111
, vN
, vD
);
4605 case ARM64vecb_FMAX64x2
:
4606 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X111101
, vN
, vD
);
4608 case ARM64vecb_FMAX32x4
:
4609 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X111101
, vN
, vD
);
4611 case ARM64vecb_FMIN64x2
:
4612 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, vM
, X111101
, vN
, vD
);
4614 case ARM64vecb_FMIN32x4
:
4615 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X111101
, vN
, vD
);
4618 case ARM64vecb_UMAX32x4
:
4619 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X011001
, vN
, vD
);
4621 case ARM64vecb_UMAX16x8
:
4622 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X011001
, vN
, vD
);
4624 case ARM64vecb_UMAX8x16
:
4625 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X011001
, vN
, vD
);
4628 case ARM64vecb_UMIN32x4
:
4629 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X011011
, vN
, vD
);
4631 case ARM64vecb_UMIN16x8
:
4632 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X011011
, vN
, vD
);
4634 case ARM64vecb_UMIN8x16
:
4635 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X011011
, vN
, vD
);
4638 case ARM64vecb_SMAX32x4
:
4639 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X011001
, vN
, vD
);
4641 case ARM64vecb_SMAX16x8
:
4642 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X011001
, vN
, vD
);
4644 case ARM64vecb_SMAX8x16
:
4645 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X011001
, vN
, vD
);
4648 case ARM64vecb_SMIN32x4
:
4649 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X011011
, vN
, vD
);
4651 case ARM64vecb_SMIN16x8
:
4652 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X011011
, vN
, vD
);
4654 case ARM64vecb_SMIN8x16
:
4655 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X011011
, vN
, vD
);
4659 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X000111
, vN
, vD
);
4662 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X000111
, vN
, vD
);
4665 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X000111
, vN
, vD
);
4668 case ARM64vecb_CMEQ64x2
:
4669 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, vM
, X100011
, vN
, vD
);
4671 case ARM64vecb_CMEQ32x4
:
4672 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X100011
, vN
, vD
);
4674 case ARM64vecb_CMEQ16x8
:
4675 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X100011
, vN
, vD
);
4677 case ARM64vecb_CMEQ8x16
:
4678 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X100011
, vN
, vD
);
4681 case ARM64vecb_CMHI64x2
:
4682 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, vM
, X001101
, vN
, vD
);
4684 case ARM64vecb_CMHI32x4
:
4685 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X001101
, vN
, vD
);
4687 case ARM64vecb_CMHI16x8
:
4688 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X001101
, vN
, vD
);
4690 case ARM64vecb_CMHI8x16
:
4691 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X001101
, vN
, vD
);
4694 case ARM64vecb_CMGT64x2
:
4695 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, vM
, X001101
, vN
, vD
);
4697 case ARM64vecb_CMGT32x4
:
4698 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X001101
, vN
, vD
);
4700 case ARM64vecb_CMGT16x8
:
4701 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X001101
, vN
, vD
);
4703 case ARM64vecb_CMGT8x16
:
4704 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X001101
, vN
, vD
);
4707 case ARM64vecb_FCMEQ64x2
:
4708 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X111001
, vN
, vD
);
4710 case ARM64vecb_FCMEQ32x4
:
4711 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X111001
, vN
, vD
);
4714 case ARM64vecb_FCMGE64x2
:
4715 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X111001
, vN
, vD
);
4717 case ARM64vecb_FCMGE32x4
:
4718 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X111001
, vN
, vD
);
4721 case ARM64vecb_FCMGT64x2
:
4722 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, vM
, X111001
, vN
, vD
);
4724 case ARM64vecb_FCMGT32x4
:
4725 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X111001
, vN
, vD
);
4728 case ARM64vecb_TBL1
:
4729 *p
++ = X_3_8_5_6_5_5(X010
, X01110000
, vM
, X000000
, vN
, vD
);
4732 case ARM64vecb_UZP164x2
:
4733 *p
++ = X_3_8_5_6_5_5(X010
, X01110110
, vM
, X000110
, vN
, vD
);
4735 case ARM64vecb_UZP132x4
:
4736 *p
++ = X_3_8_5_6_5_5(X010
, X01110100
, vM
, X000110
, vN
, vD
);
4738 case ARM64vecb_UZP116x8
:
4739 *p
++ = X_3_8_5_6_5_5(X010
, X01110010
, vM
, X000110
, vN
, vD
);
4741 case ARM64vecb_UZP18x16
:
4742 *p
++ = X_3_8_5_6_5_5(X010
, X01110000
, vM
, X000110
, vN
, vD
);
4745 case ARM64vecb_UZP264x2
:
4746 *p
++ = X_3_8_5_6_5_5(X010
, X01110110
, vM
, X010110
, vN
, vD
);
4748 case ARM64vecb_UZP232x4
:
4749 *p
++ = X_3_8_5_6_5_5(X010
, X01110100
, vM
, X010110
, vN
, vD
);
4751 case ARM64vecb_UZP216x8
:
4752 *p
++ = X_3_8_5_6_5_5(X010
, X01110010
, vM
, X010110
, vN
, vD
);
4754 case ARM64vecb_UZP28x16
:
4755 *p
++ = X_3_8_5_6_5_5(X010
, X01110000
, vM
, X010110
, vN
, vD
);
4758 case ARM64vecb_ZIP132x4
:
4759 *p
++ = X_3_8_5_6_5_5(X010
, X01110100
, vM
, X001110
, vN
, vD
);
4761 case ARM64vecb_ZIP116x8
:
4762 *p
++ = X_3_8_5_6_5_5(X010
, X01110010
, vM
, X001110
, vN
, vD
);
4764 case ARM64vecb_ZIP18x16
:
4765 *p
++ = X_3_8_5_6_5_5(X010
, X01110000
, vM
, X001110
, vN
, vD
);
4768 case ARM64vecb_ZIP232x4
:
4769 *p
++ = X_3_8_5_6_5_5(X010
, X01110100
, vM
, X011110
, vN
, vD
);
4771 case ARM64vecb_ZIP216x8
:
4772 *p
++ = X_3_8_5_6_5_5(X010
, X01110010
, vM
, X011110
, vN
, vD
);
4774 case ARM64vecb_ZIP28x16
:
4775 *p
++ = X_3_8_5_6_5_5(X010
, X01110000
, vM
, X011110
, vN
, vD
);
4778 case ARM64vecb_PMUL8x16
:
4779 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X100111
, vN
, vD
);
4782 case ARM64vecb_PMULL8x8
:
4783 *p
++ = X_3_8_5_6_5_5(X000
, X01110001
, vM
, X111000
, vN
, vD
);
4786 case ARM64vecb_UMULL2DSS
:
4787 *p
++ = X_3_8_5_6_5_5(X001
, X01110101
, vM
, X110000
, vN
, vD
);
4789 case ARM64vecb_UMULL4SHH
:
4790 *p
++ = X_3_8_5_6_5_5(X001
, X01110011
, vM
, X110000
, vN
, vD
);
4792 case ARM64vecb_UMULL8HBB
:
4793 *p
++ = X_3_8_5_6_5_5(X001
, X01110001
, vM
, X110000
, vN
, vD
);
4796 case ARM64vecb_SMULL2DSS
:
4797 *p
++ = X_3_8_5_6_5_5(X000
, X01110101
, vM
, X110000
, vN
, vD
);
4799 case ARM64vecb_SMULL4SHH
:
4800 *p
++ = X_3_8_5_6_5_5(X000
, X01110011
, vM
, X110000
, vN
, vD
);
4802 case ARM64vecb_SMULL8HBB
:
4803 *p
++ = X_3_8_5_6_5_5(X000
, X01110001
, vM
, X110000
, vN
, vD
);
4806 case ARM64vecb_SQADD64x2
:
4807 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, vM
, X000011
, vN
, vD
);
4809 case ARM64vecb_SQADD32x4
:
4810 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X000011
, vN
, vD
);
4812 case ARM64vecb_SQADD16x8
:
4813 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X000011
, vN
, vD
);
4815 case ARM64vecb_SQADD8x16
:
4816 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X000011
, vN
, vD
);
4819 case ARM64vecb_UQADD64x2
:
4820 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, vM
, X000011
, vN
, vD
);
4822 case ARM64vecb_UQADD32x4
:
4823 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X000011
, vN
, vD
);
4825 case ARM64vecb_UQADD16x8
:
4826 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X000011
, vN
, vD
);
4828 case ARM64vecb_UQADD8x16
:
4829 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X000011
, vN
, vD
);
4832 case ARM64vecb_SQSUB64x2
:
4833 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, vM
, X001011
, vN
, vD
);
4835 case ARM64vecb_SQSUB32x4
:
4836 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X001011
, vN
, vD
);
4838 case ARM64vecb_SQSUB16x8
:
4839 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X001011
, vN
, vD
);
4841 case ARM64vecb_SQSUB8x16
:
4842 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X001011
, vN
, vD
);
4845 case ARM64vecb_UQSUB64x2
:
4846 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, vM
, X001011
, vN
, vD
);
4848 case ARM64vecb_UQSUB32x4
:
4849 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X001011
, vN
, vD
);
4851 case ARM64vecb_UQSUB16x8
:
4852 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X001011
, vN
, vD
);
4854 case ARM64vecb_UQSUB8x16
:
4855 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X001011
, vN
, vD
);
4858 case ARM64vecb_SQDMULL2DSS
:
4859 *p
++ = X_3_8_5_6_5_5(X000
, X01110101
, vM
, X110100
, vN
, vD
);
4861 case ARM64vecb_SQDMULL4SHH
:
4862 *p
++ = X_3_8_5_6_5_5(X000
, X01110011
, vM
, X110100
, vN
, vD
);
4865 case ARM64vecb_SQDMULH32x4
:
4866 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X101101
, vN
, vD
);
4868 case ARM64vecb_SQDMULH16x8
:
4869 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X101101
, vN
, vD
);
4871 case ARM64vecb_SQRDMULH32x4
:
4872 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X101101
, vN
, vD
);
4874 case ARM64vecb_SQRDMULH16x8
:
4875 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X101101
, vN
, vD
);
4878 case ARM64vecb_SQSHL64x2
:
4879 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, vM
, X010011
, vN
, vD
);
4881 case ARM64vecb_SQSHL32x4
:
4882 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X010011
, vN
, vD
);
4884 case ARM64vecb_SQSHL16x8
:
4885 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X010011
, vN
, vD
);
4887 case ARM64vecb_SQSHL8x16
:
4888 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X010011
, vN
, vD
);
4891 case ARM64vecb_SQRSHL64x2
:
4892 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, vM
, X010111
, vN
, vD
);
4894 case ARM64vecb_SQRSHL32x4
:
4895 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X010111
, vN
, vD
);
4897 case ARM64vecb_SQRSHL16x8
:
4898 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X010111
, vN
, vD
);
4900 case ARM64vecb_SQRSHL8x16
:
4901 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X010111
, vN
, vD
);
4904 case ARM64vecb_UQSHL64x2
:
4905 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, vM
, X010011
, vN
, vD
);
4907 case ARM64vecb_UQSHL32x4
:
4908 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X010011
, vN
, vD
);
4910 case ARM64vecb_UQSHL16x8
:
4911 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X010011
, vN
, vD
);
4913 case ARM64vecb_UQSHL8x16
:
4914 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X010011
, vN
, vD
);
4917 case ARM64vecb_UQRSHL64x2
:
4918 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, vM
, X010111
, vN
, vD
);
4920 case ARM64vecb_UQRSHL32x4
:
4921 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X010111
, vN
, vD
);
4923 case ARM64vecb_UQRSHL16x8
:
4924 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X010111
, vN
, vD
);
4926 case ARM64vecb_UQRSHL8x16
:
4927 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X010111
, vN
, vD
);
4930 case ARM64vecb_SSHL64x2
:
4931 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, vM
, X010001
, vN
, vD
);
4933 case ARM64vecb_SSHL32x4
:
4934 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X010001
, vN
, vD
);
4936 case ARM64vecb_SSHL16x8
:
4937 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X010001
, vN
, vD
);
4939 case ARM64vecb_SSHL8x16
:
4940 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X010001
, vN
, vD
);
4943 case ARM64vecb_SRSHL64x2
:
4944 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, vM
, X010101
, vN
, vD
);
4946 case ARM64vecb_SRSHL32x4
:
4947 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X010101
, vN
, vD
);
4949 case ARM64vecb_SRSHL16x8
:
4950 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X010101
, vN
, vD
);
4952 case ARM64vecb_SRSHL8x16
:
4953 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X010101
, vN
, vD
);
4956 case ARM64vecb_USHL64x2
:
4957 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, vM
, X010001
, vN
, vD
);
4959 case ARM64vecb_USHL32x4
:
4960 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X010001
, vN
, vD
);
4962 case ARM64vecb_USHL16x8
:
4963 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X010001
, vN
, vD
);
4965 case ARM64vecb_USHL8x16
:
4966 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X010001
, vN
, vD
);
4969 case ARM64vecb_URSHL64x2
:
4970 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, vM
, X010101
, vN
, vD
);
4972 case ARM64vecb_URSHL32x4
:
4973 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X010101
, vN
, vD
);
4975 case ARM64vecb_URSHL16x8
:
4976 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X010101
, vN
, vD
);
4978 case ARM64vecb_URSHL8x16
:
4979 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X010101
, vN
, vD
);
4982 case ARM64vecb_FRECPS64x2
:
4983 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X111111
, vN
, vD
);
4985 case ARM64vecb_FRECPS32x4
:
4986 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X111111
, vN
, vD
);
4988 case ARM64vecb_FRSQRTS64x2
:
4989 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, vM
, X111111
, vN
, vD
);
4991 case ARM64vecb_FRSQRTS32x4
:
4992 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X111111
, vN
, vD
);
5000 case ARM64in_VModifyV
: {
5002 010 01110 sz 1 00000 001110 n d SUQADD@sz Vd, Vn
5003 011 01110 sz 1 00000 001110 n d USQADD@sz Vd, Vn
5005 UInt vD
= qregEnc(i
->ARM64in
.VModifyV
.mod
);
5006 UInt vN
= qregEnc(i
->ARM64in
.VModifyV
.arg
);
5007 switch (i
->ARM64in
.VModifyV
.op
) {
5008 case ARM64vecmo_SUQADD64x2
:
5009 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, X00000
, X001110
, vN
, vD
);
5011 case ARM64vecmo_SUQADD32x4
:
5012 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, X00000
, X001110
, vN
, vD
);
5014 case ARM64vecmo_SUQADD16x8
:
5015 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, X00000
, X001110
, vN
, vD
);
5017 case ARM64vecmo_SUQADD8x16
:
5018 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, X00000
, X001110
, vN
, vD
);
5020 case ARM64vecmo_USQADD64x2
:
5021 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, X00000
, X001110
, vN
, vD
);
5023 case ARM64vecmo_USQADD32x4
:
5024 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, X00000
, X001110
, vN
, vD
);
5026 case ARM64vecmo_USQADD16x8
:
5027 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, X00000
, X001110
, vN
, vD
);
5029 case ARM64vecmo_USQADD8x16
:
5030 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, X00000
, X001110
, vN
, vD
);
5037 case ARM64in_VUnaryV
: {
5039 010 01110 11 1 00000 111110 n d FABS Vd.2d, Vn.2d
5040 010 01110 10 1 00000 111110 n d FABS Vd.4s, Vn.4s
5041 011 01110 11 1 00000 111110 n d FNEG Vd.2d, Vn.2d
5042 011 01110 10 1 00000 111110 n d FNEG Vd.4s, Vn.4s
5043 011 01110 00 1 00000 010110 n d NOT Vd.16b, Vn.16b
5045 010 01110 11 1 00000 101110 n d ABS Vd.2d, Vn.2d
5046 010 01110 10 1 00000 101110 n d ABS Vd.4s, Vn.4s
5047 010 01110 01 1 00000 101110 n d ABS Vd.8h, Vn.8h
5048 010 01110 00 1 00000 101110 n d ABS Vd.16b, Vn.16b
5050 010 01110 10 1 00000 010010 n d CLS Vd.4s, Vn.4s
5051 010 01110 01 1 00000 010010 n d CLS Vd.8h, Vn.8h
5052 010 01110 00 1 00000 010010 n d CLS Vd.16b, Vn.16b
5054 011 01110 10 1 00000 010010 n d CLZ Vd.4s, Vn.4s
5055 011 01110 01 1 00000 010010 n d CLZ Vd.8h, Vn.8h
5056 011 01110 00 1 00000 010010 n d CLZ Vd.16b, Vn.16b
5058 010 01110 00 1 00000 010110 n d CNT Vd.16b, Vn.16b
5060 011 01110 01 1 00000 010110 n d RBIT Vd.16b, Vn.16b
5061 010 01110 00 1 00000 000110 n d REV16 Vd.16b, Vn.16b
5062 011 01110 00 1 00000 000010 n d REV32 Vd.16b, Vn.16b
5063 011 01110 01 1 00000 000010 n d REV32 Vd.8h, Vn.8h
5065 010 01110 00 1 00000 000010 n d REV64 Vd.16b, Vn.16b
5066 010 01110 01 1 00000 000010 n d REV64 Vd.8h, Vn.8h
5067 010 01110 10 1 00000 000010 n d REV64 Vd.4s, Vn.4s
5069 010 01110 10 1 00001 110010 n d URECPE Vd.4s, Vn.4s
5070 011 01110 10 1 00001 110010 n d URSQRTE Vd.4s, Vn.4s
5072 010 01110 11 1 00001 110110 n d FRECPE Vd.2d, Vn.2d
5073 010 01110 10 1 00001 110110 n d FRECPE Vd.4s, Vn.4s
5075 011 01110 11 1 00001 110110 n d FRECPE Vd.2d, Vn.2d
5076 011 01110 10 1 00001 110110 n d FRECPE Vd.4s, Vn.4s
5078 011 01110 11 1 00001 111110 n d FSQRT Vd.2d, Vn.2d
5079 011 01110 10 1 00001 111110 n d FSQRT Vd.4s, Vn.4s
5081 UInt vD
= qregEnc(i
->ARM64in
.VUnaryV
.dst
);
5082 UInt vN
= qregEnc(i
->ARM64in
.VUnaryV
.arg
);
5083 switch (i
->ARM64in
.VUnaryV
.op
) {
5084 case ARM64vecu_FABS64x2
:
5085 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, X00000
, X111110
, vN
, vD
);
5087 case ARM64vecu_FABS32x4
:
5088 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, X00000
, X111110
, vN
, vD
);
5090 case ARM64vecu_FNEG64x2
:
5091 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, X00000
, X111110
, vN
, vD
);
5093 case ARM64vecu_FNEG32x4
:
5094 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, X00000
, X111110
, vN
, vD
);
5097 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, X00000
, X010110
, vN
, vD
);
5099 case ARM64vecu_ABS64x2
:
5100 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, X00000
, X101110
, vN
, vD
);
5102 case ARM64vecu_ABS32x4
:
5103 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, X00000
, X101110
, vN
, vD
);
5105 case ARM64vecu_ABS16x8
:
5106 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, X00000
, X101110
, vN
, vD
);
5108 case ARM64vecu_ABS8x16
:
5109 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, X00000
, X101110
, vN
, vD
);
5111 case ARM64vecu_CLS32x4
:
5112 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, X00000
, X010010
, vN
, vD
);
5114 case ARM64vecu_CLS16x8
:
5115 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, X00000
, X010010
, vN
, vD
);
5117 case ARM64vecu_CLS8x16
:
5118 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, X00000
, X010010
, vN
, vD
);
5120 case ARM64vecu_CLZ32x4
:
5121 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, X00000
, X010010
, vN
, vD
);
5123 case ARM64vecu_CLZ16x8
:
5124 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, X00000
, X010010
, vN
, vD
);
5126 case ARM64vecu_CLZ8x16
:
5127 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, X00000
, X010010
, vN
, vD
);
5129 case ARM64vecu_CNT8x16
:
5130 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, X00000
, X010110
, vN
, vD
);
5132 case ARM64vecu_RBIT
:
5133 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, X00000
, X010110
, vN
, vD
);
5135 case ARM64vecu_REV1616B
:
5136 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, X00000
, X000110
, vN
, vD
);
5138 case ARM64vecu_REV3216B
:
5139 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, X00000
, X000010
, vN
, vD
);
5141 case ARM64vecu_REV328H
:
5142 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, X00000
, X000010
, vN
, vD
);
5144 case ARM64vecu_REV6416B
:
5145 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, X00000
, X000010
, vN
, vD
);
5147 case ARM64vecu_REV648H
:
5148 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, X00000
, X000010
, vN
, vD
);
5150 case ARM64vecu_REV644S
:
5151 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, X00000
, X000010
, vN
, vD
);
5153 case ARM64vecu_URECPE32x4
:
5154 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, X00001
, X110010
, vN
, vD
);
5156 case ARM64vecu_URSQRTE32x4
:
5157 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, X00001
, X110010
, vN
, vD
);
5159 case ARM64vecu_FRECPE64x2
:
5160 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, X00001
, X110110
, vN
, vD
);
5162 case ARM64vecu_FRECPE32x4
:
5163 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, X00001
, X110110
, vN
, vD
);
5165 case ARM64vecu_FRSQRTE64x2
:
5166 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, X00001
, X110110
, vN
, vD
);
5168 case ARM64vecu_FRSQRTE32x4
:
5169 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, X00001
, X110110
, vN
, vD
);
5171 case ARM64vecu_FSQRT64x2
:
5172 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, X00001
, X111110
, vN
, vD
);
5174 case ARM64vecu_FSQRT32x4
:
5175 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, X00001
, X111110
, vN
, vD
);
5182 case ARM64in_VNarrowV
: {
5184 000 01110 00 1,00001 001010 n d XTN Vd.8b, Vn.8h
5185 000 01110 01 1,00001 001010 n d XTN Vd.4h, Vn.4s
5186 000 01110 10 1,00001 001010 n d XTN Vd.2s, Vn.2d
5188 001 01110 00 1,00001 001010 n d SQXTUN Vd.8b, Vn.8h
5189 001 01110 01 1,00001 001010 n d SQXTUN Vd.4h, Vn.4s
5190 001 01110 10 1,00001 001010 n d SQXTUN Vd.2s, Vn.2d
5192 000 01110 00 1,00001 010010 n d SQXTN Vd.8b, Vn.8h
5193 000 01110 01 1,00001 010010 n d SQXTN Vd.4h, Vn.4s
5194 000 01110 10 1,00001 010010 n d SQXTN Vd.2s, Vn.2d
5196 001 01110 00 1,00001 010010 n d UQXTN Vd.8b, Vn.8h
5197 001 01110 01 1,00001 010010 n d UQXTN Vd.4h, Vn.4s
5198 001 01110 10 1,00001 010010 n d UQXTN Vd.2s, Vn.2d
5200 UInt vD
= qregEnc(i
->ARM64in
.VNarrowV
.dst
);
5201 UInt vN
= qregEnc(i
->ARM64in
.VNarrowV
.src
);
5202 UInt dszBlg2
= i
->ARM64in
.VNarrowV
.dszBlg2
;
5203 vassert(dszBlg2
>= 0 && dszBlg2
<= 2);
5204 switch (i
->ARM64in
.VNarrowV
.op
) {
5205 case ARM64vecna_XTN
:
5206 *p
++ = X_3_8_5_6_5_5(X000
, X01110001
| (dszBlg2
<< 1),
5207 X00001
, X001010
, vN
, vD
);
5209 case ARM64vecna_SQXTUN
:
5210 *p
++ = X_3_8_5_6_5_5(X001
, X01110001
| (dszBlg2
<< 1),
5211 X00001
, X001010
, vN
, vD
);
5213 case ARM64vecna_SQXTN
:
5214 *p
++ = X_3_8_5_6_5_5(X000
, X01110001
| (dszBlg2
<< 1),
5215 X00001
, X010010
, vN
, vD
);
5217 case ARM64vecna_UQXTN
:
5218 *p
++ = X_3_8_5_6_5_5(X001
, X01110001
| (dszBlg2
<< 1),
5219 X00001
, X010010
, vN
, vD
);
5226 case ARM64in_VShiftImmV
: {
5228 011 011110 immh immb 000001 n d USHR Vd.T, Vn.T, #sh
5229 010 011110 immh immb 000001 n d SSHR Vd.T, Vn.T, #sh
5231 001 011110 immh immb 100101 n d UQSHRN ,,#sh
5232 000 011110 immh immb 100101 n d SQSHRN ,,#sh
5233 001 011110 immh immb 100001 n d SQSHRUN ,,#sh
5235 001 011110 immh immb 100111 n d UQRSHRN ,,#sh
5236 000 011110 immh immb 100111 n d SQRSHRN ,,#sh
5237 001 011110 immh immb 100011 n d SQRSHRUN ,,#sh
5241 2d | sh in 1..64 -> let xxxxxx = 64-sh in 1xxx:xxx
5242 4s | sh in 1..32 -> let xxxxx = 32-sh in 01xx:xxx
5243 8h | sh in 1..16 -> let xxxx = 16-sh in 001x:xxx
5244 16b | sh in 1..8 -> let xxx = 8-sh in 0001:xxx
5246 010 011110 immh immb 010101 n d SHL Vd.T, Vn.T, #sh
5248 011 011110 immh immb 011101 n d UQSHL Vd.T, Vn.T, #sh
5249 010 011110 immh immb 011101 n d SQSHL Vd.T, Vn.T, #sh
5250 011 011110 immh immb 011001 n d SQSHLU Vd.T, Vn.T, #sh
5254 2d | sh in 0..63 -> let xxxxxx = sh in 1xxx:xxx
5255 4s | sh in 0..31 -> let xxxxx = sh in 01xx:xxx
5256 8h | sh in 0..15 -> let xxxx = sh in 001x:xxx
5257 16b | sh in 0..7 -> let xxx = sh in 0001:xxx
5259 UInt vD
= qregEnc(i
->ARM64in
.VShiftImmV
.dst
);
5260 UInt vN
= qregEnc(i
->ARM64in
.VShiftImmV
.src
);
5261 UInt sh
= i
->ARM64in
.VShiftImmV
.amt
;
5262 UInt tmpl
= 0; /* invalid */
5264 const UInt tmpl_USHR
5265 = X_3_6_7_6_5_5(X011
, X011110
, 0, X000001
, vN
, vD
);
5266 const UInt tmpl_SSHR
5267 = X_3_6_7_6_5_5(X010
, X011110
, 0, X000001
, vN
, vD
);
5269 const UInt tmpl_UQSHRN
5270 = X_3_6_7_6_5_5(X001
, X011110
, 0, X100101
, vN
, vD
);
5271 const UInt tmpl_SQSHRN
5272 = X_3_6_7_6_5_5(X000
, X011110
, 0, X100101
, vN
, vD
);
5273 const UInt tmpl_SQSHRUN
5274 = X_3_6_7_6_5_5(X001
, X011110
, 0, X100001
, vN
, vD
);
5276 const UInt tmpl_UQRSHRN
5277 = X_3_6_7_6_5_5(X001
, X011110
, 0, X100111
, vN
, vD
);
5278 const UInt tmpl_SQRSHRN
5279 = X_3_6_7_6_5_5(X000
, X011110
, 0, X100111
, vN
, vD
);
5280 const UInt tmpl_SQRSHRUN
5281 = X_3_6_7_6_5_5(X001
, X011110
, 0, X100011
, vN
, vD
);
5284 = X_3_6_7_6_5_5(X010
, X011110
, 0, X010101
, vN
, vD
);
5286 const UInt tmpl_UQSHL
5287 = X_3_6_7_6_5_5(X011
, X011110
, 0, X011101
, vN
, vD
);
5288 const UInt tmpl_SQSHL
5289 = X_3_6_7_6_5_5(X010
, X011110
, 0, X011101
, vN
, vD
);
5290 const UInt tmpl_SQSHLU
5291 = X_3_6_7_6_5_5(X011
, X011110
, 0, X011001
, vN
, vD
);
5293 switch (i
->ARM64in
.VShiftImmV
.op
) {
5294 case ARM64vecshi_SSHR64x2
: tmpl
= tmpl_SSHR
; goto right64x2
;
5295 case ARM64vecshi_USHR64x2
: tmpl
= tmpl_USHR
; goto right64x2
;
5296 case ARM64vecshi_SHL64x2
: tmpl
= tmpl_SHL
; goto left64x2
;
5297 case ARM64vecshi_UQSHL64x2
: tmpl
= tmpl_UQSHL
; goto left64x2
;
5298 case ARM64vecshi_SQSHL64x2
: tmpl
= tmpl_SQSHL
; goto left64x2
;
5299 case ARM64vecshi_SQSHLU64x2
: tmpl
= tmpl_SQSHLU
; goto left64x2
;
5300 case ARM64vecshi_SSHR32x4
: tmpl
= tmpl_SSHR
; goto right32x4
;
5301 case ARM64vecshi_USHR32x4
: tmpl
= tmpl_USHR
; goto right32x4
;
5302 case ARM64vecshi_UQSHRN2SD
: tmpl
= tmpl_UQSHRN
; goto right32x4
;
5303 case ARM64vecshi_SQSHRN2SD
: tmpl
= tmpl_SQSHRN
; goto right32x4
;
5304 case ARM64vecshi_SQSHRUN2SD
: tmpl
= tmpl_SQSHRUN
; goto right32x4
;
5305 case ARM64vecshi_UQRSHRN2SD
: tmpl
= tmpl_UQRSHRN
; goto right32x4
;
5306 case ARM64vecshi_SQRSHRN2SD
: tmpl
= tmpl_SQRSHRN
; goto right32x4
;
5307 case ARM64vecshi_SQRSHRUN2SD
: tmpl
= tmpl_SQRSHRUN
; goto right32x4
;
5308 case ARM64vecshi_SHL32x4
: tmpl
= tmpl_SHL
; goto left32x4
;
5309 case ARM64vecshi_UQSHL32x4
: tmpl
= tmpl_UQSHL
; goto left32x4
;
5310 case ARM64vecshi_SQSHL32x4
: tmpl
= tmpl_SQSHL
; goto left32x4
;
5311 case ARM64vecshi_SQSHLU32x4
: tmpl
= tmpl_SQSHLU
; goto left32x4
;
5312 case ARM64vecshi_SSHR16x8
: tmpl
= tmpl_SSHR
; goto right16x8
;
5313 case ARM64vecshi_USHR16x8
: tmpl
= tmpl_USHR
; goto right16x8
;
5314 case ARM64vecshi_UQSHRN4HS
: tmpl
= tmpl_UQSHRN
; goto right16x8
;
5315 case ARM64vecshi_SQSHRN4HS
: tmpl
= tmpl_SQSHRN
; goto right16x8
;
5316 case ARM64vecshi_SQSHRUN4HS
: tmpl
= tmpl_SQSHRUN
; goto right16x8
;
5317 case ARM64vecshi_UQRSHRN4HS
: tmpl
= tmpl_UQRSHRN
; goto right16x8
;
5318 case ARM64vecshi_SQRSHRN4HS
: tmpl
= tmpl_SQRSHRN
; goto right16x8
;
5319 case ARM64vecshi_SQRSHRUN4HS
: tmpl
= tmpl_SQRSHRUN
; goto right16x8
;
5320 case ARM64vecshi_SHL16x8
: tmpl
= tmpl_SHL
; goto left16x8
;
5321 case ARM64vecshi_UQSHL16x8
: tmpl
= tmpl_UQSHL
; goto left16x8
;
5322 case ARM64vecshi_SQSHL16x8
: tmpl
= tmpl_SQSHL
; goto left16x8
;
5323 case ARM64vecshi_SQSHLU16x8
: tmpl
= tmpl_SQSHLU
; goto left16x8
;
5324 case ARM64vecshi_SSHR8x16
: tmpl
= tmpl_SSHR
; goto right8x16
;
5325 case ARM64vecshi_USHR8x16
: tmpl
= tmpl_USHR
; goto right8x16
;
5326 case ARM64vecshi_UQSHRN8BH
: tmpl
= tmpl_UQSHRN
; goto right8x16
;
5327 case ARM64vecshi_SQSHRN8BH
: tmpl
= tmpl_SQSHRN
; goto right8x16
;
5328 case ARM64vecshi_SQSHRUN8BH
: tmpl
= tmpl_SQSHRUN
; goto right8x16
;
5329 case ARM64vecshi_UQRSHRN8BH
: tmpl
= tmpl_UQRSHRN
; goto right8x16
;
5330 case ARM64vecshi_SQRSHRN8BH
: tmpl
= tmpl_SQRSHRN
; goto right8x16
;
5331 case ARM64vecshi_SQRSHRUN8BH
: tmpl
= tmpl_SQRSHRUN
; goto right8x16
;
5332 case ARM64vecshi_SHL8x16
: tmpl
= tmpl_SHL
; goto left8x16
;
5333 case ARM64vecshi_UQSHL8x16
: tmpl
= tmpl_UQSHL
; goto left8x16
;
5334 case ARM64vecshi_SQSHL8x16
: tmpl
= tmpl_SQSHL
; goto left8x16
;
5335 case ARM64vecshi_SQSHLU8x16
: tmpl
= tmpl_SQSHLU
; goto left8x16
;
5340 if (sh
>= 1 && sh
<= 63) {
5341 *p
++ = tmpl
| X_3_6_7_6_5_5(0,0, X1000000
| (64-sh
), 0,0,0);
5346 if (sh
>= 1 && sh
<= 32) {
5347 *p
++ = tmpl
| X_3_6_7_6_5_5(0,0, X0100000
| (32-sh
), 0,0,0);
5352 if (sh
>= 1 && sh
<= 16) {
5353 *p
++ = tmpl
| X_3_6_7_6_5_5(0,0, X0010000
| (16-sh
), 0,0,0);
5358 if (sh
>= 1 && sh
<= 8) {
5359 *p
++ = tmpl
| X_3_6_7_6_5_5(0,0, X0001000
| (8-sh
), 0,0,0);
5365 if (sh
>= 0 && sh
<= 63) {
5366 *p
++ = tmpl
| X_3_6_7_6_5_5(0,0, X1000000
| sh
, 0,0,0);
5371 if (sh
>= 0 && sh
<= 31) {
5372 *p
++ = tmpl
| X_3_6_7_6_5_5(0,0, X0100000
| sh
, 0,0,0);
5377 if (sh
>= 0 && sh
<= 15) {
5378 *p
++ = tmpl
| X_3_6_7_6_5_5(0,0, X0010000
| sh
, 0,0,0);
5383 if (sh
>= 0 && sh
<= 7) {
5384 *p
++ = tmpl
| X_3_6_7_6_5_5(0,0, X0001000
| sh
, 0,0,0);
5391 case ARM64in_VExtV
: {
5393 011 01110 000 m 0 imm4 0 n d EXT Vd.16b, Vn.16b, Vm.16b, #imm4
5394 where imm4 = the shift amount, in bytes,
5395 Vn is low operand, Vm is high operand
5397 UInt vD
= qregEnc(i
->ARM64in
.VExtV
.dst
);
5398 UInt vN
= qregEnc(i
->ARM64in
.VExtV
.srcLo
);
5399 UInt vM
= qregEnc(i
->ARM64in
.VExtV
.srcHi
);
5400 UInt imm4
= i
->ARM64in
.VExtV
.amtB
;
5401 vassert(imm4
>= 1 && imm4
<= 15);
5402 *p
++ = X_3_8_5_6_5_5(X011
, X01110000
, vM
,
5403 X000000
| (imm4
<< 1), vN
, vD
);
5406 case ARM64in_VImmQ
: {
5407 UInt rQ
= qregEnc(i
->ARM64in
.VImmQ
.rQ
);
5408 UShort imm
= i
->ARM64in
.VImmQ
.imm
;
5412 // movi rQ.4s, #0x0 == 0x4F 0x00 0x04 000 rQ
5413 *p
++ = 0x4F000400 | rQ
;
5416 // movi rQ, #0xFF == 0x2F 0x00 0xE4 001 rQ
5417 *p
++ = 0x2F00E420 | rQ
;
5420 // movi rQ, #0xFFFF == 0x2F 0x00 0xE4 011 rQ
5421 *p
++ = 0x2F00E460 | rQ
;
5424 // movi rQ, #0xFFFFFFFF == 0x2F 0x00 0xE5 111 rQ
5425 *p
++ = 0x2F00E5E0 | rQ
;
5428 // movi rQ, #0xFFFFFFFFFFFF == 0x2F 0x01 0xE7 111 rQ
5429 *p
++ = 0x2F01E7E0 | rQ
;
5432 // movi rQ, #0xFFFFFFFFFFFFFFFF == 0x2F 0x07 0xE7 111 rQ
5433 *p
++ = 0x2F07E7E0 | rQ
;
5436 // mvni rQ.4s, #0x0 == 0x6F 0x00 0x04 000 rQ
5437 *p
++ = 0x6F000400 | rQ
;
5442 goto bad
; /* no other handled cases right now */
5445 case ARM64in_VDfromX
: {
5447 0100 1110 0000 1000 0001 11 nn dd INS Vd.D[0], Xn
5448 This isn't wonderful, in the sense that the upper half of
5449 the vector register stays unchanged and thus the insn is
5450 data dependent on its output register. */
5451 UInt dd
= dregEnc(i
->ARM64in
.VDfromX
.rD
);
5452 UInt xx
= iregEnc(i
->ARM64in
.VDfromX
.rX
);
5454 *p
++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xx
,dd
);
5458 case ARM64in_VQfromX
: {
5460 1001 1110 0110 0111 0000 00 nn dd FMOV Vd.D[0], Xn
5461 I think this zeroes out the top half of the destination, which
5462 is what we need. TODO: can we do VDfromX and VQfromXX better? */
5463 UInt dd
= qregEnc(i
->ARM64in
.VQfromX
.rQ
);
5464 UInt xx
= iregEnc(i
->ARM64in
.VQfromX
.rXlo
);
5466 *p
++ = 0x9E670000 | X_2_6_2_12_5_5(0,0,0,0,xx
,dd
);
5470 case ARM64in_VQfromXX
: {
5471 /* What we really generate is a two insn sequence:
5472 INS Vd.D[0], Xlo; INS Vd.D[1], Xhi
5473 0100 1110 0000 1000 0001 11 nn dd INS Vd.D[0], Xn
5474 0100 1110 0001 1000 0001 11 nn dd INS Vd.D[1], Xn
5476 UInt qq
= qregEnc(i
->ARM64in
.VQfromXX
.rQ
);
5477 UInt xhi
= iregEnc(i
->ARM64in
.VQfromXX
.rXhi
);
5478 UInt xlo
= iregEnc(i
->ARM64in
.VQfromXX
.rXlo
);
5479 vassert(xhi
< 31 && xlo
< 31);
5480 *p
++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xlo
,qq
);
5481 *p
++ = 0x4E181C00 | X_2_6_2_12_5_5(0,0,0,0,xhi
,qq
);
5485 case ARM64in_VXfromQ
: {
5486 /* 010 0111 0000 01000 001111 nn dd UMOV Xd, Vn.D[0]
5487 010 0111 0000 11000 001111 nn dd UMOV Xd, Vn.D[1]
5489 UInt dd
= iregEnc(i
->ARM64in
.VXfromQ
.rX
);
5490 UInt nn
= qregEnc(i
->ARM64in
.VXfromQ
.rQ
);
5491 UInt laneNo
= i
->ARM64in
.VXfromQ
.laneNo
;
5493 vassert(laneNo
< 2);
5494 *p
++ = X_3_8_5_6_5_5(X010
, X01110000
,
5495 laneNo
== 1 ? X11000
: X01000
, X001111
, nn
, dd
);
5499 case ARM64in_VXfromDorS
: {
5500 /* 000 11110001 00110 000000 n d FMOV Wd, Sn
5501 100 11110011 00110 000000 n d FMOV Xd, Dn
5503 UInt dd
= iregEnc(i
->ARM64in
.VXfromDorS
.rX
);
5504 UInt nn
= dregEnc(i
->ARM64in
.VXfromDorS
.rDorS
);
5505 Bool fromD
= i
->ARM64in
.VXfromDorS
.fromD
;
5507 *p
++ = X_3_8_5_6_5_5(fromD
? X100
: X000
,
5508 fromD
? X11110011
: X11110001
,
5509 X00110
, X000000
, nn
, dd
);
5513 case ARM64in_VMov
: {
5514 /* 000 11110 00 10000 00 10000 n d FMOV Sd, Sn
5515 000 11110 01 10000 00 10000 n d FMOV Dd, Dn
5516 010 01110 10 1 n 0 00111 n d MOV Vd.16b, Vn.16b
5518 HReg rD
= i
->ARM64in
.VMov
.dst
;
5519 HReg rN
= i
->ARM64in
.VMov
.src
;
5520 switch (i
->ARM64in
.VMov
.szB
) {
5522 UInt dd
= qregEnc(rD
);
5523 UInt nn
= qregEnc(rN
);
5524 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, nn
, X000111
, nn
, dd
);
5528 UInt dd
= dregEnc(rD
);
5529 UInt nn
= dregEnc(rN
);
5530 *p
++ = X_3_8_5_6_5_5(X000
, X11110011
, X00000
, X010000
, nn
, dd
);
5539 case ARM64in_EvCheck
: {
5540 /* The sequence is fixed (canned) except for the two amodes
5541 supplied by the insn. These don't change the length, though.
5543 ldr w9, [x21 + #8] 8 == offsetof(host_EvC_COUNTER)
5545 str w9, [x21 + #8] 8 == offsetof(host_EvC_COUNTER)
5547 ldr x9, [x21 + #0] 0 == offsetof(host_EvC_FAILADDR)
5552 p
= do_load_or_store32(p
, True
/*isLoad*/, /*w*/9,
5553 i
->ARM64in
.EvCheck
.amCounter
);
5554 *p
++ = 0x71000529; /* subs w9, w9, #1 */
5555 p
= do_load_or_store32(p
, False
/*!isLoad*/, /*w*/9,
5556 i
->ARM64in
.EvCheck
.amCounter
);
5557 *p
++ = 0x54000065; /* bpl nofail */
5558 p
= do_load_or_store64(p
, True
/*isLoad*/, /*x*/9,
5559 i
->ARM64in
.EvCheck
.amFailAddr
);
5560 *p
++ = 0xD61F0120; /* br x9 */
5564 vassert(evCheckSzB_ARM64() == (UChar
*)p
- (UChar
*)p0
);
5568 case ARM64in_ProfInc
: {
5570 (ctrP is unknown now, so use 0x6555'7555'8555'9566 in the
5571 expectation that a later call to LibVEX_patchProfCtr
5572 will be used to fill in the immediate fields once the
5573 right value is known.)
5574 imm64-exactly4 x9, 0x6555'7555'8555'9566
5579 p
= imm64_to_ireg_EXACTLY4(p
, /*x*/9, 0x6555755585559566ULL
);
5583 /* Tell the caller .. */
5584 vassert(!(*is_profInc
));
5596 vpanic("emit_ARM64Instr");
5600 vassert(((UChar
*)p
) - &buf
[0] <= 40);
5601 return ((UChar
*)p
) - &buf
[0];
5605 /* How big is an event check? See case for ARM64in_EvCheck in
5606 emit_ARM64Instr just above. That crosschecks what this returns, so
5607 we can tell if we're inconsistent. */
5608 Int
evCheckSzB_ARM64 (void)
5614 /* NB: what goes on here has to be very closely coordinated with the
5615 emitInstr case for XDirect, above. */
5616 VexInvalRange
chainXDirect_ARM64 ( VexEndness endness_host
,
5617 void* place_to_chain
,
5618 const void* disp_cp_chain_me_EXPECTED
,
5619 const void* place_to_jump_to
)
5621 vassert(endness_host
== VexEndnessLE
);
5623 /* What we're expecting to see is:
5624 movw x9, disp_cp_chain_me_to_EXPECTED[15:0]
5625 movk x9, disp_cp_chain_me_to_EXPECTED[31:15], lsl 16
5626 movk x9, disp_cp_chain_me_to_EXPECTED[47:32], lsl 32
5627 movk x9, disp_cp_chain_me_to_EXPECTED[63:48], lsl 48
5630 <16 bytes generated by imm64_to_ireg_EXACTLY4>
5633 UInt
* p
= (UInt
*)place_to_chain
;
5634 vassert(0 == (3 & (HWord
)p
));
5635 vassert(is_imm64_to_ireg_EXACTLY4(
5636 p
, /*x*/9, (Addr
)disp_cp_chain_me_EXPECTED
));
5637 vassert(p
[4] == 0xD63F0120);
5639 /* And what we want to change it to is:
5640 movw x9, place_to_jump_to[15:0]
5641 movk x9, place_to_jump_to[31:15], lsl 16
5642 movk x9, place_to_jump_to[47:32], lsl 32
5643 movk x9, place_to_jump_to[63:48], lsl 48
5646 <16 bytes generated by imm64_to_ireg_EXACTLY4>
5649 The replacement has the same length as the original.
5651 (void)imm64_to_ireg_EXACTLY4(p
, /*x*/9, (Addr
)place_to_jump_to
);
5654 VexInvalRange vir
= {(HWord
)p
, 20};
5659 /* NB: what goes on here has to be very closely coordinated with the
5660 emitInstr case for XDirect, above. */
5661 VexInvalRange
unchainXDirect_ARM64 ( VexEndness endness_host
,
5662 void* place_to_unchain
,
5663 const void* place_to_jump_to_EXPECTED
,
5664 const void* disp_cp_chain_me
)
5666 vassert(endness_host
== VexEndnessLE
);
5668 /* What we're expecting to see is:
5669 movw x9, place_to_jump_to_EXPECTED[15:0]
5670 movk x9, place_to_jump_to_EXPECTED[31:15], lsl 16
5671 movk x9, place_to_jump_to_EXPECTED[47:32], lsl 32
5672 movk x9, place_to_jump_to_EXPECTED[63:48], lsl 48
5675 <16 bytes generated by imm64_to_ireg_EXACTLY4>
5678 UInt
* p
= (UInt
*)place_to_unchain
;
5679 vassert(0 == (3 & (HWord
)p
));
5680 vassert(is_imm64_to_ireg_EXACTLY4(
5681 p
, /*x*/9, (Addr
)place_to_jump_to_EXPECTED
));
5682 vassert(p
[4] == 0xD61F0120);
5684 /* And what we want to change it to is:
5685 movw x9, disp_cp_chain_me_to[15:0]
5686 movk x9, disp_cp_chain_me_to[31:15], lsl 16
5687 movk x9, disp_cp_chain_me_to[47:32], lsl 32
5688 movk x9, disp_cp_chain_me_to[63:48], lsl 48
5691 <16 bytes generated by imm64_to_ireg_EXACTLY4>
5694 (void)imm64_to_ireg_EXACTLY4(p
, /*x*/9, (Addr
)disp_cp_chain_me
);
5697 VexInvalRange vir
= {(HWord
)p
, 20};
5702 /* Patch the counter address into a profile inc point, as previously
5703 created by the ARM64in_ProfInc case for emit_ARM64Instr. */
5704 VexInvalRange
patchProfInc_ARM64 ( VexEndness endness_host
,
5705 void* place_to_patch
,
5706 const ULong
* location_of_counter
)
5708 vassert(sizeof(ULong
*) == 8);
5709 vassert(endness_host
== VexEndnessLE
);
5710 UInt
* p
= (UInt
*)place_to_patch
;
5711 vassert(0 == (3 & (HWord
)p
));
5712 vassert(is_imm64_to_ireg_EXACTLY4(p
, /*x*/9, 0x6555755585559566ULL
));
5713 vassert(p
[4] == 0xF9400128);
5714 vassert(p
[5] == 0x91000508);
5715 vassert(p
[6] == 0xF9000128);
5716 imm64_to_ireg_EXACTLY4(p
, /*x*/9, (Addr
)location_of_counter
);
5717 VexInvalRange vir
= {(HWord
)p
, 4*4};
5721 /*---------------------------------------------------------------*/
5722 /*--- end host_arm64_defs.c ---*/
5723 /*---------------------------------------------------------------*/